From a4388e68b26f46864688cf440c21ff2cad7a9751 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 28 Feb 2025 18:28:58 -0800 Subject: [PATCH 01/15] initial chat template impl --- shared/api/tokenizer_impl.cc | 54 ++++++++++++++++++++++++++++++++++++ shared/api/tokenizer_impl.h | 6 ++++ 2 files changed, 60 insertions(+) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index fe7d6440f..d865b919c 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -131,6 +131,60 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector return {}; } +// Constant string variable to store predefined chat template strings for popular supported models +const std::string PHI4_CHAT_TEMPLATE = + R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; + +// Member variable to store the messages +std::vector> messages; + +// Member variable to store the chat_template (customized for each instance) +std::string chat_template; + +// Phi4ChatTemplate method to process messages and store result in output +OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { + // Clear the output string before starting + output->clear(); + + // Process the messages + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Check if "tools" is present in the message and is not empty for "system" role + if (role == "system" && message.find("tools") != message.end() && !message.at("tools").empty()) { + std::string tools = message.at("tools"); + *output += "<|" + role + "|>\n"; + *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>\n"; + } else { + // For other messages, no tools + *output += "<|" + role + "|>\n"; + *output += content + "<|end|>\n"; + } + } + + // Add generation prompt or eos_token + if (add_generation_prompt) { + *output += "<|assistant|>\n"; + } else { + *output += eos_token; + } + + return OrtxStatus(kOrtxOK, "Created chat template."); +} + +// ApplyChatTemplate method to choose the template logic based on chat_template +OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { + // Check if the chat_template matches the global PHI4_CHAT_TEMPLATE string + if (chat_template == PHI4_CHAT_TEMPLATE) { + // If the template matches, apply Phi4ChatTemplate logic + return Phi4ChatTemplate(output, add_generation_prompt, eos_token); + } else { + // Handle other templates or custom logic here + return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. 
Custom template handling needed."); + } +} + OrtxStatus TokenizerImpl::Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const { return std::visit([&](auto& detokenizer) { return detokenizer->Id2Token(id, token, state); }, detokenizer_); diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 395d2cb7c..74b500efb 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -30,6 +30,8 @@ class TokenizerImpl : public OrtxObjectImpl { return BatchDecode(t_ids, t_text); } + + OrtxStatus Token2Id(const std::string& token, extTokenId_t& id) const { id = std::visit([&](auto& tokenizer) { return tokenizer->GetTokenId(token); }, tokenizer_); return {}; @@ -52,6 +54,10 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus BatchDecode(const std::vector>& t_ids, std::vector& t_text) const; + OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + + OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; OrtxStatus GetDecoderPromptIds(size_t batch_size, const char* lang, const char* task, int no_timestamps, From be0f462fb83803eb7ea1fb11fd860520f896a3dd Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 28 Feb 2025 18:45:18 -0800 Subject: [PATCH 02/15] add phi 3.5 support --- shared/api/tokenizer_impl.cc | 41 ++++++++++++++++++++++++++++++++---- shared/api/tokenizer_impl.h | 2 ++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index d865b919c..5d484b46e 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -132,8 +132,8 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector } // Constant string variable to store predefined chat template strings for popular supported models -const std::string PHI4_CHAT_TEMPLATE = - R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; // Member variable to store the messages std::vector> messages; @@ -173,12 +173,45 @@ OrtxStatus 
TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generat return OrtxStatus(kOrtxOK, "Created chat template."); } +OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { + // Clear the output string before starting + output->clear(); + + // Process the messages + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Check for different roles and format accordingly + if (role == "system" && !content.empty()) { + *output += "<|system|>\n"; + *output += content + "<|end|>\n"; + } else if (role == "user") { + *output += "<|user|>\n"; + *output += content + "<|end|>\n"; + } else if (role == "assistant") { + *output += "<|assistant|>\n"; + *output += content + "<|end|>\n"; + } + } + + // Add generation prompt or eos_token + if (add_generation_prompt) { + *output += "<|assistant|>\n"; + } else { + *output += eos_token; + } + + return OrtxStatus(kOrtxOK, "Created chat template."); +} + // ApplyChatTemplate method to choose the template logic based on chat_template OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { - // Check if the chat_template matches the global PHI4_CHAT_TEMPLATE string + // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. if (chat_template == PHI4_CHAT_TEMPLATE) { - // If the template matches, apply Phi4ChatTemplate logic return Phi4ChatTemplate(output, add_generation_prompt, eos_token); + } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { + return Phi3_5ChatTemplate(output, add_generation_prompt, eos_token); } else { // Handle other templates or custom logic here return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. 
Custom template handling needed."); diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 74b500efb..b712762cd 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -56,6 +56,8 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; From c8176e27289ac9cc324c668fa80874d2164adc77 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Mon, 3 Mar 2025 10:55:46 -0800 Subject: [PATCH 03/15] remove unnecessary qualification --- shared/api/tokenizer_impl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index b712762cd..4a25a5dcc 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -54,11 +54,11 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus BatchDecode(const std::vector>& t_ids, std::vector& t_text) const; - OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); - OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); - OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; From a0d74e3b782b4a681e01d5a3d3c0c9db9e217c95 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Mon, 3 Mar 2025 18:17:27 -0800 Subject: [PATCH 04/15] add llama3 chat template --- shared/api/tokenizer_impl.cc | 95 ++++++++++++++++++++++++++++++++++++ shared/api/tokenizer_impl.h | 2 + 2 files changed, 97 insertions(+) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 5d484b46e..e1979d28d 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -134,6 +134,7 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector // Constant string variable to store predefined chat template strings for popular supported models const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% 
elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)" // Member variable to store the messages std::vector> messages; @@ -205,6 +206,98 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener return OrtxStatus(kOrtxOK, "Created chat template."); } +OrtxStatus TokenizerImpl::Llama3ChatTemplate( + std::string* output, + bool add_generation_prompt = true, + const std::string& eos_token = "<|eot_id|>", + const std::vector& custom_tools = {}, + bool tools_in_user_message = true, + const std::string& strftime_now = "", + const std::string& bos_token = "<|begin_of_text|>") { // Add bos_token as a parameter + + // Clear the output string before starting + output->clear(); + + // Prepend BOS token at the start of the output + *output += bos_token + "\n"; // BOS token goes first + + // Initialize date_string with default value + std::string date_string = "26 Jul 2024"; // Default date + if (!strftime_now.empty()) { + date_string = strftime_now; // Override with provided date string if available + } + + // Loop through messages and process each one + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Handle the system message + if (role == "system") { + *output += "<|start_header_id|>system<|end_header_id|>\n\n"; + *output += "Cutting Knowledge Date: December 2023\n"; + *output += "Today Date: " + date_string + "\n\n"; + + // Check if tools exist and append relevant information + if (!custom_tools.empty()) { + *output += "You have access to the following functions. 
To call a function, please respond with JSON for a function call.\n"; + *output += "Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value.}\n"; + *output += "Do not use variables.\n\n"; + + // Convert tools to JSON (assuming custom_tools is a vector of tool names as strings) + nlohmann::json tools_json = nlohmann::json::array(); + for (const auto& tool : custom_tools) { + tools_json.push_back(tool); + } + + *output += tools_json.dump(4) + "\n\n"; + } + *output += "<|eot_id|>\n"; + } + + // Handle user message with tools in it + if (tools_in_user_message && message.find("tool_calls") != message.end()) { + // Parse the tool_calls string into JSON (assuming it's a valid JSON string) + nlohmann::json tool_calls_json = nlohmann::json::parse(message.at("tool_calls")); + + if (tool_calls_json.size() != 1) { + // Handle multiple tool calls (not supported) + return OrtxStatus(kOrtxErrorInvalidArgument, "This model only supports single tool-calls at once!"); + } + + // Extract the function name and arguments from the first tool call + std::string function_name = tool_calls_json[0]["function"]; + nlohmann::json arguments = tool_calls_json[0]["arguments"]; + + // Create the JSON object for the tool call + nlohmann::json tool_call_json; + tool_call_json["name"] = function_name; + tool_call_json["parameters"] = arguments; + + // Serialize the tool call as JSON and append it to output + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + *output += tool_call_json.dump() + "\n"; + *output += "<|eot_id|>\n"; // End of tool call + } + + // Handle other messages (user, assistant, etc.) + else { + *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; + *output += content + "\n"; + *output += "<|eot_id|>\n"; + } + } + + // Add generation prompt or eos_token at the end + if (add_generation_prompt) { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + } else { + *output += eos_token; // Add the EOS token instead + } + + return OrtxStatus(kOrtxOK, "Created chat template."); +} + // ApplyChatTemplate method to choose the template logic based on chat_template OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. @@ -212,6 +305,8 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_genera return Phi4ChatTemplate(output, add_generation_prompt, eos_token); } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { return Phi3_5ChatTemplate(output, add_generation_prompt, eos_token); + } else if (chat_template == LLAMA3_CHAT_TEMPLATE) { + return Llama3ChatTemplate(output, add_generation_prompt, eos_token); } else { // Handle other templates or custom logic here return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. 
Custom template handling needed."); diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 4a25a5dcc..c42ea2698 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -58,6 +58,8 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus Llama3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); + OrtxStatus ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; From db9f7150e30388e108c452cd109626e1b1cf1ddf Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Tue, 4 Mar 2025 15:11:53 -0800 Subject: [PATCH 05/15] fix typo --- shared/api/tokenizer_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index e1979d28d..ca42285a6 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -134,7 +134,7 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector // Constant string variable to store predefined chat template strings for popular supported models const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; -const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. 
#}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)" +const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; // Member variable to store the messages std::vector> messages; From 9955f0a1451fba36f4b4acb3d866ddeec168ef21 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Tue, 4 Mar 2025 17:24:28 -0800 Subject: [PATCH 06/15] add json loading and unit test --- operators/tokenizer/tokenizer_jsconfig.hpp | 5 ++++ shared/api/tokenizer_impl.cc | 28 +++++++++++++--------- shared/api/tokenizer_impl.h | 9 ++++++- test/pp_api_test/test_tokenizer.cc | 27 +++++++++++++++++++++ 4 files changed, 57 insertions(+), 12 deletions(-) diff --git a/operators/tokenizer/tokenizer_jsconfig.hpp b/operators/tokenizer/tokenizer_jsconfig.hpp index 22b44a1d5..65745c9d1 100644 --- a/operators/tokenizer/tokenizer_jsconfig.hpp +++ b/operators/tokenizer/tokenizer_jsconfig.hpp @@ -71,6 +71,7 @@ class TokenJsonConfig final { bos_token_ = ""; eos_token_ = ""; unk_token_ = ""; + chat_template_ = ""; // can add default chat template return {}; } @@ -91,6 +92,8 @@ class TokenJsonConfig final { parse_token(json_config, "eos_token", eos_token_); parse_token(json_config, "unk_token", unk_token_); + parse_token(json_config, "chat_template", chat_template_); + auto pad_iter = json_config.find("pad_token"); if (pad_iter 
!= json_config.end() && pad_iter->is_string()) { pad_token_ = json_config.value("pad_token", ""); @@ -245,6 +248,8 @@ class TokenJsonConfig final { std::string unk_token_; std::string pad_token_; + std::string chat_template_; + AddedTokenMap added_tokens_; static AddedToken ParseAddedToken(const json& token) { diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index ca42285a6..10d401296 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -87,6 +87,8 @@ OrtxStatus TokenizerImpl::Load(const std::string& tok_path) { return status; } + chat_template = tok_config_->chat_template_; + return LoadTokenizer(); } @@ -143,7 +145,7 @@ std::vector> messages; std::string chat_template; // Phi4ChatTemplate method to process messages and store result in output -OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { +OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") { // Clear the output string before starting output->clear(); @@ -155,18 +157,18 @@ OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generat // Check if "tools" is present in the message and is not empty for "system" role if (role == "system" && message.find("tools") != message.end() && !message.at("tools").empty()) { std::string tools = message.at("tools"); - *output += "<|" + role + "|>\n"; - *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>\n"; + *output += "<|" + role + "|>"; + *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>"; } else { // For other messages, no tools - *output += "<|" + role + "|>\n"; - *output += content + "<|end|>\n"; + *output += "<|" + role + "|>"; + *output += content + "<|end|>"; } } // Add generation prompt or eos_token if (add_generation_prompt) { - *output += "<|assistant|>\n"; + *output += "<|assistant|>"; } else { *output += eos_token; } @@ -174,7 +176,7 @@ OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generat return OrtxStatus(kOrtxOK, "Created chat template."); } -OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { +OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") { // Clear the output string before starting output->clear(); @@ -299,14 +301,18 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( } // ApplyChatTemplate method to choose the template logic based on chat_template -OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { +OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector> message_list, std::string* output, bool add_generation_prompt = true) { + + // Initialize messages + messages = message_list; + // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. 
if (chat_template == PHI4_CHAT_TEMPLATE) { - return Phi4ChatTemplate(output, add_generation_prompt, eos_token); + return Phi4ChatTemplate(output, add_generation_prompt); } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { - return Phi3_5ChatTemplate(output, add_generation_prompt, eos_token); + return Phi3_5ChatTemplate(output, add_generation_prompt); } else if (chat_template == LLAMA3_CHAT_TEMPLATE) { - return Llama3ChatTemplate(output, add_generation_prompt, eos_token); + return Llama3ChatTemplate(output, add_generation_prompt); } else { // Handle other templates or custom logic here return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. Custom template handling needed."); diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index c42ea2698..3fabf7eb1 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -54,13 +54,20 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus BatchDecode(const std::vector>& t_ids, std::vector& t_text) const; + const std::string PHI4_CHAT_TEMPLATE; + const std::string PHI3_5_CHAT_TEMPLATE; + const std::string LLAMA3_CHAT_TEMPLATE; + + std::string chat_template; + std::vector> messages; + OrtxStatus Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Llama3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); - OrtxStatus ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus ApplyChatTemplate(std::vector> messages, std::string* output, bool add_generation_prompt); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; diff --git a/test/pp_api_test/test_tokenizer.cc b/test/pp_api_test/test_tokenizer.cc index 705ef2bfc..b45800180 100644 --- a/test/pp_api_test/test_tokenizer.cc +++ b/test/pp_api_test/test_tokenizer.cc @@ -611,3 +611,30 @@ TEST(OrtxTokenizerTest, AddedTokensTest) { DumpTokenIds(token_ids); EXPECT_EQ(token_ids[0], EXPECTED_IDS_0); } + +TEST(OrtxTokenizerTest, ChatTemplate) { + auto tokenizer = std::make_unique(); + + // Since we do not have local test files for Phi4/Llama3/DeepSeek, we simply manually + // set the chat_template, but otherwise this will be loaded from the tokenizer config automatically. 
+ tokenizer->chat_template = tokenizer->PHI4_CHAT_TEMPLATE; + + std::vector> messages = { + {{"role", "system"}, {"content", "You are a helpful assistant."}, {"tools", "Calculator"}}, + {{"role", "user"}, {"content", "How do I add two numbers?"}}, + {{"role", "assistant"}, {"content", "You can add numbers by using the '+' operator."}} + }; + + // From HuggingFace Python output for 'microsoft/Phi-4-multimodal-instruct' + std::string expected_output = "<|system|>You are a helpful assistant.<|tool|>Calculator<|/tool|><|end|><|user|>How do I add two numbers?<|end|><|assistant|>You can add numbers by using the '+' operator.<|end|><|assistant|>"; + + std::string output = ""; + + auto status = tokenizer->ApplyChatTemplate(messages, &output, true); + + if (!status.IsOk()) { + std::cout << status.ToString() << std::endl; + } + + ASSERT_EQ(output, expected_output); +} From 75aa05be28460761412b2971b14cbe6abf5e9069 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Wed, 5 Mar 2025 14:52:56 -0800 Subject: [PATCH 07/15] add deepseek chat template support --- shared/api/tokenizer_impl.cc | 149 +++++++++++++++++++++++++++++++---- shared/api/tokenizer_impl.h | 2 + 2 files changed, 134 insertions(+), 17 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 10d401296..56c23516b 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -137,6 +137,7 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. 
#}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; +const std::string DEEPSEEK_CHAT_TEMPLATE = R"({% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %})"; // Member variable to store the messages std::vector> messages; @@ 
-173,7 +174,7 @@ OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generat *output += eos_token; } - return OrtxStatus(kOrtxOK, "Created chat template."); + return OrtxStatus(kOrtxOK, "Created Phi-4 chat template."); } OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") { @@ -205,7 +206,7 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener *output += eos_token; } - return OrtxStatus(kOrtxOK, "Created chat template."); + return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template."); } OrtxStatus TokenizerImpl::Llama3ChatTemplate( @@ -297,26 +298,140 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( *output += eos_token; // Add the EOS token instead } - return OrtxStatus(kOrtxOK, "Created chat template."); + return OrtxStatus(kOrtxOK, "Created Llama3 chat template."); +} + +OrtxStatus TokenizerImpl::DeepSeekChatTemplate( + std::string* output, + bool add_generation_prompt = false, + const std::string& eos_token = "<|eot_id|>", + const std::string& bos_token = "<|begin_of_text|>") { // Add bos_token as a parameter + + // Clear the output string before starting + output->clear(); + + // Initialize the namespace for template variables + bool is_first = true; // Track the first occurrence of the tool call or assistant message + bool is_tool = false; + bool is_output_first = true; + std::string system_prompt = ""; + + // Prepend BOS token at the start of the output + *output += bos_token; + + // Loop through messages and process each one + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Handle the system message + if (role == "system") { + system_prompt = content; + } + } + + *output += system_prompt; // Add system prompt to the output + + // Process each message in the conversation + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); // Now content is correctly defined here + + // Handle user message + if (role == "user") { + is_tool = false; + *output += "<|User|>" + content; + } + + // Handle assistant message with tool calls + if (role == "assistant" && message.find("tool_calls") != message.end()) { + is_tool = false; + + // Parse the tool_calls string into JSON + nlohmann::json tool_calls_json = nlohmann::json::parse(message.at("tool_calls")); + + if (tool_calls_json.size() != 1) { + // Handle multiple tool calls (not supported) + return OrtxStatus(kOrtxErrorInvalidArgument, "This model only supports single tool-calls at once!"); + } + + // Extract the function name and arguments from the first tool call + std::string function_name = tool_calls_json[0]["function"]; + nlohmann::json arguments = tool_calls_json[0]["arguments"]; + + // Create the JSON object for the tool call + nlohmann::json tool_call_json; + tool_call_json["name"] = function_name; + tool_call_json["parameters"] = arguments; + + // Handle the first tool call differently + if (is_first) { + *output += "<|Assistant|><|tool_calls_begin|><|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; + is_first = false; // Mark as first tool call processed + } else { + // Subsequent tool calls + *output += "\n<|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" 
+ tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; + } + + *output += "<|tool_calls_end|><|end_of_sentence|>"; + } + + // Handle assistant message without tool calls + if (role == "assistant" && !content.empty()) { + if (is_tool) { + *output += "<|tool_outputs_end|>" + content + "<|end_of_sentence|>"; + is_tool = false; + } else { + *output += "<|Assistant|>" + content + "<|end_of_sentence|>"; + } + } + + // Handle tool messages + if (role == "tool") { + is_tool = true; + if (is_output_first) { + *output += "<|tool_outputs_begin|><|tool_output_begin|>" + content + "<|tool_output_end|>"; + is_output_first = false; + } else { + *output += "\n<|tool_output_begin|>" + content + "<|tool_output_end|>"; + } + } + } + + // If still in a tool message, close it + if (is_tool) { + *output += "<|tool_outputs_end|>"; + } + + // Add generation prompt or eos_token at the end + if (add_generation_prompt && !is_tool) { + *output += "<|Assistant|>\n"; + } else { + *output += eos_token; // Add the EOS token instead + } + + return OrtxStatus(kOrtxOK, "Created DeepSeek chat template."); } // ApplyChatTemplate method to choose the template logic based on chat_template OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector> message_list, std::string* output, bool add_generation_prompt = true) { - // Initialize messages - messages = message_list; - - // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. - if (chat_template == PHI4_CHAT_TEMPLATE) { - return Phi4ChatTemplate(output, add_generation_prompt); - } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { - return Phi3_5ChatTemplate(output, add_generation_prompt); - } else if (chat_template == LLAMA3_CHAT_TEMPLATE) { - return Llama3ChatTemplate(output, add_generation_prompt); - } else { - // Handle other templates or custom logic here - return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. Custom template handling needed."); - } + // Initialize messages + messages = message_list; + + // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. + if (chat_template == PHI4_CHAT_TEMPLATE) { + return Phi4ChatTemplate(output, add_generation_prompt); + } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { + return Phi3_5ChatTemplate(output, add_generation_prompt); + } else if (chat_template == LLAMA3_CHAT_TEMPLATE) { + return Llama3ChatTemplate(output, add_generation_prompt); + } else if (chat_template == DEEPSEEK_CHAT_TEMPLATE) { + return DeepSeekChatTemplate(output, add_generation_prompt); + } else { + // Handle other templates or custom logic here + return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. 
Custom template handling needed."); + } } OrtxStatus TokenizerImpl::Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const { diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 3fabf7eb1..6b94d84aa 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -67,6 +67,8 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus Llama3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); + OrtxStatus DeepSeekChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::string& bos_token); + OrtxStatus ApplyChatTemplate(std::vector> messages, std::string* output, bool add_generation_prompt); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; From 3043d3cdca1b9dcd23ec4f98f38521454a04bbe0 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Wed, 5 Mar 2025 19:55:01 -0800 Subject: [PATCH 08/15] fix llama3 issue --- shared/api/tokenizer_impl.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 56c23516b..90633dd1e 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -222,7 +222,7 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( output->clear(); // Prepend BOS token at the start of the output - *output += bos_token + "\n"; // BOS token goes first + *output += bos_token; // BOS token goes first // Initialize date_string with default value std::string date_string = "26 Jul 2024"; // Default date @@ -255,7 +255,6 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( *output += tools_json.dump(4) + "\n\n"; } - *output += "<|eot_id|>\n"; } // Handle user message with tools in it @@ -279,15 +278,17 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( // Serialize the tool call as JSON and append it to output *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; - *output += tool_call_json.dump() + "\n"; - *output += "<|eot_id|>\n"; // End of tool call + *output += tool_call_json.dump(); + *output += "<|eot_id|>"; // End of tool call } // Handle other messages (user, assistant, etc.) 
else { - *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; - *output += content + "\n"; - *output += "<|eot_id|>\n"; + if (role != "system") { + *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; + } + *output += content; + *output += "<|eot_id|>"; } } From d08a61766472bfd52c9f14282bf9d590d53fc7ee Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Thu, 6 Mar 2025 14:50:08 -0800 Subject: [PATCH 09/15] fix deepseek issues --- shared/api/tokenizer_impl.cc | 24 ++++++++++++------------ shared/api/tokenizer_impl.h | 1 + 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 90633dd1e..7e148be47 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -305,8 +305,8 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( OrtxStatus TokenizerImpl::DeepSeekChatTemplate( std::string* output, bool add_generation_prompt = false, - const std::string& eos_token = "<|eot_id|>", - const std::string& bos_token = "<|begin_of_text|>") { // Add bos_token as a parameter + const std::string& eos_token = "<|end▁of▁sentence|>", + const std::string& bos_token = "<|begin▁of▁sentence|>") { // Add bos_token as a parameter // Clear the output string before starting output->clear(); @@ -341,7 +341,7 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( // Handle user message if (role == "user") { is_tool = false; - *output += "<|User|>" + content; + *output += "<|User|>" + content; } // Handle assistant message with tool calls @@ -367,23 +367,23 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( // Handle the first tool call differently if (is_first) { - *output += "<|Assistant|><|tool_calls_begin|><|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; + *output += "<|Assistant|><|tool_calls_begin|><|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; is_first = false; // Mark as first tool call processed } else { // Subsequent tool calls - *output += "\n<|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; + *output += "\n<|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; } - *output += "<|tool_calls_end|><|end_of_sentence|>"; + *output += "<|tool_calls_end|><|end▁of▁sentence|>"; } // Handle assistant message without tool calls if (role == "assistant" && !content.empty()) { if (is_tool) { - *output += "<|tool_outputs_end|>" + content + "<|end_of_sentence|>"; + *output += "<|tool_outputs_end|>" + content + "<|end▁of▁sentence|>"; is_tool = false; } else { - *output += "<|Assistant|>" + content + "<|end_of_sentence|>"; + *output += "<|Assistant|>" + content + "<|end▁of▁sentence|>"; } } @@ -391,22 +391,22 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( if (role == "tool") { is_tool = true; if (is_output_first) { - *output += "<|tool_outputs_begin|><|tool_output_begin|>" + content + "<|tool_output_end|>"; + *output += "<|tool_outputs_begin|><|tool_output_begin|>" + content + 
"<|tool_output_end|>"; is_output_first = false; } else { - *output += "\n<|tool_output_begin|>" + content + "<|tool_output_end|>"; + *output += "\n<|tool_output_begin|>" + content + "<|tool_output_end|>"; } } } // If still in a tool message, close it if (is_tool) { - *output += "<|tool_outputs_end|>"; + *output += "<|tool_outputs_end|>"; } // Add generation prompt or eos_token at the end if (add_generation_prompt && !is_tool) { - *output += "<|Assistant|>\n"; + *output += "<|Assistant|>\n"; } else { *output += eos_token; // Add the EOS token instead } diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 6b94d84aa..2c1e743fa 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -57,6 +57,7 @@ class TokenizerImpl : public OrtxObjectImpl { const std::string PHI4_CHAT_TEMPLATE; const std::string PHI3_5_CHAT_TEMPLATE; const std::string LLAMA3_CHAT_TEMPLATE; + const std::string DEEPSEEK_CHAT_TEMPLATE; std::string chat_template; std::vector> messages; From 21d024519d470418391d6471872ab9d4d65dbb06 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 11:09:31 -0800 Subject: [PATCH 10/15] fix eos handing in some cases --- shared/api/tokenizer_impl.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 7e148be47..4203ba7a5 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -279,7 +279,7 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( // Serialize the tool call as JSON and append it to output *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; *output += tool_call_json.dump(); - *output += "<|eot_id|>"; // End of tool call + *output += eos_token; // End of tool call } // Handle other messages (user, assistant, etc.) 
@@ -288,7 +288,7 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; } *output += content; - *output += "<|eot_id|>"; + *output += eos_token; } } @@ -374,16 +374,19 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( *output += "\n<|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; } - *output += "<|tool_calls_end|><|end▁of▁sentence|>"; + *output += "<|tool_calls_end|>"; + *output += eos_token; } // Handle assistant message without tool calls if (role == "assistant" && !content.empty()) { if (is_tool) { - *output += "<|tool_outputs_end|>" + content + "<|end▁of▁sentence|>"; + *output += "<|tool_outputs_end|>" + content; + *output += eos_token; is_tool = false; } else { - *output += "<|Assistant|>" + content + "<|end▁of▁sentence|>"; + *output += "<|Assistant|>" + content; + *output += eos_token; } } From 52de34f5481f461c3cf7ed65c3a33e87f3c4f818 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 15:05:19 -0800 Subject: [PATCH 11/15] add llama 3.3 support --- shared/api/tokenizer_impl.cc | 140 ++++++++++++++++++++++++++++++++++- shared/api/tokenizer_impl.h | 6 +- 2 files changed, 141 insertions(+), 5 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 4203ba7a5..f8d6d5063 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -136,7 +136,9 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector // Constant string variable to store predefined chat template strings for popular supported models const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; -const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. 
#}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; +const std::string LLAMA3_CHAT_TEMPLATE = R"({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %})"; +const std::string LLAMA3_2_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; +const std::string LLAMA3_3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. 
#}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; const std::string DEEPSEEK_CHAT_TEMPLATE = R"({% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if 
message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %})"; // Member variable to store the messages @@ -209,7 +211,7 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template."); } -OrtxStatus TokenizerImpl::Llama3ChatTemplate( +OrtxStatus TokenizerImpl::Llama3_2ChatTemplate( std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eot_id|>", @@ -302,6 +304,134 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( return OrtxStatus(kOrtxOK, "Created Llama3 chat template."); } +OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( + std::string* output, + bool add_generation_prompt = true, + const std::string& eos_token = "<|eot_id|>", + const std::vector& custom_tools = {}, + const std::vector& builtin_tools = {}, // Added builtin_tools as parameter + bool tools_in_user_message = true, + const std::string& date_string = "26 Jul 2024", // Default date string parameter + const std::string& bos_token = "<|begin_of_text|>") { // BOS token as a parameter + + // Clear the output string before starting + output->clear(); + + // Prepend BOS token at the start of the output + *output += bos_token; // BOS token goes first + + // Loop through messages and process each one + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Handle the system message + if (role == "system") { + *output += "<|start_header_id|>system<|end_header_id|>\n\n"; + *output += "Cutting Knowledge Date: December 2023\n"; + *output += "Today Date: " + date_string + "\n\n"; + + // Check if builtin_tools or custom_tools exist and append relevant information + if (!builtin_tools.empty() || !custom_tools.empty()) { + *output += "Environment: ipython\n"; + } + + // Add builtin tools if defined (excluding 'code_interpreter') + if (!builtin_tools.empty()) { + *output += "Tools: "; + bool first = true; + for (const auto& tool : builtin_tools) { + if (tool != "code_interpreter") { + if (!first) { + *output += ", "; + } + *output += tool; + first = false; + } + } + *output += "\n\n"; + } + + // Add the tools section if custom tools are provided + if (!custom_tools.empty()) { + *output += "You have access to the following functions. 
To call a function, please respond with JSON for a function call.\n"; + *output += "Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.\n"; + *output += "Do not use variables.\n\n"; + + // Convert custom_tools to JSON + nlohmann::json tools_json = nlohmann::json::array(); + for (const auto& tool : custom_tools) { + tools_json.push_back(tool); + } + *output += tools_json.dump(4) + "\n\n"; + } + } + + // Handle user message with tools in it + if (tools_in_user_message && message.find("tool_calls") != message.end()) { + // Parse the tool_calls string into JSON + nlohmann::json tool_calls_json = nlohmann::json::parse(message.at("tool_calls")); + + if (tool_calls_json.size() != 1) { + // Handle multiple tool calls (not supported) + return OrtxStatus(kOrtxErrorInvalidArgument, "This model only supports single tool-calls at once!"); + } + + // Extract the function name and arguments from the first tool call + std::string function_name = tool_calls_json[0]["function"]; + nlohmann::json arguments = tool_calls_json[0]["arguments"]; + + // Create the JSON object for the tool call + nlohmann::json tool_call_json; + tool_call_json["name"] = function_name; + tool_call_json["parameters"] = arguments; + + // If the tool is a built-in tool, use the specific format for ipython + bool is_builtin_tool = std::find(builtin_tools.begin(), builtin_tools.end(), function_name) != builtin_tools.end(); + if (is_builtin_tool) { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + *output += "<|python_tag|>" + function_name + ".call("; + bool first = true; + for (auto& [arg_name, arg_val] : arguments.items()) { + if (!first) { + *output += ", "; + } + *output += arg_name + "=\"" + arg_val.get() + "\""; + first = false; + } + *output += ")"; + } else { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + *output += tool_call_json.dump(); + } + + if (!builtin_tools.empty()) { + *output += "<|eom_id|>"; + } else { + *output += eos_token; // Replaced <|eot_id|> with eos_token + } + } + + // Handle other messages (user, assistant, etc.) 
+ else { + if (role != "system") { + *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; + } + *output += content; + *output += eos_token; // Replaced <|eot_id|> with eos_token + } + } + + // Add generation prompt or eos_token at the end + if (add_generation_prompt) { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + } else { + *output += eos_token; // Replaced <|eot_id|> with eos_token + } + + return OrtxStatus(kOrtxOK, "Created chat template."); +} + OrtxStatus TokenizerImpl::DeepSeekChatTemplate( std::string* output, bool add_generation_prompt = false, @@ -428,8 +558,10 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); + OrtxStatus Llama3_2ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); + OrtxStatus Llama3_3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, const std::vector& builtin_tools, bool tools_in_user_message, const std::string& date_string, const std::string& bos_token); + OrtxStatus DeepSeekChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::string& bos_token); OrtxStatus ApplyChatTemplate(std::vector> messages, std::string* output, bool add_generation_prompt); From 7e6e4b67f9c37bb7e573840c0d4742b0165051c7 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 16:06:30 -0800 Subject: [PATCH 12/15] add base llama 3 support --- shared/api/tokenizer_impl.cc | 71 ++++++++++++++++++++++++++++-------- shared/api/tokenizer_impl.h | 2 + 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index f8d6d5063..fc8157574 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -201,7 +201,7 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener } } - // Add generation prompt or eos_token + // Add generation prompt or EOS token if (add_generation_prompt) { *output += "<|assistant|>\n"; } else { @@ -211,6 +211,43 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template."); } +OrtxStatus TokenizerImpl::Llama3ChatTemplate( + std::string* output, + bool add_generation_prompt = true, + const std::string& eos_token = "<|eot_id|>", + const std::string& bos_token = "<|begin_of_text|>") { + + // Clear the output string before starting + output->clear(); + + // Iterate over the messages to construct the template + for (size_t i = 0; i < messages.size(); ++i) { + const auto& message = messages[i]; + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Build the message with header and content + std::string formatted_content = "<|start_header_id|>" + role + "<|end_header_id|>\n\n" + content + eos_token; + + // Add BOS token only to the first message + if (i == 0) { + formatted_content = bos_token + formatted_content; + } + + // Append the formatted message to the output + *output += formatted_content; + } + + // Add generation prompt or eos_token at the end + if (add_generation_prompt) { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + } else { + *output += eos_token; + } + + return 
OrtxStatus(kOrtxOK, "Created Llama 3 chat template."); +} + OrtxStatus TokenizerImpl::Llama3_2ChatTemplate( std::string* output, bool add_generation_prompt = true, @@ -218,13 +255,13 @@ OrtxStatus TokenizerImpl::Llama3_2ChatTemplate( const std::vector& custom_tools = {}, bool tools_in_user_message = true, const std::string& strftime_now = "", - const std::string& bos_token = "<|begin_of_text|>") { // Add bos_token as a parameter + const std::string& bos_token = "<|begin_of_text|>") { // Clear the output string before starting output->clear(); // Prepend BOS token at the start of the output - *output += bos_token; // BOS token goes first + *output += bos_token; // Initialize date_string with default value std::string date_string = "26 Jul 2024"; // Default date @@ -298,10 +335,10 @@ OrtxStatus TokenizerImpl::Llama3_2ChatTemplate( if (add_generation_prompt) { *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; } else { - *output += eos_token; // Add the EOS token instead + *output += eos_token; } - return OrtxStatus(kOrtxOK, "Created Llama3 chat template."); + return OrtxStatus(kOrtxOK, "Created Llama 3.2 chat template."); } OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( @@ -309,16 +346,16 @@ OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( bool add_generation_prompt = true, const std::string& eos_token = "<|eot_id|>", const std::vector& custom_tools = {}, - const std::vector& builtin_tools = {}, // Added builtin_tools as parameter + const std::vector& builtin_tools = {}, bool tools_in_user_message = true, - const std::string& date_string = "26 Jul 2024", // Default date string parameter - const std::string& bos_token = "<|begin_of_text|>") { // BOS token as a parameter + const std::string& date_string = "26 Jul 2024", + const std::string& bos_token = "<|begin_of_text|>") { // Clear the output string before starting output->clear(); // Prepend BOS token at the start of the output - *output += bos_token; // BOS token goes first + *output += bos_token; // Loop through messages and process each one for (const auto& message : messages) { @@ -408,7 +445,7 @@ OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( if (!builtin_tools.empty()) { *output += "<|eom_id|>"; } else { - *output += eos_token; // Replaced <|eot_id|> with eos_token + *output += eos_token; } } @@ -418,7 +455,7 @@ OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; } *output += content; - *output += eos_token; // Replaced <|eot_id|> with eos_token + *output += eos_token; } } @@ -426,17 +463,17 @@ OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( if (add_generation_prompt) { *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; } else { - *output += eos_token; // Replaced <|eot_id|> with eos_token + *output += eos_token; } - return OrtxStatus(kOrtxOK, "Created chat template."); + return OrtxStatus(kOrtxOK, "Created Llama 3.1/3.3 chat template."); // Llama 3.1 and 3.3 have the same chat template } OrtxStatus TokenizerImpl::DeepSeekChatTemplate( std::string* output, bool add_generation_prompt = false, const std::string& eos_token = "<|end▁of▁sentence|>", - const std::string& bos_token = "<|begin▁of▁sentence|>") { // Add bos_token as a parameter + const std::string& bos_token = "<|begin▁of▁sentence|>") { // Clear the output string before starting output->clear(); @@ -466,7 +503,7 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( // Process each message in the conversation for (const auto& message : messages) { std::string role = message.at("role"); - 
std::string content = message.at("content"); // Now content is correctly defined here + std::string content = message.at("content"); // Handle user message if (role == "user") { @@ -541,7 +578,7 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( if (add_generation_prompt && !is_tool) { *output += "<|Assistant|>\n"; } else { - *output += eos_token; // Add the EOS token instead + *output += eos_token; } return OrtxStatus(kOrtxOK, "Created DeepSeek chat template."); @@ -558,6 +595,8 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); OrtxStatus Llama3_3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, const std::vector& builtin_tools, bool tools_in_user_message, const std::string& date_string, const std::string& bos_token); From c93486ef73494364a616dd94589a54219b7889e7 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 17:05:09 -0800 Subject: [PATCH 13/15] add llama 2 support --- shared/api/tokenizer_impl.cc | 54 ++++++++++++++++++++++++++++++++++++ shared/api/tokenizer_impl.h | 3 ++ 2 files changed, 57 insertions(+) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index fc8157574..9dde47524 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -136,6 +136,7 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector // Constant string variable to store predefined chat template strings for popular supported models const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string LLAMA2_CHAT_TEMPLATE = R"({% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %})"; const std::string LLAMA3_CHAT_TEMPLATE = R"({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + 
message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %})"; const std::string LLAMA3_2_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; const std::string LLAMA3_3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; @@ -211,6 +212,57 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template."); } +OrtxStatus TokenizerImpl::Llama2ChatTemplate( + std::string* output, + bool add_generation_prompt = true, + const std::string& eos_token = "", + const std::string& bos_token = "") { + + // Clear the output string before starting + output->clear(); + + // Initialize system message and process it + bool system_message_exists = false; + std::string system_message = ""; + + if (!messages.empty() && messages[0].at("role") == "system") { + system_message = 
messages[0].at("content"); + system_message_exists = true; + } + + // If system message exists, we start processing from the second message + size_t start_index = system_message_exists ? 1 : 0; + + // Iterate over the messages to construct the template + for (size_t i = start_index; i < messages.size(); ++i) { + const auto& message = messages[i]; + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Check if the conversation roles alternate between user and assistant + if ((role == "user") != (i % 2 == start_index % 2)) { + return OrtxStatus(kOrtxErrorInvalidArgument, "Conversation roles must alternate user/assistant/user/assistant..."); + } + + // Handle system message by prepending it to the first assistant's message + std::string formatted_content; + if (i == start_index && system_message_exists) { + formatted_content = "<>\n" + system_message + "\n<>\n\n" + content; + } else { + formatted_content = content; + } + + // Add the appropriate markers for user and assistant roles + if (role == "user") { + *output += bos_token + "[INST] " + formatted_content + " [/INST]"; + } else if (role == "assistant") { + *output += " " + formatted_content + " " + eos_token; + } + } + + return OrtxStatus(kOrtxOK, "Created Llama 2 chat template."); +} + OrtxStatus TokenizerImpl::Llama3ChatTemplate( std::string* output, bool add_generation_prompt = true, @@ -595,6 +647,8 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); From 231093c01a277a9e31dff4c786c4f5d8927ccdad Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 18:00:40 -0800 Subject: [PATCH 14/15] add phi vision support and code clean up --- shared/api/tokenizer_impl.cc | 90 +++++++++++++++++++++++------------- shared/api/tokenizer_impl.h | 10 ++-- 2 files changed, 65 insertions(+), 35 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 9dde47524..624e1ae49 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -134,8 +134,10 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector } // Constant string variable to store predefined chat template strings for popular supported models -const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string PHI_VISION_CHAT_TEMPLATE = R"({% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %})"; +const std::string PHI3_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for 
message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string LLAMA2_CHAT_TEMPLATE = R"({% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %})"; const std::string LLAMA3_CHAT_TEMPLATE = R"({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %})"; const std::string LLAMA3_2_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. 
To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)";
@@ -148,39 +150,30 @@ std::vector<std::unordered_map<std::string, std::string>> messages;
 // Member variable to store the chat_template (customized for each instance)
 std::string chat_template;
 
-// Phi4ChatTemplate method to process messages and store result in output
-OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
-  // Clear the output string before starting
-  output->clear();
-
-  // Process the messages
-  for (const auto& message : messages) {
-    std::string role = message.at("role");
-    std::string content = message.at("content");
-
-    // Check if "tools" is present in the message and is not empty for "system" role
-    if (role == "system" && message.find("tools") != message.end() && !message.at("tools").empty()) {
-      std::string tools = message.at("tools");
-      *output += "<|" + role + "|>";
-      *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>";
-    } else {
-      // For other messages, no tools
-      *output += "<|" + role + "|>";
-      *output += content + "<|end|>";
-    }
-  }
+OrtxStatus TokenizerImpl::PhiVisionChatTemplate(std::string* output, bool add_generation_prompt = true) {
 
-  // Add generation prompt or eos_token
-  if (add_generation_prompt) {
-    *output += "<|assistant|>";
-  } else {
-    *output += eos_token;
-  }
+  // Clear the output string before starting
+  output->clear();
 
-  return OrtxStatus(kOrtxOK, "Created Phi-4 chat template.");
+  // Iterate over the messages
+  for (const auto& message : messages) {
+    std::string role = message.at("role");
+    std::string content = message.at("content");
+
+    // Format the message according to the role
+    *output += "<|" + role + "|>\n" + content + "<|end|>\n";
+  }
+
+  // Check if a generation prompt is needed and the last message isn't from the assistant
+  if (add_generation_prompt && messages.back().at("role") != "assistant") {
+    *output += "<|assistant|>\n";
+  }
+
+  return OrtxStatus(kOrtxOK, "Created Phi Vision chat template.");
 }
 
-OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
+// Note Phi-3 and Phi-3.5 have slightly different chat template strings but share the same functionality so this method can be used for both.
+OrtxStatus TokenizerImpl::Phi3ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
   // Clear the output string before starting
   output->clear();
 
@@ -212,6 +205,37 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener
   return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template.");
 }
 
+OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
+  // Clear the output string before starting
+  output->clear();
+
+  // Process the messages
+  for (const auto& message : messages) {
+    std::string role = message.at("role");
+    std::string content = message.at("content");
+
+    // Check if "tools" is present in the message and is not empty for "system" role
+    if (role == "system" && message.find("tools") != message.end() && !message.at("tools").empty()) {
+      std::string tools = message.at("tools");
+      *output += "<|" + role + "|>";
+      *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>";
+    } else {
+      // For other messages, no tools
+      *output += "<|" + role + "|>";
+      *output += content + "<|end|>";
+    }
+  }
+
+  // Add generation prompt or eos_token
+  if (add_generation_prompt) {
+    *output += "<|assistant|>";
+  } else {
+    *output += eos_token;
+  }
+
+  return OrtxStatus(kOrtxOK, "Created Phi-4 chat template.");
+}
+
 OrtxStatus TokenizerImpl::Llama2ChatTemplate(
     std::string* output,
     bool add_generation_prompt = true,
@@ -645,8 +669,10 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector
diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h
--- a/shared/api/tokenizer_impl.h
+++ b/shared/api/tokenizer_impl.h
   OrtxStatus BatchDecode(const std::vector<std::vector<extTokenId_t>>& t_ids, std::vector<std::string>& t_text) const;
-  const std::string PHI4_CHAT_TEMPLATE;
+  const std::string PHI_VISION_CHAT_TEMPLATE;
+  const std::string PHI3_CHAT_TEMPLATE;
   const std::string PHI3_5_CHAT_TEMPLATE;
+  const std::string PHI4_CHAT_TEMPLATE;
   const std::string LLAMA2_CHAT_TEMPLATE;
   const std::string LLAMA3_CHAT_TEMPLATE;
   const std::string LLAMA3_2_CHAT_TEMPLATE;
@@ -64,11 +66,13 @@ class TokenizerImpl : public OrtxObjectImpl {
   std::string chat_template;
   std::vector<std::unordered_map<std::string, std::string>> messages;
+
+  OrtxStatus PhiVisionChatTemplate(std::string* output, bool add_generation_prompt);
+
+  OrtxStatus Phi3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token);
 
   OrtxStatus Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token);
 
-  OrtxStatus Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token);
-
   OrtxStatus Llama2ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::string& bos_token);
 
   OrtxStatus Llama3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::string& bos_token);
 

From 34b8ed762c024243fac8465e1c36876121d00ec2 Mon Sep 17 00:00:00 2001
From: Sayan Shaw
Date: Fri, 7 Mar 2025 19:15:52 -0800
Subject: [PATCH 15/15] add phi 3 small and medium support

---
 shared/api/tokenizer_impl.cc | 57 ++++++++++++++++++++++++++++++++++--
 shared/api/tokenizer_impl.h  |  6 ++++
 2 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc
index 624e1ae49..e1a37310c 100644
--- a/shared/api/tokenizer_impl.cc
+++ b/shared/api/tokenizer_impl.cc
@@ -136,6 +136,8 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector
 // Constant string variable to store predefined chat template strings for popular supported models
 const std::string PHI_VISION_CHAT_TEMPLATE = R"({% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %})";
 const std::string PHI3_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})";
+const std::string PHI3_SMALL_CHAT_TEMPLATE = R"({{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})";
+const std::string PHI3_MEDIUM_CHAT_TEMPLATE = R"({% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %})";
 const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})";
 const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})";
 const std::string LLAMA2_CHAT_TEMPLATE = R"({% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %})";
@@ -169,7 +171,7 @@ OrtxStatus TokenizerImpl::PhiVisionChatTemplate(std::string* output, bool add_ge
     *output += "<|assistant|>\n";
   }
 
-  return OrtxStatus(kOrtxOK, "Created Phi Vision chat template.");
+  return OrtxStatus(kOrtxOK, "Created Phi vision chat template.");
 }
 
 // Note Phi-3 and Phi-3.5 have slightly different chat template strings but share the same functionality so this method can be used for both.
@@ -202,7 +204,54 @@ OrtxStatus TokenizerImpl::Phi3ChatTemplate(std::string* output, bool add_generat
     *output += eos_token;
   }
 
-  return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template.");
+  return OrtxStatus(kOrtxOK, "Created Phi-3/3.5 chat template.");
 }
 
+OrtxStatus TokenizerImpl::Phi3SmallChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>", const std::string& bos_token = "<|startoftext|>") {
+
+  // Clear the output string before starting
+  output->clear();
+
+  // Add the beginning-of-sequence token
+  *output += bos_token;
+
+  // Iterate over the messages
+  for (const auto& message : messages) {
+    std::string role = message.at("role");
+    std::string content = message.at("content");
+
+    // Format the message according to the role
+    *output += "<|" + role + "|>\n" + content + "<|end|>\n";
+  }
+
+  // Add the generation prompt or eos_token
+  if (add_generation_prompt) {
+    *output += "<|assistant|>\n";
+  } else {
+    *output += eos_token;
+  }
+
+  return OrtxStatus(kOrtxOK, "Created Phi-3-small chat template.");
+}
+
+OrtxStatus TokenizerImpl::Phi3MediumChatTemplate(std::string* output) {
+  // Clear the output string before starting
+  output->clear();
+
+  // Process the messages
+  for (const auto& message : messages) {
+    std::string role = message.at("role");
+    std::string content = message.at("content");
+
+    // Format based on role (user/assistant)
+    if (role == "user") {
+      *output += "<|user|>\n" + content + "<|end|>\n<|assistant|>\n";
+    } else if (role == "assistant") {
+      *output += content + "<|end|>\n";
+    }
+  }
+
+  return OrtxStatus(kOrtxOK, "Created Phi-3-medium chat template.");
+}
+
 OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
@@ -671,6 +720,10 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector
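As a rough illustration of what the new Phi-3-small path is expected to emit, the following standalone C++ sketch mirrors the formatting loop added in Phi3SmallChatTemplate above; the main() harness, the sample conversation, and the hard-coded bos token and add_generation_prompt values are assumptions for the example, not part of the patch.

// Standalone sketch only: mirrors the Phi3SmallChatTemplate logic from this patch.
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  // Hypothetical conversation; in the tokenizer these entries come from the stored messages member.
  std::vector<std::unordered_map<std::string, std::string>> messages = {
      {{"role", "system"}, {"content", "You are a helpful assistant."}},
      {{"role", "user"}, {"content", "What is 2 + 2?"}}};

  const std::string bos_token = "<|startoftext|>";  // default assumed by the new method
  const bool add_generation_prompt = true;

  // Same formatting as Phi3SmallChatTemplate: bos token, then <|role|>\ncontent<|end|>\n per message.
  std::string output = bos_token;
  for (const auto& message : messages) {
    output += "<|" + message.at("role") + "|>\n" + message.at("content") + "<|end|>\n";
  }

  // Append the assistant header when a generation prompt is requested.
  if (add_generation_prompt) {
    output += "<|assistant|>\n";
  }

  std::cout << output << std::endl;
  return 0;
}

Run as-is, this prints the bos token, each message wrapped as <|role|>\ncontent<|end|>, and a trailing <|assistant|> header, which is the same shape described by the PHI3_SMALL_CHAT_TEMPLATE string defined in this patch.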