From a4388e68b26f46864688cf440c21ff2cad7a9751 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 28 Feb 2025 18:28:58 -0800 Subject: [PATCH 01/15] initial chat template impl --- shared/api/tokenizer_impl.cc | 54 ++++++++++++++++++++++++++++++++++++ shared/api/tokenizer_impl.h | 6 ++++ 2 files changed, 60 insertions(+) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index fe7d6440f..d865b919c 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -131,6 +131,60 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector return {}; } +// Constant string variable to store predefined chat template strings for popular supported models +const std::string PHI4_CHAT_TEMPLATE = + R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; + +// Member variable to store the messages +std::vector> messages; + +// Member variable to store the chat_template (customized for each instance) +std::string chat_template; + +// Phi4ChatTemplate method to process messages and store result in output +OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { + // Clear the output string before starting + output->clear(); + + // Process the messages + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Check if "tools" is present in the message and is not empty for "system" role + if (role == "system" && message.find("tools") != message.end() && !message.at("tools").empty()) { + std::string tools = message.at("tools"); + *output += "<|" + role + "|>\n"; + *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>\n"; + } else { + // For other messages, no tools + *output += "<|" + role + "|>\n"; + *output += content + "<|end|>\n"; + } + } + + // Add generation prompt or eos_token + if (add_generation_prompt) { + *output += "<|assistant|>\n"; + } else { + *output += eos_token; + } + + return OrtxStatus(kOrtxOK, "Created chat template."); +} + +// ApplyChatTemplate method to choose the template logic based on chat_template +OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { + // Check if the chat_template matches the global PHI4_CHAT_TEMPLATE string + if (chat_template == PHI4_CHAT_TEMPLATE) { + // If the template matches, apply Phi4ChatTemplate logic + return Phi4ChatTemplate(output, add_generation_prompt, eos_token); + } else { + // Handle other templates or custom logic here + return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. 
Custom template handling needed."); + } +} + OrtxStatus TokenizerImpl::Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const { return std::visit([&](auto& detokenizer) { return detokenizer->Id2Token(id, token, state); }, detokenizer_); diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 395d2cb7c..74b500efb 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -30,6 +30,8 @@ class TokenizerImpl : public OrtxObjectImpl { return BatchDecode(t_ids, t_text); } + + OrtxStatus Token2Id(const std::string& token, extTokenId_t& id) const { id = std::visit([&](auto& tokenizer) { return tokenizer->GetTokenId(token); }, tokenizer_); return {}; @@ -52,6 +54,10 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus BatchDecode(const std::vector>& t_ids, std::vector& t_text) const; + OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + + OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; OrtxStatus GetDecoderPromptIds(size_t batch_size, const char* lang, const char* task, int no_timestamps, From be0f462fb83803eb7ea1fb11fd860520f896a3dd Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 28 Feb 2025 18:45:18 -0800 Subject: [PATCH 02/15] add phi 3.5 support --- shared/api/tokenizer_impl.cc | 41 ++++++++++++++++++++++++++++++++---- shared/api/tokenizer_impl.h | 2 ++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index d865b919c..5d484b46e 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -132,8 +132,8 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector } // Constant string variable to store predefined chat template strings for popular supported models -const std::string PHI4_CHAT_TEMPLATE = - R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; // Member variable to store the messages std::vector> messages; @@ -173,12 +173,45 @@ OrtxStatus 
TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generat return OrtxStatus(kOrtxOK, "Created chat template."); } +OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { + // Clear the output string before starting + output->clear(); + + // Process the messages + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Check for different roles and format accordingly + if (role == "system" && !content.empty()) { + *output += "<|system|>\n"; + *output += content + "<|end|>\n"; + } else if (role == "user") { + *output += "<|user|>\n"; + *output += content + "<|end|>\n"; + } else if (role == "assistant") { + *output += "<|assistant|>\n"; + *output += content + "<|end|>\n"; + } + } + + // Add generation prompt or eos_token + if (add_generation_prompt) { + *output += "<|assistant|>\n"; + } else { + *output += eos_token; + } + + return OrtxStatus(kOrtxOK, "Created chat template."); +} + // ApplyChatTemplate method to choose the template logic based on chat_template OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { - // Check if the chat_template matches the global PHI4_CHAT_TEMPLATE string + // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. if (chat_template == PHI4_CHAT_TEMPLATE) { - // If the template matches, apply Phi4ChatTemplate logic return Phi4ChatTemplate(output, add_generation_prompt, eos_token); + } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { + return Phi3_5ChatTemplate(output, add_generation_prompt, eos_token); } else { // Handle other templates or custom logic here return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. 
Custom template handling needed."); diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 74b500efb..b712762cd 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -56,6 +56,8 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; From c8176e27289ac9cc324c668fa80874d2164adc77 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Mon, 3 Mar 2025 10:55:46 -0800 Subject: [PATCH 03/15] remove unnecessary qualification --- shared/api/tokenizer_impl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index b712762cd..4a25a5dcc 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -54,11 +54,11 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus BatchDecode(const std::vector>& t_ids, std::vector& t_text) const; - OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); - OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); - OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; From a0d74e3b782b4a681e01d5a3d3c0c9db9e217c95 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Mon, 3 Mar 2025 18:17:27 -0800 Subject: [PATCH 04/15] add llama3 chat template --- shared/api/tokenizer_impl.cc | 95 ++++++++++++++++++++++++++++++++++++ shared/api/tokenizer_impl.h | 2 + 2 files changed, 97 insertions(+) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 5d484b46e..e1979d28d 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -134,6 +134,7 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector // Constant string variable to store predefined chat template strings for popular supported models const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% 
elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)" // Member variable to store the messages std::vector> messages; @@ -205,6 +206,98 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener return OrtxStatus(kOrtxOK, "Created chat template."); } +OrtxStatus TokenizerImpl::Llama3ChatTemplate( + std::string* output, + bool add_generation_prompt = true, + const std::string& eos_token = "<|eot_id|>", + const std::vector& custom_tools = {}, + bool tools_in_user_message = true, + const std::string& strftime_now = "", + const std::string& bos_token = "<|begin_of_text|>") { // Add bos_token as a parameter + + // Clear the output string before starting + output->clear(); + + // Prepend BOS token at the start of the output + *output += bos_token + "\n"; // BOS token goes first + + // Initialize date_string with default value + std::string date_string = "26 Jul 2024"; // Default date + if (!strftime_now.empty()) { + date_string = strftime_now; // Override with provided date string if available + } + + // Loop through messages and process each one + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Handle the system message + if (role == "system") { + *output += "<|start_header_id|>system<|end_header_id|>\n\n"; + *output += "Cutting Knowledge Date: December 2023\n"; + *output += "Today Date: " + date_string + "\n\n"; + + // Check if tools exist and append relevant information + if (!custom_tools.empty()) { + *output += "You have access to the following functions. 
To call a function, please respond with JSON for a function call.\n"; + *output += "Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value.}\n"; + *output += "Do not use variables.\n\n"; + + // Convert tools to JSON (assuming custom_tools is a vector of tool names as strings) + nlohmann::json tools_json = nlohmann::json::array(); + for (const auto& tool : custom_tools) { + tools_json.push_back(tool); + } + + *output += tools_json.dump(4) + "\n\n"; + } + *output += "<|eot_id|>\n"; + } + + // Handle user message with tools in it + if (tools_in_user_message && message.find("tool_calls") != message.end()) { + // Parse the tool_calls string into JSON (assuming it's a valid JSON string) + nlohmann::json tool_calls_json = nlohmann::json::parse(message.at("tool_calls")); + + if (tool_calls_json.size() != 1) { + // Handle multiple tool calls (not supported) + return OrtxStatus(kOrtxErrorInvalidArgument, "This model only supports single tool-calls at once!"); + } + + // Extract the function name and arguments from the first tool call + std::string function_name = tool_calls_json[0]["function"]; + nlohmann::json arguments = tool_calls_json[0]["arguments"]; + + // Create the JSON object for the tool call + nlohmann::json tool_call_json; + tool_call_json["name"] = function_name; + tool_call_json["parameters"] = arguments; + + // Serialize the tool call as JSON and append it to output + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + *output += tool_call_json.dump() + "\n"; + *output += "<|eot_id|>\n"; // End of tool call + } + + // Handle other messages (user, assistant, etc.) + else { + *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; + *output += content + "\n"; + *output += "<|eot_id|>\n"; + } + } + + // Add generation prompt or eos_token at the end + if (add_generation_prompt) { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + } else { + *output += eos_token; // Add the EOS token instead + } + + return OrtxStatus(kOrtxOK, "Created chat template."); +} + // ApplyChatTemplate method to choose the template logic based on chat_template OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. @@ -212,6 +305,8 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_genera return Phi4ChatTemplate(output, add_generation_prompt, eos_token); } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { return Phi3_5ChatTemplate(output, add_generation_prompt, eos_token); + } else if (chat_template == LLAMA3_CHAT_TEMPLATE) { + return Llama3ChatTemplate(output, add_generation_prompt, eos_token); } else { // Handle other templates or custom logic here return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. 
Custom template handling needed."); diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 4a25a5dcc..c42ea2698 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -58,6 +58,8 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus Llama3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); + OrtxStatus ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; From db9f7150e30388e108c452cd109626e1b1cf1ddf Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Tue, 4 Mar 2025 15:11:53 -0800 Subject: [PATCH 05/15] fix typo --- shared/api/tokenizer_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index e1979d28d..ca42285a6 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -134,7 +134,7 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector // Constant string variable to store predefined chat template strings for popular supported models const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; -const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. 
#}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)" +const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; // Member variable to store the messages std::vector> messages; From 9955f0a1451fba36f4b4acb3d866ddeec168ef21 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Tue, 4 Mar 2025 17:24:28 -0800 Subject: [PATCH 06/15] add json loading and unit test --- operators/tokenizer/tokenizer_jsconfig.hpp | 5 ++++ shared/api/tokenizer_impl.cc | 28 +++++++++++++--------- shared/api/tokenizer_impl.h | 9 ++++++- test/pp_api_test/test_tokenizer.cc | 27 +++++++++++++++++++++ 4 files changed, 57 insertions(+), 12 deletions(-) diff --git a/operators/tokenizer/tokenizer_jsconfig.hpp b/operators/tokenizer/tokenizer_jsconfig.hpp index 22b44a1d5..65745c9d1 100644 --- a/operators/tokenizer/tokenizer_jsconfig.hpp +++ b/operators/tokenizer/tokenizer_jsconfig.hpp @@ -71,6 +71,7 @@ class TokenJsonConfig final { bos_token_ = ""; eos_token_ = ""; unk_token_ = ""; + chat_template_ = ""; // can add default chat template return {}; } @@ -91,6 +92,8 @@ class TokenJsonConfig final { parse_token(json_config, "eos_token", eos_token_); parse_token(json_config, "unk_token", unk_token_); + parse_token(json_config, "chat_template", chat_template_); + auto pad_iter = json_config.find("pad_token"); if (pad_iter 
!= json_config.end() && pad_iter->is_string()) { pad_token_ = json_config.value("pad_token", ""); @@ -245,6 +248,8 @@ class TokenJsonConfig final { std::string unk_token_; std::string pad_token_; + std::string chat_template_; + AddedTokenMap added_tokens_; static AddedToken ParseAddedToken(const json& token) { diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index ca42285a6..10d401296 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -87,6 +87,8 @@ OrtxStatus TokenizerImpl::Load(const std::string& tok_path) { return status; } + chat_template = tok_config_->chat_template_; + return LoadTokenizer(); } @@ -143,7 +145,7 @@ std::vector> messages; std::string chat_template; // Phi4ChatTemplate method to process messages and store result in output -OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { +OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") { // Clear the output string before starting output->clear(); @@ -155,18 +157,18 @@ OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generat // Check if "tools" is present in the message and is not empty for "system" role if (role == "system" && message.find("tools") != message.end() && !message.at("tools").empty()) { std::string tools = message.at("tools"); - *output += "<|" + role + "|>\n"; - *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>\n"; + *output += "<|" + role + "|>"; + *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>"; } else { // For other messages, no tools - *output += "<|" + role + "|>\n"; - *output += content + "<|end|>\n"; + *output += "<|" + role + "|>"; + *output += content + "<|end|>"; } } // Add generation prompt or eos_token if (add_generation_prompt) { - *output += "<|assistant|>\n"; + *output += "<|assistant|>"; } else { *output += eos_token; } @@ -174,7 +176,7 @@ OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generat return OrtxStatus(kOrtxOK, "Created chat template."); } -OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { +OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") { // Clear the output string before starting output->clear(); @@ -299,14 +301,18 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( } // ApplyChatTemplate method to choose the template logic based on chat_template -OrtxStatus TokenizerImpl::ApplyChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eos|>") { +OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector> message_list, std::string* output, bool add_generation_prompt = true) { + + // Initialize messages + messages = message_list; + // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. 
if (chat_template == PHI4_CHAT_TEMPLATE) { - return Phi4ChatTemplate(output, add_generation_prompt, eos_token); + return Phi4ChatTemplate(output, add_generation_prompt); } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { - return Phi3_5ChatTemplate(output, add_generation_prompt, eos_token); + return Phi3_5ChatTemplate(output, add_generation_prompt); } else if (chat_template == LLAMA3_CHAT_TEMPLATE) { - return Llama3ChatTemplate(output, add_generation_prompt, eos_token); + return Llama3ChatTemplate(output, add_generation_prompt); } else { // Handle other templates or custom logic here return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. Custom template handling needed."); diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index c42ea2698..3fabf7eb1 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -54,13 +54,20 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus BatchDecode(const std::vector>& t_ids, std::vector& t_text) const; + const std::string PHI4_CHAT_TEMPLATE; + const std::string PHI3_5_CHAT_TEMPLATE; + const std::string LLAMA3_CHAT_TEMPLATE; + + std::string chat_template; + std::vector> messages; + OrtxStatus Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); OrtxStatus Llama3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); - OrtxStatus ApplyChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token); + OrtxStatus ApplyChatTemplate(std::vector> messages, std::string* output, bool add_generation_prompt); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; diff --git a/test/pp_api_test/test_tokenizer.cc b/test/pp_api_test/test_tokenizer.cc index 705ef2bfc..b45800180 100644 --- a/test/pp_api_test/test_tokenizer.cc +++ b/test/pp_api_test/test_tokenizer.cc @@ -611,3 +611,30 @@ TEST(OrtxTokenizerTest, AddedTokensTest) { DumpTokenIds(token_ids); EXPECT_EQ(token_ids[0], EXPECTED_IDS_0); } + +TEST(OrtxTokenizerTest, ChatTemplate) { + auto tokenizer = std::make_unique(); + + // Since we do not have local test files for Phi4/Llama3/DeepSeek, we simply manually + // set the chat_template, but otherwise this will be loaded from the tokenizer config automatically. 
+ tokenizer->chat_template = tokenizer->PHI4_CHAT_TEMPLATE; + + std::vector> messages = { + {{"role", "system"}, {"content", "You are a helpful assistant."}, {"tools", "Calculator"}}, + {{"role", "user"}, {"content", "How do I add two numbers?"}}, + {{"role", "assistant"}, {"content", "You can add numbers by using the '+' operator."}} + }; + + // From HuggingFace Python output for 'microsoft/Phi-4-multimodal-instruct' + std::string expected_output = "<|system|>You are a helpful assistant.<|tool|>Calculator<|/tool|><|end|><|user|>How do I add two numbers?<|end|><|assistant|>You can add numbers by using the '+' operator.<|end|><|assistant|>"; + + std::string output = ""; + + auto status = tokenizer->ApplyChatTemplate(messages, &output, true); + + if (!status.IsOk()) { + std::cout << status.ToString() << std::endl; + } + + ASSERT_EQ(output, expected_output); +} From 75aa05be28460761412b2971b14cbe6abf5e9069 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Wed, 5 Mar 2025 14:52:56 -0800 Subject: [PATCH 07/15] add deepseek chat template support --- shared/api/tokenizer_impl.cc | 149 +++++++++++++++++++++++++++++++---- shared/api/tokenizer_impl.h | 2 + 2 files changed, 134 insertions(+), 17 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 10d401296..56c23516b 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -137,6 +137,7 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. 
#}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; +const std::string DEEPSEEK_CHAT_TEMPLATE = R"({% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %})"; // Member variable to store the messages std::vector> messages; @@ 
-173,7 +174,7 @@ OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generat *output += eos_token; } - return OrtxStatus(kOrtxOK, "Created chat template."); + return OrtxStatus(kOrtxOK, "Created Phi-4 chat template."); } OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") { @@ -205,7 +206,7 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener *output += eos_token; } - return OrtxStatus(kOrtxOK, "Created chat template."); + return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template."); } OrtxStatus TokenizerImpl::Llama3ChatTemplate( @@ -297,26 +298,140 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( *output += eos_token; // Add the EOS token instead } - return OrtxStatus(kOrtxOK, "Created chat template."); + return OrtxStatus(kOrtxOK, "Created Llama3 chat template."); +} + +OrtxStatus TokenizerImpl::DeepSeekChatTemplate( + std::string* output, + bool add_generation_prompt = false, + const std::string& eos_token = "<|eot_id|>", + const std::string& bos_token = "<|begin_of_text|>") { // Add bos_token as a parameter + + // Clear the output string before starting + output->clear(); + + // Initialize the namespace for template variables + bool is_first = true; // Track the first occurrence of the tool call or assistant message + bool is_tool = false; + bool is_output_first = true; + std::string system_prompt = ""; + + // Prepend BOS token at the start of the output + *output += bos_token; + + // Loop through messages and process each one + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Handle the system message + if (role == "system") { + system_prompt = content; + } + } + + *output += system_prompt; // Add system prompt to the output + + // Process each message in the conversation + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); // Now content is correctly defined here + + // Handle user message + if (role == "user") { + is_tool = false; + *output += "<|User|>" + content; + } + + // Handle assistant message with tool calls + if (role == "assistant" && message.find("tool_calls") != message.end()) { + is_tool = false; + + // Parse the tool_calls string into JSON + nlohmann::json tool_calls_json = nlohmann::json::parse(message.at("tool_calls")); + + if (tool_calls_json.size() != 1) { + // Handle multiple tool calls (not supported) + return OrtxStatus(kOrtxErrorInvalidArgument, "This model only supports single tool-calls at once!"); + } + + // Extract the function name and arguments from the first tool call + std::string function_name = tool_calls_json[0]["function"]; + nlohmann::json arguments = tool_calls_json[0]["arguments"]; + + // Create the JSON object for the tool call + nlohmann::json tool_call_json; + tool_call_json["name"] = function_name; + tool_call_json["parameters"] = arguments; + + // Handle the first tool call differently + if (is_first) { + *output += "<|Assistant|><|tool_calls_begin|><|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; + is_first = false; // Mark as first tool call processed + } else { + // Subsequent tool calls + *output += "\n<|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" 
+ tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; + } + + *output += "<|tool_calls_end|><|end_of_sentence|>"; + } + + // Handle assistant message without tool calls + if (role == "assistant" && !content.empty()) { + if (is_tool) { + *output += "<|tool_outputs_end|>" + content + "<|end_of_sentence|>"; + is_tool = false; + } else { + *output += "<|Assistant|>" + content + "<|end_of_sentence|>"; + } + } + + // Handle tool messages + if (role == "tool") { + is_tool = true; + if (is_output_first) { + *output += "<|tool_outputs_begin|><|tool_output_begin|>" + content + "<|tool_output_end|>"; + is_output_first = false; + } else { + *output += "\n<|tool_output_begin|>" + content + "<|tool_output_end|>"; + } + } + } + + // If still in a tool message, close it + if (is_tool) { + *output += "<|tool_outputs_end|>"; + } + + // Add generation prompt or eos_token at the end + if (add_generation_prompt && !is_tool) { + *output += "<|Assistant|>\n"; + } else { + *output += eos_token; // Add the EOS token instead + } + + return OrtxStatus(kOrtxOK, "Created DeepSeek chat template."); } // ApplyChatTemplate method to choose the template logic based on chat_template OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector> message_list, std::string* output, bool add_generation_prompt = true) { - // Initialize messages - messages = message_list; - - // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. - if (chat_template == PHI4_CHAT_TEMPLATE) { - return Phi4ChatTemplate(output, add_generation_prompt); - } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { - return Phi3_5ChatTemplate(output, add_generation_prompt); - } else if (chat_template == LLAMA3_CHAT_TEMPLATE) { - return Llama3ChatTemplate(output, add_generation_prompt); - } else { - // Handle other templates or custom logic here - return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. Custom template handling needed."); - } + // Initialize messages + messages = message_list; + + // Check if the chat_template matches any of the supported template strings and if so apply the corresponding template. + if (chat_template == PHI4_CHAT_TEMPLATE) { + return Phi4ChatTemplate(output, add_generation_prompt); + } else if (chat_template == PHI3_5_CHAT_TEMPLATE) { + return Phi3_5ChatTemplate(output, add_generation_prompt); + } else if (chat_template == LLAMA3_CHAT_TEMPLATE) { + return Llama3ChatTemplate(output, add_generation_prompt); + } else if (chat_template == DEEPSEEK_CHAT_TEMPLATE) { + return DeepSeekChatTemplate(output, add_generation_prompt); + } else { + // Handle other templates or custom logic here + return OrtxStatus(kOrtxErrorNotImplemented, "The provided chat template is currently not supported. 
Custom template handling needed."); + } } OrtxStatus TokenizerImpl::Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const { diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 3fabf7eb1..6b94d84aa 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -67,6 +67,8 @@ class TokenizerImpl : public OrtxObjectImpl { OrtxStatus Llama3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); + OrtxStatus DeepSeekChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::string& bos_token); + OrtxStatus ApplyChatTemplate(std::vector> messages, std::string* output, bool add_generation_prompt); OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const; From 3043d3cdca1b9dcd23ec4f98f38521454a04bbe0 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Wed, 5 Mar 2025 19:55:01 -0800 Subject: [PATCH 08/15] fix llama3 issue --- shared/api/tokenizer_impl.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 56c23516b..90633dd1e 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -222,7 +222,7 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( output->clear(); // Prepend BOS token at the start of the output - *output += bos_token + "\n"; // BOS token goes first + *output += bos_token; // BOS token goes first // Initialize date_string with default value std::string date_string = "26 Jul 2024"; // Default date @@ -255,7 +255,6 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( *output += tools_json.dump(4) + "\n\n"; } - *output += "<|eot_id|>\n"; } // Handle user message with tools in it @@ -279,15 +278,17 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( // Serialize the tool call as JSON and append it to output *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; - *output += tool_call_json.dump() + "\n"; - *output += "<|eot_id|>\n"; // End of tool call + *output += tool_call_json.dump(); + *output += "<|eot_id|>"; // End of tool call } // Handle other messages (user, assistant, etc.) 
else { - *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; - *output += content + "\n"; - *output += "<|eot_id|>\n"; + if (role != "system") { + *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; + } + *output += content; + *output += "<|eot_id|>"; } } From d08a61766472bfd52c9f14282bf9d590d53fc7ee Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Thu, 6 Mar 2025 14:50:08 -0800 Subject: [PATCH 09/15] fix deepseek issues --- shared/api/tokenizer_impl.cc | 24 ++++++++++++------------ shared/api/tokenizer_impl.h | 1 + 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 90633dd1e..7e148be47 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -305,8 +305,8 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( OrtxStatus TokenizerImpl::DeepSeekChatTemplate( std::string* output, bool add_generation_prompt = false, - const std::string& eos_token = "<|eot_id|>", - const std::string& bos_token = "<|begin_of_text|>") { // Add bos_token as a parameter + const std::string& eos_token = "<|end▁of▁sentence|>", + const std::string& bos_token = "<|begin▁of▁sentence|>") { // Add bos_token as a parameter // Clear the output string before starting output->clear(); @@ -341,7 +341,7 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( // Handle user message if (role == "user") { is_tool = false; - *output += "<|User|>" + content; + *output += "<|User|>" + content; } // Handle assistant message with tool calls @@ -367,23 +367,23 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( // Handle the first tool call differently if (is_first) { - *output += "<|Assistant|><|tool_calls_begin|><|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; + *output += "<|Assistant|><|tool_calls_begin|><|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; is_first = false; // Mark as first tool call processed } else { // Subsequent tool calls - *output += "\n<|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; + *output += "\n<|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; } - *output += "<|tool_calls_end|><|end_of_sentence|>"; + *output += "<|tool_calls_end|><|end▁of▁sentence|>"; } // Handle assistant message without tool calls if (role == "assistant" && !content.empty()) { if (is_tool) { - *output += "<|tool_outputs_end|>" + content + "<|end_of_sentence|>"; + *output += "<|tool_outputs_end|>" + content + "<|end▁of▁sentence|>"; is_tool = false; } else { - *output += "<|Assistant|>" + content + "<|end_of_sentence|>"; + *output += "<|Assistant|>" + content + "<|end▁of▁sentence|>"; } } @@ -391,22 +391,22 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( if (role == "tool") { is_tool = true; if (is_output_first) { - *output += "<|tool_outputs_begin|><|tool_output_begin|>" + content + "<|tool_output_end|>"; + *output += "<|tool_outputs_begin|><|tool_output_begin|>" + content + 
"<|tool_output_end|>"; is_output_first = false; } else { - *output += "\n<|tool_output_begin|>" + content + "<|tool_output_end|>"; + *output += "\n<|tool_output_begin|>" + content + "<|tool_output_end|>"; } } } // If still in a tool message, close it if (is_tool) { - *output += "<|tool_outputs_end|>"; + *output += "<|tool_outputs_end|>"; } // Add generation prompt or eos_token at the end if (add_generation_prompt && !is_tool) { - *output += "<|Assistant|>\n"; + *output += "<|Assistant|>\n"; } else { *output += eos_token; // Add the EOS token instead } diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h index 6b94d84aa..2c1e743fa 100644 --- a/shared/api/tokenizer_impl.h +++ b/shared/api/tokenizer_impl.h @@ -57,6 +57,7 @@ class TokenizerImpl : public OrtxObjectImpl { const std::string PHI4_CHAT_TEMPLATE; const std::string PHI3_5_CHAT_TEMPLATE; const std::string LLAMA3_CHAT_TEMPLATE; + const std::string DEEPSEEK_CHAT_TEMPLATE; std::string chat_template; std::vector> messages; From 21d024519d470418391d6471872ab9d4d65dbb06 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 11:09:31 -0800 Subject: [PATCH 10/15] fix eos handing in some cases --- shared/api/tokenizer_impl.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 7e148be47..4203ba7a5 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -279,7 +279,7 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( // Serialize the tool call as JSON and append it to output *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; *output += tool_call_json.dump(); - *output += "<|eot_id|>"; // End of tool call + *output += eos_token; // End of tool call } // Handle other messages (user, assistant, etc.) 
@@ -288,7 +288,7 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; } *output += content; - *output += "<|eot_id|>"; + *output += eos_token; } } @@ -374,16 +374,19 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( *output += "\n<|tool_call_begin|>" + tool_calls_json[0]["type"].get() + "<|tool_sep|>" + tool_calls_json[0]["function"]["name"].get() + "\njson\n" + tool_calls_json[0]["function"]["arguments"].dump() + "\n<|tool_call_end|>"; } - *output += "<|tool_calls_end|><|end▁of▁sentence|>"; + *output += "<|tool_calls_end|>"; + *output += eos_token; } // Handle assistant message without tool calls if (role == "assistant" && !content.empty()) { if (is_tool) { - *output += "<|tool_outputs_end|>" + content + "<|end▁of▁sentence|>"; + *output += "<|tool_outputs_end|>" + content; + *output += eos_token; is_tool = false; } else { - *output += "<|Assistant|>" + content + "<|end▁of▁sentence|>"; + *output += "<|Assistant|>" + content; + *output += eos_token; } } From 52de34f5481f461c3cf7ed65c3a33e87f3c4f818 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 15:05:19 -0800 Subject: [PATCH 11/15] add llama 3.3 support --- shared/api/tokenizer_impl.cc | 140 ++++++++++++++++++++++++++++++++++- shared/api/tokenizer_impl.h | 6 +- 2 files changed, 141 insertions(+), 5 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 4203ba7a5..f8d6d5063 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -136,7 +136,9 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector // Constant string variable to store predefined chat template strings for popular supported models const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; -const std::string LLAMA3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. 
#}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; +const std::string LLAMA3_CHAT_TEMPLATE = R"({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %})"; +const std::string LLAMA3_2_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; +const std::string LLAMA3_3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. 
#}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; const std::string DEEPSEEK_CHAT_TEMPLATE = R"({% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if 
message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %})"; // Member variable to store the messages @@ -209,7 +211,7 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template."); } -OrtxStatus TokenizerImpl::Llama3ChatTemplate( +OrtxStatus TokenizerImpl::Llama3_2ChatTemplate( std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|eot_id|>", @@ -302,6 +304,134 @@ OrtxStatus TokenizerImpl::Llama3ChatTemplate( return OrtxStatus(kOrtxOK, "Created Llama3 chat template."); } +OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( + std::string* output, + bool add_generation_prompt = true, + const std::string& eos_token = "<|eot_id|>", + const std::vector& custom_tools = {}, + const std::vector& builtin_tools = {}, // Added builtin_tools as parameter + bool tools_in_user_message = true, + const std::string& date_string = "26 Jul 2024", // Default date string parameter + const std::string& bos_token = "<|begin_of_text|>") { // BOS token as a parameter + + // Clear the output string before starting + output->clear(); + + // Prepend BOS token at the start of the output + *output += bos_token; // BOS token goes first + + // Loop through messages and process each one + for (const auto& message : messages) { + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Handle the system message + if (role == "system") { + *output += "<|start_header_id|>system<|end_header_id|>\n\n"; + *output += "Cutting Knowledge Date: December 2023\n"; + *output += "Today Date: " + date_string + "\n\n"; + + // Check if builtin_tools or custom_tools exist and append relevant information + if (!builtin_tools.empty() || !custom_tools.empty()) { + *output += "Environment: ipython\n"; + } + + // Add builtin tools if defined (excluding 'code_interpreter') + if (!builtin_tools.empty()) { + *output += "Tools: "; + bool first = true; + for (const auto& tool : builtin_tools) { + if (tool != "code_interpreter") { + if (!first) { + *output += ", "; + } + *output += tool; + first = false; + } + } + *output += "\n\n"; + } + + // Add the tools section if custom tools are provided + if (!custom_tools.empty()) { + *output += "You have access to the following functions. 
To call a function, please respond with JSON for a function call.\n"; + *output += "Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.\n"; + *output += "Do not use variables.\n\n"; + + // Convert custom_tools to JSON + nlohmann::json tools_json = nlohmann::json::array(); + for (const auto& tool : custom_tools) { + tools_json.push_back(tool); + } + *output += tools_json.dump(4) + "\n\n"; + } + } + + // Handle user message with tools in it + if (tools_in_user_message && message.find("tool_calls") != message.end()) { + // Parse the tool_calls string into JSON + nlohmann::json tool_calls_json = nlohmann::json::parse(message.at("tool_calls")); + + if (tool_calls_json.size() != 1) { + // Handle multiple tool calls (not supported) + return OrtxStatus(kOrtxErrorInvalidArgument, "This model only supports single tool-calls at once!"); + } + + // Extract the function name and arguments from the first tool call + std::string function_name = tool_calls_json[0]["function"]; + nlohmann::json arguments = tool_calls_json[0]["arguments"]; + + // Create the JSON object for the tool call + nlohmann::json tool_call_json; + tool_call_json["name"] = function_name; + tool_call_json["parameters"] = arguments; + + // If the tool is a built-in tool, use the specific format for ipython + bool is_builtin_tool = std::find(builtin_tools.begin(), builtin_tools.end(), function_name) != builtin_tools.end(); + if (is_builtin_tool) { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + *output += "<|python_tag|>" + function_name + ".call("; + bool first = true; + for (auto& [arg_name, arg_val] : arguments.items()) { + if (!first) { + *output += ", "; + } + *output += arg_name + "=\"" + arg_val.get() + "\""; + first = false; + } + *output += ")"; + } else { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + *output += tool_call_json.dump(); + } + + if (!builtin_tools.empty()) { + *output += "<|eom_id|>"; + } else { + *output += eos_token; // Replaced <|eot_id|> with eos_token + } + } + + // Handle other messages (user, assistant, etc.) 
+ else { + if (role != "system") { + *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; + } + *output += content; + *output += eos_token; // Replaced <|eot_id|> with eos_token + } + } + + // Add generation prompt or eos_token at the end + if (add_generation_prompt) { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + } else { + *output += eos_token; // Replaced <|eot_id|> with eos_token + } + + return OrtxStatus(kOrtxOK, "Created chat template."); +} + OrtxStatus TokenizerImpl::DeepSeekChatTemplate( std::string* output, bool add_generation_prompt = false, @@ -428,8 +558,10 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); + OrtxStatus Llama3_2ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); + OrtxStatus Llama3_3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, const std::vector& builtin_tools, bool tools_in_user_message, const std::string& date_string, const std::string& bos_token); + OrtxStatus DeepSeekChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::string& bos_token); OrtxStatus ApplyChatTemplate(std::vector> messages, std::string* output, bool add_generation_prompt); From 7e6e4b67f9c37bb7e573840c0d4742b0165051c7 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 16:06:30 -0800 Subject: [PATCH 12/15] add base llama 3 support --- shared/api/tokenizer_impl.cc | 71 ++++++++++++++++++++++++++++-------- shared/api/tokenizer_impl.h | 2 + 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index f8d6d5063..fc8157574 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -201,7 +201,7 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener } } - // Add generation prompt or eos_token + // Add generation prompt or EOS token if (add_generation_prompt) { *output += "<|assistant|>\n"; } else { @@ -211,6 +211,43 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template."); } +OrtxStatus TokenizerImpl::Llama3ChatTemplate( + std::string* output, + bool add_generation_prompt = true, + const std::string& eos_token = "<|eot_id|>", + const std::string& bos_token = "<|begin_of_text|>") { + + // Clear the output string before starting + output->clear(); + + // Iterate over the messages to construct the template + for (size_t i = 0; i < messages.size(); ++i) { + const auto& message = messages[i]; + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Build the message with header and content + std::string formatted_content = "<|start_header_id|>" + role + "<|end_header_id|>\n\n" + content + eos_token; + + // Add BOS token only to the first message + if (i == 0) { + formatted_content = bos_token + formatted_content; + } + + // Append the formatted message to the output + *output += formatted_content; + } + + // Add generation prompt or eos_token at the end + if (add_generation_prompt) { + *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; + } else { + *output += eos_token; + } + + return 
OrtxStatus(kOrtxOK, "Created Llama 3 chat template."); +} + OrtxStatus TokenizerImpl::Llama3_2ChatTemplate( std::string* output, bool add_generation_prompt = true, @@ -218,13 +255,13 @@ OrtxStatus TokenizerImpl::Llama3_2ChatTemplate( const std::vector& custom_tools = {}, bool tools_in_user_message = true, const std::string& strftime_now = "", - const std::string& bos_token = "<|begin_of_text|>") { // Add bos_token as a parameter + const std::string& bos_token = "<|begin_of_text|>") { // Clear the output string before starting output->clear(); // Prepend BOS token at the start of the output - *output += bos_token; // BOS token goes first + *output += bos_token; // Initialize date_string with default value std::string date_string = "26 Jul 2024"; // Default date @@ -298,10 +335,10 @@ OrtxStatus TokenizerImpl::Llama3_2ChatTemplate( if (add_generation_prompt) { *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; } else { - *output += eos_token; // Add the EOS token instead + *output += eos_token; } - return OrtxStatus(kOrtxOK, "Created Llama3 chat template."); + return OrtxStatus(kOrtxOK, "Created Llama 3.2 chat template."); } OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( @@ -309,16 +346,16 @@ OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( bool add_generation_prompt = true, const std::string& eos_token = "<|eot_id|>", const std::vector& custom_tools = {}, - const std::vector& builtin_tools = {}, // Added builtin_tools as parameter + const std::vector& builtin_tools = {}, bool tools_in_user_message = true, - const std::string& date_string = "26 Jul 2024", // Default date string parameter - const std::string& bos_token = "<|begin_of_text|>") { // BOS token as a parameter + const std::string& date_string = "26 Jul 2024", + const std::string& bos_token = "<|begin_of_text|>") { // Clear the output string before starting output->clear(); // Prepend BOS token at the start of the output - *output += bos_token; // BOS token goes first + *output += bos_token; // Loop through messages and process each one for (const auto& message : messages) { @@ -408,7 +445,7 @@ OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( if (!builtin_tools.empty()) { *output += "<|eom_id|>"; } else { - *output += eos_token; // Replaced <|eot_id|> with eos_token + *output += eos_token; } } @@ -418,7 +455,7 @@ OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( *output += "<|start_header_id|>" + role + "<|end_header_id|>\n\n"; } *output += content; - *output += eos_token; // Replaced <|eot_id|> with eos_token + *output += eos_token; } } @@ -426,17 +463,17 @@ OrtxStatus TokenizerImpl::Llama3_3ChatTemplate( if (add_generation_prompt) { *output += "<|start_header_id|>assistant<|end_header_id|>\n\n"; } else { - *output += eos_token; // Replaced <|eot_id|> with eos_token + *output += eos_token; } - return OrtxStatus(kOrtxOK, "Created chat template."); + return OrtxStatus(kOrtxOK, "Created Llama 3.1/3.3 chat template."); // Llama 3.1 and 3.3 have the same chat template } OrtxStatus TokenizerImpl::DeepSeekChatTemplate( std::string* output, bool add_generation_prompt = false, const std::string& eos_token = "<|end▁of▁sentence|>", - const std::string& bos_token = "<|begin▁of▁sentence|>") { // Add bos_token as a parameter + const std::string& bos_token = "<|begin▁of▁sentence|>") { // Clear the output string before starting output->clear(); @@ -466,7 +503,7 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( // Process each message in the conversation for (const auto& message : messages) { std::string role = message.at("role"); - 
std::string content = message.at("content"); // Now content is correctly defined here + std::string content = message.at("content"); // Handle user message if (role == "user") { @@ -541,7 +578,7 @@ OrtxStatus TokenizerImpl::DeepSeekChatTemplate( if (add_generation_prompt && !is_tool) { *output += "<|Assistant|>\n"; } else { - *output += eos_token; // Add the EOS token instead + *output += eos_token; } return OrtxStatus(kOrtxOK, "Created DeepSeek chat template."); @@ -558,6 +595,8 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); OrtxStatus Llama3_3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::vector& custom_tools, const std::vector& builtin_tools, bool tools_in_user_message, const std::string& date_string, const std::string& bos_token); From c93486ef73494364a616dd94589a54219b7889e7 Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 17:05:09 -0800 Subject: [PATCH 13/15] add llama 2 support --- shared/api/tokenizer_impl.cc | 54 ++++++++++++++++++++++++++++++++++++ shared/api/tokenizer_impl.h | 3 ++ 2 files changed, 57 insertions(+) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index fc8157574..9dde47524 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -136,6 +136,7 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector // Constant string variable to store predefined chat template strings for popular supported models const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string LLAMA2_CHAT_TEMPLATE = R"({% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %})"; const std::string LLAMA3_CHAT_TEMPLATE = R"({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + 
message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %})"; const std::string LLAMA3_2_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; const std::string LLAMA3_3_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)"; @@ -211,6 +212,57 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template."); } +OrtxStatus TokenizerImpl::Llama2ChatTemplate( + std::string* output, + bool add_generation_prompt = true, + const std::string& eos_token = "", + const std::string& bos_token = "") { + + // Clear the output string before starting + output->clear(); + + // Initialize system message and process it + bool system_message_exists = false; + std::string system_message = ""; + + if (!messages.empty() && messages[0].at("role") == "system") { + system_message = 
messages[0].at("content"); + system_message_exists = true; + } + + // If system message exists, we start processing from the second message + size_t start_index = system_message_exists ? 1 : 0; + + // Iterate over the messages to construct the template + for (size_t i = start_index; i < messages.size(); ++i) { + const auto& message = messages[i]; + std::string role = message.at("role"); + std::string content = message.at("content"); + + // Check if the conversation roles alternate between user and assistant + if ((role == "user") != (i % 2 == start_index % 2)) { + return OrtxStatus(kOrtxErrorInvalidArgument, "Conversation roles must alternate user/assistant/user/assistant..."); + } + + // Handle system message by prepending it to the first assistant's message + std::string formatted_content; + if (i == start_index && system_message_exists) { + formatted_content = "<>\n" + system_message + "\n<>\n\n" + content; + } else { + formatted_content = content; + } + + // Add the appropriate markers for user and assistant roles + if (role == "user") { + *output += bos_token + "[INST] " + formatted_content + " [/INST]"; + } else if (role == "assistant") { + *output += " " + formatted_content + " " + eos_token; + } + } + + return OrtxStatus(kOrtxOK, "Created Llama 2 chat template."); +} + OrtxStatus TokenizerImpl::Llama3ChatTemplate( std::string* output, bool add_generation_prompt = true, @@ -595,6 +647,8 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector& custom_tools, bool tools_in_user_message, const std::string& strftime_now, const std::string& bos_token); From 231093c01a277a9e31dff4c786c4f5d8927ccdad Mon Sep 17 00:00:00 2001 From: Sayan Shaw Date: Fri, 7 Mar 2025 18:00:40 -0800 Subject: [PATCH 14/15] add phi vision support and code clean up --- shared/api/tokenizer_impl.cc | 90 +++++++++++++++++++++++------------- shared/api/tokenizer_impl.h | 10 ++-- 2 files changed, 65 insertions(+), 35 deletions(-) diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc index 9dde47524..624e1ae49 100644 --- a/shared/api/tokenizer_impl.cc +++ b/shared/api/tokenizer_impl.cc @@ -134,8 +134,10 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector } // Constant string variable to store predefined chat template strings for popular supported models -const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string PHI_VISION_CHAT_TEMPLATE = R"({% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %})"; +const std::string PHI3_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; const std::string PHI3_5_CHAT_TEMPLATE = R"({% for 
message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})"; +const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})"; const std::string LLAMA2_CHAT_TEMPLATE = R"({% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %})"; const std::string LLAMA3_CHAT_TEMPLATE = R"({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %})"; const std::string LLAMA3_2_CHAT_TEMPLATE = R"({{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. 
To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n)";
@@ -148,39 +150,30 @@ std::vector<std::unordered_map<std::string, std::string>> messages;
 // Member variable to store the chat_template (customized for each instance)
 std::string chat_template;
 
-// Phi4ChatTemplate method to process messages and store result in output
-OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
-  // Clear the output string before starting
-  output->clear();
-
-  // Process the messages
-  for (const auto& message : messages) {
-    std::string role = message.at("role");
-    std::string content = message.at("content");
-
-    // Check if "tools" is present in the message and is not empty for "system" role
-    if (role == "system" && message.find("tools") != message.end() && !message.at("tools").empty()) {
-      std::string tools = message.at("tools");
-      *output += "<|" + role + "|>";
-      *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>";
-    } else {
-      // For other messages, no tools
-      *output += "<|" + role + "|>";
-      *output += content + "<|end|>";
-    }
-  }
+OrtxStatus TokenizerImpl::PhiVisionChatTemplate(std::string* output, bool add_generation_prompt = true) {
 
-  // Add generation prompt or eos_token
-  if (add_generation_prompt) {
-    *output += "<|assistant|>";
-  } else {
-    *output += eos_token;
-  }
+  // Clear the output string before starting
+  output->clear();
 
-  return OrtxStatus(kOrtxOK, "Created Phi-4 chat template.");
+  // Iterate over the messages
+  for (const auto& message : messages) {
+    std::string role = message.at("role");
+    std::string content = message.at("content");
+
+    // Format the message according to the role
+    *output += "<|" + role + "|>\n" + content + "<|end|>\n";
+  }
+
+  // Check if a generation prompt is needed and the last message isn't from the assistant
+  if (add_generation_prompt && messages.back().at("role") != "assistant") {
+    *output += "<|assistant|>\n";
+  }
+
+  return OrtxStatus(kOrtxOK, "Created Phi Vision chat template.");
 }
 
-OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
+// Note Phi-3 and Phi-3.5 have slightly different chat template strings but share the same functionality so this method can be used for both.
+OrtxStatus TokenizerImpl::Phi3ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
   // Clear the output string before starting
   output->clear();
 
@@ -212,6 +205,37 @@ OrtxStatus TokenizerImpl::Phi3_5ChatTemplate(std::string* output, bool add_gener
   return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template.");
 }
 
+OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
+  // Clear the output string before starting
+  output->clear();
+
+  // Process the messages
+  for (const auto& message : messages) {
+    std::string role = message.at("role");
+    std::string content = message.at("content");
+
+    // Check if "tools" is present in the message and is not empty for "system" role
+    if (role == "system" && message.find("tools") != message.end() && !message.at("tools").empty()) {
+      std::string tools = message.at("tools");
+      *output += "<|" + role + "|>";
+      *output += content + "<|tool|>" + tools + "<|/tool|>" + "<|end|>";
+    } else {
+      // For other messages, no tools
+      *output += "<|" + role + "|>";
+      *output += content + "<|end|>";
+    }
+  }
+
+  // Add generation prompt or eos_token
+  if (add_generation_prompt) {
+    *output += "<|assistant|>";
+  } else {
+    *output += eos_token;
+  }
+
+  return OrtxStatus(kOrtxOK, "Created Phi-4 chat template.");
+}
+
 OrtxStatus TokenizerImpl::Llama2ChatTemplate(
     std::string* output,
     bool add_generation_prompt = true,
@@ -645,8 +669,10 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector
diff --git a/shared/api/tokenizer_impl.h b/shared/api/tokenizer_impl.h
--- a/shared/api/tokenizer_impl.h
+++ b/shared/api/tokenizer_impl.h
   OrtxStatus BatchDecode(const std::vector<std::vector<extTokenId_t>>& t_ids, std::vector<std::string>& t_text) const;
-  const std::string PHI4_CHAT_TEMPLATE;
+  const std::string PHI_VISION_CHAT_TEMPLATE;
+  const std::string PHI3_CHAT_TEMPLATE;
   const std::string PHI3_5_CHAT_TEMPLATE;
+  const std::string PHI4_CHAT_TEMPLATE;
   const std::string LLAMA2_CHAT_TEMPLATE;
   const std::string LLAMA3_CHAT_TEMPLATE;
   const std::string LLAMA3_2_CHAT_TEMPLATE;
@@ -64,11 +66,13 @@ class TokenizerImpl : public OrtxObjectImpl {
   std::string chat_template;
   std::vector<std::unordered_map<std::string, std::string>> messages;
+
+  OrtxStatus PhiVisionChatTemplate(std::string* output, bool add_generation_prompt);
+
+  OrtxStatus Phi3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token);
 
   OrtxStatus Phi4ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token);
 
-  OrtxStatus Phi3_5ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token);
-
   OrtxStatus Llama2ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::string& bos_token);
 
   OrtxStatus Llama3ChatTemplate(std::string* output, bool add_generation_prompt, const std::string& eos_token, const std::string& bos_token);
 

From 34b8ed762c024243fac8465e1c36876121d00ec2 Mon Sep 17 00:00:00 2001
From: Sayan Shaw
Date: Fri, 7 Mar 2025 19:15:52 -0800
Subject: [PATCH 15/15] add phi 3 small and medium support

---
 shared/api/tokenizer_impl.cc | 57 ++++++++++++++++++++++++++++++++++--
 shared/api/tokenizer_impl.h  |  6 ++++
 2 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/shared/api/tokenizer_impl.cc b/shared/api/tokenizer_impl.cc
index 624e1ae49..e1a37310c 100644
--- a/shared/api/tokenizer_impl.cc
+++ b/shared/api/tokenizer_impl.cc
@@ -136,6 +136,8 @@ OrtxStatus TokenizerImpl::BatchDecode(const std::vector
 // Constant string variable to store predefined chat template strings for popular supported models
 const std::string PHI_VISION_CHAT_TEMPLATE = R"({% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %})";
 const std::string PHI3_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})";
+const std::string PHI3_SMALL_CHAT_TEMPLATE = R"({{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})";
+const std::string PHI3_MEDIUM_CHAT_TEMPLATE = R"({% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %})";
 const std::string PHI3_5_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %})";
 const std::string PHI4_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %})";
 const std::string LLAMA2_CHAT_TEMPLATE = R"({% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %})";
@@ -169,7 +171,7 @@ OrtxStatus TokenizerImpl::PhiVisionChatTemplate(std::string* output, bool add_ge
     *output += "<|assistant|>\n";
   }
 
-  return OrtxStatus(kOrtxOK, "Created Phi Vision chat template.");
+  return OrtxStatus(kOrtxOK, "Created Phi vision chat template.");
 }
 
 // Note Phi-3 and Phi-3.5 have slightly different chat template strings but share the same functionality so this method can be used for both.
@@ -202,7 +204,54 @@ OrtxStatus TokenizerImpl::Phi3ChatTemplate(std::string* output, bool add_generat
     *output += eos_token;
   }
 
-  return OrtxStatus(kOrtxOK, "Created Phi-3.5 chat template.");
+  return OrtxStatus(kOrtxOK, "Created Phi-3/3.5 chat template.");
 }
 
+OrtxStatus TokenizerImpl::Phi3SmallChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>", const std::string& bos_token = "<|startoftext|>") {
+
+  // Clear the output string before starting
+  output->clear();
+
+  // Add the beginning-of-sequence token
+  *output += bos_token;
+
+  // Iterate over the messages
+  for (const auto& message : messages) {
+    std::string role = message.at("role");
+    std::string content = message.at("content");
+
+    // Format the message according to the role
+    *output += "<|" + role + "|>\n" + content + "<|end|>\n";
+  }
+
+  // Add the generation prompt or eos_token
+  if (add_generation_prompt) {
+    *output += "<|assistant|>\n";
+  } else {
+    *output += eos_token;
+  }
+
+  return OrtxStatus(kOrtxOK, "Created Phi-3-small chat template.");
+}
+
+OrtxStatus TokenizerImpl::Phi3MediumChatTemplate(std::string* output) {
+  // Clear the output string before starting
+  output->clear();
+
+  // Process the messages
+  for (const auto& message : messages) {
+    std::string role = message.at("role");
+    std::string content = message.at("content");
+
+    // Format based on role (user/assistant)
+    if (role == "user") {
+      *output += "<|user|>\n" + content + "<|end|>\n<|assistant|>\n";
+    } else if (role == "assistant") {
+      *output += content + "<|end|>\n";
+    }
+  }
+
+  return OrtxStatus(kOrtxOK, "Created Phi-3-medium chat template.");
+}
+
 OrtxStatus TokenizerImpl::Phi4ChatTemplate(std::string* output, bool add_generation_prompt = true, const std::string& eos_token = "<|endoftext|>") {
@@ -671,6 +720,10 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(std::vector
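As a rough illustration of what the new Phi-3-small path is expected to emit, the following standalone C++ sketch mirrors the formatting loop added in Phi3SmallChatTemplate above; the main() harness, the sample conversation, and the hard-coded bos token and add_generation_prompt values are assumptions for the example, not part of the patch.

// Standalone sketch only: mirrors the Phi3SmallChatTemplate logic from this patch.
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  // Hypothetical conversation; in the tokenizer these entries come from the stored messages member.
  std::vector<std::unordered_map<std::string, std::string>> messages = {
      {{"role", "system"}, {"content", "You are a helpful assistant."}},
      {{"role", "user"}, {"content", "What is 2 + 2?"}}};

  const std::string bos_token = "<|startoftext|>";  // default assumed by the new method
  const bool add_generation_prompt = true;

  // Same formatting as Phi3SmallChatTemplate: bos token, then <|role|>\ncontent<|end|>\n per message.
  std::string output = bos_token;
  for (const auto& message : messages) {
    output += "<|" + message.at("role") + "|>\n" + message.at("content") + "<|end|>\n";
  }

  // Append the assistant header when a generation prompt is requested.
  if (add_generation_prompt) {
    output += "<|assistant|>\n";
  }

  std::cout << output << std::endl;
  return 0;
}

Run as-is, this prints the bos token, each message wrapped as <|role|>\ncontent<|end|>, and a trailing <|assistant|> header, which is the same shape described by the PHI3_SMALL_CHAT_TEMPLATE string defined in this patch.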