From 04e452e23dbfaef45f385711040d41c9eec79b54 Mon Sep 17 00:00:00 2001 From: Wenbing Li <10278425+wenbingl@users.noreply.github.com> Date: Fri, 7 Mar 2025 14:13:25 -0800 Subject: [PATCH] skip special --- operators/tokenizer/bpe_streaming.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/operators/tokenizer/bpe_streaming.hpp b/operators/tokenizer/bpe_streaming.hpp index 53a87c77..dab2268e 100644 --- a/operators/tokenizer/bpe_streaming.hpp +++ b/operators/tokenizer/bpe_streaming.hpp @@ -93,9 +93,11 @@ class BpeStreamingDecoder : public KernelBpeDecoder { } OrtxStatus SpmId2Token(extTokenId_t id, std::string& token, bool& f_special_last) const { - bool f_special = all_special_ids_.count(id) ? true : false; + bool f_special = false; if (added_tokens_.count(id)) { - token = added_tokens_.at(id); + f_special = all_special_ids_.count(id) ? true : false; + // special token was skipped + token = f_special ? "" : added_tokens_.at(id); } else { std::string piece = id < arr_vocab_.size() ? arr_vocab_[id] : ""; if (piece.empty()) {