@@ -36,6 +36,8 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
3636 { " Hello" , { 258 , 23090 , }, },
3737 { " Hello" , { 466 , 23090 , }, },
3838 { " Hello\n Hello" , { 466 , 23090 , 742 , 23090 , }, },
39+ { " \n =" , { 1212 , 40 , }, },
40+ { " ' era" , { 18 , 4932 , }, },
3941 };
4042
4143 return _k_tests;
@@ -155,7 +157,7 @@ int main(int argc, char **argv) {
155157
156158 fprintf (stderr, " %s : text size: %zu\n " , __func__, text.size ());
157159
158- const std::vector<llama_token> res = llama_tokenize (ctx, text, true );
160+ const std::vector<llama_token> res = llama_tokenize (ctx, text, false );
159161
160162 fprintf (stderr, " %s : tokens: %zu\n " , __func__, res.size ());
161163
@@ -169,10 +171,8 @@ int main(int argc, char **argv) {
169171 }
170172
171173 for (const auto & tok : res) {
172- ofs << tok << " " ;
174+ ofs << tok << " ' " << llama_detokenize_bpe (ctx, std::vector< int >{tok}) << " ' " << std::endl ;
173175 }
174-
175- ofs << " \n " ;
176176 }
177177
178178 fprintf (stderr, " %s : tokens written to '%s'\n " , __func__, (fname_text + " .tokcpp" ).c_str ());
0 commit comments