@@ -1143,21 +1143,28 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
11431143 std::sort (slot.ban_phrases .begin (), slot.ban_phrases .end (), [](const std::string& a, const std::string& b) {
11441144 return a.length () > b.length ();
11451145 });
1146- }
1147- else if (params_base.ban_phrases .size ()>0 && params_base.n_buffer == 0 ) {
1148- slot.ban_phrases .clear ();
1149- for (const auto & val : params_base.ban_phrases ) {
1150- if (!val.empty ()) {
1151- std::string s = string_lower (val);
1152- auto ban_tokens = common_tokenize (llama_get_model (ctx), s, false , true );
1153- if (ban_tokens.size () > slot.n_buffer ) {
1154- slot.n_buffer = ban_tokens.size ();
1146+ } else if (params_base.ban_phrases .size () > 0 ) {
1147+ if (params_base.n_buffer == 0 ) {
1148+ slot.ban_phrases .clear ();
1149+ std::sort (params_base.ban_phrases .begin (), params_base.ban_phrases .end (), [](const std::string & a, const std::string & b) {
1150+ return a.length () > b.length ();
1151+ });
1152+ for (auto & val : params_base.ban_phrases ) {
1153+ if (!val.empty ()) {
1154+ val = string_lower (val);
1155+ auto ban_tokens = common_tokenize (llama_get_model (ctx), val, false , true );
1156+ if (ban_tokens.size () > slot.n_buffer ) {
1157+ slot.n_buffer = ban_tokens.size ();
1158+ }
1159+ slot.ban_phrases .push_back (val);
11551160 }
1156- slot.ban_phrases .push_back (s);
1157- }
1161+ }
1162+ slot.n_buffer = slot.n_buffer + 3 ; // extra buffer in case
1163+ params_base.n_buffer = slot.n_buffer ;
1164+ } else {
1165+ slot.ban_phrases = params_base.ban_phrases ;
1166+ slot.n_buffer = params_base.n_buffer ;
11581167 }
1159- params_base.n_buffer = slot.n_buffer + 3 ;
1160- slot.n_buffer = slot.n_buffer + 3 ; // extra buffer in case
11611168 }
11621169 slot.logit_bias = slot.sparams .logit_bias ; // keep a copy to restore
11631170 slot.ban_phrases_bias = json_value (data, " banned_bias" , params_base.ban_phrases_bias );
0 commit comments