Skip to content

Commit f1ccf34

Browse files
authored
fix model name missing in final response (#1250)
Co-authored-by: firecoperana <firecoperana>
1 parent dbcbfdb commit f1ccf34

File tree

2 files changed: +6 additions, −6 deletions

examples/server/server-context.cpp

Lines changed: 5 additions & 6 deletions
@@ -1631,12 +1631,11 @@ void server_context::send_final_response(server_slot& slot) {
     res->timings = slot.get_timings();
     res->post_sampling_probs = slot.params.post_sampling_probs;
     res->oaicompat = slot.params.oaicompat;
-    res->oaicompat_model = slot.params.oaicompat_model;
     res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
     res->oaicompat_msg = slot.update_chat_msg(res->oaicompat_msg_diffs);
     res->n_decoded = slot.n_decoded;
     res->n_prompt_tokens = slot.n_prompt_tokens;
-    res->oaicompat_model = slot.oaicompat_model;
+    res->oaicompat_model = slot.task->params.oaicompat_model;
     res->data = json{
         {"content", !slot.params.stream ? slot.generated_text : ""},
         {"generated_text", slot.generated_text}, // Always include full text for finish_reason logic
@@ -2590,9 +2589,9 @@ void server_context::batch_pending_prompt(const int32_t n_ubatch, const int32_t

     slot.state = SLOT_STATE_PROCESSING;
     slot.command = SLOT_COMMAND_NONE;
+    send_final_response(slot);
     slot.release();
     slot.print_timings();
-    send_final_response(slot);
     continue;
 }

@@ -2933,9 +2932,9 @@ void server_context::speculative_decoding_accept() {

     if (!process_token(result, slot)) {
         // release slot because of stop condition
+        send_final_response(slot);
         slot.release();
         slot.print_timings();
-        send_final_response(slot);
         metrics.on_prediction(slot);
         break;
     }
@@ -2953,7 +2952,7 @@ void server_context::speculative_decoding_accept() {

 bool server_context::accept_special_token(const server_slot& slot, const llama_token token) {
     return params_base.special || slot.sparams.preserved_tokens.find(token) != slot.sparams.preserved_tokens.end();
-};
+}


 void server_context::send_token_results(completion_token_outputs& results, server_slot& slot, int32_t n) {
@@ -2962,9 +2961,9 @@ void server_context::send_token_results(completion_token_outputs& results, serve
     bool has_next = process_token(it, slot);
     count++;
     if (!has_next) {
+        send_final_response(slot);
         slot.release();
         slot.print_timings();
-        send_final_response(slot);
         metrics.on_prediction(slot);
         break;
     }

examples/server/server-context.h

Lines changed: 1 addition & 0 deletions
@@ -336,4 +336,5 @@ struct server_context {

     // Re-aggregates all active vectors and updates the model state
     bool apply_control_vectors_internal();
+
 };

0 commit comments

Comments
 (0)