Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion tools/server/server-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,14 +562,15 @@ struct server_context_impl {

llama_model_ptr model_dft;

bool add_bos_token = true;
bool add_bos_token = true;

int32_t n_ctx; // total context for all clients / slots

// slots / clients
std::vector<server_slot> slots;

int slots_debug = 0;
int n_empty_consequtive = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ggerganov
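Did you mean `consecutive`? The new counter is spelled `n_empty_consequtive` here and at both of its uses in the empty-batch check, so the rename would need to cover all three.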


std::unique_ptr<server_prompt_cache> prompt_cache;

Expand Down Expand Up @@ -2628,6 +2629,12 @@ struct server_context_impl {

if (batch.n_tokens == 0) {
SRV_WRN("%s", "no tokens to decode\n");

if (++n_empty_consequtive > 3) {
GGML_ABORT("fatal error - please provide logs and repro in %s\n", "https://github.com/ggml-org/llama.cpp/pull/20277");
}
} else {
n_empty_consequtive = 0;
}

int32_t i_next = 0;
Expand Down
Loading