Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tools/server/server-common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ llama_pos server_tokens::pos_next(int64_t n_tokens) const {

size_t server_tokens::size_up_to_pos(llama_pos max_pos) const {
if (!has_mtmd) {
- return std::min((size_t)(max_pos + 1), tokens.size());
+ return std::min((size_t)max_pos, tokens.size());
}

size_t idx = 0;
Expand All @@ -296,7 +296,7 @@ size_t server_tokens::size_up_to_pos(llama_pos max_pos) const {
idx++;
}

- if (pos > max_pos) {
+ if (pos >= max_pos) {
break;
}
}
Expand Down
2 changes: 1 addition & 1 deletion tools/server/server-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ struct server_tokens {
// the next position after n_tokens. if n_tokens < 0, return the next position after all tokens.
llama_pos pos_next(int64_t n_tokens = -1) const;

- // number of tokens with position <= max_pos
+ // number of tokens with position < max_pos
size_t size_up_to_pos(llama_pos max_pos) const;

const mtmd::input_chunk_ptr & find_chunk(size_t idx) const;
Expand Down
8 changes: 4 additions & 4 deletions tools/server/server-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ struct server_context_impl {
std::vector<server_slot> slots;

int slots_debug = 0;
- int n_empty_consequtive = 0;
+ int n_empty_consecutive = 0;

std::unique_ptr<server_prompt_cache> prompt_cache;

Expand Down Expand Up @@ -2372,7 +2372,7 @@ struct server_context_impl {
} else {
pos_next = std::min(pos_next, std::max(it->pos_min + 1, it->pos_max));
n_past = std::min(slot.prompt.tokens.size_up_to_pos(pos_next), (size_t) it->n_tokens);
- SLT_WRN(slot, "restored context checkpoint (pos_min = %d, pos_max = %d, n_tokens = %" PRId64 ", size = %.3f MiB)\n", it->pos_min, it->pos_max, it->n_tokens, (float) checkpoint_size / 1024 / 1024);
+ SLT_WRN(slot, "restored context checkpoint (pos_min = %d, pos_max = %d, n_tokens = %" PRId64 ", n_past = %d, size = %.3f MiB)\n", it->pos_min, it->pos_max, it->n_tokens, n_past, (float) checkpoint_size / 1024 / 1024);
}
}

Expand Down Expand Up @@ -2630,11 +2630,11 @@ struct server_context_impl {
if (batch.n_tokens == 0) {
SRV_WRN("%s", "no tokens to decode\n");

- if (++n_empty_consequtive > 3) {
+ if (++n_empty_consecutive > 3) {
GGML_ABORT("fatal error - please provide logs and repro in %s\n", "https://github.com/ggml-org/llama.cpp/pull/20277");
}
} else {
- n_empty_consequtive = 0;
+ n_empty_consecutive = 0;
}

int32_t i_next = 0;
Expand Down