ikawrakow · ikawrakow · Jan 10, 2026 · Jan 7, 2026 · Jan 8, 2026
diff --git a/common/common.cpp b/common/common.cpp
@@ -3556,6 +3556,7 @@ void llama_batch_add(
                           llama_pos   pos,
     const std::vector<llama_seq_id> & seq_ids,
                                bool   logits) {
+    GGML_ASSERT(batch.seq_id[batch.n_tokens] && "llama_batch size exceeded");
     batch.token   [batch.n_tokens] = id;
     batch.pos     [batch.n_tokens] = pos;
     batch.n_seq_id[batch.n_tokens] = seq_ids.size();

diff --git a/examples/server/server-common.cpp b/examples/server/server-common.cpp
@@ -484,7 +484,7 @@ bool server_sent_event(httplib::DataSink& sink, const json& data) {
         data.dump(-1, ' ', false, json::error_handler_t::replace) +
         "\n\n"; // required by RFC 8895 - A message is terminated by a blank line (two line terminators in a row).
 
-    LOG_VERBOSE("data stream, to_send: %s", str.c_str());
+    //LOG_VERBOSE("data stream, to_send: %s", str.c_str());
 
     return sink.write(str.c_str(), str.size());
 }

diff --git a/examples/server/server-common.h b/examples/server/server-common.h
@@ -336,6 +336,10 @@ struct server_tokens {
 
     llama_pos pos_next() const;
 
+    int n_tokens() const {  // size of `tokens`, reported as int
+        return static_cast<int>(tokens.size());  // same narrowing as before, now spelled out
+    }
+
     // for debugging
     std::string str() const;