Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 43 additions & 19 deletions proxy/metrics_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,18 @@ func (mp *metricsMonitor) wrapHandler(
}
} else {
if gjson.ValidBytes(body) {
if tm, err := parseMetrics(modelID, recorder.StartTime(), gjson.ParseBytes(body)); err != nil {
mp.logger.Warnf("error parsing metrics: %v, path=%s", err, request.URL.Path)
} else {
mp.addMetrics(tm)
parsed := gjson.ParseBytes(body)
usage := parsed.Get("usage")
timings := parsed.Get("timings")

if usage.Exists() || timings.Exists() {
if tm, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
mp.logger.Warnf("error parsing metrics: %v, path=%s", err, request.URL.Path)
} else {
mp.addMetrics(tm)
}
}

} else {
mp.logger.Warnf("metrics skipped, invalid JSON in response body path=%s", request.URL.Path)
}
Expand Down Expand Up @@ -174,19 +181,20 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
}

if gjson.ValidBytes(data) {
return parseMetrics(modelID, start, gjson.ParseBytes(data))
parsed := gjson.ParseBytes(data)
usage := parsed.Get("usage")
timings := parsed.Get("timings")

if usage.Exists() || timings.Exists() {
return parseMetrics(modelID, start, usage, timings)
}
}
}

return TokenMetrics{}, fmt.Errorf("no valid JSON data found in stream")
}

func parseMetrics(modelID string, start time.Time, jsonData gjson.Result) (TokenMetrics, error) {
usage := jsonData.Get("usage")
timings := jsonData.Get("timings")
if !usage.Exists() && !timings.Exists() {
return TokenMetrics{}, fmt.Errorf("no usage or timings data found")
}
func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result) (TokenMetrics, error) {
// default values
cachedTokens := -1 // unknown or missing data
outputTokens := 0
Expand All @@ -198,19 +206,35 @@ func parseMetrics(modelID string, start time.Time, jsonData gjson.Result) (Token
durationMs := int(time.Since(start).Milliseconds())

if usage.Exists() {
outputTokens = int(jsonData.Get("usage.completion_tokens").Int())
inputTokens = int(jsonData.Get("usage.prompt_tokens").Int())
if pt := usage.Get("prompt_tokens"); pt.Exists() {
// v1/chat/completions
inputTokens = int(pt.Int())
} else if it := usage.Get("input_tokens"); it.Exists() {
// v1/messages
inputTokens = int(it.Int())
}

if ct := usage.Get("completion_tokens"); ct.Exists() {
// v1/chat/completions
outputTokens = int(ct.Int())
} else if ot := usage.Get("output_tokens"); ot.Exists() {
outputTokens = int(ot.Int())
}

if ct := usage.Get("cache_read_input_tokens"); ct.Exists() {
cachedTokens = int(ct.Int())
}
}

// use llama-server's timing data for tok/sec and duration as it is more accurate
if timings.Exists() {
inputTokens = int(jsonData.Get("timings.prompt_n").Int())
outputTokens = int(jsonData.Get("timings.predicted_n").Int())
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
inputTokens = int(timings.Get("prompt_n").Int())
outputTokens = int(timings.Get("predicted_n").Int())
promptPerSecond = timings.Get("prompt_per_second").Float()
tokensPerSecond = timings.Get("predicted_per_second").Float()
durationMs = int(timings.Get("prompt_ms").Float() + timings.Get("predicted_ms").Float())

if cachedValue := jsonData.Get("timings.cache_n"); cachedValue.Exists() {
if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
cachedTokens = int(cachedValue.Int())
}
}
Comment on lines 230 to 240
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add existence checks before overwriting token counts from timings.

When timings exists but lacks specific fields (e.g., prompt_n), gjson.Result.Int() returns 0, which overwrites potentially valid values from usage. This is inconsistent with line 237 which correctly checks cachedValue.Exists() before using it.

 	// use llama-server's timing data for tok/sec and duration as it is more accurate
 	if timings.Exists() {
-		inputTokens = int(timings.Get("prompt_n").Int())
-		outputTokens = int(timings.Get("predicted_n").Int())
-		promptPerSecond = timings.Get("prompt_per_second").Float()
-		tokensPerSecond = timings.Get("predicted_per_second").Float()
-		durationMs = int(timings.Get("prompt_ms").Float() + timings.Get("predicted_ms").Float())
+		if pn := timings.Get("prompt_n"); pn.Exists() {
+			inputTokens = int(pn.Int())
+		}
+		if predn := timings.Get("predicted_n"); predn.Exists() {
+			outputTokens = int(predn.Int())
+		}
+		if pps := timings.Get("prompt_per_second"); pps.Exists() {
+			promptPerSecond = pps.Float()
+		}
+		if tps := timings.Get("predicted_per_second"); tps.Exists() {
+			tokensPerSecond = tps.Float()
+		}
+		if pm := timings.Get("prompt_ms"); pm.Exists() {
+			if predm := timings.Get("predicted_ms"); predm.Exists() {
+				durationMs = int(pm.Float() + predm.Float())
+			}
+		}
 
 		if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
 			cachedTokens = int(cachedValue.Int())
 		}
 	}
🤖 Prompt for AI Agents
In proxy/metrics_monitor.go around lines 230 to 240, timings is checked for
existence but individual timing fields (prompt_n, predicted_n,
prompt_per_second, predicted_per_second, prompt_ms, predicted_ms) are read
unconditionally, causing zero-valued gjson results to overwrite valid
usage-derived token counts; change each assignment to first check that the
specific timings.Get("<field>").Exists() before converting and assigning
(similar to the existing cachedValue check) so you only overwrite values when
that timing field is actually present.

Expand Down
Loading