Skip to content

Commit 8357714

Browse files
authored
ui: fix avg token/sec calculation on models page (#357)
* ui: use percentiles for token stats
* ui: add histogram of metrics
* update vite to remove security warnings

fixes #355
1 parent c07179d commit 8357714

File tree

2 files changed

+281
-23
lines changed

2 files changed

+281
-23
lines changed

ui/package-lock.json

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ui/src/pages/Models.tsx

Lines changed: 278 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -191,42 +191,300 @@ function ModelsPanel() {
191191
);
192192
}
193193

194+
/**
 * Pre-binned distribution of tokens/second values together with the
 * percentile values that are highlighted on the histogram chart.
 */
interface HistogramData {
  /** Number of samples counted in each bin; bin `i` starts at `min + i * binSize`. */
  bins: number[];
  /** Smallest tokens/second value covered by the histogram (left edge of the first bin). */
  min: number;
  /** Largest tokens/second value covered by the histogram (right edge of the last bin). */
  max: number;
  /** Width of a single bin, in tokens/second. */
  binSize: number;
  /** Tokens/second value at the 99th percentile. */
  p99: number;
  /** Tokens/second value at the 95th percentile. */
  p95: number;
  /** Tokens/second value at the 50th percentile (median). */
  p50: number;
}
203+
204+
/**
 * Renders a responsive SVG histogram of tokens/second values, with dashed
 * vertical markers at the P50, P95 and P99 values.
 *
 * @param data - Bin counts, the value range they cover (`min`..`max`, with
 *   `binSize` per bin) and the percentile values to mark.
 * @returns The chart inside a full-width container, or `null` when there are
 *   no bins to draw.
 */
function TokenHistogram({ data }: { data: HistogramData }) {
  const { bins, min, max, binSize, p50, p95, p99 } = data;

  // Nothing to draw. This also avoids Math.max() over an empty list
  // (-Infinity) and a division by zero when computing the bar width.
  if (bins.length === 0) {
    return null;
  }

  const maxCount = Math.max(...bins);

  const height = 120;
  const padding = { top: 10, right: 15, bottom: 25, left: 45 };

  // A fixed viewBox width lets the SVG scale to whatever width its container has.
  const viewBoxWidth = 600;
  const chartWidth = viewBoxWidth - padding.left - padding.right;
  const chartHeight = height - padding.top - padding.bottom;
  const baselineY = height - padding.bottom;

  const barWidth = chartWidth / bins.length;
  const range = max - min;

  // Map a tokens/second value to an x coordinate inside the chart area.
  const getXPosition = (value: number) => {
    // When every sample has the same value the range is 0; center the marker
    // instead of dividing by zero and emitting NaN coordinates.
    if (!(range > 0)) {
      return padding.left + chartWidth / 2;
    }
    // Clamp so a marker can never be drawn outside the chart area.
    const ratio = Math.min(1, Math.max(0, (value - min) / range));
    return padding.left + ratio * chartWidth;
  };

  // One entry per dashed percentile marker, drawn in this order.
  const percentileMarkers = [
    { key: "p50", value: p50, className: "text-gray-600 dark:text-gray-400" },
    { key: "p95", value: p95, className: "text-orange-500 dark:text-orange-400" },
    { key: "p99", value: p99, className: "text-green-500 dark:text-green-400" },
  ];

  return (
    <div className="mt-2 w-full">
      <svg viewBox={`0 0 ${viewBoxWidth} ${height}`} className="w-full h-auto" preserveAspectRatio="xMidYMid meet">
        {/* Y-axis */}
        <line
          x1={padding.left}
          y1={padding.top}
          x2={padding.left}
          y2={baselineY}
          stroke="currentColor"
          strokeWidth="1"
          opacity="0.3"
        />

        {/* X-axis */}
        <line
          x1={padding.left}
          y1={baselineY}
          x2={viewBoxWidth - padding.right}
          y2={baselineY}
          stroke="currentColor"
          strokeWidth="1"
          opacity="0.3"
        />

        {/* Histogram bars, scaled so the fullest bin spans the chart height */}
        {bins.map((count, i) => {
          const barHeight = maxCount > 0 ? (count / maxCount) * chartHeight : 0;
          const x = padding.left + i * barWidth;
          const y = baselineY - barHeight;
          const binStart = min + i * binSize;
          const binEnd = binStart + binSize;

          return (
            <g key={i}>
              <rect
                x={x}
                y={y}
                // Leave a 1-unit gap between bars, but never collapse below 1 unit wide.
                width={Math.max(barWidth - 1, 1)}
                height={barHeight}
                fill="currentColor"
                opacity="0.6"
                className="text-blue-500 dark:text-blue-400 hover:opacity-90 transition-opacity cursor-pointer"
              />
              <title>{`${binStart.toFixed(1)} - ${binEnd.toFixed(1)} tokens/sec\nCount: ${count}`}</title>
            </g>
          );
        })}

        {/* Percentile lines */}
        {percentileMarkers.map((marker) => {
          const x = getXPosition(marker.value);
          return (
            <line
              key={marker.key}
              x1={x}
              y1={padding.top}
              x2={x}
              y2={baselineY}
              stroke="currentColor"
              strokeWidth="2"
              strokeDasharray="4 2"
              opacity="0.7"
              className={marker.className}
            />
          );
        })}

        {/* X-axis range labels */}
        <text x={padding.left} y={height - 5} fontSize="10" fill="currentColor" opacity="0.6" textAnchor="start">
          {min.toFixed(1)}
        </text>

        <text
          x={viewBoxWidth - padding.right}
          y={height - 5}
          fontSize="10"
          fill="currentColor"
          opacity="0.6"
          textAnchor="end"
        >
          {max.toFixed(1)}
        </text>

        {/* X-axis title */}
        <text
          x={padding.left + chartWidth / 2}
          y={height - 2}
          fontSize="10"
          fill="currentColor"
          opacity="0.6"
          textAnchor="middle"
        >
          Tokens/Second Distribution
        </text>
      </svg>
    </div>
  );
}
352+
194353
function StatsPanel() {
195354
const { metrics } = useAPI();
196355

197-
const [totalRequests, totalInputTokens, totalOutputTokens, avgTokensPerSecond] = useMemo(() => {
356+
const [totalRequests, totalInputTokens, totalOutputTokens, tokenStats, histogramData] = useMemo(() => {
198357
const totalRequests = metrics.length;
199358
if (totalRequests === 0) {
200-
return [0, 0, 0];
359+
return [0, 0, 0, { p99: 0, p95: 0, p50: 0 }, null];
201360
}
202361
const totalInputTokens = metrics.reduce((sum, m) => sum + m.input_tokens, 0);
203362
const totalOutputTokens = metrics.reduce((sum, m) => sum + m.output_tokens, 0);
204-
const avgTokensPerSecond = (metrics.reduce((sum, m) => sum + m.tokens_per_second, 0) / totalRequests).toFixed(2);
205-
return [totalRequests, totalInputTokens, totalOutputTokens, avgTokensPerSecond];
363+
364+
// Calculate token statistics using output_tokens and duration_ms
365+
// Filter out metrics with invalid duration or output tokens
366+
const validMetrics = metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0);
367+
if (validMetrics.length === 0) {
368+
return [totalRequests, totalInputTokens, totalOutputTokens, { p99: 0, p95: 0, p50: 0 }, null];
369+
}
370+
371+
// Calculate tokens/second for each valid metric
372+
const tokensPerSecond = validMetrics.map((m) => m.output_tokens / (m.duration_ms / 1000));
373+
374+
// Sort for percentile calculation
375+
const sortedTokensPerSecond = [...tokensPerSecond].sort((a, b) => a - b);
376+
377+
// Calculate percentiles - showing speed thresholds where X% of requests are SLOWER (below)
378+
// P99: 99% of requests are slower than this speed (99th percentile - fast requests)
379+
// P95: 95% of requests are slower than this speed (95th percentile)
380+
// P50: 50% of requests are slower than this speed (median)
381+
const p99 = sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * 0.99)];
382+
const p95 = sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * 0.95)];
383+
const p50 = sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * 0.5)];
384+
385+
// Create histogram data
386+
const min = Math.min(...tokensPerSecond);
387+
const max = Math.max(...tokensPerSecond);
388+
const binCount = Math.min(30, Math.max(10, Math.floor(tokensPerSecond.length / 5))); // Adaptive bin count
389+
const binSize = (max - min) / binCount;
390+
391+
const bins = Array(binCount).fill(0);
392+
tokensPerSecond.forEach((value) => {
393+
const binIndex = Math.min(Math.floor((value - min) / binSize), binCount - 1);
394+
bins[binIndex]++;
395+
});
396+
397+
const histogramData = {
398+
bins,
399+
min,
400+
max,
401+
binSize,
402+
p99,
403+
p95,
404+
p50,
405+
};
406+
407+
return [
408+
totalRequests,
409+
totalInputTokens,
410+
totalOutputTokens,
411+
{
412+
p99: p99.toFixed(2),
413+
p95: p95.toFixed(2),
414+
p50: p50.toFixed(2),
415+
},
416+
histogramData,
417+
];
206418
}, [metrics]);
207419

420+
const nf = new Intl.NumberFormat();
421+
208422
return (
209423
<div className="card">
210-
<div className="rounded-lg overflow-hidden border border-gray-200 dark:border-white/10">
211-
<table className="w-full">
212-
<thead>
213-
<tr className="border-b border-gray-200 dark:border-white/10 text-right">
214-
<th>Requests</th>
215-
<th className="border-l border-gray-200 dark:border-white/10">Processed</th>
216-
<th className="border-l border-gray-200 dark:border-white/10">Generated</th>
217-
<th className="border-l border-gray-200 dark:border-white/10">Tokens/Sec</th>
424+
<div className="rounded-lg overflow-hidden border border-card-border-inner">
425+
<table className="min-w-full divide-y divide-card-border-inner">
426+
<thead className="bg-secondary">
427+
<tr>
428+
<th className="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain">
429+
Requests
430+
</th>
431+
<th className="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
432+
Processed
433+
</th>
434+
<th className="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
435+
Generated
436+
</th>
437+
<th className="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
438+
Token Stats (tokens/sec)
439+
</th>
218440
</tr>
219441
</thead>
220-
<tbody>
221-
<tr className="text-right">
222-
<td className="border-r border-gray-200 dark:border-white/10">{totalRequests}</td>
223-
<td className="border-r border-gray-200 dark:border-white/10">
224-
{new Intl.NumberFormat().format(totalInputTokens)}
442+
443+
<tbody className="bg-surface divide-y divide-card-border-inner">
444+
<tr className="hover:bg-secondary">
445+
<td className="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">{totalRequests}</td>
446+
447+
<td className="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
448+
<div className="flex items-center gap-2">
449+
<span className="text-sm font-medium">{nf.format(totalInputTokens)}</span>
450+
<span className="text-xs text-gray-500 dark:text-gray-400">tokens</span>
451+
</div>
452+
</td>
453+
454+
<td className="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
455+
<div className="flex items-center gap-2">
456+
<span className="text-sm font-medium">{nf.format(totalOutputTokens)}</span>
457+
<span className="text-xs text-gray-500 dark:text-gray-400">tokens</span>
458+
</div>
225459
</td>
226-
<td className="border-r border-gray-200 dark:border-white/10">
227-
{new Intl.NumberFormat().format(totalOutputTokens)}
460+
461+
<td className="px-4 py-4 border-l border-gray-200 dark:border-white/10">
462+
<div className="space-y-3">
463+
<div className="grid grid-cols-3 gap-2 items-center">
464+
<div className="text-center">
465+
<div className="text-xs text-gray-500 dark:text-gray-400">P50</div>
466+
<div className="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
467+
{tokenStats.p50}
468+
</div>
469+
</div>
470+
471+
<div className="text-center">
472+
<div className="text-xs text-gray-500 dark:text-gray-400">P95</div>
473+
<div className="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
474+
{tokenStats.p95}
475+
</div>
476+
</div>
477+
478+
<div className="text-center">
479+
<div className="text-xs text-gray-500 dark:text-gray-400">P99</div>
480+
<div className="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
481+
{tokenStats.p99}
482+
</div>
483+
</div>
484+
</div>
485+
{histogramData && <TokenHistogram data={histogramData} />}
486+
</div>
228487
</td>
229-
<td>{avgTokensPerSecond}</td>
230488
</tr>
231489
</tbody>
232490
</table>

0 commit comments

Comments
 (0)