Skip to content
Merged
Show file tree
Hide file tree
Changes from 89 commits
Commits
Show all changes
96 commits
Select commit Hold shift + click to select a range
f669140
feat: add activity page and handler
g2mt Jul 18, 2025
f26779b
feat: add metrics logging and UI display
g2mt Jul 18, 2025
822e2cf
feat: add metrics parser and model-specific metrics endpoint
g2mt Jul 18, 2025
198646d
run go fmt
g2mt Jul 18, 2025
e9edd96
refactor: remove GetLatestMetrics and update apiGetMetrics to collect…
g2mt Jul 18, 2025
0d74cf0
refactor: remove ParseLogData from MetricsParser and refactor Process…
g2mt Jul 18, 2025
fc3ca90
refactor: update metrics parsing and API to use input/output tokens
g2mt Jul 18, 2025
67fd770
Remove setInterval
g2mt Jul 18, 2025
8b49999
Rename table column
g2mt Jul 18, 2025
d3f0147
Remove colors, hide token count if zero
g2mt Jul 18, 2025
b274f25
use - for empty token count column
g2mt Jul 18, 2025
75b2cdf
Add metricsMaxInMemory to config
g2mt Jul 18, 2025
4a26d32
Fix whitespace
g2mt Jul 18, 2025
ffd8dae
Remove getSummary
g2mt Jul 18, 2025
bacad51
refactor: remove model-specific metrics parsing and API endpoints
g2mt Jul 18, 2025
b3f5d2b
update tests
g2mt Jul 18, 2025
7ee133b
Run fetchMetrics on mount
g2mt Jul 18, 2025
e9a4156
refactor: update metrics parser to simplify method signatures
g2mt Jul 18, 2025
9f22155
Rename addMetric
g2mt Jul 18, 2025
fea861d
feat: add config-based metrics parser initialization
g2mt Jul 18, 2025
99b3eb2
remove newline
g2mt Jul 18, 2025
39908a1
feat: add metrics persistence to file
g2mt Jul 18, 2025
2fee6b8
document metricsLogPath in example config
g2mt Jul 18, 2025
4d30155
Add MetricsMaxInMemory to windows test
g2mt Jul 18, 2025
749ace4
Check if pm.metricsParser is nil in apiGetMetrics
g2mt Jul 19, 2025
fd7f626
feat: add metricsUseServerResponse config and update proxy logic
g2mt Jul 19, 2025
91b7efe
chore: add metricsUseServerResponse config option
g2mt Jul 19, 2025
9749b69
feat: add useServerResponse to MetricsParser
g2mt Jul 19, 2025
158a202
fix
g2mt Jul 19, 2025
f5b60a0
correct comment
g2mt Jul 19, 2025
6a84eab
Merge remote-tracking branch 'fork/activity-page' into activity-page
g2mt Jul 19, 2025
55efb27
refactor: update generation speed calculation
g2mt Jul 19, 2025
0e79f64
refactor: unify stdout and stderr handling in process.go
g2mt Jul 19, 2025
52436fd
feat: add streaming response handling in proxyOAIHandler
g2mt Jul 19, 2025
4f0ee68
feat: add log event subscription management
g2mt Jul 19, 2025
09e1e95
remove import
g2mt Jul 19, 2025
f473788
Use bufio.NewScanner to parse stdout lines
g2mt Jul 19, 2025
6d7bca3
use custom response recorder
g2mt Jul 19, 2025
f33222d
add bufio import
g2mt Jul 19, 2025
5cfbb84
refactor metrics debug logging
g2mt Jul 19, 2025
cdbc196
Remove metricsLogPath
g2mt Jul 19, 2025
c8be9ad
Merge branch 'activity-page-remove-httptest' into activity-page
g2mt Jul 19, 2025
8746468
Move responserecorder to another file
g2mt Jul 19, 2025
f574b6c
Merge branch 'activity-page-stream' into activity-page
g2mt Jul 19, 2025
e516610
move StreamingResponseRecorder to separate file
g2mt Jul 19, 2025
037e7d9
Rename responserecorder.go, remove NewResponseRecorder
g2mt Jul 19, 2025
3616243
Add Activity streaming
g2mt Jul 19, 2025
9508b7c
add fmt
g2mt Jul 19, 2025
814b533
Remove first fetch
g2mt Jul 19, 2025
b6b8046
fix missing !
g2mt Jul 19, 2025
f36cda1
Rename to StreamingResponseRecorder
g2mt Jul 19, 2025
0c44cfd
Refactor response recorder functions into a single middleware
g2mt Jul 19, 2025
2aa1c38
Rename metrics parser to MetricsMonitor
g2mt Jul 19, 2025
724d270
Rename to metricsDataCancel
g2mt Jul 19, 2025
cc8a1bb
Rename ResponseMiddleware to MetricsMiddleware, process HTTP request …
g2mt Jul 19, 2025
55516c6
Move log parsing to SubscribeToProcessLogs
g2mt Jul 19, 2025
9286fd9
Remove unused log
g2mt Jul 19, 2025
0c92922
Extract metrics recording into method
g2mt Jul 19, 2025
7a9a413
Add comment to OutputTokens
g2mt Jul 19, 2025
df3fbdb
Rename generationSpeed
g2mt Jul 19, 2025
990e8bd
Add comments for regexes
g2mt Jul 19, 2025
0e250a4
Add an ID for token metrics
g2mt Jul 19, 2025
252b451
Refactor into GetMetricsJSONByLines
g2mt Jul 19, 2025
1f5f850
cleanup
g2mt Jul 19, 2025
c80557c
add back toLocaleString
g2mt Jul 19, 2025
7c875e6
Merge branch 'mostlygeek:main' into activity-page
g2mt Jul 19, 2025
a2c6451
revert change
g2mt Jul 19, 2025
7a07a09
Remove GetMetricsJSONByLines
g2mt Jul 19, 2025
1d260f9
Remove apiStreamMetrics and move streaming into /api/events
g2mt Jul 20, 2025
1c6543c
Fix switch case variable declaration scope issue.
g2mt Jul 20, 2025
2e37d38
Fix loading state logic to handle empty metrics.
g2mt Jul 20, 2025
af4b4dc
Remove loading state from Activity
g2mt Jul 20, 2025
464d91c
Remove subtitle
g2mt Jul 20, 2025
ef97c68
fix style
g2mt Jul 20, 2025
6fd771b
remove debug logger
g2mt Jul 20, 2025
5ce9abd
Remove metricsMonitor event bus
g2mt Jul 20, 2025
6ed6dc8
remove nil checks
g2mt Jul 20, 2025
a0d1161
refactor MetricsMiddleware
g2mt Jul 20, 2025
e836ebc
Clean up ResponseWriter
g2mt Jul 20, 2025
93af520
refactor metrics middleware
g2mt Jul 20, 2025
741ca5a
add comment
g2mt Jul 20, 2025
8f2d75b
Remove flush
g2mt Jul 20, 2025
ce27be5
remove mm from irrelevant endpoints
g2mt Jul 22, 2025
b5ad4d9
move requested model parsing to ls-requested-model key
g2mt Jul 22, 2025
0813157
Remove import
g2mt Jul 22, 2025
2de9250
rm
g2mt Jul 22, 2025
82522b6
Add MiddlewareWritesMetrics tests
g2mt Jul 22, 2025
bab8b79
Rename metrics parser
g2mt Jul 22, 2025
f4288bb
record modelName for metrics in proxyOAIHandler
g2mt Jul 22, 2025
cd9dc5b
Add streaming to simple-responder.go
g2mt Jul 22, 2025
3a94a96
hide stream behind url query
g2mt Jul 22, 2025
19ca3a8
wrong test
g2mt Jul 22, 2025
4b0e94f
Convert to interface{}
g2mt Jul 22, 2025
a245674
fix test
g2mt Jul 22, 2025
05509e6
Get realModelName in middleware
g2mt Jul 22, 2025
242da36
add startTime
g2mt Jul 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ healthCheckTimeout: 500
# - Valid log levels: debug, info, warn, error
logLevel: info

# metricsMaxInMemory: maximum number of metrics to keep in memory
# - optional, default: 1000
# - controls how many metrics are stored in memory before older ones are discarded
# - useful for limiting memory usage when processing large volumes of metrics
metricsMaxInMemory: 1000

# startPort: sets the starting port number for the automatic ${PORT} macro.
# - optional, default: 5800
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
Expand Down Expand Up @@ -200,4 +206,4 @@ groups:
members:
- "forever-modelA"
- "forever-modelB"
- "forever-modelc"
- "forever-modelc"
1 change: 1 addition & 0 deletions docker/config.example.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
healthCheckTimeout: 300
logRequests: true
metricsMaxInMemory: 1000

models:
"qwen2.5":
Expand Down
10 changes: 10 additions & 0 deletions misc/simple-responder/simple-responder.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ func main() {
"responseMessage": *responseMessage,
"h_content_length": c.Request.Header.Get("Content-Length"),
"request_body": string(bodyBytes),
"usage": gin.H{
"completion_tokens": 10,
"prompt_tokens": 25,
"total_tokens": 35,
},
})
})

Expand All @@ -74,6 +79,11 @@ func main() {
c.Header("Content-Type", "application/json")
c.JSON(http.StatusOK, gin.H{
"responseMessage": *responseMessage,
"usage": gin.H{
"completion_tokens": 10,
"prompt_tokens": 25,
"total_tokens": 35,
},
})

})
Expand Down
2 changes: 2 additions & 0 deletions proxy/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ type Config struct {
HealthCheckTimeout int `yaml:"healthCheckTimeout"`
LogRequests bool `yaml:"logRequests"`
LogLevel string `yaml:"logLevel"`
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
Profiles map[string][]string `yaml:"profiles"`
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
Expand Down Expand Up @@ -194,6 +195,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
HealthCheckTimeout: 120,
StartPort: 5800,
LogLevel: "info",
MetricsMaxInMemory: 1000,
}
err = yaml.Unmarshal(data, &config)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions proxy/config_posix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ groups:
},
},
HealthCheckTimeout: 15,
MetricsMaxInMemory: 1000,
Profiles: map[string][]string{
"test": {"model1", "model2"},
},
Expand Down
1 change: 1 addition & 0 deletions proxy/config_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ groups:
},
},
HealthCheckTimeout: 15,
MetricsMaxInMemory: 1000,
Profiles: map[string][]string{
"test": {"model1", "model2"},
},
Expand Down
1 change: 1 addition & 0 deletions proxy/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const ProcessStateChangeEventID = 0x01
const ChatCompletionStatsEventID = 0x02
const ConfigFileChangedEventID = 0x03
const LogDataEventID = 0x04
const TokenMetricsEventID = 0x05

type ProcessStateChangeEvent struct {
ProcessName string
Expand Down
145 changes: 145 additions & 0 deletions proxy/metrics_middleware.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package proxy

import (
"bytes"
"io"
"net/http"
"time"

"github.com/gin-gonic/gin"
"github.com/tidwall/gjson"
)

// MetricsMiddleware sets up the MetricsResponseWriter for capturing upstream
// requests so token usage can be extracted from the response body after the
// handler chain completes. It buffers the request body (restoring it for
// downstream handlers), rejects requests without a "model" key, and stores
// the requested model under the "ls-requested-model" context key for
// proxyOAIHandler.
func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
	return func(c *gin.Context) {
		bodyBytes, err := io.ReadAll(c.Request.Body)
		if err != nil {
			pm.sendErrorResponse(c, http.StatusBadRequest, "could not read request body")
			return
		}
		// Restore the body so downstream handlers can read it again.
		c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))

		requestedModel := gjson.GetBytes(bodyBytes, "model").String()
		if requestedModel == "" {
			pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
			return
		}
		c.Set("ls-requested-model", requestedModel)

		writer := &MetricsResponseWriter{
			ResponseWriter: c.Writer,
			metricsRecorder: &MetricsRecorder{
				metricsMonitor: pm.metricsMonitor,
				modelName:      requestedModel, // will be updated in proxyOAIHandler
				isStreaming:    gjson.GetBytes(bodyBytes, "stream").Bool(),
				// startTime must be initialized here: parseAndRecordMetrics
				// computes time.Since(startTime); a zero time.Time would
				// yield an absurd duration and tokens-per-second value.
				startTime: time.Now(),
			},
		}
		c.Writer = writer
		c.Next()

		// Request finished; parse the captured response for usage metrics.
		rec := writer.metricsRecorder
		rec.processBody(writer.body)
	}
}

// MetricsRecorder accumulates the per-request context needed to turn a
// proxied response body into a TokenMetrics record.
type MetricsRecorder struct {
	metricsMonitor *MetricsMonitor // sink that stores and publishes recorded metrics
	modelName      string          // set to the real model name in proxyOAIHandler
	isStreaming    bool            // true when the request body had "stream": true
	startTime      time.Time       // used to compute request duration in parseAndRecordMetrics
}

// processBody handles response processing after the request completes,
// dispatching to the streaming (SSE) or plain-JSON parser depending on
// how the request was made.
func (rec *MetricsRecorder) processBody(body []byte) {
	if !rec.isStreaming {
		rec.processNonStreamingResponse(body)
		return
	}
	rec.processStreamingResponse(body)
}

// parseAndRecordMetrics extracts the OpenAI-style "usage" object from a
// response JSON payload and records token metrics for this request.
// Payloads with no usage object, or with zero completion tokens, are
// silently ignored.
func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) {
	usage := jsonData.Get("usage")
	if !usage.Exists() {
		return
	}

	outputTokens := int(usage.Get("completion_tokens").Int())
	inputTokens := int(usage.Get("prompt_tokens").Int())
	if outputTokens <= 0 {
		return
	}

	duration := time.Since(rec.startTime)

	// Guard against a zero or negative duration (clock adjustment, unset
	// startTime): dividing by it would produce +Inf or NaN, which breaks
	// JSON encoding of TokensPerSecond.
	tokensPerSecond := 0.0
	if secs := duration.Seconds(); secs > 0 {
		tokensPerSecond = float64(inputTokens+outputTokens) / secs
	}

	rec.metricsMonitor.addMetrics(TokenMetrics{
		Timestamp:       time.Now(),
		Model:           rec.modelName,
		InputTokens:     inputTokens,
		OutputTokens:    outputTokens,
		TokensPerSecond: tokensPerSecond,
		DurationMs:      int(duration.Milliseconds()),
	})
}

// processStreamingResponse scans an SSE response body line by line, pulls
// the JSON payload out of each "data: " event, and records any usage
// metrics it finds. Scanning stops at the "[DONE]" sentinel.
func (rec *MetricsRecorder) processStreamingResponse(body []byte) {
	ssePrefix := []byte("data: ")
	for _, raw := range bytes.Split(body, []byte("\n")) {
		line := bytes.TrimSpace(raw)
		if len(line) == 0 || !bytes.HasPrefix(line, ssePrefix) {
			continue
		}

		payload := bytes.TrimSpace(line[len(ssePrefix):])
		if len(payload) == 0 {
			continue
		}
		if bytes.Equal(payload, []byte("[DONE]")) {
			return
		}

		// Only attempt extraction on well-formed JSON payloads.
		if gjson.ValidBytes(payload) {
			rec.parseAndRecordMetrics(gjson.ParseBytes(payload))
		}
	}
}

// processNonStreamingResponse parses a complete (non-SSE) JSON response
// body and records any usage metrics it contains. Empty or malformed
// bodies are ignored.
func (rec *MetricsRecorder) processNonStreamingResponse(body []byte) {
	if len(body) == 0 || !gjson.ValidBytes(body) {
		return
	}
	rec.parseAndRecordMetrics(gjson.ParseBytes(body))
}

// MetricsResponseWriter captures the entire response for non-streaming
// requests (and the full SSE stream for streaming ones) so usage metrics
// can be parsed after the handler chain finishes.
type MetricsResponseWriter struct {
	gin.ResponseWriter
	body            []byte           // every byte successfully written downstream, in order
	metricsRecorder *MetricsRecorder // per-request metrics context consumed by MetricsMiddleware
}

// Write forwards the bytes to the wrapped ResponseWriter and, on success,
// appends them to the in-memory copy used later for metrics extraction.
func (w *MetricsResponseWriter) Write(p []byte) (int, error) {
	n, err := w.ResponseWriter.Write(p)
	if err == nil {
		w.body = append(w.body, p...)
	}
	return n, err
}

// WriteHeader passes the status code straight through to the wrapped writer.
// NOTE(review): pure delegation — the embedded gin.ResponseWriter would
// provide this implicitly; the override exists only to make the pass-through
// explicit.
func (w *MetricsResponseWriter) WriteHeader(statusCode int) {
	w.ResponseWriter.WriteHeader(statusCode)
}

// Header returns the header map of the wrapped writer unchanged.
// NOTE(review): pure delegation, kept explicit alongside WriteHeader.
func (w *MetricsResponseWriter) Header() http.Header {
	return w.ResponseWriter.Header()
}
82 changes: 82 additions & 0 deletions proxy/metrics_monitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package proxy

import (
"encoding/json"
"sync"
"time"

"github.com/mostlygeek/llama-swap/event"
)

// TokenMetrics represents parsed token statistics from llama-server logs
type TokenMetrics struct {
	ID        int       `json:"id"` // monotonically increasing; assigned by MetricsMonitor.addMetrics
	Timestamp time.Time `json:"timestamp"`
	Model     string    `json:"model"`
	InputTokens     int     `json:"input_tokens"`      // from usage.prompt_tokens in the upstream response
	OutputTokens    int     `json:"output_tokens"`     // from usage.completion_tokens in the upstream response
	TokensPerSecond float64 `json:"tokens_per_second"` // (input+output) tokens divided by request duration
	DurationMs      int     `json:"duration_ms"`       // wall-clock request duration in milliseconds
}

// TokenMetricsEvent represents a token metrics event published on the
// process event bus whenever a new metric is recorded.
type TokenMetricsEvent struct {
	Metrics TokenMetrics
}

// Type returns the event-bus identifier for token metrics events.
func (e TokenMetricsEvent) Type() uint32 {
	return TokenMetricsEventID // defined in events.go
}

// MetricsMonitor parses llama-server output for token statistics
type MetricsMonitor struct {
	mu         sync.RWMutex   // guards metrics and nextID
	metrics    []TokenMetrics // most recent metrics, oldest first, capped at maxMetrics
	maxMetrics int            // upper bound on len(metrics); older entries are discarded
	nextID     int            // next TokenMetrics.ID to assign
}

// NewMetricsMonitor builds a MetricsMonitor whose in-memory history size
// comes from config.MetricsMaxInMemory, falling back to 1000 when the
// configured value is zero or negative.
func NewMetricsMonitor(config *Config) *MetricsMonitor {
	limit := config.MetricsMaxInMemory
	if limit <= 0 {
		limit = 1000 // default fallback
	}
	return &MetricsMonitor{maxMetrics: limit}
}

// addMetrics assigns the metric a sequential ID, appends it to the bounded
// in-memory history (discarding the oldest entries beyond maxMetrics), and
// publishes a TokenMetricsEvent.
func (mp *MetricsMonitor) addMetrics(metric TokenMetrics) {
	mp.mu.Lock()
	metric.ID = mp.nextID
	mp.nextID++
	mp.metrics = append(mp.metrics, metric)
	if len(mp.metrics) > mp.maxMetrics {
		mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
	}
	mp.mu.Unlock()

	// Emit AFTER releasing the lock: a subscriber handled synchronously by
	// event.Emit that calls GetMetrics/GetMetricsJSON would otherwise
	// deadlock on the non-reentrant RWMutex.
	event.Emit(TokenMetricsEvent{Metrics: metric})
}

// GetMetrics returns a snapshot copy of the current metrics, so callers
// can read the result without holding the monitor's lock.
func (mp *MetricsMonitor) GetMetrics() []TokenMetrics {
	mp.mu.RLock()
	defer mp.mu.RUnlock()

	snapshot := make([]TokenMetrics, len(mp.metrics))
	copy(snapshot, mp.metrics)
	return snapshot
}

// GetMetricsJSON returns metrics as JSON
// NOTE: before any metric is recorded the backing slice is nil, so this
// encodes as JSON "null" rather than "[]".
func (mp *MetricsMonitor) GetMetricsJSON() ([]byte, error) {
	mp.mu.RLock()
	defer mp.mu.RUnlock()
	return json.Marshal(mp.metrics)
}
19 changes: 10 additions & 9 deletions proxy/proxymanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"time"

"github.com/gin-gonic/gin"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

Expand All @@ -33,6 +32,8 @@ type ProxyManager struct {
upstreamLogger *LogMonitor
muxLogger *LogMonitor

metricsMonitor *MetricsMonitor

processGroups map[string]*ProcessGroup

// shutdown signaling
Expand Down Expand Up @@ -78,6 +79,8 @@ func New(config Config) *ProxyManager {
muxLogger: stdoutLogger,
upstreamLogger: upstreamLogger,

metricsMonitor: NewMetricsMonitor(&config),

processGroups: make(map[string]*ProcessGroup),

shutdownCtx: shutdownCtx,
Expand Down Expand Up @@ -149,10 +152,12 @@ func (pm *ProxyManager) setupGinEngine() {
c.Next()
})

mm := MetricsMiddleware(pm)

// Set up routes using the Gin engine
pm.ginEngine.POST("/v1/chat/completions", pm.proxyOAIHandler)
pm.ginEngine.POST("/v1/chat/completions", mm, pm.proxyOAIHandler)
// Support legacy /v1/completions api, see issue #12
pm.ginEngine.POST("/v1/completions", pm.proxyOAIHandler)
pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)

// Support embeddings
pm.ginEngine.POST("/v1/embeddings", pm.proxyOAIHandler)
Expand Down Expand Up @@ -360,17 +365,13 @@ func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) {
return
}

requestedModel := gjson.GetBytes(bodyBytes, "model").String()
if requestedModel == "" {
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
return
}

requestedModel := c.GetString("ls-requested-model") // Should be set in MetricsMiddleware
processGroup, realModelName, err := pm.swapProcessGroup(requestedModel)
if err != nil {
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
return
}
c.Writer.(*MetricsResponseWriter).metricsRecorder.modelName = realModelName

// issue #69 allow custom model names to be sent to upstream
useModelName := pm.config.Models[realModelName].UseModelName
Expand Down
Loading
Loading