
Commit a89b803

Stream loading state when swapping models (#371)
Swapping models can take a long time and leave a lot of silence while the model is loading. Rather than silently load the model in the background, this PR allows llama-swap to send status updates in the reasoning_content of a streaming chat response. Fixes: #366
1 parent f852689 commit a89b803
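
The status updates ride on the normal chat completions stream rather than a side channel. A rough, self-contained sketch of the kind of chunk this enables is below: it prints an OpenAI-style chat.completion.chunk whose delta carries the status text in reasoning_content. The struct shapes, status wording, and model name are illustrative assumptions, not code from this commit.

package main

import (
    "encoding/json"
    "fmt"
)

// Minimal shapes for an OpenAI-style streaming chunk; only the fields
// needed for this illustration are included.
type delta struct {
    ReasoningContent string `json:"reasoning_content,omitempty"`
    Content          string `json:"content,omitempty"`
}

type choice struct {
    Index int   `json:"index"`
    Delta delta `json:"delta"`
}

type chunk struct {
    Object  string   `json:"object"`
    Model   string   `json:"model"`
    Choices []choice `json:"choices"`
}

func main() {
    // While the requested model is still starting up, the proxy can stream
    // chunks like this one; the status text and model name are made up.
    c := chunk{
        Object: "chat.completion.chunk",
        Model:  "my-model",
        Choices: []choice{{Delta: delta{
            ReasoningContent: "loading model, please wait...",
        }}},
    }
    b, _ := json.Marshal(c)
    fmt.Printf("data: %s\n\n", b) // printed as one server-sent event
}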

File tree

8 files changed, +374 −50 lines changed


config.example.yaml

Lines changed: 12 additions & 0 deletions
@@ -35,6 +35,14 @@ metricsMaxInMemory: 1000
 # - it is automatically incremented for every model that uses it
 startPort: 10001
 
+# sendLoadingState: inject loading status updates into the reasoning (thinking)
+# field
+# - optional, default: false
+# - when true, a stream of loading messages will be sent to the client in the
+#   reasoning field so chat UIs can show that loading is in progress.
+# - see #366 for more details
+sendLoadingState: true
+
 # macros: a dictionary of string substitutions
 # - optional, default: empty dictionary
 # - macros are reusable snippets

@@ -184,6 +192,10 @@ models:
     # - recommended to be omitted and the default used
     concurrencyLimit: 0
 
+    # sendLoadingState: overrides the global sendLoadingState setting for this model
+    # - optional, default: undefined (use global setting)
+    sendLoadingState: false
+
   # Unlisted model example:
   "qwen-unlisted":
     # unlisted: boolean, true or false

proxy/config/config.go

Lines changed: 10 additions & 0 deletions
@@ -129,6 +129,9 @@ type Config struct {
 
     // hooks, see: #209
     Hooks HooksConfig `yaml:"hooks"`
+
+    // send loading state in reasoning
+    SendLoadingState bool `yaml:"sendLoadingState"`
 }
 
 func (c *Config) RealModelName(search string) (string, bool) {

@@ -350,6 +353,13 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
            )
        }
 
+        // if sendLoadingState is nil, set it to the global config value
+        // see #366
+        if modelConfig.SendLoadingState == nil {
+            v := config.SendLoadingState // copy it
+            modelConfig.SendLoadingState = &v
+        }
+
        config.Models[modelId] = modelConfig
    }
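
Since LoadConfigFromReader copies the global value into any model that leaves the field unset, every model ends up with a non-nil pointer after loading. A minimal sketch of resolving the effective value, written as if it lived in the proxy/config package alongside the types above, follows; the helper itself is hypothetical and not part of this commit.

// Hypothetical helper, not part of this commit: resolve the effective
// setting for one model, falling back to the global flag if the model is
// unknown or the pointer was somehow left nil.
func (c *Config) sendLoadingStateFor(modelID string) bool {
    if m, ok := c.Models[modelID]; ok && m.SendLoadingState != nil {
        return *m.SendLoadingState // per-model value (already defaulted to the global)
    }
    return c.SendLoadingState // global setting
}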

proxy/config/config_posix_test.go

Lines changed: 29 additions & 22 deletions
@@ -160,6 +160,8 @@ groups:
        t.Fatalf("Failed to load config: %v", err)
    }
 
+   modelLoadingState := false
+
    expected := Config{
        LogLevel:  "info",
        StartPort: 5800,

@@ -171,36 +173,41 @@
                Preload: []string{"model1", "model2"},
            },
        },
+       SendLoadingState: false,
        Models: map[string]ModelConfig{
            "model1": {
-               Cmd:           "path/to/cmd --arg1 one",
-               Proxy:         "http://localhost:8080",
-               Aliases:       []string{"m1", "model-one"},
-               Env:           []string{"VAR1=value1", "VAR2=value2"},
-               CheckEndpoint: "/health",
-               Name:          "Model 1",
-               Description:   "This is model 1",
+               Cmd:              "path/to/cmd --arg1 one",
+               Proxy:            "http://localhost:8080",
+               Aliases:          []string{"m1", "model-one"},
+               Env:              []string{"VAR1=value1", "VAR2=value2"},
+               CheckEndpoint:    "/health",
+               Name:             "Model 1",
+               Description:      "This is model 1",
+               SendLoadingState: &modelLoadingState,
            },
            "model2": {
-               Cmd:           "path/to/server --arg1 one",
-               Proxy:         "http://localhost:8081",
-               Aliases:       []string{"m2"},
-               Env:           []string{},
-               CheckEndpoint: "/",
+               Cmd:              "path/to/server --arg1 one",
+               Proxy:            "http://localhost:8081",
+               Aliases:          []string{"m2"},
+               Env:              []string{},
+               CheckEndpoint:    "/",
+               SendLoadingState: &modelLoadingState,
            },
            "model3": {
-               Cmd:           "path/to/cmd --arg1 one",
-               Proxy:         "http://localhost:8081",
-               Aliases:       []string{"mthree"},
-               Env:           []string{},
-               CheckEndpoint: "/",
+               Cmd:              "path/to/cmd --arg1 one",
+               Proxy:            "http://localhost:8081",
+               Aliases:          []string{"mthree"},
+               Env:              []string{},
+               CheckEndpoint:    "/",
+               SendLoadingState: &modelLoadingState,
            },
            "model4": {
-               Cmd:           "path/to/cmd --arg1 one",
-               Proxy:         "http://localhost:8082",
-               CheckEndpoint: "/",
-               Aliases:       []string{},
-               Env:           []string{},
+               Cmd:              "path/to/cmd --arg1 one",
+               Proxy:            "http://localhost:8082",
+               CheckEndpoint:    "/",
+               Aliases:          []string{},
+               Env:              []string{},
+               SendLoadingState: &modelLoadingState,
            },
        },
        HealthCheckTimeout: 15,

proxy/config/config_windows_test.go

Lines changed: 31 additions & 24 deletions
@@ -152,44 +152,51 @@ groups:
        t.Fatalf("Failed to load config: %v", err)
    }
 
+   modelLoadingState := false
+
    expected := Config{
        LogLevel:  "info",
        StartPort: 5800,
        Macros: MacroList{
            {"svr-path", "path/to/server"},
        },
+       SendLoadingState: false,
        Models: map[string]ModelConfig{
            "model1": {
-               Cmd:           "path/to/cmd --arg1 one",
-               CmdStop:       "taskkill /f /t /pid ${PID}",
-               Proxy:         "http://localhost:8080",
-               Aliases:       []string{"m1", "model-one"},
-               Env:           []string{"VAR1=value1", "VAR2=value2"},
-               CheckEndpoint: "/health",
+               Cmd:              "path/to/cmd --arg1 one",
+               CmdStop:          "taskkill /f /t /pid ${PID}",
+               Proxy:            "http://localhost:8080",
+               Aliases:          []string{"m1", "model-one"},
+               Env:              []string{"VAR1=value1", "VAR2=value2"},
+               CheckEndpoint:    "/health",
+               SendLoadingState: &modelLoadingState,
            },
            "model2": {
-               Cmd:           "path/to/server --arg1 one",
-               CmdStop:       "taskkill /f /t /pid ${PID}",
-               Proxy:         "http://localhost:8081",
-               Aliases:       []string{"m2"},
-               Env:           []string{},
-               CheckEndpoint: "/",
+               Cmd:              "path/to/server --arg1 one",
+               CmdStop:          "taskkill /f /t /pid ${PID}",
+               Proxy:            "http://localhost:8081",
+               Aliases:          []string{"m2"},
+               Env:              []string{},
+               CheckEndpoint:    "/",
+               SendLoadingState: &modelLoadingState,
            },
            "model3": {
-               Cmd:           "path/to/cmd --arg1 one",
-               CmdStop:       "taskkill /f /t /pid ${PID}",
-               Proxy:         "http://localhost:8081",
-               Aliases:       []string{"mthree"},
-               Env:           []string{},
-               CheckEndpoint: "/",
+               Cmd:              "path/to/cmd --arg1 one",
+               CmdStop:          "taskkill /f /t /pid ${PID}",
+               Proxy:            "http://localhost:8081",
+               Aliases:          []string{"mthree"},
+               Env:              []string{},
+               CheckEndpoint:    "/",
+               SendLoadingState: &modelLoadingState,
            },
            "model4": {
-               Cmd:           "path/to/cmd --arg1 one",
-               CmdStop:       "taskkill /f /t /pid ${PID}",
-               Proxy:         "http://localhost:8082",
-               CheckEndpoint: "/",
-               Aliases:       []string{},
-               Env:           []string{},
+               Cmd:              "path/to/cmd --arg1 one",
+               CmdStop:          "taskkill /f /t /pid ${PID}",
+               Proxy:            "http://localhost:8082",
+               CheckEndpoint:    "/",
+               Aliases:          []string{},
+               Env:              []string{},
+               SendLoadingState: &modelLoadingState,
            },
        },
        HealthCheckTimeout: 15,

proxy/config/model_config.go

Lines changed: 3 additions & 0 deletions
@@ -35,6 +35,9 @@ type ModelConfig struct {
    // Metadata: see #264
    // Arbitrary metadata that can be exposed through the API
    Metadata map[string]any `yaml:"metadata"`
+
+   // override global setting
+   SendLoadingState *bool `yaml:"sendLoadingState"`
 }
 
 func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
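
The per-model field is a *bool rather than a bool so an omitted key can be told apart from an explicit false. The standalone sketch below shows that tri-state behaviour; it uses gopkg.in/yaml.v2 purely for illustration, which may not be the YAML package this project actually uses.

package main

import (
    "fmt"

    yaml "gopkg.in/yaml.v2"
)

type modelCfg struct {
    SendLoadingState *bool `yaml:"sendLoadingState"`
}

func main() {
    var absent, explicit modelCfg
    _ = yaml.Unmarshal([]byte(``), &absent)                          // key not present
    _ = yaml.Unmarshal([]byte(`sendLoadingState: false`), &explicit) // key present
    fmt.Println(absent.SendLoadingState == nil) // true: nil means "inherit the global setting"
    fmt.Println(*explicit.SendLoadingState)     // false: an explicit per-model override
}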

proxy/config/model_config_test.go

Lines changed: 20 additions & 0 deletions
@@ -50,5 +50,25 @@ models:
            }
        })
    }
+}
 
+func TestConfig_ModelSendLoadingState(t *testing.T) {
+   content := `
+sendLoadingState: true
+models:
+  model1:
+    cmd: path/to/cmd --port ${PORT}
+    sendLoadingState: false
+  model2:
+    cmd: path/to/cmd --port ${PORT}
+`
+   config, err := LoadConfigFromReader(strings.NewReader(content))
+   assert.NoError(t, err)
+   assert.True(t, config.SendLoadingState)
+   if assert.NotNil(t, config.Models["model1"].SendLoadingState) {
+       assert.False(t, *config.Models["model1"].SendLoadingState)
+   }
+   if assert.NotNil(t, config.Models["model2"].SendLoadingState) {
+       assert.True(t, *config.Models["model2"].SendLoadingState)
+   }
 }
