Commit 305e5a0

improve example config [skip ci]
1 parent 04fc673 commit 305e5a0


config.example.yaml

Lines changed: 35 additions & 27 deletions
@@ -3,14 +3,15 @@
 #
 # 💡 Tip - Use an LLM with this file!
 # ====================================
-# This example configuration is written to be LLM friendly! Try
+# This example configuration is written to be LLM friendly. Try
 # copying this file into an LLM and asking it to explain or generate
 # sections for you.
 # ====================================
-#
+
+# Usage notes:
 # - Below are all the available configuration options for llama-swap.
-# - Settings with a default value, or noted as optional can be omitted.
-# - Settings that are marked required must be in your configuration file
+# - Settings noted as "required" must be in your configuration file
+# - Settings noted as "optional" can be omitted

 # healthCheckTimeout: number of seconds to wait for a model to be ready to serve requests
 # - optional, default: 120
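A minimal sketch of the option this hunk documents; the value is illustrative, not from the commit:

# allow slow-loading models up to 10 minutes before waiting requests fail
healthCheckTimeout: 600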
@@ -34,9 +35,9 @@ metricsMaxInMemory: 1000
 # - it is automatically incremented for every model that uses it
 startPort: 10001

-# macros: sets a dictionary of string:string pairs
+# macros: a dictionary of string substitutions
 # - optional, default: empty dictionary
-# - these are reusable snippets
+# - macros are reusable snippets
 # - used in a model's cmd, cmdStop, proxy and checkEndpoint
 # - useful for reducing common configuration settings
 macros:
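A sketch of how a macro might be defined and then expanded inside a model's cmd; the macro name and model are illustrative, only the ${...} expansion mechanic comes from the comments above:

macros:
  # hypothetical reusable snippet shared by several models
  "server-base": llama-server --port ${PORT}

models:
  "llama":
    # the macro expands in place inside cmd, one of the fields listed above
    cmd: ${server-base} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf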
@@ -99,44 +100,46 @@ models:

     # checkEndpoint: URL path to check if the server is ready
     # - optional, default: /health
-    # - use "none" to skip endpoint ready checking
     # - endpoint is expected to return an HTTP 200 response
-    # - all requests wait until the endpoint is ready (or fails)
+    # - all requests wait until the endpoint is ready or fails
+    # - use "none" to skip endpoint health checking
     checkEndpoint: /custom-endpoint

-    # ttl: automatically unload the model after this many seconds
+    # ttl: automatically unload the model after ttl seconds
     # - optional, default: 0
     # - ttl values must be a value greater than 0
     # - a value of 0 disables automatic unloading of the model
     ttl: 60

-    # useModelName: overrides the model name that is sent to upstream server
+    # useModelName: override the model name that is sent to upstream server
     # - optional, default: ""
-    # - useful when the upstream server expects a specific model name or format
+    # - useful for when the upstream server expects a specific model name that
+    #   is different from the model's ID
     useModelName: "qwen:qwq"

     # filters: a dictionary of filter settings
     # - optional, default: empty dictionary
+    # - only strip_params is currently supported
     filters:
       # strip_params: a comma separated list of parameters to remove from the request
       # - optional, default: ""
-      # - useful for preventing overriding of default server params by requests
-      # - `model` parameter is never removed
+      # - useful for server side enforcement of sampling parameters
+      # - the `model` parameter can never be removed
       # - can be any JSON key in the request body
       # - recommended to stick to sampling parameters
       strip_params: "temperature, top_p, top_k"

   # Unlisted model example:
   "qwen-unlisted":
-    # unlisted: true or false
+    # unlisted: boolean, true or false
     # - optional, default: false
-    # - unlisted models do not show up in /v1/models or /upstream lists
+    # - unlisted models do not show up in /v1/models api requests
     # - can be requested as normal through all apis
     unlisted: true
     cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0

   # Docker example:
-  # container run times like Docker and Podman can also be used with a
+  # container run times like Docker and Podman can be used reliably with a
   # a combination of cmd and cmdStop.
   "docker-llama":
     proxy: "http://127.0.0.1:${PORT}"
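Composed into a single model entry, the settings touched by this hunk might look like the following sketch; the model ID and values are illustrative, not from the commit:

models:
  "sketch-model":
    cmd: llama-server --port ${PORT} -m model.gguf
    checkEndpoint: /health      # polled until it returns HTTP 200
    ttl: 300                    # unload after 300 idle seconds; 0 disables
    useModelName: "qwen:qwq"    # name sent upstream instead of "sketch-model"
    filters:
      # drop client-supplied sampling params, enforcing server defaults
      strip_params: "temperature, top_p, top_k"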
@@ -149,24 +152,26 @@ models:
     # cmdStop: command to run to stop the model gracefully
     # - optional, default: ""
     # - useful for stopping commands managed by another system
-    # - on POSIX systems: a SIGTERM is sent for graceful shutdown
-    # - on Windows, taskkill is used
-    # - processes are given 5 seconds to shutdown until they are forcefully killed
     # - the upstream's process id is available in the ${PID} macro
+    #
+    # When empty, llama-swap has this default behaviour:
+    # - on POSIX systems: a SIGTERM signal is sent
+    # - on Windows, calls taskkill to stop the process
+    # - processes have 5 seconds to shutdown until forceful termination is attempted
     cmdStop: docker stop dockertest

 # groups: a dictionary of group settings
 # - optional, default: empty dictionary
-# - provide advanced controls over model swapping behaviour.
-# - Using groups some models can be kept loaded indefinitely, while others are swapped out.
-# - model ids must be defined in the Models section
+# - provides advanced controls over model swapping behaviour
+# - using groups some models can be kept loaded indefinitely, while others are swapped out
+# - model IDs must be defined in the Models section
 # - a model can only be a member of one group
 # - group behaviour is controlled via the `swap`, `exclusive` and `persistent` fields
 # - see issue #109 for details
 #
 # NOTE: the example below uses model names that are not defined above for demonstration purposes
 groups:
-  # group1 is same as the default behaviour of llama-swap where only one model is allowed
+  # group1 works the same as the default behaviour of llama-swap where only one model is allowed
   # to run a time across the whole llama-swap instance
   "group1":
     # swap: controls the model swapping behaviour in within the group
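A sketch of a custom cmdStop using the ${PID} macro mentioned above; the wrapper command and model ID are hypothetical:

models:
  "managed-model":
    proxy: "http://127.0.0.1:${PORT}"
    cmd: my-wrapper start --port ${PORT}     # hypothetical launcher
    # replace the default SIGTERM/taskkill behaviour with an explicit stop
    cmdStop: my-wrapper stop --pid ${PID}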
@@ -188,10 +193,13 @@ groups:
       - "qwen-unlisted"

   # Example:
-  # - in this group all the models can run at the same time
-  # - when a different group loads all running models in this group are unloaded
+  # - in group2 all models can run at the same time
+  # - when a different group is loaded it causes all running models in this group to unload
   "group2":
     swap: false
+
+    # exclusive: false does not unload other groups when a model in group2 is requested
+    # - the models in group2 will be loaded but will not unload any other groups
     exclusive: false
     members:
       - "docker-llama"
@@ -220,7 +228,7 @@ groups:
 # - the only supported hook is on_startup
 hooks:
   # on_startup: a dictionary of actions to perform on startup
-  # - optional, default: empty dictionar
+  # - optional, default: empty dictionary
   # - the only supported action is preload
   on_startup:
     # preload: a list of model ids to load on startup
@@ -229,4 +237,4 @@ hooks:
     # - when preloading multiple models at once, define a group
     #   otherwise models will be loaded and swapped out
     preload:
-      - "llama"
+      - "llama"
