Skip to content

Commit b3c236f

Browse files
committed
feat: config hot-reload
1 parent ec0348e commit b3c236f

File tree

5 files changed

+168
-46
lines changed

5 files changed

+168
-46
lines changed

README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ llama-s
149149

150150
Docker is the quickest way to try out llama-swap:
151151

152-
```
152+
```shell
153153
# use CPU inference
154154
$ docker run -it --rm -p 9292:8080 ghcr.io/mostlygeek/llama-swap:cpu
155155
@@ -185,7 +185,7 @@ Specific versions are also available and are tagged with the llama-swap, archite
185185

186186
Beyond the demo you will likely want to run the containers with your downloaded models and custom configuration.
187187

188-
```
188+
```shell
189189
$ docker run -it --rm --runtime nvidia -p 9292:8080 \
190190
-v /path/to/models:/models \
191191
-v /path/to/custom/config.yaml:/app/config.yaml \
@@ -200,7 +200,12 @@ Pre-built binaries are available for Linux, FreeBSD and Darwin (OSX). These are
200200

201201
1. Create a configuration file, see [config.example.yaml](config.example.yaml)
202202
1. Download a [release](https://github.com/mostlygeek/llama-swap/releases) appropriate for your OS and architecture.
203-
1. Run the binary with `llama-swap --config path/to/config.yaml`
203+
1. Run the binary with `llama-swap --config path/to/config.yaml`.
204+
Available flags:
205+
- `--config`: Path to the configuration file (default: `config.yaml`).
206+
- `--listen`: Address and port to listen on (default: `:8080`).
207+
- `--version`: Show version information and exit.
208+
- `--watch-config`: Automatically reload the configuration file when it changes. This will wait for in-flight requests to complete then stop all running models (default: `false`).
204209

205210
### Building from source
206211

@@ -215,7 +220,7 @@ Open the `http://<host>/logs` with your browser to get a web interface with stre
215220

216221
Of course, CLI access is also supported:
217222

218-
```
223+
```shell
219224
# sends up to the last 10KB of logs
220225
curl http://host/logs
221226

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module github.com/mostlygeek/llama-swap
33
go 1.23.0
44

55
require (
6+
github.com/fsnotify/fsnotify v1.9.0
67
github.com/gin-gonic/gin v1.10.0
78
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
89
github.com/stretchr/testify v1.9.0

go.sum

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,16 @@ github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQ
99
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
1010
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
1111
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
12+
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
13+
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
1214
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
1315
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
1416
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
1517
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
1618
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
1719
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
20+
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
21+
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
1822
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
1923
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
2024
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
@@ -23,6 +27,8 @@ github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBEx
2327
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
2428
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
2529
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
30+
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
31+
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
2632
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
2733
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
2834
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
@@ -74,34 +80,18 @@ github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZ
7480
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
7581
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
7682
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
77-
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
78-
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
79-
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
80-
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
8183
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
8284
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
83-
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
84-
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
85-
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
86-
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
87-
golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
88-
golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
8985
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
9086
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
9187
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
9288
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
93-
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
94-
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
95-
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
96-
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
9789
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
9890
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
99-
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
100-
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
101-
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
102-
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
10391
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
10492
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
93+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
94+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
10595
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
10696
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
10797
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

llama-swap.go

Lines changed: 110 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,31 @@ package main
33
import (
44
"flag"
55
"fmt"
6+
"log"
7+
"net/http"
68
"os"
79
"os/signal"
10+
"path/filepath"
811
"syscall"
12+
"time"
913

14+
"github.com/fsnotify/fsnotify"
1015
"github.com/gin-gonic/gin"
1116
"github.com/mostlygeek/llama-swap/proxy"
1217
)
1318

14-
var version string = "0"
15-
var commit string = "abcd1234"
16-
var date = "unknown"
19+
var (
20+
version string = "0"
21+
commit string = "abcd1234"
22+
date string = "unknown"
23+
)
1724

1825
func main() {
1926
// Define a command-line flag for the port
2027
configPath := flag.String("config", "config.yaml", "config file name")
2128
listenStr := flag.String("listen", ":8080", "listen ip/port")
2229
showVersion := flag.Bool("version", false, "show version of build")
30+
watchConfig := flag.Bool("watch-config", false, "Automatically reload config file on change")
2331

2432
flag.Parse() // Parse the command-line flags
2533

@@ -42,18 +50,109 @@ func main() {
4250

4351
proxyManager := proxy.New(config)
4452

53+
// Setup channels for server management
54+
reloadChan := make(chan *proxy.ProxyManager)
55+
exitChan := make(chan struct{})
4556
sigChan := make(chan os.Signal, 1)
4657
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
58+
59+
// Start server manager
60+
fmt.Println("llama-swap listening on " + *listenStr)
4761
go func() {
48-
<-sigChan
49-
fmt.Println("Shutting down llama-swap")
50-
proxyManager.Shutdown()
51-
os.Exit(0)
62+
currentManager := proxyManager
63+
for {
64+
// Start current manager
65+
go func(pm *proxy.ProxyManager) {
66+
if err := pm.Run(*listenStr); err != nil && err != http.ErrServerClosed {
67+
fmt.Printf("Fatal server error: %v\n", err)
68+
close(exitChan)
69+
}
70+
}(currentManager)
71+
72+
// Wait for signals
73+
select {
74+
case newManager := <-reloadChan:
75+
// Shutdown current and swap to new
76+
currentManager.Shutdown()
77+
currentManager = newManager
78+
log.Println("Server restarted with new config")
79+
case sig := <-sigChan:
80+
fmt.Printf("Received signal %v, shutting down...\n", sig)
81+
currentManager.Shutdown()
82+
close(exitChan)
83+
return
84+
}
85+
}
5286
}()
5387

54-
fmt.Println("llama-swap listening on " + *listenStr)
55-
if err := proxyManager.Run(*listenStr); err != nil {
56-
fmt.Printf("Server error: %v\n", err)
57-
os.Exit(1)
88+
// Start file watcher if requested
89+
if *watchConfig {
90+
absConfigPath, err := filepath.Abs(*configPath)
91+
if err != nil {
92+
log.Printf("Error getting absolute path for config: %v. File watching disabled.", err)
93+
} else {
94+
go watchConfigFileWithReload(absConfigPath, reloadChan)
95+
}
96+
}
97+
98+
// Wait for exit signal
99+
<-exitChan
100+
}
101+
102+
// watchConfigFileWithReload monitors the configuration file and sends new ProxyManager instances through reloadChan.
103+
func watchConfigFileWithReload(configPath string, reloadChan chan<- *proxy.ProxyManager) {
104+
watcher, err := fsnotify.NewWatcher()
105+
if err != nil {
106+
log.Printf("Error creating file watcher: %v. File watching disabled.", err)
107+
return
108+
}
109+
defer watcher.Close()
110+
111+
err = watcher.Add(configPath)
112+
if err != nil {
113+
log.Printf("Error adding config path (%s) to watcher: %v. File watching disabled.", configPath, err)
114+
return
115+
}
116+
117+
log.Printf("Watching config file for changes: %s", configPath)
118+
119+
var debounceTimer *time.Timer
120+
debounceDuration := 2 * time.Second
121+
122+
for {
123+
select {
124+
case event, ok := <-watcher.Events:
125+
if !ok {
126+
return
127+
}
128+
// We only care about writes to the specific config file
129+
if event.Name == configPath && event.Has(fsnotify.Write) {
130+
// Reset or start the debounce timer
131+
if debounceTimer != nil {
132+
debounceTimer.Stop()
133+
}
134+
debounceTimer = time.AfterFunc(debounceDuration, func() {
135+
log.Printf("Config file modified: %s, reloading...", event.Name)
136+
137+
// Load new configuration
138+
newConfig, err := proxy.LoadConfig(configPath)
139+
if err != nil {
140+
log.Printf("Error loading new config: %v", err)
141+
return
142+
}
143+
144+
// Create new ProxyManager with new config
145+
newPM := proxy.New(newConfig)
146+
reloadChan <- newPM
147+
log.Println("Config reloaded successfully")
148+
})
149+
}
150+
case err, ok := <-watcher.Errors:
151+
if !ok {
152+
log.Println("File watcher error channel closed.")
153+
return
154+
}
155+
log.Printf("File watcher error: %v", err)
156+
}
58157
}
59158
}

proxy/proxymanager.go

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package proxy
22

33
import (
44
"bytes"
5+
"context"
56
"encoding/json"
67
"fmt"
78
"io"
@@ -29,6 +30,7 @@ type ProxyManager struct {
2930
config *Config
3031
currentProcesses map[string]*Process
3132
ginEngine *gin.Engine
33+
server *http.Server
3234

3335
// logging
3436
proxyLogger *LogMonitor
@@ -189,26 +191,42 @@ func New(config *Config) *ProxyManager {
189191
}
190192

191193
func (pm *ProxyManager) Run(addr ...string) error {
192-
return pm.ginEngine.Run(addr...)
194+
address := ":8080"
195+
if len(addr) > 0 {
196+
address = addr[0]
197+
}
198+
199+
pm.server = &http.Server{
200+
Addr: address,
201+
Handler: pm.ginEngine,
202+
}
203+
204+
return pm.server.ListenAndServe()
193205
}
194206

195207
// HandlerFunc hands the request and response writer straight to the manager's
// gin engine by calling its ServeHTTP method.
func (pm *ProxyManager) HandlerFunc(w http.ResponseWriter, r *http.Request) {
196208
pm.ginEngine.ServeHTTP(w, r)
197209
}
198210

211+
// StopProcesses acquires a lock and stops all running upstream processes.
212+
// This is the public method safe for concurrent calls.
199213
func (pm *ProxyManager) StopProcesses() {
200214
// Take the manager lock for the whole call; it is released when this method returns.
pm.Lock()
201215
defer pm.Unlock()
202216

203217
// Delegate the actual work to the unexported stopProcesses.
pm.stopProcesses()
204218
}
205219

206-
// for internal usage
220+
// stopProcesses stops all running upstream processes.
221+
// This internal method assumes the caller holds the necessary lock.
207222
func (pm *ProxyManager) stopProcesses() {
208223
if len(pm.currentProcesses) == 0 {
224+
pm.proxyLogger.Debug("stopProcesses called, no processes are running.")
209225
return
210226
}
211227

228+
pm.proxyLogger.Debugf("Stopping %d running upstream process(es)...", len(pm.currentProcesses))
229+
212230
// stop Processes in parallel
213231
var wg sync.WaitGroup
214232
for _, process := range pm.currentProcesses {
@@ -223,22 +241,31 @@ func (pm *ProxyManager) stopProcesses() {
223241
pm.currentProcesses = make(map[string]*Process)
224242
}
225243

226-
// Shutdown is called to shutdown all upstream processes
227-
// when llama-swap is shutting down.
244+
// Shutdown gracefully shuts down the server and all upstream processes
228245
func (pm *ProxyManager) Shutdown() {
229246
pm.Lock()
230247
defer pm.Unlock()
231248

232-
// shutdown process in parallel
233-
var wg sync.WaitGroup
234-
for _, process := range pm.currentProcesses {
235-
wg.Add(1)
236-
go func(process *Process) {
237-
defer wg.Done()
238-
process.Shutdown()
239-
}(process)
249+
// First stop all processes
250+
pm.stopProcesses()
251+
252+
// Then shutdown the HTTP server gracefully
253+
if pm.server != nil {
254+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
255+
defer cancel()
256+
257+
// Log shutdown attempt
258+
pm.proxyLogger.Debug("Shutting down HTTP server...")
259+
260+
if err := pm.server.Shutdown(ctx); err != nil {
261+
pm.proxyLogger.Errorf("HTTP server Shutdown: %v", err)
262+
} else {
263+
pm.proxyLogger.Debug("HTTP server shutdown complete")
264+
}
265+
266+
// Close the server in case Shutdown times out
267+
pm.server.Close()
240268
}
241-
wg.Wait()
242269
}
243270

244271
func (pm *ProxyManager) listModelsHandler(c *gin.Context) {

0 commit comments

Comments
 (0)