feat: config hot-reload - fix merge conflicts

sammcj · sammcj · commit e5af39a72f63 · 2025-05-09T13:41:03.000+10:00
diff --git a/llama-swap.go b/llama-swap.go
@@ -153,10 +153,22 @@ func watchConfigFileWithReload(configPath string, reloadChan chan<- *proxy.Proxy
 				debounceTimer = time.AfterFunc(debounceDuration, func() {
 					log.Printf("Config file modified: %s, reloading...", event.Name)
 
-					// Load new configuration
-					newConfig, err := proxy.LoadConfig(configPath)
+					// Try up to 3 times with exponential backoff
+					var newConfig proxy.Config
+					var err error
+					for retries := 0; retries < 3; retries++ {
+						// Load new configuration
+						newConfig, err = proxy.LoadConfig(configPath)
+						if err == nil {
+							break
+						}
+						log.Printf("Error loading new config (attempt %d/3): %v", retries+1, err)
+						if retries < 2 {
+							time.Sleep(time.Duration(1<<retries) * time.Second)
+						}
+					}
 					if err != nil {
-						log.Printf("Error loading new config: %v", err)
+						log.Printf("Failed to load new config after retries: %v", err)
 						return
 					}
 
diff --git a/proxy/helpers_test.go b/proxy/helpers_test.go
@@ -63,7 +63,7 @@ func getTestSimpleResponderConfigPort(expectedMessage string, port int) ModelCon
 
 	// Create a process configuration
 	return ModelConfig{
-		Cmd:           fmt.Sprintf("%s --port %d --silent --respond %s", binaryPath, port, expectedMessage),
+		Cmd:           fmt.Sprintf("%s --port %d --silent --respond %s", binaryPath, port, expectedMessage), // Re-added --silent
 		Proxy:         fmt.Sprintf("http://127.0.0.1:%d", port),
 		CheckEndpoint: "/health",
 	}
diff --git a/proxy/logMonitor_test.go b/proxy/logMonitor_test.go
@@ -21,12 +21,18 @@ func TestLogMonitor(t *testing.T) {
 	client2Messages := make([]byte, 0)
 
 	var wg sync.WaitGroup
-	wg.Add(2) // One for each client
+	wg.Add(2) // One for each client (goroutine)
 
 	// Write messages first
-	logMonitor.Write([]byte("1"))
-	logMonitor.Write([]byte("2"))
-	logMonitor.Write([]byte("3"))
+	if _, err := logMonitor.Write([]byte("1")); err != nil {
+		t.Fatalf("Failed to write log message: %v", err)
+	}
+	if _, err := logMonitor.Write([]byte("2")); err != nil {
+		t.Fatalf("Failed to write log message: %v", err)
+	}
+	if _, err := logMonitor.Write([]byte("3")); err != nil {
+		t.Fatalf("Failed to write log message: %v", err)
+	}
 
 	// Start goroutines to collect messages
 	go func() {
diff --git a/proxy/process.go b/proxy/process.go
@@ -356,7 +356,9 @@ func (p *Process) stopCommand(sigtermTTL time.Duration) {
 	select {
 	case <-sigtermTimeout.Done():
 		p.proxyLogger.Debugf("<%s> Process timed out waiting to stop, sending KILL signal (normal during shutdown)", p.ID)
-		p.cmd.Process.Kill()
+		if err := p.cmd.Process.Kill(); err != nil {
+			p.proxyLogger.Errorf("<%s> Failed to kill process: %v", p.ID, err)
+		}
 	case err := <-p.cmdWaitChan:
 		// Note: in start(), p.cmdWaitChan also has a select { ... }. That should be OK
 		// because if we make it here then the cmd has been successfully running and made it
diff --git a/proxy/process_test.go b/proxy/process_test.go
@@ -18,20 +18,21 @@ var (
 
 func init() {
 	// flip to help with debugging tests
-	if false {
+	if false { // Reverted to false
 		debugLogger.SetLogLevel(LevelDebug)
 	} else {
-		debugLogger.SetLogLevel(LevelError)
+		debugLogger.SetLogLevel(LevelError) // This will now be active
 	}
 }
 
 func TestProcess_AutomaticallyStartsUpstream(t *testing.T) {
 
 	expectedMessage := "testing91931"
-	config := getTestSimpleResponderConfig(expectedMessage)
+	// Use a specific port for the first instance in this test
+	config1 := getTestSimpleResponderConfigPort(expectedMessage, 12901)
 
 	// Create a process
-	process := NewProcess("test-process", 5, config, debugLogger, debugLogger)
+	process := NewProcess("test-process", 5, config1, debugLogger, debugLogger)
 	defer process.Stop()
 
 	req := httptest.NewRequest("GET", "/test", nil)
@@ -48,6 +49,13 @@ func TestProcess_AutomaticallyStartsUpstream(t *testing.T) {
 	// Stop the process
 	process.Stop()
 
+	// Ensure the process is fully stopped and port is likely released
+	time.Sleep(100 * time.Millisecond) // Small delay to help port release
+
+	// Use a different specific port for the second instance
+	config2 := getTestSimpleResponderConfigPort(expectedMessage, 12902)
+	process.config = config2 // Update the process config to use the new port
+
 	req = httptest.NewRequest("GET", "/", nil)
 	w = httptest.NewRecorder()
 
@@ -56,7 +64,7 @@ func TestProcess_AutomaticallyStartsUpstream(t *testing.T) {
 
 	// should have automatically started the process again
 	if w.Code != http.StatusOK {
-		t.Errorf("Expected status code %d, got %d", http.StatusOK, w.Code)
+		t.Errorf("Expected status code %d, got %d (URL: %s, Port: %s)", http.StatusOK, w.Code, req.URL.Path, process.config.Proxy)
 	}
 }
 
@@ -65,7 +73,8 @@ func TestProcess_AutomaticallyStartsUpstream(t *testing.T) {
 func TestProcess_WaitOnMultipleStarts(t *testing.T) {
 
 	expectedMessage := "testing91931"
-	config := getTestSimpleResponderConfig(expectedMessage)
+	// Use a specific, high port for this test to reduce collision likelihood
+	config := getTestSimpleResponderConfigPort(expectedMessage, 12903)
 
 	process := NewProcess("test-process", 5, config, debugLogger, debugLogger)
 	defer process.Stop()
diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go
@@ -37,7 +37,6 @@ type ProxyManager struct {
 	processGroups map[string]*ProcessGroup
 }
 
-// New creates a new ProxyManager with default loggers.
 func New(config Config) *ProxyManager {
 	// set up loggers
 	stdoutLogger := NewLogMonitorWriter(os.Stdout)
@@ -87,7 +86,6 @@ func New(config Config) *ProxyManager {
 	return pm
 }
 
-// setupGinEngine configures the Gin engine with all necessary routes and middleware
 func (pm *ProxyManager) setupGinEngine() {
 	pm.ginEngine.Use(func(c *gin.Context) {
 		// Start timer
@@ -208,6 +206,8 @@ func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 
 // StopProcesses acquires a lock and stops all running upstream processes.
 // This is the public method safe for concurrent calls.
+// Unlike Shutdown, this method only stops the processes but doesn't perform
+// a complete shutdown, allowing for process replacement without full termination.
 func (pm *ProxyManager) StopProcesses() {
 	pm.Lock()
 	defer pm.Unlock()
diff --git a/proxy/proxymanager_test.go b/proxy/proxymanager_test.go
@@ -251,41 +251,56 @@ func TestProxyManager_ListModelsHandler(t *testing.T) {
 }
 
 func TestProxyManager_Shutdown(t *testing.T) {
-	// Test Case 1: Startup failure due to unavailable proxy
-	t.Run("startup failure with unavailable proxy", func(t *testing.T) {
-		// Create configuration with invalid command that will fail immediately
-		modelConfig := ModelConfig{
-			Cmd:           "/invalid-command", // Invalid executable path that will fail to start
-			Proxy:         "http://localhost:9991",
-			CheckEndpoint: "/health",
-		}
-
-		config := AddDefaultGroupToConfig(Config{
-			HealthCheckTimeout: 15,
-			Models: map[string]ModelConfig{
-				"model1": modelConfig,
+	// make broken model configurations
+	model1Config := getTestSimpleResponderConfigPort("model1", 9991)
+	model1Config.Proxy = "http://localhost:10001/"
+
+	model2Config := getTestSimpleResponderConfigPort("model2", 9992)
+	model2Config.Proxy = "http://localhost:10002/"
+
+	model3Config := getTestSimpleResponderConfigPort("model3", 9993)
+	model3Config.Proxy = "http://localhost:10003/"
+
+	config := AddDefaultGroupToConfig(Config{
+		HealthCheckTimeout: 15,
+		Models: map[string]ModelConfig{
+			"model1": model1Config,
+			"model2": model2Config,
+			"model3": model3Config,
+		},
+		LogLevel: "error",
+		Groups: map[string]GroupConfig{
+			"test": {
+				Swap:    false,
+				Members: []string{"model1", "model2", "model3"},
 			},
-			LogLevel: "error",
-		})
+		},
+	})
 
-		proxy := New(config)
-		defer proxy.Shutdown()
+	proxy := New(config)
 
-		// Try to start the model
-		reqBody := `{"model":"model1"}`
-		req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
-		w := httptest.NewRecorder()
-		proxy.ServeHTTP(w, req)
+	// Start all the processes
+	var wg sync.WaitGroup
+	for _, modelName := range []string{"model1", "model2", "model3"} {
+		wg.Add(1)
+		go func(modelName string) {
+			defer wg.Done()
+			reqBody := fmt.Sprintf(`{"model":"%s"}`, modelName)
+			req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
+			w := httptest.NewRecorder()
 
-		assert.Equal(t, http.StatusBadGateway, w.Code)
-		assert.Contains(t, w.Body.String(), "unable to start process: start() failed: fork/exec /invalid-command: no such file or directory")
+			// send a request to trigger the proxy to load ... this should hang waiting for start up
+			proxy.ServeHTTP(w, req)
+			assert.Equal(t, http.StatusBadGateway, w.Code)
+			assert.Contains(t, w.Body.String(), "health check interrupted due to shutdown")
+		}(modelName)
+	}
 
-		// Verify process is tracked but in failed state
-		processGroup := proxy.findGroupByModelName("model1")
-		assert.NotNil(t, processGroup)
-		process := processGroup.processes["model1"]
-		assert.Equal(t, StateFailed, process.CurrentState())
-	})
+	go func() {
+		<-time.After(time.Second)
+		proxy.Shutdown()
+	}()
+	wg.Wait()
 }
 
 func TestProxyManager_Unload(t *testing.T) {
@@ -382,8 +397,6 @@ func TestProxyManager_RunningEndpoint(t *testing.T) {
 	})
 }
 
-
-
 func TestProxyManager_AudioTranscriptionHandler(t *testing.T) {
 	config := AddDefaultGroupToConfig(Config{
 		HealthCheckTimeout: 15,
@@ -432,6 +445,68 @@ func TestProxyManager_AudioTranscriptionHandler(t *testing.T) {
 	assert.Equal(t, strconv.Itoa(370+contentLength), response["h_content_length"])
 }
 
+// Test that useModelName in the configuration overrides the model name sent to upstream
+func TestProxyManager_UseModelName(t *testing.T) {
+	upstreamModelName := "upstreamModel"
+
+	modelConfig := getTestSimpleResponderConfig(upstreamModelName)
+	modelConfig.UseModelName = upstreamModelName
+
+	config := AddDefaultGroupToConfig(Config{
+		HealthCheckTimeout: 15,
+		Models: map[string]ModelConfig{
+			"model1": modelConfig,
+		},
+		LogLevel: "error",
+	})
+
+	proxy := New(config)
+	defer proxy.StopProcesses()
+
+	requestedModel := "model1"
+
+	t.Run("useModelName over rides requested model: /v1/chat/completions", func(t *testing.T) {
+		reqBody := fmt.Sprintf(`{"model":"%s"}`, requestedModel)
+		req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
+		w := httptest.NewRecorder()
+
+		proxy.ServeHTTP(w, req)
+		assert.Equal(t, http.StatusOK, w.Code)
+		assert.Contains(t, w.Body.String(), upstreamModelName)
+	})
+
+	t.Run("useModelName over rides requested model: /v1/audio/transcriptions", func(t *testing.T) {
+		// Create a buffer with multipart form data
+		var b bytes.Buffer
+		w := multipart.NewWriter(&b)
+
+		// Add the model field
+		fw, err := w.CreateFormField("model")
+		assert.NoError(t, err)
+		_, err = fw.Write([]byte(requestedModel))
+		assert.NoError(t, err)
+
+		// Add a file field
+		fw, err = w.CreateFormFile("file", "test.mp3")
+		assert.NoError(t, err)
+		_, err = fw.Write([]byte("test"))
+		assert.NoError(t, err)
+		w.Close()
+
+		// Create the request with the multipart form data
+		req := httptest.NewRequest("POST", "/v1/audio/transcriptions", &b)
+		req.Header.Set("Content-Type", w.FormDataContentType())
+		rec := httptest.NewRecorder()
+		proxy.ServeHTTP(rec, req)
+
+		// Verify the response
+		assert.Equal(t, http.StatusOK, rec.Code)
+		var response map[string]string
+		err = json.Unmarshal(rec.Body.Bytes(), &response)
+		assert.NoError(t, err)
+		assert.Equal(t, upstreamModelName, response["model"])
+	})
+}
 
 func TestProxyManager_CORSOptionsHandler(t *testing.T) {
 	config := AddDefaultGroupToConfig(Config{
@@ -501,69 +576,6 @@ func TestProxyManager_CORSOptionsHandler(t *testing.T) {
 	}
 }
 
-// Test useModelName in configuration sends overrides what is sent to upstream
-func TestProxyManager_UseModelName(t *testing.T) {
-	upstreamModelName := "upstreamModel"
-
-	modelConfig := getTestSimpleResponderConfig(upstreamModelName)
-	modelConfig.UseModelName = upstreamModelName
-
-	config := AddDefaultGroupToConfig(Config{
-		HealthCheckTimeout: 15,
-		Models: map[string]ModelConfig{
-			"model1": modelConfig,
-		},
-		LogLevel: "error",
-	})
-
-	proxy := New(config)
-	defer proxy.StopProcesses()
-
-	requestedModel := "model1"
-
-	t.Run("useModelName over rides requested model: /v1/chat/completions", func(t *testing.T) {
-		reqBody := fmt.Sprintf(`{"model":"%s"}`, requestedModel)
-		req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
-		w := httptest.NewRecorder()
-
-		proxy.ServeHTTP(w, req)
-		assert.Equal(t, http.StatusOK, w.Code)
-		assert.Contains(t, w.Body.String(), upstreamModelName)
-	})
-
-	t.Run("useModelName over rides requested model: /v1/audio/transcriptions", func(t *testing.T) {
-		// Create a buffer with multipart form data
-		var b bytes.Buffer
-		w := multipart.NewWriter(&b)
-
-		// Add the model field
-		fw, err := w.CreateFormField("model")
-		assert.NoError(t, err)
-		_, err = fw.Write([]byte(requestedModel))
-		assert.NoError(t, err)
-
-		// Add a file field
-		fw, err = w.CreateFormFile("file", "test.mp3")
-		assert.NoError(t, err)
-		_, err = fw.Write([]byte("test"))
-		assert.NoError(t, err)
-		w.Close()
-
-		// Create the request with the multipart form data
-		req := httptest.NewRequest("POST", "/v1/audio/transcriptions", &b)
-		req.Header.Set("Content-Type", w.FormDataContentType())
-		rec := httptest.NewRecorder()
-		proxy.ServeHTTP(rec, req)
-
-		// Verify the response
-		assert.Equal(t, http.StatusOK, rec.Code)
-		var response map[string]string
-		err = json.Unmarshal(rec.Body.Bytes(), &response)
-		assert.NoError(t, err)
-		assert.Equal(t, upstreamModelName, response["model"])
-	})
-}
-
 func TestProxyManager_Upstream(t *testing.T) {
 	config := AddDefaultGroupToConfig(Config{
 		HealthCheckTimeout: 15,

Original file line number	Diff line number	Diff line change
`@@ -63,7 +63,7 @@ func getTestSimpleResponderConfigPort(expectedMessage string, port int) ModelCon`
`63`	`63`
`64`	`64`	`// Create a process configuration`
`65`	`65`	`return ModelConfig{`
`66`		`- Cmd: fmt.Sprintf("%s --port %d --silent --respond %s", binaryPath, port, expectedMessage),`
	`66`	`+ Cmd: fmt.Sprintf("%s --port %d --silent --respond %s", binaryPath, port, expectedMessage), // Re-added --silent`
`67`	`67`	`Proxy: fmt.Sprintf("http://127.0.0.1:%d", port),`
`68`	`68`	`CheckEndpoint: "/health",`
`69`	`69`	`}`