Skip to content

Commit 86b4251

Browse files
authored
proxy: add support for anthropic v1/messages api (mostlygeek#417)
* proxy: add support for anthropic v1/messages api
* proxy: restrict loading message to /v1/chat/completions
1 parent d4b53bd commit 86b4251

2 files changed

Lines changed: 17 additions & 12 deletions

File tree

proxy/process.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,10 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
507507
// add a sync so the streaming client only runs when the goroutine has exited
508508

509509
isStreaming, _ := r.Context().Value(proxyCtxKey("streaming")).(bool)
510-
if p.config.SendLoadingState != nil && *p.config.SendLoadingState && isStreaming {
510+
511+
// PR #417 (no support for anthropic v1/messages yet)
512+
isChatCompletions := strings.HasPrefix(r.URL.Path, "/v1/chat/completions")
513+
if p.config.SendLoadingState != nil && *p.config.SendLoadingState && isStreaming && isChatCompletions {
511514
srw = newStatusResponseWriter(p, w)
512515
go srw.statusUpdates(swapCtx)
513516
} else {

proxy/proxymanager.go

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -236,27 +236,29 @@ func (pm *ProxyManager) setupGinEngine() {
236236
})
237237

238238
// Set up routes using the Gin engine
239-
pm.ginEngine.POST("/v1/chat/completions", pm.proxyOAIHandler)
239+
pm.ginEngine.POST("/v1/chat/completions", pm.proxyInferenceHandler)
240240
// Support legacy /v1/completions api, see issue #12
241-
pm.ginEngine.POST("/v1/completions", pm.proxyOAIHandler)
241+
pm.ginEngine.POST("/v1/completions", pm.proxyInferenceHandler)
242+
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
243+
pm.ginEngine.POST("/v1/messages", pm.proxyInferenceHandler)
242244

243245
// Support embeddings and reranking
244-
pm.ginEngine.POST("/v1/embeddings", pm.proxyOAIHandler)
246+
pm.ginEngine.POST("/v1/embeddings", pm.proxyInferenceHandler)
245247

246248
// llama-server's /reranking endpoint + aliases
247-
pm.ginEngine.POST("/reranking", pm.proxyOAIHandler)
248-
pm.ginEngine.POST("/rerank", pm.proxyOAIHandler)
249-
pm.ginEngine.POST("/v1/rerank", pm.proxyOAIHandler)
250-
pm.ginEngine.POST("/v1/reranking", pm.proxyOAIHandler)
249+
pm.ginEngine.POST("/reranking", pm.proxyInferenceHandler)
250+
pm.ginEngine.POST("/rerank", pm.proxyInferenceHandler)
251+
pm.ginEngine.POST("/v1/rerank", pm.proxyInferenceHandler)
252+
pm.ginEngine.POST("/v1/reranking", pm.proxyInferenceHandler)
251253

252254
// llama-server's /infill endpoint for code infilling
253-
pm.ginEngine.POST("/infill", pm.proxyOAIHandler)
255+
pm.ginEngine.POST("/infill", pm.proxyInferenceHandler)
254256

255257
// llama-server's /completion endpoint
256-
pm.ginEngine.POST("/completion", pm.proxyOAIHandler)
258+
pm.ginEngine.POST("/completion", pm.proxyInferenceHandler)
257259

258260
// Support audio/speech endpoint
259-
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
261+
pm.ginEngine.POST("/v1/audio/speech", pm.proxyInferenceHandler)
260262
pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler)
261263

262264
pm.ginEngine.GET("/v1/models", pm.listModelsHandler)
@@ -545,7 +547,7 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
545547
}
546548
}
547549

548-
func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) {
550+
func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
549551
bodyBytes, err := io.ReadAll(c.Request.Body)
550552
if err != nil {
551553
pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body")

0 commit comments

Comments (0)