@@ -236,27 +236,29 @@ func (pm *ProxyManager) setupGinEngine() {
236236 })
237237
238238 // Set up routes using the Gin engine
239- pm .ginEngine .POST ("/v1/chat/completions" , pm .proxyOAIHandler )
239+ pm .ginEngine .POST ("/v1/chat/completions" , pm .proxyInferenceHandler )
240240 // Support legacy /v1/completions api, see issue #12
241- pm .ginEngine .POST ("/v1/completions" , pm .proxyOAIHandler )
241+ pm .ginEngine .POST ("/v1/completions" , pm .proxyInferenceHandler )
242+ // Support Anthropic's /v1/messages API (added to llama.cpp in https://github.com/ggml-org/llama.cpp/pull/17570)
243+ pm .ginEngine .POST ("/v1/messages" , pm .proxyInferenceHandler )
242244
243245 // Support embeddings and reranking
244- pm .ginEngine .POST ("/v1/embeddings" , pm .proxyOAIHandler )
246+ pm .ginEngine .POST ("/v1/embeddings" , pm .proxyInferenceHandler )
245247
246248 // llama-server's /reranking endpoint + aliases
247- pm .ginEngine .POST ("/reranking" , pm .proxyOAIHandler )
248- pm .ginEngine .POST ("/rerank" , pm .proxyOAIHandler )
249- pm .ginEngine .POST ("/v1/rerank" , pm .proxyOAIHandler )
250- pm .ginEngine .POST ("/v1/reranking" , pm .proxyOAIHandler )
249+ pm .ginEngine .POST ("/reranking" , pm .proxyInferenceHandler )
250+ pm .ginEngine .POST ("/rerank" , pm .proxyInferenceHandler )
251+ pm .ginEngine .POST ("/v1/rerank" , pm .proxyInferenceHandler )
252+ pm .ginEngine .POST ("/v1/reranking" , pm .proxyInferenceHandler )
251253
252254 // llama-server's /infill endpoint for code infilling
253- pm .ginEngine .POST ("/infill" , pm .proxyOAIHandler )
255+ pm .ginEngine .POST ("/infill" , pm .proxyInferenceHandler )
254256
255257 // llama-server's /completion endpoint
256- pm .ginEngine .POST ("/completion" , pm .proxyOAIHandler )
258+ pm .ginEngine .POST ("/completion" , pm .proxyInferenceHandler )
257259
258260 // Support audio/speech endpoint
259- pm .ginEngine .POST ("/v1/audio/speech" , pm .proxyOAIHandler )
261+ pm .ginEngine .POST ("/v1/audio/speech" , pm .proxyInferenceHandler )
260262 pm .ginEngine .POST ("/v1/audio/transcriptions" , pm .proxyOAIPostFormHandler )
261263
262264 pm .ginEngine .GET ("/v1/models" , pm .listModelsHandler )
@@ -545,7 +547,7 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
545547 }
546548}
547549
548- func (pm * ProxyManager ) proxyOAIHandler (c * gin.Context ) {
550+ func (pm * ProxyManager ) proxyInferenceHandler (c * gin.Context ) {
549551 bodyBytes , err := io .ReadAll (c .Request .Body )
550552 if err != nil {
551553 pm .sendErrorResponse (c , http .StatusBadRequest , "could not ready request body" )
0 commit comments