From fbdec31522df93346498d06f7ce88a0229898468 Mon Sep 17 00:00:00 2001
From: rongfengliang <1141591465@qq.com>
Date: Fri, 4 Jul 2025 17:07:11 +0800
Subject: [PATCH 01/13] fix: OpenAIEmbeddingSpec setup check for
 multi-endpoint servers

---
 src/litserve/specs/openai_embedding.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/litserve/specs/openai_embedding.py b/src/litserve/specs/openai_embedding.py
index 4cacf61ce..de547dd8c 100644
--- a/src/litserve/specs/openai_embedding.py
+++ b/src/litserve/specs/openai_embedding.py
@@ -22,7 +22,6 @@
 from fastapi import HTTPException, Request, Response, status
 from fastapi import status as status_code
 from pydantic import BaseModel
-
 from litserve.specs.base import LitSpec
 from litserve.utils import LitAPIStatus
 
@@ -131,11 +130,21 @@ def __init__(self):
     def setup(self, server: "LitServer"):
         from litserve import LitAPI
-
         super().setup(server)
 
         lit_api = server.lit_api
-        if inspect.isgeneratorfunction(lit_api.predict):
+
+        if isinstance(lit_api, LitAPI):
+            self._check_lit_api(lit_api)
+        if isinstance(lit_api,list):
+            for api in lit_api:
+                self._check_lit_api(api)
+
+        print("OpenAI Embedding Spec is ready.")
+
+    def _check_lit_api(self,api: "LitAPI"):
+        from litserve import LitAPI
+        if inspect.isgeneratorfunction(api.predict):
             raise ValueError(
                 "You are using yield in your predict method, which is used for streaming.",
                 "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings ",
                 "is not a sequential operation.",
                 "Please consider replacing yield with return in predict.\n",
                 EMBEDDING_API_EXAMPLE,
             )
 
@@ -144,8 +153,8 @@ def setup(self, server: "LitServer"):
-        is_encode_response_original = lit_api.encode_response.__code__ is LitAPI.encode_response.__code__
-        if not is_encode_response_original and inspect.isgeneratorfunction(lit_api.encode_response):
+        is_encode_response_original = api.encode_response.__code__ is LitAPI.encode_response.__code__
+        if not is_encode_response_original and inspect.isgeneratorfunction(api.encode_response):
             raise ValueError(
                 "You are using yield in your encode_response method, which is used for streaming.",
                 "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings ",
                 "is not a sequential operation.",
                 "Please consider replacing yield with return in encode_response.\n",
                 EMBEDDING_API_EXAMPLE,
             )
-
-        print("OpenAI Embedding Spec is ready.")
-
     def decode_request(self, request: EmbeddingRequest, context_kwargs: Optional[dict] = None) -> List[str]:
         return request.input

From 564aa91b8ae72894b010f79bba11fbd196c0aada Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 4 Jul 2025 09:12:24 +0000
Subject: [PATCH 02/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/litserve/specs/openai_embedding.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/litserve/specs/openai_embedding.py b/src/litserve/specs/openai_embedding.py
index de547dd8c..dc6fe75b8 100644
--- a/src/litserve/specs/openai_embedding.py
+++ b/src/litserve/specs/openai_embedding.py
@@ -22,6 +22,7 @@
 from fastapi import HTTPException, Request, Response, status
 from fastapi import status as status_code
 from pydantic import BaseModel
+
 from litserve.specs.base import LitSpec
 from litserve.utils import LitAPIStatus
 
@@ -130,20 +131,22 @@ def setup(self, server: "LitServer"):
         from litserve import LitAPI
+
         super().setup(server)
 
         lit_api = server.lit_api
 
         if isinstance(lit_api, LitAPI):
             self._check_lit_api(lit_api)
-        if isinstance(lit_api,list):
+        if isinstance(lit_api, list):
             for api in lit_api:
                 self._check_lit_api(api)
 
         print("OpenAI Embedding Spec is ready.")
 
-    def _check_lit_api(self,api: "LitAPI"):
+    def _check_lit_api(self, api: "LitAPI"):
         from litserve import LitAPI
+
         if inspect.isgeneratorfunction(api.predict):
             raise ValueError(
                 "You are using yield in your predict method, which is used for streaming.",
@@ -162,6 +165,7 @@ def _check_lit_api(self,api: "LitAPI"):
                 "Please consider replacing yield with return in encode_response.\n",
                 EMBEDDING_API_EXAMPLE,
             )
+
     def decode_request(self, request: EmbeddingRequest, context_kwargs: Optional[dict] = None) -> List[str]:
         return request.input

From 9cf9736cac54171155d0044757d2534e621ec53d Mon Sep 17 00:00:00 2001
From: rong fengliang <1141591465@qq.com>
Date: Fri, 4 Jul 2025 17:23:42 +0800
Subject: [PATCH 03/13] format: remove type annotation

---
 src/litserve/specs/openai_embedding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/litserve/specs/openai_embedding.py b/src/litserve/specs/openai_embedding.py
index dc6fe75b8..c42989fc4 100644
--- a/src/litserve/specs/openai_embedding.py
+++ b/src/litserve/specs/openai_embedding.py
@@ -144,7 +144,7 @@ def setup(self, server: "LitServer"):
         print("OpenAI Embedding Spec is ready.")
 
-    def _check_lit_api(self, api: "LitAPI"):
+    def _check_lit_api(self, api):
         from litserve import LitAPI
 
         if inspect.isgeneratorfunction(api.predict):

From bab374c940dc25a4c15e5a4e52e62c92385511e9 Mon Sep 17 00:00:00 2001
From: rongfengliang <1141591465@qq.com>
Date: Fri, 4 Jul 2025 21:19:21 +0800
Subject: [PATCH 04/13] test: add multi-endpoint test

---
 tests/unit/test_openai_embedding.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index 00a934965..febb6e0bf 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -22,6 +22,7 @@
 from httpx import ASGITransport, AsyncClient
 
 import litserve as ls
+from litserve.specs.openai import OpenAISpec
 from litserve.specs.openai_embedding import OpenAIEmbeddingSpec
 from litserve.test_examples.openai_embedding_spec_example import (
     TestEmbedAPI,
@@ -50,6 +51,22 @@ async def test_openai_embedding_spec_with_single_input(openai_embedding_request_
     assert len(resp.json()["data"]) == 1, "Length of data should be 1"
     assert len(resp.json()["data"][0]["embedding"]) == 768, "Embedding length should be 768"
 
+@pytest.mark.asyncio
+async def test_openai_embedding_spec_with_multi_endpoint(openai_embedding_request_data):
+    spec_openai = OpenAISpec()
+    spec_embedding = OpenAIEmbeddingSpec()
+    server = ls.LitServer([TestEmbedAPI(spec=spec_openai),TestEmbedAPI(spec=spec_embedding)])
+    with wrap_litserve_start(server) as server:
+        async with LifespanManager(server.app) as manager, AsyncClient(
+            transport=ASGITransport(app=manager.app), base_url="http://test"
+        ) as ac:
+            resp = await ac.post("/v1/embeddings", json=openai_embedding_request_data, timeout=10)
+            assert resp.status_code == 200, "Status code should be 200"
+            assert resp.json()["object"] == "list", "Object should be list"
+            assert resp.json()["data"][0]["index"] == 0, "Index should be 0"
+            assert len(resp.json()["data"]) == 1, "Length of data should be 1"
+            assert len(resp.json()["data"][0]["embedding"]) == 768, "Embedding length should be 768"
+
 @pytest.mark.asyncio
 async def test_openai_embedding_spec_with_multiple_inputs(openai_embedding_request_data_array):

From 7cd1baee25199a447efad92df7a89e985e900db3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 4 Jul 2025 13:19:44 +0000
Subject: [PATCH 05/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/unit/test_openai_embedding.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index febb6e0bf..edb39d99e 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -51,11 +51,12 @@ async def test_openai_embedding_spec_with_single_input(openai_embedding_request_
     assert len(resp.json()["data"]) == 1, "Length of data should be 1"
     assert len(resp.json()["data"][0]["embedding"]) == 768, "Embedding length should be 768"
 
+
 @pytest.mark.asyncio
 async def test_openai_embedding_spec_with_multi_endpoint(openai_embedding_request_data):
     spec_openai = OpenAISpec()
     spec_embedding = OpenAIEmbeddingSpec()
-    server = ls.LitServer([TestEmbedAPI(spec=spec_openai),TestEmbedAPI(spec=spec_embedding)])
+    server = ls.LitServer([TestEmbedAPI(spec=spec_openai), TestEmbedAPI(spec=spec_embedding)])
     with wrap_litserve_start(server) as server:
         async with LifespanManager(server.app) as manager, AsyncClient(
             transport=ASGITransport(app=manager.app), base_url="http://test"

From adf9252380d46f301aee4f96e525a4e3f002e919 Mon Sep 17 00:00:00 2001
From: rongfengliang <1141591465@qq.com>
Date: Fri, 4 Jul 2025 22:00:00 +0800
Subject: [PATCH 06/13] fix: add test class && run _check_lit_api only for
 OpenAIEmbeddingSpec, not all specs

---
 src/litserve/specs/openai_embedding.py | 38 +++++++++----------
 .../openai_embedding_spec_example.py    | 11 ++++++
 tests/unit/test_openai_embedding.py     |  3 +-
 3 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/src/litserve/specs/openai_embedding.py b/src/litserve/specs/openai_embedding.py
index c42989fc4..1f661cce7 100644
--- a/src/litserve/specs/openai_embedding.py
+++ b/src/litserve/specs/openai_embedding.py
@@ -146,25 +146,25 @@ def setup(self, server: "LitServer"):
     def _check_lit_api(self, api):
         from litserve import LitAPI
-
-        if inspect.isgeneratorfunction(api.predict):
-            raise ValueError(
-                "You are using yield in your predict method, which is used for streaming.",
-                "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings ",
-                "is not a sequential operation.",
-                "Please consider replacing yield with return in predict.\n",
-                EMBEDDING_API_EXAMPLE,
-            )
-
-        is_encode_response_original = api.encode_response.__code__ is LitAPI.encode_response.__code__
-        if not is_encode_response_original and inspect.isgeneratorfunction(api.encode_response):
-            raise ValueError(
-                "You are using yield in your encode_response method, which is used for streaming.",
-                "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings ",
-                "is not a sequential operation.",
-                "Please consider replacing yield with return in encode_response.\n",
-                EMBEDDING_API_EXAMPLE,
-            )
+        if isinstance(api.spec,OpenAIEmbeddingSpec):
+            if inspect.isgeneratorfunction(api.predict):
+                raise ValueError(
+                    "You are using yield in your predict method, which is used for streaming.",
+                    "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings ",
+                    "is not a sequential operation.",
+                    "Please consider replacing yield with return in predict.\n",
+                    EMBEDDING_API_EXAMPLE,
+                )
+
+            is_encode_response_original = api.encode_response.__code__ is LitAPI.encode_response.__code__
+            if not is_encode_response_original and inspect.isgeneratorfunction(api.encode_response):
+                raise ValueError(
+                    "You are using yield in your encode_response method, which is used for streaming.",
+                    "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings ",
+                    "is not a sequential operation.",
+                    "Please consider replacing yield with return in encode_response.\n",
+                    EMBEDDING_API_EXAMPLE,
+                )
 
     def decode_request(self, request: EmbeddingRequest, context_kwargs: Optional[dict] = None) -> List[str]:
         return request.input
diff --git a/src/litserve/test_examples/openai_embedding_spec_example.py b/src/litserve/test_examples/openai_embedding_spec_example.py
index 75bee3af8..09b9d69d4 100644
--- a/src/litserve/test_examples/openai_embedding_spec_example.py
+++ b/src/litserve/test_examples/openai_embedding_spec_example.py
@@ -16,6 +16,17 @@ def predict(self, x) -> List[List[float]]:
     def encode_response(self, output) -> dict:
         return {"embeddings": output}
 
+class TestOpenAPI(LitAPI):
+    def setup(self, device):
+        self.model = None
+
+    async def predict(self, x) -> List[List[float]]:
+        n = len(x) if isinstance(x, list) else 1
+        yield np.random.rand(n, 768).tolist()
+
+    async def encode_response(self, output) -> dict:
+        yield {"embeddings": output}
+
 
 class TestEmbedBatchedAPI(TestEmbedAPI):
     def predict(self, batch) -> List[List[List[float]]]:
diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index febb6e0bf..6cc78d3e1 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -26,6 +26,7 @@
 from litserve.specs.openai_embedding import OpenAIEmbeddingSpec
 from litserve.test_examples.openai_embedding_spec_example import (
     TestEmbedAPI,
+    TestOpenAPI,
     TestEmbedAPIWithMissingEmbeddings,
     TestEmbedAPIWithNonDictOutput,
     TestEmbedAPIWithUsage,
@@ -55,7 +56,7 @@ async def test_openai_embedding_spec_with_multi_endpoint(openai_embedding_request_data):
     spec_openai = OpenAISpec()
     spec_embedding = OpenAIEmbeddingSpec()
-    server = ls.LitServer([TestEmbedAPI(spec=spec_openai),TestEmbedAPI(spec=spec_embedding)])
+    server = ls.LitServer([TestOpenAPI(spec=spec_openai,enable_async=True),TestEmbedAPI(spec=spec_embedding)])
     with wrap_litserve_start(server) as server:
         async with LifespanManager(server.app) as manager, AsyncClient(
             transport=ASGITransport(app=manager.app), base_url="http://test"

From dad44f53a4fb1293f849fdb37af87324b9e18b7c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 4 Jul 2025 14:04:29 +0000
Subject: [PATCH 07/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/litserve/specs/openai_embedding.py                      | 3 ++-
 src/litserve/test_examples/openai_embedding_spec_example.py | 3 ++-
 tests/unit/test_openai_embedding.py                         | 5 +++--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/litserve/specs/openai_embedding.py b/src/litserve/specs/openai_embedding.py
index 1f661cce7..2c04cc2ea 100644
--- a/src/litserve/specs/openai_embedding.py
+++ b/src/litserve/specs/openai_embedding.py
@@ -146,7 +146,8 @@ def setup(self, server: "LitServer"):
     def _check_lit_api(self, api):
         from litserve import LitAPI
-        if isinstance(api.spec,OpenAIEmbeddingSpec):
+
+        if isinstance(api.spec, OpenAIEmbeddingSpec):
             if inspect.isgeneratorfunction(api.predict):
                 raise ValueError(
                     "You are using yield in your predict method, which is used for streaming.",
diff --git a/src/litserve/test_examples/openai_embedding_spec_example.py b/src/litserve/test_examples/openai_embedding_spec_example.py
index 09b9d69d4..cf3e54eeb 100644
--- a/src/litserve/test_examples/openai_embedding_spec_example.py
+++ b/src/litserve/test_examples/openai_embedding_spec_example.py
@@ -16,6 +16,7 @@ def predict(self, x) -> List[List[float]]:
     def encode_response(self, output) -> dict:
         return {"embeddings": output}
 
+
 class TestOpenAPI(LitAPI):
     def setup(self, device):
         self.model = None
@@ -24,7 +25,7 @@ async def predict(self, x) -> List[List[float]]:
         n = len(x) if isinstance(x, list) else 1
         yield np.random.rand(n, 768).tolist()
 
-    async def encode_response(self, output) -> dict: 
+    async def encode_response(self, output) -> dict:
         yield {"embeddings": output}
diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index 6cc78d3e1..ebc1a505a 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -26,12 +26,12 @@
 from litserve.specs.openai_embedding import OpenAIEmbeddingSpec
 from litserve.test_examples.openai_embedding_spec_example import (
     TestEmbedAPI,
-    TestOpenAPI,
     TestEmbedAPIWithMissingEmbeddings,
     TestEmbedAPIWithNonDictOutput,
     TestEmbedAPIWithUsage,
     TestEmbedAPIWithYieldEncodeResponse,
     TestEmbedAPIWithYieldPredict,
+    TestOpenAPI,
 )
 from litserve.utils import wrap_litserve_start
 
@@ -52,11 +52,12 @@ async def test_openai_embedding_spec_with_single_input(openai_embedding_request_
     assert len(resp.json()["data"]) == 1, "Length of data should be 1"
     assert len(resp.json()["data"][0]["embedding"]) == 768, "Embedding length should be 768"
 
+
 @pytest.mark.asyncio
 async def test_openai_embedding_spec_with_multi_endpoint(openai_embedding_request_data):
     spec_openai = OpenAISpec()
     spec_embedding = OpenAIEmbeddingSpec()
-    server = ls.LitServer([TestOpenAPI(spec=spec_openai,enable_async=True),TestEmbedAPI(spec=spec_embedding)])
+    server = ls.LitServer([TestOpenAPI(spec=spec_openai, enable_async=True), TestEmbedAPI(spec=spec_embedding)])
     with wrap_litserve_start(server) as server:
         async with LifespanManager(server.app) as manager, AsyncClient(
             transport=ASGITransport(app=manager.app), base_url="http://test"

From c5d0b98c298975792edee44c7fc7515c5803896e Mon Sep 17 00:00:00 2001
From: Aniket Maurya
Date: Fri, 4 Jul 2025 19:59:03 +0530
Subject: [PATCH 08/13] Apply suggestions from code review

---
 .../test_examples/openai_embedding_spec_example.py | 12 ------------
 tests/unit/test_openai_embedding.py                |  4 +---
 2 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/src/litserve/test_examples/openai_embedding_spec_example.py b/src/litserve/test_examples/openai_embedding_spec_example.py
index cf3e54eeb..75bee3af8 100644
--- a/src/litserve/test_examples/openai_embedding_spec_example.py
+++ b/src/litserve/test_examples/openai_embedding_spec_example.py
@@ -17,18 +17,6 @@ def encode_response(self, output) -> dict:
         return {"embeddings": output}
 
 
-class TestOpenAPI(LitAPI):
-    def setup(self, device):
-        self.model = None
-
-    async def predict(self, x) -> List[List[float]]:
-        n = len(x) if isinstance(x, list) else 1
-        yield np.random.rand(n, 768).tolist()
-
-    async def encode_response(self, output) -> dict:
-        yield {"embeddings": output}
-
-
 class TestEmbedBatchedAPI(TestEmbedAPI):
     def predict(self, batch) -> List[List[List[float]]]:
         return [np.random.rand(len(x), 768).tolist() for x in batch]
diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index ebc1a505a..5bf7b2e36 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -55,9 +55,7 @@ async def test_openai_embedding_spec_with_single_input(openai_embedding_request_
 @pytest.mark.asyncio
 async def test_openai_embedding_spec_with_multi_endpoint(openai_embedding_request_data):
-    spec_openai = OpenAISpec()
-    spec_embedding = OpenAIEmbeddingSpec()
-    server = ls.LitServer([TestOpenAPI(spec=spec_openai, enable_async=True), TestEmbedAPI(spec=spec_embedding)])
+    server = ls.LitServer([TestEmbedAPI(spec=OpenAIEmbeddingSpec(), api_path="/v2/embeddings"), TestEmbedAPI(spec=OpenAIEmbeddingSpec())])
     with wrap_litserve_start(server) as server:
         async with LifespanManager(server.app) as manager, AsyncClient(
             transport=ASGITransport(app=manager.app), base_url="http://test"

From c6b30588392b51a6372a912101bac0df183e1fcc Mon Sep 17 00:00:00 2001
From: Aniket Maurya
Date: Fri, 4 Jul 2025 19:59:29 +0530
Subject: [PATCH 09/13] Update tests/unit/test_openai_embedding.py

---
 tests/unit/test_openai_embedding.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index 5bf7b2e36..f77a60206 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -31,7 +31,6 @@
     TestEmbedAPIWithUsage,
     TestEmbedAPIWithYieldEncodeResponse,
     TestEmbedAPIWithYieldPredict,
-    TestOpenAPI,
 )
 from litserve.utils import wrap_litserve_start
 

From 0b7fdb1f9afddee5dc8c67c2d191ee0241b3bdbe Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 4 Jul 2025 14:33:56 +0000
Subject: [PATCH 10/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/unit/test_openai_embedding.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index f77a60206..138e4a462 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -22,7 +22,6 @@
 from httpx import ASGITransport, AsyncClient
 
 import litserve as ls
-from litserve.specs.openai import OpenAISpec
 from litserve.specs.openai_embedding import OpenAIEmbeddingSpec
 from litserve.test_examples.openai_embedding_spec_example import (
     TestEmbedAPI,
@@ -54,7 +53,10 @@ async def test_openai_embedding_spec_with_single_input(openai_embedding_request_
 @pytest.mark.asyncio
 async def test_openai_embedding_spec_with_multi_endpoint(openai_embedding_request_data):
-    server = ls.LitServer([TestEmbedAPI(spec=OpenAIEmbeddingSpec(), api_path="/v2/embeddings"), TestEmbedAPI(spec=OpenAIEmbeddingSpec())])
+    server = ls.LitServer([
+        TestEmbedAPI(spec=OpenAIEmbeddingSpec(), api_path="/v2/embeddings"),
+        TestEmbedAPI(spec=OpenAIEmbeddingSpec()),
+    ])
     with wrap_litserve_start(server) as server:
         async with LifespanManager(server.app) as manager, AsyncClient(
             transport=ASGITransport(app=manager.app), base_url="http://test"

From dea59bac253e573fe01c9bfab9048fb108b8010e Mon Sep 17 00:00:00 2001
From: Aniket Maurya
Date: Sat, 5 Jul 2025 16:00:24 +0530
Subject: [PATCH 11/13] Apply suggestions from code review

Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
---
 src/litserve/specs/openai_embedding.py | 2 +-
 tests/unit/test_openai_embedding.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/litserve/specs/openai_embedding.py b/src/litserve/specs/openai_embedding.py
index 2c04cc2ea..b80e1abef 100644
--- a/src/litserve/specs/openai_embedding.py
+++ b/src/litserve/specs/openai_embedding.py
@@ -138,7 +138,7 @@ def setup(self, server: "LitServer"):
 
         if isinstance(lit_api, LitAPI):
             self._check_lit_api(lit_api)
-        if isinstance(lit_api, list):
+        elif isinstance(lit_api, list):
             for api in lit_api:
                 self._check_lit_api(api)
 
diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index 138e4a462..f2c409140 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -61,7 +61,7 @@ async def test_openai_embedding_spec_with_multi_endpoint(openai_embedding_reques
         async with LifespanManager(server.app) as manager, AsyncClient(
             transport=ASGITransport(app=manager.app), base_url="http://test"
         ) as ac:
-            resp = await ac.post("/v1/embeddings", json=openai_embedding_request_data, timeout=10)
+            resp = await ac.post("/v2/embeddings", json=openai_embedding_request_data, timeout=10)
             assert resp.status_code == 200, "Status code should be 200"
             assert resp.json()["object"] == "list", "Object should be list"
             assert resp.json()["data"][0]["index"] == 0, "Index should be 0"

From 53eb5b855ba5eaee185c16897393b6e1653cb246 Mon Sep 17 00:00:00 2001
From: Aniket Maurya
Date: Sat, 5 Jul 2025 16:44:18 +0530
Subject: [PATCH 12/13] Update tests/unit/test_openai_embedding.py

---
 tests/unit/test_openai_embedding.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index f2c409140..827303291 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -54,7 +54,6 @@ async def test_openai_embedding_spec_with_single_input(openai_embedding_request_
 @pytest.mark.asyncio
 async def test_openai_embedding_spec_with_multi_endpoint(openai_embedding_request_data):
     server = ls.LitServer([
-        TestEmbedAPI(spec=OpenAIEmbeddingSpec(), api_path="/v2/embeddings"),
         TestEmbedAPI(spec=OpenAIEmbeddingSpec()),
     ])
     with wrap_litserve_start(server) as server:

From 2db66e136d5ad1934f68377838aa348e1072e713 Mon Sep 17 00:00:00 2001
From: Aniket Maurya
Date: Sat, 5 Jul 2025 20:08:51 +0530
Subject: [PATCH 13/13] Update tests/unit/test_openai_embedding.py

Co-authored-by: Bhimraj Yadav
---
 tests/unit/test_openai_embedding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_openai_embedding.py b/tests/unit/test_openai_embedding.py
index 827303291..d14d2c69c 100644
--- a/tests/unit/test_openai_embedding.py
+++ b/tests/unit/test_openai_embedding.py
@@ -60,7 +60,7 @@ async def test_openai_embedding_spec_with_multi_endpoint(openai_embedding_reques
         async with LifespanManager(server.app) as manager, AsyncClient(
             transport=ASGITransport(app=manager.app), base_url="http://test"
         ) as ac:
-            resp = await ac.post("/v2/embeddings", json=openai_embedding_request_data, timeout=10)
+            resp = await ac.post("/v1/embeddings", json=openai_embedding_request_data, timeout=10)
             assert resp.status_code == 200, "Status code should be 200"
             assert resp.json()["object"] == "list", "Object should be list"
             assert resp.json()["data"][0]["index"] == 0, "Index should be 0"
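
For reference, a minimal sketch of the multi-endpoint setup this series enables. It is not taken from the patches themselves: TestEmbedAPI stands in for any user-defined LitAPI, the /v2/embeddings path mirrors the intermediate test from PATCH 08-10 rather than the final one, and the port and request payload are illustrative.

    import litserve as ls
    from litserve.specs.openai_embedding import OpenAIEmbeddingSpec
    from litserve.test_examples.openai_embedding_spec_example import TestEmbedAPI

    # Two OpenAI-compatible embedding endpoints on one server. With the fixed
    # OpenAIEmbeddingSpec.setup, server.lit_api may be a list: the streaming
    # checks run once per API, and only for APIs whose spec is an
    # OpenAIEmbeddingSpec, instead of assuming a single LitAPI.
    server = ls.LitServer([
        TestEmbedAPI(spec=OpenAIEmbeddingSpec()),                             # served at /v1/embeddings
        TestEmbedAPI(spec=OpenAIEmbeddingSpec(), api_path="/v2/embeddings"),  # served at /v2/embeddings
    ])

    if __name__ == "__main__":
        server.run(port=8000)

Each endpoint then accepts an OpenAI-style embeddings request, e.g. a POST to /v1/embeddings with a JSON body such as {"model": "embed", "input": "hello"}.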