@@ -32,7 +32,7 @@
 import numpy as np
 import torch
 
-from litserve import LitServer
+from litserve import LitAPI, LitServer
 
 
 class EmbeddingRequest(BaseModel):
@@ -129,44 +129,32 @@ def __init__(self): |
         self.add_endpoint("/v1/embeddings", self.embeddings_endpoint, ["POST"])
         self.add_endpoint("/v1/embeddings", self.options_embeddings, ["GET"])
 
-    def setup(self, server: "LitServer"):
+    def pre_setup(self, lit_api: "LitAPI"):
         from litserve import LitAPI
 
-        super().setup(server)
-
-        lit_api = server.lit_api
+        if inspect.isgeneratorfunction(lit_api.predict):
+            raise ValueError(
+                "You are using yield in your predict method, which is used for streaming. "
+                "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings "
+                "is not a sequential operation. "
+                "Please consider replacing yield with return in predict.\n"
+                + EMBEDDING_API_EXAMPLE
+            )
 
-        if isinstance(lit_api, LitAPI):
-            self._check_lit_api(lit_api)
-        elif isinstance(lit_api, list):
-            for api in lit_api:
-                self._check_lit_api(api)
+        is_encode_response_original = lit_api.encode_response.__code__ is LitAPI.encode_response.__code__
+        if not is_encode_response_original and inspect.isgeneratorfunction(lit_api.encode_response):
+            raise ValueError(
+                "You are using yield in your encode_response method, which is used for streaming. "
+                "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings "
+                "is not a sequential operation. "
+                "Please consider replacing yield with return in encode_response.\n"
+                + EMBEDDING_API_EXAMPLE
+            )
 
+    def setup(self, server: "LitServer"):
+        super().setup(server)
         print("OpenAI Embedding Spec is ready.")
 
-    def _check_lit_api(self, api):
-        from litserve import LitAPI
-
-        if isinstance(api.spec, OpenAIEmbeddingSpec):
-            if inspect.isgeneratorfunction(api.predict):
-                raise ValueError(
-                    "You are using yield in your predict method, which is used for streaming.",
-                    "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings ",
-                    "is not a sequential operation.",
-                    "Please consider replacing yield with return in predict.\n",
-                    EMBEDDING_API_EXAMPLE,
-                )
-
-            is_encode_response_original = api.encode_response.__code__ is LitAPI.encode_response.__code__
-            if not is_encode_response_original and inspect.isgeneratorfunction(api.encode_response):
-                raise ValueError(
-                    "You are using yield in your encode_response method, which is used for streaming.",
-                    "OpenAIEmbeddingSpec doesn't support streaming because producing embeddings ",
-                    "is not a sequential operation.",
-                    "Please consider replacing yield with return in encode_response.\n",
-                    EMBEDDING_API_EXAMPLE,
-                )
-
     def decode_request(self, request: EmbeddingRequest, context_kwargs: Optional[dict] = None) -> List[str]:
         return request.input
 
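
For context, a minimal sketch of an embedding server that passes the new `pre_setup` validation: `predict` returns its batch of embeddings rather than yielding them. This assumes litserve's public API (`ls.LitAPI`, `ls.LitServer` with a `spec` argument, and `OpenAIEmbeddingSpec` exported from `litserve.specs`); the `EmbeddingAPI` class and the random-projection stand-in model are hypothetical.

```python
from typing import List

import numpy as np

import litserve as ls
from litserve.specs import OpenAIEmbeddingSpec


class EmbeddingAPI(ls.LitAPI):
    def setup(self, device):
        # Hypothetical stand-in for a real embedding model.
        self.model = lambda texts: np.random.rand(len(texts), 384)

    def predict(self, inputs: List[str]) -> List[List[float]]:
        # Return the whole batch at once; a `yield` here would trip the
        # generator check in OpenAIEmbeddingSpec.pre_setup.
        return self.model(inputs).tolist()


if __name__ == "__main__":
    server = ls.LitServer(EmbeddingAPI(), spec=OpenAIEmbeddingSpec())
    server.run(port=8000)
```

With `pre_setup`, the spec validates each `LitAPI` directly when it is attached, instead of `setup` having to unpack `server.lit_api` and branch on whether it holds a single API or a list.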