1 change: 1 addition & 0 deletions src/litserve/specs/openai.py
@@ -176,6 +176,7 @@ class ChatCompletionRequest(BaseModel):
    tools: Optional[List[Tool]] = None
    tool_choice: Optional[ToolChoice] = ToolChoice.auto
    response_format: Optional[ResponseFormat] = None
    reasoning_effort: Optional[Literal["low", "medium", "high"]] = None
    metadata: Optional[Dict[str, str]] = None


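With `reasoning_effort` added to `ChatCompletionRequest`, any OpenAI-style client can pass the field in the request body. A minimal sketch of a client call, assuming a LitServe server is already running locally (the host, port, and model name here are illustrative, not part of this PR):

import requests

# Illustrative endpoint; point this at your running LitServe deployment.
payload = {
    "model": "my-model",  # placeholder model name
    "messages": [{"role": "user", "content": "Hello"}],
    "reasoning_effort": "high",  # must be "low", "medium", "high", or omitted
}
resp = requests.post("http://localhost:8000/v1/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"]["content"])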
8 changes: 8 additions & 0 deletions src/litserve/test_examples/openai_spec_example.py
@@ -53,6 +53,14 @@ def encode_response(self, output):
        )


class TestAPIWithReasoningEffort(TestAPI):
    def encode_response(self, output, context):
        yield ChatMessage(
            role="assistant",
            content=f"This is a generated output with reasoning effort: {context.get('reasoning_effort', None)}",
        )


class OpenAIBatchContext(LitAPI):
    def setup(self, device: str) -> None:
        self.model = None
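The test helper above reads `reasoning_effort` out of the per-request `context` dict, which the OpenAI spec populates from the incoming request. A user-facing API can follow the same pattern; the class below is a hypothetical sketch (the name `EffortAwareAPI` and the style mapping are made up for illustration):

from litserve.specs.openai import ChatMessage
from litserve.test_examples.openai_spec_example import TestAPI


class EffortAwareAPI(TestAPI):
    def encode_response(self, output, context):
        # reasoning_effort is None when the client omits the field.
        effort = context.get("reasoning_effort")
        style = {"low": "terse", "medium": "balanced", "high": "thorough"}.get(effort, "default")
        yield ChatMessage(role="assistant", content=f"Generated with {style} reasoning")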
21 changes: 21 additions & 0 deletions tests/test_specs.py
@@ -24,6 +24,7 @@
    OpenAIWithUsageEncodeResponse,
    TestAPI,
    TestAPIWithCustomEncode,
    TestAPIWithReasoningEffort,
    TestAPIWithStructuredOutput,
    TestAPIWithToolCalls,
)
@@ -252,6 +253,26 @@ async def test_openai_spec_metadata_required_fail(openai_request_data):
    assert "Missing required metadata" in resp.text


@pytest.mark.asyncio
@pytest.mark.parametrize("reasoning_effort", ["low", "medium", "high", None, "random"])
async def test_openai_spec_reasoning_effort(reasoning_effort, openai_request_data):
    server = ls.LitServer(TestAPIWithReasoningEffort(), spec=OpenAISpec())
    openai_request_data["reasoning_effort"] = reasoning_effort
    with wrap_litserve_start(server) as server:
        async with LifespanManager(server.app) as manager, AsyncClient(
            transport=ASGITransport(app=manager.app), base_url="http://test"
        ) as ac:
            resp = await ac.post("/v1/chat/completions", json=openai_request_data)
            if reasoning_effort == "random":
                assert resp.status_code == 422  # "random" is not a valid reasoning_effort value
            else:
                assert resp.status_code == 200
                assert (
                    resp.json()["choices"][0]["message"]["content"]
                    == f"This is a generated output with reasoning effort: {reasoning_effort}"
                )


class IncorrectAPI1(ls.LitAPI):
    def setup(self, device):
        self.model = None
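The 422 branch in the new test falls out of the type annotation: because `reasoning_effort` is declared as `Optional[Literal["low", "medium", "high"]]`, Pydantic rejects any other string during request validation, before user code runs. A standalone sketch of that behavior, using a minimal stand-in model rather than the real `ChatCompletionRequest` (to avoid its other fields):

from typing import Literal, Optional

from pydantic import BaseModel, ValidationError


class EffortOnly(BaseModel):
    # Mirrors the field added in src/litserve/specs/openai.py.
    reasoning_effort: Optional[Literal["low", "medium", "high"]] = None


EffortOnly(reasoning_effort="high")  # accepted
EffortOnly()                         # accepted; the field is optional
try:
    EffortOnly(reasoning_effort="random")  # rejected, hence the 422 in the test
except ValidationError as err:
    print(err)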