Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/sglang/srt/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from fastapi.responses import Response

from sglang.srt.disaggregation.utils import (
FakeBootstrapHost,
FAKE_BOOTSTRAP_HOST,
register_disaggregation_server,
)
from sglang.srt.entrypoints.engine import Engine, _launch_subprocesses
Expand Down Expand Up @@ -265,7 +265,7 @@ def _wait_and_warmup(
"max_new_tokens": 8,
"ignore_eos": True,
},
"bootstrap_host": [FakeBootstrapHost] * server_args.dp_size,
"bootstrap_host": [FAKE_BOOTSTRAP_HOST] * server_args.dp_size,
# This is a hack to ensure fake transfer is enabled during prefill warmup
# ensure each dp rank has a unique bootstrap_room during prefill warmup
"bootstrap_room": [
Expand Down
7 changes: 4 additions & 3 deletions test/srt/openai/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
import requests

from sglang.srt.utils import kill_process_tree # reuse SGLang helper
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST

# Dotted path of the OpenAI-compatible server entrypoint, launched via `-m`.
SERVER_MODULE = "sglang.srt.entrypoints.openai.api_server"
# NOTE(review): both assignments below appear in this diff view (removed vs.
# added line of the same hunk); at import time the second assignment wins.
DEFAULT_MODEL = "dummy-model"
DEFAULT_MODEL = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
# Seconds to wait for the server to become healthy; overridable via env var.
STARTUP_TIMEOUT = float(os.getenv("SGLANG_OPENAI_STARTUP_TIMEOUT", 120))


Expand All @@ -39,7 +40,7 @@ def _wait_until_healthy(proc: subprocess.Popen, base: str, timeout: float) -> No


def launch_openai_server(model: str = DEFAULT_MODEL, **kw):
"""Spawn the draft OpenAI-compatible server and wait until its ready."""
"""Spawn the draft OpenAI-compatible server and wait until it's ready."""
port = _pick_free_port()
cmd = [
sys.executable,
Expand Down Expand Up @@ -79,7 +80,7 @@ def launch_openai_server(model: str = DEFAULT_MODEL, **kw):

@pytest.fixture(scope="session")
def openai_server() -> Generator[str, None, None]:
    """PyTest fixture that provides the server's base URL and cleans up."""
    # Launch the server once for the whole test session; yields the base URL
    # to every test that requests this fixture.
    proc, base, log_file = launch_openai_server()
    yield base
    # Teardown: terminate the server process and all of its children.
    # NOTE(review): log_file is not closed here — presumably cleaned up
    # elsewhere (or left to process exit); confirm.
    kill_process_tree(proc.pid)
Expand Down
46 changes: 41 additions & 5 deletions test/srt/openai/test_server.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,52 @@
# sglang/test/srt/openai/test_server.py
import pytest
import requests

from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST as MODEL_ID


def test_health(openai_server: str):
    """The /health endpoint should answer 200 with an empty body."""
    r = requests.get(f"{openai_server}/health")
    # NOTE(review): the two status asserts below are the removed and added
    # lines of the same diff hunk, merged by the page copy — only one exists
    # in the real file.
    assert r.status_code == 200, r.text
    assert r.status_code == 200
    # FastAPI returns an empty body → r.text == ""
    assert r.text == ""


# NOTE(review): this decorator is the *removed* side of the diff hunk — the
# merged PR drops the xfail now that the endpoint is implemented; confirm.
@pytest.mark.xfail(reason="Endpoint skeleton not implemented yet")
def test_models_endpoint(openai_server: str):
    """`GET /v1/models` returns a model list including the launched model."""
    r = requests.get(f"{openai_server}/v1/models")
    # once implemented this should be 200
    assert r.status_code == 200
    # NOTE(review): duplicate status assert — removed vs. added diff lines
    # merged by the page copy; only the second exists in the real file.
    assert r.status_code == 200, r.text
    payload = r.json()

    # Basic contract: a non-empty "data" list of model cards.
    assert "data" in payload and isinstance(payload["data"], list) and payload["data"]

    # Validate fields of the first model card
    first = payload["data"][0]
    for key in ("id", "root", "max_model_len"):
        assert key in first, f"missing {key} in {first}"

    # max_model_len must be positive
    assert isinstance(first["max_model_len"], int) and first["max_model_len"] > 0

    # The server should report the same model id we launched it with
    ids = {m["id"] for m in payload["data"]}
    assert MODEL_ID in ids


def test_get_model_info(openai_server: str):
    """`GET /get_model_info` exposes the launch configuration of the server."""
    resp = requests.get(f"{openai_server}/get_model_info")
    assert resp.status_code == 200, resp.text
    info = resp.json()

    # Every documented key must be present in the payload.
    for key in ("model_path", "tokenizer_path", "is_generation"):
        assert key in info, f"missing {key} in {info}"

    # The reported model_path should match the model passed on the CLI.
    assert info["model_path"].endswith(MODEL_ID)

    # is_generation is documented as a boolean flag.
    assert isinstance(info["is_generation"], bool)


def test_unknown_route_returns_404(openai_server: str):
    """Requests to unregistered paths fall through to the framework's 404."""
    url = f"{openai_server}/definitely-not-a-real-route"
    response = requests.get(url)
    assert response.status_code == 404
1 change: 1 addition & 0 deletions test/srt/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class TestFile:
TestFile("test_openai_adapter.py", 1),
TestFile("test_openai_function_calling.py", 60),
TestFile("test_openai_server.py", 149),
TestFile("openai/test_server.py", 120),
TestFile("test_openai_server_hidden_states.py", 240),
TestFile("test_penalty.py", 41),
TestFile("test_page_size.py", 60),
Expand Down
Loading