From cecf15b34952e0d0aadaf9f8648c3c65efcca63d Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Tue, 16 Jul 2024 07:11:59 +0000 Subject: [PATCH 1/4] Add tests --- tests/entrypoints/openai/test_basic.py | 63 ++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 tests/entrypoints/openai/test_basic.py diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/openai/test_basic.py new file mode 100644 index 000000000000..fce2fd57c3c6 --- /dev/null +++ b/tests/entrypoints/openai/test_basic.py @@ -0,0 +1,63 @@ +import openai +import pytest +import requests + +from vllm.version import __version__ as VLLM_VERSION + +from ...utils import RemoteOpenAIServer + +MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" + + +@pytest.fixture(scope="module") +def server(): + args = [ + "--model", + MODEL_NAME, + # use half precision for speed and memory savings in CI environment + "--dtype", + "bfloat16", + "--max-model-len", + "8192", + "--enforce-eager", + "--max-num-seqs", + "128", + ] + + with RemoteOpenAIServer(args) as remote_server: + yield remote_server + + +@pytest.fixture(scope="module") +def client(server): + return server.get_async_client() + + +@pytest.mark.asyncio +async def test_show_version(client: openai.AsyncOpenAI): + base_url = str(client.base_url)[:-3].strip("/") + + response = requests.get(base_url + "/version") + response.raise_for_status() + + assert response.json() == {"version": VLLM_VERSION} + + +@pytest.mark.asyncio +async def test_check_health(client: openai.AsyncOpenAI): + base_url = str(client.base_url)[:-3].strip("/") + + response = requests.get(base_url + "/health") + response.raise_for_status() + + assert len(response.content) == 0 + + +@pytest.mark.asyncio +async def test_log_metrics(client: openai.AsyncOpenAI): + base_url = str(client.base_url)[:-3].strip("/") + + response = requests.get(base_url + "/metrics") + response.raise_for_status() + + assert response.json() is not None From 
4defc08bdf80b57185b529a1e1d4461dfcc09157 Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Tue, 16 Jul 2024 07:21:11 +0000 Subject: [PATCH 2/4] Update test --- tests/entrypoints/openai/test_basic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/openai/test_basic.py index fce2fd57c3c6..2c370b01db6f 100644 --- a/tests/entrypoints/openai/test_basic.py +++ b/tests/entrypoints/openai/test_basic.py @@ -1,3 +1,5 @@ +from http import HTTPStatus + import openai import pytest import requests @@ -48,9 +50,8 @@ async def test_check_health(client: openai.AsyncOpenAI): base_url = str(client.base_url)[:-3].strip("/") response = requests.get(base_url + "/health") - response.raise_for_status() - assert len(response.content) == 0 + assert response.status_code == HTTPStatus.OK @pytest.mark.asyncio From fc19360e59f10c990528ff6d7eaaa3f5ab19e3dd Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Tue, 16 Jul 2024 07:30:26 +0000 Subject: [PATCH 3/4] Fix `/metrics` 404 error --- tests/entrypoints/openai/test_basic.py | 3 +-- vllm/entrypoints/openai/api_server.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/openai/test_basic.py index 2c370b01db6f..de329d8cc665 100644 --- a/tests/entrypoints/openai/test_basic.py +++ b/tests/entrypoints/openai/test_basic.py @@ -59,6 +59,5 @@ async def test_log_metrics(client: openai.AsyncOpenAI): base_url = str(client.base_url)[:-3].strip("/") response = requests.get(base_url + "/metrics") - response.raise_for_status() - assert response.json() is not None + assert response.status_code == HTTPStatus.OK diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 45c634b4a299..a86a5099d8b8 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -70,11 +70,13 @@ async def _force_log(): router = APIRouter() 
-# Add prometheus asgi middleware to route /metrics requests -route = Mount("/metrics", make_asgi_app()) -# Workaround for 307 Redirect for /metrics -route.path_regex = re.compile('^/metrics(?P<path>.*)$') -router.routes.append(route) + +def mount_metrics(app: fastapi.FastAPI): + # Add prometheus asgi middleware to route /metrics requests + metrics_route = Mount("/metrics", make_asgi_app()) + # Workaround for 307 Redirect for /metrics + metrics_route.path_regex = re.compile('^/metrics(?P<path>.*)$') + app.routes.append(metrics_route) @router.get("/health") @@ -164,6 +166,8 @@ def build_app(args): app.include_router(router) app.root_path = args.root_path + mount_metrics(app) + app.add_middleware( CORSMiddleware, allow_origins=args.allowed_origins, From bef87a2c6e86aab5ce8db0cb13b48260c88ff040 Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Wed, 17 Jul 2024 07:46:28 +0000 Subject: [PATCH 4/4] Update args --- tests/entrypoints/openai/test_basic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/openai/test_basic.py index de329d8cc665..2c721d9ba760 100644 --- a/tests/entrypoints/openai/test_basic.py +++ b/tests/entrypoints/openai/test_basic.py @@ -14,8 +14,6 @@ @pytest.fixture(scope="module") def server(): args = [ - "--model", - MODEL_NAME, # use half precision for speed and memory savings in CI environment "--dtype", "bfloat16", @@ -26,7 +24,7 @@ def server(): "128", ] - with RemoteOpenAIServer(args) as remote_server: + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: yield remote_server