diff --git a/src/vllm_tgis_adapter/__main__.py b/src/vllm_tgis_adapter/__main__.py index b43d924f..f8dab362 100644 --- a/src/vllm_tgis_adapter/__main__.py +++ b/src/vllm_tgis_adapter/__main__.py @@ -75,11 +75,13 @@ router = APIRouter() -# Add prometheus asgi middleware to route /metrics requests -route = Mount("/metrics", make_asgi_app()) -# Workaround for 307 Redirect for /metrics -route.path_regex = re.compile("^/metrics(?P<path>.*)$") -router.routes.append(route) + +def mount_metrics(app: fastapi.FastAPI) -> None: + # Add prometheus asgi middleware to route /metrics requests + metrics_route = Mount("/metrics", make_asgi_app()) + # Workaround for 307 Redirect for /metrics + metrics_route.path_regex = re.compile("^/metrics(?P<path>.*)$") + app.routes.append(metrics_route) @router.get("/health") @@ -188,6 +190,8 @@ async def _force_log(): # noqa: ANN202 app.include_router(router) app.root_path = args.root_path + mount_metrics(app) + app.add_middleware( CORSMiddleware, allow_origins=args.allowed_origins, diff --git a/tests/conftest.py b/tests/conftest.py index e2655226..96b16ac5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,24 +39,24 @@ def monkeysession(): @pytest.fixture(scope="session") -def lora_enabled(): +def lora_available() -> bool: # lora does not work on cpu return not vllm.config.is_cpu() @pytest.fixture(scope="session") -def requires_lora(lora_enabled): # noqa: PT004 - if not lora_enabled: - pytest.skip(reason="Lora is not enabled. 
(disabled on cpu)") +def lora_adapter_name(request: pytest.FixtureRequest): + if not request.getfixturevalue("lora_available"): + pytest.skip("Lora is not available with this configuration") - -@pytest.fixture(scope="session") -def lora_adapter_name(requires_lora): return "lora-test" @pytest.fixture(scope="session") -def lora_adapter_path(requires_lora): +def lora_adapter_path(request: pytest.FixtureRequest): + if not request.getfixturevalue("lora_available"): + pytest.skip("Lora is not available with this configuration") + from huggingface_hub import snapshot_download path = snapshot_download(repo_id="yard1/llama-2-7b-sql-lora-test") @@ -64,25 +64,22 @@ def lora_adapter_path(requires_lora): @pytest.fixture(scope="session") -def args( # noqa: PLR0913 +def args( + request: pytest.FixtureRequest, monkeysession, grpc_server_thread_port, http_server_thread_port, - lora_enabled, - lora_adapter_name, - lora_adapter_path, + lora_available, ) -> argparse.Namespace: """Return parsed CLI arguments for the adapter/vLLM.""" # avoid parsing pytest arguments as vllm/vllm_tgis_adapter arguments extra_args: list[str] = [] - if lora_enabled: - extra_args.extend( - ( - "--enable-lora", - f"--lora-modules={lora_adapter_name}={lora_adapter_path}", - ) - ) + if lora_available: + name = request.getfixturevalue("lora_adapter_name") + path = request.getfixturevalue("lora_adapter_path") + + extra_args.extend(("--enable-lora", f"--lora-modules={name}={path}")) monkeysession.setattr( sys, diff --git a/tests/test_http_server.py b/tests/test_http_server.py index 01bb1370..a664aef5 100644 --- a/tests/test_http_server.py +++ b/tests/test_http_server.py @@ -27,3 +27,8 @@ def test_completions(http_server_url, _http_server): generated_text = completion["choices"][0]["text"] assert generated_text + + +def test_metrics(http_server_url, _http_server): + response = requests.get(f"{http_server_url}/metrics") + response.raise_for_status()