Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions src/vllm_tgis_adapter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,13 @@

router = APIRouter()

# Add prometheus asgi middleware to route /metrics requests
route = Mount("/metrics", make_asgi_app())
# Workaround for 307 Redirect for /metrics
route.path_regex = re.compile("^/metrics(?P<path>.*)$")
router.routes.append(route)

def mount_metrics(app: fastapi.FastAPI) -> None:
    """Expose prometheus metrics on /metrics of the given FastAPI app."""
    prometheus_asgi = make_asgi_app()
    metrics = Mount("/metrics", prometheus_asgi)
    # Starlette's Mount would otherwise answer a bare "/metrics" request with a
    # 307 redirect to "/metrics/"; widening the path regex makes the bare path
    # match directly.
    metrics.path_regex = re.compile("^/metrics(?P<path>.*)$")
    app.routes.append(metrics)


@router.get("/health")
Expand Down Expand Up @@ -188,6 +190,8 @@ async def _force_log(): # noqa: ANN202
app.include_router(router)
app.root_path = args.root_path

mount_metrics(app)

app.add_middleware(
CORSMiddleware,
allow_origins=args.allowed_origins,
Expand Down
35 changes: 16 additions & 19 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,50 +39,47 @@ def monkeysession():


@pytest.fixture(scope="session")
def lora_enabled():
def lora_available() -> bool:
# lora does not work on cpu
return not vllm.config.is_cpu()


@pytest.fixture(scope="session")
def requires_lora(lora_enabled): # noqa: PT004
if not lora_enabled:
pytest.skip(reason="Lora is not enabled. (disabled on cpu)")
def lora_adapter_name(request: pytest.FixtureRequest):
if not request.getfixturevalue("lora_available"):
pytest.skip("Lora is not available with this configuration")


@pytest.fixture(scope="session")
def lora_adapter_name(requires_lora):
return "lora-test"


@pytest.fixture(scope="session")
def lora_adapter_path(requires_lora):
def lora_adapter_path(request: pytest.FixtureRequest):
if not request.getfixturevalue("lora_available"):
pytest.skip("Lora is not available with this configuration")

from huggingface_hub import snapshot_download

path = snapshot_download(repo_id="yard1/llama-2-7b-sql-lora-test")
return path


@pytest.fixture(scope="session")
def args( # noqa: PLR0913
def args(
request: pytest.FixtureRequest,
monkeysession,
grpc_server_thread_port,
http_server_thread_port,
lora_enabled,
lora_adapter_name,
lora_adapter_path,
lora_available,
) -> argparse.Namespace:
"""Return parsed CLI arguments for the adapter/vLLM."""
# avoid parsing pytest arguments as vllm/vllm_tgis_adapter arguments

extra_args: list[str] = []
if lora_enabled:
extra_args.extend(
(
"--enable-lora",
f"--lora-modules={lora_adapter_name}={lora_adapter_path}",
)
)
if lora_available:
name = request.getfixturevalue("lora_adapter_name")
path = request.getfixturevalue("lora_adapter_path")

extra_args.extend(("--enable-lora", f"--lora-modules={name}={path}"))

monkeysession.setattr(
sys,
Expand Down
5 changes: 5 additions & 0 deletions tests/test_http_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,8 @@ def test_completions(http_server_url, _http_server):
generated_text = completion["choices"][0]["text"]

assert generated_text


def test_metrics(http_server_url, _http_server):
    """The running HTTP server must serve prometheus metrics at /metrics."""
    resp = requests.get(f"{http_server_url}/metrics")
    # raise_for_status() fails the test on any 4xx/5xx response (and would
    # also surface a 307 redirect misconfiguration as a non-2xx final status).
    resp.raise_for_status()