@@ -37,7 +37,7 @@ def sampling_params():
3737class TestDeterministicSingleRequest :
3838 """Test single request determinism."""
3939
40-    @pytest.mark.asyncio
40+    @pytest.mark.anyio
4141 async def test_same_prompt_same_output (self , model_and_tokenizer , sampling_params ):
4242 """Same prompt should produce same output with temp=0."""
4343 from vllm_mlx import AsyncEngineCore , EngineConfig , SchedulerConfig
@@ -68,7 +68,7 @@ async def test_same_prompt_same_output(self, model_and_tokenizer, sampling_param
6868 assert len (outputs ) == 3
6969 assert outputs [0 ] == outputs [1 ] == outputs [2 ], f"Outputs differ: { outputs } "
7070
71-    @pytest.mark.asyncio
71+    @pytest.mark.anyio
7272 async def test_token_streaming_order (self , model_and_tokenizer , sampling_params ):
7373 """Tokens should stream in order."""
7474 from vllm_mlx import AsyncEngineCore
@@ -94,7 +94,7 @@ async def test_token_streaming_order(self, model_and_tokenizer, sampling_params)
9494class TestDeterministicConcurrentRequests :
9595 """Test concurrent request handling with determinism."""
9696
97-    @pytest.mark.asyncio
97+    @pytest.mark.anyio
9898 async def test_concurrent_same_prompt (self , model_and_tokenizer ):
9999 """Multiple concurrent requests with same prompt should get same output."""
100100 from vllm_mlx import (
@@ -137,7 +137,7 @@ async def get_output(rid):
137137 # All should be the same
138138 assert all (r == results [0 ] for r in results ), f"Outputs differ: { results } "
139139
140-    @pytest.mark.asyncio
140+    @pytest.mark.anyio
141141 async def test_concurrent_different_prompts (self , model_and_tokenizer ):
142142 """Different prompts should get different (but deterministic) outputs."""
143143 from vllm_mlx import (
@@ -191,7 +191,7 @@ async def get_output(rid):
191191class TestBatchingPerformance :
192192 """Test that batching improves throughput."""
193193
194-    @pytest.mark.asyncio
194+    @pytest.mark.anyio
195195 async def test_batched_faster_than_sequential (self , model_and_tokenizer ):
196196 """Batched requests should be faster than sequential."""
197197 from vllm_mlx import (
@@ -274,7 +274,7 @@ async def get_output(rid):
274274class TestRequestManagement :
275275 """Test request lifecycle management."""
276276
277-    @pytest.mark.asyncio
277+    @pytest.mark.anyio
278278 async def test_abort_request (self , model_and_tokenizer ):
279279 """Test aborting a request mid-generation."""
280280 from vllm_mlx import AsyncEngineCore , SamplingParams
@@ -304,7 +304,7 @@ async def test_abort_request(self, model_and_tokenizer):
304304 stats = engine .get_stats ()
305305 assert stats ["active_requests" ] == 0
306306
307-    @pytest.mark.asyncio
307+    @pytest.mark.anyio
308308 async def test_engine_stats (self , model_and_tokenizer ):
309309 """Test engine statistics tracking."""
310310 from vllm_mlx import (
@@ -343,7 +343,7 @@ async def test_engine_stats(self, model_and_tokenizer):
343343class TestSchedulerPolicy :
344344 """Test scheduler policies."""
345345
346-    @pytest.mark.asyncio
346+    @pytest.mark.anyio
347347 async def test_fcfs_ordering (self , model_and_tokenizer ):
348348 """Test that FCFS policy processes requests in order."""
349349 from vllm_mlx import (
@@ -396,7 +396,7 @@ async def track_completion(rid, name):
396396class TestEdgeCases :
397397 """Test edge cases and error handling."""
398398
399-    @pytest.mark.asyncio
399+    @pytest.mark.anyio
400400 async def test_empty_prompt (self , model_and_tokenizer ):
401401 """Test handling of empty prompt."""
402402 from vllm_mlx import AsyncEngineCore , SamplingParams
@@ -414,7 +414,7 @@ async def test_empty_prompt(self, model_and_tokenizer):
414414 assert out .finished
415415 break
416416
417-    @pytest.mark.asyncio
417+    @pytest.mark.anyio
418418 async def test_very_short_max_tokens (self , model_and_tokenizer ):
419419 """Test with max_tokens=1."""
420420 from vllm_mlx import AsyncEngineCore , SamplingParams
@@ -436,7 +436,7 @@ async def test_very_short_max_tokens(self, model_and_tokenizer):
436436 # Should generate exactly 1 token
437437 assert token_count == 1
438438
439-    @pytest.mark.asyncio
439+    @pytest.mark.anyio
440440 async def test_multiple_start_stop (self , model_and_tokenizer ):
441441 """Test starting and stopping engine multiple times."""
442442 from vllm_mlx import AsyncEngineCore , SamplingParams
0 commit comments