@@ -37,7 +37,7 @@ def sampling_params():
3737class TestDeterministicSingleRequest :
3838 """Test single request determinism."""
3939
40-    @pytest.mark.asyncio
40+    @pytest.mark.anyio
4141 async def test_same_prompt_same_output (self , model_and_tokenizer , sampling_params ):
4242 """Same prompt should produce same output with temp=0."""
4343 from vllm_mlx import AsyncEngineCore , EngineConfig , SchedulerConfig
@@ -68,7 +68,7 @@ async def test_same_prompt_same_output(self, model_and_tokenizer, sampling_param
6868 assert len (outputs ) == 3
6969 assert outputs [0 ] == outputs [1 ] == outputs [2 ], f"Outputs differ: { outputs } "
7070
71-    @pytest.mark.asyncio
71+    @pytest.mark.anyio
7272 async def test_token_streaming_order (self , model_and_tokenizer , sampling_params ):
7373 """Tokens should stream in order."""
7474 from vllm_mlx import AsyncEngineCore
@@ -94,7 +94,7 @@ async def test_token_streaming_order(self, model_and_tokenizer, sampling_params)
9494class TestDeterministicConcurrentRequests :
9595 """Test concurrent request handling with determinism."""
9696
97-    @pytest.mark.asyncio
97+    @pytest.mark.anyio
9898 async def test_concurrent_same_prompt (self , model_and_tokenizer ):
9999 """Multiple concurrent requests with same prompt should get same output."""
100100 from vllm_mlx import (
@@ -137,7 +137,7 @@ async def get_output(rid):
137137 # All should be the same
138138 assert all (r == results [0 ] for r in results ), f"Outputs differ: { results } "
139139
140-    @pytest.mark.asyncio
140+    @pytest.mark.anyio
141141 async def test_concurrent_different_prompts (self , model_and_tokenizer ):
142142 """Different prompts should get different (but deterministic) outputs."""
143143 from vllm_mlx import (
@@ -191,7 +191,7 @@ async def get_output(rid):
191191class TestBatchingPerformance :
192192 """Test that batching improves throughput."""
193193
194-    @pytest.mark.asyncio
194+    @pytest.mark.anyio
195195 async def test_batched_faster_than_sequential (self , model_and_tokenizer ):
196196 """Batched requests should be faster than sequential."""
197197 from vllm_mlx import (
@@ -274,7 +274,7 @@ async def get_output(rid):
274274class TestRequestManagement :
275275 """Test request lifecycle management."""
276276
277-    @pytest.mark.asyncio
277+    @pytest.mark.anyio
278278 async def test_abort_request (self , model_and_tokenizer ):
279279 """Test aborting a request mid-generation."""
280280 from vllm_mlx import AsyncEngineCore , SamplingParams
@@ -304,7 +304,7 @@ async def test_abort_request(self, model_and_tokenizer):
304304 stats = engine .get_stats ()
305305 assert stats ["active_requests" ] == 0
306306
307-    @pytest.mark.asyncio
307+    @pytest.mark.anyio
308308 async def test_engine_stats (self , model_and_tokenizer ):
309309 """Test engine statistics tracking."""
310310 from vllm_mlx import (
@@ -343,7 +343,7 @@ async def test_engine_stats(self, model_and_tokenizer):
343343class TestSchedulerPolicy :
344344 """Test scheduler policies."""
345345
346-    @pytest.mark.asyncio
346+    @pytest.mark.anyio
347347 async def test_fcfs_ordering (self , model_and_tokenizer ):
348348 """Test that FCFS policy processes requests in order."""
349349 from vllm_mlx import (
@@ -396,7 +396,7 @@ async def track_completion(rid, name):
396396class TestEdgeCases :
397397 """Test edge cases and error handling."""
398398
399-    @pytest.mark.asyncio
399+    @pytest.mark.anyio
400400 async def test_empty_prompt (self , model_and_tokenizer ):
401401 """Test handling of empty prompt."""
402402 from vllm_mlx import AsyncEngineCore , SamplingParams
@@ -414,7 +414,7 @@ async def test_empty_prompt(self, model_and_tokenizer):
414414 assert out .finished
415415 break
416416
417-    @pytest.mark.asyncio
417+    @pytest.mark.anyio
418418 async def test_very_short_max_tokens (self , model_and_tokenizer ):
419419 """Test with max_tokens=1."""
420420 from vllm_mlx import AsyncEngineCore , SamplingParams
@@ -436,7 +436,7 @@ async def test_very_short_max_tokens(self, model_and_tokenizer):
436436 # Should generate exactly 1 token
437437 assert token_count == 1
438438
439-    @pytest.mark.asyncio
439+    @pytest.mark.anyio
440440 async def test_multiple_start_stop (self , model_and_tokenizer ):
441441 """Test starting and stopping engine multiple times."""
442442 from vllm_mlx import AsyncEngineCore , SamplingParams
0 commit comments