@@ -155,12 +155,12 @@ steps:
   - pytest -v -s test_inputs.py
   - pytest -v -s multimodal

-- label: Kernels Test %N
-  # mirror_hardwares: [amd]
-  commands:
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
-  - pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
-  parallelism: 4
+# - label: Kernels Test %N
+#   # mirror_hardwares: [amd]
+#   commands:
+#   - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+#   - pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+#   parallelism: 4

 - label: Models Test
   # mirror_hardwares: [amd]
@@ -202,20 +202,20 @@ steps:
   - export VLLM_ATTENTION_BACKEND=XFORMERS
   - pytest -v -s spec_decode

-- label: LoRA Test %N
-  # mirror_hardwares: [amd]
-  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
-  parallelism: 4
-
-- label: LoRA Long Context (Distributed)
-  # mirror_hardwares: [amd]
-  num_gpus: 4
-  # This test runs llama 13B, so it is required to run on 4 GPUs.
-  commands:
-  # FIXIT: find out which code initialize cuda before running the test
-  # before the fix, we need to use spawn to test it
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s -x lora/test_long_context.py
+# - label: LoRA Test %N
+#   # mirror_hardwares: [amd]
+#   command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
+#   parallelism: 4
+
+# - label: LoRA Long Context (Distributed)
+#   # mirror_hardwares: [amd]
+#   num_gpus: 4
+#   # This test runs llama 13B, so it is required to run on 4 GPUs.
+#   commands:
+#   # FIXIT: find out which code initialize cuda before running the test
+#   # before the fix, we need to use spawn to test it
+#   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+#   - pytest -v -s -x lora/test_long_context.py

 - label: Tensorizer Test
   # mirror_hardwares: [amd]
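
For context, the steps being disabled above use Buildkite test sharding: `parallelism: 4` runs four copies of the step, each copy gets a distinct `BUILDKITE_PARALLEL_JOB` index in `[0, BUILDKITE_PARALLEL_JOB_COUNT)`, and the `$$` escaping defers variable interpolation from pipeline upload time to job runtime. A minimal standalone sketch of that pattern, assuming a pytest sharding plugin such as pytest-shard supplies the `--shard-id`/`--num-shards` options (step label and test path below are hypothetical, not part of this diff):

- label: Sharded Suite %N        # %N is Buildkite's placeholder for the parallel job index
  parallelism: 4                 # Buildkite schedules 4 parallel jobs for this step
  commands:
  # each job runs only its 1/4 slice of the collected tests
  - pytest -v -s some_suite --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT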