2 files changed, +20 −0

```diff
@@ -541,6 +541,18 @@ steps:
   commands:
     - pytest -v -s kernels/mamba

+- label: RunAI Model Streamer Test # 77s
+  # Cross-continent streaming could take much longer.
+  timeout_in_minutes: 10
+  torch_nightly: true
+  mirror_hardwares: [amdexperimental]
+  source_file_dependencies:
+    - vllm/engine
+    - vllm/model_executor/model_loader
+    - tests/model_executor/model_loader/runai_model_streamer
+  commands:
+    - pytest -v -s model_executor/model_loader/runai_model_streamer
+
 - label: Model Executor Test # 23min
   timeout_in_minutes: 35
   mirror_hardwares: [amdexperimental]
```

tests/model_executor/model_loader/runai_model_streamer
```diff
@@ -7,6 +7,8 @@

 load_format = "runai_streamer"
 test_model = "openai-community/gpt2"
+# TODO(amacaskill): Replace with a GKE owned GCS bucket.
+test_gcs_model = "gs://vertex-model-garden-public-us/codegemma/codegemma-2b/"

 prompts = [
     "Hello, my name is",
@@ -32,3 +34,9 @@ def test_runai_model_loader_download_files(vllm_runner):
     with vllm_runner(test_model, load_format=load_format) as llm:
         deserialized_outputs = llm.generate(prompts, sampling_params)
         assert deserialized_outputs
+
+
+def test_runai_model_loader_download_files_gcs(vllm_runner):
+    with vllm_runner(test_gcs_model, load_format=load_format) as llm:
+        deserialized_outputs = llm.generate(prompts, sampling_params)
+        assert deserialized_outputs
```
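The new test passes a `gs://` URI where the existing test passes a Hugging Face model id, so the loader must distinguish object-store URIs from plain model ids. As a rough, hypothetical sketch of that distinction (the helper name and scheme check below are assumptions for illustration, not vLLM's actual implementation):

```python
from urllib.parse import urlparse

def is_object_store_uri(model: str) -> bool:
    """Return True for object-store URIs (gs://, s3://), False for plain model ids."""
    # A plain HF model id like "openai-community/gpt2" has no URL scheme,
    # while "gs://bucket/path" parses with scheme "gs".
    scheme = urlparse(model).scheme
    return scheme in ("gs", "s3")

print(is_object_store_uri("gs://vertex-model-garden-public-us/codegemma/codegemma-2b/"))  # True
print(is_object_store_uri("openai-community/gpt2"))  # False
```

With a check like this, the same `load_format = "runai_streamer"` path can route to a remote-streaming backend for bucket URIs and to the usual download path for model ids.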