diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 4a898df8f2a3..75c2a93f3285 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -543,8 +543,11 @@ steps:
 - label: Model Executor Test # 23min
   timeout_in_minutes: 35
+  torch_nightly: true
   mirror_hardwares: [amdexperimental]
   source_file_dependencies:
+  - vllm/engine/arg_utils.py
+  - vllm/config/model.py
   - vllm/model_executor
   - tests/model_executor
   - tests/entrypoints/openai/test_tensorizer_entrypoint.py
diff --git a/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py b/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py
index 22bdb3b44eb0..1119d0de1c8b 100644
--- a/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py
+++ b/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py
@@ -1,12 +1,16 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+
 from vllm import SamplingParams
 from vllm.config.load import LoadConfig
 from vllm.model_executor.model_loader import get_model_loader

 load_format = "runai_streamer"
 test_model = "openai-community/gpt2"
+# TODO(amacaskill): Replace with a GKE owned GCS bucket.
+test_gcs_model = "gs://vertex-model-garden-public-us/codegemma/codegemma-2b/"

 prompts = [
     "Hello, my name is",
@@ -32,3 +36,16 @@ def test_runai_model_loader_download_files(vllm_runner):
     with vllm_runner(test_model, load_format=load_format) as llm:
         deserialized_outputs = llm.generate(prompts, sampling_params)
         assert deserialized_outputs
+
+
+def test_runai_model_loader_download_files_gcs(
+    vllm_runner, monkeypatch: pytest.MonkeyPatch
+):
+    monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "fake-project")
+    monkeypatch.setenv("RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS", "true")
+    monkeypatch.setenv(
+        "CLOUD_STORAGE_EMULATOR_ENDPOINT", "https://storage.googleapis.com"
+    )
+    with vllm_runner(test_gcs_model, load_format=load_format) as llm:
+        deserialized_outputs = llm.generate(prompts, sampling_params)
+        assert deserialized_outputs