From 8945499b34a24dc5ad54d06e4616c4f3beb404a7 Mon Sep 17 00:00:00 2001
From: Prashant Gupta
Date: Fri, 27 Feb 2026 10:01:35 -0800
Subject: [PATCH 1/7] chore(0.16.0-support): bump vllm upper bound to v0.16.0

Signed-off-by: Prashant Gupta
---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ef8c5747b..e351a73bc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ license = {text = "Apache 2"}
 dependencies = [
     "fms-model-optimizer[fp8]>=0.8.0",
     "ibm-fms>=1.7.0,<2.0",
-    "vllm>=0.15.1,<=0.15.1",
+    "vllm>=0.15.1,<=0.16.0",
 ]
 requires-python = ">=3.11"
 dynamic = ["version"]
@@ -76,7 +76,7 @@ environments = [
 ]
 
 [tool.uv.sources]
-vllm = { git = "https://github.com/vllm-project/vllm", rev = "v0.15.1" }
+vllm = { git = "https://github.com/vllm-project/vllm", rev = "v0.16.0" }
 
 [tool.ty.rules]
 possibly-missing-attribute = "ignore"

From e68d1894a82b2cd7df87054cf38162754f9fc5aa Mon Sep 17 00:00:00 2001
From: Prashant Gupta
Date: Fri, 27 Feb 2026 10:29:35 -0800
Subject: [PATCH 2/7] chore(0.16.0-support): bump vllm lower bound to v0.16.0

Signed-off-by: Prashant Gupta
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index e351a73bc..cbfcd5336 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ license = {text = "Apache 2"}
 dependencies = [
     "fms-model-optimizer[fp8]>=0.8.0",
     "ibm-fms>=1.7.0,<2.0",
-    "vllm>=0.15.1,<=0.16.0",
+    "vllm>=0.16.0,<=0.16.0",
 ]
 requires-python = ">=3.11"
 dynamic = ["version"]

From 016b476426e181828834c0a1c1dfa85702f8ec5c Mon Sep 17 00:00:00 2001
From: Travis Johnson
Date: Fri, 27 Feb 2026 14:31:40 -0700
Subject: [PATCH 3/7] chore(0.16.0-support): push uv.lock update

Signed-off-by: Travis Johnson
---
 uv.lock | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/uv.lock b/uv.lock
index 2d1ca3183..966a3f286 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.11"
 resolution-markers = [
     "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'win32'",
@@ -33,7 +33,7 @@ overrides = [
     { name = "torchaudio", marker = "sys_platform == 'never'" },
     { name = "torchvision", marker = "sys_platform == 'never'" },
     { name = "triton", marker = "sys_platform == 'never'" },
-    { name = "vllm", marker = "platform_machine not in 's390x, ppc64le'", git = "https://github.com/vllm-project/vllm?rev=v0.15.1" },
+    { name = "vllm", marker = "platform_machine not in 's390x, ppc64le'", git = "https://github.com/vllm-project/vllm?rev=v0.16.0" },
 ]
 
 [[package]]
@@ -4283,8 +4283,8 @@ wheels = [
 
 [[package]]
 name = "vllm"
-version = "0.15.1"
-source = { git = "https://github.com/vllm-project/vllm?rev=v0.15.1#1892993bc18e243e2c05841314c5e9c06a80c70d" }
+version = "0.16.0"
+source = { git = "https://github.com/vllm-project/vllm?rev=v0.16.0#89a77b10846fd96273cce78d86d2556ea582d26e" }
 dependencies = [
     { name = "aiohttp" },
     { name = "anthropic" },
@@ -4372,7 +4372,7 @@ dev = [
 requires-dist = [
     { name = "fms-model-optimizer", extras = ["fp8"], specifier = ">=0.8.0" },
     { name = "ibm-fms", specifier = ">=1.7.0,<2.0" },
-    { name = "vllm", git = "https://github.com/vllm-project/vllm?rev=v0.15.1" },
+    { name = "vllm", git = "https://github.com/vllm-project/vllm?rev=v0.16.0" },
 ]
 
 [package.metadata.requires-dev]
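Patches 1-3 only move version pins and the lock file. A quick way to confirm a re-synced environment actually picked up the new tag (a sketch, not part of the patch series; it assumes `uv sync` has already run against the updated lock):

```python
# Hypothetical post-sync sanity check; not part of the patches above.
import vllm

# The git source is pinned to tag v0.16.0, so the resolved build should
# report a matching version (possibly with a local-version suffix).
assert vllm.__version__.startswith("0.16"), f"unexpected vllm: {vllm.__version__}"
```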
From c8e370a5920cbbf00ee9e29aa28581a14f23cfb9 Mon Sep 17 00:00:00 2001
From: Travis Johnson
Date: Fri, 27 Feb 2026 15:32:51 -0700
Subject: [PATCH 4/7] chore(0.16.0-support): fix typing issues

Signed-off-by: Travis Johnson
---
 vllm_spyre/multimodal/mm_mappings/llava_next.py |  4 +---
 vllm_spyre/multimodal/mm_mappings/mistral3.py   |  4 +---
 vllm_spyre/platform.py                          | 16 +++++++++++-----
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/vllm_spyre/multimodal/mm_mappings/llava_next.py b/vllm_spyre/multimodal/mm_mappings/llava_next.py
index 6e989d42e..efdfce2d7 100644
--- a/vllm_spyre/multimodal/mm_mappings/llava_next.py
+++ b/vllm_spyre/multimodal/mm_mappings/llava_next.py
@@ -156,9 +156,7 @@ def _build_multimodal_spec(proc_res):
     }
     mm_fields = MultiModalKwargsItem(
         {
-            mm_key: MultiModalFieldElem(
-                modality="image", key=mm_key, data=mm_data, field=MultiModalBatchedField()
-            )
+            mm_key: MultiModalFieldElem(data=mm_data, field=MultiModalBatchedField())
             for mm_key, mm_data in mm_data.items()
         }
     )
diff --git a/vllm_spyre/multimodal/mm_mappings/mistral3.py b/vllm_spyre/multimodal/mm_mappings/mistral3.py
index dd11541ba..b73b4a488 100644
--- a/vllm_spyre/multimodal/mm_mappings/mistral3.py
+++ b/vllm_spyre/multimodal/mm_mappings/mistral3.py
@@ -138,9 +138,7 @@ def get_warmup_inputs(self, req_count: int) -> MMWarmupInputs:
     }
     mm_fields = MultiModalKwargsItem(
         {
-            mm_key: MultiModalFieldElem(
-                modality="image", key=mm_key, data=mm_data, field=MultiModalBatchedField()
-            )
+            mm_key: MultiModalFieldElem(data=mm_data, field=MultiModalBatchedField())
             for mm_key, mm_data in mm_data.items()
         }
     )
diff --git a/vllm_spyre/platform.py b/vllm_spyre/platform.py
index b40af9335..1d0caa853 100644
--- a/vllm_spyre/platform.py
+++ b/vllm_spyre/platform.py
@@ -13,10 +13,9 @@
 import math
 import operator
 import os
-from typing import TYPE_CHECKING, Union, cast
+from typing import TYPE_CHECKING, cast
 
 import torch
-from vllm.inputs import ProcessorInputs, PromptType, TokenInputs
 from vllm.logger import init_logger
 from vllm.utils.argparse_utils import FlexibleArgumentParser
@@ -26,11 +25,18 @@
     from vllm.config import ModelConfig, VllmConfig
     from vllm.pooling_params import PoolingParams
     from vllm.sampling_params import SamplingParams
+    from vllm.renderers.inputs import DictPrompt, TokPrompt
+    from vllm.inputs import ProcessorInputs, PromptType, TokenInputs
 else:
     ModelConfig = None
     VllmConfig = None
     SamplingParams = None
     PoolingParams = None
+    DictPrompt = None
+    TokPrompt = None
+    ProcessorInputs = None
+    PromptType = None
+    TokenInputs = None
 
 from vllm.platforms import Platform, PlatformEnum
 import vllm_spyre.envs as envs_spyre
@@ -337,9 +343,9 @@ def supports_v1(cls, model_config: ModelConfig) -> bool:
     @classmethod
     def validate_request(
         cls,
-        prompt: PromptType,
-        params: Union[SamplingParams, PoolingParams],
-        processed_inputs: ProcessorInputs | None = None,
+        prompt: "PromptType | DictPrompt | TokPrompt",
+        params: "SamplingParams | PoolingParams",
+        processed_inputs: "ProcessorInputs",
     ) -> None:
         """Raises if this request is unsupported on this platform"""
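The platform.py changes above move the request-typing imports behind the TYPE_CHECKING guard so they are only evaluated by type checkers, with runtime placeholders so the names still resolve if touched. A minimal sketch of that pattern (the `check` function is illustrative, not from the patch):

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated by type checkers only; never imported at runtime.
    from vllm.sampling_params import SamplingParams
else:
    # Runtime placeholder so the name still exists if it is ever referenced.
    SamplingParams = None


def check(params: "SamplingParams") -> None:
    # The quoted annotation defers evaluation, so the guarded import suffices.
    ...
```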
From 6de2410f42426ec6cc9a12c95a1a014429c762c7 Mon Sep 17 00:00:00 2001
From: Travis Johnson
Date: Fri, 27 Feb 2026 15:34:26 -0700
Subject: [PATCH 5/7] chore(0.16.0-support): Bob's fix for profiler changes
 (untested)

Signed-off-by: Travis Johnson
---
 vllm_spyre/v1/worker/spyre_worker.py | 48 ++++++++++++----------------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/vllm_spyre/v1/worker/spyre_worker.py b/vllm_spyre/v1/worker/spyre_worker.py
index 7effb0550..b8e716b87 100644
--- a/vllm_spyre/v1/worker/spyre_worker.py
+++ b/vllm_spyre/v1/worker/spyre_worker.py
@@ -17,6 +17,7 @@
 import vllm.envs as envs
 from huggingface_hub import hf_hub_download
 from vllm.config import VllmConfig
+from vllm.profiler.wrapper import TorchProfilerWrapper
 from vllm.distributed import ensure_model_parallel_initialized, init_distributed_environment
 from vllm.logger import init_logger
 from vllm.pooling_params import PoolingParams
@@ -261,12 +262,11 @@ def __init__(
         )
         self._env_initialized = False
 
-        # Torch profiler. Enabled and configured through env vars:
-        # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
-        if envs.VLLM_TORCH_PROFILER_DIR:
-            torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
-            logger.info("Profiling enabled. Traces will be saved to: %s", torch_profiler_trace_dir)
-
+        # Torch profiler. Enabled and configured through ProfilerConfig. Set via:
+        #   --profiler-config.profiler=torch
+        #   --profiler-config.torch_profiler_dir=/path/to/save/trace
+        profiler_config = vllm_config.profiler_config
+        if profiler_config.profiler == "torch":
             if envs_spyre.VLLM_SPYRE_DYNAMO_BACKEND == "sendnn":
                 logger.info(
                     "Traces will contain AIU events if PyTorch with"
                     " AIU profiling support is installed."
                 )
@@ -285,25 +285,12 @@ def __init__(
                     "execution in the trace."
                 )
 
-            logger.debug(
-                "Profiler config: record_shapes=%s,profile_memory=%s,with_stack=%s,with_flops=%s",
-                envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
-                envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
-                envs.VLLM_TORCH_PROFILER_WITH_STACK,
-                envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
-            )
-
-            # TODO: These flags should be set as bools, but are passed through as strings.
-            # This is probably a bug.
-            self.profiler = torch.profiler.profile(
-                activities=[torch.profiler.ProfilerActivity.CPU],
-                record_shapes=envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,  # ty: ignore
-                profile_memory=envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,  # ty: ignore
-                with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,  # ty: ignore
-                with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,  # ty: ignore
-                on_trace_ready=torch.profiler.tensorboard_trace_handler(
-                    torch_profiler_trace_dir, use_gzip=True
-                ),
+            worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
+            self.profiler: TorchProfilerWrapper | None = TorchProfilerWrapper(
+                profiler_config,
+                worker_name=worker_name,
+                local_rank=self.local_rank,
+                activities=["CPU"],
             )
         else:
             self.profiler = None
@@ -723,9 +710,14 @@ def _warmup_model_forward_pass(
         }
         self.execute_model(scheduler_output)  # Prefill
 
-    def profile(self, is_start=True):
+    def profile(self, is_start: bool = True):
         if self.profiler is None:
-            raise RuntimeError("Profiler is not enabled.")
+            raise RuntimeError(
+                "Profiling is not enabled. Please set --profiler-config to enable "
+                "profiling. Example: "
+                "'--profiler-config.profiler=torch --profiler-config.torch_profiler_dir"
+                "=YOUR_DIR_PATH_TO_DUMP_TRACE'"
+            )
         if is_start:
             self.profiler.start()
         else:
@@ -752,6 +744,8 @@ def execute_model(
         self,
         scheduler_output: "SchedulerOutput",
     ) -> ModelRunnerOutput | None:
+        if self.profiler is not None:
+            self.profiler.step()
         output = self.model_runner.execute_model(scheduler_output)
         return output if self.is_driver_worker else None
From 69221339067320ac340519d31c9b508692ed5069 Mon Sep 17 00:00:00 2001
From: Prashant Gupta
Date: Fri, 27 Feb 2026 16:34:52 -0800
Subject: [PATCH 6/7] test(0.16.0-support): update validate_request calls with
 processed_inputs=None

Since we had to remove the | None from the function definition to adhere
to the upstream definition.

Signed-off-by: Prashant Gupta
---
 tests/utils/test_platform_validation.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/utils/test_platform_validation.py b/tests/utils/test_platform_validation.py
index 36cb078c7..108d08a78 100644
--- a/tests/utils/test_platform_validation.py
+++ b/tests/utils/test_platform_validation.py
@@ -24,7 +24,7 @@ def test_strips_structured_outputs(self):
 
         assert params.structured_outputs is not None
 
-        SpyrePlatform.validate_request("Test prompt", params)
+        SpyrePlatform.validate_request("Test prompt", params, processed_inputs=None)
 
         assert params.structured_outputs is None
 
@@ -34,7 +34,7 @@ def test_logs_warning_when_stripping(self, caplog_vllm_spyre):
             max_tokens=20, structured_outputs=StructuredOutputsParams(json_object=True)
         )
 
-        SpyrePlatform.validate_request("Test prompt", params)
+        SpyrePlatform.validate_request("Test prompt", params, processed_inputs=None)
 
         assert len(caplog_vllm_spyre.records) > 0
         warning_record = caplog_vllm_spyre.records[0]
@@ -55,7 +55,7 @@ def test_strips_different_structured_output_types(self, structured_output):
 
         assert params.structured_outputs is not None
 
-        SpyrePlatform.validate_request("Test prompt", params)
+        SpyrePlatform.validate_request("Test prompt", params, processed_inputs=None)
 
         assert params.structured_outputs is None
 
@@ -77,7 +77,7 @@ def test_preserves_other_sampling_params(self):
             "top_k": params.top_k,
         }
 
-        SpyrePlatform.validate_request("Test prompt", params)
+        SpyrePlatform.validate_request("Test prompt", params, processed_inputs=None)
 
         # Verify other params are unchanged
         assert params.max_tokens == original_values["max_tokens"]
@@ -92,7 +92,7 @@ def test_does_not_affect_pooling_params(self):
         pooling_params = PoolingParams()
 
         # Should not raise any errors and should return early
-        SpyrePlatform.validate_request("Test prompt", pooling_params)
+        SpyrePlatform.validate_request("Test prompt", pooling_params, processed_inputs=None)
 
         # PoolingParams don't have structured_outputs, so just verify no exception
         assert True  # If we got here, the early return worked
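Because `processed_inputs` no longer has a default, every call site must now pass something explicitly. A minimal illustration of the breakage the tests above adapt to (the stub is a hypothetical stand-in, not the real SpyrePlatform API):

```python
# Hypothetical stand-in: mirrors only the fact that processed_inputs
# lost its default value in patch 4.
def validate_request(prompt: str, params: object, processed_inputs: object) -> None:
    """Raises if the request is unsupported."""


# validate_request("Test prompt", object())  # TypeError: missing 'processed_inputs'
validate_request("Test prompt", object(), processed_inputs=None)  # what the tests now do
```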
From 80911cadf6dcf781f89733af527d4fe5b2663df1 Mon Sep 17 00:00:00 2001
From: Travis Johnson
Date: Mon, 2 Mar 2026 14:15:07 -0700
Subject: [PATCH 7/7] improve some docs / logs around the profiling changes

Signed-off-by: Travis Johnson
---
 vllm_spyre/v1/worker/spyre_worker.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/vllm_spyre/v1/worker/spyre_worker.py b/vllm_spyre/v1/worker/spyre_worker.py
index b8e716b87..432a3af5b 100644
--- a/vllm_spyre/v1/worker/spyre_worker.py
+++ b/vllm_spyre/v1/worker/spyre_worker.py
@@ -265,10 +265,20 @@ def __init__(
         # Torch profiler. Enabled and configured through ProfilerConfig. Set via:
         #   --profiler-config.profiler=torch
         #   --profiler-config.torch_profiler_dir=/path/to/save/trace
+        # OR
+        #   --profiler-config '{"profiler": "torch", "torch_profiler_dir": "/path/to/save/trace"}'
         profiler_config = vllm_config.profiler_config
         if profiler_config.profiler == "torch":
+            worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
+            self.profiler: TorchProfilerWrapper | None = TorchProfilerWrapper(
+                profiler_config,
+                worker_name=worker_name,
+                local_rank=self.local_rank,
+                activities=["CPU"],
+            )
+
             if envs_spyre.VLLM_SPYRE_DYNAMO_BACKEND == "sendnn":
-                logger.info(
+                logger.info_once(
                     "Traces will contain AIU events if PyTorch with"
                     " AIU profiling support is installed."
                 )
@@ -279,19 +289,12 @@ def __init__(
             options = dict(opt.split("=") for opt in dt_opt.split(",") if "=" in opt)
             autopilot_opt = options.get("autopilot", "1")  # autopilot defaults to 1 if not set
             if autopilot_opt == "1":
-                logger.warning(
+                logger.warning_once(
                     "autopilot on detected with profiling enabled. Add "
-                    "autpilot=0 to DT_OPT to see individual AIU-kernel "
+                    "autopilot=0 to DT_OPT to see individual AIU-kernel "
                     "execution in the trace."
                 )
 
-            worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
-            self.profiler: TorchProfilerWrapper | None = TorchProfilerWrapper(
-                profiler_config,
-                worker_name=worker_name,
-                local_rank=self.local_rank,
-                activities=["CPU"],
-            )
         else:
             self.profiler = None
@@ -721,6 +724,9 @@ def profile(self, is_start: bool = True):
         if is_start:
             self.profiler.start()
         else:
+            if self.profiler is None:
+                logger.warning("Profiler was not started, nothing to stop.")
+                return
             self.profiler.stop()
 
     @property
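Taken together, patches 5 and 7 leave the worker constructing the profiler up front and driving it through start/step/stop. A condensed sketch of that flow, using only the wrapper API the diffs themselves exercise (`make_profiler` is an illustrative helper, not code from the series):

```python
from vllm.profiler.wrapper import TorchProfilerWrapper


def make_profiler(vllm_config, rank: int, local_rank: int) -> TorchProfilerWrapper | None:
    """Mirrors what SpyreWorker.__init__ does once patches 5 and 7 are applied."""
    profiler_config = vllm_config.profiler_config
    if profiler_config.profiler != "torch":
        return None  # profiling disabled; the worker keeps self.profiler = None
    return TorchProfilerWrapper(
        profiler_config,
        worker_name=f"{vllm_config.instance_id}-rank-{rank}",
        local_rank=local_rank,
        activities=["CPU"],  # CPU activity only; AIU events ride along when supported
    )


# Lifecycle, as the worker drives it:
#   profile(is_start=True)   -> profiler.start()
#   execute_model(...)       -> profiler.step() before each batch
#   profile(is_start=False)  -> profiler.stop()
```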