Merged

Commits (24)
df7ae76  ⬆️ make vllm >=0.10.1.1,<=0.10.2  (prashantgupta24, Sep 15, 2025)
1584ba4  ⬆️ lockfile update to vllm >=0.10.1.1,<=0.10.2  (prashantgupta24, Sep 15, 2025)
d043a22  ⬆️ bump aftu to 0.2.3  (prashantgupta24, Sep 15, 2025)
2665cda  🚧 changes needed for 0.10.2  (prashantgupta24, Sep 16, 2025)
1772188  🎨 change the error msg  (prashantgupta24, Sep 16, 2025)
b00b4e5  🚧 test 0.10.2 instead of main  (prashantgupta24, Sep 16, 2025)
db5aea7  🐛 fix pooler stuff  (prashantgupta24, Sep 17, 2025)
703d59d  ⏪ revert change for main  (prashantgupta24, Sep 17, 2025)
0de9b1c  ⬆️ bump lowest to 0.10.1.1  (prashantgupta24, Sep 17, 2025)
6c03694  ⬆️ bump default to 0.10.2  (prashantgupta24, Sep 17, 2025)
5b2f32c  ♻️ make platform.py check simple  (prashantgupta24, Sep 17, 2025)
1c22ec7  🐛 set vllm_config for ClassifierPooler  (prashantgupta24, Sep 17, 2025)
125a9b3  Merge remote-tracking branch 'upstream/main' into upstream-versions  (prashantgupta24, Sep 22, 2025)
d63fb01  🚧 add backward compatibility code  (prashantgupta24, Sep 22, 2025)
d9965a9  ✅ add upstream compat tests  (prashantgupta24, Sep 22, 2025)
e5ab49b  🐛 fix request params  (prashantgupta24, Sep 22, 2025)
7c2199e  🐛 revert pytest-mock import  (prashantgupta24, Sep 22, 2025)
480a177  🚧 not needed?  (prashantgupta24, Sep 22, 2025)
493d727  ⏪ yep need those for pooler models  (prashantgupta24, Sep 22, 2025)
f93eac5  🎨 fix comment  (prashantgupta24, Sep 22, 2025)
25ba89a  🎨 remove extra assert  (prashantgupta24, Sep 22, 2025)
4f1a6a2  🎨 typo  (prashantgupta24, Sep 23, 2025)
1012c08  Merge branch 'main' into upstream-versions  (maxdebayser, Sep 24, 2025)
67cc853  fix pooler adapter  (maxdebayser, Sep 24, 2025)
pyproject.toml: 4 changes (2 additions & 2 deletions)

@@ -13,7 +13,7 @@ license = {text = "Apache 2"}
 dependencies = [
     "fms-model-optimizer[fp8]>=0.6.0",
     "ibm-fms>=1.2.1",
-    "vllm==0.10.1.1",
+    "vllm>=0.10.1.1,<=0.10.2",
     "pytest-mock>=3.15.0",
 ]
 requires-python = ">=3.11"
@@ -164,7 +164,7 @@ dev = [
     "pytest-timeout==2.3.1",
     "requests==2.32.3",
     "sentence-transformers==3.4.1",
-    "aiu-fms-testing-utils>=0.2.1",
+    "aiu-fms-testing-utils>=0.2.3",
 ]
 lint = [
     "clang-format==18.1.5",
tests/e2e/test_spyre_cb.py: 3 changes (1 addition & 2 deletions)

@@ -64,8 +64,7 @@ def test_api_cb_rejects_oversized_request(
     overflow_prompt = " ".join(["hi"] * max_model_len)
     max_tokens = 10

-    with pytest.raises(BadRequestError,
-                       match="This model's maximum context length is"):
+    with pytest.raises(BadRequestError, match="maximum context length is"):

Review comment from prashantgupta24 (Collaborator, Author):
This is a bug in vllm upstream - opened a PR vllm-project/vllm#24995

         client.completions.create(
             model=model.name,
             prompt=overflow_prompt,
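
Side note on the relaxed match: pytest.raises(match=...) applies the pattern with re.search against the string form of the raised exception, so anchoring on the shorter substring "maximum context length is" tolerates upstream rewording of the message prefix. A minimal, self-contained sketch of that behavior (the error string below is made up for illustration, it is not vLLM's):

    import pytest

    def test_match_is_a_regex_search():
        # Passes for either phrasing, old or new, because re.search only
        # needs the pattern to occur somewhere in the exception message.
        with pytest.raises(ValueError, match="maximum context length is"):
            raise ValueError(
                "This model's maximum context length is 2048 tokens")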
tests/spyre_util.py: 39 changes (26 additions & 13 deletions)

@@ -1,3 +1,4 @@
+import inspect
 import math
 import os
 import random
@@ -353,19 +354,31 @@ def create_random_request(
     assert (len(prompt_token_ids) == num_tokens
             ), f"need {num_tokens} but got {len(prompt_token_ids)}"

-    return Request(
-        request_id=str(request_id),
-        prompt_token_ids=prompt_token_ids,
-        multi_modal_hashes=None,
-        multi_modal_placeholders=None,
-        sampling_params=sampling_params,
-        eos_token_id=None,
-        arrival_time=0,
-        lora_request=None,
-        pooling_params=None,
-        cache_salt=None,
-        multi_modal_kwargs=None,
-    )
+    # temporary backward compat code for 0.10.1.1
+    annotations = inspect.getfullargspec(Request).annotations
+    extra_args = {}  # noqa
+    if ('multi_modal_hashes' in annotations):
+        extra_args.update({
+            'multi_modal_hashes': None,
+        })
+    if ('multi_modal_placeholders' in annotations):
+        extra_args.update({
+            'multi_modal_placeholders': None,
+        })
+    if ('multi_modal_kwargs' in annotations):
+        extra_args.update({
+            'multi_modal_kwargs': None,
+        })
+
+    return Request(request_id=str(request_id),
+                   prompt_token_ids=prompt_token_ids,
+                   sampling_params=sampling_params,
+                   eos_token_id=None,
+                   arrival_time=0,
+                   lora_request=None,
+                   pooling_params=None,
+                   cache_salt=None,
+                   **extra_args)


 def skip_unsupported_tp_size(size: int, backend: str):
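
The shim above feature-detects the installed Request signature instead of comparing version strings, so the same call site works on both ends of the supported vLLM range. A minimal, generalized sketch of the pattern; make_request and its parameters are hypothetical names, not vLLM APIs:

    import inspect

    def make_request(request_id: str, new_field=None):  # hypothetical callee
        return {"request_id": request_id, "new_field": new_field}

    def call_with_supported_kwargs(fn, **kwargs):
        # Forward only the keyword arguments the installed version accepts,
        # so callers survive an upstream parameter being added or removed.
        accepted = inspect.getfullargspec(fn).args
        return fn(**{k: v for k, v in kwargs.items() if k in accepted})

    # 'legacy_field' is dropped because make_request no longer accepts it.
    call_with_supported_kwargs(make_request, request_id="0", legacy_field=None)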
tests/utils/test_upstream_compatibility.py: 60 changes (60 additions & 0 deletions)

@@ -21,3 +21,63 @@ def test_init_distributed_environment():
     assert 'timeout' \
         not in annotations, ("we should remove compat code which is now"
                              " part of released vllm version")
+
+
+def test_request():
+
+    from vllm.v1.request import Request
+
+    annotations = inspect.getfullargspec(Request).annotations
+
+    if VLLM_VERSION == "vLLM:main":
+        assert 'multi_modal_kwargs' not in annotations
+        assert 'multi_modal_hashes' not in annotations
+        assert 'multi_modal_placeholders' not in annotations
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert 'multi_modal_hashes' in annotations
+        assert 'multi_modal_placeholders' in annotations
+        assert 'multi_modal_kwargs' in annotations
+    # The compat code introduced in the PR below can now be removed:
+    # https://github.com/vllm-project/vllm-spyre/pull/463
+
+
+def test_model_runner_output():
+
+    from vllm.v1.outputs import ModelRunnerOutput
+
+    annotations = inspect.getfullargspec(ModelRunnerOutput).annotations
+
+    if VLLM_VERSION == "vLLM:main":
+        assert 'spec_token_ids' not in annotations
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert 'spec_token_ids' in annotations
+    # The compat code introduced in the PR below can now be removed:
+    # https://github.com/vllm-project/vllm-spyre/pull/463
+
+
+def test_pooling_metadata():
+
+    from vllm.v1.pool.metadata import PoolingMetadata
+
+    has_build_pooling_cursor = getattr(PoolingMetadata, "build_pooling_cursor",
+                                       False)
+
+    if VLLM_VERSION == "vLLM:main":
+        assert has_build_pooling_cursor
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert not has_build_pooling_cursor
+    # The compat code introduced in the PR below can now be removed:
+    # https://github.com/vllm-project/vllm-spyre/pull/463
+
+
+def test_scheduler_output():
+
+    from vllm.v1.core.sched.output import SchedulerOutput
+    annotations = inspect.getfullargspec(SchedulerOutput).annotations
+
+    if VLLM_VERSION == "vLLM:main":
+        assert 'free_encoder_mm_hashes' in annotations
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert 'free_encoder_mm_hashes' not in annotations
+    # The compat code introduced in the PR below can now be removed:
+    # https://github.com/vllm-project/vllm-spyre/pull/463
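
These tests pin the upstream API shape on both ends of the supported range, so stale compat code fails loudly once the lowest supported vLLM catches up with main. A minimal, self-contained sketch of the probe they rely on, using stand-in dataclasses rather than vLLM's real classes: inspect.getfullargspec on a class reads the generated constructor's annotations, so adding or removing a field upstream flips the check.

    import inspect
    from dataclasses import dataclass

    @dataclass
    class OldOutput:  # stand-in for the 0.10.1.1-era signature
        req_ids: list
        spec_token_ids: list

    @dataclass
    class NewOutput:  # stand-in for the signature on vLLM main
        req_ids: list

    def has_ctor_field(cls, name: str) -> bool:
        # Same probe the tests above apply to Request and ModelRunnerOutput.
        return name in inspect.getfullargspec(cls).annotations

    assert has_ctor_field(OldOutput, "spec_token_ids")
    assert not has_ctor_field(NewOutput, "spec_token_ids")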
Loading