
Commit a73f405

hmellor authored and bringlein committed
Update rope_scaling to rope_parameters in preparation for Transformers v5 (vllm-project#28542)
Signed-off-by: Harry Mellor <[email protected]>
1 parent 2208ac6 commit a73f405
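
In short: the standalone rope_theta config field and the rope_scaling dict are folded into a single rope_parameters dict, matching the layout expected in Transformers v5. A minimal sketch of the shape change (values are illustrative only):

# Before this commit: two sibling config fields.
old_style = {
    "rope_theta": 1000000.0,
    "rope_scaling": {"rope_type": "yarn", "factor": 4.0},
}

# After this commit: one combined dict, with rope_theta nested inside it.
new_style = {
    "rope_parameters": {
        "rope_theta": 1000000.0,
        "rope_type": "yarn",
        "factor": 4.0,
    },
}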

File tree

104 files changed: +544 -912 lines changed


.buildkite/test-pipeline.yaml

Lines changed: 3 additions & 3 deletions
@@ -872,12 +872,12 @@ steps:
   optional: true
   commands:
   - pip install --upgrade git+https://github.com/huggingface/transformers
-  - pytest -v -s tests/models/test_initialization.py -k 'not (Gemma3 or ModernBert or Qwen2_5_VL or Qwen2_5vl or Qwen2VL or TransformersMultiModalEmbeddingModel or TransformersMultiModalForSequenceClassification or Ultravox or Phi4Multimodal or LlavaNextVideo or MiniCPMO or Lfm2Moe or PaliGemma or RobertaForSequenceClassification or Ovis2_5 or Fuyu or DeepseekOCR or KimiVL)'
+  - pytest -v -s tests/models/test_initialization.py -k 'not (Ultravox or Phi4Multimodal or MiniCPMO or Lfm2Moe or RobertaForSequenceClassification or Ovis2_5 or DeepseekOCR or KimiVL)'
   - pytest -v -s tests/models/test_transformers.py
   # - pytest -v -s tests/models/multimodal/processing/
-  - pytest -v -s tests/models/multimodal/test_mapping.py -k 'not (Gemma3 or Qwen2VL or Qwen2_5_VL)'
+  - pytest -v -s tests/models/multimodal/test_mapping.py
   - python3 examples/offline_inference/basic/chat.py
-  # - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
+  - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
   # Whisper needs spawn method to avoid deadlock
   - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper


benchmarks/kernels/benchmark_mrope.py

Lines changed: 7 additions & 12 deletions
@@ -6,7 +6,7 @@
 #
 # The CSV file (named with current date/time) contains these columns:
 # model_name, tp_size, num_tokens, num_heads, num_kv_heads, head_dim, max_position,
-# rope_theta, is_neox_style, rope_scaling, dtype, torch_mean, torch_median, torch_p99,
+# is_neox_style, rope_parameters, dtype, torch_mean, torch_median, torch_p99,
 # torch_min, torch_max, triton_mean, triton_median, triton_p99, triton_min, triton_max,
 # speedup
 #
@@ -86,9 +86,8 @@ def benchmark_mrope(
     num_heads: int,
     num_kv_heads: int,
     max_position: int = 8192,
-    rope_theta: float = 10000,
     is_neox_style: bool = True,
-    rope_scaling: dict[str, Any] = None,
+    rope_parameters: dict[str, Any] | None = None,
     dtype: torch.dtype = torch.bfloat16,
     seed: int = 0,
     warmup_iter: int = 10,
@@ -102,9 +101,8 @@ def benchmark_mrope(
         head_size=head_dim,
         rotary_dim=head_dim,
         max_position=max_position,
-        base=rope_theta,
         is_neox_style=is_neox_style,
-        rope_scaling=rope_scaling,
+        rope_parameters=rope_parameters,
         dtype=dtype,
     ).to(device=device)

@@ -203,9 +201,8 @@ def benchmark_mrope(
         num_kv_heads,
         head_dim,
         max_position,
-        rope_theta,
         is_neox_style,
-        str(rope_scaling),
+        str(rope_parameters),
         str(dtype).split(".")[-1],
         torch_stats["mean"],
         torch_stats["median"],
@@ -255,9 +252,8 @@ def benchmark_mrope(
         "num_kv_heads",
         "head_dim",
         "max_position",
-        "rope_theta",
         "is_neox_style",
-        "rope_scaling",
+        "rope_parameters",
         "dtype",
         "torch_mean",
         "torch_median",
@@ -303,7 +299,7 @@ def benchmark_mrope(
     q_size = num_heads * head_dim
     kv_size = num_kv_heads * head_dim
     is_neox_style = True
-    rope_theta = config.rope_theta
+    rope_parameters = config.rope_parameters
     max_position = config.max_position_embeddings

     for num_tokens in num_tokens_list:
@@ -315,9 +311,8 @@ def benchmark_mrope(
             num_heads=num_heads,
             num_kv_heads=num_kv_heads,
             max_position=max_position,
-            rope_theta=rope_theta,
             is_neox_style=is_neox_style,
-            rope_scaling=config.rope_scaling,
+            rope_parameters=rope_parameters,
             dtype=getattr(torch, args.dtype),
             seed=args.seed,
             warmup_iter=args.warmup_iter,
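
The benchmark no longer reads config.rope_theta and config.rope_scaling separately; it forwards config.rope_parameters as a single dict. A hedged sketch of the construction call this pattern feeds into, using get_rope as the tests in this commit do, with placeholder sizes standing in for real model-config values:

import torch
from vllm.model_executor.layers.rotary_embedding import get_rope

# Placeholder values; the benchmark reads these from the model's HF config.
head_dim = 128
max_position = 8192
rope_parameters = {"rope_type": "default", "rope_theta": 1000000.0}

rope = get_rope(
    head_size=head_dim,
    rotary_dim=head_dim,
    max_position=max_position,
    is_neox_style=True,
    rope_parameters=rope_parameters,
    dtype=torch.bfloat16,
)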

examples/offline_inference/context_extension.py

Lines changed: 3 additions & 3 deletions
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
 This script demonstrates how to extend the context length
-of a Qwen model using the YARN method (rope_scaling)
+of a Qwen model using the YARN method (rope_parameters)
 and run a simple chat example.

 Usage:
@@ -19,8 +19,8 @@ def create_llm():

     # Use yarn to extend context
     hf_overrides = {
-        "rope_theta": rope_theta,
-        "rope_scaling": {
+        "rope_parameters": {
+            "rope_theta": rope_theta,
             "rope_type": "yarn",
             "factor": factor,
             "original_max_position_embeddings": original_max_position_embeddings,

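For reference, this is roughly how the updated example feeds the override into vLLM; the numbers and model name below are placeholders rather than values taken from the example itself:

from vllm import LLM

rope_theta = 1000000.0
factor = 4.0
original_max_position_embeddings = 32768

hf_overrides = {
    "rope_parameters": {
        "rope_theta": rope_theta,
        "rope_type": "yarn",
        "factor": factor,
        "original_max_position_embeddings": original_max_position_embeddings,
    },
}

# Assuming a Qwen checkpoint here, as the example's docstring does.
llm = LLM(model="Qwen/Qwen3-0.6B", hf_overrides=hf_overrides)
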
tests/compile/test_functionalization.py

Lines changed: 2 additions & 2 deletions
@@ -137,7 +137,7 @@ def __init__(self, head_dim=64, rotary_dim=None, max_position=2048, base=10000):
             self.head_dim,
             rotary_dim=self.rotary_dim,
             max_position=max_position,
-            base=base,
+            rope_parameters={"rope_type": "default", "rope_theta": base},
         )

     def forward(self, positions, q, k):
@@ -172,7 +172,7 @@ def __init__(self, head_dim=64, num_heads=4, max_position=2048, base=10000):
             self.head_dim,
             rotary_dim=self.head_dim,
             max_position=max_position,
-            base=base,
+            rope_parameters={"rope_type": "default", "rope_theta": base},
         )

     def forward(self, positions, hidden_states):

tests/kernels/core/test_mrope.py

Lines changed: 5 additions & 11 deletions
@@ -5,11 +5,11 @@
 import pytest
 import torch
 from packaging.version import Version
-from transformers import AutoConfig
 from transformers import __version__ as TRANSFORMERS_VERSION

 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.platforms import current_platform
+from vllm.transformers_utils.config import get_config

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@@ -98,8 +98,7 @@ def test_mrope(
     atol = model_info.atol
     rtol = model_info.rtol

-    config = AutoConfig.from_pretrained(model_name)
-    config = config.get_text_config()
+    config = get_config(model_name, False).get_text_config()

     # get the model config
     total_num_kv_heads = config.num_key_value_heads
@@ -113,7 +112,6 @@ def test_mrope(
     )
     is_neox_style = True

-    rope_theta = config.rope_theta
     max_position = config.max_position_embeddings
     partial_rotary_factor = getattr(config, "partial_rotary_factor", 1.0)
     rotary_dim = int(head_dim * partial_rotary_factor)
@@ -122,9 +120,8 @@ def test_mrope(
         head_size=head_dim,
         rotary_dim=rotary_dim,
         max_position=max_position,
-        base=rope_theta,
         is_neox_style=is_neox_style,
-        rope_scaling=config.rope_scaling,
+        rope_parameters=config.rope_parameters,
         dtype=dtype,
     ).to(device=device)

@@ -173,8 +170,7 @@ def test_mrope_torch_compile_tracing(
     atol = model_info.atol
     rtol = model_info.rtol

-    config = AutoConfig.from_pretrained(model_name)
-    config = config.get_text_config()
+    config = get_config(model_name, False).get_text_config()

     # get the model config
     total_num_kv_heads = config.num_key_value_heads
@@ -187,7 +183,6 @@ def test_mrope_torch_compile_tracing(
         else config.hidden_size // total_num_heads
     )
     is_neox_style = True
-    rope_theta = config.rope_theta
     max_position = config.max_position_embeddings
     partial_rotary_factor = getattr(config, "partial_rotary_factor", 1.0)
     rotary_dim = int(head_dim * partial_rotary_factor)
@@ -196,9 +191,8 @@ def test_mrope_torch_compile_tracing(
         head_size=head_dim,
         rotary_dim=rotary_dim,
         max_position=max_position,
-        base=rope_theta,
         is_neox_style=is_neox_style,
-        rope_scaling=config.rope_scaling,
+        rope_parameters=config.rope_parameters,
         dtype=dtype,
     ).to(device=device)

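
Besides the rope argument change, the test now loads the text config through vLLM's own helper instead of transformers.AutoConfig. A small before/after sketch; the model name is a placeholder and the second positional argument of get_config is assumed to be trust_remote_code:

from vllm.transformers_utils.config import get_config

model_name = "Qwen/Qwen2.5-VL-3B-Instruct"  # placeholder

# Old path (removed above):
#   from transformers import AutoConfig
#   config = AutoConfig.from_pretrained(model_name).get_text_config()

# New path:
config = get_config(model_name, False).get_text_config()

# One attribute now replaces config.rope_theta + config.rope_scaling.
rope_parameters = config.rope_parameters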

tests/kernels/core/test_pos_encoding.py

Lines changed: 20 additions & 19 deletions
@@ -74,7 +74,7 @@ def test_rotary_embedding(
     device: str,
     use_key: bool,
     max_position: int = 8192,
-    base: float = 10000,
+    rope_theta: float = 10000,
 ) -> None:
     if rotary_dim is None:
         rotary_dim = head_size
@@ -83,7 +83,8 @@ def test_rotary_embedding(
     torch.set_default_device(device)
     if rotary_dim is None:
         rotary_dim = head_size
-    rope = get_rope(head_size, rotary_dim, max_position, base, is_neox_style)
+    rope_parameters = {"rope_type": "default", "rope_theta": rope_theta}
+    rope = get_rope(head_size, rotary_dim, max_position, is_neox_style, rope_parameters)
     rope = rope.to(dtype=dtype, device=torch.get_default_device())

     positions = torch.randint(0, max_position, (batch_size, seq_len))
@@ -120,19 +121,19 @@ def test_rotary_embedding(
 @torch.inference_mode()
 def test_rope_module_cache():
     MAX_POSITIONS = [123, 1234]
-    BASES = [10000, 1000000]
-    ROPE_SCALINGS = (
-        None,
+    ROPE_THETAS = [10000, 1000000]
+    ROPE_PARAMETERS = (
+        {"rope_type": "default"},
         {"rope_type": "linear", "factor": (1,)},
         {"rope_type": "dynamic", "factor": 1},
     )
     settings = (
         HEAD_SIZES,
         ROTARY_DIMS,
         MAX_POSITIONS,
-        BASES,
+        ROPE_THETAS,
         IS_NEOX_STYLE,
-        ROPE_SCALINGS,
+        ROPE_PARAMETERS,
         DTYPES,
     )
     rope_setting_id_map: dict[str, int] = {}
@@ -141,20 +142,20 @@ def test_rope_module_cache():
             head_size,
             rotary_dim,
             max_position,
-            base,
-            is_neox_stype,
-            rope_scaling,
+            rope_theta,
+            is_neox_style,
+            rope_parameters,
             dtype,
         ) = setting
         if rotary_dim is None:
             rotary_dim = head_size
+        rope_parameters["rope_theta"] = rope_theta
         rope = get_rope(
             head_size,
             rotary_dim,
             max_position,
-            base,
-            is_neox_stype,
-            rope_scaling,
+            is_neox_style,
+            rope_parameters,
             dtype,
         )
         # different settings cannot share the same rope module
@@ -168,20 +169,20 @@ def test_rope_module_cache():
             head_size,
             rotary_dim,
             max_position,
-            base,
-            is_neox_stype,
-            rope_scaling,
+            rope_theta,
+            is_neox_style,
+            rope_parameters,
             dtype,
         ) = setting
         if rotary_dim is None:
             rotary_dim = head_size
+        rope_parameters["rope_theta"] = rope_theta
         rope = get_rope(
             head_size,
             rotary_dim,
             max_position,
-            base,
-            is_neox_stype,
-            rope_scaling,
+            is_neox_style,
+            rope_parameters,
             dtype,
         )
         # check if cache take effect
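
The positional signature of get_rope changes accordingly: base disappears as a standalone argument and the scaling dict becomes rope_parameters, which now carries rope_theta itself. A minimal sketch of the two call forms exercised by this test (argument values are illustrative):

from vllm.model_executor.layers.rotary_embedding import get_rope

head_size, rotary_dim, max_position = 64, 64, 8192
is_neox_style, rope_theta = True, 10000.0

# Old call form (pre-commit):
#   rope = get_rope(head_size, rotary_dim, max_position, base, is_neox_style, rope_scaling)

# New call form: rope_theta travels inside rope_parameters.
rope_parameters = {"rope_type": "default", "rope_theta": rope_theta}
rope = get_rope(head_size, rotary_dim, max_position, is_neox_style, rope_parameters)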

tests/kernels/moe/test_gpt_oss_triton_kernels.py

Lines changed: 1 addition & 1 deletion
@@ -201,7 +201,7 @@ class ModelConfig:
     sliding_window: int = 128
     initial_context_length: int = 4096
     rope_theta: float = 150000.0
-    rope_scaling_factor: float = 32.0
+    rope_parameters_factor: float = 32.0
     rope_ntk_alpha: float = 1.0
     rope_ntk_beta: float = 32.0


tests/models/language/pooling/test_nomic_max_model_len.py

Lines changed: 9 additions & 7 deletions
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 # ruff: noqa: SIM117
+from typing import Any
+
 import pytest

 from ...utils import EmbedModelInfo
@@ -79,8 +81,8 @@ def test_set_max_model_len_illegal(model_info, vllm_runner):
 @pytest.mark.parametrize("model_info", MODELS)
 def test_use_rope_scaling_legal(model_info, vllm_runner):
     hf_overrides = {
-        "rope_theta": rope_theta,
-        "rope_scaling": {
+        "rope_parameters": {
+            "rope_theta": rope_theta,
             "rope_type": "yarn",
             "factor": factor,
             "original_max_position_embeddings": original_max_position_embeddings,
@@ -96,9 +98,9 @@ def test_use_rope_scaling_legal(model_info, vllm_runner):

 @pytest.mark.parametrize("model_info", MODELS)
 def test_use_rope_scaling_illegal(model_info, vllm_runner):
-    hf_overrides = {
-        "rope_theta": rope_theta,
-        "rope_scaling": {
+    hf_overrides: dict[str, Any] = {
+        "rope_parameters": {
+            "rope_theta": rope_theta,
             "rope_type": "yarn",
             "factor": factor,
             "original_max_position_embeddings": original_max_position_embeddings,
@@ -115,8 +117,8 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
         pass

     hf_overrides = {
-        "rope_theta": rope_theta,
-        "rope_scaling": {
+        "rope_parameters": {
+            "rope_theta": rope_theta,
             "rope_type": "yarn",
             "factor": factor,
             "original_max_position_embeddings": original_max_position_embeddings,
