Commit c2121f2

DarkLight1337 authored and FerdinandZhong committed
[Misc] Standardize RoPE handling for Qwen2-VL (vllm-project#9250)
Signed-off-by: qishuai <[email protected]>
1 parent ed667f8 · commit c2121f2
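
In short: this commit standardizes all RoPE scaling configuration on the "rope_type" key (vLLM previously accepted both "type" and "rope_type"), removes the legacy "su" alias for "longrope" from get_rope, and reaches Qwen2-VL's mRoPE through the standard "default" rope type plus an "mrope_section" entry instead of a dedicated "mrope" type. The diff assumes the HF config has already been normalized by patch_rope_scaling at load time. A minimal sketch of that normalization, inferred from this diff rather than copied from vLLM:

    def normalize_rope_scaling(rope_scaling: dict) -> dict:
        """Sketch (not vLLM's actual code) of the key normalization this diff assumes."""
        rope_scaling = dict(rope_scaling)
        # Mirror the legacy "type" key into the standard "rope_type" key.
        if "rope_type" not in rope_scaling and "type" in rope_scaling:
            rope_scaling["rope_type"] = rope_scaling.pop("type")
        # Fold legacy type names into their standardized equivalents.
        if rope_scaling.get("rope_type") == "su":  # early alias for "longrope"
            rope_scaling["rope_type"] = "longrope"
        if rope_scaling.get("rope_type") == "mrope":  # Qwen2-VL: "default" + mrope_section
            rope_scaling["rope_type"] = "default"
        return rope_scaling

With this in place, every consumer below can read rope_scaling["rope_type"] directly.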

File tree

16 files changed: +102 −200 lines

benchmarks/kernels/benchmark_rope.py

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@ def benchmark_rope_kernels_multi_lora(
     # batched RoPE can take multiple scaling factors
     batched_rope = get_rope(head_size, rotary_dim, max_position, base,
                             is_neox_style, {
-                                "type": "linear",
+                                "rope_type": "linear",
                                 "factor": tuple(scaling_factors)
                             })
     # non-batched RoPE takes only one scaling factor, we create multiple
@@ -41,7 +41,7 @@ def benchmark_rope_kernels_multi_lora(
         non_batched_ropes.append(
             get_rope(head_size, rotary_dim, max_position, base, is_neox_style,
                      {
-                         "type": "linear",
+                         "rope_type": "linear",
                          "factor": (scaling_factor, )
                      }))
 

requirements-common.txt

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ numpy < 2.0.0
 requests >= 2.26.0
 tqdm
 py-cpuinfo
-transformers >= 4.45.0  # Required for Llama 3.2.
+transformers >= 4.45.2  # Required for Llama 3.2 and Qwen2-VL.
 tokenizers >= 0.19.1  # Required for Llama 3.
 protobuf # Required by LlamaTokenizer.
 fastapi >= 0.107.0, < 0.113.0; python_version < '3.9'

tests/kernels/test_pos_encoding.py

Lines changed: 4 additions & 4 deletions
@@ -105,7 +105,7 @@ def test_batched_rotary_embedding(
     if rotary_dim is None:
         rotary_dim = head_size
     rope = get_rope(head_size, rotary_dim, max_position, base, is_neox_style, {
-        "type": "linear",
+        "rope_type": "linear",
         "factor": (1, )
     })
     rope = rope.to(dtype=dtype)
@@ -166,7 +166,7 @@ def test_batched_rotary_embedding_multi_lora(
         rotary_dim = head_size
     scaling_factors: List[int] = [1, 2, 4]
     rope = get_rope(head_size, rotary_dim, max_position, base, is_neox_style, {
-        "type": "linear",
+        "rope_type": "linear",
         "factor": tuple(scaling_factors)
     })
     rope = rope.to(dtype=dtype)
@@ -211,10 +211,10 @@ def test_rope_module_cache():
     MAX_POSITIONS = [123, 1234]
     BASES = [10000, 1000000]
     ROPE_SCALINGS = (None, {
-        "type": "linear",
+        "rope_type": "linear",
         "factor": (1, )
     }, {
-        "type": "dynamic",
+        "rope_type": "dynamic",
         "factor": 1
     })
     settings = (HEAD_SIZES, ROTARY_DIMS, MAX_POSITIONS, BASES, IS_NEOX_STYLE,

tests/lora/test_layers.py

Lines changed: 1 addition & 1 deletion
@@ -951,7 +951,7 @@ def test_rotary_embedding_long_context(dist_init, num_loras, device,
     lora_rope.create_lora_weights(max_loras, lora_config)
     linear_rope = get_rope(head_size, rotary_dim, max_position, base,
                            is_neox_style, {
-                               "type": "linear",
+                               "rope_type": "linear",
                                "factor": scaling_factors
                            })
     linear_rope = linear_rope.to(dtype=dtype)

tests/test_config.py

Lines changed: 2 additions & 2 deletions
@@ -64,9 +64,9 @@ def test_get_sliding_window():
 
 
 def test_rope_customization():
-    TEST_ROPE_SCALING = {"type": "dynamic", "factor": 2.0}
+    TEST_ROPE_SCALING = {"rope_type": "dynamic", "factor": 2.0}
     TEST_ROPE_THETA = 16_000_000.0
-    LONGCHAT_ROPE_SCALING = {"type": "linear", "factor": 8.0}
+    LONGCHAT_ROPE_SCALING = {"rope_type": "linear", "factor": 8.0}
 
     llama_model_config = ModelConfig(
         "meta-llama/Meta-Llama-3-8B-Instruct",

vllm/config.py

Lines changed: 7 additions & 14 deletions
@@ -1739,16 +1739,10 @@ def _get_and_verify_max_len(
 
     rope_scaling = getattr(hf_config, "rope_scaling", None)
     if rope_scaling is not None:
-        if "type" in rope_scaling:
-            rope_type = rope_scaling["type"]
-        elif "rope_type" in rope_scaling:
-            rope_type = rope_scaling["rope_type"]
-        else:
-            raise ValueError(
-                "rope_scaling must have a 'type' or 'rope_type' key.")
+        # No need to consider "type" key because of patch_rope_scaling when
+        # loading HF config
+        rope_type = rope_scaling["rope_type"]
 
-        # The correct one should be "longrope", kept "su" here
-        # to be backward compatible
         if rope_type not in ("su", "longrope", "llama3"):
             if disable_sliding_window:
                 # TODO(robertgshaw): Find a model that supports rope_scaling
@@ -1758,11 +1752,10 @@ def _get_and_verify_max_len(
                     "with rope_scaling. Please raise an issue so we can "
                     "investigate.")
 
-        if rope_type == "mrope":
-            scaling_factor = 1
-        else:
-            assert "factor" in rope_scaling
-            scaling_factor = rope_scaling["factor"]
+        # NOTE: rope_type == "default" does not define factor
+        # https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/modeling_rope_utils.py
+        scaling_factor = rope_scaling.get("factor", 1.0)
+
         if rope_type == "yarn":
             derived_max_model_len = rope_scaling[
                 "original_max_position_embeddings"]

vllm/engine/arg_utils.py

Lines changed: 6 additions & 5 deletions
@@ -454,11 +454,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
                 'None, we assume the model weights are not '
                 'quantized and use `dtype` to determine the data '
                 'type of the weights.')
-        parser.add_argument('--rope-scaling',
-                            default=None,
-                            type=json.loads,
-                            help='RoPE scaling configuration in JSON format. '
-                            'For example, {"type":"dynamic","factor":2.0}')
+        parser.add_argument(
+            '--rope-scaling',
+            default=None,
+            type=json.loads,
+            help='RoPE scaling configuration in JSON format. '
+            'For example, {"rope_type":"dynamic","factor":2.0}')
         parser.add_argument('--rope-theta',
                             default=None,
                             type=float,
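
Since the flag is parsed with json.loads, the value passed on the command line must be valid JSON, and what reaches vLLM is a plain dict keyed by "rope_type". A small standalone check (plain Python, no vLLM imports):

    import json

    # What --rope-scaling '{"rope_type":"dynamic","factor":2.0}' becomes internally:
    rope_scaling = json.loads('{"rope_type":"dynamic","factor":2.0}')
    assert rope_scaling == {"rope_type": "dynamic", "factor": 2.0}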

vllm/model_executor/layers/rotary_embedding.py

Lines changed: 28 additions & 19 deletions
@@ -920,13 +920,10 @@ def get_rope(
         rotary_emb = RotaryEmbedding(head_size, rotary_dim, max_position, base,
                                      is_neox_style, dtype)
     else:
-        scaling_type = rope_scaling[
-            "type"] if "type" in rope_scaling else rope_scaling["rope_type"]
-        # The correct one should be "longrope" but keep "su" here
-        # for backward compatible
-        if scaling_type not in {"su", "longrope"}:
-            scaling_factor = rope_scaling.get("factor", 1.0)
+        scaling_type = rope_scaling["rope_type"]
+
         if scaling_type == "llama3":
+            scaling_factor = rope_scaling["factor"]
             low_freq_factor = rope_scaling["low_freq_factor"]
             high_freq_factor = rope_scaling["high_freq_factor"]
             original_max_position = rope_scaling[
@@ -937,16 +934,39 @@ def get_rope(
                                                  scaling_factor, low_freq_factor,
                                                  high_freq_factor,
                                                  original_max_position)
+        elif scaling_type == "default":
+            if "mrope_section" in rope_scaling:
+                rotary_emb = MRotaryEmbedding(
+                    head_size,
+                    rotary_dim,
+                    max_position,
+                    base,
+                    is_neox_style,
+                    dtype,
+                    mrope_section=rope_scaling["mrope_section"],
+                )
+            else:
+                rotary_emb = RotaryEmbedding(
+                    head_size,
+                    rotary_dim,
+                    max_position,
+                    base,
+                    is_neox_style,
+                    dtype,
+                )
         elif scaling_type == "linear":
+            scaling_factor = rope_scaling["factor"]
             rotary_emb = LinearScalingRotaryEmbedding(head_size, rotary_dim,
                                                       max_position, base,
                                                       is_neox_style,
                                                       scaling_factor, dtype)
         elif scaling_type == "dynamic":
+            scaling_factor = rope_scaling["factor"]
             rotary_emb = DynamicNTKScalingRotaryEmbedding(
                 head_size, rotary_dim, max_position, base, is_neox_style,
                 scaling_factor, dtype)
         elif scaling_type == "yarn":
+            scaling_factor = rope_scaling["factor"]
             original_max_position = rope_scaling[
                 "original_max_position_embeddings"]
             extra_kwargs = {
@@ -961,6 +981,7 @@ def get_rope(
                                          scaling_factor, dtype,
                                          **extra_kwargs)
         elif scaling_type == "deepseek_yarn":
+            scaling_factor = rope_scaling["factor"]
             original_max_position = rope_scaling[
                 "original_max_position_embeddings"]
             # assert max_position == original_max_position * scaling_factor
@@ -973,9 +994,7 @@ def get_rope(
             rotary_emb = DeepseekScalingRotaryEmbedding(
                 head_size, rotary_dim, original_max_position, base,
                 is_neox_style, scaling_factor, dtype, **extra_kwargs)
-        # The correct one should be "longrope" but keep "su" here
-        # for backward compatible
-        elif scaling_type == "su" or scaling_type == "longrope":
+        elif scaling_type == "longrope":
             short_factor = rope_scaling["short_factor"]
             long_factor = rope_scaling["long_factor"]
             original_max_position = rope_scaling[
@@ -989,16 +1008,6 @@ def get_rope(
             head_size, rotary_dim, max_position, original_max_position,
             base, is_neox_style, dtype, short_factor, long_factor,
             **extra_kwargs)
-        elif scaling_type == "mrope":
-            rotary_emb = MRotaryEmbedding(
-                head_size,
-                rotary_dim,
-                max_position,
-                base,
-                is_neox_style,
-                dtype,
-                mrope_section=rope_scaling["mrope_section"],
-            )
         else:
             raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
     _ROPE_DICT[key] = rotary_emb
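
With the dedicated "mrope" branch gone, mRoPE is now reached through the "default" branch whenever "mrope_section" is present, and a plain RotaryEmbedding is built otherwise. A hedged usage sketch, following the positional call style of the tests above (the sizes and mrope_section values are illustrative, not from a real model config):

    from vllm.model_executor.layers.rotary_embedding import (MRotaryEmbedding,
                                                             get_rope)

    # "default" + mrope_section -> multimodal rotary embedding (Qwen2-VL style);
    # args: head_size, rotary_dim, max_position, base, is_neox_style, rope_scaling
    rope = get_rope(128, 128, 32768, 10000, True,
                    {"rope_type": "default", "mrope_section": [16, 24, 24]})
    assert isinstance(rope, MRotaryEmbedding)

Note that 16 + 24 + 24 = 64 = rotary_dim // 2, matching how mrope_section splits the rotary dimensions across the temporal, height, and width position components.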

vllm/model_executor/models/deepseek_v2.py

Lines changed: 1 addition & 1 deletion
@@ -242,7 +242,7 @@ def __init__(
                                        bias=False,
                                        quant_config=quant_config,
                                        prefix=f"{prefix}.o_proj")
-        rope_scaling['type'] = 'deepseek_yarn'
+        rope_scaling["rope_type"] = 'deepseek_yarn'
         self.rotary_emb = get_rope(qk_rope_head_dim,
                                    rotary_dim=qk_rope_head_dim,
                                    max_position=max_position_embeddings,

vllm/model_executor/models/phi3_small.py

Lines changed: 1 addition & 1 deletion
@@ -179,7 +179,7 @@ def __init__(
             rope_scaling["factor"] = self.rope_position_scale
         else:
             rope_scaling = {
-                "type": "linear",
+                "rope_type": "linear",
                 "factor": self.rope_position_scale,
             }
 