Skip to content

Commit f784296

Browse files
refactor(gguf): remove redundant vocab_size extraction
Remove extract_vocab_size_from_gguf() and the vocab_size override logic, as transformers now handles this internally via modeling_gguf_pytorch_utils. Also fixed tests/models/multimodal/generation/test_common.py to use the HuggingFace Transformers implementation for Gemma3 testing. Signed-off-by: Luciano Martins <[email protected]>
1 parent 9c071b9 commit f784296

File tree

2 files changed

+1
-62
lines changed

2 files changed

+1
-62
lines changed

tests/models/multimodal/generation/test_common.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@
221221
vllm_output_post_proc=model_utils.gemma3_vllm_to_hf_output,
222222
image_size_factors=[(0.25, 0.5, 1.0)],
223223
vllm_runner_kwargs={
224-
"model_impl": "auto",
224+
"model_impl": "transformers",
225225
},
226226
marks=[pytest.mark.core_model],
227227
),

vllm/transformers_utils/gguf_utils.py

Lines changed: 0 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -41,50 +41,6 @@ def detect_gguf_multimodal(model: str) -> Path | None:
4141
return None
4242

4343

44-
def extract_vocab_size_from_gguf(model_path: str) -> int | None:
45-
"""Extract vocabulary size from GGUF file's embedding tensor.
46-
47-
Reads the token_embd.weight tensor shape to determine the actual
48-
vocabulary size in the GGUF file. This is the source of truth for
49-
GGUF models and may differ from the HuggingFace tokenizer config,
50-
particularly for models with extended vocabularies (e.g., Unsloth).
51-
52-
Args:
53-
model_path: Path to GGUF model file
54-
55-
Returns:
56-
Vocabulary size (second dimension of token_embd.weight tensor)
57-
or None if extraction fails
58-
59-
Note:
60-
GGUF embedding tensor format: [hidden_size, vocab_size]
61-
We extract vocab_size from shape[1]
62-
"""
63-
try:
64-
reader = gguf.GGUFReader(model_path)
65-
for tensor in reader.tensors:
66-
if tensor.name == "token_embd.weight":
67-
# Tensor shape: [hidden_size, vocab_size]
68-
vocab_size = int(tensor.shape[1])
69-
logger.info(
70-
"Extracted vocab_size=%d from GGUF embedding tensor",
71-
vocab_size,
72-
)
73-
return vocab_size
74-
75-
logger.warning(
76-
"Could not find token_embd.weight tensor in GGUF file: %s",
77-
model_path,
78-
)
79-
return None
80-
except Exception as e:
81-
logger.warning(
82-
"Failed to extract vocab_size from GGUF file %s: %s",
83-
model_path,
84-
e,
85-
)
86-
return None
87-
8844

8945
def extract_vision_config_from_gguf(mmproj_path: str) -> "SiglipVisionConfig | None":
9046
"""Extract vision config parameters from mmproj.gguf metadata.
@@ -208,21 +164,4 @@ def maybe_patch_hf_config_from_gguf(
208164
)
209165
hf_config = new_hf_config
210166

211-
# Override vocab_size from GGUF embedding tensor for all GGUF models
212-
# This handles models with extended vocabularies (e.g., Unsloth)
213-
if model.endswith(".gguf"):
214-
vocab_size_from_gguf = extract_vocab_size_from_gguf(model)
215-
if vocab_size_from_gguf is not None:
216-
# Get text config (handles both regular and multimodal configs)
217-
text_config = hf_config.get_text_config()
218-
original_vocab_size = text_config.vocab_size
219-
220-
if original_vocab_size != vocab_size_from_gguf:
221-
logger.info(
222-
"Overriding vocab_size: %d (HF config) → %d (GGUF file)",
223-
original_vocab_size,
224-
vocab_size_from_gguf,
225-
)
226-
text_config.vocab_size = vocab_size_from_gguf
227-
228167
return hf_config

0 commit comments

Comments (0)