From 9bab5a48048cbb2ede4c4307e0f69004b524ff5e Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Mon, 1 Sep 2025 02:00:09 -0400 Subject: [PATCH 01/10] add support for regex Signed-off-by: n1ck-guo --- .../layers/quantization/auto_round.py | 39 ++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index fb285413ba9e..ca6ee09264a7 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import re from fractions import Fraction from typing import TYPE_CHECKING, Any, Optional, Union @@ -120,11 +121,39 @@ def from_config(cls, config: dict[str, Any]) -> "AutoRoundConfig": def get_layer_config(self, layer, layer_name: str): def get_config(name: str, quantized: bool = True): - cfg = self.extra_config.get(name, {}) if self.extra_config else {} + if not self.extra_config: + return ( + self.weight_bits if quantized else 16, + self.group_size if quantized else -1, + self.sym if quantized else True, + ) + + # exact match first + if name in self.extra_config: + cfg = self.extra_config[name] + return ( + cfg.get("bits", self.weight_bits if quantized else 16), + cfg.get("group_size", self.group_size if quantized else -1), + cfg.get("sym", self.sym if quantized else True), + ) + + # If there is no exact match, try a regular expression match + for pattern, cfg in self.extra_config.items(): + try: + if re.fullmatch(pattern, name): + return ( + cfg.get("bits", self.weight_bits if quantized else 16), + cfg.get("group_size", self.group_size if quantized else -1), + cfg.get("sym", self.sym if quantized else True), + ) + except (re.error, TypeError): + # If the regular expression is invalid or the key is not a string, skip + continue + return ( - cfg.get("bits", self.weight_bits if quantized else 16), - cfg.get("group_size", self.group_size if quantized else -1), - cfg.get("sym", self.sym if quantized else True), + self.weight_bits if quantized else 16, + self.group_size if quantized else -1, + self.sym if quantized else True, ) # 1. Exact match from config @@ -169,7 +198,7 @@ def get_config(name: str, quantized: bool = True): f"Fused module '{layer_name}' requires " f"consistent quant config for {sub_names}") - # 5. Fallback + # 5. Fallback or try a regular expression match return get_config(layer_name, quantized) def check_quantized(self, weight_bits: int) -> bool: From ef62369583c8bd0b2470ac3cc75b6d7e9a027dee Mon Sep 17 00:00:00 2001 From: Heng Guo Date: Mon, 1 Sep 2025 14:25:26 +0800 Subject: [PATCH 02/10] Update vllm/model_executor/layers/quantization/auto_round.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Signed-off-by: Heng Guo --- .../layers/quantization/auto_round.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index ca6ee09264a7..0a7ae0f7982a 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -137,8 +137,15 @@ def get_config(name: str, quantized: bool = True): cfg.get("sym", self.sym if quantized else True), ) - # If there is no exact match, try a regular expression match + # A heuristic to identify regex patterns and avoid misinterpreting literals. + # We only treat patterns with special regex characters as regexes. + # The '.' character is excluded as it's common in layer names. + REGEX_SPECIAL_CHARS = set(r"*+?^$()[]{}|\\") for pattern, cfg in self.extra_config.items(): + if not isinstance(pattern, str) or not any( + c in REGEX_SPECIAL_CHARS for c in pattern): + continue + try: if re.fullmatch(pattern, name): return ( @@ -146,8 +153,8 @@ def get_config(name: str, quantized: bool = True): cfg.get("group_size", self.group_size if quantized else -1), cfg.get("sym", self.sym if quantized else True), ) - except (re.error, TypeError): - # If the regular expression is invalid or the key is not a string, skip + except re.error: + # Invalid regex, ignore. continue return ( From 9924a05f60eda65794983d147f36f6e81511f8b6 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Mon, 1 Sep 2025 19:41:52 -0400 Subject: [PATCH 03/10] pre-commit Signed-off-by: n1ck-guo --- vllm/model_executor/layers/quantization/auto_round.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index ca6ee09264a7..1aa0bf7d9578 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -133,7 +133,8 @@ def get_config(name: str, quantized: bool = True): cfg = self.extra_config[name] return ( cfg.get("bits", self.weight_bits if quantized else 16), - cfg.get("group_size", self.group_size if quantized else -1), + cfg.get("group_size", + self.group_size if quantized else -1), cfg.get("sym", self.sym if quantized else True), ) @@ -142,8 +143,10 @@ def get_config(name: str, quantized: bool = True): try: if re.fullmatch(pattern, name): return ( - cfg.get("bits", self.weight_bits if quantized else 16), - cfg.get("group_size", self.group_size if quantized else -1), + cfg.get("bits", + self.weight_bits if quantized else 16), + cfg.get("group_size", + self.group_size if quantized else -1), cfg.get("sym", self.sym if quantized else True), ) except (re.error, TypeError): From 67e33cdee5d25312cd9ce3ad588c20ddcb2e6cd1 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Mon, 1 Sep 2025 21:26:40 -0400 Subject: [PATCH 04/10] clean Signed-off-by: n1ck-guo --- vllm/model_executor/layers/quantization/auto_round.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index 7eac3a36fa05..3595dd6db713 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -138,9 +138,6 @@ def get_config(name: str, quantized: bool = True): cfg.get("sym", self.sym if quantized else True), ) - # A heuristic to identify regex patterns and avoid misinterpreting literals. - # We only treat patterns with special regex characters as regexes. - # The '.' character is excluded as it's common in layer names. REGEX_SPECIAL_CHARS = set(r"*+?^$()[]{}|\\") for pattern, cfg in self.extra_config.items(): if not isinstance(pattern, str) or not any( From 4b63c8eba445bcece83390acc68f03140c9f1a25 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Mon, 1 Sep 2025 22:01:16 -0400 Subject: [PATCH 05/10] format Signed-off-by: n1ck-guo --- vllm/model_executor/layers/quantization/auto_round.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index 3595dd6db713..2a3b39353c57 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -127,7 +127,7 @@ def get_config(name: str, quantized: bool = True): self.group_size if quantized else -1, self.sym if quantized else True, ) - + # exact match first if name in self.extra_config: cfg = self.extra_config[name] @@ -137,7 +137,7 @@ def get_config(name: str, quantized: bool = True): self.group_size if quantized else -1), cfg.get("sym", self.sym if quantized else True), ) - + REGEX_SPECIAL_CHARS = set(r"*+?^$()[]{}|\\") for pattern, cfg in self.extra_config.items(): if not isinstance(pattern, str) or not any( @@ -156,7 +156,7 @@ def get_config(name: str, quantized: bool = True): except re.error: # Invalid regex, ignore. continue - + return ( self.weight_bits if quantized else 16, self.group_size if quantized else -1, From 3065ae5354d6521936de72edd18e247eace836e4 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Mon, 13 Oct 2025 04:32:48 -0400 Subject: [PATCH 06/10] update Signed-off-by: n1ck-guo --- vllm/model_executor/layers/quantization/auto_round.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index 2a3b39353c57..c9e13fda09da 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -145,7 +145,7 @@ def get_config(name: str, quantized: bool = True): continue try: - if re.fullmatch(pattern, name): + if re.search(pattern, name): return ( cfg.get("bits", self.weight_bits if quantized else 16), From 3efb8e6e60b4469357b2997bc48f81a063c0f9d8 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Mon, 13 Oct 2025 04:39:28 -0400 Subject: [PATCH 07/10] preci Signed-off-by: n1ck-guo --- .../model_executor/layers/quantization/auto_round.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index c9e13fda09da..2dcb88a192f4 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -133,24 +133,22 @@ def get_config(name: str, quantized: bool = True): cfg = self.extra_config[name] return ( cfg.get("bits", self.weight_bits if quantized else 16), - cfg.get("group_size", - self.group_size if quantized else -1), + cfg.get("group_size", self.group_size if quantized else -1), cfg.get("sym", self.sym if quantized else True), ) REGEX_SPECIAL_CHARS = set(r"*+?^$()[]{}|\\") for pattern, cfg in self.extra_config.items(): if not isinstance(pattern, str) or not any( - c in REGEX_SPECIAL_CHARS for c in pattern): + c in REGEX_SPECIAL_CHARS for c in pattern + ): continue try: if re.search(pattern, name): return ( - cfg.get("bits", - self.weight_bits if quantized else 16), - cfg.get("group_size", - self.group_size if quantized else -1), + cfg.get("bits", self.weight_bits if quantized else 16), + cfg.get("group_size", self.group_size if quantized else -1), cfg.get("sym", self.sym if quantized else True), ) except re.error: From 93782ce71bd334a9039f1d795c240121d31d4fad Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Mon, 13 Oct 2025 20:19:12 -0400 Subject: [PATCH 08/10] update Signed-off-by: n1ck-guo --- vllm/model_executor/layers/quantization/auto_round.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index ec9b6437bfed..48586c2065c1 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -153,7 +153,7 @@ def get_config(name: str, quantized: bool = True): continue try: - if re.search(pattern, name): + if re.search(re.compile(pattern), name) is not None: return ( cfg.get("bits", self.weight_bits if quantized else 16), cfg.get("group_size", self.group_size if quantized else -1), From 75b7c988811306f6913c1e509a9c6c73744cbbcd Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Mon, 13 Oct 2025 20:46:05 -0400 Subject: [PATCH 09/10] change import Signed-off-by: n1ck-guo --- vllm/model_executor/layers/quantization/auto_round.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index 48586c2065c1..00b39d9e317d 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import re +import regex as re from fractions import Fraction from typing import TYPE_CHECKING, Any From 044ca3cf1b58d34a8065fbcbf719c488b975b043 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Mon, 13 Oct 2025 20:50:53 -0400 Subject: [PATCH 10/10] preci Signed-off-by: n1ck-guo --- vllm/model_executor/layers/quantization/auto_round.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index 00b39d9e317d..0e4815be603e 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -1,10 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import regex as re from fractions import Fraction from typing import TYPE_CHECKING, Any +import regex as re import torch from vllm.logger import init_logger