From 9bab5a48048cbb2ede4c4307e0f69004b524ff5e Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Mon, 1 Sep 2025 02:00:09 -0400
Subject: [PATCH 01/10] add support for regex

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 .../layers/quantization/auto_round.py         | 39 ++++++++++++++++---
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index fb285413ba9e..ca6ee09264a7 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import re
 from fractions import Fraction
 from typing import TYPE_CHECKING, Any, Optional, Union
 
@@ -120,11 +121,39 @@ def from_config(cls, config: dict[str, Any]) -> "AutoRoundConfig":
     def get_layer_config(self, layer, layer_name: str):
 
         def get_config(name: str, quantized: bool = True):
-            cfg = self.extra_config.get(name, {}) if self.extra_config else {}
+            if not self.extra_config:
+                return (
+                    self.weight_bits if quantized else 16,
+                    self.group_size if quantized else -1,
+                    self.sym if quantized else True,
+                )
+            
+            # exact match first
+            if name in self.extra_config:
+                cfg = self.extra_config[name]
+                return (
+                    cfg.get("bits", self.weight_bits if quantized else 16),
+                    cfg.get("group_size", self.group_size if quantized else -1),
+                    cfg.get("sym", self.sym if quantized else True),
+                )
+            
+            # If there is no exact match, try a regular expression match
+            for pattern, cfg in self.extra_config.items():
+                try:
+                    if re.fullmatch(pattern, name):
+                        return (
+                            cfg.get("bits", self.weight_bits if quantized else 16),
+                            cfg.get("group_size", self.group_size if quantized else -1),
+                            cfg.get("sym", self.sym if quantized else True),
+                        )
+                except (re.error, TypeError):
+                    # If the regular expression is invalid or the key is not a string, skip
+                    continue
+            
             return (
-                cfg.get("bits", self.weight_bits if quantized else 16),
-                cfg.get("group_size", self.group_size if quantized else -1),
-                cfg.get("sym", self.sym if quantized else True),
+                self.weight_bits if quantized else 16,
+                self.group_size if quantized else -1,
+                self.sym if quantized else True,
             )
 
         # 1. Exact match from config
@@ -169,7 +198,7 @@ def get_config(name: str, quantized: bool = True):
                         f"Fused module '{layer_name}' requires "
                         f"consistent quant config for {sub_names}")
 
-        # 5. Fallback
+        # 5. Fallback or try a regular expression match
         return get_config(layer_name, quantized)
 
     def check_quantized(self, weight_bits: int) -> bool:

From ef62369583c8bd0b2470ac3cc75b6d7e9a027dee Mon Sep 17 00:00:00 2001
From: Heng Guo <heng.guo@intel.com>
Date: Mon, 1 Sep 2025 14:25:26 +0800
Subject: [PATCH 02/10] Update
 vllm/model_executor/layers/quantization/auto_round.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Heng Guo <heng.guo@intel.com>
---
 .../layers/quantization/auto_round.py               | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index ca6ee09264a7..0a7ae0f7982a 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -137,8 +137,15 @@ def get_config(name: str, quantized: bool = True):
                     cfg.get("sym", self.sym if quantized else True),
                 )
             
-            # If there is no exact match, try a regular expression match
+            # A heuristic to identify regex patterns and avoid misinterpreting literals.
+            # We only treat patterns with special regex characters as regexes.
+            # The '.' character is excluded as it's common in layer names.
+            REGEX_SPECIAL_CHARS = set(r"*+?^$()[]{}|\\")
             for pattern, cfg in self.extra_config.items():
+                if not isinstance(pattern, str) or not any(
+                        c in REGEX_SPECIAL_CHARS for c in pattern):
+                    continue
+
                 try:
                     if re.fullmatch(pattern, name):
                         return (
@@ -146,8 +153,8 @@ def get_config(name: str, quantized: bool = True):
                             cfg.get("group_size", self.group_size if quantized else -1),
                             cfg.get("sym", self.sym if quantized else True),
                         )
-                except (re.error, TypeError):
-                    # If the regular expression is invalid or the key is not a string, skip
+                except re.error:
+                    # Invalid regex, ignore.
                     continue
             
             return (

From 9924a05f60eda65794983d147f36f6e81511f8b6 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Mon, 1 Sep 2025 19:41:52 -0400
Subject: [PATCH 03/10] pre-commit

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 vllm/model_executor/layers/quantization/auto_round.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index ca6ee09264a7..1aa0bf7d9578 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -133,7 +133,8 @@ def get_config(name: str, quantized: bool = True):
                 cfg = self.extra_config[name]
                 return (
                     cfg.get("bits", self.weight_bits if quantized else 16),
-                    cfg.get("group_size", self.group_size if quantized else -1),
+                    cfg.get("group_size",
+                            self.group_size if quantized else -1),
                     cfg.get("sym", self.sym if quantized else True),
                 )
             
@@ -142,8 +143,10 @@ def get_config(name: str, quantized: bool = True):
                 try:
                     if re.fullmatch(pattern, name):
                         return (
-                            cfg.get("bits", self.weight_bits if quantized else 16),
-                            cfg.get("group_size", self.group_size if quantized else -1),
+                            cfg.get("bits",
+                                    self.weight_bits if quantized else 16),
+                            cfg.get("group_size",
+                                    self.group_size if quantized else -1),
                             cfg.get("sym", self.sym if quantized else True),
                         )
                 except (re.error, TypeError):

From 67e33cdee5d25312cd9ce3ad588c20ddcb2e6cd1 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Mon, 1 Sep 2025 21:26:40 -0400
Subject: [PATCH 04/10] clean: remove regex heuristic comments

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 vllm/model_executor/layers/quantization/auto_round.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index 7eac3a36fa05..3595dd6db713 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -138,9 +138,6 @@ def get_config(name: str, quantized: bool = True):
                     cfg.get("sym", self.sym if quantized else True),
                 )
             
-            # A heuristic to identify regex patterns and avoid misinterpreting literals.
-            # We only treat patterns with special regex characters as regexes.
-            # The '.' character is excluded as it's common in layer names.
             REGEX_SPECIAL_CHARS = set(r"*+?^$()[]{}|\\")
             for pattern, cfg in self.extra_config.items():
                 if not isinstance(pattern, str) or not any(

From 4b63c8eba445bcece83390acc68f03140c9f1a25 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Mon, 1 Sep 2025 22:01:16 -0400
Subject: [PATCH 05/10] format

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 vllm/model_executor/layers/quantization/auto_round.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index 3595dd6db713..2a3b39353c57 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -127,7 +127,7 @@ def get_config(name: str, quantized: bool = True):
                     self.group_size if quantized else -1,
                     self.sym if quantized else True,
                 )
-            
+
             # exact match first
             if name in self.extra_config:
                 cfg = self.extra_config[name]
@@ -137,7 +137,7 @@ def get_config(name: str, quantized: bool = True):
                             self.group_size if quantized else -1),
                     cfg.get("sym", self.sym if quantized else True),
                 )
-            
+
             REGEX_SPECIAL_CHARS = set(r"*+?^$()[]{}|\\")
             for pattern, cfg in self.extra_config.items():
                 if not isinstance(pattern, str) or not any(
@@ -156,7 +156,7 @@ def get_config(name: str, quantized: bool = True):
                 except re.error:
                     # Invalid regex, ignore.
                     continue
-            
+
             return (
                 self.weight_bits if quantized else 16,
                 self.group_size if quantized else -1,

From 3065ae5354d6521936de72edd18e247eace836e4 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Mon, 13 Oct 2025 04:32:48 -0400
Subject: [PATCH 06/10] match extra_config patterns with re.search

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 vllm/model_executor/layers/quantization/auto_round.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index 2a3b39353c57..c9e13fda09da 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -145,7 +145,7 @@ def get_config(name: str, quantized: bool = True):
                     continue
 
                 try:
-                    if re.fullmatch(pattern, name):
+                    if re.search(pattern, name):
                         return (
                             cfg.get("bits",
                                     self.weight_bits if quantized else 16),

From 3efb8e6e60b4469357b2997bc48f81a063c0f9d8 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Mon, 13 Oct 2025 04:39:28 -0400
Subject: [PATCH 07/10] pre-commit: reformat get_config

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 .../model_executor/layers/quantization/auto_round.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index c9e13fda09da..2dcb88a192f4 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -133,24 +133,22 @@ def get_config(name: str, quantized: bool = True):
                 cfg = self.extra_config[name]
                 return (
                     cfg.get("bits", self.weight_bits if quantized else 16),
-                    cfg.get("group_size",
-                            self.group_size if quantized else -1),
+                    cfg.get("group_size", self.group_size if quantized else -1),
                     cfg.get("sym", self.sym if quantized else True),
                 )
 
             REGEX_SPECIAL_CHARS = set(r"*+?^$()[]{}|\\")
             for pattern, cfg in self.extra_config.items():
                 if not isinstance(pattern, str) or not any(
-                        c in REGEX_SPECIAL_CHARS for c in pattern):
+                    c in REGEX_SPECIAL_CHARS for c in pattern
+                ):
                     continue
 
                 try:
                     if re.search(pattern, name):
                         return (
-                            cfg.get("bits",
-                                    self.weight_bits if quantized else 16),
-                            cfg.get("group_size",
-                                    self.group_size if quantized else -1),
+                            cfg.get("bits", self.weight_bits if quantized else 16),
+                            cfg.get("group_size", self.group_size if quantized else -1),
                             cfg.get("sym", self.sym if quantized else True),
                         )
                 except re.error:

From 93782ce71bd334a9039f1d795c240121d31d4fad Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Mon, 13 Oct 2025 20:19:12 -0400
Subject: [PATCH 08/10] compile pattern before re.search

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 vllm/model_executor/layers/quantization/auto_round.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index ec9b6437bfed..48586c2065c1 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -153,7 +153,7 @@ def get_config(name: str, quantized: bool = True):
                     continue
 
                 try:
-                    if re.search(pattern, name):
+                    if re.search(re.compile(pattern), name) is not None:
                         return (
                             cfg.get("bits", self.weight_bits if quantized else 16),
                             cfg.get("group_size", self.group_size if quantized else -1),

From 75b7c988811306f6913c1e509a9c6c73744cbbcd Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Mon, 13 Oct 2025 20:46:05 -0400
Subject: [PATCH 09/10] change import

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 vllm/model_executor/layers/quantization/auto_round.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index 48586c2065c1..00b39d9e317d 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import re
+import regex as re
 from fractions import Fraction
 from typing import TYPE_CHECKING, Any
 

From 044ca3cf1b58d34a8065fbcbf719c488b975b043 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Mon, 13 Oct 2025 20:50:53 -0400
Subject: [PATCH 10/10] pre-commit: move regex import to third-party group

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 vllm/model_executor/layers/quantization/auto_round.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index 00b39d9e317d..0e4815be603e 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import regex as re
 from fractions import Fraction
 from typing import TYPE_CHECKING, Any
 
+import regex as re
 import torch
 
 from vllm.logger import init_logger