BUG FIX: HQQ quantization would error out if torch_dtype (dataType) was set to auto; it is now force-set to torch.bfloat16

abgulati · abgulati · commit 8aa7889ca950 · 2024-09-06T16:05:29.000-07:00
diff --git a/web_app/hf_waitress.py b/web_app/hf_waitress.py
@@ -568,7 +568,8 @@ def initialize_model():
                 quantization_config  = QuantoConfig(weights="int4")
                 model_params["quantization_config"] = quantization_config
         elif quantize == "hqq":
-            print("HQQ-Quantizing")
+            print("HQQ-Quantizing - Force-setting torch_dtype to torch.bfloat16")
+            model_params["torch_dtype"] = torch.bfloat16
             quant_level = quant_level.lower().strip()
 
             if quant_level == "int8":