We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent: 086b9d0 · commit: 8aa7889
web_app/hf_waitress.py
@@ -568,7 +568,8 @@ def initialize_model():
568
quantization_config = QuantoConfig(weights="int4")
569
model_params["quantization_config"] = quantization_config
570
elif quantize == "hqq":
571
- print("HQQ-Quantizing")
+ print("HQQ-Quantizing - Force-setting torch_dtype to torch.bfloat16")
572
+ model_params["torch_dtype"] = torch.bfloat16
573
quant_level = quant_level.lower().strip()
574
575
if quant_level == "int8":
0 commit comments