
Commit 0535613

Authored by George
[Test Fix] Quant model reload (#974)
Originally contingent on huggingface/transformers#34719, which has since been merged and released.

SUMMARY: Update the test to decompress through AutoModelForCausalLM instead of manually instantiating the compressor and decompressing. When AutoModelForCausalLM recognizes the quantization_config, it runs the same decompression automatically at load time (sketched below).

TEST PLAN: Ran the test against transformers main. Must pass: tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
1 parent 4d06685 commit 0535613
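
For context, a minimal sketch of the load pathway the updated test now relies on; the checkpoint path is a placeholder and the test's fixtures are omitted, so treat this as illustrative rather than the exact test body:

# Minimal sketch of the reload pathway exercised by the updated test.
# The checkpoint path is a placeholder; the dtype in the real test comes from
# its parametrization. Requires a transformers version that includes
# huggingface/transformers#34719.
from transformers import AutoModelForCausalLM
from transformers.utils.quantization_config import CompressedTensorsConfig

# run_compressed=False asks the compressed-tensors HFQuantizer to decompress the
# quantized weights into dense tensors during loading, replacing the manual
# ModelCompressor.decompress() call the old test used.
decompressed_model = AutoModelForCausalLM.from_pretrained(
    "path/to/compressed-checkpoint",  # placeholder path
    torch_dtype="auto",
    quantization_config=CompressedTensorsConfig(run_compressed=False),
)

The test then compares decompressed_model.state_dict() entry by entry against the state dict of the original oneshot model.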

1 file changed (+11 lines, −19 lines)


tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py

Lines changed: 11 additions & 19 deletions

@@ -12,6 +12,7 @@
 from compressed_tensors.quantization import QuantizationStatus
 from compressed_tensors.utils import get_offloaded_device, update_prefix_dict
 from transformers import AutoConfig, AutoModelForCausalLM
+from transformers.utils.quantization_config import CompressedTensorsConfig

 from llmcompressor.core import reset_session
 from llmcompressor.pytorch.utils.helpers import tensor_sparsity
@@ -171,9 +172,8 @@ def test_quant_model_reload(format, dtype, tmp_path):
     device = "cpu"
     dataset = "open_platypus"
     concatenate_data = False
-    num_calibration_samples = 64
+    num_calibration_samples = 16
     splits = {"calibration": "train[:10%]"}
-    empty_model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype)

     # create a quantized model
     oneshot(
@@ -191,7 +191,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
     # Fetch the oneshot model
     model = get_session_model()
     og_state_dict = model.state_dict()
-    path = tmp_path / "compressed"
+    save_path_compressed = tmp_path / "compressed"

     for _, module in model.named_modules():
         if hasattr(module, "quantization_scheme"):
@@ -200,32 +200,24 @@ def test_quant_model_reload(format, dtype, tmp_path):

     # Save to disk
     model.save_pretrained(
-        path,
+        save_path_compressed,
         quantization_format=format,
         save_compressed=True,
     )

     # Verify config on disk
-    config = AutoConfig.from_pretrained(path)
+    config = AutoConfig.from_pretrained(save_path_compressed)
     compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
     quant_config = ModelCompressor.parse_quantization_config(compression_config)
     assert quant_config["format"] == format

-    # As HFQuantizer doesn't decompress the model, use the compressor to decompress
-    # the model instead
-    compressor = ModelCompressor.from_compression_config(compression_config)
-    compressor.quantization_config.quantization_status = QuantizationStatus.FROZEN
-    compressor.decompress(model_path=path, model=empty_model)
-
-    # eventually use this pathway once HFQuant Decompression works
-    """
-    dense_model = SparseAutoModelForCausalLM.from_pretrained(
-        "compress_out", torch_dtype="auto", device_map=device
+    decompressed_model = AutoModelForCausalLM.from_pretrained(
+        save_path_compressed,
+        torch_dtype=dtype,
+        quantization_config=CompressedTensorsConfig(run_compressed=False),
     )
-    """
-    # Verify the abs difference between the decompressed model
-    # and the original model
-    reconstructed_state_dict = empty_model.state_dict()
+
+    reconstructed_state_dict = decompressed_model.state_dict()
     assert len(og_state_dict) == len(reconstructed_state_dict)
     for key in og_state_dict.keys():
         dense_tensor = og_state_dict[key].to(device)
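
For contrast, the manual decompression path removed by this commit looked roughly like the following, reconstructed from the deleted lines above. The names compression_config, path, and empty_model come from the old test body; the import location for ModelCompressor is an assumption, not taken from this diff:

# Reconstruction of the removed manual-decompression path, for comparison only.
# `compression_config`, `path`, and `empty_model` were defined earlier in the old
# test; the ModelCompressor import path is an assumption.
from compressed_tensors.compressors import ModelCompressor
from compressed_tensors.quantization import QuantizationStatus

# Build a compressor from the serialized quantization config, mark the weights as
# frozen, then decompress the on-disk checkpoint into a freshly loaded dense model.
compressor = ModelCompressor.from_compression_config(compression_config)
compressor.quantization_config.quantization_status = QuantizationStatus.FROZEN
compressor.decompress(model_path=path, model=empty_model)

With the HFQuantizer pathway now available in transformers, the test no longer needs to keep an empty_model around just to receive the decompressed weights, which is why that fixture is deleted as well.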
