
Commit 50e881f

dsikka and kylesayrs committed

actually make the test useful (#920)

Co-authored-by: Kyle Sayers <[email protected]>
Signed-off-by: Kyle Sayers <[email protected]>

1 parent cdb6231

File tree

5 files changed: +29 -12 lines changed

Lines changed: 2 additions & 1 deletion

@@ -1,3 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed"
+model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed"
+empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
Lines changed: 2 additions & 1 deletion

@@ -1,3 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "nm-testing/tinyllama-w4a16-compressed"
+model_stub: "nm-testing/tinyllama-w4a16-compressed"
+empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
Lines changed: 2 additions & 1 deletion

@@ -1,3 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "nm-testing/tinyllama-w8a16-dense"
+model_stub: "nm-testing/tinyllama-w8a16-dense"
+empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
Lines changed: 2 additions & 1 deletion

@@ -1,3 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "nm-testing/tinyllama-w8a8-compressed"
+model_stub: "nm-testing/tinyllama-w8a8-compressed"
+empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
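
All four configs gain the same empty_model key: the uncompressed TinyLlama base checkpoint that the test decompresses into. As a hedged sketch of how these files are consumed (the actual loader is tests.testing_utils.parse_params, which is not part of this diff), each YAML becomes one parameter set for @parameterized_class; the fp8-dynamic config, for example, amounts to:

# Hypothetical illustration of the parameter set the fp8-dynamic config
# yields; the other three configs differ only in model_stub.
params = {
    "cadence": "commit",
    "test_type": "regression",
    "model_stub": "nm-testing/tinyllama-fp8-dynamic-compressed",
    "empty_model": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
}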

tests/llmcompressor/transformers/compression/test_run_compressed.py

Lines changed: 21 additions & 8 deletions

@@ -3,8 +3,11 @@
 import unittest
 
 import torch
+from compressed_tensors import QUANTIZATION_CONFIG_NAME
+from compressed_tensors.compressors import ModelCompressor
+from compressed_tensors.quantization import QuantizationStatus
 from parameterized import parameterized_class
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 
 from tests.testing_utils import parse_params, requires_gpu, requires_torch
 
@@ -16,6 +19,7 @@
 @parameterized_class(parse_params(CONFIG_DIR))
 class TestQuantizationMatches(unittest.TestCase):
     model_stub = None
+    empty_model = None
 
     @classmethod
     def setUpClass(cls):
@@ -29,25 +33,34 @@ def setUpClass(cls):
             device_map="auto",
             # run_compressed=True, # TODO: Give option on HFQuantizer
         )
+        # TODO: Use ModelCompressor until decompression is supported through
+        # HFQuant/run_compressed can be turned off.
         cls.uncompressed_model = AutoModelForCausalLM.from_pretrained(
-            cls.model_stub,
-            torch_dtype="auto",
-            device_map="auto",
-            # run_compressed=False, # TODO: Give option on HFQuantizer
+            cls.empty_model,
+            torch_dtype=cls.compressed_model.dtype,
+            device_map=cls.compressed_model.device,
+        )
+        config = AutoConfig.from_pretrained(cls.model_stub)
+        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
+        cls.compressor = ModelCompressor.from_compression_config(compression_config)
+        cls.compressor.quantization_config.quantization_status = (
+            QuantizationStatus.FROZEN
+        )
+        cls.compressor.decompress(
+            model_path=cls.model_stub, model=cls.uncompressed_model
         )
 
         cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_stub)
-        cls.device = cls.compressed_model.device
 
     def test_compressed_matches_uncompressed(self):
        SAMPLE_INPUT = [
             "I love 4-bit quantization because",
-            "What is the capital of Paris?",
+            "What is the capital of France?",
             "def fibonacci(n):",
         ]
 
         inputs = self.tokenizer(SAMPLE_INPUT, return_tensors="pt", padding=True).to(
-            self.device
+            self.compressed_model.device
         )
         compressed_output = self.tokenizer.batch_decode(
             self.compressed_model.generate(**inputs, max_length=50)
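
For readers who want to reproduce the new decompression path outside the test harness, here is a minimal standalone sketch assembled from the diff above. It uses only the compressed-tensors calls shown in the test; the two checkpoint names are taken from the updated test configs, and float16 is an assumed dtype.

# Minimal sketch of the decompression flow from setUpClass (not the exact
# test code). Checkpoint names come from the updated test configs.
import torch
from compressed_tensors import QUANTIZATION_CONFIG_NAME
from compressed_tensors.compressors import ModelCompressor
from compressed_tensors.quantization import QuantizationStatus
from transformers import AutoConfig, AutoModelForCausalLM

compressed_stub = "nm-testing/tinyllama-w4a16-compressed"
empty_model = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Load the uncompressed base architecture that the weights are decompressed into.
model = AutoModelForCausalLM.from_pretrained(
    empty_model, torch_dtype=torch.float16, device_map="auto"
)

# Read the compression config stored in the compressed checkpoint's model
# config under QUANTIZATION_CONFIG_NAME.
config = AutoConfig.from_pretrained(compressed_stub)
compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)

# Rebuild the compressor, mark quantization as frozen, and decompress the
# checkpoint's weights into the base model in place.
compressor = ModelCompressor.from_compression_config(compression_config)
compressor.quantization_config.quantization_status = QuantizationStatus.FROZEN
compressor.decompress(model_path=compressed_stub, model=model)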
