From ddd6c7040d22449d91a9fba4fbcd0d8abcee7909 Mon Sep 17 00:00:00 2001
From: Rahul Tuli
Date: Fri, 14 Feb 2025 15:35:10 +0000
Subject: [PATCH 1/2] Update: Sparse2of4 example

Signed-off-by: Rahul Tuli
---
 examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py b/examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py
index 21cec66c8b..85cb388679 100644
--- a/examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py
+++ b/examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py
@@ -116,7 +116,5 @@ def get_recipe(fp8_enabled):
 print("==========================================\n")
 
 # Save compressed model and tokenizer
-model.save_pretrained(
-    save_dir, save_compressed=args.fp8, disable_sparse_compression=True
-)
+model.save_pretrained(save_dir, save_compressed=args.fp8)
 tokenizer.save_pretrained(save_dir)

From 8862d8bc71ec7a270faea5b6a58f11d4490cf4c0 Mon Sep 17 00:00:00 2001
From: Rahul Tuli
Date: Fri, 14 Feb 2025 21:10:25 +0000
Subject: [PATCH 2/2] Update: test!

Signed-off-by: Rahul Tuli
---
 tests/e2e/vLLM/configs/sparse_24.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/e2e/vLLM/configs/sparse_24.yaml b/tests/e2e/vLLM/configs/sparse_24.yaml
index 653168b977..d0805a1b0d 100644
--- a/tests/e2e/vLLM/configs/sparse_24.yaml
+++ b/tests/e2e/vLLM/configs/sparse_24.yaml
@@ -5,4 +5,4 @@ recipe: tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4.yaml
 scheme: sparse2of4_only
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
-save_compressed: False
\ No newline at end of file
+save_compressed: True
\ No newline at end of file