pytorch
diff --git a/README.md b/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/test/quantization/test_mixed_precision.py b/test/prototype/test_mixed_precision.py
rename from test/quantization/test_mixed_precision.py
rename to test/prototype/test_mixed_precision.py
Lines changed: 1 addition & 1 deletion
diff --git a/test/quantization/test_qat.py b/test/prototype/test_qat.py
rename from test/quantization/test_qat.py
rename to test/prototype/test_qat.py
Lines changed: 15 additions & 15 deletions
diff --git a/torchao/quantization/prototype/__init__.py b/torchao/prototype/quantization/__init__.py
rename from torchao/quantization/prototype/__init__.py
rename to torchao/prototype/quantization/__init__.py
diff --git a/torchao/quantization/prototype/mixed_precision/README.md b/torchao/prototype/quantization/mixed_precision/README.md
rename from torchao/quantization/prototype/mixed_precision/README.md
rename to torchao/prototype/quantization/mixed_precision/README.md
diff --git a/torchao/quantization/prototype/mixed_precision/__init__.py b/torchao/prototype/quantization/mixed_precision/__init__.py
rename from torchao/quantization/prototype/mixed_precision/__init__.py
rename to torchao/prototype/quantization/mixed_precision/__init__.py
diff --git a/torchao/quantization/prototype/mixed_precision/scripts/BO_acc_modelsize.py b/torchao/prototype/quantization/mixed_precision/scripts/BO_acc_modelsize.py
rename from torchao/quantization/prototype/mixed_precision/scripts/BO_acc_modelsize.py
rename to torchao/prototype/quantization/mixed_precision/scripts/BO_acc_modelsize.py
diff --git a/torchao/quantization/prototype/mixed_precision/scripts/BO_acc_throughput.py b/torchao/prototype/quantization/mixed_precision/scripts/BO_acc_throughput.py
rename from torchao/quantization/prototype/mixed_precision/scripts/BO_acc_throughput.py
rename to torchao/prototype/quantization/mixed_precision/scripts/BO_acc_throughput.py
diff --git a/torchao/quantization/prototype/mixed_precision/scripts/Llama3-8B_initial_samples.json b/torchao/prototype/quantization/mixed_precision/scripts/Llama3-8B_initial_samples.json
rename from torchao/quantization/prototype/mixed_precision/scripts/Llama3-8B_initial_samples.json
rename to torchao/prototype/quantization/mixed_precision/scripts/Llama3-8B_initial_samples.json
diff --git a/torchao/quantization/prototype/mixed_precision/scripts/Llama3-8B_parameters.json b/torchao/prototype/quantization/mixed_precision/scripts/Llama3-8B_parameters.json
rename from torchao/quantization/prototype/mixed_precision/scripts/Llama3-8B_parameters.json
rename to torchao/prototype/quantization/mixed_precision/scripts/Llama3-8B_parameters.json
@@ -59,7 +59,7 @@ In practice these features alongside int4 weight only quantization allow us to *
 Post-training quantization (PTQ) can result in a fast and compact model, but may also lead to accuracy degradation. We recommend exploring Quantization-Aware Training (QAT) to overcome this limitation. In collaboration with Torchtune, we've developed a QAT recipe that demonstrates significant accuracy improvements over traditional PTQ, recovering **96% of the accuracy degradation on hellaswag and 68% of the perplexity degradation on wikitext** for Llama3 compared to PTQ. We've provided a full recipe [here](https://pytorch.org/blog/quantization-aware-training/).
 
 ```python
-from torchao.quantization.prototype.qat import Int8DynActInt4WeightQATQuantizer
+from torchao.prototype.quantization.qat import Int8DynActInt4WeightQATQuantizer
 
 qat_quantizer = Int8DynActInt4WeightQATQuantizer()
 
 
@@ -4,7 +4,7 @@
 import torch.nn as nn
 from torchao.quantization import quantize_, int8_weight_only, int4_weight_only
 from torchao.quantization.utils import compute_error
-from torchao.quantization.prototype.mixed_precision.scripts.naive_intNwo import intN_weight_only
+from torchao.prototype.quantization.mixed_precision.scripts.naive_intNwo import intN_weight_only
 
 _CUDA_IS_AVAILABLE = torch.cuda.is_available()
 
 
@@ -22,17 +22,17 @@
     PerRow,
     PerToken,
 )
-from torchao.quantization.prototype.qat.api import (
+from torchao.prototype.quantization.qat.api import (
     ComposableQATQuantizer,
     FakeQuantizeConfig,
 )
-from torchao.quantization.prototype.qat.fake_quantizer import (
+from torchao.prototype.quantization.qat.fake_quantizer import (
     FakeQuantizer,
 )
-from torchao.quantization.prototype.qat.linear import (
+from torchao.prototype.quantization.qat.linear import (
     FakeQuantizedLinear,
 )
-from torchao.quantization.prototype.qat.utils import (
+from torchao.prototype.quantization.qat.utils import (
     _choose_qparams_per_token_asymmetric,
     _fake_quantize_per_channel_group,
     _fake_quantize_per_token,
@@ -172,7 +172,7 @@ def _set_ptq_weight(
             Int8DynActInt4WeightLinear,
             WeightOnlyInt4Linear,
         )
-        from torchao.quantization.prototype.qat.linear import (
+        from torchao.prototype.quantization.qat.linear import (
             Int8DynActInt4WeightQATLinear,
             Int4WeightOnlyQATLinear,
         )
@@ -204,7 +204,7 @@ def _set_ptq_weight(
 
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower")
     def test_qat_8da4w_linear(self):
-        from torchao.quantization.prototype.qat.linear import Int8DynActInt4WeightQATLinear
+        from torchao.prototype.quantization.qat.linear import Int8DynActInt4WeightQATLinear
         from torchao.quantization.GPTQ import Int8DynActInt4WeightLinear
 
         group_size = 128
@@ -229,7 +229,7 @@ def test_qat_8da4w_linear(self):
 
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower")
     def test_qat_8da4w_quantizer(self):
-        from torchao.quantization.prototype.qat import Int8DynActInt4WeightQATQuantizer
+        from torchao.prototype.quantization.qat import Int8DynActInt4WeightQATQuantizer
         from torchao.quantization.GPTQ import Int8DynActInt4WeightQuantizer
 
         group_size = 16
@@ -263,7 +263,7 @@ def test_qat_8da4w_quantizer(self):
 
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower")
     def test_qat_8da4w_quantizer_meta_weights(self):
-        from torchao.quantization.prototype.qat import Int8DynActInt4WeightQATQuantizer
+        from torchao.prototype.quantization.qat import Int8DynActInt4WeightQATQuantizer
 
         with torch.device("meta"):
             m = M()
@@ -278,7 +278,7 @@ def test_qat_8da4w_quantizer_disable_fake_quant(self):
         """
         Test that 8da4w QAT with disabled fake quant matches nn.Linear in forward.
         """
-        from torchao.quantization.prototype.qat import (
+        from torchao.prototype.quantization.qat import (
             Int8DynActInt4WeightQATQuantizer,
             disable_8da4w_fake_quant,
             enable_8da4w_fake_quant,
@@ -337,7 +337,7 @@ def test_qat_8da4w_quantizer_disable_fake_quant_backward(self):
         """
         Test that 8da4w QAT with disabled fake quant matches nn.Linear in backward.
         """
-        from torchao.quantization.prototype.qat import (
+        from torchao.prototype.quantization.qat import (
             Int8DynActInt4WeightQATQuantizer,
             disable_8da4w_fake_quant,
         )
@@ -419,7 +419,7 @@ def _test_qat_quantized_gradients(self, quantizer):
 
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower")
     def test_qat_8da4w_quantizer_gradients(self):
-        from torchao.quantization.prototype.qat import Int8DynActInt4WeightQATQuantizer
+        from torchao.prototype.quantization.qat import Int8DynActInt4WeightQATQuantizer
         quantizer = Int8DynActInt4WeightQATQuantizer(groupsize=16)
         self._test_qat_quantized_gradients(quantizer)
 
@@ -509,7 +509,7 @@ def test_qat_4w_primitives(self):
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower")
     @unittest.skipIf(not _CUDA_IS_AVAILABLE, "skipping when cuda is not available")
     def test_qat_4w_linear(self):
-        from torchao.quantization.prototype.qat.linear import Int4WeightOnlyQATLinear
+        from torchao.prototype.quantization.qat.linear import Int4WeightOnlyQATLinear
         from torchao.quantization.GPTQ import WeightOnlyInt4Linear
 
         group_size = 128
@@ -536,14 +536,14 @@ def test_qat_4w_linear(self):
 
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower")
     def test_qat_4w_quantizer_gradients(self):
-        from torchao.quantization.prototype.qat import Int4WeightOnlyQATQuantizer
+        from torchao.prototype.quantization.qat import Int4WeightOnlyQATQuantizer
         quantizer = Int4WeightOnlyQATQuantizer(groupsize=32, inner_k_tiles=8)
         self._test_qat_quantized_gradients(quantizer)
 
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower")
     @unittest.skipIf(not _CUDA_IS_AVAILABLE, "skipping when cuda is not available")
     def test_qat_4w_quantizer(self):
-        from torchao.quantization.prototype.qat import Int4WeightOnlyQATQuantizer
+        from torchao.prototype.quantization.qat import Int4WeightOnlyQATQuantizer
         from torchao.quantization.GPTQ import Int4WeightOnlyQuantizer
 
         group_size = 32
@@ -621,7 +621,7 @@ def test_composable_qat_quantizer(self):
 
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower")
     def test_qat_4w_embedding(self):
-        from torchao.quantization.prototype.qat import Int4WeightOnlyEmbeddingQATQuantizer
+        from torchao.prototype.quantization.qat import Int4WeightOnlyEmbeddingQATQuantizer
         model = M2()
         x = model.example_inputs()
         out = model(*x)