From ac0944d6b5bcee5d8ee9e19bea8aff432f34c7b9 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Wed, 27 Aug 2025 19:49:43 +0000 Subject: [PATCH 1/4] add format --- src/compressed_tensors/quantization/quant_scheme.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index 79db8d28..b6e8d9fb 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -163,6 +163,16 @@ def is_preset_scheme(name: str) -> bool: ) ) +MXFP4 = dict( + weights=QuantizationArgs( + num_bits=4, + type=QuantizationType.FLOAT, + strategy=QuantizationStrategy.GROUP, + symmetric=True, + dynamic=False, + group_size=32 + ) +) NVFP4 = dict( weights=QuantizationArgs( From a35b5f9e21c033e3eb4a3531c18b2f45f3ce9e34 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Thu, 28 Aug 2025 17:20:11 +0000 Subject: [PATCH 2/4] update --- .../quantization/quant_scheme.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index b6e8d9fb..f810dae2 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -169,9 +169,17 @@ def is_preset_scheme(name: str) -> bool: type=QuantizationType.FLOAT, strategy=QuantizationStrategy.GROUP, symmetric=True, - dynamic=False, - group_size=32 - ) + dynamic=False, + group_size=32, + ), + input_activations=QuantizationArgs( + num_bits=4, + type=QuantizationType.FLOAT, + strategy=QuantizationStrategy.GROUP, + dynamic=True, + symmetric=True, + group_size=32, + ), ) NVFP4 = dict( From 4602764fdec8e61b620aaf02e6543349c48fc7e6 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Thu, 28 Aug 2025 17:36:55 +0000 Subject: [PATCH 3/4] update --- .../quantization/quant_scheme.py | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index f810dae2..60d6e524 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -163,46 +163,59 @@ def is_preset_scheme(name: str) -> bool: ) ) -MXFP4 = dict( + +NVFP4 = dict( weights=QuantizationArgs( num_bits=4, type=QuantizationType.FLOAT, - strategy=QuantizationStrategy.GROUP, + strategy=QuantizationStrategy.TENSOR_GROUP, symmetric=True, dynamic=False, - group_size=32, + group_size=16, + observer="static_minmax", ), input_activations=QuantizationArgs( + num_bits=4, + type=QuantizationType.FLOAT, + strategy=QuantizationStrategy.TENSOR_GROUP, + symmetric=True, + dynamic=DynamicType.LOCAL, + group_size=16, + observer="static_minmax", + ), +) + +MXFP4A16 = dict( + weights=QuantizationArgs( num_bits=4, type=QuantizationType.FLOAT, strategy=QuantizationStrategy.GROUP, - dynamic=True, symmetric=True, + dynamic=False, group_size=32, - ), + ) ) -NVFP4 = dict( +MXFP4 = dict( weights=QuantizationArgs( num_bits=4, type=QuantizationType.FLOAT, - strategy=QuantizationStrategy.TENSOR_GROUP, + strategy=QuantizationStrategy.GROUP, symmetric=True, dynamic=False, - group_size=16, - observer="static_minmax", + group_size=32, ), input_activations=QuantizationArgs( num_bits=4, type=QuantizationType.FLOAT, - strategy=QuantizationStrategy.TENSOR_GROUP, + strategy=QuantizationStrategy.GROUP, + dynamic=True, symmetric=True, - dynamic=DynamicType.LOCAL, - group_size=16, - observer="static_minmax", + group_size=32, ), ) + # 8 bit integer weights and 8 bit activations quantization INT8_W8A8 = dict( weights=QuantizationArgs( From e22862ea82090189b1813db04b7efa76a087eaf7 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Thu, 28 Aug 2025 19:56:45 +0000 Subject: [PATCH 4/4] update --- src/compressed_tensors/quantization/quant_scheme.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index 60d6e524..e6cb7929 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -367,4 +367,6 @@ def is_preset_scheme(name: str) -> bool: "FP8_BLOCK": FP8_BLOCK, "NVFP4A16": NVFP4A16, "NVFP4": NVFP4, + "MXFP4": MXFP4, + "MXFP4A16": MXFP4A16, }