GPTQ Lite implementation #555
base: main

Changes from all commits: 850ef67, ccfcfeb, 9992c08, 32ada2b, a183f34, 1587293, c6e1f07
```diff
@@ -1111,6 +1111,39 @@ class SVDQuantConfig(QuantizeAlgorithmConfig):
     )
 
 
+class GPTQLiteConfig(QuantizeAlgorithmConfig):
+    """The config for GPTQ lite.
+
+    GPTQ lite is a variant of GPTQ that does not exactly follow the official GPTQ
+    implementation.
+
+    GPTQ lite does not perform sequential quantization of layers. This means that the
+    updated activations are not used to process the next layer.
+
+    GPTQ lite also uses dynamic scales computed during the weight update phase. The
+    original GPTQ implementation uses static scales computed on the weights before
+    beginning blockwise update.
+    """
+
+    method: Literal["gptq_lite"] = ModeloptField("gptq_lite")
+    percdamp: float | None = ModeloptField(
+        default=0.01,
+        gt=0.0,
+        le=1.0,
+        title="Percentage damping factor.",
+        description="The percentage of average Hessian diagonal used for damping.",
```
Collaborator: if you have a reference from the original paper about what these are, could you also share the link too?

Contributor: Could you also add some instructions here, so users can know what's the impact of increasing/decreasing this parameter?
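To make the reviewers' question about `percdamp` concrete, here is a minimal sketch of how a percentage damping factor is typically applied to the Hessian diagonal in GPTQ-style methods. This is an illustration under that assumption, not the code in this PR; the helper name `damp_hessian` is hypothetical.

```python
import numpy as np

def damp_hessian(H: np.ndarray, percdamp: float = 0.01) -> np.ndarray:
    """Add percdamp * mean(diag(H)) to the Hessian diagonal.

    Increasing percdamp makes the Hessian inverse better conditioned
    (more numerically stable, but weight updates shrink toward plain
    round-to-nearest); decreasing it trusts the raw Hessian more but
    risks an ill-conditioned or singular matrix. Sketch only -- this
    is not the modelopt implementation.
    """
    damp = percdamp * np.mean(np.diag(H))
    H = H.copy()
    H[np.diag_indices_from(H)] += damp
    return H

# Example: a rank-deficient proxy Hessian (too few calibration samples)
# becomes positive definite after damping, so Cholesky succeeds.
rng = np.random.default_rng(0)
X = rng.normal(size=(8, 3))   # 8 channels, only 3 samples
H = X @ X.T                   # PSD but rank <= 3, i.e. singular
Hd = damp_hessian(H, percdamp=0.01)
L = np.linalg.cholesky(Hd)    # would be unreliable on the raw H
```

This also suggests an answer to the second comment: larger `percdamp` trades accuracy of the Hessian-guided update for robustness on poorly conditioned layers.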
```diff
+    )
+    block_size: int | None = ModeloptField(
+        default=128,
+        title="Block size for GPTQ weight update.",
+        description="The block size for GPTQ weight update.",
+    )
```
Contributor (on lines +1135 to +1139): This should be the multiple of block_size used in quantization. We should explain it in the description as well.
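For context on what `block_size` controls, here is a sketch of the standard GPTQ blockwise column update: columns are quantized one at a time within a block, with the remaining columns updated lazily once per block. This assumes the PR follows the usual GPTQ schedule; `gptq_blockwise_update` and its simple round-to-nearest quantizer are hypothetical stand-ins, not the PR's code.

```python
import numpy as np

def gptq_blockwise_update(W: np.ndarray, Hinv: np.ndarray, block_size: int = 128,
                          quantize=lambda w: np.round(w)) -> np.ndarray:
    """Quantize columns of W block by block, propagating quantization error.

    Hinv plays the role of the inverse (damped) Hessian factor from GPTQ;
    `quantize` is a placeholder per-column quantizer. Sketch only.
    """
    W = W.copy()
    n = W.shape[1]
    Q = np.zeros_like(W)
    for i1 in range(0, n, block_size):
        i2 = min(i1 + block_size, n)
        Werr = np.zeros_like(W[:, i1:i2])
        for j in range(i1, i2):
            q = quantize(W[:, j])
            Q[:, j] = q
            err = (W[:, j] - q) / Hinv[j, j]
            # eager update of the columns inside the current block
            W[:, j:i2] -= np.outer(err, Hinv[j, j:i2])
            Werr[:, j - i1] = err
        # lazy update of all columns after the block
        W[:, i2:] -= Werr @ Hinv[i1:i2, i2:]
    return Q

# With an identity Hinv there is no cross-column correction, so the
# result reduces to plain rounding regardless of block_size.
W = np.array([[0.2, 1.7, -0.4, 2.9]])
Q = gptq_blockwise_update(W, np.eye(4), block_size=2)
```

A larger `block_size` amortizes the lazy tail update over more columns (faster), while the math is unchanged; this is why it mainly needs to divide the weight dimensions cleanly, as the comment above suggests documenting.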
```diff
+    hessian_state_path: str | None = ModeloptField(
+        default=None,
+        title="Path to the Hessian state file.",
+        description="The path to the Hessian state file.",
+    )
```

Collaborator: Maybe state: if the path exists, we load the hessian from the path instead of re-computing them.
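The semantics the collaborator suggests for `hessian_state_path` can be sketched as a load-or-compute cache. The helper below is hypothetical (not part of this PR) and assumes an `.npz` file of per-layer Hessians; it only illustrates the proposed docstring behavior.

```python
import os
import numpy as np

def load_or_compute_hessian(path, compute_fn):
    """Load cached Hessian state from `path` if it exists;
    otherwise run `compute_fn` (calibration) and save the result.

    `compute_fn` returns a dict mapping layer names to Hessian arrays.
    Hypothetical helper illustrating the suggested semantics.
    """
    if path is not None and os.path.exists(path):
        with np.load(path) as f:
            return {k: f[k] for k in f.files}
    state = compute_fn()
    if path is not None:
        np.savez(path, **state)
    return state
```

Under this reading, a second quantization run with the same path skips the (expensive) calibration pass entirely.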
```diff
 
 
 QuantizeQuantCfgType = dict[
     str | Callable,
     QuantizerAttributeConfig
```
Reviewer: Can you estimate how much effort is needed if we need to add this constraint? I am thinking if we can have a quick test to see what's the accuracy impact.