33 changes: 33 additions & 0 deletions modelopt/torch/quantization/config.py
@@ -1111,6 +1111,39 @@ class SVDQuantConfig(QuantizeAlgorithmConfig):
    )


class GPTQLiteConfig(QuantizeAlgorithmConfig):
    """The config for GPTQ lite.

    GPTQ lite is a variant of GPTQ that does not exactly follow the official GPTQ implementation.

    GPTQ lite does not perform sequential quantization of layers: the updated activations from
    already-quantized layers are not used when processing the next layer.
Comment on lines +1119 to +1120

Contributor: Can you estimate how much effort would be needed to add this constraint? I am wondering if we can run a quick test to see what the accuracy impact is.

    GPTQ lite also uses dynamic scales computed during the weight update phase, whereas the
    original GPTQ implementation uses static scales computed on the weights before the blockwise
    update begins.
    """

    method: Literal["gptq_lite"] = ModeloptField("gptq_lite")
    percdamp: float | None = ModeloptField(
        default=0.01,
        gt=0.0,
        le=1.0,
        title="Percentage damping factor.",
        description="The percentage of the average Hessian diagonal used for damping.",
Collaborator: If you have a reference from the original paper about what these are, could you also share the link?

Contributor: Could you also add some instructions here, so users know the impact of increasing or decreasing this parameter?
    )
    block_size: int | None = ModeloptField(
        default=128,
        title="Block size for GPTQ weight update.",
        description="The block size for GPTQ weight update.",
    )
Comment on lines +1135 to +1139

Contributor: This should be a multiple of the block size used in quantization. We should explain that in the description as well.
    hessian_state_path: str | None = ModeloptField(
        default=None,
        title="Path to the Hessian state file.",
        description="The path to the Hessian state file.",
Collaborator: Maybe state: if the path exists, we load the Hessian from the path instead of re-computing it.
    )


QuantizeQuantCfgType = dict[
    str | Callable,
    QuantizerAttributeConfig
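To make the two knobs concrete, here is a minimal sketch (an assumption-laden illustration, not this PR's code) of how `percdamp` and `block_size` typically enter a GPTQ-style update; `quantize_col` and `gptq_lite_update` are hypothetical names, and the per-column scale mirrors the "dynamic scales" described in the docstring:

```python
import torch

def quantize_col(w: torch.Tensor, n_bits: int = 4) -> torch.Tensor:
    # Symmetric round-to-nearest with a dynamic per-column scale.
    scale = w.abs().max().clamp(min=1e-8) / (2 ** (n_bits - 1) - 1)
    return torch.round(w / scale) * scale

def gptq_lite_update(W: torch.Tensor, H: torch.Tensor,
                     percdamp: float = 0.01, block_size: int = 128) -> torch.Tensor:
    W = W.clone()
    ncols = W.shape[1]
    # percdamp: add a fraction of the mean Hessian diagonal before inverting,
    # which keeps the Cholesky factorization numerically stable. Larger values
    # are more stable but dilute the Hessian information.
    damp = percdamp * torch.diag(H).mean()
    H = H + damp * torch.eye(ncols, dtype=H.dtype, device=H.device)
    Hinv = torch.linalg.cholesky(
        torch.cholesky_inverse(torch.linalg.cholesky(H)), upper=True
    )
    # block_size: quantize columns in blocks, compensating the error within
    # the block immediately and deferring cross-block compensation.
    for start in range(0, ncols, block_size):
        end = min(start + block_size, ncols)
        err_block = torch.zeros_like(W[:, start:end])
        for i in range(start, end):
            q = quantize_col(W[:, i])
            err = (W[:, i] - q) / Hinv[i, i]
            # Spread the error over the not-yet-quantized columns of this block.
            W[:, i:end] -= err.unsqueeze(1) * Hinv[i, i:end].unsqueeze(0)
            W[:, i] = q
            err_block[:, i - start] = err
        # Propagate the block's accumulated error to the remaining columns.
        W[:, end:] -= err_block @ Hinv[start:end, end:]
    return W
```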
15 changes: 14 additions & 1 deletion modelopt/torch/quantization/mode.py
@@ -37,6 +37,7 @@
    AWQFullCalibConfig,
    AWQLiteCalibConfig,
    CompressConfig,
    GPTQLiteConfig,
    MaxCalibConfig,
    QuantizeAlgoCfgType,
    QuantizeAlgorithmConfig,
@@ -54,7 +55,7 @@
    restore_svdquant_model,
    update_quantize_metadata,
)
-from .model_calib import awq, max_calibrate, smoothquant, svdquant
+from .model_calib import awq, gptq_lite, max_calibrate, smoothquant, svdquant

__all__ = ["BaseCalibrateModeDescriptor"]

@@ -426,3 +427,15 @@ def config_class(self) -> type[QuantizeAlgorithmConfig]:
    def restore(self) -> RestoreEntrypoint:
        """The mode's entrypoint for restoring a model."""
        return restore_svdquant_model


@CalibrateModeRegistry.register_mode
class GPTQLiteModeDescriptor(BaseCalibrateModeDescriptor):
    """Mode for the GPTQ lite calibration algorithm."""

    @property
    def config_class(self) -> type[QuantizeAlgorithmConfig]:
        """Specifies the config class for the mode."""
        return GPTQLiteConfig

    _calib_func = gptq_lite
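For context, a hedged usage sketch of how the new mode could be selected through `mtq.quantize`; the `quant_cfg` below is a placeholder, and the exact `algorithm` keys are assumptions based on the other algorithm configs in this module:

```python
import torch
import modelopt.torch.quantization as mtq

model = torch.nn.Sequential(torch.nn.Linear(512, 512))

def forward_loop(m: torch.nn.Module) -> None:
    # Feed a few calibration batches; real calibration uses representative data.
    for _ in range(8):
        m(torch.randn(4, 512))

config = {
    # Placeholder 4-bit weight-only quantizer settings.
    "quant_cfg": {
        "*weight_quantizer": {"num_bits": 4, "block_sizes": {-1: 128}, "enable": True},
        "default": {"enable": False},
    },
    # Selects GPTQLiteModeDescriptor via its "gptq_lite" method literal.
    "algorithm": {"method": "gptq_lite", "percdamp": 0.01, "block_size": 128},
}

model = mtq.quantize(model, config, forward_loop)
```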