From 6f66bce4e9c4dd6e2a349698938d75f48f01919a Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Wed, 31 Jul 2024 17:24:27 +0200 Subject: [PATCH 01/10] Initial Commit --- activation_sparisty.py | 21 +++ .../sparsify_activations/openvino_backend.py | 153 ++++++++++++++++++ .../sparsify_activations_impl.py | 17 +- 3 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 activation_sparisty.py create mode 100644 nncf/experimental/torch/sparsify_activations/openvino_backend.py diff --git a/activation_sparisty.py b/activation_sparisty.py new file mode 100644 index 00000000000..a0e3b148851 --- /dev/null +++ b/activation_sparisty.py @@ -0,0 +1,21 @@ +import numpy as np +import openvino as ov +import nncf + +from nncf.experimental.torch.sparsify_activations import sparsify_activations, TargetScope + +model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/dummy_llama.xml") + +dataset = nncf.Dataset([np.random.random((2, 8)) for _ in range(3)]) +sparse_model = sparsify_activations( + model, + # dataset=dataset, + # target_sparsity_by_scope={TargetScope(patterns=["aten::linear"]): 0.3} + dataset=nncf.Dataset(np.random.randint(0, 30, (3, 2, 8))), + target_sparsity_by_scope={ + TargetScope(patterns=[".*gate_proj.*"]): 0.2, + TargetScope(patterns=[".*up_proj.*"]): 0.3, + TargetScope(patterns=[".*down_proj.*"]): 0.4, + } +) +ov.save_model(sparse_model, "sparse_model.xml", compress_to_fp16=False) diff --git a/nncf/experimental/torch/sparsify_activations/openvino_backend.py b/nncf/experimental/torch/sparsify_activations/openvino_backend.py new file mode 100644 index 00000000000..06bb389f989 --- /dev/null +++ b/nncf/experimental/torch/sparsify_activations/openvino_backend.py @@ -0,0 +1,153 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, List, Type, TypeVar, Optional + +import numpy as np +import torch +import torch.nn as nn +from openvino.runtime import opset13 as opset +import openvino.runtime + +import nncf +import nncf.tensor.functions as fns +from nncf.common.factory import StatisticsAggregatorFactory +from nncf.common.graph.graph import NNCFGraph +from nncf.common.graph.graph import NNCFNode +from nncf.common.graph.operator_metatypes import CONST_NOOP_METATYPES +from nncf.common.graph.operator_metatypes import OperatorMetatype +from nncf.common.graph.transformations.commands import TargetType +from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer, StatisticPoint +from nncf.data import Dataset +from nncf.experimental.common.tensor_statistics.collectors import OnlineAggregatorBase, AggregationAxes, TensorCollector +from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend +from nncf.openvino.graph.model_transformer import OVModelTransformer +from nncf.openvino.graph.transformations.commands import OVTargetPoint +from nncf.openvino.statistics.aggregator import OVStatisticsAggregator +from nncf.openvino.statistics.collectors import OVAbsQuantileReducer +from nncf.tensor import Tensor +from nncf.tensor import TensorDataType +from nncf.tensor.functions.torch_numeric import quantile +from nncf.openvino.graph.metatypes import openvino_metatypes as om +from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand +from nncf.torch.graph.transformations.commands import PTTargetPoint +from nncf.torch.graph.transformations.layout import PTTransformationLayout +from nncf.torch.model_transformer import PTModelTransformer +from nncf.torch.nncf_network import NNCFNetwork +from nncf.torch.utils import training_mode_switcher + +ACTIVATIONS_SPARSIFIER_PREFIX = "activations_sparsifier" +STATISTIC_BRANCH_KEY = "abs_quantile" +ALGORITHM_KEY = "AS" +TModel = TypeVar("TModel") + + +class EMAAggregator(OnlineAggregatorBase): + def __init__( + self, + alpha: float, + num_samples: Optional[int] = None, + window_size: Optional[int] = None, + ): + self._alpha = alpha + super().__init__(aggregation_axes=(0,), num_samples=num_samples, window_size=window_size) + + def _aggregation_fn(self, stacked_value: Tensor, axis: AggregationAxes, keepdims: bool) -> Tensor: + if self._collected_samples == 0: + return stacked_value + else: + beta = 1.0 - self._alpha + new_value = fns.expand_dims(stacked_value[0], 0) + old_value = fns.expand_dims(stacked_value[1], 0) + return (new_value * self._alpha + old_value * beta * (1 - beta ** self._collected_samples) / + (1 - beta ** (self._collected_samples + 1))) + + +class OVSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): + """ + OpenVINO backend for the activation sparsification algorithm. + """ + + SUPPORTED_METATYPES = [om.OVMatMulMetatype] + + @property + def supported_metatypes(self) -> List[Type[OperatorMetatype]]: + return OVSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES + + def insert_sparsifiers( + self, + model: NNCFNetwork, + graph: NNCFGraph, + target_sparsity_by_node: Dict[NNCFNode, float], + ) -> NNCFNetwork: + return model + + def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> TModel: + return None + + def do_sparsification(self, model, graph, target_sparsity_by_node, dataset: Dataset): + statistic_points_container = StatisticPointsContainer() + for node, sparsity in target_sparsity_by_node.items(): + stat_collector = TensorCollector() + stat_collector.register_statistic_branch( + container_key=STATISTIC_BRANCH_KEY, + reducer=OVAbsQuantileReducer(quantile=[sparsity,]), + aggregator=EMAAggregator(alpha=0.2) + ) + statistic_point = StatisticPoint( + target_point=OVTargetPoint(TargetType.PRE_LAYER_OPERATION, node.node_name, port_id=0), + tensor_collector=stat_collector, + algorithm=ALGORITHM_KEY, + ) + statistic_points_container.add_statistic_point(statistic_point) + + statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) + statistics_aggregator.register_statistic_points(statistic_points_container) + statistics_aggregator.collect_statistics(model, graph) + + name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) + for nncf_node in target_sparsity_by_node.keys(): + for tensor_collector in statistic_points_container.get_algo_statistics_for_node( + nncf_node.node_name, lambda args: True, ALGORITHM_KEY + ): + threshold = tensor_collector.get_statistics()[STATISTIC_BRANCH_KEY].data + matmul_node = name_to_node_mapping[nncf_node.node_name] + dense_activation = matmul_node.input(0).get_source_output().get_node() + + dtype = dense_activation.get_element_type() + threshold_const = opset.constant(threshold, dtype=dtype, name=f"{matmul_node.name}/sparsity_threshold") + zero_const = opset.constant(0.0, dtype=dtype) + + less_mask = opset.less_equal(opset.abs(dense_activation), threshold_const) + sparse_activation = opset.select(less_mask, zero_const, dense_activation, name=f"{matmul_node.name}/sparse_input") + matmul_node.input(0).replace_source_output(sparse_activation.output(0)) + + return model + + + @staticmethod + def _get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> int: + """ + Finds the input activation port id for the node. + + :param node: The node to find its activation port id. + :param graph: The NNCF graph containing the node. + :return: The activation port id. + """ + activation_ports = [] + for prev_node in graph.get_previous_nodes(node): + edge = graph.get_edge(prev_node, node) + if prev_node.metatype in CONST_NOOP_METATYPES or edge.input_port_id in node.metatype.weight_port_ids: + continue + activation_ports.append(edge.input_port_id) + if len(activation_ports) != 1: + raise nncf.InternalError(f'Cannot find activation port for node "{node}".') + return activation_ports[0] diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 83a7a418911..8bf3ab999a2 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -91,6 +91,12 @@ def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Datase :return: The model with calibrated activation sparsifiers. """ + @abstractmethod + def do_sparsification(self, model, graph, target_sparsity_by_node, dataset): + """ + + """ + class SparsifyActivationsAlgorithm: """ @@ -154,8 +160,11 @@ def do_sparsification( :param dataset: The dataset to calibrate the activation sparsifiers. :return: The sparsified model. """ - model = self._backend_entity.insert_sparsifiers(model, graph, target_sparsity_by_node) - model = self._backend_entity.calibrate_sparsifiers(model, graph, dataset) + if get_backend(model) == BackendType.TORCH: + model = self._backend_entity.insert_sparsifiers(model, graph, target_sparsity_by_node) + model = self._backend_entity.calibrate_sparsifiers(model, graph, dataset) + else: + model = self._backend_entity.do_sparsification(model, graph, target_sparsity_by_node, dataset) return model def _set_backend_entity(self, model: TModel) -> None: @@ -169,6 +178,10 @@ def _set_backend_entity(self, model: TModel) -> None: from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend self._backend_entity = PTSparsifyActivationsAlgoBackend() + elif model_backend == BackendType.OPENVINO: + from nncf.experimental.torch.sparsify_activations.openvino_backend import OVSparsifyActivationsAlgoBackend + + self._backend_entity = OVSparsifyActivationsAlgoBackend() else: raise nncf.UnsupportedBackendError( f"{model_backend.value} backend is not supported for `sparsify_activations`." From a0d43f5354d4375e3e9d8ca9afa3fe33f3035def Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 5 Aug 2024 16:15:23 +0200 Subject: [PATCH 02/10] Added logic to get act. port id --- activation_sparisty.py | 2 +- .../sparsify_activations/openvino_backend.py | 30 +++++++------------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/activation_sparisty.py b/activation_sparisty.py index a0e3b148851..93f1e20fc24 100644 --- a/activation_sparisty.py +++ b/activation_sparisty.py @@ -10,7 +10,7 @@ sparse_model = sparsify_activations( model, # dataset=dataset, - # target_sparsity_by_scope={TargetScope(patterns=["aten::linear"]): 0.3} + # target_sparsity_by_scope={TargetScope(patterns=[".*linear.*"]): 0.3} dataset=nncf.Dataset(np.random.randint(0, 30, (3, 2, 8))), target_sparsity_by_scope={ TargetScope(patterns=[".*gate_proj.*"]): 0.2, diff --git a/nncf/experimental/torch/sparsify_activations/openvino_backend.py b/nncf/experimental/torch/sparsify_activations/openvino_backend.py index 06bb389f989..f6f2ed21a86 100644 --- a/nncf/experimental/torch/sparsify_activations/openvino_backend.py +++ b/nncf/experimental/torch/sparsify_activations/openvino_backend.py @@ -102,8 +102,9 @@ def do_sparsification(self, model, graph, target_sparsity_by_node, dataset: Data reducer=OVAbsQuantileReducer(quantile=[sparsity,]), aggregator=EMAAggregator(alpha=0.2) ) + activation_port_id = self._get_activation_port_id(node, graph) statistic_point = StatisticPoint( - target_point=OVTargetPoint(TargetType.PRE_LAYER_OPERATION, node.node_name, port_id=0), + target_point=OVTargetPoint(TargetType.PRE_LAYER_OPERATION, node.node_name, port_id=activation_port_id), tensor_collector=stat_collector, algorithm=ALGORITHM_KEY, ) @@ -118,9 +119,10 @@ def do_sparsification(self, model, graph, target_sparsity_by_node, dataset: Data for tensor_collector in statistic_points_container.get_algo_statistics_for_node( nncf_node.node_name, lambda args: True, ALGORITHM_KEY ): + activation_port_id = self._get_activation_port_id(nncf_node, graph) threshold = tensor_collector.get_statistics()[STATISTIC_BRANCH_KEY].data matmul_node = name_to_node_mapping[nncf_node.node_name] - dense_activation = matmul_node.input(0).get_source_output().get_node() + dense_activation = matmul_node.input(activation_port_id).get_source_output().get_node() dtype = dense_activation.get_element_type() threshold_const = opset.constant(threshold, dtype=dtype, name=f"{matmul_node.name}/sparsity_threshold") @@ -128,26 +130,16 @@ def do_sparsification(self, model, graph, target_sparsity_by_node, dataset: Data less_mask = opset.less_equal(opset.abs(dense_activation), threshold_const) sparse_activation = opset.select(less_mask, zero_const, dense_activation, name=f"{matmul_node.name}/sparse_input") - matmul_node.input(0).replace_source_output(sparse_activation.output(0)) + matmul_node.input(activation_port_id).replace_source_output(sparse_activation.output(0)) return model @staticmethod - def _get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> int: - """ - Finds the input activation port id for the node. - - :param node: The node to find its activation port id. - :param graph: The NNCF graph containing the node. - :return: The activation port id. - """ - activation_ports = [] - for prev_node in graph.get_previous_nodes(node): - edge = graph.get_edge(prev_node, node) - if prev_node.metatype in CONST_NOOP_METATYPES or edge.input_port_id in node.metatype.weight_port_ids: - continue - activation_ports.append(edge.input_port_id) - if len(activation_ports) != 1: - raise nncf.InternalError(f'Cannot find activation port for node "{node}".') + def _get_activation_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int: + constant_ports = node.layer_attributes.get_const_port_ids() + activation_ports = [ + e.input_port_id for e in nncf_graph.get_input_edges(node) if e.input_port_id not in constant_ports + ] + assert len(activation_ports) == 1 return activation_ports[0] From 945e7b3aa1190cabc846344b93496a61de46fb12 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Wed, 7 Aug 2024 10:32:36 +0200 Subject: [PATCH 03/10] Hardcode activation port id as a workaround --- .../torch/sparsify_activations/openvino_backend.py | 1 + nncf/experimental/torch/sparsify_activations/torch_backend.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/nncf/experimental/torch/sparsify_activations/openvino_backend.py b/nncf/experimental/torch/sparsify_activations/openvino_backend.py index f6f2ed21a86..a33a786c9d1 100644 --- a/nncf/experimental/torch/sparsify_activations/openvino_backend.py +++ b/nncf/experimental/torch/sparsify_activations/openvino_backend.py @@ -137,6 +137,7 @@ def do_sparsification(self, model, graph, target_sparsity_by_node, dataset: Data @staticmethod def _get_activation_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int: + return 0 constant_ports = node.layer_attributes.get_const_port_ids() activation_ports = [ e.input_port_id for e in nncf_graph.get_input_edges(node) if e.input_port_id not in constant_ports diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index a10f12c6518..d65e89c647f 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -197,3 +197,6 @@ def _get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> int: if len(activation_ports) != 1: raise nncf.InternalError(f'Cannot find activation port for node "{node}".') return activation_ports[0] + + def do_sparsification(self, model, graph, target_sparsity_by_node, dataset): + raise NotImplementedError From 965b5b22c62bf878fb1ccef767d118c22418df0c Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Wed, 21 Aug 2024 18:04:14 +0200 Subject: [PATCH 04/10] Refactor OV and PT backends --- activation_sparisty.py | 2 +- .../sparsify_activations/openvino_backend.py | 113 +++---------- .../sparsify_activations_impl.py | 154 +++++++++++------- .../sparsify_activations/torch_backend.py | 117 +++---------- 4 files changed, 138 insertions(+), 248 deletions(-) diff --git a/activation_sparisty.py b/activation_sparisty.py index 93f1e20fc24..eaa3ac73f07 100644 --- a/activation_sparisty.py +++ b/activation_sparisty.py @@ -18,4 +18,4 @@ TargetScope(patterns=[".*down_proj.*"]): 0.4, } ) -ov.save_model(sparse_model, "sparse_model.xml", compress_to_fp16=False) +# ov.save_model(sparse_model, "sparse_model.xml", compress_to_fp16=False) diff --git a/nncf/experimental/torch/sparsify_activations/openvino_backend.py b/nncf/experimental/torch/sparsify_activations/openvino_backend.py index a33a786c9d1..13bbb167ba5 100644 --- a/nncf/experimental/torch/sparsify_activations/openvino_backend.py +++ b/nncf/experimental/torch/sparsify_activations/openvino_backend.py @@ -9,66 +9,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Type, TypeVar, Optional +from typing import Dict, List, Type, Optional, Union -import numpy as np -import torch -import torch.nn as nn from openvino.runtime import opset13 as opset import openvino.runtime -import nncf -import nncf.tensor.functions as fns -from nncf.common.factory import StatisticsAggregatorFactory from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode -from nncf.common.graph.operator_metatypes import CONST_NOOP_METATYPES from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType -from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer, StatisticPoint -from nncf.data import Dataset -from nncf.experimental.common.tensor_statistics.collectors import OnlineAggregatorBase, AggregationAxes, TensorCollector from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend from nncf.openvino.graph.model_transformer import OVModelTransformer from nncf.openvino.graph.transformations.commands import OVTargetPoint -from nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.openvino.statistics.collectors import OVAbsQuantileReducer -from nncf.tensor import Tensor -from nncf.tensor import TensorDataType from nncf.tensor.functions.torch_numeric import quantile from nncf.openvino.graph.metatypes import openvino_metatypes as om -from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand -from nncf.torch.graph.transformations.commands import PTTargetPoint -from nncf.torch.graph.transformations.layout import PTTransformationLayout -from nncf.torch.model_transformer import PTModelTransformer from nncf.torch.nncf_network import NNCFNetwork -from nncf.torch.utils import training_mode_switcher ACTIVATIONS_SPARSIFIER_PREFIX = "activations_sparsifier" -STATISTIC_BRANCH_KEY = "abs_quantile" -ALGORITHM_KEY = "AS" -TModel = TypeVar("TModel") - - -class EMAAggregator(OnlineAggregatorBase): - def __init__( - self, - alpha: float, - num_samples: Optional[int] = None, - window_size: Optional[int] = None, - ): - self._alpha = alpha - super().__init__(aggregation_axes=(0,), num_samples=num_samples, window_size=window_size) - - def _aggregation_fn(self, stacked_value: Tensor, axis: AggregationAxes, keepdims: bool) -> Tensor: - if self._collected_samples == 0: - return stacked_value - else: - beta = 1.0 - self._alpha - new_value = fns.expand_dims(stacked_value[0], 0) - old_value = fns.expand_dims(stacked_value[1], 0) - return (new_value * self._alpha + old_value * beta * (1 - beta ** self._collected_samples) / - (1 - beta ** (self._collected_samples + 1))) class OVSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): @@ -76,68 +34,43 @@ class OVSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): OpenVINO backend for the activation sparsification algorithm. """ - SUPPORTED_METATYPES = [om.OVMatMulMetatype] - @property def supported_metatypes(self) -> List[Type[OperatorMetatype]]: - return OVSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES + return [om.OVMatMulMetatype] + + def abs_quantile_reducer(self, quantile: Optional[Union[float, List[float]]] = None) -> OVAbsQuantileReducer: + return OVAbsQuantileReducer(quantile=quantile) + + def target_point(self, target_type: TargetType, target_node_name: str, port_id: int) -> OVTargetPoint: + return OVTargetPoint(TargetType.PRE_LAYER_OPERATION, target_node_name, port_id=port_id) def insert_sparsifiers( self, - model: NNCFNetwork, + model: openvino.Model, graph: NNCFGraph, - target_sparsity_by_node: Dict[NNCFNode, float], + threshold_by_node: Dict[NNCFNode, float], ) -> NNCFNetwork: - return model - - def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> TModel: - return None - - def do_sparsification(self, model, graph, target_sparsity_by_node, dataset: Dataset): - statistic_points_container = StatisticPointsContainer() - for node, sparsity in target_sparsity_by_node.items(): - stat_collector = TensorCollector() - stat_collector.register_statistic_branch( - container_key=STATISTIC_BRANCH_KEY, - reducer=OVAbsQuantileReducer(quantile=[sparsity,]), - aggregator=EMAAggregator(alpha=0.2) - ) - activation_port_id = self._get_activation_port_id(node, graph) - statistic_point = StatisticPoint( - target_point=OVTargetPoint(TargetType.PRE_LAYER_OPERATION, node.node_name, port_id=activation_port_id), - tensor_collector=stat_collector, - algorithm=ALGORITHM_KEY, - ) - statistic_points_container.add_statistic_point(statistic_point) - - statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) - statistics_aggregator.register_statistic_points(statistic_points_container) - statistics_aggregator.collect_statistics(model, graph) - name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) - for nncf_node in target_sparsity_by_node.keys(): - for tensor_collector in statistic_points_container.get_algo_statistics_for_node( - nncf_node.node_name, lambda args: True, ALGORITHM_KEY - ): - activation_port_id = self._get_activation_port_id(nncf_node, graph) - threshold = tensor_collector.get_statistics()[STATISTIC_BRANCH_KEY].data - matmul_node = name_to_node_mapping[nncf_node.node_name] - dense_activation = matmul_node.input(activation_port_id).get_source_output().get_node() + for nncf_node, threshold in threshold_by_node.items(): + activation_port_id = self.get_activation_port_id(nncf_node, graph) + matmul_node = name_to_node_mapping[nncf_node.node_name] + dense_activation = matmul_node.input(activation_port_id).get_source_output().get_node() - dtype = dense_activation.get_element_type() - threshold_const = opset.constant(threshold, dtype=dtype, name=f"{matmul_node.name}/sparsity_threshold") - zero_const = opset.constant(0.0, dtype=dtype) + dtype = dense_activation.get_element_type() + threshold_const = opset.constant(threshold, dtype=dtype, name=f"{matmul_node.name}/sparsity_threshold") + zero_const = opset.constant(0.0, dtype=dtype) - less_mask = opset.less_equal(opset.abs(dense_activation), threshold_const) - sparse_activation = opset.select(less_mask, zero_const, dense_activation, name=f"{matmul_node.name}/sparse_input") - matmul_node.input(activation_port_id).replace_source_output(sparse_activation.output(0)) + less_mask = opset.less_equal(opset.abs(dense_activation), threshold_const) + sparse_activation = opset.select(less_mask, zero_const, dense_activation, name=f"{matmul_node.name}/sparse_input") + matmul_node.input(activation_port_id).replace_source_output(sparse_activation.output(0)) return model - @staticmethod - def _get_activation_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int: + def get_activation_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int: return 0 + + # Code below won't work for the case of compressed weight constant constant_ports = node.layer_attributes.get_const_port_ids() activation_ports = [ e.input_port_id for e in nncf_graph.get_input_edges(node) if e.input_port_id not in constant_ports diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 8bf3ab999a2..1ba29b11073 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -11,27 +11,38 @@ from abc import ABC from abc import abstractmethod -from typing import Dict, List, Optional, Type, TypeVar +from typing import Dict, List, Optional, Type, TypeVar, Union import nncf -from nncf.common import factory +import nncf.tensor.functions as fns from nncf.common.factory import NNCFGraphFactory +from nncf.common.factory import StatisticsAggregatorFactory from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype -from nncf.common.logging.track_progress import track +from nncf.common.graph.transformations.commands import TargetPoint +from nncf.common.graph.transformations.commands import TargetType from nncf.common.scopes import should_consider_scope +from nncf.common.tensor_statistics.statistic_point import StatisticPoint +from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend from nncf.data import Dataset +from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer +from nncf.experimental.common.tensor_statistics.collectors import AggregationAxes +from nncf.experimental.common.tensor_statistics.collectors import OnlineAggregatorBase +from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.experimental.torch.sparsify_activations.target_scope import TargetScope from nncf.experimental.torch.sparsify_activations.target_scope import get_target_node_names_from_target_scope from nncf.scopes import IgnoredScope from nncf.scopes import get_ignored_node_names_from_ignored_scope +from nncf.tensor import Tensor from nncf.torch.model_creation import is_wrapped_model from nncf.torch.model_creation import wrap_model TModel = TypeVar("TModel") +STATISTIC_BRANCH_KEY = "abs_quantile" +ALGORITHM_KEY = "AS" class SparsifyActivationsAlgoBackend(ABC): @@ -39,24 +50,6 @@ class SparsifyActivationsAlgoBackend(ABC): Abstract class for activation sparsification algorithm backend. """ - CALIBRATION_TRACKING_DESC = "Conducting Activations Sparsifier Calibration" - - @staticmethod - def do_inference(model: TModel, dataset: Dataset): - """ - Conducts model inference on given dataset to calibrate the activation sparsifiers. - - :param model: The model with activation sparsifiers. - :param dataset: The calibration dataset to update the sparsifiers. - """ - engine = factory.EngineFactory.create(model) - for input_data in track( - dataset.get_inference_data(), - total=dataset.get_length(), - description=SparsifyActivationsAlgoBackend.CALIBRATION_TRACKING_DESC, - ): - engine.infer(input_data) - @property @abstractmethod def supported_metatypes(self) -> List[Type[OperatorMetatype]]: @@ -64,12 +57,20 @@ def supported_metatypes(self) -> List[Type[OperatorMetatype]]: Property for the backend-specific metatypes for supported layers. """ + @abstractmethod + def abs_quantile_reducer(self, quantile: Optional[Union[float, List[float]]] = None) -> AbsQuantileReducer: + """ """ + + @abstractmethod + def target_point(self, target_type: TargetType, target_node_name: str, port_id: int) -> TargetPoint: + """ """ + @abstractmethod def insert_sparsifiers( self, model: TModel, graph: NNCFGraph, - target_sparsity_by_node: Dict[NNCFNode, float], + threshold_by_node: Dict[NNCFNode, float], ) -> TModel: """ Inserts the activation sparsifiers to the model. @@ -80,21 +81,15 @@ def insert_sparsifiers( :return: The model with inserted activation sparsifiers. """ + @staticmethod @abstractmethod - def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> TModel: - """ - Calibrates the thresholds in the activation sparsifiers. - - :param model: The model with inserted activation sparsifiers. - :param graph: The model's NNCF graph. - :param dataset: The calibration dataset to update the thresholds in the sparsifiers. - :return: The model with calibrated activation sparsifiers. - """ - - @abstractmethod - def do_sparsification(self, model, graph, target_sparsity_by_node, dataset): + def get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> int: """ + Finds the input activation port id for the node. + :param node: The node to find its activation port id. + :param graph: The NNCF graph containing the node. + :return: The activation port id. """ @@ -122,7 +117,7 @@ def available_backends(self) -> List[BackendType]: """ Supported backends for this algorithm. """ - return [BackendType.TORCH] + return [BackendType.TORCH, BackendType.OPENVINO] def apply( self, @@ -140,33 +135,10 @@ def apply( """ self._set_backend_entity(model) target_sparsity_by_node = self._get_target_sparsity_by_node(graph) - sparse_model = self.do_sparsification(model, graph, target_sparsity_by_node, dataset) + threshold_by_node = self._get_threshold_by_node(model, graph, target_sparsity_by_node, dataset) + sparse_model = self._backend_entity.insert_sparsifiers(model, graph, threshold_by_node) return sparse_model - def do_sparsification( - self, - model: TModel, - graph: NNCFGraph, - target_sparsity_by_node: Dict[NNCFNode, float], - dataset: Dataset, - ): - """ - Transforms the model into a sparsified one with node-specific target activation sparsity levels. - - :param model: The model to be sparsified. - :param graph: The model's NNCF graph. - :param target_sparsity_by_node: A dictionary that defines the target sparsity level - for specified node layers. - :param dataset: The dataset to calibrate the activation sparsifiers. - :return: The sparsified model. - """ - if get_backend(model) == BackendType.TORCH: - model = self._backend_entity.insert_sparsifiers(model, graph, target_sparsity_by_node) - model = self._backend_entity.calibrate_sparsifiers(model, graph, dataset) - else: - model = self._backend_entity.do_sparsification(model, graph, target_sparsity_by_node, dataset) - return model - def _set_backend_entity(self, model: TModel) -> None: """ Creates a helper class with a backend-specific logic of the algorithm. @@ -216,6 +188,46 @@ def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float raise nncf.ValidationError("No layers to conduct activation sparsification.") return target_sparsity_by_node + def _get_threshold_by_node(self, model, graph, target_sparsity_by_node, dataset: Dataset) -> Dict[NNCFNode, float]: + statistic_points_container = StatisticPointsContainer() + for node, sparsity in target_sparsity_by_node.items(): + stat_collector = TensorCollector() + stat_collector.register_statistic_branch( + container_key=STATISTIC_BRANCH_KEY, + reducer=self._backend_entity.abs_quantile_reducer( + quantile=[ + sparsity, + ] + ), + aggregator=EMAAggregator(alpha=0.2), + ) + activation_port_id = self._backend_entity.get_activation_port_id(node, graph) + statistic_point = StatisticPoint( + target_point=self._backend_entity.target_point( + TargetType.PRE_LAYER_OPERATION, node.node_name, port_id=activation_port_id + ), + tensor_collector=stat_collector, + algorithm=ALGORITHM_KEY, + ) + statistic_points_container.add_statistic_point(statistic_point) + + statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) + statistics_aggregator.register_statistic_points(statistic_points_container) + statistics_aggregator.collect_statistics(model, graph) + + threshold_by_node = {} + for nncf_node in target_sparsity_by_node: + tensor_collector = next( + iter( + statistic_points_container.get_algo_statistics_for_node( + nncf_node.node_name, lambda args: True, ALGORITHM_KEY + ) + ) + ) + threshold_by_node[nncf_node] = tensor_collector.get_statistics()[STATISTIC_BRANCH_KEY].data + + return threshold_by_node + def sparsify_activations( model: TModel, @@ -275,3 +287,25 @@ def sparsify_activations( graph = NNCFGraphFactory.create(model) sparse_model = algorithm.apply(model, graph, dataset) return sparse_model + + +class EMAAggregator(OnlineAggregatorBase): + def __init__( + self, + alpha: float, + num_samples: Optional[int] = None, + window_size: Optional[int] = None, + ): + self._alpha = alpha + super().__init__(aggregation_axes=(0,), num_samples=num_samples, window_size=window_size) + + def _aggregation_fn(self, stacked_value: Tensor, axis: AggregationAxes, keepdims: bool) -> Tensor: + if self._collected_samples == 0: + return stacked_value + else: + beta = 1.0 - self._alpha + new_value = fns.expand_dims(stacked_value[0], 0) + old_value = fns.expand_dims(stacked_value[1], 0) + return new_value * self._alpha + old_value * beta * (1 - beta**self._collected_samples) / ( + 1 - beta ** (self._collected_samples + 1) + ) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index d65e89c647f..ad67521efbe 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Type, TypeVar +from typing import Dict, List, Optional, Type, Union import torch import torch.nn as nn @@ -20,19 +20,16 @@ from nncf.common.graph.operator_metatypes import CONST_NOOP_METATYPES from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType -from nncf.data import Dataset from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend -from nncf.tensor.functions.torch_numeric import quantile from nncf.torch.graph import operator_metatypes as om from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand from nncf.torch.graph.transformations.commands import PTTargetPoint from nncf.torch.graph.transformations.layout import PTTransformationLayout from nncf.torch.model_transformer import PTModelTransformer from nncf.torch.nncf_network import NNCFNetwork -from nncf.torch.utils import training_mode_switcher +from nncf.torch.tensor_statistics.collectors import PTAbsQuantileReducer ACTIVATIONS_SPARSIFIER_PREFIX = "activations_sparsifier" -TModel = TypeVar("TModel") class ActivationsSparsifier(nn.Module): @@ -40,82 +37,19 @@ class ActivationsSparsifier(nn.Module): Sparsifies input activations by masking out values around zero. """ - def __init__(self, target_sparsity: float, alpha: float = 0.2): - """ - :param target_sparsity: The target activation sparsity level. - :param alpha: The exponential moving average decay factor in range (0, 1) for calibrating - the threshold. A larger alpha will give more weight to the most recent batches. - """ + def __init__(self, threshold: float): + """ """ super().__init__() - self.target_sparsity = target_sparsity - if alpha <= 0.0 or alpha >= 1.0: - raise ValueError("The decay factor `alpha` should be in range (0, 1).") - self.alpha = alpha - self.register_buffer("running_threshold", torch.tensor(float("-inf"))) - self.register_buffer("num_batches_tracked", torch.tensor(0)) - self.running_threshold: torch.Tensor - self.num_batches_tracked: torch.Tensor - self._freeze = True - - @staticmethod - def calculate_threshold(x: torch.Tensor, target_sparsity: float) -> torch.Tensor: - """ - Calculates the threshold to sparsify the input tensor with target sparsity if locations of - `x.abs() <= threshold` are zeroed out. - - :param x: The input tensor. - :param target_sparsity: The target sparsity level on the input tensor. - :return: The threshold value. - """ - return quantile(x.detach().abs().view(-1), q=target_sparsity, axis=0) - - @property - def freeze(self): - return self._freeze - - @freeze.setter - def freeze(self, value: bool): - self._freeze = value + self.register_buffer("threshold", torch.tensor(threshold, dtype=torch.float32)) + self.threshold: torch.Tensor def forward(self, x: torch.Tensor) -> torch.Tensor: - if not self.freeze: - threshold = self.calculate_threshold(x, self.target_sparsity) - self._update(threshold, dtype=x.dtype) - mask = torch.le(x.abs(), self.running_threshold) + mask = torch.le(x.abs(), self.threshold) x = torch.masked_fill(x, mask, 0.0) return x - def reset_running_stats(self): - """ - Resets the running threshold and the number of tracked batches to the initial stage. - """ - self.running_threshold.fill_(float("-inf")) - self.num_batches_tracked.zero_() - def extra_repr(self) -> str: - return f"target_sparsity={self.target_sparsity}" - - def _update(self, threshold: torch.Tensor, dtype: torch.dtype) -> torch.Tensor: - """ - Updates the running threshold by exponential moving average with decaying adjustment. - The updating logic is similar to `pandas.DataFrame.ewm(adjust=True)`. - - :param threshold: The threshold value derived from this batch to update the running threshold. - :param dtype: Data type of the updated running threshold. - :return: The updated running threshold. - """ - if self.num_batches_tracked == 0: - running_threshold = threshold - else: - beta = 1.0 - self.alpha - old_running_threshold = self.running_threshold.to(device=threshold.device, dtype=torch.float64) - running_threshold = ( - threshold.to(torch.float64) * self.alpha - + old_running_threshold * beta * (1 - beta**self.num_batches_tracked) - ) / (1 - beta ** (self.num_batches_tracked + 1)) - self.running_threshold = running_threshold.type(dtype) - self.num_batches_tracked += 1 - return self.running_threshold + return f"target_sparsity={self.threshold}" class PTSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): @@ -123,8 +57,6 @@ class PTSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): Torch backend for the activation sparsification algorithm. """ - SUPPORTED_METATYPES = [om.PTLinearMetatype] - @staticmethod def get_sparsifiers(model: NNCFNetwork) -> List[ActivationsSparsifier]: """ @@ -137,18 +69,24 @@ def get_sparsifiers(model: NNCFNetwork) -> List[ActivationsSparsifier]: @property def supported_metatypes(self) -> List[Type[OperatorMetatype]]: - return PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES + return [om.PTLinearMetatype] + + def abs_quantile_reducer(self, quantile: Optional[Union[float, List[float]]] = None) -> PTAbsQuantileReducer: + return PTAbsQuantileReducer(quantile=quantile) + + def target_point(self, target_type: TargetType, target_node_name: str, port_id: int) -> PTTargetPoint: + return PTTargetPoint(TargetType.PRE_LAYER_OPERATION, target_node_name, input_port_id=port_id) def insert_sparsifiers( self, model: NNCFNetwork, graph: NNCFGraph, - target_sparsity_by_node: Dict[NNCFNode, float], + threshold_by_node: Dict[NNCFNode, float], ) -> NNCFNetwork: transformation_layout = PTTransformationLayout() - for node, target_sparsity in target_sparsity_by_node.items(): - activation_port_id = self._get_activation_port_id(node, graph) - sparsifier = ActivationsSparsifier(target_sparsity=target_sparsity) + for node, threshold in threshold_by_node.items(): + activation_port_id = self.get_activation_port_id(node, graph) + sparsifier = ActivationsSparsifier(threshold=threshold) sparsifier_name = f"{ACTIVATIONS_SPARSIFIER_PREFIX}_{node.node_name.replace('.', '_')}" transformation_layout.register( PTSharedFnInsertionCommand( @@ -167,20 +105,8 @@ def insert_sparsifiers( transformed_model = PTModelTransformer(model).transform(transformation_layout) return transformed_model - def calibrate_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph, dataset: Dataset) -> NNCFNetwork: - sparsifiers = self.get_sparsifiers(model) - for sparsifier in sparsifiers: - sparsifier.reset_running_stats() - sparsifier.freeze = False - with training_mode_switcher(model, is_training=False): - with torch.no_grad(): - self.do_inference(model, dataset) - for sparsifier in sparsifiers: - sparsifier.freeze = True - return model - @staticmethod - def _get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> int: + def get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> int: """ Finds the input activation port id for the node. @@ -197,6 +123,3 @@ def _get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> int: if len(activation_ports) != 1: raise nncf.InternalError(f'Cannot find activation port for node "{node}".') return activation_ports[0] - - def do_sparsification(self, model, graph, target_sparsity_by_node, dataset): - raise NotImplementedError From 812c033209274b5ac260f817a48c8b1fcc9a18e5 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 26 Aug 2024 20:45:35 +0200 Subject: [PATCH 05/10] Match compressed constant subgraph --- activation_sparisty.py | 26 +++++----- .../sparsify_activations/openvino_backend.py | 48 ++++++++++++++----- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/activation_sparisty.py b/activation_sparisty.py index eaa3ac73f07..c9c528a325a 100644 --- a/activation_sparisty.py +++ b/activation_sparisty.py @@ -1,21 +1,25 @@ import numpy as np import openvino as ov -import nncf -from nncf.experimental.torch.sparsify_activations import sparsify_activations, TargetScope +import nncf +from nncf.experimental.torch.sparsify_activations import TargetScope +from nncf.experimental.torch.sparsify_activations import sparsify_activations -model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/dummy_llama.xml") +# model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/dummy_llama.xml") +# model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/ov/linear.xml") +# model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/ov/linear_int8.xml") +model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/ov/linear_int4.xml") dataset = nncf.Dataset([np.random.random((2, 8)) for _ in range(3)]) sparse_model = sparsify_activations( model, - # dataset=dataset, - # target_sparsity_by_scope={TargetScope(patterns=[".*linear.*"]): 0.3} - dataset=nncf.Dataset(np.random.randint(0, 30, (3, 2, 8))), - target_sparsity_by_scope={ - TargetScope(patterns=[".*gate_proj.*"]): 0.2, - TargetScope(patterns=[".*up_proj.*"]): 0.3, - TargetScope(patterns=[".*down_proj.*"]): 0.4, - } + dataset=dataset, + target_sparsity_by_scope={TargetScope(patterns=[".*linear.*"]): 0.3}, + # dataset=nncf.Dataset(np.random.randint(0, 30, (3, 2, 8))), + # target_sparsity_by_scope={ + # TargetScope(patterns=[".*gate_proj.*"]): 0.2, + # TargetScope(patterns=[".*up_proj.*"]): 0.3, + # TargetScope(patterns=[".*down_proj.*"]): 0.4, + # } ) # ov.save_model(sparse_model, "sparse_model.xml", compress_to_fp16=False) diff --git a/nncf/experimental/torch/sparsify_activations/openvino_backend.py b/nncf/experimental/torch/sparsify_activations/openvino_backend.py index 13bbb167ba5..60e561ccf29 100644 --- a/nncf/experimental/torch/sparsify_activations/openvino_backend.py +++ b/nncf/experimental/torch/sparsify_activations/openvino_backend.py @@ -9,21 +9,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Type, Optional, Union +from typing import Dict, List, Optional, Type, Union -from openvino.runtime import opset13 as opset import openvino.runtime +from openvino.runtime import opset13 as opset from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend +from nncf.openvino.graph.metatypes import openvino_metatypes as om from nncf.openvino.graph.model_transformer import OVModelTransformer from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.openvino.statistics.collectors import OVAbsQuantileReducer -from nncf.tensor.functions.torch_numeric import quantile -from nncf.openvino.graph.metatypes import openvino_metatypes as om from nncf.torch.nncf_network import NNCFNetwork ACTIVATIONS_SPARSIFIER_PREFIX = "activations_sparsifier" @@ -61,19 +60,42 @@ def insert_sparsifiers( zero_const = opset.constant(0.0, dtype=dtype) less_mask = opset.less_equal(opset.abs(dense_activation), threshold_const) - sparse_activation = opset.select(less_mask, zero_const, dense_activation, name=f"{matmul_node.name}/sparse_input") + sparse_activation = opset.select( + less_mask, zero_const, dense_activation, name=f"{matmul_node.name}/sparse_input" + ) matmul_node.input(activation_port_id).replace_source_output(sparse_activation.output(0)) return model @staticmethod - def get_activation_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int: - return 0 + def get_activation_port_id(matmul_node: NNCFNode, nncf_graph: NNCFGraph) -> int: + # return 0 + n_inputs = len(nncf_graph.get_input_edges(matmul_node)) + if n_inputs != 2: + raise RuntimeError(f"Expected node to have two inputs, but found {n_inputs} for node {matmul_node}.") - # Code below won't work for the case of compressed weight constant - constant_ports = node.layer_attributes.get_const_port_ids() - activation_ports = [ - e.input_port_id for e in nncf_graph.get_input_edges(node) if e.input_port_id not in constant_ports + is_const_node_on_port = [ + nncf_graph.get_input_edges(matmul_node)[i].from_node.node_type == "Constant" for i in range(2) ] - assert len(activation_ports) == 1 - return activation_ports[0] + if is_const_node_on_port[0] != is_const_node_on_port[1]: + return 1 if is_const_node_on_port[0] else 0 + + # Try to match compressed constant subgraph + for i in range(2): + node = nncf_graph.get_input_edges(matmul_node)[i].from_node + if node.node_type == "Convert": + node = nncf_graph.get_input_edges(node)[0].from_node + if node.node_type == "Multiply": + node = nncf_graph.get_input_edges(node)[0].from_node + else: + continue + if node.node_type == "Subtract": + node = nncf_graph.get_input_edges(node)[0].from_node + else: + continue + if node.node_type == "Convert": + node = nncf_graph.get_input_edges(node)[0].from_node + if node.node_type == "Constant": + return int(i == 0) + + raise RuntimeError(f"Could not find activation port id for node {matmul_node}.") From 618f050fdf499d6cbf693580e44e4ff454c6e4a8 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Wed, 28 Aug 2024 11:43:49 +0200 Subject: [PATCH 06/10] Tests WIP --- .../sparsify_activations/ema_aggregator.py | 41 +++++++++ .../sparsify_activations_impl.py | 27 +----- .../sparsify_activations/torch_backend.py | 10 --- .../sparsify_activations/test_algo.py | 4 +- .../sparsify_activations/test_components.py | 84 +++---------------- 5 files changed, 55 insertions(+), 111 deletions(-) create mode 100644 nncf/experimental/torch/sparsify_activations/ema_aggregator.py diff --git a/nncf/experimental/torch/sparsify_activations/ema_aggregator.py b/nncf/experimental/torch/sparsify_activations/ema_aggregator.py new file mode 100644 index 00000000000..c3089bdaa80 --- /dev/null +++ b/nncf/experimental/torch/sparsify_activations/ema_aggregator.py @@ -0,0 +1,41 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +import nncf.tensor.functions as fns + +from nncf.experimental.common.tensor_statistics.collectors import AggregationAxes +from nncf.experimental.common.tensor_statistics.collectors import OnlineAggregatorBase + +from nncf.tensor import Tensor + + +# TODO: add tests +class EMAAggregator(OnlineAggregatorBase): + def __init__( + self, + alpha: float, + num_samples: Optional[int] = None, + window_size: Optional[int] = None, + ): + self._alpha = alpha + super().__init__(aggregation_axes=(0,), num_samples=num_samples, window_size=window_size) + + def _aggregation_fn(self, stacked_value: Tensor, axis: AggregationAxes, keepdims: bool) -> Tensor: + if self._collected_samples == 0: + return stacked_value + else: + beta = 1.0 - self._alpha + new_value = fns.expand_dims(stacked_value[0], 0) + old_value = fns.expand_dims(stacked_value[1], 0) + return new_value * self._alpha + old_value * beta * (1 - beta**self._collected_samples) / ( + 1 - beta ** (self._collected_samples + 1) + ) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 1ba29b11073..e89bf0255ea 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -14,7 +14,6 @@ from typing import Dict, List, Optional, Type, TypeVar, Union import nncf -import nncf.tensor.functions as fns from nncf.common.factory import NNCFGraphFactory from nncf.common.factory import StatisticsAggregatorFactory from nncf.common.graph.graph import NNCFGraph @@ -29,14 +28,12 @@ from nncf.common.utils.backend import get_backend from nncf.data import Dataset from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer -from nncf.experimental.common.tensor_statistics.collectors import AggregationAxes -from nncf.experimental.common.tensor_statistics.collectors import OnlineAggregatorBase from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.experimental.torch.sparsify_activations.target_scope import TargetScope from nncf.experimental.torch.sparsify_activations.target_scope import get_target_node_names_from_target_scope +from nncf.experimental.torch.sparsify_activations.ema_aggregator import EMAAggregator from nncf.scopes import IgnoredScope from nncf.scopes import get_ignored_node_names_from_ignored_scope -from nncf.tensor import Tensor from nncf.torch.model_creation import is_wrapped_model from nncf.torch.model_creation import wrap_model @@ -287,25 +284,3 @@ def sparsify_activations( graph = NNCFGraphFactory.create(model) sparse_model = algorithm.apply(model, graph, dataset) return sparse_model - - -class EMAAggregator(OnlineAggregatorBase): - def __init__( - self, - alpha: float, - num_samples: Optional[int] = None, - window_size: Optional[int] = None, - ): - self._alpha = alpha - super().__init__(aggregation_axes=(0,), num_samples=num_samples, window_size=window_size) - - def _aggregation_fn(self, stacked_value: Tensor, axis: AggregationAxes, keepdims: bool) -> Tensor: - if self._collected_samples == 0: - return stacked_value - else: - beta = 1.0 - self._alpha - new_value = fns.expand_dims(stacked_value[0], 0) - old_value = fns.expand_dims(stacked_value[1], 0) - return new_value * self._alpha + old_value * beta * (1 - beta**self._collected_samples) / ( - 1 - beta ** (self._collected_samples + 1) - ) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index ad67521efbe..388bc52ecae 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -57,16 +57,6 @@ class PTSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): Torch backend for the activation sparsification algorithm. """ - @staticmethod - def get_sparsifiers(model: NNCFNetwork) -> List[ActivationsSparsifier]: - """ - Finds all the activation sparsifiers in the model. - - :param model: The model with activation sparsifiers. - :return: List of activation sparsifiers. - """ - return [m for m in model.nncf.modules() if isinstance(m, ActivationsSparsifier)] - @property def supported_metatypes(self) -> List[Type[OperatorMetatype]]: return [om.PTLinearMetatype] diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index b7214aaa5fa..1a2b2904f4f 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -164,8 +164,6 @@ def test_inserted_sparsifier(self): num_sparsifiers = 0 for name, op in model.nncf.external_op.items(): if isinstance(op, ActivationsSparsifier): - assert op.target_sparsity == desc.ref_sparsifier_target_sparsity[name] - assert op.num_batches_tracked == desc.ref_num_batches_tracked num_sparsifiers += 1 assert num_sparsifiers == len(desc.ref_sparsifier_target_sparsity) @@ -196,7 +194,7 @@ def test_export_openvino(self): ov_outputs = compiled_model(example_input.cpu()).to_tuple() assert len(torch_outputs) == len(ov_outputs) for torch_output, ov_output in zip(torch_outputs, ov_outputs): - torch.testing.assert_close(torch_output.cpu(), torch.from_numpy(ov_output), rtol=1e-3, atol=1e-3) + torch.testing.assert_close(torch_output.cpu(), torch.from_numpy(ov_output), rtol=1e-2, atol=1e-2) @dataclass diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index 9c5fde1c9e5..1419ad70612 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -108,72 +108,32 @@ def setup(self, use_cuda: bool): self.device = torch.device("cuda" if use_cuda else "cpu") @pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) - def test_forward_before_calibration(self, use_cuda: bool, dtype: torch.dtype): + def test_sparsifier_forward(self, use_cuda: bool, dtype: torch.dtype): device = self.device - input_tensor = torch.rand([3, 3], device=device, dtype=dtype) - sparsifier = ActivationsSparsifier(target_sparsity=0.9).to(device) - assert sparsifier.freeze is True - assert not sparsifier.num_batches_tracked.is_nonzero() - assert sparsifier.running_threshold.isneginf() - output_tensor = sparsifier(input_tensor) - # The output tensor is a new tensor - assert not output_tensor.is_set_to(input_tensor) - # Before calibration, the sparsifier does not change the input - torch.testing.assert_close(output_tensor, input_tensor, rtol=1e-4, atol=1e-4) - - @pytest.mark.parametrize( - "desc", - sparsifier_forward_during_calibration_test_descs.values(), - ids=sparsifier_forward_during_calibration_test_descs.keys(), - ) - def test_forward_during_calibration(self, use_cuda: bool, desc: SparsifierForwardTestDesc): - device = self.device - sparsifier = ActivationsSparsifier(desc.target_sparsity, desc.alpha).to(device) - sparsifier.freeze = False - running_thresholds = [] - outputs = [] - with torch.no_grad(): - for batch in desc.input_batches: - output = sparsifier(batch.to(device)) - running_thresholds.append(sparsifier.running_threshold) - outputs.append(output) - assert sparsifier.num_batches_tracked == len(desc.input_batches) - assert len(running_thresholds) == len(desc.ref_running_thresholds) - for threshold, ref_threshold in zip(running_thresholds, desc.ref_running_thresholds): - assert threshold.device.type == device.type - torch.testing.assert_close(threshold, ref_threshold, rtol=1e-4, atol=1e-4, check_device=False) - assert len(outputs) == len(desc.ref_outputs) - for output, ref_output in zip(outputs, desc.ref_outputs): - assert output.device.type == device.type - torch.testing.assert_close(output, ref_output, rtol=1e-4, atol=1e-4, check_device=False) - - @pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) - def test_forward_after_calibration(self, use_cuda: bool, dtype: torch.dtype): - device = self.device - sparsifier = ActivationsSparsifier(target_sparsity=0.9).to(device) - sparsifier.running_threshold.fill_(0.1) - sparsifier.num_batches_tracked.fill_(100) + sparsifier = ActivationsSparsifier(threshold=0.1).to(device) for _ in range(2): # The sparsifier does not change in the following forwards input_tensor = torch.rand([2, 10], device=device, dtype=dtype) ref_output = torch.where(input_tensor.abs() <= 0.1, 0.0, input_tensor) output_tensor = sparsifier(ref_output) - assert sparsifier.num_batches_tracked == 100 torch.testing.assert_close( - sparsifier.running_threshold, torch.tensor(0.1, device=device), rtol=1e-4, atol=1e-4 + sparsifier.threshold, torch.tensor(0.1, device=device), rtol=1e-4, atol=1e-4 ) torch.testing.assert_close(output_tensor, ref_output, rtol=1e-4, atol=1e-4) class TestPTSparsifyActivationsAlgoBackend: + @staticmethod + def get_sparsifiers(model: NNCFNetwork) -> List[ActivationsSparsifier]: + return [m for m in model.nncf.modules() if isinstance(m, ActivationsSparsifier)] + def test_get_sparsifiers(self): model, dataset = self.create_model_and_dataset() sparse_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( model, dataset, target_sparsity_by_scope={TargetScope(patterns=[".*"]): 0.5} ) - backend = PTSparsifyActivationsAlgoBackend() - sparsifiers = backend.get_sparsifiers(sparse_model) + sparsifiers = self.get_sparsifiers(sparse_model) assert len(sparsifiers) == 3 @pytest.mark.parametrize("compress_weights", [False, True]) @@ -183,35 +143,15 @@ def test_insert_sparsifiers(self, compress_weights: bool): ref_output = model(example_input) graph = model.nncf.get_graph() - nodes = graph.get_nodes_by_metatypes(PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES) backend = PTSparsifyActivationsAlgoBackend() - model_with_sparsifiers = backend.insert_sparsifiers(model, graph, {node: 0.9 for node in nodes}) - assert len(backend.get_sparsifiers(model_with_sparsifiers)) == len(nodes) + nodes = graph.get_nodes_by_metatypes(backend.supported_metatypes) + model_with_sparsifiers = backend.insert_sparsifiers(model, graph, {node: 0.0 for node in nodes}) + assert len(self.get_sparsifiers(model_with_sparsifiers)) == len(nodes) output = model_with_sparsifiers(example_input) torch.testing.assert_close( output, ref_output, rtol=1e-4, atol=1e-4 - ) # At this time the sparsifers do not change the output - - def test_calibrate_sparsifiers(self, mocker): - model, dataset = self.create_model_and_dataset() - graph = model.nncf.get_graph() - backend = PTSparsifyActivationsAlgoBackend() - mock_sparsifier = ActivationsSparsifier(0.5, 0.1) - mock_sparsifier.freeze = True - num_model_forward_calls = 0 - - def model_forward_pre_hook(model: NNCFNetwork, args): - nonlocal num_model_forward_calls - num_model_forward_calls += 1 - assert model.training is False - - model.register_forward_pre_hook(model_forward_pre_hook) - - with mocker.patch.object(backend, "get_sparsifiers", return_value=[mock_sparsifier]): - backend.calibrate_sparsifiers(model, graph, dataset) - assert mock_sparsifier.freeze is True - assert num_model_forward_calls == dataset.get_length() + ) # Since threshold is 0.0 sparsifiers do not change the output def create_model_and_dataset(self, compress_weights: bool = False): model = ThreeLinearModel() From 25dbb7283fde603374ac7eb411906f154ea18d91 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Wed, 28 Aug 2024 15:02:30 +0200 Subject: [PATCH 07/10] Remove helper script --- activation_sparisty.py | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 activation_sparisty.py diff --git a/activation_sparisty.py b/activation_sparisty.py deleted file mode 100644 index c9c528a325a..00000000000 --- a/activation_sparisty.py +++ /dev/null @@ -1,25 +0,0 @@ -import numpy as np -import openvino as ov - -import nncf -from nncf.experimental.torch.sparsify_activations import TargetScope -from nncf.experimental.torch.sparsify_activations import sparsify_activations - -# model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/dummy_llama.xml") -# model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/ov/linear.xml") -# model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/ov/linear_int8.xml") -model = ov.Core().read_model("/home/nsavel/workspace/nncf_local/dummy_models/ov/linear_int4.xml") - -dataset = nncf.Dataset([np.random.random((2, 8)) for _ in range(3)]) -sparse_model = sparsify_activations( - model, - dataset=dataset, - target_sparsity_by_scope={TargetScope(patterns=[".*linear.*"]): 0.3}, - # dataset=nncf.Dataset(np.random.randint(0, 30, (3, 2, 8))), - # target_sparsity_by_scope={ - # TargetScope(patterns=[".*gate_proj.*"]): 0.2, - # TargetScope(patterns=[".*up_proj.*"]): 0.3, - # TargetScope(patterns=[".*down_proj.*"]): 0.4, - # } -) -# ov.save_model(sparse_model, "sparse_model.xml", compress_to_fp16=False) From 75ca314c6a448deb7a5c8a0cbc4ea8b9f6c5d523 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Wed, 28 Aug 2024 15:03:08 +0200 Subject: [PATCH 08/10] Style --- .../experimental/torch/sparsify_activations/ema_aggregator.py | 2 -- .../torch/sparsify_activations/sparsify_activations_impl.py | 2 +- .../experimental/sparsify_activations/test_components.py | 4 +--- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/ema_aggregator.py b/nncf/experimental/torch/sparsify_activations/ema_aggregator.py index c3089bdaa80..cf90a47ab83 100644 --- a/nncf/experimental/torch/sparsify_activations/ema_aggregator.py +++ b/nncf/experimental/torch/sparsify_activations/ema_aggregator.py @@ -11,10 +11,8 @@ from typing import Optional import nncf.tensor.functions as fns - from nncf.experimental.common.tensor_statistics.collectors import AggregationAxes from nncf.experimental.common.tensor_statistics.collectors import OnlineAggregatorBase - from nncf.tensor import Tensor diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index e89bf0255ea..750b8715176 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -29,9 +29,9 @@ from nncf.data import Dataset from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer from nncf.experimental.common.tensor_statistics.collectors import TensorCollector +from nncf.experimental.torch.sparsify_activations.ema_aggregator import EMAAggregator from nncf.experimental.torch.sparsify_activations.target_scope import TargetScope from nncf.experimental.torch.sparsify_activations.target_scope import get_target_node_names_from_target_scope -from nncf.experimental.torch.sparsify_activations.ema_aggregator import EMAAggregator from nncf.scopes import IgnoredScope from nncf.scopes import get_ignored_node_names_from_ignored_scope from nncf.torch.model_creation import is_wrapped_model diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index 1419ad70612..d6dc399279b 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -117,9 +117,7 @@ def test_sparsifier_forward(self, use_cuda: bool, dtype: torch.dtype): input_tensor = torch.rand([2, 10], device=device, dtype=dtype) ref_output = torch.where(input_tensor.abs() <= 0.1, 0.0, input_tensor) output_tensor = sparsifier(ref_output) - torch.testing.assert_close( - sparsifier.threshold, torch.tensor(0.1, device=device), rtol=1e-4, atol=1e-4 - ) + torch.testing.assert_close(sparsifier.threshold, torch.tensor(0.1, device=device), rtol=1e-4, atol=1e-4) torch.testing.assert_close(output_tensor, ref_output, rtol=1e-4, atol=1e-4) From cf228d92f7bb51db83eb924069117d9a0227eb33 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 16 Dec 2024 19:10:03 +0100 Subject: [PATCH 09/10] Fix import --- .../torch/sparsify_activations/torch_backend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index 388bc52ecae..87478db0ede 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -20,6 +20,7 @@ from nncf.common.graph.operator_metatypes import CONST_NOOP_METATYPES from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType +from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend from nncf.torch.graph import operator_metatypes as om from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand @@ -27,7 +28,6 @@ from nncf.torch.graph.transformations.layout import PTTransformationLayout from nncf.torch.model_transformer import PTModelTransformer from nncf.torch.nncf_network import NNCFNetwork -from nncf.torch.tensor_statistics.collectors import PTAbsQuantileReducer ACTIVATIONS_SPARSIFIER_PREFIX = "activations_sparsifier" @@ -61,8 +61,8 @@ class PTSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): def supported_metatypes(self) -> List[Type[OperatorMetatype]]: return [om.PTLinearMetatype] - def abs_quantile_reducer(self, quantile: Optional[Union[float, List[float]]] = None) -> PTAbsQuantileReducer: - return PTAbsQuantileReducer(quantile=quantile) + def abs_quantile_reducer(self, quantile: Optional[Union[float, List[float]]] = None) -> AbsQuantileReducer: + return AbsQuantileReducer(quantile=quantile) def target_point(self, target_type: TargetType, target_node_name: str, port_id: int) -> PTTargetPoint: return PTTargetPoint(TargetType.PRE_LAYER_OPERATION, target_node_name, input_port_id=port_id) From 3b255c3b686e669ca0c3d4ef7526f0bcba134a81 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Tue, 17 Dec 2024 15:25:40 +0100 Subject: [PATCH 10/10] Problems with compression subgraph matching --- .../sparsify_activations/openvino_backend.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/openvino_backend.py b/nncf/experimental/torch/sparsify_activations/openvino_backend.py index 60e561ccf29..f6a1c3dcb99 100644 --- a/nncf/experimental/torch/sparsify_activations/openvino_backend.py +++ b/nncf/experimental/torch/sparsify_activations/openvino_backend.py @@ -69,7 +69,7 @@ def insert_sparsifiers( @staticmethod def get_activation_port_id(matmul_node: NNCFNode, nncf_graph: NNCFGraph) -> int: - # return 0 + return 0 n_inputs = len(nncf_graph.get_input_edges(matmul_node)) if n_inputs != 2: raise RuntimeError(f"Expected node to have two inputs, but found {n_inputs} for node {matmul_node}.") @@ -78,6 +78,7 @@ def get_activation_port_id(matmul_node: NNCFNode, nncf_graph: NNCFGraph) -> int: nncf_graph.get_input_edges(matmul_node)[i].from_node.node_type == "Constant" for i in range(2) ] if is_const_node_on_port[0] != is_const_node_on_port[1]: + assert not is_const_node_on_port[0], matmul_node.node_name return 1 if is_const_node_on_port[0] else 0 # Try to match compressed constant subgraph @@ -85,17 +86,18 @@ def get_activation_port_id(matmul_node: NNCFNode, nncf_graph: NNCFGraph) -> int: node = nncf_graph.get_input_edges(matmul_node)[i].from_node if node.node_type == "Convert": node = nncf_graph.get_input_edges(node)[0].from_node - if node.node_type == "Multiply": - node = nncf_graph.get_input_edges(node)[0].from_node - else: - continue - if node.node_type == "Subtract": + if node.node_type == "Reshape": node = nncf_graph.get_input_edges(node)[0].from_node - else: - continue - if node.node_type == "Convert": + if node.node_type == "Multiply": node = nncf_graph.get_input_edges(node)[0].from_node + if node.node_type == "Subtract": + node = nncf_graph.get_input_edges(node)[0].from_node + if node.node_type == "Convert": + node = nncf_graph.get_input_edges(node)[0].from_node + else: + continue if node.node_type == "Constant": + assert i == 1, matmul_node.node_name return int(i == 0) raise RuntimeError(f"Could not find activation port id for node {matmul_node}.")