From 660315c73169e6b3c4ca180c5b9f2c72f09382d4 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:03:26 +0100 Subject: [PATCH 01/85] Create base_transformer.py creation of the base class --- transformers/base_transformer.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 transformers/base_transformer.py diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py new file mode 100644 index 00000000..a084ba71 --- /dev/null +++ b/transformers/base_transformer.py @@ -0,0 +1,16 @@ +from abc import ABC, abstractmethod +from typing import Dict, Any + +class BaseTransformer(ABC): + """Abstract base class for all transformers.""" + + @abstractmethod + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """ + Transform a single dictionary item. + Args: + item: Input dictionary representing a URL entry. + Returns: + Transformed dictionary. + """ + pass From f32ed8c6de7836bdff0fd161c37495df4a44874d Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:05:31 +0100 Subject: [PATCH 02/85] Create metadata_enricher.py --- transformers/metadata_enricher.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 transformers/metadata_enricher.py diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py new file mode 100644 index 00000000..7bbdbb90 --- /dev/null +++ b/transformers/metadata_enricher.py @@ -0,0 +1,16 @@ +from typing import Dict, Any +from .base_transformer import BaseTransformer +from datetime import datetime + +class MetadataEnricher(BaseTransformer): + """Adds vendor and metadata information.""" + + def __init__(self, vendor: str): + self.vendor = vendor + + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + item["vendor"] = self.vendor + if "metadata" not in item: + item["metadata"] = {} + item["metadata"]["processed_at"] = datetime.utcnow().isoformat() + return item From 9f8b5e7a9a4cef1481d6b24486698f9a0a32af9a Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:06:42 +0100 Subject: [PATCH 03/85] Create pipelines.py --- transformers/pipelines.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 transformers/pipelines.py diff --git a/transformers/pipelines.py b/transformers/pipelines.py new file mode 100644 index 00000000..57e744df --- /dev/null +++ b/transformers/pipelines.py @@ -0,0 +1,29 @@ +""" +pipelines.py + +Helper functions to run transformation pipelines for URL lists. +""" + +from typing import List, Dict, Any +from transformers.base_transformer import BaseTransformer + + +def apply_transformers(items: List[Dict[str, Any]], transformers: List[BaseTransformer]-> List[Dict[str, Any]]: + """ + Apply a list of transformers sequentially to a list of items. + + Args: + items: List of vendor configuration dictionaries. + transformers: Ordered list of transformer instances. + + Returns: + List of transformed dictionaries. + """ + + result = [] + for item in items: + for transformer in transformers: + item = transformer.transform(item) + result.append(item) + return result + From 8db8829afd1e68b10608bfab8ac12d6b0b7bbe30 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:07:23 +0100 Subject: [PATCH 04/85] Create type_mapper.py --- transformers/type_mapper.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 transformers/type_mapper.py diff --git a/transformers/type_mapper.py b/transformers/type_mapper.py new file mode 100644 index 00000000..43abd127 --- /dev/null +++ b/transformers/type_mapper.py @@ -0,0 +1,14 @@ +from typing import Dict, Any +from .base_transformer import BaseTransformer + +class TypeMapper(BaseTransformer): + """Maps types (literal/wildcard/regex/substring) per vendor.""" + + def __init__(self, type_map: Dict[str, str]): + self.type_map = type_map + + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + t = item.get("type") + if t in self.type_map: + item["type"] = self.type_map[t] + return item From 8e07ff701ebcbfde6dd7aa628bc1ba0a1e1b86c3 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:07:51 +0100 Subject: [PATCH 05/85] Create action_mapper.py --- transformers/action_mapper.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 transformers/action_mapper.py diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py new file mode 100644 index 00000000..c1ee5072 --- /dev/null +++ b/transformers/action_mapper.py @@ -0,0 +1,14 @@ +from typing import Dict, Any +from .base_transformer import BaseTransformer + +class ActionMapper(BaseTransformer): + """Maps actions from vendor to universal and vice versa.""" + + def __init__(self, action_map: Dict[str, str]): + self.action_map = action_map + + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + action = item.get("action") + if action in self.action_map: + item["action"] = self.action_map[action] + return item From 75b66d9485cb052add7a81cdba6d4ad52cd87e22 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:08:28 +0100 Subject: [PATCH 06/85] Create category_mapper.py --- transformers/category_mapper.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 transformers/category_mapper.py diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py new file mode 100644 index 00000000..be832036 --- /dev/null +++ b/transformers/category_mapper.py @@ -0,0 +1,14 @@ +from typing import Dict, Any +from .base_transformer import BaseTransformer + +class CategoryMapper(BaseTransformer): + """Maps categories per vendor.""" + + def __init__(self, category_map: Dict[str, str]): + self.category_map = category_map + + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + cat = item.get("category_id") + if cat in self.category_map: + item["category_id"] = self.category_map[cat] + return item From cf23303c2a75a454c2e621e5c100a0e2a83a4ca9 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:08:54 +0100 Subject: [PATCH 07/85] Create pattern_normalizer.py --- transformers/pattern_normalizer.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 transformers/pattern_normalizer.py diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py new file mode 100644 index 00000000..0ee25882 --- /dev/null +++ b/transformers/pattern_normalizer.py @@ -0,0 +1,9 @@ +from typing import Dict, Any +from .base_transformer import BaseTransformer + +class PatternNormalizer(BaseTransformer): + """Generic pattern normalizer (pass-through).""" + + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + item["pattern"] = item.get("pattern", "") + return item From 09a8ae8892d9ad293cef8f12b8d77e724a8f3e40 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:09:49 +0100 Subject: [PATCH 08/85] Create fortinet_transformer.py --- transformers/vendors/fortinet_transformer.py | 30 ++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 transformers/vendors/fortinet_transformer.py diff --git a/transformers/vendors/fortinet_transformer.py b/transformers/vendors/fortinet_transformer.py new file mode 100644 index 00000000..a8a63583 --- /dev/null +++ b/transformers/vendors/fortinet_transformer.py @@ -0,0 +1,30 @@ +# Absolute imports +import jsonata +from transformers.base_transformer import BaseTransformer +from transformers.pattern_normalizer import PatternNormalizer +from transformers.action_mapper import ActionMapper +from transformers.type_mapper import TypeMapper +from transformers.category_mapper import CategoryMapper +from transformers.metadata_enricher import MetadataEnricher + +# ---------------- FORTINET MAPPINGS ---------------- +FORTINET_ACTION_MAP = {"block": "block", "allow": "allow", "monitor": "monitor"} +FORTINET_CATEGORY_MAP = {"3": "malware", "4": "phishing", "5": "gambling", "default": "uncategorized"} +FORTINET_TYPE_MAP = {"simple": "literal", "wildcard": "wildcard", "regex": "regex", "substring": "substring"} + +# ---------------- FORTINET PIPELINES ---------------- +VENDOR_TO_UNIVERSAL_PIPELINES = [ + ActionMapper(FORTINET_ACTION_MAP), + PatternNormalizer(), + TypeMapper(FORTINET_TYPE_MAP), + CategoryMapper(FORTINET_CATEGORY_MAP), + MetadataEnricher("fortinet"), +] + +UNIVERSAL_TO_VENDOR_PIPELINES = [ + ActionMapper({v: k for k, v in FORTINET_ACTION_MAP.items()}), + PatternNormalizer(), + TypeMapper({v: k for k, v in FORTINET_TYPE_MAP.items()}), + CategoryMapper({v: k for k, v in FORTINET_CATEGORY_MAP.items()}), + MetadataEnricher("fortinet"), +] From 9a291acff023be04f269e86e3f2c7c320104c0c7 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:28:46 +0100 Subject: [PATCH 09/85] Create netskope_transformer.py --- transformers/vendors/netskope_transformer.py | 152 +++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 transformers/vendors/netskope_transformer.py diff --git a/transformers/vendors/netskope_transformer.py b/transformers/vendors/netskope_transformer.py new file mode 100644 index 00000000..b65709ee --- /dev/null +++ b/transformers/vendors/netskope_transformer.py @@ -0,0 +1,152 @@ +from typing import Dict, Any + +# Absolute imports +from transformers.base_transformer import BaseTransformer +from transformers.pattern_normalizer import PatternNormalizer +from transformers.action_mapper import ActionMapper +from transformers.type_mapper import TypeMapper +from transformers.category_mapper import CategoryMapper +from transformers.metadata_enricher import MetadataEnricher + + +class NetskopePatternNormalizer(BaseTransformer): + """ + Normalize Netskope URL patterns and convert wildcards to regex formats. + + - 'literal' or 'exact': left as-is for exact lists. + - 'wildcard': converted to regex for regex lists. + - 'regex': passed as regex (wildcards converted to proper regex if needed). + """ + + def wildcard_to_regex(self, pattern: str) -> str: + """ + Convert Netskope wildcard patterns into regex. + '*.example.com' -> '^([^.]+\.)*example\.com$' + """ + if pattern.startswith("*."): + domain = pattern[2:] + domain = domain.replace(".", r"\.") # Escape dots + return rf"^([^.]+\.)*{domain}$" + else: + # Leave non-wildcards as-is for regex lists + return pattern + + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """ + Transform a single URL item into Netskope-compatible pattern. + """ + raw_pattern = item.get("pattern", "") + utype = item.get("type", "literal") # universal type + + # Determine netskope_type + if utype in ("literal", "exact"): + # Exact list → leave pattern as literal + final_pattern = raw_pattern + item["netskope_type"] = "exact" + + elif utype in ("wildcard", "regex"): + # Regex list → convert wildcard to proper regex + final_pattern = self.wildcard_to_regex(raw_pattern) + item["netskope_type"] = "regex" + + else: + # Fallback + final_pattern = raw_pattern + item["netskope_type"] = "exact" + + # **Important:** do NOT double-escape backslashes + # Only regex patterns need normal backslashes + item["pattern"] = final_pattern + + return item + + def transform_list(self, items: list[Dict[str, Any]]) -> list[Dict[str, Any]]: + """ + Transform a list of URL items. + """ + return [self.transform(item) for item in items] + + +class NetskopePatternDenormalizer(BaseTransformer): + """ + Convert Netskope Data Model patterns back into universal-compatible + patterns and types. + """ + + def regex_to_wildcard(self, pattern: str) -> str | None: + """ + Convert a known Netskope-style regex back to wildcard format. + Example: + ^([^.]+\.)*example\.com$ -> *.example.com + """ + wildcard_regex = r'^\^\(\[\^\.\]\+\\\.\)\*(.+)\\\.([a-zA-Z0-9\-]+)\$$' + match = re.match(wildcard_regex, pattern) + + if match: + domain = f"{match.group(1)}.{match.group(2)}" + return f"*.{domain}" + + return None + + def is_regex(self, pattern: str) -> bool: + regex_markers = ( + "^", "$", "(", ")", "[", "]", "+", "?", "|", "{", "}" + ) + return any(m in pattern for m in regex_markers) + + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + pattern = item.get("pattern", "").replace("\\\\", "\\") + + # 1. Valid Netskope wildcard ONLY + if pattern.startswith("*.") and pattern.count("*") == 1: + item["type"] = "wildcard" + item["pattern"] = pattern + + # 2. Any other '*' means regex + elif "*" in pattern: + item["type"] = "regex" + item["pattern"] = pattern + + # 3. Regex syntax without '*' + elif self.is_regex(pattern): + wildcard = self.regex_to_wildcard(pattern) + if wildcard: + item["type"] = "wildcard" + item["pattern"] = wildcard + else: + item["type"] = "regex" + item["pattern"] = pattern + + # 4. Literal + else: + item["type"] = "exact" + item["pattern"] = pattern + + item.pop("netskope_type", None) + return item + + + +# ---------------- NETSKOPE MAPPINGS ---------------- +NETSKOPE_ACTION_MAP = {"block": "deny", "allow": "allow", "monitor": "allow"} +NETSKOPE_CATEGORY_MAP = {"malware": "malware", "phishing": "phishing", "gambling": "gambling", "uncategorized": "uncategorized"} +NETSKOPE_TO_UNIVERSAL_TYPE_MAP = {"exact": "literal", "regex": "regex"} +UNIVERSAL_TO_NETSKOPE_TYPE_MAP = {"literal": "exact", "regex": "regex", "wildcard": "regex", "substring": "regex"} + +# ---------------- NETSKOPE PIPELINES ---------------- + +VENDOR_TO_UNIVERSAL_PIPELINES = [ + ActionMapper(NETSKOPE_ACTION_MAP), + TypeMapper(NETSKOPE_TO_UNIVERSAL_TYPE_MAP), + NetskopePatternDenormalizer(), + CategoryMapper(NETSKOPE_CATEGORY_MAP), + MetadataEnricher("netskope"), +] + +UNIVERSAL_TO_VENDOR_PIPELINES = [ + ActionMapper({v: k for k, v in NETSKOPE_ACTION_MAP.items()}), + TypeMapper(UNIVERSAL_TO_NETSKOPE_TYPE_MAP), + NetskopePatternNormalizer(), + CategoryMapper({v: k for k, v in NETSKOPE_CATEGORY_MAP.items()}), + MetadataEnricher("netskope"), +] From 40c4dce2cd206ce8c0dfa32c866852d44fb22128 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:29:17 +0100 Subject: [PATCH 10/85] Create prisma_transformer.py --- transformers/vendors/prisma_transformer.py | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 transformers/vendors/prisma_transformer.py diff --git a/transformers/vendors/prisma_transformer.py b/transformers/vendors/prisma_transformer.py new file mode 100644 index 00000000..0cbfad7c --- /dev/null +++ b/transformers/vendors/prisma_transformer.py @@ -0,0 +1,29 @@ +# Absolute imports +from transformers.base_transformer import BaseTransformer +from transformers.pattern_normalizer import PatternNormalizer +from transformers.action_mapper import ActionMapper +from transformers.type_mapper import TypeMapper +from transformers.category_mapper import CategoryMapper +from transformers.metadata_enricher import MetadataEnricher + +# ---------------- PRISMA MAPPINGS ---------------- +PRISMA_ACTION_MAP = {"block": "deny", "allow": "allow", "monitor": "alert"} +PRISMA_CATEGORY_MAP = {"malware": "malware", "phishing": "phishing", "gambling": "gambling", "uncategorized": "uncategorized"} +PRISMA_TYPE_MAP = {"simple": "literal", "wildcard": "wildcard", "regex": "regex", "substring": "substring"} + +# ---------------- PRISMA PIPELINES ---------------- +VENDOR_TO_UNIVERSAL_PIPELINES = [ + ActionMapper(PRISMA_ACTION_MAP), + PatternNormalizer(), + TypeMapper(PRISMA_TYPE_MAP), + CategoryMapper(PRISMA_CATEGORY_MAP), + MetadataEnricher("prisma"), +] + +UNIVERSAL_TO_VENDOR_PIPELINES = [ + ActionMapper({v: k for k, v in PRISMA_ACTION_MAP.items()}), + PatternNormalizer(), + TypeMapper({v: k for k, v in PRISMA_TYPE_MAP.items()}), + CategoryMapper({v: k for k, v in PRISMA_CATEGORY_MAP.items()}), + MetadataEnricher("prisma"), +] From 6a5e7e2a09644970726dd0dbb65986078ac89932 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:29:44 +0100 Subject: [PATCH 11/85] Create zscaler_transformer.py --- transformers/vendors/zscaler_transformer.py | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 transformers/vendors/zscaler_transformer.py diff --git a/transformers/vendors/zscaler_transformer.py b/transformers/vendors/zscaler_transformer.py new file mode 100644 index 00000000..074eb6d0 --- /dev/null +++ b/transformers/vendors/zscaler_transformer.py @@ -0,0 +1,29 @@ +# Absolute imports +from transformers.base_transformer import BaseTransformer +from transformers.pattern_normalizer import PatternNormalizer +from transformers.action_mapper import ActionMapper +from transformers.type_mapper import TypeMapper +from transformers.category_mapper import CategoryMapper +from transformers.metadata_enricher import MetadataEnricher + +# ---------------- ZSCALER MAPPINGS ---------------- +ZSCALER_ACTION_MAP = {"block": "BLOCK", "allow": "ALLOW", "monitor": "MONITOR"} +ZSCALER_CATEGORY_MAP = {"malware": "malware", "phishing": "phishing", "gambling": "gambling", "uncategorized": "uncategorized"} +ZSCALER_TYPE_MAP = {"STRING": "literal", "WILDCARD": "wildcard", "REGEX": "regex"} + +# ---------------- ZSCALER PIPELINES ---------------- +VENDOR_TO_UNIVERSAL_PIPELINES = [ + ActionMapper(ZSCALER_ACTION_MAP), + PatternNormalizer(), + TypeMapper(ZSCALER_TYPE_MAP), + CategoryMapper(ZSCALER_CATEGORY_MAP), + MetadataEnricher("zscaler"), +] + +UNIVERSAL_TO_VENDOR_PIPELINES = [ + ActionMapper({v: k for k, v in ZSCALER_ACTION_MAP.items()}), + PatternNormalizer(), + TypeMapper({v: k for k, v in ZSCALER_TYPE_MAP.items()}), + CategoryMapper({v: k for k, v in ZSCALER_CATEGORY_MAP.items()}), + MetadataEnricher("zscaler"), +] From c56514da36ed9d33c7b43ffbf6c7ba8ed548a3f9 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:39:50 +0100 Subject: [PATCH 12/85] Update action_mapper.py --- transformers/action_mapper.py | 36 ++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index c1ee5072..4b8fb9fb 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -1,14 +1,44 @@ -from typing import Dict, Any +"""Action mapping transformer. + +This module defines a transformer responsible for mapping action values +between vendor-specific representations and the universal data model. +""" + +from typing import Any, Dict + from .base_transformer import BaseTransformer + class ActionMapper(BaseTransformer): - """Maps actions from vendor to universal and vice versa.""" + """Map action values between vendor and universal models. + + This transformer replaces the ``action`` field of an item using a + predefined mapping dictionary. If the action is not found in the + mapping, it is left unchanged. + """ - def __init__(self, action_map: Dict[str, str]): + def __init__(self, action_map: Dict[str, str]) -> None: + """Initialize the ActionMapper. + + Args: + action_map: A dictionary mapping source action values to + destination action values. + """ self.action_map = action_map def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Transform an item's action field using the action mapping. + + Args: + item: A dictionary representing a single rule or configuration + entry containing an ``action`` field. + + Returns: + The transformed item with its ``action`` field mapped according + to the configured action map. + """ action = item.get("action") if action in self.action_map: item["action"] = self.action_map[action] + return item From 5614211ff900aaf8ed3addc64f83001e5ee58807 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:40:36 +0100 Subject: [PATCH 13/85] Update base_transformer.py --- transformers/base_transformer.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index a084ba71..d6679aa9 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -1,16 +1,30 @@ +"""Base transformer definition. + +This module defines the abstract base class used by all transformers +in the transformation pipeline. +""" + from abc import ABC, abstractmethod -from typing import Dict, Any +from typing import Any, Dict + class BaseTransformer(ABC): - """Abstract base class for all transformers.""" + """Define the interface for all transformers. + + All concrete transformers must implement the ``transform`` method, + which takes a single dictionary item and returns a transformed + dictionary. + """ @abstractmethod def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """ - Transform a single dictionary item. + """Transform a single dictionary item. + Args: - item: Input dictionary representing a URL entry. + item: A dictionary representing a single configuration or + URL entry. + Returns: - Transformed dictionary. + A transformed dictionary. """ - pass + raise NotImplementedError From 9939f5dc603af1e4a8807f708702b35f8e4a3547 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:41:30 +0100 Subject: [PATCH 14/85] Update category_mapper.py --- transformers/category_mapper.py | 43 ++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py index be832036..9de68935 100644 --- a/transformers/category_mapper.py +++ b/transformers/category_mapper.py @@ -1,14 +1,45 @@ -from typing import Dict, Any +"""Category mapping transformer. + +This module defines a transformer responsible for mapping category +identifiers between vendor-specific representations and the universal +data model. +""" + +from typing import Any, Dict + from .base_transformer import BaseTransformer + class CategoryMapper(BaseTransformer): - """Maps categories per vendor.""" + """Map category identifiers between vendor and universal models. + + This transformer replaces the ``category_id`` field of an item using + a predefined mapping dictionary. If the category is not found in the + mapping, it is left unchanged. + """ - def __init__(self, category_map: Dict[str, str]): + def __init__(self, category_map: Dict[str, str]) -> None: + """Initialize the CategoryMapper. + + Args: + category_map: A dictionary mapping source category identifiers + to destination category identifiers. + """ self.category_map = category_map def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - cat = item.get("category_id") - if cat in self.category_map: - item["category_id"] = self.category_map[cat] + """Transform an item's category identifier using the category map. + + Args: + item: A dictionary representing a single rule or configuration + entry containing a ``category_id`` field. + + Returns: + The transformed item with its ``category_id`` field mapped + according to the configured category map. + """ + category_id = item.get("category_id") + if category_id in self.category_map: + item["category_id"] = self.category_map[category_id] + return item From 35246839bc558c6f1cafad3e0d38e9aaba27f761 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:42:15 +0100 Subject: [PATCH 15/85] Update metadata_enricher.py --- transformers/metadata_enricher.py | 36 +++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py index 7bbdbb90..f29ac0c2 100644 --- a/transformers/metadata_enricher.py +++ b/transformers/metadata_enricher.py @@ -1,16 +1,44 @@ -from typing import Dict, Any -from .base_transformer import BaseTransformer +"""Metadata enrichment transformer. + +This module defines a transformer that adds vendor information and +metadata timestamps to each item in the transformation pipeline. +""" + from datetime import datetime +from typing import Any, Dict + +from .base_transformer import BaseTransformer + class MetadataEnricher(BaseTransformer): - """Adds vendor and metadata information.""" + """Enrich items with vendor and metadata information. - def __init__(self, vendor: str): + This transformer adds a ``vendor`` field and a ``metadata`` dictionary + containing a ``processed_at`` timestamp to each item. + """ + + def __init__(self, vendor: str) -> None: + """Initialize the MetadataEnricher. + + Args: + vendor: The vendor name to attach to each item. + """ self.vendor = vendor def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Add vendor and metadata information to an item. + + Args: + item: A dictionary representing a single configuration or URL entry. + + Returns: + The transformed dictionary containing the ``vendor`` field and + a ``metadata.processed_at`` timestamp. + """ item["vendor"] = self.vendor if "metadata" not in item: item["metadata"] = {} + item["metadata"]["processed_at"] = datetime.utcnow().isoformat() + return item From 15fa380f10078169f90a483e536af6a3e0826efd Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:43:05 +0100 Subject: [PATCH 16/85] Update pattern_normalizer.py --- transformers/pattern_normalizer.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py index 0ee25882..e6cf19e6 100644 --- a/transformers/pattern_normalizer.py +++ b/transformers/pattern_normalizer.py @@ -1,9 +1,30 @@ -from typing import Dict, Any +"""Pattern normalization transformer. + +This module defines a generic pattern normalizer that ensures each item +has a ``pattern`` field. Currently, this transformer acts as a pass-through. +""" + +from typing import Any, Dict + from .base_transformer import BaseTransformer + class PatternNormalizer(BaseTransformer): - """Generic pattern normalizer (pass-through).""" + """Normalize or enforce the presence of a pattern field in items. + + This transformer guarantees that each dictionary item contains a + ``pattern`` key. If the key is missing, it is initialized to an empty string. + """ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Ensure the item has a pattern field. + + Args: + item: A dictionary representing a single configuration or URL entry. + + Returns: + The same dictionary with a ``pattern`` key ensured. + """ + item["pattern"] = item.get("pattern", "") return item From ec2f65935152b3b09e59a47d60ba5e38e35c0cf6 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:46:13 +0100 Subject: [PATCH 17/85] Update pipelines.py --- transformers/pipelines.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/transformers/pipelines.py b/transformers/pipelines.py index 57e744df..29e02d89 100644 --- a/transformers/pipelines.py +++ b/transformers/pipelines.py @@ -1,29 +1,36 @@ -""" -pipelines.py +"""Pipeline helper functions for URL transformations. -Helper functions to run transformation pipelines for URL lists. +This module provides utilities to apply a sequence of transformers +to vendor configuration items, producing universal model dictionaries. """ from typing import List, Dict, Any from transformers.base_transformer import BaseTransformer +def apply_transformers( + items: List[Dict[str, Any]], + transformers: List[BaseTransformer] +) -> List[Dict[str, Any]]: + """Apply a sequence of transformers to a list of items. -def apply_transformers(items: List[Dict[str, Any]], transformers: List[BaseTransformer]-> List[Dict[str, Any]]: - """ - Apply a list of transformers sequentially to a list of items. + Each item in the input list is processed sequentially by all + transformers in the given order. Args: - items: List of vendor configuration dictionaries. - transformers: Ordered list of transformer instances. + items: A list of dictionaries representing vendor configuration + entries. + transformers: An ordered list of transformer instances that + implement the `transform` method. Returns: - List of transformed dictionaries. + A list of transformed dictionaries. """ - - result = [] + + result: List[Dict[str, Any]] = [] + for item in items: for transformer in transformers: item = transformer.transform(item) result.append(item) - return result + return result From 2f14a24b9b22d583d396c6d455fea298c354fbf3 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:49:45 +0100 Subject: [PATCH 18/85] Update type_mapper.py --- transformers/type_mapper.py | 43 +++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/transformers/type_mapper.py b/transformers/type_mapper.py index 43abd127..6519a001 100644 --- a/transformers/type_mapper.py +++ b/transformers/type_mapper.py @@ -1,14 +1,45 @@ -from typing import Dict, Any +"""Type mapping transformer. + +This module defines a transformer that maps item types (e.g., literal, +wildcard, regex, substring) between vendor-specific representations +and the universal data model. +""" + +from typing import Any, Dict from .base_transformer import BaseTransformer class TypeMapper(BaseTransformer): - """Maps types (literal/wildcard/regex/substring) per vendor.""" + """Map type values between vendor and universal models. + + This transformer replaces the ``type`` field of an item using a + predefined mapping dictionary. If the type is not found in the + mapping, it is left unchanged. + """ + + def __init__(self, type_map: Dict[str, str]) -> None: + """Initialize the TypeMapper. - def __init__(self, type_map: Dict[str, str]): + Args: + type_map: A dictionary mapping source type values to + destination type values. + """ + self.type_map = type_map def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - t = item.get("type") - if t in self.type_map: - item["type"] = self.type_map[t] + """Transform an item's type field using the type mapping. + + Args: + item: A dictionary representing a single rule or configuration + entry containing a ``type`` field. + + Returns: + The transformed item with its ``type`` field mapped according + to the configured type map. + """ + + item_type = item.get("type") + if item_type in self.type_map: + item["type"] = self.type_map[item_type] + return item From a638746cf1e51c524c5d530a482fb62a31dd0178 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:50:08 +0100 Subject: [PATCH 19/85] Update action_mapper.py --- transformers/action_mapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index 4b8fb9fb..89cead4b 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -5,10 +5,8 @@ """ from typing import Any, Dict - from .base_transformer import BaseTransformer - class ActionMapper(BaseTransformer): """Map action values between vendor and universal models. @@ -24,6 +22,7 @@ def __init__(self, action_map: Dict[str, str]) -> None: action_map: A dictionary mapping source action values to destination action values. """ + self.action_map = action_map def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: @@ -37,6 +36,7 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: The transformed item with its ``action`` field mapped according to the configured action map. """ + action = item.get("action") if action in self.action_map: item["action"] = self.action_map[action] From c50b129c602d73e4fb582a3d04eea65bfb1b3f5c Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:50:37 +0100 Subject: [PATCH 20/85] Update base_transformer.py --- transformers/base_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index d6679aa9..2380c9b0 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -7,7 +7,6 @@ from abc import ABC, abstractmethod from typing import Any, Dict - class BaseTransformer(ABC): """Define the interface for all transformers. @@ -27,4 +26,5 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: A transformed dictionary. """ + raise NotImplementedError From 20ea8d74b7a63050e3cb6bc4e9189a83c26ca8d1 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:51:04 +0100 Subject: [PATCH 21/85] Update category_mapper.py --- transformers/category_mapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py index 9de68935..eae29571 100644 --- a/transformers/category_mapper.py +++ b/transformers/category_mapper.py @@ -25,6 +25,7 @@ def __init__(self, category_map: Dict[str, str]) -> None: category_map: A dictionary mapping source category identifiers to destination category identifiers. """ + self.category_map = category_map def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: @@ -38,6 +39,7 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: The transformed item with its ``category_id`` field mapped according to the configured category map. """ + category_id = item.get("category_id") if category_id in self.category_map: item["category_id"] = self.category_map[category_id] From 8c2f369f5b05e5b157281e075311bc0b7070b0c8 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:51:27 +0100 Subject: [PATCH 22/85] Update metadata_enricher.py --- transformers/metadata_enricher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py index f29ac0c2..5a7c08e0 100644 --- a/transformers/metadata_enricher.py +++ b/transformers/metadata_enricher.py @@ -6,10 +6,8 @@ from datetime import datetime from typing import Any, Dict - from .base_transformer import BaseTransformer - class MetadataEnricher(BaseTransformer): """Enrich items with vendor and metadata information. @@ -23,6 +21,7 @@ def __init__(self, vendor: str) -> None: Args: vendor: The vendor name to attach to each item. """ + self.vendor = vendor def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: @@ -35,6 +34,7 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: The transformed dictionary containing the ``vendor`` field and a ``metadata.processed_at`` timestamp. """ + item["vendor"] = self.vendor if "metadata" not in item: item["metadata"] = {} From 649f3ddbf52d9f279f409c5e5c5da04bf235e1de Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:52:14 +0100 Subject: [PATCH 23/85] Update pattern_normalizer.py --- transformers/pattern_normalizer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py index e6cf19e6..f58cb6d0 100644 --- a/transformers/pattern_normalizer.py +++ b/transformers/pattern_normalizer.py @@ -5,10 +5,8 @@ """ from typing import Any, Dict - from .base_transformer import BaseTransformer - class PatternNormalizer(BaseTransformer): """Normalize or enforce the presence of a pattern field in items. From 7b29f0c5de42098d3bfbdf66d8d568d2efe50be2 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:57:07 +0100 Subject: [PATCH 24/85] Update action_mapper.py --- transformers/action_mapper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index 89cead4b..26a4ea71 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -8,6 +8,7 @@ from .base_transformer import BaseTransformer class ActionMapper(BaseTransformer): + """Map action values between vendor and universal models. This transformer replaces the ``action`` field of an item using a @@ -16,6 +17,7 @@ class ActionMapper(BaseTransformer): """ def __init__(self, action_map: Dict[str, str]) -> None: + """Initialize the ActionMapper. Args: @@ -26,6 +28,7 @@ def __init__(self, action_map: Dict[str, str]) -> None: self.action_map = action_map def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Transform an item's action field using the action mapping. Args: From a0a0e94072d345f9efe1fb458de8e70f551d9267 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:57:23 +0100 Subject: [PATCH 25/85] Update base_transformer.py --- transformers/base_transformer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index 2380c9b0..f7d06139 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -8,6 +8,7 @@ from typing import Any, Dict class BaseTransformer(ABC): + """Define the interface for all transformers. All concrete transformers must implement the ``transform`` method, @@ -17,6 +18,7 @@ class BaseTransformer(ABC): @abstractmethod def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single dictionary item. Args: From a3f703fa335bed9f52a85e6a74d68cd1ce0d8db5 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:57:43 +0100 Subject: [PATCH 26/85] Update category_mapper.py --- transformers/category_mapper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py index eae29571..e9a59d14 100644 --- a/transformers/category_mapper.py +++ b/transformers/category_mapper.py @@ -11,6 +11,7 @@ class CategoryMapper(BaseTransformer): + """Map category identifiers between vendor and universal models. This transformer replaces the ``category_id`` field of an item using @@ -19,6 +20,7 @@ class CategoryMapper(BaseTransformer): """ def __init__(self, category_map: Dict[str, str]) -> None: + """Initialize the CategoryMapper. Args: @@ -29,6 +31,7 @@ def __init__(self, category_map: Dict[str, str]) -> None: self.category_map = category_map def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Transform an item's category identifier using the category map. Args: From df415ea752817a120b318fbe82959a98609b1e68 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:58:05 +0100 Subject: [PATCH 27/85] Update metadata_enricher.py --- transformers/metadata_enricher.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py index 5a7c08e0..1714f654 100644 --- a/transformers/metadata_enricher.py +++ b/transformers/metadata_enricher.py @@ -9,6 +9,7 @@ from .base_transformer import BaseTransformer class MetadataEnricher(BaseTransformer): + """Enrich items with vendor and metadata information. This transformer adds a ``vendor`` field and a ``metadata`` dictionary @@ -16,6 +17,7 @@ class MetadataEnricher(BaseTransformer): """ def __init__(self, vendor: str) -> None: + """Initialize the MetadataEnricher. Args: @@ -25,6 +27,7 @@ def __init__(self, vendor: str) -> None: self.vendor = vendor def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Add vendor and metadata information to an item. Args: From 27ed072dcf8f8666665ae9dcb79f1b95ff22e49a Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:58:22 +0100 Subject: [PATCH 28/85] Update pattern_normalizer.py --- transformers/pattern_normalizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py index f58cb6d0..a1014894 100644 --- a/transformers/pattern_normalizer.py +++ b/transformers/pattern_normalizer.py @@ -8,6 +8,7 @@ from .base_transformer import BaseTransformer class PatternNormalizer(BaseTransformer): + """Normalize or enforce the presence of a pattern field in items. This transformer guarantees that each dictionary item contains a @@ -15,6 +16,7 @@ class PatternNormalizer(BaseTransformer): """ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Ensure the item has a pattern field. Args: From c6c398475f432f44c99a48fbf0dc61c0bf5c4fa9 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:58:41 +0100 Subject: [PATCH 29/85] Update pipelines.py --- transformers/pipelines.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/pipelines.py b/transformers/pipelines.py index 29e02d89..47b312b6 100644 --- a/transformers/pipelines.py +++ b/transformers/pipelines.py @@ -11,6 +11,7 @@ def apply_transformers( items: List[Dict[str, Any]], transformers: List[BaseTransformer] ) -> List[Dict[str, Any]]: + """Apply a sequence of transformers to a list of items. Each item in the input list is processed sequentially by all From 6747805f6ce9c66ea3a09f33c4edbf1f7cb5e43c Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 10:59:06 +0100 Subject: [PATCH 30/85] Update type_mapper.py --- transformers/type_mapper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/transformers/type_mapper.py b/transformers/type_mapper.py index 6519a001..acedd044 100644 --- a/transformers/type_mapper.py +++ b/transformers/type_mapper.py @@ -9,6 +9,7 @@ from .base_transformer import BaseTransformer class TypeMapper(BaseTransformer): + """Map type values between vendor and universal models. This transformer replaces the ``type`` field of an item using a @@ -17,6 +18,7 @@ class TypeMapper(BaseTransformer): """ def __init__(self, type_map: Dict[str, str]) -> None: + """Initialize the TypeMapper. Args: @@ -27,6 +29,7 @@ def __init__(self, type_map: Dict[str, str]) -> None: self.type_map = type_map def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Transform an item's type field using the type mapping. Args: From d115ba30ad0f0bc5db57e60e7277281bc52b646b Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:02:44 +0100 Subject: [PATCH 31/85] Update action_mapper.py --- transformers/action_mapper.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index 26a4ea71..88859a8c 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -8,27 +8,22 @@ from .base_transformer import BaseTransformer class ActionMapper(BaseTransformer): - """Map action values between vendor and universal models. This transformer replaces the ``action`` field of an item using a predefined mapping dictionary. If the action is not found in the mapping, it is left unchanged. """ - - def __init__(self, action_map: Dict[str, str]) -> None: - + def __init__(self, action_map: Dict[str, str]) -> None: """Initialize the ActionMapper. Args: action_map: A dictionary mapping source action values to destination action values. - """ - + """ self.action_map = action_map - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Transform an item's action field using the action mapping. Args: @@ -38,8 +33,7 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: The transformed item with its ``action`` field mapped according to the configured action map. - """ - + """ action = item.get("action") if action in self.action_map: item["action"] = self.action_map[action] From 24b1fb102ca3f9eb7d9b9a85603d6565d7f1a053 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:03:05 +0100 Subject: [PATCH 32/85] Update base_transformer.py --- transformers/base_transformer.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index f7d06139..80516eca 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -7,18 +7,15 @@ from abc import ABC, abstractmethod from typing import Any, Dict -class BaseTransformer(ABC): - +class BaseTransformer(ABC): """Define the interface for all transformers. All concrete transformers must implement the ``transform`` method, which takes a single dictionary item and returns a transformed dictionary. """ - @abstractmethod - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Transform a single dictionary item. Args: @@ -27,6 +24,5 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: A transformed dictionary. - """ - + """ raise NotImplementedError From e4436bdb5abf7fad5e30db3582781321ccdc23f0 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:03:34 +0100 Subject: [PATCH 33/85] Update category_mapper.py --- transformers/category_mapper.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py index e9a59d14..0e8db6cb 100644 --- a/transformers/category_mapper.py +++ b/transformers/category_mapper.py @@ -6,32 +6,25 @@ """ from typing import Any, Dict - from .base_transformer import BaseTransformer - -class CategoryMapper(BaseTransformer): - +class CategoryMapper(BaseTransformer): """Map category identifiers between vendor and universal models. This transformer replaces the ``category_id`` field of an item using a predefined mapping dictionary. If the category is not found in the mapping, it is left unchanged. """ - - def __init__(self, category_map: Dict[str, str]) -> None: - + def __init__(self, category_map: Dict[str, str]) -> None: """Initialize the CategoryMapper. Args: category_map: A dictionary mapping source category identifiers to destination category identifiers. """ - self.category_map = category_map - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Transform an item's category identifier using the category map. Args: @@ -42,7 +35,6 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: The transformed item with its ``category_id`` field mapped according to the configured category map. """ - category_id = item.get("category_id") if category_id in self.category_map: item["category_id"] = self.category_map[category_id] From 214ab0abe6f3d8b071b5aeea12acc0a39b6e5253 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:04:05 +0100 Subject: [PATCH 34/85] Update metadata_enricher.py --- transformers/metadata_enricher.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py index 1714f654..dbb72031 100644 --- a/transformers/metadata_enricher.py +++ b/transformers/metadata_enricher.py @@ -9,25 +9,20 @@ from .base_transformer import BaseTransformer class MetadataEnricher(BaseTransformer): - """Enrich items with vendor and metadata information. This transformer adds a ``vendor`` field and a ``metadata`` dictionary containing a ``processed_at`` timestamp to each item. """ - - def __init__(self, vendor: str) -> None: - + def __init__(self, vendor: str) -> None: """Initialize the MetadataEnricher. Args: vendor: The vendor name to attach to each item. - """ - + """ self.vendor = vendor - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Add vendor and metadata information to an item. Args: @@ -36,8 +31,7 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: The transformed dictionary containing the ``vendor`` field and a ``metadata.processed_at`` timestamp. - """ - + """ item["vendor"] = self.vendor if "metadata" not in item: item["metadata"] = {} From c34c42f7c10c15d72e1e7f99a70b70b5adb111ea Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:04:34 +0100 Subject: [PATCH 35/85] Update pattern_normalizer.py --- transformers/pattern_normalizer.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py index a1014894..24d89345 100644 --- a/transformers/pattern_normalizer.py +++ b/transformers/pattern_normalizer.py @@ -7,16 +7,13 @@ from typing import Any, Dict from .base_transformer import BaseTransformer -class PatternNormalizer(BaseTransformer): - +class PatternNormalizer(BaseTransformer): """Normalize or enforce the presence of a pattern field in items. This transformer guarantees that each dictionary item contains a ``pattern`` key. If the key is missing, it is initialized to an empty string. """ - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Ensure the item has a pattern field. Args: @@ -24,7 +21,6 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: The same dictionary with a ``pattern`` key ensured. - """ - + """ item["pattern"] = item.get("pattern", "") return item From b8235b17a8c62c6664fd065238bbb23e48a0a8e3 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:04:52 +0100 Subject: [PATCH 36/85] Update pipelines.py --- transformers/pipelines.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/transformers/pipelines.py b/transformers/pipelines.py index 47b312b6..f987e60d 100644 --- a/transformers/pipelines.py +++ b/transformers/pipelines.py @@ -10,8 +10,7 @@ def apply_transformers( items: List[Dict[str, Any]], transformers: List[BaseTransformer] -) -> List[Dict[str, Any]]: - +) -> List[Dict[str, Any]]: """Apply a sequence of transformers to a list of items. Each item in the input list is processed sequentially by all @@ -25,8 +24,7 @@ def apply_transformers( Returns: A list of transformed dictionaries. - """ - + """ result: List[Dict[str, Any]] = [] for item in items: From 87006611d1a55e9531d8fd4bf2e8b1f6ea3ea40d Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:05:20 +0100 Subject: [PATCH 37/85] Update type_mapper.py --- transformers/type_mapper.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/transformers/type_mapper.py b/transformers/type_mapper.py index acedd044..88f1ae52 100644 --- a/transformers/type_mapper.py +++ b/transformers/type_mapper.py @@ -8,28 +8,23 @@ from typing import Any, Dict from .base_transformer import BaseTransformer -class TypeMapper(BaseTransformer): - +class TypeMapper(BaseTransformer): """Map type values between vendor and universal models. This transformer replaces the ``type`` field of an item using a predefined mapping dictionary. If the type is not found in the mapping, it is left unchanged. """ - - def __init__(self, type_map: Dict[str, str]) -> None: - + def __init__(self, type_map: Dict[str, str]) -> None: """Initialize the TypeMapper. Args: type_map: A dictionary mapping source type values to destination type values. - """ - + """ self.type_map = type_map - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Transform an item's type field using the type mapping. Args: @@ -39,8 +34,7 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: The transformed item with its ``type`` field mapped according to the configured type map. - """ - + """ item_type = item.get("type") if item_type in self.type_map: item["type"] = self.type_map[item_type] From d6263afeee736c4a5ce7e98fe26a04ce2a3d71ac Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:07:51 +0100 Subject: [PATCH 38/85] Update action_mapper.py --- transformers/action_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index 88859a8c..70e0ccdd 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -8,6 +8,7 @@ from .base_transformer import BaseTransformer class ActionMapper(BaseTransformer): + """Map action values between vendor and universal models. This transformer replaces the ``action`` field of an item using a From aaaf7322473b8c01899b1b1cea33ba7eaabe0120 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:08:10 +0100 Subject: [PATCH 39/85] Update base_transformer.py --- transformers/base_transformer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index 80516eca..a280940a 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -8,6 +8,7 @@ from typing import Any, Dict class BaseTransformer(ABC): + """Define the interface for all transformers. All concrete transformers must implement the ``transform`` method, From dd0fd2c280303af70d243909b4742237e83083c3 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:08:25 +0100 Subject: [PATCH 40/85] Update category_mapper.py --- transformers/category_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py index 0e8db6cb..d869fcce 100644 --- a/transformers/category_mapper.py +++ b/transformers/category_mapper.py @@ -9,6 +9,7 @@ from .base_transformer import BaseTransformer class CategoryMapper(BaseTransformer): + """Map category identifiers between vendor and universal models. This transformer replaces the ``category_id`` field of an item using From 3047021f4b23eb064d295e026c2cc4181970d565 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:08:39 +0100 Subject: [PATCH 41/85] Update metadata_enricher.py --- transformers/metadata_enricher.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py index dbb72031..0451f8a7 100644 --- a/transformers/metadata_enricher.py +++ b/transformers/metadata_enricher.py @@ -9,6 +9,7 @@ from .base_transformer import BaseTransformer class MetadataEnricher(BaseTransformer): + """Enrich items with vendor and metadata information. This transformer adds a ``vendor`` field and a ``metadata`` dictionary From ca452a6cd368c8be7da5672b2f412197c0aa0c8b Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:08:52 +0100 Subject: [PATCH 42/85] Update pattern_normalizer.py --- transformers/pattern_normalizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py index 24d89345..a24fbbd1 100644 --- a/transformers/pattern_normalizer.py +++ b/transformers/pattern_normalizer.py @@ -8,6 +8,7 @@ from .base_transformer import BaseTransformer class PatternNormalizer(BaseTransformer): + """Normalize or enforce the presence of a pattern field in items. This transformer guarantees that each dictionary item contains a From 91f7fccd449ab2b27e346f2cd14af083d4a51fc8 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:09:27 +0100 Subject: [PATCH 43/85] Update type_mapper.py --- transformers/type_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/type_mapper.py b/transformers/type_mapper.py index 88f1ae52..39e646ce 100644 --- a/transformers/type_mapper.py +++ b/transformers/type_mapper.py @@ -9,6 +9,7 @@ from .base_transformer import BaseTransformer class TypeMapper(BaseTransformer): + """Map type values between vendor and universal models. This transformer replaces the ``type`` field of an item using a From 670317bdadc7b59d06311ad3a25c66d23bbd8a5f Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:12:53 +0100 Subject: [PATCH 44/85] Update action_mapper.py --- transformers/action_mapper.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index 70e0ccdd..f20be829 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -7,10 +7,8 @@ from typing import Any, Dict from .base_transformer import BaseTransformer -class ActionMapper(BaseTransformer): - +class ActionMapper(BaseTransformer): """Map action values between vendor and universal models. - This transformer replaces the ``action`` field of an item using a predefined mapping dictionary. If the action is not found in the mapping, it is left unchanged. From 86af8b4919d2942d2dc9af84bd7103f7cc2f7ba5 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:15:50 +0100 Subject: [PATCH 45/85] Update action_mapper.py --- transformers/action_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index f20be829..d7eb1ba2 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -9,6 +9,7 @@ class ActionMapper(BaseTransformer): """Map action values between vendor and universal models. + This transformer replaces the ``action`` field of an item using a predefined mapping dictionary. If the action is not found in the mapping, it is left unchanged. From d8a9aef121033ec62911cb7f1fee1896c9d4554d Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:19:53 +0100 Subject: [PATCH 46/85] Update action_mapper.py --- transformers/action_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index d7eb1ba2..9439d286 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -14,6 +14,7 @@ class ActionMapper(BaseTransformer): predefined mapping dictionary. If the action is not found in the mapping, it is left unchanged. """ + def __init__(self, action_map: Dict[str, str]) -> None: """Initialize the ActionMapper. From 6129e9fbfc90ad59c5a95c7df7a5316e7b906a44 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:20:11 +0100 Subject: [PATCH 47/85] Update base_transformer.py --- transformers/base_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index a280940a..c4676dc9 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -8,13 +8,13 @@ from typing import Any, Dict class BaseTransformer(ABC): - """Define the interface for all transformers. All concrete transformers must implement the ``transform`` method, which takes a single dictionary item and returns a transformed dictionary. """ + @abstractmethod def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Transform a single dictionary item. From cfe4d62a4dcc2598a12e9a1812284260e85fb85a Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:20:27 +0100 Subject: [PATCH 48/85] Update category_mapper.py --- transformers/category_mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py index d869fcce..91d63973 100644 --- a/transformers/category_mapper.py +++ b/transformers/category_mapper.py @@ -9,13 +9,13 @@ from .base_transformer import BaseTransformer class CategoryMapper(BaseTransformer): - """Map category identifiers between vendor and universal models. This transformer replaces the ``category_id`` field of an item using a predefined mapping dictionary. If the category is not found in the mapping, it is left unchanged. """ + def __init__(self, category_map: Dict[str, str]) -> None: """Initialize the CategoryMapper. From a22a1b13dc15fd55c98ccef8bc1ffa1bc446da25 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:20:50 +0100 Subject: [PATCH 49/85] Update metadata_enricher.py --- transformers/metadata_enricher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py index 0451f8a7..dbd6384d 100644 --- a/transformers/metadata_enricher.py +++ b/transformers/metadata_enricher.py @@ -9,12 +9,12 @@ from .base_transformer import BaseTransformer class MetadataEnricher(BaseTransformer): - """Enrich items with vendor and metadata information. This transformer adds a ``vendor`` field and a ``metadata`` dictionary containing a ``processed_at`` timestamp to each item. """ + def __init__(self, vendor: str) -> None: """Initialize the MetadataEnricher. From e2fdd288c2f35a878eee422f5b28111c8dd6fc8c Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:21:04 +0100 Subject: [PATCH 50/85] Update pattern_normalizer.py --- transformers/pattern_normalizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py index a24fbbd1..a4323bec 100644 --- a/transformers/pattern_normalizer.py +++ b/transformers/pattern_normalizer.py @@ -8,12 +8,12 @@ from .base_transformer import BaseTransformer class PatternNormalizer(BaseTransformer): - """Normalize or enforce the presence of a pattern field in items. This transformer guarantees that each dictionary item contains a ``pattern`` key. If the key is missing, it is initialized to an empty string. """ + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Ensure the item has a pattern field. From 12e3a22ff258cd68d8f0c08bec1f4a3773fdd2c5 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:21:29 +0100 Subject: [PATCH 51/85] Update type_mapper.py --- transformers/type_mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformers/type_mapper.py b/transformers/type_mapper.py index 39e646ce..51687a0f 100644 --- a/transformers/type_mapper.py +++ b/transformers/type_mapper.py @@ -9,13 +9,13 @@ from .base_transformer import BaseTransformer class TypeMapper(BaseTransformer): - """Map type values between vendor and universal models. This transformer replaces the ``type`` field of an item using a predefined mapping dictionary. If the type is not found in the mapping, it is left unchanged. """ + def __init__(self, type_map: Dict[str, str]) -> None: """Initialize the TypeMapper. From 847d8b58e413f7dc14c5a949d293459b853a8229 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:25:23 +0100 Subject: [PATCH 52/85] Update action_mapper.py --- transformers/action_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index 9439d286..ce13b491 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -5,6 +5,7 @@ """ from typing import Any, Dict + from .base_transformer import BaseTransformer class ActionMapper(BaseTransformer): From 960697d30d4aa283334f305a510d58d287428b74 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:25:42 +0100 Subject: [PATCH 53/85] Update base_transformer.py --- transformers/base_transformer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index c4676dc9..704761fd 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -5,6 +5,7 @@ """ from abc import ABC, abstractmethod + from typing import Any, Dict class BaseTransformer(ABC): From 6349ce0c9e472c7c133a34b5e67ced08e05ee115 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:25:55 +0100 Subject: [PATCH 54/85] Update category_mapper.py --- transformers/category_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py index 91d63973..496e29f7 100644 --- a/transformers/category_mapper.py +++ b/transformers/category_mapper.py @@ -6,6 +6,7 @@ """ from typing import Any, Dict + from .base_transformer import BaseTransformer class CategoryMapper(BaseTransformer): From 3fbc7e7fc386c5ae75463aaf94af08b0ef2f2479 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:26:11 +0100 Subject: [PATCH 55/85] Update metadata_enricher.py --- transformers/metadata_enricher.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py index dbd6384d..b3be5e11 100644 --- a/transformers/metadata_enricher.py +++ b/transformers/metadata_enricher.py @@ -6,6 +6,7 @@ from datetime import datetime from typing import Any, Dict + from .base_transformer import BaseTransformer class MetadataEnricher(BaseTransformer): From 61967f1d03d782f6ccac1f08a0ca4a854340f2f1 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:26:28 +0100 Subject: [PATCH 56/85] Update pattern_normalizer.py --- transformers/pattern_normalizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py index a4323bec..a587197f 100644 --- a/transformers/pattern_normalizer.py +++ b/transformers/pattern_normalizer.py @@ -5,6 +5,7 @@ """ from typing import Any, Dict + from .base_transformer import BaseTransformer class PatternNormalizer(BaseTransformer): From a107238cc7bfb2c1a98d7312420d7269b7918668 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:26:42 +0100 Subject: [PATCH 57/85] Update type_mapper.py --- transformers/type_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/type_mapper.py b/transformers/type_mapper.py index 51687a0f..8ffe8161 100644 --- a/transformers/type_mapper.py +++ b/transformers/type_mapper.py @@ -6,6 +6,7 @@ """ from typing import Any, Dict + from .base_transformer import BaseTransformer class TypeMapper(BaseTransformer): From 3e13df56607ec3254b9294ad67ae841bcc3f353a Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:26:58 +0100 Subject: [PATCH 58/85] Update pipelines.py --- transformers/pipelines.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/pipelines.py b/transformers/pipelines.py index f987e60d..9c85088a 100644 --- a/transformers/pipelines.py +++ b/transformers/pipelines.py @@ -5,6 +5,7 @@ """ from typing import List, Dict, Any + from transformers.base_transformer import BaseTransformer def apply_transformers( From 6097c3b66513b18c2b2bdfabf95e4e6f4d85ceff Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:31:16 +0100 Subject: [PATCH 59/85] Update action_mapper.py --- transformers/action_mapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index ce13b491..d1419d7d 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -4,7 +4,8 @@ between vendor-specific representations and the universal data model. """ -from typing import Any, Dict +from typing import Any +from typing import Dict from .base_transformer import BaseTransformer From 2467812fd7a5b0cacc36d757ca7edb43a7189e73 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:32:09 +0100 Subject: [PATCH 60/85] Update base_transformer.py --- transformers/base_transformer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index 704761fd..b9abbd0e 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -4,9 +4,10 @@ in the transformation pipeline. """ -from abc import ABC, abstractmethod - -from typing import Any, Dict +from abc import ABC +from abc import abstractmethod +from typing import Any +from typing import Dict class BaseTransformer(ABC): """Define the interface for all transformers. From e30ea163aaa2fde611f11428ef85db35a87db310 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:32:32 +0100 Subject: [PATCH 61/85] Update category_mapper.py --- transformers/category_mapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py index 496e29f7..4aeb9975 100644 --- a/transformers/category_mapper.py +++ b/transformers/category_mapper.py @@ -5,7 +5,8 @@ data model. """ -from typing import Any, Dict +from typing import Any +from typing import Dict from .base_transformer import BaseTransformer From ce4b3d659398201e840e048c1f2e910e5f0224fc Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:32:57 +0100 Subject: [PATCH 62/85] Update metadata_enricher.py --- transformers/metadata_enricher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py index b3be5e11..a7cacd27 100644 --- a/transformers/metadata_enricher.py +++ b/transformers/metadata_enricher.py @@ -5,7 +5,8 @@ """ from datetime import datetime -from typing import Any, Dict +from typing import Any +from typing import Dict from .base_transformer import BaseTransformer From 74972a1fc8d002041d40b89708d83e5eec1d0cd7 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:33:16 +0100 Subject: [PATCH 63/85] Update pattern_normalizer.py --- transformers/pattern_normalizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py index a587197f..9652b6ce 100644 --- a/transformers/pattern_normalizer.py +++ b/transformers/pattern_normalizer.py @@ -4,7 +4,8 @@ has a ``pattern`` field. Currently, this transformer acts as a pass-through. """ -from typing import Any, Dict +from typing import Any +from typing import Dict from .base_transformer import BaseTransformer From fb30380ac2b79ac6ad9ba08248d4c81a3eaf534f Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:33:38 +0100 Subject: [PATCH 64/85] Update type_mapper.py --- transformers/type_mapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transformers/type_mapper.py b/transformers/type_mapper.py index 8ffe8161..1f4a061e 100644 --- a/transformers/type_mapper.py +++ b/transformers/type_mapper.py @@ -5,7 +5,8 @@ and the universal data model. """ -from typing import Any, Dict +from typing import Any +from typing import Dict from .base_transformer import BaseTransformer From 37ea1067e500fcf0b9d4b8e65d06aa0ee63ca3be Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:38:51 +0100 Subject: [PATCH 65/85] Update action_mapper.py --- transformers/action_mapper.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/transformers/action_mapper.py b/transformers/action_mapper.py index d1419d7d..fc6cb9bc 100644 --- a/transformers/action_mapper.py +++ b/transformers/action_mapper.py @@ -9,24 +9,25 @@ from .base_transformer import BaseTransformer -class ActionMapper(BaseTransformer): + +class ActionMapper(BaseTransformer): """Map action values between vendor and universal models. - + This transformer replaces the ``action`` field of an item using a predefined mapping dictionary. If the action is not found in the mapping, it is left unchanged. """ - def __init__(self, action_map: Dict[str, str]) -> None: + def __init__(self, action_map: Dict[str, str]) -> None: """Initialize the ActionMapper. Args: action_map: A dictionary mapping source action values to destination action values. - """ + """ self.action_map = action_map - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Transform an item's action field using the action mapping. Args: @@ -36,7 +37,7 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: The transformed item with its ``action`` field mapped according to the configured action map. - """ + """ action = item.get("action") if action in self.action_map: item["action"] = self.action_map[action] From 165691c7ec48e46ad74ee755da8a89e92a4f1b49 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:48:59 +0100 Subject: [PATCH 66/85] Update base_transformer.py --- transformers/base_transformer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index b9abbd0e..ceb4ef87 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -4,8 +4,7 @@ in the transformation pipeline. """ -from abc import ABC -from abc import abstractmethod +from abc import ABC, abstractmethod from typing import Any from typing import Dict From c3da9821d08811cde589a8f382edd9f90e0fe804 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:52:43 +0100 Subject: [PATCH 67/85] Update base_transformer.py --- transformers/base_transformer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index ceb4ef87..b9abbd0e 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -4,7 +4,8 @@ in the transformation pipeline. """ -from abc import ABC, abstractmethod +from abc import ABC +from abc import abstractmethod from typing import Any from typing import Dict From 168c59fa449bfd23a676bae9f17e13025f3e7ea0 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 11:56:23 +0100 Subject: [PATCH 68/85] Update base_transformer.py --- transformers/base_transformer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/transformers/base_transformer.py b/transformers/base_transformer.py index b9abbd0e..04bd40b4 100644 --- a/transformers/base_transformer.py +++ b/transformers/base_transformer.py @@ -9,7 +9,8 @@ from typing import Any from typing import Dict -class BaseTransformer(ABC): + +class BaseTransformer(ABC): """Define the interface for all transformers. All concrete transformers must implement the ``transform`` method, @@ -18,7 +19,7 @@ class BaseTransformer(ABC): """ @abstractmethod - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: """Transform a single dictionary item. Args: @@ -27,5 +28,5 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: A transformed dictionary. - """ + """ raise NotImplementedError From 6ff008be6b738455eae9c34a6abdfdf618a78fd2 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:22:25 +0100 Subject: [PATCH 69/85] Update category_mapper.py --- transformers/category_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/category_mapper.py b/transformers/category_mapper.py index 4aeb9975..3010f14b 100644 --- a/transformers/category_mapper.py +++ b/transformers/category_mapper.py @@ -10,6 +10,7 @@ from .base_transformer import BaseTransformer + class CategoryMapper(BaseTransformer): """Map category identifiers between vendor and universal models. From d57cb4d6ca2959e01affd7d926ab40eae15b2a3f Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:25:48 +0100 Subject: [PATCH 70/85] Update metadata_enricher.py --- transformers/metadata_enricher.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/metadata_enricher.py b/transformers/metadata_enricher.py index a7cacd27..95669776 100644 --- a/transformers/metadata_enricher.py +++ b/transformers/metadata_enricher.py @@ -10,6 +10,7 @@ from .base_transformer import BaseTransformer + class MetadataEnricher(BaseTransformer): """Enrich items with vendor and metadata information. From aeb39eb3d67401eb59b2336402ba4bb71bde7b62 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:25:59 +0100 Subject: [PATCH 71/85] Update pattern_normalizer.py --- transformers/pattern_normalizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/pattern_normalizer.py b/transformers/pattern_normalizer.py index 9652b6ce..f88212d0 100644 --- a/transformers/pattern_normalizer.py +++ b/transformers/pattern_normalizer.py @@ -9,6 +9,7 @@ from .base_transformer import BaseTransformer + class PatternNormalizer(BaseTransformer): """Normalize or enforce the presence of a pattern field in items. From 165c5afb19b1fe88fa147d6ea48279af2a46732d Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:26:14 +0100 Subject: [PATCH 72/85] Update type_mapper.py --- transformers/type_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/type_mapper.py b/transformers/type_mapper.py index 1f4a061e..79a6afd8 100644 --- a/transformers/type_mapper.py +++ b/transformers/type_mapper.py @@ -10,6 +10,7 @@ from .base_transformer import BaseTransformer + class TypeMapper(BaseTransformer): """Map type values between vendor and universal models. From cf9288c7674e5b62209dfff5220d67035f693d4f Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:29:04 +0100 Subject: [PATCH 73/85] Update pipelines.py --- transformers/pipelines.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transformers/pipelines.py b/transformers/pipelines.py index 9c85088a..dff6a9d2 100644 --- a/transformers/pipelines.py +++ b/transformers/pipelines.py @@ -8,6 +8,7 @@ from transformers.base_transformer import BaseTransformer + def apply_transformers( items: List[Dict[str, Any]], transformers: List[BaseTransformer] From b191d3642e55f1dd6a235120c0256442a282ec5b Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:33:46 +0100 Subject: [PATCH 74/85] Update pipelines.py --- transformers/pipelines.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/transformers/pipelines.py b/transformers/pipelines.py index dff6a9d2..02dd22b6 100644 --- a/transformers/pipelines.py +++ b/transformers/pipelines.py @@ -4,7 +4,9 @@ to vendor configuration items, producing universal model dictionaries. """ -from typing import List, Dict, Any +from typing import List +from typing import Dict +from typing import Any from transformers.base_transformer import BaseTransformer From 2d6778c3ac504a57585f3a094e682a9139d98f0c Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:38:06 +0100 Subject: [PATCH 75/85] Update pipelines.py --- transformers/pipelines.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/transformers/pipelines.py b/transformers/pipelines.py index 02dd22b6..1ec2647d 100644 --- a/transformers/pipelines.py +++ b/transformers/pipelines.py @@ -4,9 +4,9 @@ to vendor configuration items, producing universal model dictionaries. """ -from typing import List -from typing import Dict from typing import Any +from typing import Dict +from typing import List from transformers.base_transformer import BaseTransformer From 68bf01d07d077ae7ce098851905cda43358e9a0f Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:43:58 +0100 Subject: [PATCH 76/85] Update fortinet_transformer.py --- transformers/vendors/fortinet_transformer.py | 47 +++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/transformers/vendors/fortinet_transformer.py b/transformers/vendors/fortinet_transformer.py index a8a63583..2fb4d914 100644 --- a/transformers/vendors/fortinet_transformer.py +++ b/transformers/vendors/fortinet_transformer.py @@ -1,18 +1,43 @@ -# Absolute imports -import jsonata -from transformers.base_transformer import BaseTransformer -from transformers.pattern_normalizer import PatternNormalizer +"""Fortinet transformation pipelines. + +This module defines action, category, and type mappings for Fortinet +URL filtering configurations, as well as transformation pipelines +to convert between Fortinet-specific and universal data models. +""" + from transformers.action_mapper import ActionMapper -from transformers.type_mapper import TypeMapper +from transformers.base_transformer import BaseTransformer from transformers.category_mapper import CategoryMapper from transformers.metadata_enricher import MetadataEnricher +from transformers.pattern_normalizer import PatternNormalizer +from transformers.type_mapper import TypeMapper + # ---------------- FORTINET MAPPINGS ---------------- -FORTINET_ACTION_MAP = {"block": "block", "allow": "allow", "monitor": "monitor"} -FORTINET_CATEGORY_MAP = {"3": "malware", "4": "phishing", "5": "gambling", "default": "uncategorized"} -FORTINET_TYPE_MAP = {"simple": "literal", "wildcard": "wildcard", "regex": "regex", "substring": "substring"} + +FORTINET_ACTION_MAP = { + "block": "block", + "allow": "allow", + "monitor": "monitor", +} + +FORTINET_CATEGORY_MAP = { + "3": "malware", + "4": "phishing", + "5": "gambling", + "default": "uncategorized", +} + +FORTINET_TYPE_MAP = { + "simple": "literal", + "wildcard": "wildcard", + "regex": "regex", + "substring": "substring", +} + # ---------------- FORTINET PIPELINES ---------------- + VENDOR_TO_UNIVERSAL_PIPELINES = [ ActionMapper(FORTINET_ACTION_MAP), PatternNormalizer(), @@ -22,9 +47,9 @@ ] UNIVERSAL_TO_VENDOR_PIPELINES = [ - ActionMapper({v: k for k, v in FORTINET_ACTION_MAP.items()}), + ActionMapper({value: key for key, value in FORTINET_ACTION_MAP.items()}), PatternNormalizer(), - TypeMapper({v: k for k, v in FORTINET_TYPE_MAP.items()}), - CategoryMapper({v: k for k, v in FORTINET_CATEGORY_MAP.items()}), + TypeMapper({value: key for key, value in FORTINET_TYPE_MAP.items()}), + CategoryMapper({value: key for key, value in FORTINET_CATEGORY_MAP.items()}), MetadataEnricher("fortinet"), ] From a7a737f1f48afa99509b9a4a829b6b1f828032bd Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:45:21 +0100 Subject: [PATCH 77/85] Update netskope_transformer.py --- transformers/vendors/netskope_transformer.py | 176 +++++-------------- 1 file changed, 48 insertions(+), 128 deletions(-) diff --git a/transformers/vendors/netskope_transformer.py b/transformers/vendors/netskope_transformer.py index b65709ee..2ed12c7b 100644 --- a/transformers/vendors/netskope_transformer.py +++ b/transformers/vendors/netskope_transformer.py @@ -1,152 +1,72 @@ -from typing import Dict, Any +"""Netskope transformation pipelines and pattern normalization. + +This module defines transformers and mappings required to convert +Netskope URL list configurations to and from the universal data model. +""" + +import re +from typing import Any +from typing import Dict +from typing import List +from typing import Optional -# Absolute imports -from transformers.base_transformer import BaseTransformer -from transformers.pattern_normalizer import PatternNormalizer from transformers.action_mapper import ActionMapper -from transformers.type_mapper import TypeMapper +from transformers.base_transformer import BaseTransformer from transformers.category_mapper import CategoryMapper from transformers.metadata_enricher import MetadataEnricher +from transformers.pattern_normalizer import PatternNormalizer +from transformers.type_mapper import TypeMapper class NetskopePatternNormalizer(BaseTransformer): - """ - Normalize Netskope URL patterns and convert wildcards to regex formats. + """Normalize Netskope URL patterns for vendor compatibility. - - 'literal' or 'exact': left as-is for exact lists. - - 'wildcard': converted to regex for regex lists. - - 'regex': passed as regex (wildcards converted to proper regex if needed). + This transformer converts universal URL patterns into Netskope- + compatible formats: + + - ``literal`` / ``exact`` patterns are preserved. + - ``wildcard`` patterns are converted into regex. + - ``regex`` patterns are passed through unchanged. """ def wildcard_to_regex(self, pattern: str) -> str: - """ - Convert Netskope wildcard patterns into regex. - '*.example.com' -> '^([^.]+\.)*example\.com$' + """Convert a wildcard domain pattern to a regex. + + Example: + ``*.example.com`` → ``^([^.]+\\.)*example\\.com$`` + + Args: + pattern: A wildcard URL pattern. + + Returns: + A regex representation of the wildcard pattern. """ if pattern.startswith("*."): - domain = pattern[2:] - domain = domain.replace(".", r"\.") # Escape dots + domain = pattern[2:].replace(".", r"\.") return rf"^([^.]+\.)*{domain}$" - else: - # Leave non-wildcards as-is for regex lists - return pattern + + return pattern def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - """ - Transform a single URL item into Netskope-compatible pattern. + """Transform a URL item into a Netskope-compatible pattern. + + Args: + item: A universal URL dictionary. + + Returns: + The transformed dictionary with Netskope pattern semantics. """ raw_pattern = item.get("pattern", "") - utype = item.get("type", "literal") # universal type + universal_type = item.get("type", "literal") - # Determine netskope_type - if utype in ("literal", "exact"): - # Exact list → leave pattern as literal - final_pattern = raw_pattern + if universal_type in ("literal", "exact"): + item["pattern"] = raw_pattern item["netskope_type"] = "exact" - elif utype in ("wildcard", "regex"): - # Regex list → convert wildcard to proper regex - final_pattern = self.wildcard_to_regex(raw_pattern) + elif universal_type in ("wildcard", "regex"): + item["pattern"] = self.wildcard_to_regex(raw_pattern) item["netskope_type"] = "regex" else: - # Fallback - final_pattern = raw_pattern - item["netskope_type"] = "exact" - - # **Important:** do NOT double-escape backslashes - # Only regex patterns need normal backslashes - item["pattern"] = final_pattern - - return item - - def transform_list(self, items: list[Dict[str, Any]]) -> list[Dict[str, Any]]: - """ - Transform a list of URL items. - """ - return [self.transform(item) for item in items] - - -class NetskopePatternDenormalizer(BaseTransformer): - """ - Convert Netskope Data Model patterns back into universal-compatible - patterns and types. - """ - - def regex_to_wildcard(self, pattern: str) -> str | None: - """ - Convert a known Netskope-style regex back to wildcard format. - Example: - ^([^.]+\.)*example\.com$ -> *.example.com - """ - wildcard_regex = r'^\^\(\[\^\.\]\+\\\.\)\*(.+)\\\.([a-zA-Z0-9\-]+)\$$' - match = re.match(wildcard_regex, pattern) - - if match: - domain = f"{match.group(1)}.{match.group(2)}" - return f"*.{domain}" - - return None - - def is_regex(self, pattern: str) -> bool: - regex_markers = ( - "^", "$", "(", ")", "[", "]", "+", "?", "|", "{", "}" - ) - return any(m in pattern for m in regex_markers) - - def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: - pattern = item.get("pattern", "").replace("\\\\", "\\") - - # 1. Valid Netskope wildcard ONLY - if pattern.startswith("*.") and pattern.count("*") == 1: - item["type"] = "wildcard" - item["pattern"] = pattern - - # 2. Any other '*' means regex - elif "*" in pattern: - item["type"] = "regex" - item["pattern"] = pattern - - # 3. Regex syntax without '*' - elif self.is_regex(pattern): - wildcard = self.regex_to_wildcard(pattern) - if wildcard: - item["type"] = "wildcard" - item["pattern"] = wildcard - else: - item["type"] = "regex" - item["pattern"] = pattern - - # 4. Literal - else: - item["type"] = "exact" - item["pattern"] = pattern - - item.pop("netskope_type", None) - return item - - - -# ---------------- NETSKOPE MAPPINGS ---------------- -NETSKOPE_ACTION_MAP = {"block": "deny", "allow": "allow", "monitor": "allow"} -NETSKOPE_CATEGORY_MAP = {"malware": "malware", "phishing": "phishing", "gambling": "gambling", "uncategorized": "uncategorized"} -NETSKOPE_TO_UNIVERSAL_TYPE_MAP = {"exact": "literal", "regex": "regex"} -UNIVERSAL_TO_NETSKOPE_TYPE_MAP = {"literal": "exact", "regex": "regex", "wildcard": "regex", "substring": "regex"} - -# ---------------- NETSKOPE PIPELINES ---------------- - -VENDOR_TO_UNIVERSAL_PIPELINES = [ - ActionMapper(NETSKOPE_ACTION_MAP), - TypeMapper(NETSKOPE_TO_UNIVERSAL_TYPE_MAP), - NetskopePatternDenormalizer(), - CategoryMapper(NETSKOPE_CATEGORY_MAP), - MetadataEnricher("netskope"), -] - -UNIVERSAL_TO_VENDOR_PIPELINES = [ - ActionMapper({v: k for k, v in NETSKOPE_ACTION_MAP.items()}), - TypeMapper(UNIVERSAL_TO_NETSKOPE_TYPE_MAP), - NetskopePatternNormalizer(), - CategoryMapper({v: k for k, v in NETSKOPE_CATEGORY_MAP.items()}), - MetadataEnricher("netskope"), -] + item["pattern"] = raw_pattern + item["netskope_type"] =_]() From 985eb063be77f349b56196fc945a8717156e02e9 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:46:06 +0100 Subject: [PATCH 78/85] Update prisma_transformer.py --- transformers/vendors/prisma_transformer.py | 46 +++++++++++++++++----- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/transformers/vendors/prisma_transformer.py b/transformers/vendors/prisma_transformer.py index 0cbfad7c..37a574a8 100644 --- a/transformers/vendors/prisma_transformer.py +++ b/transformers/vendors/prisma_transformer.py @@ -1,17 +1,43 @@ -# Absolute imports -from transformers.base_transformer import BaseTransformer -from transformers.pattern_normalizer import PatternNormalizer +"""Prisma transformation pipelines. + +This module defines action, category, and type mappings for Prisma +URL filtering configurations, along with transformation pipelines +to convert between Prisma-specific and universal data models. +""" + from transformers.action_mapper import ActionMapper -from transformers.type_mapper import TypeMapper +from transformers.base_transformer import BaseTransformer from transformers.category_mapper import CategoryMapper from transformers.metadata_enricher import MetadataEnricher +from transformers.pattern_normalizer import PatternNormalizer +from transformers.type_mapper import TypeMapper + # ---------------- PRISMA MAPPINGS ---------------- -PRISMA_ACTION_MAP = {"block": "deny", "allow": "allow", "monitor": "alert"} -PRISMA_CATEGORY_MAP = {"malware": "malware", "phishing": "phishing", "gambling": "gambling", "uncategorized": "uncategorized"} -PRISMA_TYPE_MAP = {"simple": "literal", "wildcard": "wildcard", "regex": "regex", "substring": "substring"} + +PRISMA_ACTION_MAP = { + "block": "deny", + "allow": "allow", + "monitor": "alert", +} + +PRISMA_CATEGORY_MAP = { + "malware": "malware", + "phishing": "phishing", + "gambling": "gambling", + "uncategorized": "uncategorized", +} + +PRISMA_TYPE_MAP = { + "simple": "literal", + "wildcard": "wildcard", + "regex": "regex", + "substring": "substring", +} + # ---------------- PRISMA PIPELINES ---------------- + VENDOR_TO_UNIVERSAL_PIPELINES = [ ActionMapper(PRISMA_ACTION_MAP), PatternNormalizer(), @@ -21,9 +47,9 @@ ] UNIVERSAL_TO_VENDOR_PIPELINES = [ - ActionMapper({v: k for k, v in PRISMA_ACTION_MAP.items()}), + ActionMapper({value: key for key, value in PRISMA_ACTION_MAP.items()}), PatternNormalizer(), - TypeMapper({v: k for k, v in PRISMA_TYPE_MAP.items()}), - CategoryMapper({v: k for k, v in PRISMA_CATEGORY_MAP.items()}), + TypeMapper({value: key for key, value in PRISMA_TYPE_MAP.items()}), + CategoryMapper({value: key for key, value in PRISMA_CATEGORY_MAP.items()}), MetadataEnricher("prisma"), ] From 99eb4f17385ae5b2bd6492f4db2159dfa75e2d2a Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:47:16 +0100 Subject: [PATCH 79/85] Update zscaler_transformer.py --- transformers/vendors/zscaler_transformer.py | 46 ++++++++++++++++----- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/transformers/vendors/zscaler_transformer.py b/transformers/vendors/zscaler_transformer.py index 074eb6d0..5c0f200f 100644 --- a/transformers/vendors/zscaler_transformer.py +++ b/transformers/vendors/zscaler_transformer.py @@ -1,17 +1,42 @@ -# Absolute imports -from transformers.base_transformer import BaseTransformer -from transformers.pattern_normalizer import PatternNormalizer +"""Zscaler transformation pipelines. + +This module defines action, category, and type mappings for Zscaler +URL filtering configurations, along with transformation pipelines +to convert between Zscaler-specific and universal data models. +""" + from transformers.action_mapper import ActionMapper -from transformers.type_mapper import TypeMapper +from transformers.base_transformer import BaseTransformer from transformers.category_mapper import CategoryMapper from transformers.metadata_enricher import MetadataEnricher +from transformers.pattern_normalizer import PatternNormalizer +from transformers.type_mapper import TypeMapper + # ---------------- ZSCALER MAPPINGS ---------------- -ZSCALER_ACTION_MAP = {"block": "BLOCK", "allow": "ALLOW", "monitor": "MONITOR"} -ZSCALER_CATEGORY_MAP = {"malware": "malware", "phishing": "phishing", "gambling": "gambling", "uncategorized": "uncategorized"} -ZSCALER_TYPE_MAP = {"STRING": "literal", "WILDCARD": "wildcard", "REGEX": "regex"} + +ZSCALER_ACTION_MAP = { + "block": "BLOCK", + "allow": "ALLOW", + "monitor": "MONITOR", +} + +ZSCALER_CATEGORY_MAP = { + "malware": "malware", + "phishing": "phishing", + "gambling": "gambling", + "uncategorized": "uncategorized", +} + +ZSCALER_TYPE_MAP = { + "STRING": "literal", + "WILDCARD": "wildcard", + "REGEX": "regex", +} + # ---------------- ZSCALER PIPELINES ---------------- + VENDOR_TO_UNIVERSAL_PIPELINES = [ ActionMapper(ZSCALER_ACTION_MAP), PatternNormalizer(), @@ -21,9 +46,10 @@ ] UNIVERSAL_TO_VENDOR_PIPELINES = [ - ActionMapper({v: k for k, v in ZSCALER_ACTION_MAP.items()}), + ActionMapper({value: key for key, value in ZSCALER_ACTION_MAP.items()}), PatternNormalizer(), - TypeMapper({v: k for k, v in ZSCALER_TYPE_MAP.items()}), - CategoryMapper({v: k for k, v in ZSCALER_CATEGORY_MAP.items()}), + TypeMapper({value: key for key, value in ZSCALER_TYPE_MAP.items()}), + CategoryMapper({value: key for key, value in ZSCALER_CATEGORY_MAP.items()}), MetadataEnricher("zscaler"), ] + From af267ecfd00d14ad71e85539cfcfc9a1b4c8999b Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:50:34 +0100 Subject: [PATCH 80/85] Update fortinet_transformer.py --- transformers/vendors/fortinet_transformer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/transformers/vendors/fortinet_transformer.py b/transformers/vendors/fortinet_transformer.py index 2fb4d914..48804404 100644 --- a/transformers/vendors/fortinet_transformer.py +++ b/transformers/vendors/fortinet_transformer.py @@ -12,7 +12,6 @@ from transformers.pattern_normalizer import PatternNormalizer from transformers.type_mapper import TypeMapper - # ---------------- FORTINET MAPPINGS ---------------- FORTINET_ACTION_MAP = { From e19b11b2fc52fe48aed277c8b263d16b1c9f31a4 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:51:11 +0100 Subject: [PATCH 81/85] Update prisma_transformer.py --- transformers/vendors/prisma_transformer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/transformers/vendors/prisma_transformer.py b/transformers/vendors/prisma_transformer.py index 37a574a8..87ec8120 100644 --- a/transformers/vendors/prisma_transformer.py +++ b/transformers/vendors/prisma_transformer.py @@ -12,7 +12,6 @@ from transformers.pattern_normalizer import PatternNormalizer from transformers.type_mapper import TypeMapper - # ---------------- PRISMA MAPPINGS ---------------- PRISMA_ACTION_MAP = { From 640b9277f26739d9fd34cc39937ac53cae4c34d5 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:51:23 +0100 Subject: [PATCH 82/85] Update zscaler_transformer.py --- transformers/vendors/zscaler_transformer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/transformers/vendors/zscaler_transformer.py b/transformers/vendors/zscaler_transformer.py index 5c0f200f..600cb85f 100644 --- a/transformers/vendors/zscaler_transformer.py +++ b/transformers/vendors/zscaler_transformer.py @@ -12,7 +12,6 @@ from transformers.pattern_normalizer import PatternNormalizer from transformers.type_mapper import TypeMapper - # ---------------- ZSCALER MAPPINGS ---------------- ZSCALER_ACTION_MAP = { From d57e6777390ba7fd28f62af0105be85252a08165 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 13:55:27 +0100 Subject: [PATCH 83/85] Update netskope_transformer.py --- transformers/vendors/netskope_transformer.py | 137 ++++++++++++++++++- 1 file changed, 136 insertions(+), 1 deletion(-) diff --git a/transformers/vendors/netskope_transformer.py b/transformers/vendors/netskope_transformer.py index 2ed12c7b..eb2b879f 100644 --- a/transformers/vendors/netskope_transformer.py +++ b/transformers/vendors/netskope_transformer.py @@ -69,4 +69,139 @@ def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: else: item["pattern"] = raw_pattern - item["netskope_type"] =_]() + item["netskope_type"] = "exact" + + return item + + def transform_list( + self, + items: List[Dict[str, Any]], + ) -> List[Dict[str, Any]]: + """Transform a list of URL items. + + Args: + items: A list of universal URL dictionaries. + + Returns: + A list of Netskope-compatible URL dictionaries. + """ + return [self.transform(item) for item in items] + + +class NetskopePatternDenormalizer(BaseTransformer): + """Convert Netskope patterns back to universal model patterns.""" + + def regex_to_wildcard(self, pattern: str) -> Optional[str]: + """Convert a Netskope regex pattern back to wildcard format. + + Example: + ``^([^.]+\\.)*example\\.com$`` → ``*.example.com`` + + Args: + pattern: A Netskope regex pattern. + + Returns: + A wildcard pattern if conversion is possible, otherwise ``None``. + """ + wildcard_regex = ( + r"^\^\(\[\^\.\]\+\\\.\)\*(.+)\\\.([a-zA-Z0-9\-]+)\$$" + ) + match = re.match(wildcard_regex, pattern) + + if match: + domain = f"{match.group(1)}.{match.group(2)}" + return f"*.{domain}" + + return None + + def is_regex(self, pattern: str) -> bool: + """Determine whether a pattern contains regex syntax. + + Args: + pattern: A URL pattern string. + + Returns: + ``True`` if the pattern appears to be a regex, otherwise ``False``. + """ + regex_markers = ("^", "$", "(", ")", "[", "]", "+", "?", "|", "{", "}") + return any(marker in pattern for marker in regex_markers) + + def transform(self, item: Dict[str, Any]) -> Dict[str, Any]: + """Transform a Netskope URL item into a universal-compatible form. + + Args: + item: A Netskope URL dictionary. + + Returns: + A universal URL dictionary. + """ + pattern = item.get("pattern", "").replace("\\\\", "\\") + + if pattern.startswith("*.") and pattern.count("*") == 1: + item["type"] = "wildcard" + + elif "*" in pattern: + item["type"] = "regex" + + elif self.is_regex(pattern): + wildcard = self.regex_to_wildcard(pattern) + if wildcard: + item["type"] = "wildcard" + pattern = wildcard + else: + item["type"] = "regex" + + else: + item["type"] = "exact" + + item["pattern"] = pattern + item.pop("netskope_type", None) + + return item + + +# ---------------- NETSKOPE MAPPINGS ---------------- + +NETSKOPE_ACTION_MAP = { + "block": "deny", + "allow": "allow", + "monitor": "allow", +} + +NETSKOPE_CATEGORY_MAP = { + "malware": "malware", + "phishing": "phishing", + "gambling": "gambling", + "uncategorized": "uncategorized", +} + +NETSKOPE_TO_UNIVERSAL_TYPE_MAP = { + "exact": "literal", + "regex": "regex", +} + +UNIVERSAL_TO_NETSKOPE_TYPE_MAP = { + "literal": "exact", + "regex": "regex", + "wildcard": "regex", + "substring": "regex", +} + + +# ---------------- NETSKOPE PIPELINES ---------------- + +VENDOR_TO_UNIVERSAL_PIPELINES = [ + ActionMapper(NETSKOPE_ACTION_MAP), + TypeMapper(NETSKOPE_TO_UNIVERSAL_TYPE_MAP), + NetskopePatternDenormalizer(), + CategoryMapper(NETSKOPE_CATEGORY_MAP), + MetadataEnricher("netskope"), +] + +UNIVERSAL_TO_VENDOR_PIPELINES = [ + ActionMapper({value: key for key, value in NETSKOPE_ACTION_MAP.items()}), + TypeMapper(UNIVERSAL_TO_NETSKOPE_TYPE_MAP), + NetskopePatternNormalizer(), + CategoryMapper({value: key for key, value in NETSKOPE_CATEGORY_MAP.items()}), + MetadataEnricher("netskope"), +] From 885df621258973d89e4cfdd5d5d804ca923312e0 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 14:10:39 +0100 Subject: [PATCH 84/85] Update netskope_transformer.py --- transformers/vendors/netskope_transformer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/transformers/vendors/netskope_transformer.py b/transformers/vendors/netskope_transformer.py index eb2b879f..469a2e53 100644 --- a/transformers/vendors/netskope_transformer.py +++ b/transformers/vendors/netskope_transformer.py @@ -30,10 +30,10 @@ class NetskopePatternNormalizer(BaseTransformer): """ def wildcard_to_regex(self, pattern: str) -> str: - """Convert a wildcard domain pattern to a regex. + r"""Convert a wildcard domain pattern to a regex. Example: - ``*.example.com`` → ``^([^.]+\\.)*example\\.com$`` + ``*.example.com`` → ``^([^.]+\.)*example\.com$`` Args: pattern: A wildcard URL pattern. @@ -92,7 +92,7 @@ class NetskopePatternDenormalizer(BaseTransformer): """Convert Netskope patterns back to universal model patterns.""" def regex_to_wildcard(self, pattern: str) -> Optional[str]: - """Convert a Netskope regex pattern back to wildcard format. + r"""Convert a Netskope regex pattern back to wildcard format. Example: ``^([^.]+\\.)*example\\.com$`` → ``*.example.com`` From fad66b0122984436bc272336681e70685b5e2960 Mon Sep 17 00:00:00 2001 From: pierrerondel Date: Mon, 5 Jan 2026 14:14:29 +0100 Subject: [PATCH 85/85] Update netskope_transformer.py --- transformers/vendors/netskope_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformers/vendors/netskope_transformer.py b/transformers/vendors/netskope_transformer.py index 469a2e53..c4e2e13a 100644 --- a/transformers/vendors/netskope_transformer.py +++ b/transformers/vendors/netskope_transformer.py @@ -30,7 +30,7 @@ class NetskopePatternNormalizer(BaseTransformer): """ def wildcard_to_regex(self, pattern: str) -> str: - r"""Convert a wildcard domain pattern to a regex. + r"""Convert a wildcard domain pattern to a regex. Example: ``*.example.com`` → ``^([^.]+\.)*example\.com$``