From 1523cc62801074e0d14e8c52104d5d80dfb73aba Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Thu, 29 Jan 2026 17:07:47 +0100
Subject: [PATCH 1/7] style: ruff

---
 .github/workflows/ci-cd.yml    | 7 +++++--
 src/ms_blocking/ms_blocking.py | 4 ++--
 src/ms_blocking/utils.py       | 7 ++++---
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
index d674ed1..cb460dc 100644
--- a/.github/workflows/ci-cd.yml
+++ b/.github/workflows/ci-cd.yml
@@ -23,11 +23,14 @@ jobs:
     - name: Install package
       run: poetry install
 
+    #- name: Install ruff
+    #  run: poetry install ruff
+
     - name: Format with ruff
-      run: ruff format
+      run: poetry run ruff format
 
     - name: Test with pytest
-      run: poetry run pytest tests/ --cov=pycounts --cov-report=xml
+      run: poetry run pytest tests/ --cov=pycounts --cov-report=html
 
     - name: Build documentation
       run: poetry run make html --directory docs/
diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index d6ad2fb..0fdfae9 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -242,7 +242,7 @@ def __repr__(self):
         return f"AndNode{{{self.left}, {self.right}}}"
 
     def __eq__(self, other):
-        return self.left==other.left and self.right==other.right
+        return self.left == other.left and self.right == other.right
 
     def block(self, df, motives=False):
         # In order not to perform redundant computations, we first filter out the rows that were not considered by the first blocker before running the second blocker
@@ -275,7 +275,7 @@ def __repr__(self):
         return f"OrNode{{{self.left}, {self.right}}}"
 
     def __eq__(self, other):
-        return self.left==other.left and self.right==other.right
+        return self.left == other.left and self.right == other.right
 
     def block(self, df, motives=False):
         coords_left = self.left.block(df, motives=motives)
diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index da01fa6..c32ebc1 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -546,7 +546,8 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
     else:
         return [s for s in cleaned_items if len(s) > 0]
 
-def scoring(data: pd.DataFrame, motives_column: str="motive") -> pd.Series:
+
+def scoring(data: pd.DataFrame, motives_column: str = "motive") -> pd.Series:
     """Add a score to a blocked DataFrame based on the number of motives
 
     Parameters
@@ -565,11 +566,11 @@ def scoring(data: pd.DataFrame, motives_column: str="motive") -> pd.Series:
 
     # Check that we do have motives
     if motives_column not in data.columns:
-        raise ValueError(f"Specified motives column \"{motives_column}\" does not exist")
+        raise ValueError(f'Specified motives column "{motives_column}" does not exist')
 
     if "score" in data.columns:
         print("Renaming 'score' column to 'score_old'")
         data = data.rename(columns={"score": "score_old"})
 
     scores = data[motives_column].apply(len)
-    return scores
\ No newline at end of file
+    return scores

From 4e14e6dfada8c41dc762c3401417eb23691d63cb Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Thu, 29 Jan 2026 17:58:33 +0100
Subject: [PATCH 2/7] refactor: rename Node to BlockerNode; fix: motives
 handling in AndNode

---
 .github/workflows/ci-cd.yml    |  3 ---
 src/ms_blocking/ms_blocking.py | 29 +++++++++++++++++++----------
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
index cb460dc..3b6e956 100644
--- a/.github/workflows/ci-cd.yml
+++ b/.github/workflows/ci-cd.yml
@@ -23,9 +23,6 @@ jobs:
     - name: Install package
       run: poetry install
 
-    #- name: Install ruff
-    #  run: poetry install ruff
-
     - name: Format with ruff
       run: poetry run ruff format
 
diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index 0fdfae9..d9234b7 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -211,28 +211,36 @@ def add_motives_to_coords(coords, explanations):
     return {pair: explanations for pair in coords}
 
 
-class Node:
+class BlockerNode:
     """Abstract class from which derive all classes in the module"""
 
     def __init__(self, left=None, right=None):
         self.left = left
         self.right = right
+        self.equivalence_columns = None
+        self.overlap_columns = None
         self.overlap = None
         self.normalize = None
         self.must_not_be_different = None
         self.word_level = None
 
     def __and__(self, other):
-        return merge_blockers(self, other)
+        if self == other:
+            return self
+        else:
+            return merge_blockers(self, other)
 
     def __or__(self, other):
-        return OrNode(self, other)
+        if self == other:
+            return self
+        else:
+            return OrNode(self, other)
 
     def __repr__(self):
         return f"Node{{{self.left}, {self.right}}}"
 
 
-class AndNode(Node):
+class AndNode(BlockerNode):
     """Used to compute the intersection of the outputs of two Blockers."""
 
     def __init__(self, left, right):
@@ -258,14 +266,14 @@ def block(self, df, motives=False):
             if id_lists
             else pd.DataFrame(columns=df.columns)
         )
-
-        coords_right = self.right.block(df_shortened, motives=motives)
+        # Rows that are in no pairs following the first blocking step cannot be in any pair of the interection
+        coords_right = self.right.block(df_shortened, motives=self.right.motives)
 
         result = merge_blocks_and(coords_left, coords_right)
         return result
 
 
-class OrNode(Node):
+class OrNode(BlockerNode):
     """Used to compute the union of the outputs of two Blockers."""
 
     def __init__(self, left, right):
@@ -278,6 +286,7 @@ def __eq__(self, other):
         return self.left == other.left and self.right == other.right
 
     def block(self, df, motives=False):
+        # Note: for performance, it would be wise to remove rows that are already paired with all other rows, though this case should be pretty rare in real situations
         coords_left = self.left.block(df, motives=motives)
 
         coords_right = self.right.block(df, motives=motives)
@@ -286,7 +295,7 @@ def block(self, df, motives=False):
         return result
 
 
-class AttributeEquivalenceBlocker(Node):  # Leaf
+class AttributeEquivalenceBlocker(BlockerNode):  # Leaf
     """To regroup rows based on equality across columns."""
 
     def __init__(
@@ -391,7 +400,7 @@ def block(self, data, motives=False):
             return set(coords)  # set is unnnecessary
 
 
-class OverlapBlocker(Node):  # Leaf
+class OverlapBlocker(BlockerNode):  # Leaf
     """To regroup rows based on overlap of one or more columns."""
 
     def __init__(
@@ -482,7 +491,7 @@ def block(self, data, motives=False):
             return set(coords)
 
 
-class MixedBlocker(Node):  # Leaf; For ANDs and RAM
+class MixedBlocker(BlockerNode):  # Leaf; For ANDs and RAM
     """Represent the intersection of an AttributeEquivalenceBlocker and an OverlapBlocker.
     Designed for performance and RAM efficiency.
     """

From 187716836a6f48c6815025fef891edfc4e2117ab Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Fri, 30 Jan 2026 14:41:41 +0100
Subject: [PATCH 3/7] refactor: move helpers to utils; docs: add docstrings and
 typehints to utils

---
 src/ms_blocking/ms_blocking.py | 372 +++++++++++++++------------------
 src/ms_blocking/utils.py       | 148 ++++++++++++-
 2 files changed, 304 insertions(+), 216 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index d9234b7..a7903c0 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -1,214 +1,7 @@
-import random
-from itertools import combinations
-from collections import Counter
-
 from ms_blocking.utils import *  # noqa: F403
 
 
-def merge_blockers(left, right):
-    """
-    Convert two blockers into a single one for performance purposes
-    """
-
-    if (
-        type(left) is AttributeEquivalenceBlocker
-        and type(right) is AttributeEquivalenceBlocker
-        and left.normalize == right.normalize
-        and left.must_not_be_different == right.must_not_be_different
-    ):
-        return AttributeEquivalenceBlocker(
-            blocking_columns=left.blocking_columns + right.blocking_columns,
-            normalize_strings=left.normalize,
-            must_not_be_different=left.must_not_be_different,
-        )
-
-    elif (
-        type(left) is OverlapBlocker
-        and type(right) is OverlapBlocker
-        and left.normalize == right.normalize
-        and left.overlap == right.overlap
-        and left.word_level == right.word_level
-    ):
-        return OverlapBlocker(
-            blocking_columns=left.blocking_columns + right.blocking_columns,
-            normalize_strings=left.normalize,
-            overlap=left.overlap,
-            word_level=left.word_level,
-        )
-
-    elif (
-        type(left) is AttributeEquivalenceBlocker
-        and type(right) is OverlapBlocker
-        and left.normalize == right.normalize
-    ):
-        return MixedBlocker(
-            equivalence_columns=left.blocking_columns,
-            overlap_columns=right.blocking_columns,
-            normalize_strings=left.normalize,
-            overlap=right.overlap,
-            word_level=right.word_level,
-        )
-
-    elif (
-        type(left) is OverlapBlocker
-        and type(right) is AttributeEquivalenceBlocker
-        and left.normalize == right.normalize
-    ):
-        return MixedBlocker(
-            equivalence_columns=right.blocking_columns,
-            overlap_columns=left.blocking_columns,
-            normalize_strings=left.normalize,
-            overlap=left.overlap,
-            word_level=left.word_level,
-        )
-
-    elif (
-        type(left) is MixedBlocker
-        and type(right) is MixedBlocker
-        and left.normalize == right.normalize
-        and left.overlap == right.overlap
-        and left.word_level == right.word_level
-    ):
-        return MixedBlocker(
-            equivalence_columns=left.equivalence_columns + right.equivalence_columns,
-            overlap_columns=left.overlap_columns + right.overlap_columns,
-            must_not_be_different=list(
-                set(left.must_not_be_different + right.must_not_be_different)
-            ),
-            normalize_strings=left.normalize,
-            overlap=left.overlap,
-            word_level=left.word_level,
-        )
-
-    elif (
-        type(left) is MixedBlocker
-        and type(right) is AttributeEquivalenceBlocker
-        and left.normalize == right.normalize
-    ):
-        return MixedBlocker(
-            equivalence_columns=left.equivalence_columns + right.blocking_columns,
-            overlap_columns=left.overlap_columns,
-            must_not_be_different=list(
-                set(left.must_not_be_different + right.must_not_be_different)
-            ),
-            normalize_strings=left.normalize,
-            overlap=left.overlap,
-            word_level=left.word_level,
-        )
-
-    elif (
-        type(left) is AttributeEquivalenceBlocker
-        and type(right) is MixedBlocker
-        and left.normalize == right.normalize
-    ):
-        return MixedBlocker(
-            equivalence_columns=left.blocking_columns + right.equivalence_columns,
-            overlap_columns=right.overlap_columns,
-            must_not_be_different=list(
-                set(left.must_not_be_different + right.must_not_be_different)
-            ),
-            normalize_strings=left.normalize,
-            overlap=right.overlap,
-            word_level=right.word_level,
-        )
-
-    elif (
-        type(left) is MixedBlocker
-        and type(right) is OverlapBlocker
-        and left.normalize == right.normalize
-        and left.overlap == right.overlap
-        and left.word_level == right.word_level
-    ):
-        return MixedBlocker(
-            equivalence_columns=left.equivalence_columns,
-            overlap_columns=left.overlap_columns + right.blocking_columns,
-            must_not_be_different=left.must_not_be_different,
-            normalize_strings=left.normalize,
-            overlap=left.overlap,
-            word_level=left.word_level,
-        )
-
-    elif (
-        type(left) is OverlapBlocker
-        and type(right) is MixedBlocker
-        and left.normalize == right.normalize
-        and left.overlap == right.overlap
-        and left.word_level == right.word_level
-    ):
-        return MixedBlocker(
-            equivalence_columns=right.equivalence_columns,
-            overlap_columns=left.blocking_columns + right.overlap_columns,
-            must_not_be_different=right.must_not_be_different,
-            normalize_strings=left.normalize,
-            overlap=left.overlap,
-            word_level=left.word_level,
-        )
-    else:
-        return AndNode(left, right)
-
-
-def must_not_be_different_apply(
-    temp_data, blocking_columns, must_not_be_different_columns
-):
-    """Re-block DataFrame on a second column, where we require non-difference rather than equality"""
-
-    temp_data["block_id"] = temp_data.groupby(blocking_columns).ngroup()
-    temp_data = temp_data[temp_data["block_id"].duplicated(keep=False)]
-
-    reconstructed_data = pd.DataFrame(columns=temp_data.columns)
-    for block in temp_data["block_id"].unique():
-        # noinspection PyArgumentList
-        current_block = (
-            temp_data[temp_data["block_id"] == block]
-            .sort_values(must_not_be_different_columns)
-            .copy()
-        )
-        if (
-            len(current_block[current_block[must_not_be_different_columns].notnull()])
-            == 0
-        ):  # All nulls
-            random_string = "".join(
-                random.choices("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", k=10)
-            )  # As long as the string is not already in the column...
-            # There must be a better way to do it...
-            current_block[must_not_be_different_columns] = (
-                current_block[must_not_be_different_columns]
-                .astype(str)
-                .fillna(random_string)
-            )
-        else:
-            current_block[must_not_be_different_columns] = (
-                current_block[must_not_be_different_columns].astype(str).ffill()
-            )
-        if len(reconstructed_data) == 0:
-            reconstructed_data = current_block
-        else:
-            reconstructed_data = pd.concat([reconstructed_data, current_block])
-    return reconstructed_data
-
-
-def block_overlap(groups, overlap):
-    coords = {
-        frozenset(pair) for group_list in groups for pair in combinations(group_list, 2)
-    }
-
-    if overlap > 1:
-        coords = [  # In this specific case, we want to keep duplicates to track the number of occurences of a pair
-            frozenset(pair)
-            for group_list in groups
-            for pair in combinations(group_list, 2)
-        ]
-        # Filter pairs that fulfill the minimum overlap condition
-        occurences_dict = Counter(coords)
-        coords = {
-            p for p in occurences_dict if occurences_dict[p] >= overlap
-        }  # The collection of pairs that fulfill the overlap condition
-
-    return coords
-
-
-def add_motives_to_coords(coords, explanations):
-    return {pair: explanations for pair in coords}
+# TODO: "block_id"
 
 
 class BlockerNode:
@@ -217,6 +10,7 @@ class BlockerNode:
     def __init__(self, left=None, right=None):
         self.left = left
         self.right = right
+        self.blocking_columns = None
         self.equivalence_columns = None
         self.overlap_columns = None
         self.overlap = None
@@ -267,7 +61,7 @@ def block(self, df, motives=False):
             else pd.DataFrame(columns=df.columns)
         )
         # Rows that are in no pairs following the first blocking step cannot be in any pair of the interection
-        coords_right = self.right.block(df_shortened, motives=self.right.motives)
+        coords_right = self.right.block(df_shortened, motives=motives)
 
         result = merge_blocks_and(coords_left, coords_right)
         return result
@@ -382,6 +176,7 @@ def block(self, data, motives=False):
                     return set()
 
         # Use the DataFrame index for grouping and forming pairs
+        # Using frozensets since they are hashable and thus can be used as dictionary keys
         groups = temp_data.groupby(
             self.blocking_columns + self.must_not_be_different
         ).apply(lambda x: frozenset(x.index), include_groups=False)
@@ -475,6 +270,7 @@ def block(self, data, motives=False):
                 return set()
 
         # Use the DataFrame index for grouping and forming pairs
+        # Using frozensets since they are hashable and thus can be used as dictionary keys
         groups = temp_data.groupby(self.blocking_columns).apply(
             lambda x: frozenset(x.index), include_groups=False
         )
@@ -613,6 +409,7 @@ def block(self, data, motives=False):
                 must_not_be_different_columns=self.must_not_be_different,
             )
 
+        # Using frozensets since they are hashable and thus can be used as dictionary keys
         groups_equivalence = temp_data.groupby(self.equivalence_columns).apply(
             lambda x: frozenset(x.index), include_groups=False
         )
@@ -642,4 +439,161 @@ def block(self, data, motives=False):
             return set(coords)
 
 
+def merge_blockers(
+    left: BlockerNode, right: BlockerNode
+) -> AttributeEquivalenceBlocker | OverlapBlocker | MixedBlocker | AndNode:
+    """Convert two blockers into a single one for performance purposes
+
+    This function outputs a new blocker that combines the functionalities of the two input blockers, to prevent redundant operations.
+
+    Parameters
+    ----------
+    left : BlockerNode
+      Blocker that represents the first condition
+
+    right : BlockerNode
+      Blocker that represents the second condition
+
+    Returns
+    -------
+    AttributeEquivalenceBlocker|OverlapBlocker|MixedBlocker|AndNode
+      Blocker that represents both conditions
+    """
+    if (
+        type(left) is AttributeEquivalenceBlocker
+        and type(right) is AttributeEquivalenceBlocker
+        and left.normalize == right.normalize
+        and left.must_not_be_different == right.must_not_be_different
+    ):
+        return AttributeEquivalenceBlocker(
+            blocking_columns=left.blocking_columns + right.blocking_columns,
+            normalize_strings=left.normalize,
+            must_not_be_different=left.must_not_be_different,
+        )
+
+    elif (
+        type(left) is OverlapBlocker
+        and type(right) is OverlapBlocker
+        and left.normalize == right.normalize
+        and left.overlap == right.overlap
+        and left.word_level == right.word_level
+    ):
+        return OverlapBlocker(
+            blocking_columns=left.blocking_columns + right.blocking_columns,
+            normalize_strings=left.normalize,
+            overlap=left.overlap,
+            word_level=left.word_level,
+        )
+
+    elif (
+        type(left) is AttributeEquivalenceBlocker
+        and type(right) is OverlapBlocker
+        and left.normalize == right.normalize
+    ):
+        return MixedBlocker(
+            equivalence_columns=left.blocking_columns,
+            overlap_columns=right.blocking_columns,
+            normalize_strings=left.normalize,
+            overlap=right.overlap,
+            word_level=right.word_level,
+        )
+
+    elif (
+        type(left) is OverlapBlocker
+        and type(right) is AttributeEquivalenceBlocker
+        and left.normalize == right.normalize
+    ):
+        return MixedBlocker(
+            equivalence_columns=right.blocking_columns,
+            overlap_columns=left.blocking_columns,
+            normalize_strings=left.normalize,
+            overlap=left.overlap,
+            word_level=left.word_level,
+        )
+
+    elif (
+        type(left) is MixedBlocker
+        and type(right) is MixedBlocker
+        and left.normalize == right.normalize
+        and left.overlap == right.overlap
+        and left.word_level == right.word_level
+    ):
+        return MixedBlocker(
+            equivalence_columns=left.equivalence_columns + right.equivalence_columns,
+            overlap_columns=left.overlap_columns + right.overlap_columns,
+            must_not_be_different=list(
+                set(left.must_not_be_different + right.must_not_be_different)
+            ),
+            normalize_strings=left.normalize,
+            overlap=left.overlap,
+            word_level=left.word_level,
+        )
+
+    elif (
+        type(left) is MixedBlocker
+        and type(right) is AttributeEquivalenceBlocker
+        and left.normalize == right.normalize
+    ):
+        return MixedBlocker(
+            equivalence_columns=left.equivalence_columns + right.blocking_columns,
+            overlap_columns=left.overlap_columns,
+            must_not_be_different=list(
+                set(left.must_not_be_different + right.must_not_be_different)
+            ),
+            normalize_strings=left.normalize,
+            overlap=left.overlap,
+            word_level=left.word_level,
+        )
+
+    elif (
+        type(left) is AttributeEquivalenceBlocker
+        and type(right) is MixedBlocker
+        and left.normalize == right.normalize
+    ):
+        return MixedBlocker(
+            equivalence_columns=left.blocking_columns + right.equivalence_columns,
+            overlap_columns=right.overlap_columns,
+            must_not_be_different=list(
+                set(left.must_not_be_different + right.must_not_be_different)
+            ),
+            normalize_strings=left.normalize,
+            overlap=right.overlap,
+            word_level=right.word_level,
+        )
+
+    elif (
+        type(left) is MixedBlocker
+        and type(right) is OverlapBlocker
+        and left.normalize == right.normalize
+        and left.overlap == right.overlap
+        and left.word_level == right.word_level
+    ):
+        return MixedBlocker(
+            equivalence_columns=left.equivalence_columns,
+            overlap_columns=left.overlap_columns + right.blocking_columns,
+            must_not_be_different=left.must_not_be_different,
+            normalize_strings=left.normalize,
+            overlap=left.overlap,
+            word_level=left.word_level,
+        )
+
+    elif (
+        type(left) is OverlapBlocker
+        and type(right) is MixedBlocker
+        and left.normalize == right.normalize
+        and left.overlap == right.overlap
+        and left.word_level == right.word_level
+    ):
+        return MixedBlocker(
+            equivalence_columns=right.equivalence_columns,
+            overlap_columns=left.blocking_columns + right.overlap_columns,
+            must_not_be_different=right.must_not_be_different,
+            normalize_strings=left.normalize,
+            overlap=left.overlap,
+            word_level=left.word_level,
+        )
+    else:
+        return AndNode(left, right)
+
+
 # /!\ TODO: make class for motives (+ pair, motive dict)?
diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index c32ebc1..596cee1 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -5,7 +5,10 @@
 from scipy.sparse.csgraph import connected_components
 import pandas as pd
 import networkx as nx
+import random
+from collections import Counter
 
+from itertools import combinations
 from typing import List, Set, Iterable, Dict, Collection, Any
 
 Columns = List[str]
@@ -21,14 +24,14 @@
 def remove_rows_if_value_appears_only_once(
     data: pd.DataFrame, cols: Columns
 ) -> pd.DataFrame:
-    """Drops rows of a Pandas DataFrame where a certain column's values appears only once.
+    """Drop rows of a Pandas DataFrame where a certain column's values appears only once.
 
     Ensures all elements of provided columns appear at least twice in their column
 
     Parameters
     ----------
     data : DataFrame
-      The DataFrame to preprocess
+      DataFrame to preprocess
 
     cols : List[str]
       List of columns where rows that contain non-duplicated elements shall be discarded
@@ -131,7 +134,7 @@ def normalize_function(string: Any) -> Any:
     Parameters
     ----------
     string : Any
-      The text to preprocess
+        Text to preprocess
 
     Returns
     -------
@@ -160,7 +163,7 @@ def normalize(text: Any) -> Any:
     Parameters
     ----------
     text : Any
-      The text(s) to preprocess
+      Text(s) to preprocess
 
     Returns
     -------
@@ -191,7 +194,7 @@ def flatten(list_of_iterables_: Collection[Iterable]) -> List[Any] | None:
     Parameters
     ----------
     list_of_iterables_ : Collection[Iterable]
-      The list to flatten
+      List to flatten
 
     Returns
     -------
@@ -502,7 +505,7 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
     Parameters
     ----------
     s : str
-      The stringified representation of a list e.g. "['string 1', 'string 2', ...]"
+      Stringified representation of a list e.g. "['string 1', 'string 2', ...]"
 
     word_level : bool
       Whether to return a list of all words within s instead of a list of each comma-separated element
@@ -556,7 +559,7 @@ def scoring(data: pd.DataFrame, motives_column: str = "motive") -> pd.Series:
       A DataFrame with motives
 
     motives_column : str
-      The name of the column containing the motives
+      Name of the column containing the motives
 
     Returns
     -------
@@ -574,3 +577,134 @@ def scoring(data: pd.DataFrame, motives_column: str = "motive") -> pd.Series:
 
     scores = data[motives_column].apply(len)
     return scores
+
+
+def must_not_be_different_apply(  # WIP
+    temp_data: pd.DataFrame,
+    blocking_columns: List[str],
+    must_not_be_different_columns: List[str],
+):
+    """Re-block DataFrame on a second column, where we require non-difference rather than equality
+
+    Parameters
+    ----------
+    temp_data : DataFrame
+      Partially blocked DataFrame
+
+    blocking_columns : List[str]
+      Columns where we check for equality
+
+    must_not_be_different_columns : List[str]
+        Columns where we only check for non-difference
+
+    Returns
+    -------
+    pd.DataFrame
+      A column of scores
+    """
+    temp_data["block_id"] = temp_data.groupby(blocking_columns).ngroup()
+    temp_data = temp_data[temp_data["block_id"].duplicated(keep=False)]
+
+    reconstructed_data = pd.DataFrame(columns=temp_data.columns)
+    for block in temp_data["block_id"].unique():
+        # noinspection PyArgumentList
+        current_block = (
+            temp_data[temp_data["block_id"] == block]
+            .sort_values(must_not_be_different_columns)
+            .copy()
+        )
+        if (
+            len(current_block[current_block[must_not_be_different_columns].notnull()])
+            == 0
+        ):  # All nulls
+            random_string = "".join(
+                random.choices("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", k=10)
+            )  # As long as the string is not already in the column...
+            # There must be a better way to do it...
+            current_block[must_not_be_different_columns] = (
+                current_block[must_not_be_different_columns]
+                .astype(str)
+                .fillna(random_string)
+            )
+        else:
+            current_block[must_not_be_different_columns] = (
+                current_block[must_not_be_different_columns].astype(str).ffill()
+            )
+        if len(reconstructed_data) == 0:
+            reconstructed_data = current_block
+        else:
+            reconstructed_data = pd.concat([reconstructed_data, current_block])
+    return reconstructed_data
+
+
+def block_overlap(groups: Iterable, overlap: int = 1) -> Coords:
+    """Block a DataFrame based on overlap across columns
+
+    Parameters
+    ----------
+    groups : Iterable
+      Output of a groupby
+
+    overlap : int
+      Minimum passing overlap
+
+    Returns
+    -------
+    Coords
+      Pairs obtained by blocking
+    """
+    coords = {
+        frozenset(pair) for group_list in groups for pair in combinations(group_list, 2)
+    }
+
+    if overlap > 1:
+        coords = [  # In this specific case, we want to keep duplicates to track the number of occurences of each pair
+            frozenset(pair)
+            for group_list in groups
+            for pair in combinations(group_list, 2)
+        ]
+        # Filter pairs that fulfill the minimum overlap condition
+        occurences_dict = Counter(coords)
+        coords = {
+            p for p in occurences_dict if occurences_dict[p] >= overlap
+        }  # The collection of pairs that fulfill the overlap condition
+
+    return coords
+
+
+def add_motives_to_coords(coords: Coords, explanations: Set[str]) -> CoordsMotives:
+    """Attach the same set of explanations to every pair in coords
+
+    Parameters
+    ----------
+    coords : Coords
+      Coords obtained by blocking
+
+    explanations : Set[str]
+      Set of explanations
+
+    Returns
+    -------
+    CoordsMotives
+      Pairs obtained by blocking, each mapped to the set of explanations
+
+    Examples
+    --------
+    >>> add_motives_to_coords({
+        frozenset({1, 4}),
+        frozenset({8, 11}),
+        frozenset({2, 5}),
+        frozenset({10, 13}),
+        frozenset({3, 8}),
+        frozenset({3, 11}),
+    }, {"Same 'City'"})
+    {
+        frozenset({1, 4}): {"Same 'City'"},
+        frozenset({8, 11}): {"Same 'City'"},
+        frozenset({2, 5}): {"Same 'City'"},
+        frozenset({10, 13}): {"Same 'City'"},
+        frozenset({3, 8}): {"Same 'City'"},
+        frozenset({3, 11}): {"Same 'City'"},
+    }
+    """
+    return {pair: explanations for pair in coords}

From 043ac2deb0841844d7ef2966727d00e0d44b3d85 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Fri, 30 Jan 2026 15:30:54 +0100
Subject: [PATCH 4/7] refactor: add underscore in front of new column names;
 perf: do not add temp column to df in must_not_be_different_apply

---
 src/ms_blocking/utils.py | 80 ++++++++++++++++++++++------------------
 1 file changed, 45 insertions(+), 35 deletions(-)

diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index 596cee1..837645f 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -341,17 +341,6 @@ def add_blocks_to_dataset(
         id_l rank_l  id_r rank_r  block
        0     0  first     2  first      0
     """
-    if output_columns is None:
-        output_columns = data.columns
-    data = data[output_columns].copy()
-
-    if "motive" in data.columns:
-        print("Renaming 'motive' column to 'motive_old'")
-        data = data.rename(columns={"motive": "motive_old"})
-
-    if "block" in data.columns:
-        print("Renaming 'block' column to 'block_old'")
-        data = data.rename(columns={"block": "block_old"})
 
     if show_as_pairs and keep_ungrouped_rows:
         raise ValueError("Cannot both return pairs and keep ungrouped rows")
@@ -364,6 +353,19 @@ def add_blocks_to_dataset(
     if not data.index.is_unique:
         raise ValueError("DataFrame index must be unique to be used as an identifier.")
 
+    if "_motive" in data.columns:
+        if motives:
+            raise ValueError(
+                "Please rename existing '_motive' column OR do not pass 'motives=True'"
+            )
+
+    if "_block" in data.columns:
+        raise ValueError("Please rename existing '_block' column")
+
+    if output_columns is None:
+        output_columns = data.columns
+    data = data[output_columns].copy()
+
     if len(coords) == 0 and not keep_ungrouped_rows:  # Empty graph
         if show_as_pairs:
             columns = [col + "_l" for col in data.columns] + [
@@ -414,16 +416,16 @@ def add_blocks_to_dataset(
                 output_data = pd.concat([output_data, current_row])
 
         # Assign blocks to rows based on their original index
-        output_data["block"] = output_data.index.map(matcher)
+        output_data["_block"] = output_data.index.map(matcher)
         if not merge_blocks:
-            output_data = output_data.explode("block")
+            output_data = output_data.explode("_block")
 
         if keep_ungrouped_rows:
-            output_data["block"] = output_data["block"].fillna(-1)
+            output_data["_block"] = output_data["_block"].fillna(-1)
             matcher_ungrouped_rows = {}
             block_temp = []
             i = 0  # Track # of blocks processed
-            for b in output_data["block"]:
+            for b in output_data["_block"]:
                 if b == -1:
                     block_temp.append(i)
                     i += 1
@@ -433,19 +435,19 @@ def add_blocks_to_dataset(
                     i += 1
                 else:
                     block_temp.append(matcher_ungrouped_rows[b])
-            output_data["block"] = block_temp
+            output_data["_block"] = block_temp
         else:
             if not show_as_pairs:
                 output_data = output_data[
-                    output_data["block"].duplicated(keep=False)
-                    & output_data["block"].notna()
+                    output_data["_block"].duplicated(keep=False)
+                    & output_data["_block"].notna()
                 ]
 
-        output_data.loc[:, ["block"]] = start_from_zero(output_data["block"])
+        output_data.loc[:, ["_block"]] = start_from_zero(output_data["_block"])
 
         if sort:
             # Sort by block, then by original index
-            sort_cols = ["block"]
+            sort_cols = ["_block"]
             if output_data.index.name:
                 output_data = output_data.sort_values(
                     sort_cols + [output_data.index.name]
@@ -459,7 +461,7 @@ def add_blocks_to_dataset(
                 output_data = output_data.set_index(output_data.columns[0])
 
     if motives:
-        output_data["motive"] = ""
+        output_data["_motive"] = ""
         id_list = flatten(coords.keys())
         motive_matcher = {
             row_id: frozenset(
@@ -470,13 +472,14 @@ def add_blocks_to_dataset(
             )
             for row_id in id_list
         }
-        output_data["motive"] = output_data.index.map(motive_matcher)
+        output_data["_motive"] = output_data.index.map(motive_matcher)
 
-    if "block" not in output_data.columns:  # Empty coords
-        output_data["block"] = -1
+    if "_block" not in output_data.columns:  # Empty coords
+        output_data["_block"] = -1
 
     output_data = output_data.reset_index(drop=True)
-    output_data["block"] = output_data["block"].astype(int)
+    output_data["_block"] = output_data["_block"].astype(int)
+
     return output_data
 
 
@@ -513,7 +516,7 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
     Returns
     -------
     List[str]
-      A python list based on s
+      The input s converted to a list of strings
 
     Examples
     --------
@@ -550,13 +553,13 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
         return [s for s in cleaned_items if len(s) > 0]
 
 
-def scoring(data: pd.DataFrame, motives_column: str = "motive") -> pd.Series:
+def scoring(data: pd.DataFrame, motives_column: str = "_motive") -> pd.Series:
     """Add a score to a blocked DataFrame based on the number of motives
 
     Parameters
     ----------
     data : DataFrame
-      A DataFrame with motives
+      DataFrame with motives
 
     motives_column : str
       Name of the column containing the motives
@@ -569,7 +572,12 @@ def scoring(data: pd.DataFrame, motives_column: str = "motive") -> pd.Series:
 
     # Check that we do have motives
     if motives_column not in data.columns:
-        raise ValueError(f'Specified motives column "{motives_column}" does not exist')
+        if motives_column == "_motive":
+            raise ValueError("No motives in DataFrame")
+        else:
+            raise ValueError(
+                f'Specified motives column "{motives_column}" does not exist'
+            )
 
     if "score" in data.columns:
         print("Renaming 'score' column to 'score_old'")
@@ -599,17 +607,18 @@ def must_not_be_different_apply(  # WIP
 
     Returns
     -------
-    pd.DataFrame
-      A column of scores
+    DataFrame
+      Column of scores
     """
-    temp_data["block_id"] = temp_data.groupby(blocking_columns).ngroup()
-    temp_data = temp_data[temp_data["block_id"].duplicated(keep=False)]
+
+    series_block_id = temp_data.groupby(blocking_columns).ngroup()
+    temp_data = temp_data[series_block_id.duplicated(keep=False)]
 
     reconstructed_data = pd.DataFrame(columns=temp_data.columns)
-    for block in temp_data["block_id"].unique():
+    for block in series_block_id.unique():
         # noinspection PyArgumentList
         current_block = (
-            temp_data[temp_data["block_id"] == block]
+            temp_data[series_block_id == block]
             .sort_values(must_not_be_different_columns)
             .copy()
         )
@@ -634,6 +643,7 @@ def must_not_be_different_apply(  # WIP
             reconstructed_data = current_block
         else:
             reconstructed_data = pd.concat([reconstructed_data, current_block])
+
     return reconstructed_data
 
 

From c6f6fd6dff653dda93607ba59b4941b40e770905 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Fri, 30 Jan 2026 15:31:23 +0100
Subject: [PATCH 5/7] refactor: rename block and motive to new names

---
 tests/test_ms_blocking.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/test_ms_blocking.py b/tests/test_ms_blocking.py
index cbf8284..d3f9ab2 100644
--- a/tests/test_ms_blocking.py
+++ b/tests/test_ms_blocking.py
@@ -105,7 +105,7 @@ def attribute_city_show_as_pairs_true_id():
 
 @pytest.fixture
 def attribute_city_show_as_pairs_true_columns():
-    return ["id_l", "Name_l", "id_r", "Name_r", "block"]
+    return ["id_l", "Name_l", "id_r", "Name_r", "_block"]
 
 
 @pytest.fixture
@@ -183,7 +183,7 @@ def test_merge_blocks(overlap_websites_merge_blocks):
     websites_blocker = msb.OverlapBlocker(["websites"])
     links = websites_blocker.block(get_users())
     actual = msb.add_blocks_to_dataset(get_users(), links, merge_blocks=False)[
-        "block"
+        "_block"
     ].to_list()
     assert actual == expected, (
         "Blocking on websites should return [0, 0, 0, 1, 1, 2, 2, 2]"
@@ -240,7 +240,7 @@ def test_sort_false(attribute_city_sort_false_blocks):
     city_blocker = msb.AttributeEquivalenceBlocker(["City"])
     links = city_blocker.block(get_users())
     actual = msb.add_blocks_to_dataset(get_users(), links, sort=False)[
-        "block"
+        "_block"
     ].to_list()
     assert actual == expected, (
         "Blocking on websites and adding blocks with sort=False should return [0, 1, 2, 0, 1, 2, 3, 2, 3]"
@@ -253,7 +253,7 @@ def test_keep_ungrouped_rows_false(attribute_city_keep_ungrouped_rows_false):
     city_blocker = msb.AttributeEquivalenceBlocker(["City"])
     links = city_blocker.block(get_users())
     actual = msb.add_blocks_to_dataset(get_users(), links, keep_ungrouped_rows=True)[
-        "block"
+        "_block"
     ].to_list()
     assert actual == expected, (
         "Blocking on Name with normalize_strings=False should return [0, 1, 1, 2, 2, 3, 3, 3, 4, 5, 6, 7, 7, 8]"
@@ -274,7 +274,7 @@ def test_motives_when_adding_to_dataframe(attribute_city_motives_true_add):
     city_blocker = msb.AttributeEquivalenceBlocker(["City"])
     links = city_blocker.block(get_users(), motives=True)
     actual = msb.add_blocks_to_dataset(get_users(), links, motives=True)[
-        "motive"
+        "_motive"
     ].to_list()
     assert actual == expected
 
@@ -337,7 +337,7 @@ def test_pipelining_motives(city_age_websites_pipelining_motives):
     links = final_blocker.block(get_users(), motives=True)
     actual = msb.add_blocks_to_dataset(
         get_users(), links, show_as_pairs=True, motives=True, merge_blocks=False
-    )["motive"].to_list()
+    )["_motive"].to_list()
     assert actual == expected
 
 
@@ -508,7 +508,7 @@ def test_no_links_m():
 
 def test_no_links_add_blocks_to_dataframe():
     """Test that add_blocks_to_dataframe gracefully outputs an empty DataFrame when no pairs were found"""
-    expected = pd.DataFrame(columns=["id", "Name", "City", "Age", "websites", "block"])
+    expected = pd.DataFrame(columns=["id", "Name", "City", "Age", "websites", "_block"])
     expected_show_as_pairs = pd.DataFrame(
         columns=[
             "id_l",
@@ -521,11 +521,11 @@ def test_no_links_add_blocks_to_dataframe():
             "City_r",
             "Age_r",
             "websites_r",
-            "block",
+            "_block",
         ]
     )
     expected_motives = pd.DataFrame(
-        columns=["id", "Name", "City", "Age", "websites", "motive", "block"]
+        columns=["id", "Name", "City", "Age", "websites", "_motive", "_block"]
     )
     id_blocker = msb.AttributeEquivalenceBlocker(["id"])
     links = id_blocker.block(get_users())

From 3793a13af7d0ddb606d2c34e00ee288f31fad234 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Fri, 30 Jan 2026 15:39:57 +0100
Subject: [PATCH 6/7] docs: run notebook

---
 docs/example.ipynb | 656 ++++++++++++++++++++++-----------------------
 1 file changed, 328 insertions(+), 328 deletions(-)

diff --git a/docs/example.ipynb b/docs/example.ipynb
index 32bc8c4..6b82165 100644
--- a/docs/example.ipynb
+++ b/docs/example.ipynb
@@ -32,15 +32,15 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:26.824780400Z",
-     "start_time": "2026-01-29T15:07:26.781971700Z"
+     "end_time": "2026-01-30T14:21:14.010997600Z",
+     "start_time": "2026-01-30T14:21:13.420790Z"
     }
    },
    "source": [
     "import ms_blocking.ms_blocking as msb"
    ],
    "outputs": [],
-   "execution_count": 137
+   "execution_count": 1
   },
   {
    "cell_type": "markdown",
@@ -60,8 +60,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:27.000725500Z",
-     "start_time": "2026-01-29T15:07:26.849860500Z"
+     "end_time": "2026-01-30T14:21:14.049404600Z",
+     "start_time": "2026-01-30T14:21:14.010997600Z"
     }
    },
    "source": [
@@ -250,12 +250,12 @@
        "</div>"
       ]
      },
-     "execution_count": 138,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 138
+   "execution_count": 2
   },
   {
    "cell_type": "markdown",
@@ -282,15 +282,15 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:27.363604100Z",
-     "start_time": "2026-01-29T15:07:27.107402500Z"
+     "end_time": "2026-01-30T14:21:14.190107400Z",
+     "start_time": "2026-01-30T14:21:14.089762400Z"
     }
    },
    "source": [
     "city_blocker = msb.AttributeEquivalenceBlocker([\"City\"])"
    ],
    "outputs": [],
-   "execution_count": 139
+   "execution_count": 3
   },
   {
    "cell_type": "markdown",
@@ -310,8 +310,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:27.745120Z",
-     "start_time": "2026-01-29T15:07:27.573071300Z"
+     "end_time": "2026-01-30T14:21:14.309413300Z",
+     "start_time": "2026-01-30T14:21:14.278545600Z"
     }
    },
    "source": [
@@ -326,7 +326,7 @@
      ]
     }
    ],
-   "execution_count": 140
+   "execution_count": 4
   },
   {
    "cell_type": "markdown",
@@ -339,8 +339,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:27.933575500Z",
-     "start_time": "2026-01-29T15:07:27.819208200Z"
+     "end_time": "2026-01-30T14:21:14.378808Z",
+     "start_time": "2026-01-30T14:21:14.349508200Z"
     }
    },
    "source": [
@@ -358,19 +358,19 @@
        " frozenset({10, 13})}"
       ]
      },
-     "execution_count": 141,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 141
+   "execution_count": 5
   },
   {
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:28.123040400Z",
-     "start_time": "2026-01-29T15:07:27.999988Z"
+     "end_time": "2026-01-30T14:21:14.558644200Z",
+     "start_time": "2026-01-30T14:21:14.459573100Z"
     }
    },
    "source": [
@@ -396,7 +396,7 @@
      }
     }
    ],
-   "execution_count": 142
+   "execution_count": 6
   },
   {
    "cell_type": "markdown",
@@ -409,8 +409,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:28.188617600Z",
-     "start_time": "2026-01-29T15:07:28.160831900Z"
+     "end_time": "2026-01-30T14:21:14.635514Z",
+     "start_time": "2026-01-30T14:21:14.598913Z"
     }
    },
    "source": [
@@ -431,16 +431,16 @@
        "7  10    Caroline Dufour               Lens   45   \n",
        "8  13      Benoît Benoît               Lens   15   \n",
        "\n",
-       "                                            websites  block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  \n",
-       "1                               ['jacquesdupond.fr']      0  \n",
-       "2                    ['somewebsite.com/users/rpz59']      1  \n",
-       "3                                                 []      1  \n",
-       "4                                 ['roubaixlove.fr']      2  \n",
-       "5                                                 []      2  \n",
-       "6                                                 []      2  \n",
-       "7             ['pythonensamusant.fr', 'lensfans.fr']      3  \n",
-       "8                                    ['lensfans.fr']      3  "
+       "                                            websites  _block  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
+       "1                               ['jacquesdupond.fr']       0  \n",
+       "2                    ['somewebsite.com/users/rpz59']       1  \n",
+       "3                                                 []       1  \n",
+       "4                                 ['roubaixlove.fr']       2  \n",
+       "5                                                 []       2  \n",
+       "6                                                 []       2  \n",
+       "7             ['pythonensamusant.fr', 'lensfans.fr']       3  \n",
+       "8                                    ['lensfans.fr']       3  "
       ],
       "text/html": [
        "<div>\n",
@@ -466,7 +466,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -556,12 +556,12 @@
        "</div>"
       ]
      },
-     "execution_count": 143,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 143
+   "execution_count": 7
   },
   {
    "cell_type": "markdown",
@@ -574,8 +574,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:28.391859600Z",
-     "start_time": "2026-01-29T15:07:28.233676700Z"
+     "end_time": "2026-01-30T14:21:14.829719100Z",
+     "start_time": "2026-01-30T14:21:14.676157200Z"
     }
    },
    "source": [
@@ -590,12 +590,12 @@
        "array([-1,  0,  1,  2,  0,  1, -1, -1,  2, -1,  3,  2, -1,  3])"
       ]
      },
-     "execution_count": 144,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 144
+   "execution_count": 8
   },
   {
    "cell_type": "markdown",
@@ -622,8 +622,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:28.636881700Z",
-     "start_time": "2026-01-29T15:07:28.555420800Z"
+     "end_time": "2026-01-30T14:21:15.027923700Z",
+     "start_time": "2026-01-30T14:21:14.926401Z"
     }
    },
    "source": [
@@ -649,12 +649,12 @@
        "3  10     Caroline Dufour               Lens   45   \n",
        "4  13       Benoît Benoît               Lens   15   \n",
        "\n",
-       "                                            websites  block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  \n",
-       "1                               ['jacquesdupond.fr']      0  \n",
-       "2             ['lensfans.fr', 'pythonensamusant.fr']      0  \n",
-       "3             ['pythonensamusant.fr', 'lensfans.fr']      0  \n",
-       "4                                    ['lensfans.fr']      0  "
+       "                                            websites  _block  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
+       "1                               ['jacquesdupond.fr']       0  \n",
+       "2             ['lensfans.fr', 'pythonensamusant.fr']       0  \n",
+       "3             ['pythonensamusant.fr', 'lensfans.fr']       0  \n",
+       "4                                    ['lensfans.fr']       0  "
       ],
       "text/html": [
        "<div>\n",
@@ -680,7 +680,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -734,12 +734,12 @@
        "</div>"
       ]
      },
-     "execution_count": 145,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 145
+   "execution_count": 9
   },
   {
    "cell_type": "markdown",
@@ -759,8 +759,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:28.897958600Z",
-     "start_time": "2026-01-29T15:07:28.814714900Z"
+     "end_time": "2026-01-30T14:21:15.403596500Z",
+     "start_time": "2026-01-30T14:21:15.279120300Z"
     }
    },
    "source": [
@@ -783,7 +783,7 @@
      }
     }
    ],
-   "execution_count": 146
+   "execution_count": 10
   },
   {
    "cell_type": "markdown",
@@ -796,8 +796,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:29.062518Z",
-     "start_time": "2026-01-29T15:07:29.011197700Z"
+     "end_time": "2026-01-30T14:21:15.686136800Z",
+     "start_time": "2026-01-30T14:21:15.608444400Z"
     }
    },
    "source": [
@@ -817,15 +817,15 @@
        "6  10     Caroline Dufour               Lens   45   \n",
        "7  13       Benoît Benoît               Lens   15   \n",
        "\n",
-       "                                            websites  block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  \n",
-       "1                               ['jacquesdupond.fr']      0  \n",
-       "2  ['somewebsite.com/users/jacquesdupond', 'jacqu...      1  \n",
-       "3             ['lensfans.fr', 'pythonensamusant.fr']      1  \n",
-       "4             ['pythonensamusant.fr', 'lensfans.fr']      1  \n",
-       "5             ['lensfans.fr', 'pythonensamusant.fr']      2  \n",
-       "6             ['pythonensamusant.fr', 'lensfans.fr']      2  \n",
-       "7                                    ['lensfans.fr']      2  "
+       "                                            websites  _block  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
+       "1                               ['jacquesdupond.fr']       0  \n",
+       "2  ['somewebsite.com/users/jacquesdupond', 'jacqu...       1  \n",
+       "3             ['lensfans.fr', 'pythonensamusant.fr']       1  \n",
+       "4             ['pythonensamusant.fr', 'lensfans.fr']       1  \n",
+       "5             ['lensfans.fr', 'pythonensamusant.fr']       2  \n",
+       "6             ['pythonensamusant.fr', 'lensfans.fr']       2  \n",
+       "7                                    ['lensfans.fr']       2  "
       ],
       "text/html": [
        "<div>\n",
@@ -851,7 +851,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -932,12 +932,12 @@
        "</div>"
       ]
      },
-     "execution_count": 147,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 147
+   "execution_count": 11
   },
   {
    "cell_type": "markdown",
@@ -971,8 +971,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:29.409293600Z",
-     "start_time": "2026-01-29T15:07:29.374108Z"
+     "end_time": "2026-01-30T14:21:15.998425200Z",
+     "start_time": "2026-01-30T14:21:15.931370100Z"
     }
    },
    "source": [
@@ -995,9 +995,9 @@
        "0   6  Jean-Michel Python  Douai   49  ['lensfans.fr', 'pythonensamusant.fr']   \n",
        "1  10     Caroline Dufour   Lens   45  ['pythonensamusant.fr', 'lensfans.fr']   \n",
        "\n",
-       "   block  \n",
-       "0      0  \n",
-       "1      0  "
+       "   _block  \n",
+       "0       0  \n",
+       "1       0  "
       ],
       "text/html": [
        "<div>\n",
@@ -1023,7 +1023,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1050,12 +1050,12 @@
        "</div>"
       ]
      },
-     "execution_count": 148,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 148
+   "execution_count": 12
   },
   {
    "cell_type": "markdown",
@@ -1075,8 +1075,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:29.806693100Z",
-     "start_time": "2026-01-29T15:07:29.577252700Z"
+     "end_time": "2026-01-30T14:21:16.305679100Z",
+     "start_time": "2026-01-30T14:21:16.212470400Z"
     }
    },
    "source": [
@@ -1089,7 +1089,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['City', 'Age'], [])\n"
+      "Processing AttributeEquivalenceBlocker(['Age', 'City'], [])\n"
      ]
     },
     {
@@ -1103,13 +1103,13 @@
        "4   8     Sophie Delarue            Roubaix   33   \n",
        "5  11     sophie_delarue            Roubaix   33   \n",
        "\n",
-       "                                            websites  block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  \n",
-       "1                               ['jacquesdupond.fr']      0  \n",
-       "2                    ['somewebsite.com/users/rpz59']      1  \n",
-       "3                                                 []      1  \n",
-       "4                                                 []      2  \n",
-       "5                                                 []      2  "
+       "                                            websites  _block  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
+       "1                               ['jacquesdupond.fr']       0  \n",
+       "2                    ['somewebsite.com/users/rpz59']       1  \n",
+       "3                                                 []       1  \n",
+       "4                                                 []       2  \n",
+       "5                                                 []       2  "
       ],
       "text/html": [
        "<div>\n",
@@ -1135,7 +1135,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1198,12 +1198,12 @@
        "</div>"
       ]
      },
-     "execution_count": 149,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 149
+   "execution_count": 13
   },
   {
    "cell_type": "markdown",
@@ -1223,8 +1223,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:30.174760Z",
-     "start_time": "2026-01-29T15:07:30.089742500Z"
+     "end_time": "2026-01-30T14:21:16.678653800Z",
+     "start_time": "2026-01-30T14:21:16.558976200Z"
     }
    },
    "source": [
@@ -1249,11 +1249,11 @@
        "2   8     Sophie Delarue    Roubaix   33                               []   \n",
        "3  11     sophie_delarue    Roubaix   33                               []   \n",
        "\n",
-       "   block  \n",
-       "0      0  \n",
-       "1      0  \n",
-       "2      1  \n",
-       "3      1  "
+       "   _block  \n",
+       "0       0  \n",
+       "1       0  \n",
+       "2       1  \n",
+       "3       1  "
       ],
       "text/html": [
        "<div>\n",
@@ -1279,7 +1279,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1324,12 +1324,12 @@
        "</div>"
       ]
      },
-     "execution_count": 150,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 150
+   "execution_count": 14
   },
   {
    "cell_type": "markdown",
@@ -1342,8 +1342,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:30.712714100Z",
-     "start_time": "2026-01-29T15:07:30.650914300Z"
+     "end_time": "2026-01-30T14:21:17.354294400Z",
+     "start_time": "2026-01-30T14:21:17.316050200Z"
     }
    },
    "source": [
@@ -1365,7 +1365,7 @@
      "data": {
       "text/plain": [
        "Empty DataFrame\n",
-       "Columns: [id, Name, City, Age, websites, block]\n",
+       "Columns: [id, Name, City, Age, websites, _block]\n",
        "Index: []"
       ],
       "text/html": [
@@ -1392,7 +1392,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1401,12 +1401,12 @@
        "</div>"
       ]
      },
-     "execution_count": 151,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 151
+   "execution_count": 15
   },
   {
    "cell_type": "markdown",
@@ -1440,8 +1440,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:31.007955500Z",
-     "start_time": "2026-01-29T15:07:30.942038900Z"
+     "end_time": "2026-01-30T14:21:17.537043700Z",
+     "start_time": "2026-01-30T14:21:17.392490700Z"
     }
    },
    "source": [
@@ -1458,14 +1458,14 @@
      ]
     }
    ],
-   "execution_count": 152
+   "execution_count": 16
   },
   {
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:31.269933200Z",
-     "start_time": "2026-01-29T15:07:31.230646800Z"
+     "end_time": "2026-01-30T14:21:17.655177300Z",
+     "start_time": "2026-01-30T14:21:17.573776300Z"
     }
    },
    "source": [
@@ -1489,11 +1489,11 @@
        "2  10  Caroline Dufour               Lens   45   \n",
        "3  13    Benoît Benoît               Lens   15   \n",
        "\n",
-       "                                            websites  block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  \n",
-       "1                               ['jacquesdupond.fr']      0  \n",
-       "2             ['pythonensamusant.fr', 'lensfans.fr']      1  \n",
-       "3                                    ['lensfans.fr']      1  "
+       "                                            websites  _block  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
+       "1                               ['jacquesdupond.fr']       0  \n",
+       "2             ['pythonensamusant.fr', 'lensfans.fr']       1  \n",
+       "3                                    ['lensfans.fr']       1  "
       ],
       "text/html": [
        "<div>\n",
@@ -1519,7 +1519,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1564,12 +1564,12 @@
        "</div>"
       ]
      },
-     "execution_count": 153,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 153
+   "execution_count": 17
   },
   {
    "cell_type": "markdown",
@@ -1589,8 +1589,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:31.613284200Z",
-     "start_time": "2026-01-29T15:07:31.446107500Z"
+     "end_time": "2026-01-30T14:21:17.910335600Z",
+     "start_time": "2026-01-30T14:21:17.821453400Z"
     }
    },
    "source": [
@@ -1621,17 +1621,17 @@
        "8   8      Sophie Delarue            Roubaix   33   \n",
        "9  11      sophie_delarue            Roubaix   33   \n",
        "\n",
-       "                                            websites  block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  \n",
-       "1                               ['jacquesdupond.fr']      0  \n",
-       "2             ['lensfans.fr', 'pythonensamusant.fr']      0  \n",
-       "3             ['pythonensamusant.fr', 'lensfans.fr']      0  \n",
-       "4                                    ['lensfans.fr']      0  \n",
-       "5                    ['somewebsite.com/users/rpz59']      1  \n",
-       "6                                                 []      1  \n",
-       "7                                 ['roubaixlove.fr']      2  \n",
-       "8                                                 []      2  \n",
-       "9                                                 []      2  "
+       "                                            websites  _block  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
+       "1                               ['jacquesdupond.fr']       0  \n",
+       "2             ['lensfans.fr', 'pythonensamusant.fr']       0  \n",
+       "3             ['pythonensamusant.fr', 'lensfans.fr']       0  \n",
+       "4                                    ['lensfans.fr']       0  \n",
+       "5                    ['somewebsite.com/users/rpz59']       1  \n",
+       "6                                                 []       1  \n",
+       "7                                 ['roubaixlove.fr']       2  \n",
+       "8                                                 []       2  \n",
+       "9                                                 []       2  "
       ],
       "text/html": [
        "<div>\n",
@@ -1657,7 +1657,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1756,12 +1756,12 @@
        "</div>"
       ]
      },
-     "execution_count": 154,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 154
+   "execution_count": 18
   },
   {
    "cell_type": "markdown",
@@ -1804,8 +1804,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:32.031630100Z",
-     "start_time": "2026-01-29T15:07:32.013496100Z"
+     "end_time": "2026-01-30T14:21:18.279899900Z",
+     "start_time": "2026-01-30T14:21:18.250988900Z"
     }
    },
    "source": [
@@ -1815,7 +1815,7 @@
     "websites_blocker = msb.OverlapBlocker([\"websites\"])"
    ],
    "outputs": [],
-   "execution_count": 155
+   "execution_count": 19
   },
   {
    "cell_type": "markdown",
@@ -1828,15 +1828,15 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:32.189589400Z",
-     "start_time": "2026-01-29T15:07:32.172200Z"
+     "end_time": "2026-01-30T14:21:18.481263300Z",
+     "start_time": "2026-01-30T14:21:18.466284300Z"
     }
    },
    "source": [
     "final_blocker = (city_blocker & age_blocker) | (name_blocker & websites_blocker)"
    ],
    "outputs": [],
-   "execution_count": 156
+   "execution_count": 20
   },
   {
    "cell_type": "markdown",
@@ -1849,8 +1849,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:32.274232600Z",
-     "start_time": "2026-01-29T15:07:32.231839Z"
+     "end_time": "2026-01-30T14:21:18.562779600Z",
+     "start_time": "2026-01-30T14:21:18.520368200Z"
     }
    },
    "source": [
@@ -1862,7 +1862,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['City', 'Age'], [])\n",
+      "Processing AttributeEquivalenceBlocker(['Age', 'City'], [])\n",
       "Processing MixedBlocker(['Name'], ['websites'], 1)\n"
      ]
     },
@@ -1877,13 +1877,13 @@
        "4   8     Sophie Delarue            Roubaix   33   \n",
        "5  11     sophie_delarue            Roubaix   33   \n",
        "\n",
-       "                                            websites  block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  \n",
-       "1                               ['jacquesdupond.fr']      0  \n",
-       "2                    ['somewebsite.com/users/rpz59']      1  \n",
-       "3                                                 []      1  \n",
-       "4                                                 []      2  \n",
-       "5                                                 []      2  "
+       "                                            websites  _block  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
+       "1                               ['jacquesdupond.fr']       0  \n",
+       "2                    ['somewebsite.com/users/rpz59']       1  \n",
+       "3                                                 []       1  \n",
+       "4                                                 []       2  \n",
+       "5                                                 []       2  "
       ],
       "text/html": [
        "<div>\n",
@@ -1909,7 +1909,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1972,12 +1972,12 @@
        "</div>"
       ]
      },
-     "execution_count": 157,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 157
+   "execution_count": 21
   },
   {
    "cell_type": "markdown",
@@ -1990,8 +1990,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:32.514343300Z",
-     "start_time": "2026-01-29T15:07:32.366139Z"
+     "end_time": "2026-01-30T14:21:18.843568700Z",
+     "start_time": "2026-01-30T14:21:18.686911500Z"
     }
    },
    "source": [
@@ -2007,7 +2007,7 @@
      ]
     }
    ],
-   "execution_count": 158
+   "execution_count": 22
   },
   {
    "cell_type": "markdown",
@@ -2034,8 +2034,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:32.653038500Z",
-     "start_time": "2026-01-29T15:07:32.610025600Z"
+     "end_time": "2026-01-30T14:21:18.967168700Z",
+     "start_time": "2026-01-30T14:21:18.928864500Z"
     }
    },
    "source": [
@@ -2056,16 +2056,16 @@
        "7  11     sophie_delarue            Roubaix   33   \n",
        "8  13      Benoît Benoît               Lens   15   \n",
        "\n",
-       "                                            websites  block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  \n",
-       "1                    ['somewebsite.com/users/rpz59']      1  \n",
-       "2                                 ['roubaixlove.fr']      2  \n",
-       "3                               ['jacquesdupond.fr']      0  \n",
-       "4                                                 []      1  \n",
-       "5                                                 []      2  \n",
-       "6             ['pythonensamusant.fr', 'lensfans.fr']      3  \n",
-       "7                                                 []      2  \n",
-       "8                                    ['lensfans.fr']      3  "
+       "                                            websites  _block  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
+       "1                    ['somewebsite.com/users/rpz59']       1  \n",
+       "2                                 ['roubaixlove.fr']       2  \n",
+       "3                               ['jacquesdupond.fr']       0  \n",
+       "4                                                 []       1  \n",
+       "5                                                 []       2  \n",
+       "6             ['pythonensamusant.fr', 'lensfans.fr']       3  \n",
+       "7                                                 []       2  \n",
+       "8                                    ['lensfans.fr']       3  "
       ],
       "text/html": [
        "<div>\n",
@@ -2091,7 +2091,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -2181,12 +2181,12 @@
        "</div>"
       ]
      },
-     "execution_count": 159,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 159
+   "execution_count": 23
   },
   {
    "cell_type": "markdown",
@@ -2213,8 +2213,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:33.051456600Z",
-     "start_time": "2026-01-29T15:07:33.007723500Z"
+     "end_time": "2026-01-30T14:21:19.276047300Z",
+     "start_time": "2026-01-30T14:21:19.146886900Z"
     }
    },
    "source": [
@@ -2240,21 +2240,21 @@
        "12  13        Benoît Benoît               Lens   15   \n",
        "13  12  Marcel Vandermersch           Fourmies   48   \n",
        "\n",
-       "                                             websites  block  \n",
-       "0                     ['jeandaux.fr', 'lillefans.fr']      0  \n",
-       "1   ['somewebsite.com/users/jacquesdupond', 'jacqu...      1  \n",
-       "2                                ['jacquesdupond.fr']      1  \n",
-       "3                     ['somewebsite.com/users/rpz59']      2  \n",
-       "4                                                  []      2  \n",
-       "5                                  ['roubaixlove.fr']      3  \n",
-       "6                                                  []      3  \n",
-       "7                                                  []      3  \n",
-       "8              ['lensfans.fr', 'pythonensamusant.fr']      4  \n",
-       "9                                        ['lorem.fr']      5  \n",
-       "10                ['somewebsite.com/users/jajanne59']      6  \n",
-       "11             ['pythonensamusant.fr', 'lensfans.fr']      7  \n",
-       "12                                    ['lensfans.fr']      7  \n",
-       "13                         ['lesrecettesdemarcel.fr']      8  "
+       "                                             websites  _block  \n",
+       "0                     ['jeandaux.fr', 'lillefans.fr']       0  \n",
+       "1   ['somewebsite.com/users/jacquesdupond', 'jacqu...       1  \n",
+       "2                                ['jacquesdupond.fr']       1  \n",
+       "3                     ['somewebsite.com/users/rpz59']       2  \n",
+       "4                                                  []       2  \n",
+       "5                                  ['roubaixlove.fr']       3  \n",
+       "6                                                  []       3  \n",
+       "7                                                  []       3  \n",
+       "8              ['lensfans.fr', 'pythonensamusant.fr']       4  \n",
+       "9                                        ['lorem.fr']       5  \n",
+       "10                ['somewebsite.com/users/jajanne59']       6  \n",
+       "11             ['pythonensamusant.fr', 'lensfans.fr']       7  \n",
+       "12                                    ['lensfans.fr']       7  \n",
+       "13                         ['lesrecettesdemarcel.fr']       8  "
       ],
       "text/html": [
        "<div>\n",
@@ -2280,7 +2280,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -2415,12 +2415,12 @@
        "</div>"
       ]
      },
-     "execution_count": 160,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 160
+   "execution_count": 24
   },
   {
    "cell_type": "markdown",
@@ -2443,8 +2443,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:33.425225900Z",
-     "start_time": "2026-01-29T15:07:33.285367100Z"
+     "end_time": "2026-01-30T14:21:19.820247800Z",
+     "start_time": "2026-01-30T14:21:19.653280100Z"
     }
    },
    "source": [
@@ -2473,13 +2473,13 @@
        "4   8     Sophie Delarue            Roubaix   33   \n",
        "5  11     sophie_delarue            Roubaix   33   \n",
        "\n",
-       "                                            websites  block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  \n",
-       "1                               ['jacquesdupond.fr']      0  \n",
-       "2                    ['somewebsite.com/users/rpz59']      1  \n",
-       "3                                                 []      1  \n",
-       "4                                                 []      2  \n",
-       "5                                                 []      2  "
+       "                                            websites  _block  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
+       "1                               ['jacquesdupond.fr']       0  \n",
+       "2                    ['somewebsite.com/users/rpz59']       1  \n",
+       "3                                                 []       1  \n",
+       "4                                                 []       2  \n",
+       "5                                                 []       2  "
       ],
       "text/html": [
        "<div>\n",
@@ -2505,7 +2505,7 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -2568,12 +2568,12 @@
        "</div>"
       ]
      },
-     "execution_count": 161,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 161
+   "execution_count": 25
   },
   {
    "cell_type": "markdown",
@@ -2593,8 +2593,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:33.836934900Z",
-     "start_time": "2026-01-29T15:07:33.664956Z"
+     "end_time": "2026-01-30T14:21:20.335572Z",
+     "start_time": "2026-01-30T14:21:20.302358700Z"
     }
    },
    "source": [
@@ -2621,12 +2621,12 @@
        " frozenset({3, 11}): {\"Same 'City'\"}}"
       ]
      },
-     "execution_count": 162,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 162
+   "execution_count": 26
   },
   {
    "cell_type": "markdown",
@@ -2646,8 +2646,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:34.005705200Z",
-     "start_time": "2026-01-29T15:07:33.958769500Z"
+     "end_time": "2026-01-30T14:21:20.409405100Z",
+     "start_time": "2026-01-30T14:21:20.374573700Z"
     }
    },
    "source": [
@@ -2668,16 +2668,16 @@
        "7  10    Caroline Dufour               Lens   45   \n",
        "8  13      Benoît Benoît               Lens   15   \n",
        "\n",
-       "                                            websites  block         motive  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...      0  (Same 'City')  \n",
-       "1                               ['jacquesdupond.fr']      0  (Same 'City')  \n",
-       "2                    ['somewebsite.com/users/rpz59']      1  (Same 'City')  \n",
-       "3                                                 []      1  (Same 'City')  \n",
-       "4                                 ['roubaixlove.fr']      2  (Same 'City')  \n",
-       "5                                                 []      2  (Same 'City')  \n",
-       "6                                                 []      2  (Same 'City')  \n",
-       "7             ['pythonensamusant.fr', 'lensfans.fr']      3  (Same 'City')  \n",
-       "8                                    ['lensfans.fr']      3  (Same 'City')  "
+       "                                            websites  _block        _motive  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  (Same 'City')  \n",
+       "1                               ['jacquesdupond.fr']       0  (Same 'City')  \n",
+       "2                    ['somewebsite.com/users/rpz59']       1  (Same 'City')  \n",
+       "3                                                 []       1  (Same 'City')  \n",
+       "4                                 ['roubaixlove.fr']       2  (Same 'City')  \n",
+       "5                                                 []       2  (Same 'City')  \n",
+       "6                                                 []       2  (Same 'City')  \n",
+       "7             ['pythonensamusant.fr', 'lensfans.fr']       3  (Same 'City')  \n",
+       "8                                    ['lensfans.fr']       3  (Same 'City')  "
       ],
       "text/html": [
        "<div>\n",
@@ -2703,8 +2703,8 @@
        "      <th>City</th>\n",
        "      <th>Age</th>\n",
        "      <th>websites</th>\n",
-       "      <th>block</th>\n",
-       "      <th>motive</th>\n",
+       "      <th>_block</th>\n",
+       "      <th>_motive</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -2803,12 +2803,12 @@
        "</div>"
       ]
      },
-     "execution_count": 163,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 163
+   "execution_count": 27
   },
   {
    "cell_type": "markdown",
@@ -2828,8 +2828,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:34.269628100Z",
-     "start_time": "2026-01-29T15:07:34.186432Z"
+     "end_time": "2026-01-30T14:21:20.612990700Z",
+     "start_time": "2026-01-30T14:21:20.483928200Z"
     }
    },
    "source": [
@@ -2855,13 +2855,13 @@
        "4                                                 []     3       Paul Delarue   \n",
        "5             ['pythonensamusant.fr', 'lensfans.fr']    13      Benoît Benoît   \n",
        "\n",
-       "              City_r  Age_r            websites_r  block         motive  \n",
-       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']      0  (Same 'City')  \n",
-       "1          Phalempin     24                    []      1  (Same 'City')  \n",
-       "2            Roubaix     33                    []      2  (Same 'City')  \n",
-       "3            Roubaix     33                    []      2  (Same 'City')  \n",
-       "4            Roubaix     32    ['roubaixlove.fr']      2  (Same 'City')  \n",
-       "5               Lens     15       ['lensfans.fr']      3  (Same 'City')  "
+       "              City_r  Age_r            websites_r  _block        _motive  \n",
+       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']       0  (Same 'City')  \n",
+       "1          Phalempin     24                    []       1  (Same 'City')  \n",
+       "2            Roubaix     33                    []       2  (Same 'City')  \n",
+       "3            Roubaix     33                    []       2  (Same 'City')  \n",
+       "4            Roubaix     32    ['roubaixlove.fr']       2  (Same 'City')  \n",
+       "5               Lens     15       ['lensfans.fr']       3  (Same 'City')  "
       ],
       "text/html": [
        "<div>\n",
@@ -2892,8 +2892,8 @@
        "      <th>City_r</th>\n",
        "      <th>Age_r</th>\n",
        "      <th>websites_r</th>\n",
-       "      <th>block</th>\n",
-       "      <th>motive</th>\n",
+       "      <th>_block</th>\n",
+       "      <th>_motive</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -2992,12 +2992,12 @@
        "</div>"
       ]
      },
-     "execution_count": 164,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 164
+   "execution_count": 28
   },
   {
    "cell_type": "markdown",
@@ -3010,8 +3010,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:34.787375Z",
-     "start_time": "2026-01-29T15:07:34.745314800Z"
+     "end_time": "2026-01-30T14:21:20.944670700Z",
+     "start_time": "2026-01-30T14:21:20.834495500Z"
     }
    },
    "source": [
@@ -3023,13 +3023,13 @@
     {
      "data": {
       "text/plain": [
-       "   id_l             Name_l  id_r             Name_r  block         motive\n",
-       "0     1     Jacques Dupond     4     Jacques Dupont      0  (Same 'City')\n",
-       "1     2  Pierre Dusquesnes     5  pierre dusquesnes      1  (Same 'City')\n",
-       "2     3       Paul Delarue    11     sophie_delarue      2  (Same 'City')\n",
-       "3     8     Sophie Delarue    11     sophie_delarue      2  (Same 'City')\n",
-       "4     8     Sophie Delarue     3       Paul Delarue      2  (Same 'City')\n",
-       "5    10    Caroline Dufour    13      Benoît Benoît      3  (Same 'City')"
+       "   id_l             Name_l  id_r             Name_r  _block        _motive\n",
+       "0     1     Jacques Dupond     4     Jacques Dupont       0  (Same 'City')\n",
+       "1     2  Pierre Dusquesnes     5  pierre dusquesnes       1  (Same 'City')\n",
+       "2     3       Paul Delarue    11     sophie_delarue       2  (Same 'City')\n",
+       "3     8     Sophie Delarue    11     sophie_delarue       2  (Same 'City')\n",
+       "4     8     Sophie Delarue     3       Paul Delarue       2  (Same 'City')\n",
+       "5    10    Caroline Dufour    13      Benoît Benoît       3  (Same 'City')"
       ],
       "text/html": [
        "<div>\n",
@@ -3054,8 +3054,8 @@
        "      <th>Name_l</th>\n",
        "      <th>id_r</th>\n",
        "      <th>Name_r</th>\n",
-       "      <th>block</th>\n",
-       "      <th>motive</th>\n",
+       "      <th>_block</th>\n",
+       "      <th>_motive</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -3118,12 +3118,12 @@
        "</div>"
       ]
      },
-     "execution_count": 165,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 165
+   "execution_count": 29
   },
   {
    "cell_type": "markdown",
@@ -3136,8 +3136,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:35.067637700Z",
-     "start_time": "2026-01-29T15:07:34.976540100Z"
+     "end_time": "2026-01-30T14:21:21.591044600Z",
+     "start_time": "2026-01-30T14:21:21.517777200Z"
     }
    },
    "source": [
@@ -3163,13 +3163,13 @@
        "4                                                 []     3       Paul Delarue   \n",
        "5             ['pythonensamusant.fr', 'lensfans.fr']    13      Benoît Benoît   \n",
        "\n",
-       "              City_r  Age_r            websites_r  block         motive  \n",
-       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']      0  (Same 'City')  \n",
-       "1          Phalempin     24                    []      1  (Same 'City')  \n",
-       "2            Roubaix     33                    []      2  (Same 'City')  \n",
-       "3            Roubaix     33                    []      2  (Same 'City')  \n",
-       "4            Roubaix     32    ['roubaixlove.fr']      2  (Same 'City')  \n",
-       "5               Lens     15       ['lensfans.fr']      3  (Same 'City')  "
+       "              City_r  Age_r            websites_r  _block        _motive  \n",
+       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']       0  (Same 'City')  \n",
+       "1          Phalempin     24                    []       1  (Same 'City')  \n",
+       "2            Roubaix     33                    []       2  (Same 'City')  \n",
+       "3            Roubaix     33                    []       2  (Same 'City')  \n",
+       "4            Roubaix     32    ['roubaixlove.fr']       2  (Same 'City')  \n",
+       "5               Lens     15       ['lensfans.fr']       3  (Same 'City')  "
       ],
       "text/html": [
        "<div>\n",
@@ -3200,8 +3200,8 @@
        "      <th>City_r</th>\n",
        "      <th>Age_r</th>\n",
        "      <th>websites_r</th>\n",
-       "      <th>block</th>\n",
-       "      <th>motive</th>\n",
+       "      <th>_block</th>\n",
+       "      <th>_motive</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -3300,20 +3300,20 @@
        "</div>"
       ]
      },
-     "execution_count": 166,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 166
+   "execution_count": 30
   },
   {
    "cell_type": "code",
    "metadata": {
     "scrolled": true,
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:35.517819300Z",
-     "start_time": "2026-01-29T15:07:35.345233200Z"
+     "end_time": "2026-01-30T14:21:21.867809800Z",
+     "start_time": "2026-01-30T14:21:21.674986800Z"
     }
    },
    "source": [
@@ -3337,35 +3337,35 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['City', 'Age'], [])\n",
+      "Processing AttributeEquivalenceBlocker(['Age', 'City'], [])\n",
       "Processing OverlapBlocker(['websites'], 1)\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "    id_l             Name_l  id_r              Name_r  block  \\\n",
-       "0      1     Jacques Dupond     4      Jacques Dupont      0   \n",
-       "1      1     Jacques Dupond     6  Jean-Michel Python      0   \n",
-       "2      1     Jacques Dupond    10     Caroline Dufour      0   \n",
-       "3      1     Jacques Dupond     4      Jacques Dupont      1   \n",
-       "4      1     Jacques Dupond     6  Jean-Michel Python      1   \n",
-       "5      1     Jacques Dupond    10     Caroline Dufour      1   \n",
-       "6     10    Caroline Dufour     6  Jean-Michel Python      1   \n",
-       "7     10    Caroline Dufour    13       Benoît Benoît      1   \n",
-       "8      2  Pierre Dusquesnes     5   pierre dusquesnes      2   \n",
-       "9      8     Sophie Delarue    11      sophie_delarue      3   \n",
-       "10    10    Caroline Dufour     6  Jean-Michel Python      4   \n",
-       "11    10    Caroline Dufour    13       Benoît Benoît      4   \n",
-       "12    13      Benoît Benoît     6  Jean-Michel Python      4   \n",
+       "    id_l             Name_l  id_r              Name_r  _block  \\\n",
+       "0      1     Jacques Dupond     4      Jacques Dupont       0   \n",
+       "1      1     Jacques Dupond     6  Jean-Michel Python       0   \n",
+       "2      1     Jacques Dupond    10     Caroline Dufour       0   \n",
+       "3      1     Jacques Dupond     4      Jacques Dupont       1   \n",
+       "4      1     Jacques Dupond     6  Jean-Michel Python       1   \n",
+       "5      1     Jacques Dupond    10     Caroline Dufour       1   \n",
+       "6     10    Caroline Dufour     6  Jean-Michel Python       1   \n",
+       "7     10    Caroline Dufour    13       Benoît Benoît       1   \n",
+       "8      2  Pierre Dusquesnes     5   pierre dusquesnes       2   \n",
+       "9      8     Sophie Delarue    11      sophie_delarue       3   \n",
+       "10    10    Caroline Dufour     6  Jean-Michel Python       4   \n",
+       "11    10    Caroline Dufour    13       Benoît Benoît       4   \n",
+       "12    13      Benoît Benoît     6  Jean-Michel Python       4   \n",
        "\n",
-       "                                               motive  \n",
-       "0   (Same 'City', Same 'Age', >=1 overlap in 'webs...  \n",
-       "1   (Same 'City', Same 'Age', >=1 overlap in 'webs...  \n",
-       "2   (Same 'City', Same 'Age', >=1 overlap in 'webs...  \n",
-       "3   (Same 'City', Same 'Age', >=1 overlap in 'webs...  \n",
-       "4   (Same 'City', Same 'Age', >=1 overlap in 'webs...  \n",
-       "5   (Same 'City', Same 'Age', >=1 overlap in 'webs...  \n",
+       "                                              _motive  \n",
+       "0   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
+       "1   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
+       "2   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
+       "3   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
+       "4   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
+       "5   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
        "6                         (>=1 overlap in 'websites')  \n",
        "7                         (>=1 overlap in 'websites')  \n",
        "8                           (Same 'City', Same 'Age')  \n",
@@ -3397,8 +3397,8 @@
        "      <th>Name_l</th>\n",
        "      <th>id_r</th>\n",
        "      <th>Name_r</th>\n",
-       "      <th>block</th>\n",
-       "      <th>motive</th>\n",
+       "      <th>_block</th>\n",
+       "      <th>_motive</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -3409,7 +3409,7 @@
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -3418,7 +3418,7 @@
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -3427,7 +3427,7 @@
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -3436,7 +3436,7 @@
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -3445,7 +3445,7 @@
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -3454,7 +3454,7 @@
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -3524,12 +3524,12 @@
        "</div>"
       ]
      },
-     "execution_count": 167,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 167
+   "execution_count": 31
   },
   {
    "cell_type": "markdown",
@@ -3545,8 +3545,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-29T15:07:35.899178900Z",
-     "start_time": "2026-01-29T15:07:35.837149Z"
+     "end_time": "2026-01-30T14:21:22.186415700Z",
+     "start_time": "2026-01-30T14:21:22.127304600Z"
     }
    },
    "source": [
@@ -3557,28 +3557,28 @@
     {
      "data": {
       "text/plain": [
-       "    id_l             Name_l  id_r              Name_r  block  \\\n",
-       "0      1     Jacques Dupond     4      Jacques Dupont      0   \n",
-       "1      1     Jacques Dupond     6  Jean-Michel Python      0   \n",
-       "2      1     Jacques Dupond    10     Caroline Dufour      0   \n",
-       "3      1     Jacques Dupond     4      Jacques Dupont      1   \n",
-       "4      1     Jacques Dupond     6  Jean-Michel Python      1   \n",
-       "5      1     Jacques Dupond    10     Caroline Dufour      1   \n",
-       "8      2  Pierre Dusquesnes     5   pierre dusquesnes      2   \n",
-       "9      8     Sophie Delarue    11      sophie_delarue      3   \n",
-       "6     10    Caroline Dufour     6  Jean-Michel Python      1   \n",
-       "7     10    Caroline Dufour    13       Benoît Benoît      1   \n",
-       "10    10    Caroline Dufour     6  Jean-Michel Python      4   \n",
-       "11    10    Caroline Dufour    13       Benoît Benoît      4   \n",
-       "12    13      Benoît Benoît     6  Jean-Michel Python      4   \n",
+       "    id_l             Name_l  id_r              Name_r  _block  \\\n",
+       "0      1     Jacques Dupond     4      Jacques Dupont       0   \n",
+       "1      1     Jacques Dupond     6  Jean-Michel Python       0   \n",
+       "2      1     Jacques Dupond    10     Caroline Dufour       0   \n",
+       "3      1     Jacques Dupond     4      Jacques Dupont       1   \n",
+       "4      1     Jacques Dupond     6  Jean-Michel Python       1   \n",
+       "5      1     Jacques Dupond    10     Caroline Dufour       1   \n",
+       "8      2  Pierre Dusquesnes     5   pierre dusquesnes       2   \n",
+       "9      8     Sophie Delarue    11      sophie_delarue       3   \n",
+       "6     10    Caroline Dufour     6  Jean-Michel Python       1   \n",
+       "7     10    Caroline Dufour    13       Benoît Benoît       1   \n",
+       "10    10    Caroline Dufour     6  Jean-Michel Python       4   \n",
+       "11    10    Caroline Dufour    13       Benoît Benoît       4   \n",
+       "12    13      Benoît Benoît     6  Jean-Michel Python       4   \n",
        "\n",
-       "                                               motive  score  \n",
-       "0   (Same 'City', Same 'Age', >=1 overlap in 'webs...      3  \n",
-       "1   (Same 'City', Same 'Age', >=1 overlap in 'webs...      3  \n",
-       "2   (Same 'City', Same 'Age', >=1 overlap in 'webs...      3  \n",
-       "3   (Same 'City', Same 'Age', >=1 overlap in 'webs...      3  \n",
-       "4   (Same 'City', Same 'Age', >=1 overlap in 'webs...      3  \n",
-       "5   (Same 'City', Same 'Age', >=1 overlap in 'webs...      3  \n",
+       "                                              _motive  score  \n",
+       "0   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
+       "1   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
+       "2   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
+       "3   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
+       "4   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
+       "5   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
        "8                           (Same 'City', Same 'Age')      2  \n",
        "9                           (Same 'City', Same 'Age')      2  \n",
        "6                         (>=1 overlap in 'websites')      1  \n",
@@ -3610,8 +3610,8 @@
        "      <th>Name_l</th>\n",
        "      <th>id_r</th>\n",
        "      <th>Name_r</th>\n",
-       "      <th>block</th>\n",
-       "      <th>motive</th>\n",
+       "      <th>_block</th>\n",
+       "      <th>_motive</th>\n",
        "      <th>score</th>\n",
        "    </tr>\n",
        "  </thead>\n",
@@ -3623,7 +3623,7 @@
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3633,7 +3633,7 @@
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3643,7 +3643,7 @@
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3653,7 +3653,7 @@
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3663,7 +3663,7 @@
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3673,7 +3673,7 @@
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3751,12 +3751,12 @@
        "</div>"
       ]
      },
-     "execution_count": 168,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 168
+   "execution_count": 32
   }
  ],
  "metadata": {

From 1ec4d4d5c3b5fe7f11497f02a362fdd3e0437dfd Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Fri, 30 Jan 2026 15:55:49 +0100
Subject: [PATCH 7/7] style: formatting

---
 src/ms_blocking/ms_blocking.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index a7903c0..57ccd4c 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -1,9 +1,6 @@
 from ms_blocking.utils import *  # noqa: F403
 
 
-# TODO: "block_id"
-
-
 class BlockerNode:
     """Abstract class from which derive all classes in the module"""