From 1abd602e5167f098e9dee6cfd4cd916d68d2d42d Mon Sep 17 00:00:00 2001 From: Talia-noemalab Date: Fri, 17 Oct 2025 16:23:32 +0800 Subject: [PATCH 1/4] Enhance 'pyjanitor' description in index.md Improve clarity of project introduction in index.md. Updated description of 'pyjanitor' to emphasize its user-friendly API. --- mkdocs/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mkdocs/index.md b/mkdocs/index.md index 1bf5b26eb..a68fcb16c 100644 --- a/mkdocs/index.md +++ b/mkdocs/index.md @@ -3,8 +3,8 @@ [![](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ericmjl/pyjanitor/dev) -`pyjanitor` is a Python implementation of the R package [`janitor`][janitor], and -provides a clean API for cleaning data. +`pyjanitor` is a Python implementation of the R package [`janitor`][janitor]. +It provides a clean, user-friendly API for extending pandas with powerful and readable data-cleaning functions. [janitor]: https://github.com/sfirke/janitor From 53c9b4316a33ca325381c0908131408420642d9d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Oct 2025 08:30:19 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mkdocs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/index.md b/mkdocs/index.md index a68fcb16c..e82e477c8 100644 --- a/mkdocs/index.md +++ b/mkdocs/index.md @@ -3,7 +3,7 @@ [![](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ericmjl/pyjanitor/dev) -`pyjanitor` is a Python implementation of the R package [`janitor`][janitor]. +`pyjanitor` is a Python implementation of the R package [`janitor`][janitor]. It provides a clean, user-friendly API for extending pandas with powerful and readable data-cleaning functions.
[janitor]: https://github.com/sfirke/janitor From 0a76f85fc07fe7c65f49f76c8fe3de36d986aa9d Mon Sep 17 00:00:00 2001 From: Talia-noemalab Date: Sat, 18 Oct 2025 20:42:24 +0800 Subject: [PATCH 3/4] feat: add filter_column_isin with multi-column and dict support Adds filter_column_isin back to filter.py with the following improvements: 1. Supports single-column, multi-column, and dict-based filtering 2. Keeps backward compatibility for single-column usage 3. Adds complement flag for inverse filtering Addresses issue #1476 --- janitor/functions/filter.py | 48 +++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/janitor/functions/filter.py b/janitor/functions/filter.py index e3c007216..330a056c5 100644 --- a/janitor/functions/filter.py +++ b/janitor/functions/filter.py @@ -357,3 +357,51 @@ def filter_column_isin( if complement: return df[~criteria] return df[criteria] + +from __future__ import annotations +from collections.abc import Iterable +from typing import Dict, List, Sequence, Tuple, Union + +import pandas as pd +from pandas_flavor import register_dataframe_method + +ColumnsArg = Union[str, List[str], Tuple[str, ...], Dict[str, Iterable]] + +@register_dataframe_method +def filter_column_isin( + df: pd.DataFrame, + columns: ColumnsArg, + values: Iterable | None = None, + *, + complement: bool = False, +) -> pd.DataFrame: + """ + Supports: + 1) Single column (str) + 2) Multiple columns (list/tuple) + 3) Dictionary mapping (dict) + """ + if isinstance(columns, dict): + if values is not None: + raise ValueError("When `columns` is a dict, do not pass `values`.") + if not columns: + return df + mask = pd.Series(True, index=df.index) + for col, vals in columns.items(): + mask &= df[col].isin(vals) + + elif isinstance(columns, (list, tuple)): + if values is None: + raise ValueError("`values` must be provided when `columns` is a list/tuple.") + cols_seq: Sequence[str] = list(columns) + combos = [tuple(v) for v in values] + mask = 
df.set_index(cols_seq).index.isin(combos) + + else: + if values is None: + raise ValueError("`values` must be provided when `columns` is a string.") + mask = df[columns].isin(values) + + if complement: + mask = ~mask + return df.loc[mask] From 85d1f5e0a538810355e232e5eddfdb82947c004a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 18 Oct 2025 13:05:37 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- janitor/functions/filter.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/janitor/functions/filter.py b/janitor/functions/filter.py index 330a056c5..eee007dea 100644 --- a/janitor/functions/filter.py +++ b/janitor/functions/filter.py @@ -357,8 +357,10 @@ def filter_column_isin( if complement: return df[~criteria] return df[criteria] - + + from __future__ import annotations + from collections.abc import Iterable from typing import Dict, List, Sequence, Tuple, Union @@ -367,6 +369,7 @@ def filter_column_isin( ColumnsArg = Union[str, List[str], Tuple[str, ...], Dict[str, Iterable]] + @register_dataframe_method def filter_column_isin( df: pd.DataFrame, @@ -392,14 +395,18 @@ def filter_column_isin( elif isinstance(columns, (list, tuple)): if values is None: - raise ValueError("`values` must be provided when `columns` is a list/tuple.") + raise ValueError( + "`values` must be provided when `columns` is a list/tuple." + ) cols_seq: Sequence[str] = list(columns) combos = [tuple(v) for v in values] mask = df.set_index(cols_seq).index.isin(combos) else: if values is None: - raise ValueError("`values` must be provided when `columns` is a string.") + raise ValueError( + "`values` must be provided when `columns` is a string." + ) mask = df[columns].isin(values) if complement: