From e33de24240852d3355a8b4b9ab178fcef78720e5 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Thu, 20 Nov 2025 10:07:22 +0100 Subject: [PATCH 1/3] pandas-dev/pandas#63155 stable sort value_counts --- pandas/core/algorithms.py | 2 +- pandas/core/base.py | 7 ++++++- pandas/core/frame.py | 9 +++++++-- pandas/core/groupby/generic.py | 4 ++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3569a578943d4..4e0cc93e64458 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -948,7 +948,7 @@ def value_counts_internal( result = Series(counts, index=idx, name=name, copy=False) if sort: - result = result.sort_values(ascending=ascending) + result = result.sort_values(ascending=ascending, kind="stable") if normalize: result = result / counts.sum() diff --git a/pandas/core/base.py b/pandas/core/base.py index b417cf1487417..200b16b4b6b1a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -993,7 +993,12 @@ def value_counts( If True then the object returned will contain the relative frequencies of the unique values. sort : bool, default True - Sort by frequencies when True. Preserve the order of the data when False. + Stable sort by frequencies when True. Preserve the order of the data + when False. + + .. versionchanged:: 3.0.0 + + Prior to 3.0.0, the sort was unstable. ascending : bool, default False Sort in ascending order. bins : int, optional diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c8c246434f6d8..4d6af17ba9060 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7718,11 +7718,16 @@ def value_counts( normalize : bool, default False Return proportions rather than frequencies. sort : bool, default True - Sort by frequencies when True. Preserve the order of the data when False. + Stable sort by frequencies when True. Preserve the order of the data + when False. .. versionchanged:: 3.0.0 Prior to 3.0.0, ``sort=False`` would sort by the columns values. + + .. versionchanged:: 3.0.0 + + Prior to 3.0.0, the sort was unstable. ascending : bool, default False Sort in ascending order. dropna : bool, default True @@ -7832,7 +7837,7 @@ def value_counts( counts.name = name if sort: - counts = counts.sort_values(ascending=ascending) + counts = counts.sort_values(ascending=ascending, kind="stable") if normalize: counts /= counts.sum() diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 93e04fe61555e..dfa875249afff 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2776,8 +2776,8 @@ def value_counts( normalize : bool, default False Return proportions rather than frequencies. sort : bool, default True - Sort by frequencies when True. When False, non-grouping columns will appear - in the order they occur in within groups. + Stable sort by frequencies when True. When False, non-grouping + columns will appear in the order they occur in within groups. .. versionchanged:: 3.0.0 From f7a524095192da609605516b750d6e8e8ecc26ad Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Sun, 23 Nov 2025 10:55:01 +0100 Subject: [PATCH 2/3] add test --- pandas/tests/test_algos.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index c876aae6dea5e..4a84ac4501258 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1446,6 +1446,12 @@ def test_value_counts_series(self): ) tm.assert_series_equal(result, expected) + def test_value_counts_stability(self): + # GH 63155 + arr = np.random.default_rng(2).integers(0, 128, 8192) + result = algos.value_counts_internal(arr, sort=True) + expected = Series(arr).value_counts(sort=False).sort_values(ascending=False, kind="stable") + tm.assert_series_equal(result, expected) class TestDuplicated: def test_duplicated_with_nas(self): From bf361c75b27e3b12e8b6b4eb0b9cda53a2718048 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Sun, 23 Nov 2025 11:03:14 +0100 Subject: [PATCH 3/3] pre-commit --- pandas/tests/test_algos.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 4a84ac4501258..03c1bef848acb 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1450,9 +1450,14 @@ def test_value_counts_stability(self): # GH 63155 arr = np.random.default_rng(2).integers(0, 128, 8192) result = algos.value_counts_internal(arr, sort=True) - expected = Series(arr).value_counts(sort=False).sort_values(ascending=False, kind="stable") + expected = ( + Series(arr) + .value_counts(sort=False) + .sort_values(ascending=False, kind="stable") + ) tm.assert_series_equal(result, expected) + class TestDuplicated: def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)