diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3569a578943d4..4e0cc93e64458 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -948,7 +948,7 @@ def value_counts_internal( result = Series(counts, index=idx, name=name, copy=False) if sort: - result = result.sort_values(ascending=ascending) + result = result.sort_values(ascending=ascending, kind="stable") if normalize: result = result / counts.sum() diff --git a/pandas/core/base.py b/pandas/core/base.py index b417cf1487417..200b16b4b6b1a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -993,7 +993,12 @@ def value_counts( If True then the object returned will contain the relative frequencies of the unique values. sort : bool, default True - Sort by frequencies when True. Preserve the order of the data when False. + Stable sort by frequencies when True. Preserve the order of the data + when False. + + .. versionchanged:: 3.0.0 + + Prior to 3.0.0, the sort was unstable. ascending : bool, default False Sort in ascending order. bins : int, optional diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c8c246434f6d8..4d6af17ba9060 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7718,11 +7718,16 @@ def value_counts( normalize : bool, default False Return proportions rather than frequencies. sort : bool, default True - Sort by frequencies when True. Preserve the order of the data when False. + Stable sort by frequencies when True. Preserve the order of the data + when False. .. versionchanged:: 3.0.0 Prior to 3.0.0, ``sort=False`` would sort by the columns values. + + .. versionchanged:: 3.0.0 + + Prior to 3.0.0, the sort was unstable. ascending : bool, default False Sort in ascending order. 
dropna : bool, default True @@ -7832,7 +7837,7 @@ def value_counts( counts.name = name if sort: - counts = counts.sort_values(ascending=ascending) + counts = counts.sort_values(ascending=ascending, kind="stable") if normalize: counts /= counts.sum() diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 93e04fe61555e..dfa875249afff 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2776,8 +2776,8 @@ def value_counts( normalize : bool, default False Return proportions rather than frequencies. sort : bool, default True - Sort by frequencies when True. When False, non-grouping columns will appear - in the order they occur in within groups. + Stable sort by frequencies when True. When False, non-grouping + columns will appear in the order in which they occur within groups. .. versionchanged:: 3.0.0 diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index c876aae6dea5e..03c1bef848acb 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1446,6 +1446,17 @@ def test_value_counts_series(self): ) tm.assert_series_equal(result, expected) + def test_value_counts_stability(self): + # GH 63155 + arr = np.random.default_rng(2).integers(0, 128, 8192) + result = algos.value_counts_internal(arr, sort=True) + expected = ( + Series(arr) + .value_counts(sort=False) + .sort_values(ascending=False, kind="stable") + ) + tm.assert_series_equal(result, expected) + class TestDuplicated: def test_duplicated_with_nas(self):