From a5f0b1dd803e7dd380cc8778b035e2efa4a493ac Mon Sep 17 00:00:00 2001
From: Georgios <giorgosgnkl@gmail.com>
Date: Wed, 29 Oct 2025 22:45:38 +0000
Subject: [PATCH 1/5] fix sort_index using level name on MultiIndex

---
 pandas/core/sorting.py                        | 28 ++++++-
 pandas/tests/frame/methods/test_sort_index.py | 83 +++++++++++++++++++
 2 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 930704e6f62f4..c1e65d2d4a5b1 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -57,7 +57,7 @@
 
 def get_indexer_indexer(
     target: Index,
-    level: Level | list[Level] | None,
+    level: Level | list[Level] | None,  # can level actually be a list here?
     ascending: list[bool] | bool,
     kind: SortKind,
     na_position: NaPosition,
@@ -87,7 +87,19 @@ def get_indexer_indexer(
     # error: Incompatible types in assignment (expression has type
     # "Union[ExtensionArray, ndarray[Any, Any], Index, Series]", variable has
     # type "Index")
+
+    # before:
+    # MultiIndex([('a', 'top10'),
+    #             ('a',  'top2')],
+    #            names=['A', 'B'])
     target = ensure_key_mapped(target, key, levels=level)  # type: ignore[assignment]
+    # # after
+    # MultiIndex([('a', 1),
+    #             ('a', 0)],
+    #            names=['A', None])
+    # the big problem is that the name is lost as well,
+    # but with the new change I preserve it
+
     target = target._sort_levels_monotonic()
 
     if level is not None:
@@ -531,11 +543,15 @@ def _ensure_key_mapped_multiindex(
             level_iter = [level]
         else:
             level_iter = level
-
         sort_levels: range | set = {index._get_level_number(lev) for lev in level_iter}
     else:
         sort_levels = range(index.nlevels)
 
+    # breakpoint() # the loops through the levels
+    # for the levels to be sorted, it applies the key function
+    # (uses the number, not the name)
+    # it returns the indexeer: ensure_key_mapped(
+    #   index._get_level_values(1), key) = Index([1, 0], dtype='int64')
     mapped = [
         (
             ensure_key_mapped(index._get_level_values(level), key)
@@ -569,19 +585,23 @@ def ensure_key_mapped(
         return values
 
     if isinstance(values, ABCMultiIndex):
+        # redirects to special MultiIndex handler
         return _ensure_key_mapped_multiindex(values, key, level=levels)
 
     result = key(values.copy())
     if len(result) != len(values):
         raise ValueError(
-            "User-provided `key` function must not change the shape of the array."
+            "User-provided `key` bfunction must not change the shape of the array."
         )
 
     try:
         if isinstance(
             values, Index
         ):  # convert to a new Index subclass, not necessarily the same
-            result = Index(result, tupleize_cols=False)
+            # preserve the original name when creating the new Index
+            result = Index(
+                result, tupleize_cols=False, name=getattr(values, "name", None)
+            )
         else:
             # try to revert to original type otherwise
             type_of_values = type(values)
diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
index 1a631e760208a..8b089ffe93833 100644
--- a/pandas/tests/frame/methods/test_sort_index.py
+++ b/pandas/tests/frame/methods/test_sort_index.py
@@ -1,3 +1,4 @@
+from natsort import index_natsorted
 import numpy as np
 import pytest
 
@@ -943,6 +944,88 @@ def test_sort_index_multiindex_sort_remaining(self, ascending):
 
         tm.assert_frame_equal(result, expected)
 
+    def test_sort_multi_index_sort_by_level_name(self):
+        # GH#62361
+
+        df = DataFrame(
+            [[1, 2], [3, 4]],
+            columns=MultiIndex.from_product(
+                [["a"], ["top10", "top2"]], names=("A", "B")
+            ),
+        )
+
+        expected = DataFrame(
+            [[2, 1], [4, 3]],
+            columns=MultiIndex.from_product(
+                [["a"], ["top2", "top10"]], names=("A", "B")
+            ),
+        )
+
+        sorted_df = df.sort_index(
+            axis=1, level="B", key=lambda x: np.argsort(index_natsorted(x))
+        )
+        tm.assert_frame_equal(sorted_df, expected)
+
+    def test_sort_multi_index_sort_by_level_name_2(self):
+        # GH#62361
+
+        df = DataFrame(
+            [[1, 2], [3, 4]],
+            columns=MultiIndex.from_tuples(
+                [("alpha10", "top10"), ("alpha3", "top2")], names=("A", "B")
+            ),
+        )
+
+        expected = DataFrame(
+            [[2, 1], [4, 3]],
+            columns=MultiIndex.from_tuples(
+                [("alpha3", "top2"), ("alpha10", "top10")], names=("A", "B")
+            ),
+        )
+
+        sorted_df = df.sort_index(
+            axis=1, level=0, key=lambda x: np.argsort(index_natsorted(x))
+        )
+        tm.assert_frame_equal(sorted_df, expected)
+
+        sorted_df = df.sort_index(
+            axis=1, level="A", key=lambda x: np.argsort(index_natsorted(x))
+        )
+        tm.assert_frame_equal(sorted_df, expected)
+
+        sorted_df = df.sort_index(
+            axis=1, level=1, key=lambda x: np.argsort(index_natsorted(x))
+        )
+        tm.assert_frame_equal(sorted_df, expected)
+
+        sorted_df = df.sort_index(
+            axis=1, level="B", key=lambda x: np.argsort(index_natsorted(x))
+        )
+        tm.assert_frame_equal(sorted_df, expected)
+
+        sorted_df = df.sort_index(
+            axis=1, level=[0, 1], key=lambda x: np.argsort(index_natsorted(x))
+        )
+        tm.assert_frame_equal(sorted_df, expected)
+
+        sorted_df = df.sort_index(
+            axis=1, level=[1, 0], key=lambda x: np.argsort(index_natsorted(x))
+        )
+        tm.assert_frame_equal(sorted_df, expected)
+
+        sorted_df = df.sort_index(
+            axis=1, level=[1, "A"], key=lambda x: np.argsort(index_natsorted(x))
+        )
+        tm.assert_frame_equal(sorted_df, expected)
+
+        # repetition does not matter
+        sorted_df = df.sort_index(
+            axis=1,
+            level=["A", "B", 0, 1, "B"],
+            key=lambda x: np.argsort(index_natsorted(x)),
+        )
+        tm.assert_frame_equal(sorted_df, expected)
+
 
 def test_sort_index_with_sliced_multiindex():
     # GH 55379

From eb000884d0d04d3c4ce865a3842e547f89962ec3 Mon Sep 17 00:00:00 2001
From: Georgios <giorgosgnkl@gmail.com>
Date: Wed, 29 Oct 2025 23:39:50 +0000
Subject: [PATCH 2/5] update tests, remove comments

---
 pandas/core/sorting.py                        | 18 +----
 pandas/tests/frame/methods/test_sort_index.py | 65 ++++++++++---------
 2 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index c1e65d2d4a5b1..c13fab4adb9cf 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -88,17 +88,7 @@ def get_indexer_indexer(
     # "Union[ExtensionArray, ndarray[Any, Any], Index, Series]", variable has
     # type "Index")
 
-    # before:
-    # MultiIndex([('a', 'top10'),
-    #             ('a',  'top2')],
-    #            names=['A', 'B'])
     target = ensure_key_mapped(target, key, levels=level)  # type: ignore[assignment]
-    # # after
-    # MultiIndex([('a', 1),
-    #             ('a', 0)],
-    #            names=['A', None])
-    # the big problem is that the name is lost as well,
-    # but with the new change I preserve it
 
     target = target._sort_levels_monotonic()
 
@@ -547,11 +537,6 @@ def _ensure_key_mapped_multiindex(
     else:
         sort_levels = range(index.nlevels)
 
-    # breakpoint() # the loops through the levels
-    # for the levels to be sorted, it applies the key function
-    # (uses the number, not the name)
-    # it returns the indexeer: ensure_key_mapped(
-    #   index._get_level_values(1), key) = Index([1, 0], dtype='int64')
     mapped = [
         (
             ensure_key_mapped(index._get_level_values(level), key)
@@ -585,13 +570,12 @@ def ensure_key_mapped(
         return values
 
     if isinstance(values, ABCMultiIndex):
-        # redirects to special MultiIndex handler
         return _ensure_key_mapped_multiindex(values, key, level=levels)
 
     result = key(values.copy())
     if len(result) != len(values):
         raise ValueError(
-            "User-provided `key` bfunction must not change the shape of the array."
+            "User-provided `key` function must not change the shape of the array."
         )
 
     try:
diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
index 8b089ffe93833..6cc6c5bb7c6dc 100644
--- a/pandas/tests/frame/methods/test_sort_index.py
+++ b/pandas/tests/frame/methods/test_sort_index.py
@@ -944,7 +944,7 @@ def test_sort_index_multiindex_sort_remaining(self, ascending):
 
         tm.assert_frame_equal(result, expected)
 
-    def test_sort_multi_index_sort_by_level_name(self):
+    def test_sort_index_multiindex_by_level_name(self):
         # GH#62361
 
         df = DataFrame(
@@ -970,61 +970,68 @@ def test_sort_multi_index_sort_by_level_name_2(self):
         # GH#62361
 
         df = DataFrame(
-            [[1, 2], [3, 4]],
+            [[1, 2, 3], [4, 5, 6]],
             columns=MultiIndex.from_tuples(
-                [("alpha10", "top10"), ("alpha3", "top2")], names=("A", "B")
+                [("a10", "b12"), ("a2", "b17"), ("a2", "b4")], names=("A", "B")
             ),
         )
 
-        expected = DataFrame(
-            [[2, 1], [4, 3]],
+        expected_A = DataFrame(
+            [[2, 3, 1], [5, 6, 4]],
+            columns=MultiIndex.from_tuples(
+                [("a2", "b17"), ("a2", "b4"), ("a10", "b12")], names=("A", "B")
+            ),
+        )
+        expected_B = DataFrame(
+            [[3, 1, 2], [6, 4, 5]],
             columns=MultiIndex.from_tuples(
-                [("alpha3", "top2"), ("alpha10", "top10")], names=("A", "B")
+                [("a2", "b4"), ("a10", "b12"), ("a2", "b17")], names=("A", "B")
             ),
         )
 
         sorted_df = df.sort_index(
             axis=1, level=0, key=lambda x: np.argsort(index_natsorted(x))
         )
-        tm.assert_frame_equal(sorted_df, expected)
+        tm.assert_frame_equal(sorted_df, expected_A)
 
         sorted_df = df.sort_index(
             axis=1, level="A", key=lambda x: np.argsort(index_natsorted(x))
         )
-        tm.assert_frame_equal(sorted_df, expected)
+        tm.assert_frame_equal(sorted_df, expected_A)
 
         sorted_df = df.sort_index(
             axis=1, level=1, key=lambda x: np.argsort(index_natsorted(x))
         )
-        tm.assert_frame_equal(sorted_df, expected)
+        tm.assert_frame_equal(sorted_df, expected_B)
 
         sorted_df = df.sort_index(
             axis=1, level="B", key=lambda x: np.argsort(index_natsorted(x))
         )
-        tm.assert_frame_equal(sorted_df, expected)
+        tm.assert_frame_equal(sorted_df, expected_B)
 
+        # actually, only 1 element of list matters for sorting (2nd is ignored)
         sorted_df = df.sort_index(
             axis=1, level=[0, 1], key=lambda x: np.argsort(index_natsorted(x))
         )
-        tm.assert_frame_equal(sorted_df, expected)
-
-        sorted_df = df.sort_index(
-            axis=1, level=[1, 0], key=lambda x: np.argsort(index_natsorted(x))
-        )
-        tm.assert_frame_equal(sorted_df, expected)
-
-        sorted_df = df.sort_index(
-            axis=1, level=[1, "A"], key=lambda x: np.argsort(index_natsorted(x))
-        )
-        tm.assert_frame_equal(sorted_df, expected)
-
-        # repetition does not matter
-        sorted_df = df.sort_index(
-            axis=1,
-            level=["A", "B", 0, 1, "B"],
-            key=lambda x: np.argsort(index_natsorted(x)),
-        )
-        tm.assert_frame_equal(sorted_df, expected)
+        tm.assert_frame_equal(sorted_df, expected_A)
+
+        # sorted_df = df.sort_index(
+        #     axis=1, level=[1, 0], key=lambda x: np.argsort(index_natsorted(x))
+        # )
+        # tm.assert_frame_equal(sorted_df, expected_B)
+
+        # sorted_df = df.sort_index(
+        #     axis=1, level=[1, "A"], key=lambda x: np.argsort(index_natsorted(x))
+        # )
+        # tm.assert_frame_equal(sorted_df, expected_B)
+
+        # # repetition does not matter
+        # sorted_df = df.sort_index(
+        #     axis=1,
+        #     level=["A", "B", 0, 1, "B"],
+        #     key=lambda x: np.argsort(index_natsorted(x)),
+        # )
+        # tm.assert_frame_equal(sorted_df, expected_A)
 
 
 def test_sort_index_with_sliced_multiindex():

From 8a237edbd1e35a73539c53c3b1c3e9ab70acb5af Mon Sep 17 00:00:00 2001
From: Georgios <giorgosgnkl@gmail.com>
Date: Wed, 29 Oct 2025 23:41:48 +0000
Subject: [PATCH 3/5] remove empty lines

---
 pandas/core/sorting.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index c13fab4adb9cf..3349a13d7f0bc 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -87,9 +87,7 @@ def get_indexer_indexer(
     # error: Incompatible types in assignment (expression has type
     # "Union[ExtensionArray, ndarray[Any, Any], Index, Series]", variable has
     # type "Index")
-
     target = ensure_key_mapped(target, key, levels=level)  # type: ignore[assignment]
-
     target = target._sort_levels_monotonic()
 
     if level is not None:

From ae88c18ba4d5382b1732c9f77835836640612446 Mon Sep 17 00:00:00 2001
From: Georgios <giorgosgnkl@gmail.com>
Date: Wed, 29 Oct 2025 23:46:54 +0000
Subject: [PATCH 4/5] rename tes

---
 pandas/tests/frame/methods/test_sort_index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
index 6cc6c5bb7c6dc..056c10f7a1dd0 100644
--- a/pandas/tests/frame/methods/test_sort_index.py
+++ b/pandas/tests/frame/methods/test_sort_index.py
@@ -966,7 +966,7 @@ def test_sort_index_multiindex_by_level_name(self):
         )
         tm.assert_frame_equal(sorted_df, expected)
 
-    def test_sort_multi_index_sort_by_level_name_2(self):
+    def test_sort_index_multiindex_by_level_name_2(self):
         # GH#62361
 
         df = DataFrame(

From d08a1cefcfd5cf31c424dddd59a5cbf4808a5e8c Mon Sep 17 00:00:00 2001
From: Georgios <giorgosgnkl@gmail.com>
Date: Wed, 29 Oct 2025 23:50:04 +0000
Subject: [PATCH 5/5] note in comment

---
 pandas/tests/frame/methods/test_sort_index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
index 056c10f7a1dd0..1ee7d3a91f41b 100644
--- a/pandas/tests/frame/methods/test_sort_index.py
+++ b/pandas/tests/frame/methods/test_sort_index.py
@@ -1,4 +1,4 @@
-from natsort import index_natsorted
+from natsort import index_natsorted  # should we import this? or change test?
 import numpy as np
 import pytest