From 52149dbc38315a5b36edc2841edcda7765accaf0 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 1 Nov 2025 15:01:41 +0000
Subject: [PATCH 1/4] Enable roundtripping nested dtypes through parquet and
 arrow

---
 doc/source/whatsnew/v3.0.0.rst       |  1 +
 pandas/core/dtypes/dtypes.py         | 66 +++++++++++++++++++++++-
 pandas/tests/extension/test_arrow.py | 77 ++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 1 deletion(-)
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 12f522301e121..e0120a445c36a 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1221,6 +1221,7 @@ ExtensionArray
 - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
 - Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`)
 - Bug in constructing pandas data structures when passing into ``dtype`` a string of the type followed by ``[pyarrow]`` while PyArrow is not installed would raise ``NameError`` rather than ``ImportError`` (:issue:`57928`)
+- Bug in dtype inference when roundtripping nested arrow dtypes like ``list``, ``large_list``, ``struct`` and ``map``, as well as ``fixed_size_binary``, through pyarrow tables or parquet (:issue:`61529`)
 - Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`)
 - Fixed flex arithmetic with :class:`ExtensionArray` operands raising when ``fill_value`` was passed. (:issue:`62467`)
 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 2e3d73edcdf4f..e5f93293a34fd 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -2387,6 +2387,10 @@ def construct_from_string(cls, string: str) -> ArrowDtype:
                 except (NotImplementedError, ValueError):
                     # Fall through to raise with nice exception message below
                     pass
+                binary_pattern = re.compile(r"^fixed_size_binary\[(?P<width>\d+)\]$")
+                if match := binary_pattern.match(base_type):
+                    byte_width = match.group("width")
+                    return cls(pa.binary(int(byte_width)))
 
                 raise NotImplementedError(
                     "Passing pyarrow type specific parameters "
@@ -2394,9 +2398,69 @@ def construct_from_string(cls, string: str) -> ArrowDtype:
                     "Please construct an ArrowDtype object with a pyarrow_dtype "
                     "instance with specific parameters."
                 ) from err
-            raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err
+            # match maps
+            map_pattern = re.compile(r"^map<(?P<key>[^,<>]+),\s(?P<value>[^,<>]+)>$")
+            # match lists
+            list_inner_pattern = r"<item:\s(?P<item_type>.+)>$"
+            list_pattern = re.compile(rf"^list{list_inner_pattern}")
+            large_list_pattern = re.compile(rf"^large_list{list_inner_pattern}")
+            # match structs
+            struct_pattern = re.compile(r"^struct<(?P<fields>.+)>$")
+            if match := map_pattern.match(base_type):
+                pa_dtype = pa.map_(
+                    pa.type_for_alias(match.group("key")),
+                    pa.type_for_alias(match.group("value")),
+                )
+            elif match := list_pattern.match(base_type):
+                pa_dtype = pa.list_(
+                    cls._resolve_inner_types(match.group("item_type") + "[pyarrow]")
+                )
+            elif match := large_list_pattern.match(base_type):
+                pa_dtype = pa.large_list(
+                    cls._resolve_inner_types(match.group("item_type") + "[pyarrow]")
+                )
+            elif match := struct_pattern.match(base_type):
+                fields = []
+                for name, t in cls._split_struct(match.group("fields")):
+                    field_dtype = cls._resolve_inner_types(t + "[pyarrow]")
+                    fields.append((name, field_dtype))
+                pa_dtype = pa.struct(fields)
+            else:
+                raise TypeError(
+                    f"'{base_type}' is not a valid pyarrow data type."
+                ) from err
         return cls(pa_dtype)
 
+    @classmethod
+    def _resolve_inner_types(cls, string: str) -> pa.DataType:
+        """Return the pyarrow type for a ``"<type>[pyarrow]"`` string."""
+        if string != "string[pyarrow]":
+            return cls.construct_from_string(string).pyarrow_dtype
+        return pa.string()  # special-cased, not routed through construct_from_string
+
+    @staticmethod
+    def _split_struct(fields: str):
+        """Yield ``(name, type)`` for each top-level field of a struct type string."""
+        field_pattern = re.compile(r"^\s*(?P<name>[^:]+):\s*(?P<type>.+)\s*$")
+        parts, start, depth = [], 0, 0
+        for i, char in enumerate(fields):
+            if char in "<([":  # e.g. map<k, v>, decimal128(p, s), timestamp[u, tz=x]
+                depth += 1
+            elif char in ">)]":
+                depth -= 1
+            elif char == "," and depth == 0:  # only unbracketed commas separate fields
+                parts.append(fields[start:i].strip())
+                start = i + 1
+        # whatever follows the last top-level comma is the final field
+        if start < len(fields):
+            parts.append(fields[start:].strip())
+        # each part must read "name: type"; anything else raises TypeError
+        for field in parts:
+            if match := field_pattern.match(field):
+                yield match.group("name"), match.group("type")
+            else:
+                raise TypeError(f"Could not parse struct field definition: '{field}'")
+
     # TODO(arrow#33642): This can be removed once supported by pyarrow
     @classmethod
     def _parse_temporal_dtype_string(cls, string: str) -> ArrowDtype:
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index c1e01bbbe57a0..c4ea4f28d7a34 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3780,6 +3780,83 @@ def test_arrow_dtype_itemsize_fixed_width(type_name, expected_size):
     )
 
 
+def test_roundtrip_of_nested_types():
+    df = pd.DataFrame(
+        {
+            "list_int": pd.Series(
+                [[1, 2, 3], [4, 5]], dtype=ArrowDtype(pa.list_(pa.int64()))
+            ),
+            "list_string": pd.Series(
+                [["a", "b"], ["c"]], dtype=ArrowDtype(pa.list_(pa.string()))
+            ),
+            "large_list_int": pd.Series(
+                [[1, 2], [3, 4, 5]], dtype=ArrowDtype(pa.large_list(pa.int64()))
+            ),
+            "large_list_string": pd.Series(
+                [["x", "y"], ["z"]], dtype=ArrowDtype(pa.large_list(pa.string()))
+            ),
+            "list_map": pd.Series(  # map nested inside a list
+                [[{"a": 1.0, "b": 2.0}], [{"c": 3.0}]],
+                dtype=ArrowDtype(pa.list_(pa.map_(pa.string(), pa.float64()))),
+            ),
+            "large_list_map": pd.Series(  # map nested inside a large_list
+                [[{"x": 1.5}], [{"y": 2.5, "z": 3.5}]],
+                dtype=ArrowDtype(pa.large_list(pa.map_(pa.string(), pa.float64()))),
+            ),
+            "map_int_float": pd.Series(  # top-level map with integer keys
+                [{1: 1.1, 2: 2.2}, {3: 3.3}],
+                dtype=ArrowDtype(pa.map_(pa.int64(), pa.float64())),
+            ),
+            "struct_simple": pd.Series(
+                [{"f1": 1, "f2": 1.5}, {"f1": 2, "f2": 2.5}],
+                dtype=ArrowDtype(pa.struct([("f1", pa.int64()), ("f2", pa.float64())])),
+            ),
+            "struct_nested": pd.Series(  # struct -> struct -> list nesting
+                [
+                    {
+                        "outer_int": 10,
+                        "inner": {"int_list": [1, 2, 3], "text": "hello"},
+                    },
+                    {"outer_int": 20, "inner": {"int_list": [4, 5], "text": "world"}},
+                ],
+                dtype=ArrowDtype(
+                    pa.struct(
+                        [
+                            ("outer_int", pa.int64()),
+                            (
+                                "inner",
+                                pa.struct(
+                                    [
+                                        ("int_list", pa.list_(pa.int64())),
+                                        ("text", pa.string()),
+                                    ]
+                                ),
+                            ),
+                        ]
+                    )
+                ),
+            ),
+            "binary_16": pd.Series(  # fixed-width binary (16 bytes per value)
+                [b"0123456789abcdef", b"fedcba9876543210"],
+                dtype=ArrowDtype(pa.binary(16)),
+            ),
+            "list_struct": pd.Series(  # struct nested inside a list
+                [
+                    [{"id": 1, "value": 10.5}, {"id": 2, "value": 20.5}],
+                    [{"id": 3, "value": 30.5}],
+                ],
+                dtype=ArrowDtype(
+                    pa.list_(pa.struct([("id", pa.int64()), ("value", pa.float64())]))
+                ),
+            ),
+        }
+    )
+
+    table = pa.Table.from_pandas(df)
+    result = table.to_pandas()
+    tm.assert_frame_equal(result, df)
+
+
 @pytest.mark.parametrize("type_name", ["string", "binary", "large_string"])
 def test_arrow_dtype_itemsize_variable_width(type_name):
     # GH 57948

From 668b119aff485e21bb95f18a566288aca11e4783 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 1 Nov 2025 17:21:09 +0000
Subject: [PATCH 2/4] Fixup: ignore Pandas4Warning in nested roundtrip test

---
 pandas/tests/extension/test_arrow.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index c4ea4f28d7a34..70ee8f0468202 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3780,6 +3780,7 @@ def test_arrow_dtype_itemsize_fixed_width(type_name, expected_size):
     )
 
 
+@pytest.mark.filterwarnings("ignore::Pandas4Warning")  # min versions build
 def test_roundtrip_of_nested_types():
     df = pd.DataFrame(
         {

From cecfe20f3acb0f5dbc207cd7e19206a7ac1a25ba Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 1 Nov 2025 22:37:28 +0000
Subject: [PATCH 3/4] Fixup: assert Pandas4Warning only on pyarrow < 19

---
 pandas/tests/extension/test_arrow.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 70ee8f0468202..b8357f53486ba 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3780,7 +3780,6 @@ def test_arrow_dtype_itemsize_fixed_width(type_name, expected_size):
     )
 
 
-@pytest.mark.filterwarnings("ignore::Pandas4Warning")  # min versions build
 def test_roundtrip_of_nested_types():
     df = pd.DataFrame(
         {
@@ -3853,8 +3852,13 @@ def test_roundtrip_of_nested_types():
         }
     )
 
-    table = pa.Table.from_pandas(df)
-    result = table.to_pandas()
+    if pa_version_under19p0:
+        with tm.assert_produces_warning(Pandas4Warning):
+            table = pa.Table.from_pandas(df)
+            result = table.to_pandas()
+    else:
+        table = pa.Table.from_pandas(df)
+        result = table.to_pandas()
     tm.assert_frame_equal(result, df)
 
 

From 87e6841d78c2af25fff2dd8fadce521be7d1abb2 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 1 Nov 2025 23:06:38 +0000
Subject: [PATCH 4/4] Update test_arrow.py: skip stacklevel check for warning

---
 pandas/tests/extension/test_arrow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index b8357f53486ba..d0c889f1a27c0 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3853,7 +3853,7 @@ def test_roundtrip_of_nested_types():
     )
 
     if pa_version_under19p0:
-        with tm.assert_produces_warning(Pandas4Warning):
+        with tm.assert_produces_warning(Pandas4Warning, check_stacklevel=False):
             table = pa.Table.from_pandas(df)
             result = table.to_pandas()
     else: