BUG: Fix DataFrame.from_dict dropping rows for empty Series/dict

parthava-adabala · parthava-adabala · commit ea680cdb8859 · 2025-10-28T01:01:48.000-05:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -1179,6 +1179,7 @@ Reshaping
 - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
 - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
 - Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)
+- Bug in :meth:`DataFrame.from_dict` where rows corresponding to an empty :class:`Series` or ``dict`` would be dropped when ``orient='index'`` (:issue:`62775`)
 - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
 - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1914,11 +1914,13 @@ def from_dict(
         orient = orient.lower()  # type: ignore[assignment]
         if orient == "index":
             if len(data) > 0:
+                index = list(data.keys())
                 # TODO speed up Series case
                 if isinstance(next(iter(data.values())), (Series, dict)):
                     data = _from_nested_dict(data)
+                    if not data and columns is None:
+                        columns = []
                 else:
-                    index = list(data.keys())
                     # error: Incompatible types in assignment (expression has type
                     # "List[Any]", variable has type "Dict[Any, Any]")
                     data = list(data.values())  # type: ignore[assignment]
@@ -14413,9 +14415,22 @@ def _from_nested_dict(
     new_data: collections.defaultdict[HashableT2, dict[HashableT, T]] = (
         collections.defaultdict(dict)
     )
+    all_cols_dict = {}
+    for s in data.values():
+        if isinstance(s, (dict, ABCSeries)):
+            all_cols_dict.update(dict.fromkeys(s.keys()))
+    all_cols_list = list(all_cols_dict.keys())
+    if not all_cols_list:
+        return new_data
     for index, s in data.items():
-        for col, v in s.items():
-            new_data[col][index] = v
+        if isinstance(s, (dict, ABCSeries)):
+            for col in all_cols_list:
+                new_data[col][index] = s.get(col, None)
+        elif s is None or is_scalar(s):
+            for col in all_cols_list:
+                new_data[col][index] = s
+        else:
+            raise TypeError(f"Value at index {index} is not a dict/Series/scalar/None")
     return new_data
 
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -2649,6 +2649,48 @@ def test_error_from_2darray(self, col_a, col_b):
         with pytest.raises(ValueError, match=msg):
             DataFrame({"a": col_a, "b": col_b})
 
+    @pytest.mark.parametrize(
+        "data, expected",
+        [
+            (
+                {
+                    "good": Series({"a": 1, "b": 2}),
+                    "blank": Series(dtype="float64"),
+                },
+                DataFrame(
+                    {"a": [1.0, np.nan], "b": [2.0, np.nan]}, index=["good", "blank"]
+                ),
+            ),
+            (
+                {
+                    "blank": Series(dtype="float64"),
+                    "good": Series({"a": 1, "b": 2}),
+                },
+                DataFrame(
+                    {"a": [np.nan, 1.0], "b": [np.nan, 2.0]}, index=["blank", "good"]
+                ),
+            ),
+            (
+                {"blank": Series(dtype="float64")},
+                DataFrame(index=["blank"], columns=[]),
+            ),
+            (
+                {
+                    "good": Series({"a": 1, "b": 2}),
+                    "blank_dict": {},
+                },
+                DataFrame(
+                    {"a": [1.0, np.nan], "b": [2.0, np.nan]},
+                    index=["good", "blank_dict"],
+                ),
+            ),
+        ],
+    )
+    def test_from_dict_orient_index_empty_series_or_dict(self, data, expected):
+        # GH-62775
+        result = DataFrame.from_dict(data, orient="index")
+        tm.assert_frame_equal(result, expected)
+
     def test_from_dict_with_missing_copy_false(self):
         # GH#45369 filled columns should not be views of one another
         df = DataFrame(index=[1, 2, 3], columns=["a", "b", "c"], copy=False)