Skip to content

Commit ea680cd

Browse files
BUG: Fix DataFrame.from_dict empty row drop
1 parent 9f4c0ba commit ea680cd

File tree

3 files changed

+61
-3
lines changed

3 files changed

+61
-3
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,6 +1179,7 @@ Reshaping
11791179
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
11801180
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
11811181
- Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)
1182+
- Bug in :meth:`DataFrame.from_dict` where rows corresponding to an empty :class:`Series` or ``dict`` would be dropped when ``orient='index'`` (:issue:`62775`)
11821183
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
11831184
- Bug in :meth:`DataFrame.join` where joining a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None`` (:issue:`58721`)
11841185
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)

pandas/core/frame.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1914,11 +1914,13 @@ def from_dict(
19141914
orient = orient.lower() # type: ignore[assignment]
19151915
if orient == "index":
19161916
if len(data) > 0:
1917+
index = list(data.keys())
19171918
# TODO speed up Series case
19181919
if isinstance(next(iter(data.values())), (Series, dict)):
19191920
data = _from_nested_dict(data)
1921+
if not data and columns is None:
1922+
columns = []
19201923
else:
1921-
index = list(data.keys())
19221924
# error: Incompatible types in assignment (expression has type
19231925
# "List[Any]", variable has type "Dict[Any, Any]")
19241926
data = list(data.values()) # type: ignore[assignment]
@@ -14413,9 +14415,22 @@ def _from_nested_dict(
1441314415
new_data: collections.defaultdict[HashableT2, dict[HashableT, T]] = (
1441414416
collections.defaultdict(dict)
1441514417
)
14418+
all_cols_dict = {}
14419+
for s in data.values():
14420+
if isinstance(s, (dict, ABCSeries)):
14421+
all_cols_dict.update(dict.fromkeys(s.keys()))
14422+
all_cols_list = list(all_cols_dict.keys())
14423+
if not all_cols_list:
14424+
return new_data
1441614425
for index, s in data.items():
14417-
for col, v in s.items():
14418-
new_data[col][index] = v
14426+
if isinstance(s, (dict, ABCSeries)):
14427+
for col in all_cols_list:
14428+
new_data[col][index] = s.get(col, None)
14429+
elif s is None or is_scalar(s):
14430+
for col in all_cols_list:
14431+
new_data[col][index] = s
14432+
else:
14433+
raise TypeError(f"Value at index {index} is not a dict/Series/scalar/None")
1441914434
return new_data
1442014435

1442114436

pandas/tests/frame/test_constructors.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2649,6 +2649,48 @@ def test_error_from_2darray(self, col_a, col_b):
26492649
with pytest.raises(ValueError, match=msg):
26502650
DataFrame({"a": col_a, "b": col_b})
26512651

2652+
@pytest.mark.parametrize(
2653+
"data, expected",
2654+
[
2655+
(
2656+
{
2657+
"good": Series({"a": 1, "b": 2}),
2658+
"blank": Series(dtype="float64"),
2659+
},
2660+
DataFrame(
2661+
{"a": [1.0, np.nan], "b": [2.0, np.nan]}, index=["good", "blank"]
2662+
),
2663+
),
2664+
(
2665+
{
2666+
"blank": Series(dtype="float64"),
2667+
"good": Series({"a": 1, "b": 2}),
2668+
},
2669+
DataFrame(
2670+
{"a": [np.nan, 1.0], "b": [np.nan, 2.0]}, index=["blank", "good"]
2671+
),
2672+
),
2673+
(
2674+
{"blank": Series(dtype="float64")},
2675+
DataFrame(index=["blank"], columns=[]),
2676+
),
2677+
(
2678+
{
2679+
"good": Series({"a": 1, "b": 2}),
2680+
"blank_dict": {},
2681+
},
2682+
DataFrame(
2683+
{"a": [1.0, np.nan], "b": [2.0, np.nan]},
2684+
index=["good", "blank_dict"],
2685+
),
2686+
),
2687+
],
2688+
)
2689+
def test_from_dict_orient_index_empty_series_or_dict(self, data, expected):
2690+
# GH-62775
2691+
result = DataFrame.from_dict(data, orient="index")
2692+
tm.assert_frame_equal(result, expected)
2693+
26522694
def test_from_dict_with_missing_copy_false(self):
26532695
# GH#45369 filled columns should not be views of one another
26542696
df = DataFrame(index=[1, 2, 3], columns=["a", "b", "c"], copy=False)

0 commit comments

Comments
 (0)