Skip to content

Commit 95c4eba

Browse files
committed
fix(core): handle empty columns and rows gracefully in unpivot
1 parent 5d581a3 commit 95c4eba

File tree

1 file changed

+55
-17
lines changed

1 file changed

+55
-17
lines changed

bigframes/core/blocks.py

Lines changed: 55 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -3418,6 +3418,11 @@ def unpivot(
34183418
array_value, type="cross"
34193419
)
34203420

3421+
if not labels_array.column_ids:
3422+
import traceback
3423+
3424+
traceback.print_stack()
3425+
34213426
new_passthrough_cols = [column_mapping[col] for col in passthrough_columns]
34223427
# Last column is offsets
34233428
index_col_ids = [labels_mapping[col] for col in labels_array.column_ids[:-1]]
@@ -3427,20 +3432,24 @@ def unpivot(
34273432
unpivot_exprs: List[ex.Expression] = []
34283433
# Supports producing multiple stacked output columns for stacking only part of hierarchical index
34293434
for input_ids in unpivot_columns:
3430-
# row explode offset used to choose the input column
3431-
# we use offset instead of label as labels are not necessarily unique
3432-
cases = itertools.chain(
3433-
*(
3434-
(
3435-
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3436-
ex.deref(column_mapping[id_or_null])
3437-
if (id_or_null is not None)
3438-
else ex.const(None),
3435+
col_expr: ex.Expression
3436+
if not input_ids:
3437+
col_expr = ex.const(None)
3438+
else:
3439+
# row explode offset used to choose the input column
3440+
# we use offset instead of label as labels are not necessarily unique
3441+
cases = itertools.chain(
3442+
*(
3443+
(
3444+
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3445+
ex.deref(column_mapping[id_or_null])
3446+
if (id_or_null is not None)
3447+
else ex.const(None),
3448+
)
3449+
for i, id_or_null in enumerate(input_ids)
34393450
)
3440-
for i, id_or_null in enumerate(input_ids)
34413451
)
3442-
)
3443-
col_expr = ops.case_when_op.as_expr(*cases)
3452+
col_expr = ops.case_when_op.as_expr(*cases)
34443453
unpivot_exprs.append(col_expr)
34453454

34463455
joined_array, unpivot_col_ids = joined_array.compute_values(unpivot_exprs)
@@ -3458,19 +3467,48 @@ def _pd_index_to_array_value(
34583467
Create an ArrayValue from a list of label tuples.
34593468
The last column will be row offsets.
34603469
"""
3470+
id_gen = bigframes.core.identifiers.standard_id_strings()
3471+
col_ids = [next(id_gen) for _ in range(index.nlevels)]
3472+
offset_id = next(id_gen)
3473+
34613474
rows = []
34623475
labels_as_tuples = utils.index_as_tuples(index)
34633476
for row_offset in range(len(index)):
3464-
id_gen = bigframes.core.identifiers.standard_id_strings()
34653477
row_label = labels_as_tuples[row_offset]
34663478
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
34673479
row = {}
3468-
for label_part, id in zip(row_label, id_gen):
3469-
row[id] = label_part if pd.notnull(label_part) else None
3470-
row[next(id_gen)] = row_offset
3480+
for label_part, col_id in zip(row_label, col_ids):
3481+
row[col_id] = label_part if pd.notnull(label_part) else None
3482+
row[offset_id] = row_offset
34713483
rows.append(row)
34723484

3473-
return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
3485+
import pyarrow as pa
3486+
3487+
if not rows:
3488+
from bigframes.dtypes import bigframes_dtype_to_arrow_dtype
3489+
3490+
dtypes_list = getattr(index, "dtypes", None)
3491+
if dtypes_list is None:
3492+
dtypes_list = (
3493+
[index.dtype] if hasattr(index, "dtype") else [pd.Float64Dtype()]
3494+
)
3495+
3496+
fields = []
3497+
for col_id, dtype in zip(col_ids, dtypes_list):
3498+
try:
3499+
pa_type = bigframes_dtype_to_arrow_dtype(dtype)
3500+
except Exception:
3501+
pa_type = pa.string()
3502+
fields.append(pa.field(col_id, pa_type))
3503+
fields.append(pa.field(offset_id, pa.int64()))
3504+
schema = pa.schema(fields)
3505+
pt = pa.Table.from_pylist([], schema=schema)
3506+
else:
3507+
pt = pa.Table.from_pylist(rows)
3508+
# Ensure correct column names
3509+
pt = pt.rename_columns([*col_ids, offset_id])
3510+
3511+
return core.ArrayValue.from_pyarrow(pt, session=session)
34743512

34753513

34763514
def _resolve_index_col(

0 commit comments

Comments
 (0)