Skip to content

Commit caf06a4

Browse files
committed
fix(describe): add count aggregation for JSON and OBJ_REF types
1 parent 1abd8d1 commit caf06a4

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

bigframes/pandas/core/methods/describe.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,5 +120,10 @@ def _get_aggs_for_dtype(dtype) -> list[aggregations.UnaryAggregateOp]:
120120
dtypes.TIME_DTYPE,
121121
]:
122122
return [aggregations.count_op, aggregations.nunique_op]
123+
elif dtype in [
124+
dtypes.JSON_DTYPE,
125+
dtypes.OBJ_REF_DTYPE,
126+
]:
127+
return [aggregations.count_op]
123128
else:
124129
return []

tests/system/small/pandas/test_describe.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,3 +352,22 @@ def test_series_groupby_describe(scalars_dfs):
352352
check_dtype=False,
353353
check_index_type=False,
354354
)
355+
356+
357+
def test_describe_json_and_obj_ref_returns_count(session):
358+
# Verify that describe() works on JSON and OBJ_REF columns, reporting only count (nunique fails on these types)
359+
sql = """
360+
SELECT
361+
PARSE_JSON('{"a": 1}') AS json_col,
362+
'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
363+
"""
364+
df = session.read_gbq(sql)
365+
366+
df["obj_ref_col"] = df["uri_col"].str.to_blob()
367+
df = df.drop(columns=["uri_col"])
368+
369+
res = df.describe(include="all").to_pandas()
370+
371+
assert "count" in res.index
372+
assert res.loc["count", "json_col"] == 1.0
373+
assert res.loc["count", "obj_ref_col"] == 1.0

0 commit comments

Comments
 (0)