From fd21999860a1829677797b4bef33050c344eeb8a Mon Sep 17 00:00:00 2001 From: Igor Date: Mon, 23 Jun 2025 09:35:09 +0200 Subject: [PATCH 1/3] evaluate stream_dataframe before concat and check if it's empty --- quasardb/pandas/__init__.py | 11 +++++++---- tests/test_pandas.py | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/quasardb/pandas/__init__.py b/quasardb/pandas/__init__.py index 031e2d42..b24285b0 100644 --- a/quasardb/pandas/__init__.py +++ b/quasardb/pandas/__init__.py @@ -344,10 +344,13 @@ def read_dataframe(conn: quasardb.Cluster, table, **kwargs): ) kwargs["batch_size"] = 2**16 - # Note that this is *lazy*, dfs is a generator, not a list -- as such, dataframes will be - # fetched on-demand, which means that an error could occur in the middle of processing - # dataframes. - dfs = stream_dataframe(conn, table, **kwargs) + # stream_dataframe is *lazy* - is a generator, not a list. + # if result of stream_dataframe is empty this could result in ValueError on pd.concat() + # we need to evaluate the generator first, and then concatenate if result set is not empty. + dfs = list(stream_dataframe(conn, table, **kwargs)) + + if len(dfs) == 0: + return pd.DataFrame() return pd.concat(dfs) diff --git a/tests/test_pandas.py b/tests/test_pandas.py index f9a2b0d2..d2bcdeb7 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -665,3 +665,21 @@ def test_retries( ) assert seen == True + + +def test_read_dataframe_empty_table_sc16881(qdbd_connection, table_name): + """ + If results of qdbpd.read_dataframe are empty it would raise ValueError on pd.concat(). + It should return an empty DataFrame instead. 
+ """ + table = qdbd_connection.ts(table_name) + + table_config = [ + quasardb.ColumnInfo(quasardb.ColumnType.Double, "d"), + ] + + table.create(table_config) + + df = qdbpd.read_dataframe(qdbd_connection, table) + + assert df.empty From 69679dcb78df684e5dff25161ac13fb51738c4dd Mon Sep 17 00:00:00 2001 From: Igor Date: Mon, 23 Jun 2025 09:42:48 +0200 Subject: [PATCH 2/3] add better test description --- tests/test_pandas.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_pandas.py b/tests/test_pandas.py index d2bcdeb7..25c505f6 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -669,8 +669,9 @@ def test_retries( def test_read_dataframe_empty_table_sc16881(qdbd_connection, table_name): """ - If results of qdbpd.read_dataframe are empty it would raise ValueError on pd.concat(). - It should return an empty DataFrame instead. + Ensures qdbpd.read_dataframe returns an empty DataFrame when the table exists but contains no data. + + Previously this raised ValueError due to pd.concat() on an empty result set. """ table = qdbd_connection.ts(table_name) From 257993c47f34064658794e888f07b5ca9c0fde2e Mon Sep 17 00:00:00 2001 From: Igor Date: Tue, 24 Jun 2025 12:46:32 +0200 Subject: [PATCH 3/3] simplify --- quasardb/pandas/__init__.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/quasardb/pandas/__init__.py b/quasardb/pandas/__init__.py index b24285b0..224d0a9f 100644 --- a/quasardb/pandas/__init__.py +++ b/quasardb/pandas/__init__.py @@ -344,16 +344,23 @@ def read_dataframe(conn: quasardb.Cluster, table, **kwargs): ) kwargs["batch_size"] = 2**16 - # stream_dataframe is *lazy* - is a generator, not a list. - # if result of stream_dataframe is empty this could result in ValueError on pd.concat() - # we need to evaluate the generator first, and then concatenate if result set is not empty. 
- dfs = list(stream_dataframe(conn, table, **kwargs)) + # Note that this is *lazy*, dfs is a generator, not a list -- as such, dataframes will be + # fetched on-demand, which means that an error could occur in the middle of processing + # dataframes. + dfs = stream_dataframe(conn, table, **kwargs) - if len(dfs) == 0: + # If stream_dataframe yields no dataframes, pd.concat() raises ValueError. + # Because stream_dataframe is a generator, there is no easy way to detect this without evaluating it, + # so the simplest approach is to catch the ValueError and return an empty DataFrame. + try: + return pd.concat(dfs, copy=False) + except ValueError as e: + logger.error( + "Error while concatenating dataframes. This can happen if result set is empty. Returning empty dataframe. Error: %s", + e, + ) return pd.DataFrame() - return pd.concat(dfs) - def _extract_columns(df, cinfos): """