From 30f48bf83e85c84731c2be8c54995114815b610f Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 11 Jun 2025 09:32:02 +0700 Subject: [PATCH 1/4] Optimize indexing, use named index --- quasardb/pandas/__init__.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/quasardb/pandas/__init__.py b/quasardb/pandas/__init__.py index a314d74c..8f0e5cfb 100644 --- a/quasardb/pandas/__init__.py +++ b/quasardb/pandas/__init__.py @@ -175,7 +175,13 @@ def write_series(series, table, col_name, infer_types=True, dtype=None): ) -def query(cluster: quasardb.Cluster, query, index=None, blobs=False, numpy=True): +def query( + cluster: quasardb.Cluster, + query: str, + index: str | None = None, + blobs: bool = False, + numpy: bool = True, +): """ Execute a query and return the results as DataFrames. Returns a dict of tablename / DataFrame pairs. @@ -199,11 +205,12 @@ def query(cluster: quasardb.Cluster, query, index=None, blobs=False, numpy=True) """ logger.debug("querying and returning as DataFrame: %s", query) - (index, m) = qdbnp.query(cluster, query, index=index, dict=True) - df = pd.DataFrame(m) + index_vals, m = qdbnp.query(cluster, query, index=index, dict=True) + + index_name = "$index" if index is None else index + index_obj = pd.Index(index_vals, name=index_name) - df.set_index(index, inplace=True) - return df + return pd.DataFrame(m, index=index_obj) def stream_dataframes( From a043e4fa0acfd5aea64de991e2945dea67f39e51 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 11 Jun 2025 09:39:19 +0700 Subject: [PATCH 2/4] Add proper deprecation of the `blobs` and `numpy` arguments --- quasardb/pandas/__init__.py | 47 ++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/quasardb/pandas/__init__.py b/quasardb/pandas/__init__.py index 8f0e5cfb..7a2bad8d 100644 --- a/quasardb/pandas/__init__.py +++ b/quasardb/pandas/__init__.py @@ -28,6 +28,7 @@ # import logging +import warnings from datetime import datetime from functools import partial @@ -183,27 +184,41 @@ def query( numpy: bool = True, ): """ - Execute a query and return the results as DataFrames. Returns a dict of - tablename / DataFrame pairs. - - Parameters: - ----------- + Execute *query* and return the result as a pandas DataFrame. + Parameters + ---------- cluster : quasardb.Cluster - Active connection to the QuasarDB cluster + Active connection to the QuasarDB cluster. query : str - The query to execute. + The query to execute. + + index : str | None, default None + Column to use as index. When None a synthetic index is created and + named “$index”. + + blobs, numpy + DEPRECATED – no longer used. Supplying a non-default value raises a + DeprecationWarning and the argument is ignored. + """ + # ------------------------------------------------------------------ deprecations + if blobs is not False: + warnings.warn( + "`blobs` is deprecated and will be removed in a future version; " + "the argument is ignored.", + DeprecationWarning, + stacklevel=2, + ) + if numpy is not True: + warnings.warn( + "`numpy` is deprecated and will be removed in a future version; " + "the argument is ignored.", + DeprecationWarning, + stacklevel=2, + ) + # ------------------------------------------------------------------------------ - blobs : bool or list - Determines which QuasarDB blob-columns should be returned as bytearrays; otherwise - they are returned as UTF-8 strings. - - True means every blob column should be returned as byte-array, or a list will - specify which specific columns. Defaults to false, meaning all blobs are returned - as strings. - - """ logger.debug("querying and returning as DataFrame: %s", query) index_vals, m = qdbnp.query(cluster, query, index=index, dict=True) From bd690b6c1222a35cc6b136ee83ca2153bc46ac9f Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 11 Jun 2025 09:42:04 +0700 Subject: [PATCH 3/4] Fix indentation issue --- quasardb/pandas/__init__.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/quasardb/pandas/__init__.py b/quasardb/pandas/__init__.py index 7a2bad8d..b1731a2b 100644 --- a/quasardb/pandas/__init__.py +++ b/quasardb/pandas/__init__.py @@ -201,22 +201,22 @@ def query( blobs, numpy DEPRECATED – no longer used. Supplying a non-default value raises a DeprecationWarning and the argument is ignored. - """ + """ # ------------------------------------------------------------------ deprecations - if blobs is not False: - warnings.warn( - "`blobs` is deprecated and will be removed in a future version; " - "the argument is ignored.", - DeprecationWarning, - stacklevel=2, - ) - if numpy is not True: - warnings.warn( - "`numpy` is deprecated and will be removed in a future version; " - "the argument is ignored.", - DeprecationWarning, - stacklevel=2, - ) + if blobs is not False: + warnings.warn( + "`blobs` is deprecated and will be removed in a future version; " + "the argument is ignored.", + DeprecationWarning, + stacklevel=2, + ) + if numpy is not True: + warnings.warn( + "`numpy` is deprecated and will be removed in a future version; " + "the argument is ignored.", + DeprecationWarning, + stacklevel=2, + ) # ------------------------------------------------------------------------------ logger.debug("querying and returning as DataFrame: %s", query) From cbffe66e1c2e2f55c279a78688177b38b72c65aa Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 11 Jun 2025 09:52:55 +0700 Subject: [PATCH 4/4] Operand not supported in Python 3.9 --- quasardb/pandas/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quasardb/pandas/__init__.py b/quasardb/pandas/__init__.py index b1731a2b..031e2d42 100644 --- a/quasardb/pandas/__init__.py +++ b/quasardb/pandas/__init__.py @@ -179,7 +179,7 @@ def write_series(series, table, col_name, infer_types=True, dtype=None): def query( cluster: quasardb.Cluster, query: str, - index: str | None = None, + index: str = None, blobs: bool = False, numpy: bool = True, ):