From f810c33349a200b1f714ca925239cc22302cf6c0 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Wed, 24 Sep 2025 19:33:59 +0000 Subject: [PATCH 01/20] WIP: Creating a test framework for json operators --- src/ldlite/_database/__init__.py | 13 +++-- src/ldlite/_database/duckdb.py | 14 ++++++ tests/test_json_operators.py | 82 ++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 tests/test_json_operators.py diff --git a/src/ldlite/_database/__init__.py b/src/ldlite/_database/__init__.py index ffd72f6..9decad5 100644 --- a/src/ldlite/_database/__init__.py +++ b/src/ldlite/_database/__init__.py @@ -98,10 +98,11 @@ def record_history(self, history: LoadHistory) -> None: ... class TypedDatabase(Database, Generic[DB]): def __init__(self, conn_factory: Callable[[], DB]): self._conn_factory = conn_factory - with closing(self._conn_factory()) as conn, conn.cursor() as cur: + with closing(self._conn_factory()) as conn: try: - cur.execute('CREATE SCHEMA IF NOT EXISTS "ldlite_system";') - cur.execute(""" + with conn.cursor() as cur: + cur.execute('CREATE SCHEMA IF NOT EXISTS "ldlite_system";') + cur.execute(""" CREATE TABLE IF NOT EXISTS "ldlite_system"."load_history" ( "table_name" TEXT UNIQUE ,"path" TEXT @@ -112,6 +113,8 @@ def __init__(self, conn_factory: Callable[[], DB]): ,"index_complete_utc" TIMESTAMP ,"row_count" INTEGER );""") + + self._setup_jfuncs(conn) except psycopg.errors.UniqueViolation: # postgres throws this when multiple threads try to create # the same resource even if CREATE IF NOT EXISTS was used @@ -122,6 +125,10 @@ def __init__(self, conn_factory: Callable[[], DB]): else: conn.commit() + @staticmethod + @abstractmethod + def _setup_jfuncs(conn: DB) -> None: ... + @property @abstractmethod def _default_schema(self) -> str: ... diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index 0676875..f4b399d 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -8,6 +8,20 @@ class DuckDbDatabase(TypedDatabase[duckdb.DuckDBPyConnection]): + @staticmethod + def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: + with conn.cursor() as cur: + cur.execute( + """ +CREATE OR REPLACE FUNCTION ldlite_system.jextract(j, p) AS + main.json_extract(j, p) +; +CREATE OR REPLACE FUNCTION ldlite_system.jextract_string(j, p) AS + main.json_extract_string(j, p) +; +""", + ) + @property def _default_schema(self) -> str: return "main" diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py new file mode 100644 index 0000000..ada5500 --- /dev/null +++ b/tests/test_json_operators.py @@ -0,0 +1,82 @@ +from collections.abc import Sequence +from contextlib import closing +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, cast +from uuid import uuid4 + +import duckdb +import pytest +from pytest_cases import parametrize, parametrize_with_cases + +if TYPE_CHECKING: + from _typeshed import dbapi + + +def _db() -> str: + db = "db" + str(uuid4()).split("-")[0] + print(db) # noqa: T201 + return db + + +@dataclass +class JsonTC: + assertion: str + params: Sequence[Any] + debug: str + debug_params: Sequence[Any] + + +@parametrize( + p=[ + ("str", '"str_val"'), + ("num", 12), + ("obj", '{"k1":"v1","k2":"v2"}'), + ("arr_str", '["s1","s2","s3"]'), + ("arr_obj", '[{"k1":"v1"},{"k2":"v2"}]'), + ], +) +def case_jextract(p: Sequence[Any]) -> JsonTC: + return JsonTC( + """SELECT ldlite_system.jextract(jc, $1) == $2 FROM j;""", + p, + """SELECT ldlite_system.jextract(jc, $1) FROM j;""", + p[:1], + ) + + +def _assert(conn: "dbapi.DBAPIConnection", tc: JsonTC) -> None: + with closing(conn.cursor()) as cur: + cur.execute(tc.assertion, tc.params) + actual = cur.fetchone() + assert actual is not None + assert actual[0] is not None + if not actual[0]: + conn.rollback() # type:ignore[attr-defined] + with closing(conn.cursor()) as cur: + cur.execute(tc.debug, tc.debug_params) + pytest.fail(str(cur.fetchone())) + + +@parametrize_with_cases("tc", cases=".") +def test_duckdb(tc: JsonTC) -> None: + from ldlite import LDLite + + ld = LDLite() + dsn = f":memory:{_db()}" + ld.connect_db(dsn) + + with duckdb.connect(dsn) as conn: + conn.execute("CREATE TABLE j (jc JSON)") + conn.execute( + "INSERT INTO j VALUES " + "('{" + """ "str": "str_val",""" + """ "num": 12,""" + """ "obj": {"k1": "v1", "k2": "v2"},""" + """ "arr_str": ["s1", "s2", "s3"],""" + """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}]""" + " }')", + ) + + with duckdb.connect(dsn) as conn, conn.begin() as tx: + _assert(cast("dbapi.DBAPIConnection", tx), tc) From 232f5cd406182353b5cf622210329917ef29a8e3 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Wed, 24 Sep 2025 20:01:02 +0000 Subject: [PATCH 02/20] Create and test a common json_extract function --- src/ldlite/_database/postgres.py | 13 +++++++++ tests/test_json_operators.py | 48 +++++++++++++++++++++++++++----- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index 479018d..a02dd09 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -13,6 +13,19 @@ def __init__(self, dsn: str): # same sql between duckdb and postgres super().__init__(lambda: psycopg.connect(dsn, cursor_factory=psycopg.RawCursor)) + @staticmethod + def _setup_jfuncs(conn: psycopg.Connection) -> None: + with conn.cursor() as cur: + cur.execute( + """ +CREATE OR REPLACE FUNCTION ldlite_system.jextract(j JSONB, p TEXT) RETURNS JSONB AS $$ +BEGIN + RETURN j->p; +END +$$ LANGUAGE plpgsql; +""", + ) + @property def _default_schema(self) -> str: return "public" diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index ada5500..1062941 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -1,10 +1,11 @@ -from collections.abc import Sequence +from collections.abc import Callable, Sequence from contextlib import closing from dataclasses import dataclass from typing import TYPE_CHECKING, Any, cast from uuid import uuid4 import duckdb +import psycopg import pytest from pytest_cases import parametrize, parametrize_with_cases @@ -21,15 +22,16 @@ def _db() -> str: @dataclass class JsonTC: assertion: str - params: Sequence[Any] + assertion_params: Sequence[Any] debug: str debug_params: Sequence[Any] + format_type: bool = False @parametrize( p=[ ("str", '"str_val"'), - ("num", 12), + ("num", "12"), ("obj", '{"k1":"v1","k2":"v2"}'), ("arr_str", '["s1","s2","s3"]'), ("arr_obj", '[{"k1":"v1"},{"k2":"v2"}]'), @@ -37,16 +39,20 @@ class JsonTC: ) def case_jextract(p: Sequence[Any]) -> JsonTC: return JsonTC( - """SELECT ldlite_system.jextract(jc, $1) == $2 FROM j;""", + """SELECT ldlite_system.jextract(jc, $1) = $2::{jtype} FROM j;""", p, """SELECT ldlite_system.jextract(jc, $1) FROM j;""", p[:1], + format_type=True, ) -def _assert(conn: "dbapi.DBAPIConnection", tc: JsonTC) -> None: +def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: with closing(conn.cursor()) as cur: - cur.execute(tc.assertion, tc.params) + if tc.format_type: + cur.execute(tc.assertion.format(jtype=jtype), tc.assertion_params) + else: + cur.execute(tc.assertion, tc.assertion_params) actual = cur.fetchone() assert actual is not None assert actual[0] is not None @@ -79,4 +85,32 @@ def test_duckdb(tc: JsonTC) -> None: ) with duckdb.connect(dsn) as conn, conn.begin() as tx: - _assert(cast("dbapi.DBAPIConnection", tx), tc) + _assert(cast("dbapi.DBAPIConnection", tx), "JSON", tc) + + +@parametrize_with_cases("tc", cases=".") +def test_postgres(pg_dsn: None | Callable[[str], str], tc: JsonTC) -> None: + if pg_dsn is None: + pytest.skip("Specify the pg host using --pg-host to run") + + from ldlite import LDLite + + ld = LDLite() + dsn = pg_dsn(_db()) + ld.connect_db_postgresql(dsn) + + with psycopg.connect(dsn) as conn, conn.cursor() as cur: + cur.execute("CREATE TABLE j (jc JSONB)") + cur.execute( + "INSERT INTO j VALUES " + "('{" + """ "str": "str_val",""" + """ "num": 12,""" + """ "obj": {"k1": "v1", "k2": "v2"},""" + """ "arr_str": ["s1", "s2", "s3"],""" + """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}]""" + " }')", + ) + + with psycopg.connect(dsn, cursor_factory=psycopg.RawCursor) as conn: + _assert(cast("dbapi.DBAPIConnection", conn), "JSONB", tc) From e51cdabe49d0fe0286f5f33b4fcbf8c46e3f041d Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Wed, 24 Sep 2025 20:15:12 +0000 Subject: [PATCH 03/20] Create a normalized version of the json_extract_string --- src/ldlite/_database/postgres.py | 11 ++++++++++- tests/test_json_operators.py | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index a02dd09..2adaed2 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -23,7 +23,16 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: RETURN j->p; END $$ LANGUAGE plpgsql; -""", +CREATE OR REPLACE FUNCTION ldlite_system.jextract_string(j JSONB, p TEXT) RETURNS TEXT AS $$ +BEGIN + RETURN CASE + WHEN jsonb_typeof(j->p) NOT IN ('object', 'array') THEN j->>p + ELSE json_strip_nulls(j::JSON->p)::TEXT + END; +END +$$ LANGUAGE plpgsql; + +""", # noqa: E501 ) @property diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 1062941..ebc29fb 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -47,6 +47,24 @@ def case_jextract(p: Sequence[Any]) -> JsonTC: ) +@parametrize( + p=[ + ("str", "str_val"), + ("num", "12"), + ("obj", '{"k1":"v1","k2":"v2"}'), + ("arr_str", '["s1","s2","s3"]'), + ("arr_obj", '[{"k1":"v1"},{"k2":"v2"}]'), + ], +) +def case_jextract_string(p: Sequence[Any]) -> JsonTC: + return JsonTC( + """SELECT ldlite_system.jextract_string(jc, $1) = $2 FROM j;""", + p, + """SELECT ldlite_system.jextract_string(jc, $1) FROM j;""", + p[:1], + ) + + def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: with closing(conn.cursor()) as cur: if tc.format_type: From 8e2ae2e9f87d82ccfb74432cd32a490f97879070 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 13:36:41 +0000 Subject: [PATCH 04/20] Refactor how test cases work for operators --- tests/test_json_operators.py | 47 +++++++++++++++++------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index ebc29fb..fdb7eba 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -1,4 +1,4 @@ -from collections.abc import Callable, Sequence +from collections.abc import Callable from contextlib import closing from dataclasses import dataclass from typing import TYPE_CHECKING, Any, cast @@ -21,11 +21,10 @@ def _db() -> str: @dataclass class JsonTC: + query: str + query_params: tuple[Any, ...] assertion: str - assertion_params: Sequence[Any] - debug: str - debug_params: Sequence[Any] - format_type: bool = False + assertion_params: tuple[Any, ...] @parametrize( @@ -37,13 +36,12 @@ class JsonTC: ("arr_obj", '[{"k1":"v1"},{"k2":"v2"}]'), ], ) -def case_jextract(p: Sequence[Any]) -> JsonTC: +def case_jextract(p: tuple[Any, ...]) -> JsonTC: return JsonTC( - """SELECT ldlite_system.jextract(jc, $1) = $2::{jtype} FROM j;""", - p, - """SELECT ldlite_system.jextract(jc, $1) FROM j;""", + """SELECT ldlite_system.jextract(jc, $1){assertion} FROM j;""", p[:1], - format_type=True, + """= $2::{jtype}""", + p[1:], ) @@ -51,34 +49,33 @@ def case_jextract(p: Sequence[Any]) -> JsonTC: p=[ ("str", "str_val"), ("num", "12"), - ("obj", '{"k1":"v1","k2":"v2"}'), - ("arr_str", '["s1","s2","s3"]'), - ("arr_obj", '[{"k1":"v1"},{"k2":"v2"}]'), ], ) -def case_jextract_string(p: Sequence[Any]) -> JsonTC: +def case_jextract_string(p: tuple[Any, ...]) -> JsonTC: return JsonTC( - """SELECT ldlite_system.jextract_string(jc, $1) = $2 FROM j;""", - p, - """SELECT ldlite_system.jextract_string(jc, $1) FROM j;""", + """SELECT ldlite_system.jextract_string(jc, $1){assertion} FROM j;""", p[:1], + """ = $2""", + p[1:], ) def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: with closing(conn.cursor()) as cur: - if tc.format_type: - cur.execute(tc.assertion.format(jtype=jtype), tc.assertion_params) - else: - cur.execute(tc.assertion, tc.assertion_params) + query = tc.query.format(assertion="", jtype=jtype) + assertion = tc.query.format( + assertion=tc.assertion.format(jtype=jtype), + jtype=jtype, + ) + + cur.execute(assertion, (*tc.query_params, *tc.assertion_params)) actual = cur.fetchone() assert actual is not None assert actual[0] is not None + assert actual[0] if not actual[0]: - conn.rollback() # type:ignore[attr-defined] - with closing(conn.cursor()) as cur: - cur.execute(tc.debug, tc.debug_params) - pytest.fail(str(cur.fetchone())) + cur.execute(query, tc.query_params) + pytest.fail(str(cur.fetchone())) @parametrize_with_cases("tc", cases=".") From b4c5eedbd0d049b559b3766968f1679dd3b3977a Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 13:39:49 +0000 Subject: [PATCH 05/20] Only create postgres database once for json operator tests --- tests/test_json_operators.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index fdb7eba..c38bc1c 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -103,17 +103,12 @@ def test_duckdb(tc: JsonTC) -> None: _assert(cast("dbapi.DBAPIConnection", tx), "JSON", tc) -@parametrize_with_cases("tc", cases=".") -def test_postgres(pg_dsn: None | Callable[[str], str], tc: JsonTC) -> None: +@pytest.fixture(scope="session") +def pg_jop_dsn(pg_dsn: None | Callable[[str], str]) -> str: if pg_dsn is None: pytest.skip("Specify the pg host using --pg-host to run") - from ldlite import LDLite - - ld = LDLite() dsn = pg_dsn(_db()) - ld.connect_db_postgresql(dsn) - with psycopg.connect(dsn) as conn, conn.cursor() as cur: cur.execute("CREATE TABLE j (jc JSONB)") cur.execute( @@ -126,6 +121,15 @@ def test_postgres(pg_dsn: None | Callable[[str], str], tc: JsonTC) -> None: """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}]""" " }')", ) + return dsn + + +@parametrize_with_cases("tc", cases=".") +def test_postgres(pg_jop_dsn: str, tc: JsonTC) -> None: + from ldlite import LDLite + + ld = LDLite() + ld.connect_db_postgresql(pg_jop_dsn) - with psycopg.connect(dsn, cursor_factory=psycopg.RawCursor) as conn: + with psycopg.connect(pg_jop_dsn, cursor_factory=psycopg.RawCursor) as conn: _assert(cast("dbapi.DBAPIConnection", conn), "JSONB", tc) From f05c4d6b90348f3218f14163dc9e53d386495e86 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 13:45:15 +0000 Subject: [PATCH 06/20] Also only setup the duckdb database once --- tests/test_json_operators.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index c38bc1c..34deef8 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -1,4 +1,4 @@ -from collections.abc import Callable +from collections.abc import Callable, Iterator from contextlib import closing from dataclasses import dataclass from typing import TYPE_CHECKING, Any, cast @@ -78,13 +78,9 @@ def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: pytest.fail(str(cur.fetchone())) -@parametrize_with_cases("tc", cases=".") -def test_duckdb(tc: JsonTC) -> None: - from ldlite import LDLite - - ld = LDLite() +@pytest.fixture(scope="session") +def duckdb_jop_dsn() -> Iterator[str]: dsn = f":memory:{_db()}" - ld.connect_db(dsn) with duckdb.connect(dsn) as conn: conn.execute("CREATE TABLE j (jc JSON)") @@ -99,7 +95,17 @@ def test_duckdb(tc: JsonTC) -> None: " }')", ) - with duckdb.connect(dsn) as conn, conn.begin() as tx: + yield dsn + + +@parametrize_with_cases("tc", cases=".") +def test_duckdb(duckdb_jop_dsn: str, tc: JsonTC) -> None: + from ldlite import LDLite + + ld = LDLite() + ld.connect_db(duckdb_jop_dsn) + + with duckdb.connect(duckdb_jop_dsn) as conn, conn.begin() as tx: _assert(cast("dbapi.DBAPIConnection", tx), "JSON", tc) From f82a58fc685c930c739bc12e54b4a186a3b49f25 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 13:53:00 +0000 Subject: [PATCH 07/20] Refactor json string function for postgres --- src/ldlite/_database/duckdb.py | 1 + src/ldlite/_database/postgres.py | 7 ++----- tests/test_json_operators.py | 4 ++++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index f4b399d..db4b45d 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -16,6 +16,7 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: CREATE OR REPLACE FUNCTION ldlite_system.jextract(j, p) AS main.json_extract(j, p) ; + CREATE OR REPLACE FUNCTION ldlite_system.jextract_string(j, p) AS main.json_extract_string(j, p) ; diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index 2adaed2..0efe408 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -23,15 +23,12 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: RETURN j->p; END $$ LANGUAGE plpgsql; + CREATE OR REPLACE FUNCTION ldlite_system.jextract_string(j JSONB, p TEXT) RETURNS TEXT AS $$ BEGIN - RETURN CASE - WHEN jsonb_typeof(j->p) NOT IN ('object', 'array') THEN j->>p - ELSE json_strip_nulls(j::JSON->p)::TEXT - END; + RETURN j->>p; END $$ LANGUAGE plpgsql; - """, # noqa: E501 ) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 34deef8..7fb19f9 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -31,6 +31,7 @@ class JsonTC: p=[ ("str", '"str_val"'), ("num", "12"), + ("float", "16.3"), ("obj", '{"k1":"v1","k2":"v2"}'), ("arr_str", '["s1","s2","s3"]'), ("arr_obj", '[{"k1":"v1"},{"k2":"v2"}]'), @@ -49,6 +50,7 @@ def case_jextract(p: tuple[Any, ...]) -> JsonTC: p=[ ("str", "str_val"), ("num", "12"), + ("float", "16.3"), ], ) def case_jextract_string(p: tuple[Any, ...]) -> JsonTC: @@ -89,6 +91,7 @@ def duckdb_jop_dsn() -> Iterator[str]: "('{" """ "str": "str_val",""" """ "num": 12,""" + """ "float": 16.3,""" """ "obj": {"k1": "v1", "k2": "v2"},""" """ "arr_str": ["s1", "s2", "s3"],""" """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}]""" @@ -122,6 +125,7 @@ def pg_jop_dsn(pg_dsn: None | Callable[[str], str]) -> str: "('{" """ "str": "str_val",""" """ "num": 12,""" + """ "float": 16.3,""" """ "obj": {"k1": "v1", "k2": "v2"},""" """ "arr_str": ["s1", "s2", "s3"],""" """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}]""" From 653ce754d7bf2a46d773421ed0c01b790bbf4f10 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 14:34:06 +0000 Subject: [PATCH 08/20] Add a normalized object keys function --- src/ldlite/_database/duckdb.py | 3 +++ src/ldlite/_database/postgres.py | 6 ++++++ tests/test_json_operators.py | 26 +++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index db4b45d..7e2047b 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -20,6 +20,9 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: CREATE OR REPLACE FUNCTION ldlite_system.jextract_string(j, p) AS main.json_extract_string(j, p) ; + +CREATE OR REPLACE FUNCTION ldlite_system.jobject_keys(j) AS + unnest(main.json_keys(j)) """, ) diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index 0efe408..62e85b9 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -29,6 +29,12 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: RETURN j->>p; END $$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION ldlite_system.jobject_keys(j JSONB) RETURNS SETOF TEXT AS $$ +BEGIN + RETURN QUERY SELECT jsonb_object_keys(j); +END +$$ LANGUAGE plpgsql; """, # noqa: E501 ) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 7fb19f9..644f1dc 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -62,6 +62,21 @@ def case_jextract_string(p: tuple[Any, ...]) -> JsonTC: ) +def case_jobject_keys() -> JsonTC: + return JsonTC( + """ +{assertion} +(SELECT e.jkey, a.jkey +FROM (SELECT 'k1' jkey UNION SELECT 'k2' jkey) as e +FULL OUTER JOIN (SELECT ldlite_system.jobject_keys(jc->'obj') jkey FROM j) a + USING (jkey) +WHERE e.jkey IS NULL or a.jkey IS NULL);""", + (), + "SELECT COUNT(1) = 0 FROM ", + (), + ) + + def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: with closing(conn.cursor()) as cur: query = tc.query.format(assertion="", jtype=jtype) @@ -74,10 +89,15 @@ def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: actual = cur.fetchone() assert actual is not None assert actual[0] is not None + + if not actual[0]: + cur.execute(query, tc.query_params) + diff = "" + for r in cur.fetchall(): + diff += f"{r}\n" + pytest.fail(diff) + assert actual[0] - if not actual[0]: - cur.execute(query, tc.query_params) - pytest.fail(str(cur.fetchone())) @pytest.fixture(scope="session") From 61d08e832b701615b6c8978a2d75d36907a22edb Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 14:47:04 +0000 Subject: [PATCH 09/20] Add a common typeof method --- src/ldlite/_database/duckdb.py | 13 +++++++++++++ src/ldlite/_database/postgres.py | 6 ++++++ tests/test_json_operators.py | 21 +++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index 7e2047b..0526eab 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -23,6 +23,19 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: CREATE OR REPLACE FUNCTION ldlite_system.jobject_keys(j) AS unnest(main.json_keys(j)) +; + +CREATE OR REPLACE FUNCTION ldlite_system.jtype_of(j) AS + CASE main.json_type(j) + WHEN 'VARCHAR' THEN 'string' + WHEN 'BIGINT' THEN 'number' + WHEN 'DOUBLE' THEN 'number' + WHEN 'UBIGINT' THEN 'number' + WHEN 'OBJECT' THEN 'object' + WHEN 'ARRAY' THEN 'array' + ELSE main.json_type(j) + END +; """, ) diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index 62e85b9..318958f 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -35,6 +35,12 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: RETURN QUERY SELECT jsonb_object_keys(j); END $$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION ldlite_system.jtype_of(j JSONB) RETURNS TEXT AS $$ +BEGIN + RETURN jsonb_typeof(j); +END +$$ LANGUAGE plpgsql; """, # noqa: E501 ) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 644f1dc..08fb0e5 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -77,6 +77,27 @@ def case_jobject_keys() -> JsonTC: ) +@parametrize( + p=[ + ("str", "string"), + ("num", "number"), + ("float", "number"), + ("obj", "object"), + ("arr_str", "array"), + ("arr_obj", "array"), + ], +) +def case_jtypeof(p: tuple[Any, ...]) -> JsonTC: + return JsonTC( + """ +SELECT ldlite_system.jtype_of(ldlite_system.jextract(jc, $1)){assertion} +FROM j;""", + p[:1], + """ = $2""", + p[1:], + ) + + def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: with closing(conn.cursor()) as cur: query = tc.query.format(assertion="", jtype=jtype) From 995511b062ffeef524f4fb677ac4832419c37f0f Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 14:55:07 +0000 Subject: [PATCH 10/20] Handle nulls and bools --- src/ldlite/_database/duckdb.py | 2 ++ tests/test_json_operators.py | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index 0526eab..0833e16 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -32,7 +32,9 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: WHEN 'DOUBLE' THEN 'number' WHEN 'UBIGINT' THEN 'number' WHEN 'OBJECT' THEN 'object' + WHEN 'BOOLEAN' THEN 'boolean' WHEN 'ARRAY' THEN 'array' + WHEN 'NULL' THEN 'null' ELSE main.json_type(j) END ; diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 08fb0e5..8991984 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -32,9 +32,11 @@ class JsonTC: ("str", '"str_val"'), ("num", "12"), ("float", "16.3"), + ("bool", "true"), ("obj", '{"k1":"v1","k2":"v2"}'), ("arr_str", '["s1","s2","s3"]'), ("arr_obj", '[{"k1":"v1"},{"k2":"v2"}]'), + ("na", "null"), ], ) def case_jextract(p: tuple[Any, ...]) -> JsonTC: @@ -51,13 +53,15 @@ def case_jextract(p: tuple[Any, ...]) -> JsonTC: ("str", "str_val"), ("num", "12"), ("float", "16.3"), + ("bool", "true"), + ("na",), ], ) def case_jextract_string(p: tuple[Any, ...]) -> JsonTC: return JsonTC( """SELECT ldlite_system.jextract_string(jc, $1){assertion} FROM j;""", p[:1], - """ = $2""", + """ = $2""" if p[0] != "na" else """ IS NULL""", p[1:], ) @@ -82,9 +86,11 @@ def case_jobject_keys() -> JsonTC: ("str", "string"), ("num", "number"), ("float", "number"), + ("bool", "boolean"), ("obj", "object"), ("arr_str", "array"), ("arr_obj", "array"), + ("na", "null"), ], ) def case_jtypeof(p: tuple[Any, ...]) -> JsonTC: @@ -133,9 +139,11 @@ def duckdb_jop_dsn() -> Iterator[str]: """ "str": "str_val",""" """ "num": 12,""" """ "float": 16.3,""" + """ "bool": true,""" """ "obj": {"k1": "v1", "k2": "v2"},""" """ "arr_str": ["s1", "s2", "s3"],""" - """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}]""" + """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}],""" + """ "na": null""" " }')", ) @@ -167,9 +175,11 @@ def pg_jop_dsn(pg_dsn: None | Callable[[str], str]) -> str: """ "str": "str_val",""" """ "num": 12,""" """ "float": 16.3,""" + """ "bool": true,""" """ "obj": {"k1": "v1", "k2": "v2"},""" """ "arr_str": ["s1", "s2", "s3"],""" - """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}]""" + """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}],""" + """ "na": null""" " }')", ) return dsn From f5113683e2ba5d707fc7c09afeb79112f00c5659 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 15:00:43 +0000 Subject: [PATCH 11/20] Refactor setup to share insert statement --- tests/test_json_operators.py | 38 +++++++++++++++--------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 8991984..1b18c29 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -127,13 +127,9 @@ def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: assert actual[0] -@pytest.fixture(scope="session") -def duckdb_jop_dsn() -> Iterator[str]: - dsn = f":memory:{_db()}" - - with duckdb.connect(dsn) as conn: - conn.execute("CREATE TABLE j (jc JSON)") - conn.execute( +def _arrange(conn: "dbapi.DBAPIConnection") -> None: + with closing(conn.cursor()) as cur: + cur.execute( "INSERT INTO j VALUES " "('{" """ "str": "str_val",""" @@ -141,12 +137,22 @@ def duckdb_jop_dsn() -> Iterator[str]: """ "float": 16.3,""" """ "bool": true,""" """ "obj": {"k1": "v1", "k2": "v2"},""" + """ "arr_zero": [],""" """ "arr_str": ["s1", "s2", "s3"],""" """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}],""" """ "na": null""" " }')", ) + +@pytest.fixture(scope="session") +def duckdb_jop_dsn() -> Iterator[str]: + dsn = f":memory:{_db()}" + + with duckdb.connect(dsn) as conn: + conn.execute("CREATE TABLE j (jc JSON)") + _arrange(cast("dbapi.DBAPIConnection", conn)) + yield dsn @@ -157,8 +163,8 @@ def test_duckdb(duckdb_jop_dsn: str, tc: JsonTC) -> None: ld = LDLite() ld.connect_db(duckdb_jop_dsn) - with duckdb.connect(duckdb_jop_dsn) as conn, conn.begin() as tx: - _assert(cast("dbapi.DBAPIConnection", tx), "JSON", tc) + with duckdb.connect(duckdb_jop_dsn) as conn: + _assert(cast("dbapi.DBAPIConnection", conn), "JSON", tc) @pytest.fixture(scope="session") @@ -169,19 +175,7 @@ def pg_jop_dsn(pg_dsn: None | Callable[[str], str]) -> str: dsn = pg_dsn(_db()) with psycopg.connect(dsn) as conn, conn.cursor() as cur: cur.execute("CREATE TABLE j (jc JSONB)") - cur.execute( - "INSERT INTO j VALUES " - "('{" - """ "str": "str_val",""" - """ "num": 12,""" - """ "float": 16.3,""" - """ "bool": true,""" - """ "obj": {"k1": "v1", "k2": "v2"},""" - """ "arr_str": ["s1", "s2", "s3"],""" - """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}],""" - """ "na": null""" - " }')", - ) + _arrange(cast("dbapi.DBAPIConnection", conn)) return dsn From c77b1aa2958d7728a5a5fbb62c05616c40daf40a Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 15:02:29 +0000 Subject: [PATCH 12/20] Add a common array length function --- src/ldlite/_database/duckdb.py | 4 ++++ src/ldlite/_database/postgres.py | 7 +++++++ tests/test_json_operators.py | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index 0833e16..aff9006 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -38,6 +38,10 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: ELSE main.json_type(j) END ; + +CREATE OR REPLACE FUNCTION ldlite_system.jarray_length(j) AS + main.json_array_length(j) +; """, ) diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index 318958f..68bc96c 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -41,6 +41,13 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: RETURN jsonb_typeof(j); END $$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION ldlite_system.jarray_length(j JSONB) RETURNS INTEGER AS $$ +BEGIN + RETURN jsonb_array_length(j); +END +$$ LANGUAGE plpgsql; + """, # noqa: E501 ) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 1b18c29..6d0e1d6 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -104,6 +104,24 @@ def case_jtypeof(p: tuple[Any, ...]) -> JsonTC: ) +@parametrize( + p=[ + ("arr_zero", 0), + ("arr_str", 3), + ("arr_obj", 2), + ], +) +def case_jarray_length(p: tuple[Any, ...]) -> JsonTC: + return JsonTC( + """ +SELECT ldlite_system.jarray_length(ldlite_system.jextract(jc, $1)){assertion} +FROM j;""", + p[:1], + """ = $2""", + p[1:], + ) + + def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: with closing(conn.cursor()) as cur: query = tc.query.format(assertion="", jtype=jtype) From 360d8b09b0db8aac4eb93ce5c4c4dfb2e85cdd3b Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 18:43:27 +0000 Subject: [PATCH 13/20] Push null handling into jextract function --- src/ldlite/_database/duckdb.py | 43 +++++++++++++++++++++----------- src/ldlite/_database/postgres.py | 39 ++++++++++++++++++----------- tests/test_json_operators.py | 42 ++++++++++++++----------------- 3 files changed, 72 insertions(+), 52 deletions(-) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index aff9006..5120dec 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -13,18 +13,6 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: with conn.cursor() as cur: cur.execute( """ -CREATE OR REPLACE FUNCTION ldlite_system.jextract(j, p) AS - main.json_extract(j, p) -; - -CREATE OR REPLACE FUNCTION ldlite_system.jextract_string(j, p) AS - main.json_extract_string(j, p) -; - -CREATE OR REPLACE FUNCTION ldlite_system.jobject_keys(j) AS - unnest(main.json_keys(j)) -; - CREATE OR REPLACE FUNCTION ldlite_system.jtype_of(j) AS CASE main.json_type(j) WHEN 'VARCHAR' THEN 'string' @@ -39,10 +27,35 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: END ; -CREATE OR REPLACE FUNCTION ldlite_system.jarray_length(j) AS - main.json_array_length(j) +CREATE OR REPLACE FUNCTION ldlite_system.jextract(j, p) AS + CASE ldlite_system.jtype_of(main.json_extract(j, p)) + WHEN 'string' THEN + CASE + WHEN lower(main.json_extract_string(j, p)) = 'null' THEN 'null'::JSON + ELSE main.json_extract(j, p) + END + WHEN 'object' THEN + CASE + WHEN main.json_extract_string(j, p) = '{}' THEN 'null'::JSON + ELSE main.json_extract(j, p) + END + WHEN 'array' THEN + CASE + WHEN length(list_filter((main.json_extract(j, p))::JSON[], lambda x: x != 'null'::JSON)) = 0 THEN 'null'::JSON + ELSE list_filter((main.json_extract(j, p))::JSON[], lambda x: x != 'null'::JSON) + END + ELSE main.json_extract(j, p) + END +; + +CREATE OR REPLACE FUNCTION ldlite_system.jextract_string(j, p) AS + main.json_extract_string(ldlite_system.jextract(j, p), '$') +; + +CREATE OR REPLACE FUNCTION ldlite_system.jobject_keys(j) AS + unnest(main.json_keys(j)) ; -""", +""", # noqa: E501 ) @property diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index 68bc96c..39a74ff 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -18,33 +18,44 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: with conn.cursor() as cur: cur.execute( """ -CREATE OR REPLACE FUNCTION ldlite_system.jextract(j JSONB, p TEXT) RETURNS JSONB AS $$ -BEGIN - RETURN j->p; -END -$$ LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION ldlite_system.jextract_string(j JSONB, p TEXT) RETURNS TEXT AS $$ +CREATE OR REPLACE FUNCTION ldlite_system.jtype_of(j JSONB) RETURNS TEXT AS $$ BEGIN - RETURN j->>p; + RETURN jsonb_typeof(j); END $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION ldlite_system.jobject_keys(j JSONB) RETURNS SETOF TEXT AS $$ +CREATE OR REPLACE FUNCTION ldlite_system.jextract(j JSONB, p TEXT) RETURNS JSONB AS $$ BEGIN - RETURN QUERY SELECT jsonb_object_keys(j); + RETURN CASE + WHEN ldlite_system.jtype_of(j->p) = 'string' THEN + CASE + WHEN lower(j->>p) = 'null' THEN 'null'::JSONB + ELSE j->p + END + WHEN ldlite_system.jtype_of(j->p) = 'array' THEN + CASE + WHEN jsonb_array_length(jsonb_path_query_array(j->p, '$[*] ? (@ != null)')) = 0 THEN 'null'::JSONB + ELSE jsonb_path_query_array(j->p, '$[*] ? (@ != null)') + END + WHEN ldlite_system.jtype_of(j->p) = 'object' THEN + CASE + WHEN j->>p = '{}' THEN 'null'::JSONB + ELSE j->p + END + ELSE j->p + END; END $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION ldlite_system.jtype_of(j JSONB) RETURNS TEXT AS $$ +CREATE OR REPLACE FUNCTION ldlite_system.jextract_string(j JSONB, p TEXT) RETURNS TEXT AS $$ BEGIN - RETURN jsonb_typeof(j); + RETURN ldlite_system.jextract(j, p) ->> 0; END $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION ldlite_system.jarray_length(j JSONB) RETURNS INTEGER AS $$ +CREATE OR REPLACE FUNCTION ldlite_system.jobject_keys(j JSONB) RETURNS SETOF TEXT AS $$ BEGIN - RETURN jsonb_array_length(j); + RETURN QUERY SELECT jsonb_object_keys(j); END $$ LANGUAGE plpgsql; diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 6d0e1d6..dcedd2b 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -34,9 +34,15 @@ class JsonTC: ("float", "16.3"), ("bool", "true"), ("obj", '{"k1":"v1","k2":"v2"}'), + ("obj_some", '{"k1":"v1","k2":null}'), + ("obj_empty", "null"), + ("arr_zero", "null"), ("arr_str", '["s1","s2","s3"]'), - ("arr_obj", '[{"k1":"v1"},{"k2":"v2"}]'), + ("arr_str_some", '["s1","s2"]'), + ("arr_obj_some", '[{"k1":"v1"}]'), ("na", "null"), + ("na_str1", "null"), + ("na_str2", "null"), ], ) def case_jextract(p: tuple[Any, ...]) -> JsonTC: @@ -55,13 +61,15 @@ def case_jextract(p: tuple[Any, ...]) -> JsonTC: ("float", "16.3"), ("bool", "true"), ("na",), + ("na_str1",), + ("na_str2",), ], ) def case_jextract_string(p: tuple[Any, ...]) -> JsonTC: return JsonTC( """SELECT ldlite_system.jextract_string(jc, $1){assertion} FROM j;""", p[:1], - """ = $2""" if p[0] != "na" else """ IS NULL""", + """ = $2""" if len(p) == 2 else """ IS NULL""", p[1:], ) @@ -104,24 +112,6 @@ def case_jtypeof(p: tuple[Any, ...]) -> JsonTC: ) -@parametrize( - p=[ - ("arr_zero", 0), - ("arr_str", 3), - ("arr_obj", 2), - ], -) -def case_jarray_length(p: tuple[Any, ...]) -> JsonTC: - return JsonTC( - """ -SELECT ldlite_system.jarray_length(ldlite_system.jextract(jc, $1)){assertion} -FROM j;""", - p[:1], - """ = $2""", - p[1:], - ) - - def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: with closing(conn.cursor()) as cur: query = tc.query.format(assertion="", jtype=jtype) @@ -133,15 +123,15 @@ def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: cur.execute(assertion, (*tc.query_params, *tc.assertion_params)) actual = cur.fetchone() assert actual is not None - assert actual[0] is not None - if not actual[0]: + if actual[0] is None or not actual[0]: cur.execute(query, tc.query_params) diff = "" for r in cur.fetchall(): diff += f"{r}\n" pytest.fail(diff) + assert actual[0] is not None assert actual[0] @@ -155,10 +145,16 @@ def _arrange(conn: "dbapi.DBAPIConnection") -> None: """ "float": 16.3,""" """ "bool": true,""" """ "obj": {"k1": "v1", "k2": "v2"},""" + """ "obj_some": {"k1": "v1", "k2": null},""" + """ "obj_empty": {},""" """ "arr_zero": [],""" """ "arr_str": ["s1", "s2", "s3"],""" + """ "arr_str_some": ["s1", "s2", null],""" """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}],""" - """ "na": null""" + """ "arr_obj_some": [{"k1": "v1"}, null],""" + """ "na": null,""" + """ "na_str1": "null", """ + """ "na_str2": "NULL" """ " }')", ) From 851d5bc09c460194da54db1cdb0f9ab208739bb3 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 19:27:49 +0000 Subject: [PATCH 14/20] Check for uuid --- src/ldlite/_database/duckdb.py | 8 ++++++++ src/ldlite/_database/postgres.py | 10 ++++++++++ tests/test_json_operators.py | 27 +++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index 5120dec..5734bb3 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -32,6 +32,7 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: WHEN 'string' THEN CASE WHEN lower(main.json_extract_string(j, p)) = 'null' THEN 'null'::JSON + WHEN length(main.json_extract_string(j, p)) = 0 THEN 'null'::JSON ELSE main.json_extract(j, p) END WHEN 'object' THEN @@ -55,6 +56,13 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: CREATE OR REPLACE FUNCTION ldlite_system.jobject_keys(j) AS unnest(main.json_keys(j)) ; +CREATE OR REPLACE FUNCTION ldlite_system.jis_uuid(j) AS + CASE ldlite_system.jtype_of(j) + WHEN 'string' THEN regexp_full_match(main.json_extract_string(j, '$'), '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[1-5][a-fA-F0-9]{3}-[89abAB][a-fA-F0-9]{3}-[a-fA-F0-9]{12}$') + ELSE FALSE + END +; + """, # noqa: E501 ) diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index 39a74ff..c603eea 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -30,6 +30,7 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: WHEN ldlite_system.jtype_of(j->p) = 'string' THEN CASE WHEN lower(j->>p) = 'null' THEN 'null'::JSONB + WHEN length(j->>p) = 0 THEN 'null'::JSONB ELSE j->p END WHEN ldlite_system.jtype_of(j->p) = 'array' THEN @@ -59,6 +60,15 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: END $$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION ldlite_system.jis_uuid(j JSONB) RETURNS BOOLEAN AS $$ +BEGIN + RETURN CASE + WHEN ldlite_system.jtype_of(j) = 'string' THEN j->>0 ~ '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[1-5][a-fA-F0-9]{3}-[89abAB][a-fA-F0-9]{3}-[a-fA-F0-9]{12}$' + ELSE FALSE + END; +END +$$ LANGUAGE plpgsql; + """, # noqa: E501 ) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index dcedd2b..fd2b113 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -30,6 +30,7 @@ class JsonTC: @parametrize( p=[ ("str", '"str_val"'), + ("str_empty", "null"), ("num", "12"), ("float", "16.3"), ("bool", "true"), @@ -112,6 +113,29 @@ def case_jtypeof(p: tuple[Any, ...]) -> JsonTC: ) +@parametrize( + p=[ + ("str", False), + ("str_empty", False), + ("num", False), + ("na", False), + ("na_str1", False), + ("na_str2", False), + ("uuid_nof", False), + ("uuid", True), + ], +) +def case_jis_uuid(p: tuple[Any, ...]) -> JsonTC: + return JsonTC( + """ +SELECT {assertion}ldlite_system.jis_uuid(ldlite_system.jextract(jc, $1)) +FROM j;""", + p[:1], + "" if (p[1]) else """ NOT """, + (), + ) + + def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: with closing(conn.cursor()) as cur: query = tc.query.format(assertion="", jtype=jtype) @@ -141,9 +165,12 @@ def _arrange(conn: "dbapi.DBAPIConnection") -> None: "INSERT INTO j VALUES " "('{" """ "str": "str_val",""" + """ "str_empty": "",""" """ "num": 12,""" """ "float": 16.3,""" """ "bool": true,""" + """ "uuid": "5b285d03-5490-1111-8888-52b2003b475c",""" + """ "uuid_nof": "5b285d03-5490-FFFF-0000-52b2003b475c",""" """ "obj": {"k1": "v1", "k2": "v2"},""" """ "obj_some": {"k1": "v1", "k2": null},""" """ "obj_empty": {},""" From 3afa0e7ea9cda6ce792601dcbf63cfb515e29492 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 19:48:23 +0000 Subject: [PATCH 15/20] Check for datetime strings --- src/ldlite/_database/duckdb.py | 10 +++++++++- src/ldlite/_database/postgres.py | 11 ++++++++++- tests/test_json_operators.py | 29 +++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index 5734bb3..c740f82 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -12,7 +12,7 @@ class DuckDbDatabase(TypedDatabase[duckdb.DuckDBPyConnection]): def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: with conn.cursor() as cur: cur.execute( - """ + r""" CREATE OR REPLACE FUNCTION ldlite_system.jtype_of(j) AS CASE main.json_type(j) WHEN 'VARCHAR' THEN 'string' @@ -56,6 +56,7 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: CREATE OR REPLACE FUNCTION ldlite_system.jobject_keys(j) AS unnest(main.json_keys(j)) ; + CREATE OR REPLACE FUNCTION ldlite_system.jis_uuid(j) AS CASE ldlite_system.jtype_of(j) WHEN 'string' THEN regexp_full_match(main.json_extract_string(j, '$'), '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[1-5][a-fA-F0-9]{3}-[89abAB][a-fA-F0-9]{3}-[a-fA-F0-9]{12}$') @@ -63,6 +64,13 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: END ; +CREATE OR REPLACE FUNCTION ldlite_system.jis_datetime(j) AS + CASE ldlite_system.jtype_of(j) + WHEN 'string' THEN try_strptime(main.json_extract_string(j, '$'), '%Y-%m-%dT%H:%M:%S.%g%z') IS NOT NULL + ELSE FALSE + END +; + """, # noqa: E501 ) diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index c603eea..cd83dd4 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -17,7 +17,7 @@ def __init__(self, dsn: str): def _setup_jfuncs(conn: psycopg.Connection) -> None: with conn.cursor() as cur: cur.execute( - """ + r""" CREATE OR REPLACE FUNCTION ldlite_system.jtype_of(j JSONB) RETURNS TEXT AS $$ BEGIN RETURN jsonb_typeof(j); @@ -69,6 +69,15 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: END $$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION ldlite_system.jis_datetime(j JSONB) RETURNS BOOLEAN AS $$ +BEGIN + RETURN CASE + WHEN ldlite_system.jtype_of(j) = 'string' THEN j->>0 ~ '^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$' + ELSE FALSE + END; +END +$$ LANGUAGE plpgsql; + """, # noqa: E501 ) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index fd2b113..5d94223 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -105,7 +105,7 @@ def case_jobject_keys() -> JsonTC: def case_jtypeof(p: tuple[Any, ...]) -> JsonTC: return JsonTC( """ -SELECT ldlite_system.jtype_of(ldlite_system.jextract(jc, $1)){assertion} +SELECT ldlite_system.jtype_of(jc->$1){assertion} FROM j;""", p[:1], """ = $2""", @@ -128,7 +128,31 @@ def case_jtypeof(p: tuple[Any, ...]) -> JsonTC: def case_jis_uuid(p: tuple[Any, ...]) -> JsonTC: return JsonTC( """ -SELECT {assertion}ldlite_system.jis_uuid(ldlite_system.jextract(jc, $1)) +SELECT {assertion}ldlite_system.jis_uuid(jc->$1) +FROM j;""", + p[:1], + "" if (p[1]) else """ NOT """, + (), + ) + + +@parametrize( + p=[ + ("str", False), + ("str_empty", False), + ("num", False), + ("na", False), + ("na_str1", False), + ("na_str2", False), + ("uuid_nof", False), + ("uuid", False), + ("dt", True), + ], +) +def case_jis_datetime(p: tuple[Any, ...]) -> JsonTC: + return JsonTC( + """ +SELECT {assertion}ldlite_system.jis_datetime(jc->$1) FROM j;""", p[:1], "" if (p[1]) else """ NOT """, @@ -179,6 +203,7 @@ def _arrange(conn: "dbapi.DBAPIConnection") -> None: """ "arr_str_some": ["s1", "s2", null],""" """ "arr_obj": [{"k1": "v1"}, {"k2": "v2"}],""" """ "arr_obj_some": [{"k1": "v1"}, null],""" + """ "dt": "2022-04-21T18:47:33.581+00:00",""" """ "na": null,""" """ "na_str1": "null", """ """ "na_str2": "NULL" """ From f1001f5956280604b4bb9a3339dbb50e91b11193 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 19:53:13 +0000 Subject: [PATCH 16/20] Check for float types --- src/ldlite/_database/duckdb.py | 7 +++++++ src/ldlite/_database/postgres.py | 9 +++++++++ tests/test_json_operators.py | 26 ++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index c740f82..7dde5de 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -71,6 +71,13 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: END ; +CREATE OR REPLACE FUNCTION ldlite_system.jis_float(j) AS + CASE ldlite_system.jtype_of(j) + WHEN 'number' THEN contains(main.json_extract_string(j, '$'), '.') + ELSE FALSE + END +; + """, # noqa: E501 ) diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index cd83dd4..2dc7c61 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -78,6 +78,15 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: END $$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION ldlite_system.jis_float(j JSONB) RETURNS BOOLEAN AS $$ +BEGIN + RETURN CASE + WHEN ldlite_system.jtype_of(j) = 'number' THEN j->>0 LIKE '%.%' + ELSE FALSE + END; +END +$$ LANGUAGE plpgsql; """, # noqa: E501 ) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 5d94223..906cb42 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -160,6 +160,32 @@ def case_jis_datetime(p: tuple[Any, ...]) -> JsonTC: ) +@parametrize( + p=[ + ("str", False), + ("str_empty", False), + ("num", False), + ("na", False), + ("na_str1", False), + ("na_str2", False), + ("uuid_nof", False), + ("uuid", False), + ("dt", False), + ("num", False), + ("float", True), + ], +) +def case_jis_float(p: tuple[Any, ...]) -> JsonTC: + return JsonTC( + """ +SELECT {assertion}ldlite_system.jis_float(jc->$1) +FROM j;""", + p[:1], + "" if (p[1]) else """ NOT """, + (), + ) + + def _assert(conn: "dbapi.DBAPIConnection", jtype: str, tc: JsonTC) -> None: with closing(conn.cursor()) as cur: query = tc.query.format(assertion="", jtype=jtype) From 7888781482d7fb6ed045e50900087ccb0e45202b Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Thu, 25 Sep 2025 20:12:40 +0000 Subject: [PATCH 17/20] Use older lambda syntax on older duckdb --- src/ldlite/_database/duckdb.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index 7dde5de..c6caf5c 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -10,6 +10,11 @@ class DuckDbDatabase(TypedDatabase[duckdb.DuckDBPyConnection]): @staticmethod def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: + with conn.cursor() as cur: + cur.execute("SELECT version() > '1.3.0' AS has_lambda;") + if (ver := cur.fetchone()) and ver[0]: + cur.execute("SET lambda_syntax = 'ENABLE_SINGLE_ARROW';") + with conn.cursor() as cur: cur.execute( r""" @@ -42,8 +47,8 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: END WHEN 'array' THEN CASE - WHEN length(list_filter((main.json_extract(j, p))::JSON[], lambda x: x != 'null'::JSON)) = 0 THEN 'null'::JSON - ELSE list_filter((main.json_extract(j, p))::JSON[], lambda x: x != 'null'::JSON) + WHEN length(list_filter((main.json_extract(j, p))::JSON[], x -> x != 'null'::JSON)) = 0 THEN 'null'::JSON + ELSE list_filter((main.json_extract(j, p))::JSON[], x -> x != 'null'::JSON) END ELSE main.json_extract(j, p) END From 146347d7af94f8d14985c3cc166d195917cda060 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Fri, 26 Sep 2025 19:05:17 +0000 Subject: [PATCH 18/20] Make duckdb work with minimal dependencies --- src/ldlite/_database/duckdb.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/ldlite/_database/duckdb.py b/src/ldlite/_database/duckdb.py index c6caf5c..c9b994a 100644 --- a/src/ldlite/_database/duckdb.py +++ b/src/ldlite/_database/duckdb.py @@ -11,15 +11,17 @@ class DuckDbDatabase(TypedDatabase[duckdb.DuckDBPyConnection]): @staticmethod def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: with conn.cursor() as cur: - cur.execute("SELECT version() > '1.3.0' AS has_lambda;") - if (ver := cur.fetchone()) and ver[0]: - cur.execute("SET lambda_syntax = 'ENABLE_SINGLE_ARROW';") + cur.execute("SELECT string_split(ltrim(version(),'v'), '.') AS has_lambda;") + if ver := cur.fetchone(): + (ma, mi, _) = ver[0] + if int(ma) > 1 and int(mi) >= 3: + cur.execute("SET lambda_syntax = 'ENABLE_SINGLE_ARROW';") with conn.cursor() as cur: cur.execute( r""" CREATE OR REPLACE FUNCTION ldlite_system.jtype_of(j) AS - CASE main.json_type(j) + CASE coalesce(main.json_type(j), 'NULL') WHEN 'VARCHAR' THEN 'string' WHEN 'BIGINT' THEN 'number' WHEN 'DOUBLE' THEN 'number' @@ -50,7 +52,7 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: WHEN length(list_filter((main.json_extract(j, p))::JSON[], x -> x != 'null'::JSON)) = 0 THEN 'null'::JSON ELSE list_filter((main.json_extract(j, p))::JSON[], x -> x != 'null'::JSON) END - ELSE main.json_extract(j, p) + ELSE coalesce(main.json_extract(j, p), 'null'::JSON) END ; @@ -71,16 +73,13 @@ def _setup_jfuncs(conn: duckdb.DuckDBPyConnection) -> None: CREATE OR REPLACE FUNCTION ldlite_system.jis_datetime(j) AS CASE ldlite_system.jtype_of(j) - WHEN 'string' THEN try_strptime(main.json_extract_string(j, '$'), '%Y-%m-%dT%H:%M:%S.%g%z') IS NOT NULL + WHEN 'string' THEN regexp_full_match(main.json_extract_string(j, '$'), '^\d{4}-[01]\d-[0123]\dT[012]\d:[012345]\d:[012345]\d\.\d{3}(\+\d{2}:\d{2})?$') ELSE FALSE END ; CREATE OR REPLACE FUNCTION ldlite_system.jis_float(j) AS - CASE ldlite_system.jtype_of(j) - WHEN 'number' THEN contains(main.json_extract_string(j, '$'), '.') - ELSE FALSE - END + coalesce(main.json_type(j), 'NULL')='DOUBLE' ; """, # noqa: E501 From 38fcdcf63562b72ba37ec6ead8cb593681d6d870 Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Fri, 26 Sep 2025 19:22:26 +0000 Subject: [PATCH 19/20] Less complex regex for postgres datetime check --- src/ldlite/_database/postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ldlite/_database/postgres.py b/src/ldlite/_database/postgres.py index 2dc7c61..7a78477 100644 --- a/src/ldlite/_database/postgres.py +++ b/src/ldlite/_database/postgres.py @@ -72,7 +72,7 @@ def _setup_jfuncs(conn: psycopg.Connection) -> None: CREATE OR REPLACE FUNCTION ldlite_system.jis_datetime(j JSONB) RETURNS BOOLEAN AS $$ BEGIN RETURN CASE - WHEN ldlite_system.jtype_of(j) = 'string' THEN j->>0 ~ '^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$' + WHEN ldlite_system.jtype_of(j) = 'string' THEN j->>0 ~ '^\d{4}-[01]\d-[0123]\dT[012]\d:[012345]\d:[012345]\d\.\d{3}(\+\d{2}:\d{2})?$' ELSE FALSE END; END From 6357420014aad7eff58bccb1bdaebb79781fdcbd Mon Sep 17 00:00:00 2001 From: Katherine Bargar Date: Fri, 26 Sep 2025 20:14:24 +0000 Subject: [PATCH 20/20] Fix postgres tests on version 13 --- tests/test_json_operators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_json_operators.py b/tests/test_json_operators.py index 906cb42..5eec23e 100644 --- a/tests/test_json_operators.py +++ b/tests/test_json_operators.py @@ -81,9 +81,9 @@ def case_jobject_keys() -> JsonTC: {assertion} (SELECT e.jkey, a.jkey FROM (SELECT 'k1' jkey UNION SELECT 'k2' jkey) as e -FULL OUTER JOIN (SELECT ldlite_system.jobject_keys(jc->'obj') jkey FROM j) a +FULL OUTER JOIN (SELECT ldlite_system.jobject_keys(jc->'obj') jkey FROM j) as a USING (jkey) -WHERE e.jkey IS NULL or a.jkey IS NULL);""", +WHERE e.jkey IS NULL or a.jkey IS NULL) as q;""", (), "SELECT COUNT(1) = 0 FROM ", (),