From c22a945523efce7e9a6eaf6a17ca3860e8f9eb44 Mon Sep 17 00:00:00 2001 From: ZhouXing19 Date: Wed, 29 Oct 2025 00:32:28 -0400 Subject: [PATCH] sql/jsonpath: support index acceleration with AnyKey (`*`) at the chain end We now support index accelerating `jsonb_path_exists` filters with json path expression that ends with an AnyKey (`*`). Note that the AnyKey is allowed only at the end of the expression. I.e. the following are not allowed: ``` $.a.*.b $.a.b.*.* ``` Release note (sql change): We now support index accelerating `jsonb_path_exists` filters with json path expression that ends with an AnyKey (`*`). --- pkg/sql/inverted/expression.go | 8 ++-- pkg/sql/inverted/expression_test.go | 8 ++-- .../jsonb_path_exists_index_acceleration | 47 +++++++++++++++++++ pkg/sql/opt/xform/testdata/rules/select | 31 ++++++++++++ pkg/util/jsonpath/path.go | 45 ++++++++++++++++-- 5 files changed, 126 insertions(+), 13 deletions(-) diff --git a/pkg/sql/inverted/expression.go b/pkg/sql/inverted/expression.go index c87de68999ae..cb0866ea0c78 100644 --- a/pkg/sql/inverted/expression.go +++ b/pkg/sql/inverted/expression.go @@ -682,8 +682,8 @@ func intersectSpanExpressions(left, right *SpanExpression) *SpanExpression { Right: right, } if expr.FactoredUnionSpans != nil { - left.FactoredUnionSpans = subtractSpans(left.FactoredUnionSpans, expr.FactoredUnionSpans) - right.FactoredUnionSpans = subtractSpans(right.FactoredUnionSpans, expr.FactoredUnionSpans) + left.FactoredUnionSpans = SubtractSpans(left.FactoredUnionSpans, expr.FactoredUnionSpans) + right.FactoredUnionSpans = SubtractSpans(right.FactoredUnionSpans, expr.FactoredUnionSpans) } tryPruneChildren(expr) return expr @@ -910,9 +910,9 @@ func intersectSpans(left []Span, right []Span) []Span { return spans } -// subtractSpans subtracts right from left, under the assumption that right is a +// SubtractSpans subtracts right from left, under the assumption that right is a // subset of left. 
-func subtractSpans(left []Span, right []Span) []Span { +func SubtractSpans(left []Span, right []Span) []Span { if len(right) == 0 { return left } diff --git a/pkg/sql/inverted/expression_test.go b/pkg/sql/inverted/expression_test.go index 537965ccb030..616a3643cf3a 100644 --- a/pkg/sql/inverted/expression_test.go +++ b/pkg/sql/inverted/expression_test.go @@ -291,21 +291,21 @@ func TestSetIntersection(t *testing.T) { func TestSetSubtraction(t *testing.T) { checkEqual(t, nil, - subtractSpans( + SubtractSpans( []Span{single("b")}, []Span{span("b", "c")}, ), ) checkEqual(t, []Span{span("b\x00", "d")}, - subtractSpans( + SubtractSpans( []Span{span("b", "d")}, []Span{span("b", "b\x00")}, ), ) checkEqual(t, []Span{span("b", "d"), span("e", "ea")}, - subtractSpans( + SubtractSpans( []Span{span("b", "d"), span("e", "f")}, []Span{span("ea", "f")}, ), @@ -313,7 +313,7 @@ func TestSetSubtraction(t *testing.T) { checkEqual(t, []Span{span("d", "da"), span("db", "dc"), span("dd", "df"), span("fa", "g")}, - subtractSpans( + SubtractSpans( []Span{single("b"), span("d", "e"), span("f", "g")}, []Span{span("b", "c"), span("da", "db"), span("dc", "dd"), span("df", "e"), span("f", "fa")}, diff --git a/pkg/sql/logictest/testdata/logic_test/jsonb_path_exists_index_acceleration b/pkg/sql/logictest/testdata/logic_test/jsonb_path_exists_index_acceleration index 86458e368f5f..5d74a8c45649 100644 --- a/pkg/sql/logictest/testdata/logic_test/jsonb_path_exists_index_acceleration +++ b/pkg/sql/logictest/testdata/logic_test/jsonb_path_exists_index_acceleration @@ -556,3 +556,50 @@ SELECT a FROM json_tab@primary WHERE jsonb_path_exists(b, '$.a ? (@.b == $x)', ' statement error index "foo_inv" is inverted and cannot be used for this query SELECT a FROM json_tab@foo_inv WHERE jsonb_path_exists(b, '$.a ? 
(@.b == $x)', '{"x": "c"}') ORDER BY a; + +subtest anykey + +statement ok +DROP TABLE IF EXISTS anykey_json_tab; + +statement ok +CREATE TABLE anykey_json_tab ( + a INT PRIMARY KEY, + b JSONB +); + +statement ok +CREATE INVERTED INDEX anykey_inv ON anykey_json_tab(b) + +statement ok +INSERT INTO anykey_json_tab VALUES +(1, '{"a": {"b": {"c": "d"}}}'), +(2, '{"a": {"b": {"c": {"d": "e"}}}}'), +(3, '{"a": {"b": [{"c": {"d": "e"}}]}}'), +(4, '{"a": {"b": ["c", "d"]}}'), +(5, '{"a": {"b": "d"}}'), +(6, '{"a": {"b1": "d"}}'), +(7, '{"a": {"b1": {"c": {"d": "e"}}}}'); + +query IT +SELECT a, b FROM anykey_json_tab@primary WHERE jsonb_path_exists(b, '$.a.b.*') ORDER BY a; +---- +1 {"a": {"b": {"c": "d"}}} +2 {"a": {"b": {"c": {"d": "e"}}}} +3 {"a": {"b": [{"c": {"d": "e"}}]}} + +query IT +SELECT a, b FROM anykey_json_tab@anykey_inv WHERE jsonb_path_exists(b, '$.a.b.*') ORDER BY a; +---- +1 {"a": {"b": {"c": "d"}}} +2 {"a": {"b": {"c": {"d": "e"}}}} +3 {"a": {"b": [{"c": {"d": "e"}}]}} + +# AnyKey should only be allowed at the end of the path chain. 
+statement error index "anykey_inv" is inverted and cannot be used for this query +SELECT a, b FROM anykey_json_tab@anykey_inv WHERE jsonb_path_exists(b, '$.a.*.b') ORDER BY a; + +statement error index "anykey_inv" is inverted and cannot be used for this query +SELECT a, b FROM anykey_json_tab@anykey_inv WHERE jsonb_path_exists(b, '$.a.b.*.*') ORDER BY a; + +subtest end diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index b4bc03674601..5bb5e55e380a 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -13883,3 +13883,34 @@ project ├── ["a"/Arr/"x"/Arr/False, "a"/Arr/"x"/Arr/False] ├── ["a"/"x"/False, "a"/"x"/False] └── ["a"/"x"/Arr/False, "a"/"x"/Arr/False] + + +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE jsonb_path_exists(j, '$.a.b.*') +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /9 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── [???, "a"/Arr/"b") + │ ├── ["a"/Arr/"b"/PrefixEnd, "a"/Arr/"b"/Arr) + │ ├── ["a"/Arr/"b"/Arr/PrefixEnd, "a"/Arr/Arr/) + │ ├── [???, "a"/"b") + │ ├── ["a"/"b"/PrefixEnd, "a"/"b"/Arr) + │ └── ["a"/"b"/Arr/PrefixEnd, "a"/Arr/) + ├── key: (1) + └── scan b@j_inv_idx,inverted + ├── columns: k:1!null j_inverted_key:9!null + └── inverted constraint: /9/1 + └── spans + ├── [???, "a"/Arr/"b") + ├── ["a"/Arr/"b"/PrefixEnd, "a"/Arr/"b"/Arr) + ├── ["a"/Arr/"b"/Arr/PrefixEnd, "a"/Arr/Arr/) + ├── [???, "a"/"b") + ├── ["a"/"b"/PrefixEnd, "a"/"b"/Arr) + └── ["a"/"b"/Arr/PrefixEnd, "a"/Arr/) diff --git a/pkg/util/jsonpath/path.go b/pkg/util/jsonpath/path.go index 82a5d5d47b1d..f27d4e3eb4bf 100644 --- a/pkg/util/jsonpath/path.go +++ b/pkg/util/jsonpath/path.go @@ -316,7 +316,7 @@ func (a AnyKey) Validate(nestingLevel int, insideArraySubscript bool) error { // isSupportedPathPattern returns true if the given paths matches one of // the 
following patterns, which can be supported by the inverted index: -// - keychain mode: $.[key|wildcard].[key|wildcard]... +// - keychain mode: $.[key|wildcard].[key|wildcard]...(*) // - end value mode: $.[key|wildcard]? (@.[key|wildcard].[key|wildcard]... == [string|number|null|boolean]) // We might call this function recursively if a Path is a Filter, which contains // child Paths. If isSupportedPathPattern is called within a Filter, atRoot @@ -350,6 +350,9 @@ func isSupportedPathPattern(ps []Path, atRoot bool) bool { for i := 1; i < len(ps); i++ { switch pt := ps[i].(type) { case Wildcard, Key: + case AnyKey: + // We only allow AnyKey at the end of the root path. + return i == len(ps)-1 && atRoot case Filter: // We only allow filter at the end of the path. if i != len(ps)-1 { @@ -569,11 +572,43 @@ func buildInvertedIndexSpans( } resultExpression = addSpanToResult(resultExpression, inverted.MakeSingleValSpan(arrayKeys[0])) } else { - // Meaning this is of the keychain mode. (See isSupportedPathPattern). - resultExpression = addSpanToResult(resultExpression, inverted.Span{ + resSpans := []inverted.Span{{ Start: baseKey, - End: keysbase.PrefixEnd(encoding.AddJSONPathSeparator(baseKey)), - }) + End: keysbase.PrefixEnd(encoding.AddJSONPathSeparator(baseKey[:len(baseKey):len(baseKey)])), + }} + // If the last path component is an AnyKey, it means the + // current key must not be an end key (since AnyKey matches + // any key under the current object/array). For example, for + // path $.a.b.*, the following won't match because "b" is the + // end key: + // - {"a": {"b": "d"}} + // - {"a": {"b": ["c"]}} + // But the following will match: + // - {"a": {"b": {"c": "d"}}} + // - {"a": {"b": [{"c": {"d": "e"}}]}} + // In these 2 cases, after "b", there is still "c" + // as the next key, so "b" is not an end key. + if _, isAnyKey := pathComponents[len(pathComponents)-1].(AnyKey); isAnyKey { + // asEndValKey means the baseKey is mapped to an end value + // in the json object. 
(e.g. {"a": {"b": "d"}}) + asEndValKey := encoding.AddJSONPathTerminator(baseKey[:len(baseKey):len(baseKey)]) + asEndValKeySpan := inverted.Span{ + Start: asEndValKey, + End: keysbase.PrefixEnd(asEndValKey), + } + // asEndArrayValKey means the baseKey is mapped to an end value + // in the array object. (e.g. {"a": {"b": ["c"]}}) + asEndArrayValKey := encoding.AddJSONPathTerminator(encoding.EncodeArrayAscending(encoding.AddJSONPathSeparator(baseKey[:len(baseKey):len(baseKey)]))) + asEndArrayValKeySpan := inverted.Span{ + Start: asEndArrayValKey, + End: keysbase.PrefixEnd(asEndArrayValKey), + } + resSpans = inverted.SubtractSpans(resSpans, []inverted.Span{asEndValKeySpan, asEndArrayValKeySpan}) + } + + for _, sp := range resSpans { + resultExpression = addSpanToResult(resultExpression, sp) + } } } return resultExpression