Skip to content

Commit 979a66f

Browse files
KyleAMathewsclaudesamwillis
authored
Enable auto-indexing for nested field paths (#728)
* fix: enable auto-indexing for nested field paths This fix allows auto-indexes to be created for nested field paths (e.g., `profile.score`, `metadata.stats.views`), not just top-level fields. This resolves performance issues where queries with `eq()`, `gt()`, etc. on nested fields were forced to do full table scans instead of using indexes. Changes: - Remove the `fieldPath.length !== 1` restriction in `extractIndexableExpressions()` - Update `ensureIndexForField()` to properly traverse nested paths when creating index accessors - Add comprehensive tests for nested path auto-indexing with 1, 2, and 3-level nesting - Verify that nested path indexes are properly used by the query optimizer Fixes #727 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * fix: use colon-prefixed naming for auto-indexes to avoid conflicts Change auto-index naming from 'auto_field_path' to 'auto:field.path' to prevent ambiguity between nested paths and fields with underscores. Examples: - user.profile -> auto:user.profile - user_profile -> auto:user_profile (no conflict!) Co-authored-by: Sam Willis <sam.willis@gmail.com> * chore: add changeset for nested auto-index fix * style: format changeset with prettier --------- Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Sam Willis <sam.willis@gmail.com>
1 parent f8a979b commit 979a66f

File tree

3 files changed

+217
-10
lines changed

3 files changed

+217
-10
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
---
2+
"@tanstack/db": patch
3+
---
4+
5+
Enable auto-indexing for nested field paths
6+
7+
Previously, auto-indexes were only created for top-level fields. Queries filtering on nested fields like `vehicleDispatch.date` or `profile.score` were forced to perform full table scans, causing significant performance issues.
8+
9+
Now, auto-indexes are automatically created for nested field paths of any depth when using `eq()`, `gt()`, `gte()`, `lt()`, `lte()`, or `in()` operations.
10+
11+
**Performance Impact:**
12+
13+
Before this fix, filtering on nested fields resulted in expensive full scans:
14+
15+
- Query time: ~353ms for 39 executions (from issue #727)
16+
- "graph run" and "d2ts join" operations dominated execution time
17+
18+
After this fix, nested field queries use indexes:
19+
20+
- Query time: Sub-millisecond (typical indexed lookup)
21+
- Proper index utilization verified through query optimizer
22+
23+
**Example:**
24+
25+
```typescript
26+
const collection = createCollection({
27+
getKey: (item) => item.id,
28+
autoIndex: "eager", // default
29+
// ... sync config
30+
})
31+
32+
// These now automatically create and use indexes:
33+
collection.subscribeChanges((items) => console.log(items), {
34+
whereExpression: eq(row.vehicleDispatch?.date, "2024-01-01"),
35+
})
36+
37+
collection.subscribeChanges((items) => console.log(items), {
38+
whereExpression: gt(row.profile?.stats.rating, 4.5),
39+
})
40+
```
41+
42+
**Index Naming:**
43+
44+
Auto-indexes for nested paths use the format `auto:field.path` to avoid naming conflicts:
45+
46+
- `auto:status` for top-level field `status`
47+
- `auto:profile.score` for nested field `profile.score`
48+
- `auto:metadata.stats.views` for deeply nested field `metadata.stats.views`
49+
50+
Fixes #727

packages/db/src/indexes/auto-index.ts

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,25 @@ export function ensureIndexForField<
4444

4545
// Create a new index for this field using the collection's createIndex method
4646
try {
47-
collection.createIndex((row) => (row as any)[fieldName], {
48-
name: `auto_${fieldName}`,
49-
indexType: BTreeIndex,
50-
options: compareFn ? { compareFn, compareOptions } : {},
51-
})
47+
// Use the proxy-based approach to create the proper accessor for nested paths
48+
collection.createIndex(
49+
(row) => {
50+
// Navigate through the field path
51+
let current: any = row
52+
for (const part of fieldPath) {
53+
current = current[part]
54+
}
55+
return current
56+
},
57+
{
58+
name: `auto:${fieldPath.join(`.`)}`,
59+
indexType: BTreeIndex,
60+
options: compareFn ? { compareFn, compareOptions } : {},
61+
}
62+
)
5263
} catch (error) {
5364
console.warn(
54-
`${collection.id ? `[${collection.id}] ` : ``}Failed to create auto-index for field "${fieldName}":`,
65+
`${collection.id ? `[${collection.id}] ` : ``}Failed to create auto-index for field path "${fieldPath.join(`.`)}":`,
5566
error
5667
)
5768
}
@@ -108,20 +119,22 @@ function extractIndexableExpressions(
108119
return
109120
}
110121

111-
// Check if the first argument is a property reference (single field)
122+
// Check if the first argument is a property reference
112123
if (func.args.length < 1 || func.args[0].type !== `ref`) {
113124
return
114125
}
115126

116127
const fieldRef = func.args[0]
117128
const fieldPath = fieldRef.path
118129

119-
// Skip if it's not a simple field (e.g., nested properties or array access)
120-
if (fieldPath.length !== 1) {
130+
// Skip if the path is empty
131+
if (fieldPath.length === 0) {
121132
return
122133
}
123134

124-
const fieldName = fieldPath[0]
135+
// For nested paths, use the full path joined with underscores as the field name
136+
// For simple paths, use the first (and only) element
137+
const fieldName = fieldPath.join(`_`)
125138
results.push({ fieldName, fieldPath })
126139
}
127140

packages/db/tests/collection-auto-index.test.ts

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,4 +750,148 @@ describe(`Collection Auto-Indexing`, () => {
750750

751751
subscription.unsubscribe()
752752
})
753+
754+
it(`should create auto-indexes for nested field paths`, async () => {
755+
interface NestedTestItem {
756+
id: string
757+
name: string
758+
profile?: {
759+
score: number
760+
bio: string
761+
}
762+
metadata?: {
763+
tags: Array<string>
764+
stats: {
765+
views: number
766+
likes: number
767+
}
768+
}
769+
}
770+
771+
const nestedTestData: Array<NestedTestItem> = [
772+
{
773+
id: `1`,
774+
name: `Alice`,
775+
profile: { score: 85, bio: `Developer` },
776+
metadata: {
777+
tags: [`tech`, `coding`],
778+
stats: { views: 100, likes: 50 },
779+
},
780+
},
781+
{
782+
id: `2`,
783+
name: `Bob`,
784+
profile: { score: 92, bio: `Designer` },
785+
metadata: {
786+
tags: [`design`, `ui`],
787+
stats: { views: 200, likes: 75 },
788+
},
789+
},
790+
{
791+
id: `3`,
792+
name: `Charlie`,
793+
profile: { score: 78, bio: `Manager` },
794+
metadata: {
795+
tags: [`management`, `leadership`],
796+
stats: { views: 150, likes: 60 },
797+
},
798+
},
799+
]
800+
801+
const collection = createCollection<NestedTestItem, string>({
802+
getKey: (item) => item.id,
803+
autoIndex: `eager`,
804+
startSync: true,
805+
sync: {
806+
sync: ({ begin, write, commit, markReady }) => {
807+
begin()
808+
for (const item of nestedTestData) {
809+
write({
810+
type: `insert`,
811+
value: item,
812+
})
813+
}
814+
commit()
815+
markReady()
816+
},
817+
},
818+
})
819+
820+
await collection.stateWhenReady()
821+
822+
// Should have no indexes initially
823+
expect(collection.indexes.size).toBe(0)
824+
825+
// Test 1: Nested field one level deep (profile.score)
826+
const changes1: Array<any> = []
827+
const subscription1 = collection.subscribeChanges(
828+
(items) => {
829+
changes1.push(...items)
830+
},
831+
{
832+
includeInitialState: true,
833+
whereExpression: gt(new PropRef([`profile`, `score`]), 80),
834+
}
835+
)
836+
837+
// Should have created an auto-index for profile.score
838+
const profileScoreIndex = Array.from(collection.indexes.values()).find(
839+
(index) =>
840+
index.expression.type === `ref` &&
841+
(index.expression as any).path.length === 2 &&
842+
(index.expression as any).path[0] === `profile` &&
843+
(index.expression as any).path[1] === `score`
844+
)
845+
expect(profileScoreIndex).toBeDefined()
846+
847+
// Verify the filtered results are correct
848+
expect(changes1.filter((c) => c.type === `insert`).length).toBe(2) // Alice (85) and Bob (92)
849+
850+
subscription1.unsubscribe()
851+
852+
// Test 2: Deeply nested field (metadata.stats.views)
853+
const changes2: Array<any> = []
854+
const subscription2 = collection.subscribeChanges(
855+
(items) => {
856+
changes2.push(...items)
857+
},
858+
{
859+
includeInitialState: true,
860+
whereExpression: eq(new PropRef([`metadata`, `stats`, `views`]), 200),
861+
}
862+
)
863+
864+
// Should have created an auto-index for metadata.stats.views
865+
const viewsIndex = Array.from(collection.indexes.values()).find(
866+
(index) =>
867+
index.expression.type === `ref` &&
868+
(index.expression as any).path.length === 3 &&
869+
(index.expression as any).path[0] === `metadata` &&
870+
(index.expression as any).path[1] === `stats` &&
871+
(index.expression as any).path[2] === `views`
872+
)
873+
expect(viewsIndex).toBeDefined()
874+
875+
// Verify the filtered results are correct
876+
expect(changes2.filter((c) => c.type === `insert`).length).toBe(1) // Only Bob has 200 views
877+
878+
subscription2.unsubscribe()
879+
880+
// Test 3: Index usage verification with tracker
881+
withIndexTracking(collection, (tracker) => {
882+
const result = collection.currentStateAsChanges({
883+
where: gt(new PropRef([`profile`, `score`]), 80),
884+
})!
885+
886+
expect(result.length).toBe(2) // Alice and Bob
887+
888+
// Verify it used the auto-created index
889+
expectIndexUsage(tracker.stats, {
890+
shouldUseIndex: true,
891+
shouldUseFullScan: false,
892+
indexCallCount: 1,
893+
fullScanCallCount: 0,
894+
})
895+
})
896+
})
753897
})

0 commit comments

Comments
 (0)