Skip to content

Commit d351fce

Browse files
authored
perf(builder): defer NativeDatabase init to after no-op early exit (#884)
* perf(builder): defer NativeDatabase init to after no-op early exit On no-op rebuilds, the native engine opened NativeDatabase, ran initSchema, checkpointed WAL, then immediately closed it — all before detectChanges even ran. This added ~5ms overhead on every no-op rebuild where the connection was never used. Move NativeDatabase.openReadWrite() from setupPipeline to on-demand: - tryNativeOrchestrator opens it when the Rust fast path will run - runPipelineStages defers it until after the early-exit check - setupPipeline now only probes loadNative() for availability - checkEngineSchemaMismatch uses better-sqlite3 (reads are WAL-safe) - Remove dead batched native path from getChangedFiles (nativeDb was already undefined during detectChanges due to suspendNativeDb) * fix(builder): restore WAL checkpoint callbacks after reopenNativeDb (#884) suspendJsDb and resumeJsDb were initialized to undefined in initializeEngine and never reassigned when nativeDb was reopened for the analysis phase. This silently removed the dual-connection WAL corruption guard introduced in #696/#715. Assign both callbacks alongside engineOpts.nativeDb in the reopenNativeDb('analyses') block.
1 parent 3dd317d commit d351fce

File tree

3 files changed

+62
-88
lines changed

3 files changed

+62
-88
lines changed

src/domain/graph/builder/context.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ export class PipelineContext {
3333
forceFullRebuild: boolean = false;
3434
schemaVersion!: number;
3535
nativeDb?: NativeDatabase;
36+
/** Whether native engine is available (deferred — DB opened only when needed). */
37+
nativeAvailable: boolean = false;
3638

3739
// ── File collection (set by collectFiles stage) ────────────────────
3840
allFiles!: string[];

src/domain/graph/builder/pipeline.ts

Lines changed: 57 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -43,29 +43,11 @@ function initializeEngine(ctx: PipelineContext): void {
4343
engine: ctx.opts.engine || 'auto',
4444
dataflow: ctx.opts.dataflow !== false,
4545
ast: ctx.opts.ast !== false,
46-
nativeDb: ctx.nativeDb,
47-
// WAL checkpoint callbacks for dual-connection WAL guard (#696, #715).
48-
// Feature modules (ast, cfg, complexity, dataflow) receive `db` as a
49-
// parameter and cannot tolerate close/reopen (stale reference). Instead,
50-
// checkpoint the WAL so native writes start with a clean slate.
51-
// After native writes, resumeJsDb checkpoints through rusqlite so
52-
// better-sqlite3 never reads WAL frames from a different SQLite library.
53-
suspendJsDb: ctx.nativeDb
54-
? () => {
55-
ctx.db.pragma('wal_checkpoint(TRUNCATE)');
56-
}
57-
: undefined,
58-
resumeJsDb: ctx.nativeDb
59-
? () => {
60-
try {
61-
ctx.nativeDb?.exec('PRAGMA wal_checkpoint(TRUNCATE)');
62-
} catch (e) {
63-
debug(
64-
`resumeJsDb: WAL checkpoint failed (nativeDb may already be closed): ${toErrorMessage(e)}`,
65-
);
66-
}
67-
}
68-
: undefined,
46+
// nativeDb and WAL callbacks are set later when NativeDatabase is opened
47+
// (deferred to skip overhead on no-op rebuilds).
48+
nativeDb: undefined,
49+
suspendJsDb: undefined,
50+
resumeJsDb: undefined,
6951
};
7052
const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts);
7153
ctx.engineName = engineName as 'native' | 'wasm';
@@ -79,11 +61,10 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void {
7961
ctx.forceFullRebuild = false;
8062
if (!ctx.incremental) return;
8163

82-
// Route metadata reads through NativeDatabase only when using the native engine,
83-
// to avoid dual-SQLite WAL conflicts (rusqlite + better-sqlite3 on same file).
84-
const useNativeDb = ctx.engineName === 'native' && !!ctx.nativeDb;
85-
const meta = (key: string): string | null =>
86-
useNativeDb ? ctx.nativeDb!.getBuildMeta(key) : getBuildMeta(ctx.db, key);
64+
// NativeDatabase is deferred until after change detection, so always use
65+
// better-sqlite3 for metadata reads here. Reads are safe — WAL conflicts
66+
// only arise from concurrent writes.
67+
const meta = (key: string): string | null => getBuildMeta(ctx.db, key);
8768

8869
const prevEngine = meta('engine');
8970
if (prevEngine && prevEngine !== ctx.engineName) {
@@ -130,35 +111,13 @@ function setupPipeline(ctx: PipelineContext): void {
130111
ctx.rootDir = path.resolve(ctx.rootDir);
131112
ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db');
132113
ctx.db = openDb(ctx.dbPath);
114+
initSchema(ctx.db);
133115

134-
// Use NativeDatabase for schema init when native engine is available (Phase 6.13).
135-
// better-sqlite3 (ctx.db) is still always opened — needed for queries and stages
136-
// that haven't been migrated to rusqlite yet.
137-
// Skip native DB entirely when user explicitly requested --engine wasm.
116+
// Detect whether native engine is available, but defer opening NativeDatabase.
117+
// The native orchestrator opens it on demand; the JS pipeline defers until
118+
// after change detection — avoiding ~5ms open+initSchema+close on no-op rebuilds.
138119
const enginePref = ctx.opts.engine || 'auto';
139-
const native = enginePref !== 'wasm' ? loadNative() : null;
140-
if (native?.NativeDatabase) {
141-
try {
142-
ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
143-
ctx.nativeDb.initSchema();
144-
// Checkpoint WAL through rusqlite so better-sqlite3 sees a clean DB
145-
// with no cross-library WAL frames (#715, #717).
146-
ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
147-
} catch (err) {
148-
warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
149-
try {
150-
ctx.nativeDb?.close();
151-
} catch (e) {
152-
debug(`setupNativeDb: close failed during fallback: ${toErrorMessage(e)}`);
153-
}
154-
ctx.nativeDb = undefined;
155-
}
156-
// Always run JS initSchema so better-sqlite3 sees the schema —
157-
// nativeDb is closed during pipeline stages and reopened for analyses.
158-
initSchema(ctx.db);
159-
} else {
160-
initSchema(ctx.db);
161-
}
120+
ctx.nativeAvailable = enginePref !== 'wasm' && !!loadNative()?.NativeDatabase;
162121

163122
ctx.config = loadConfig(ctx.rootDir);
164123
ctx.incremental =
@@ -553,6 +512,28 @@ async function tryNativeOrchestrator(
553512
debug(`Skipping native orchestrator: ${skipReason}`);
554513
return undefined;
555514
}
515+
516+
// Open NativeDatabase on demand for the orchestrator.
517+
// Deferred from setupPipeline so no-op JS pipeline rebuilds skip the overhead.
518+
if (!ctx.nativeDb && ctx.nativeAvailable) {
519+
const native = loadNative();
520+
if (native?.NativeDatabase) {
521+
try {
522+
ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
523+
ctx.nativeDb.initSchema();
524+
ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
525+
} catch (err) {
526+
warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
527+
try {
528+
ctx.nativeDb?.close();
529+
} catch (e) {
530+
debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
531+
}
532+
ctx.nativeDb = undefined;
533+
}
534+
}
535+
}
536+
556537
if (!ctx.nativeDb?.buildGraph) return undefined;
557538

558539
const resultJson = ctx.nativeDb.buildGraph(
@@ -639,12 +620,11 @@ async function tryNativeOrchestrator(
639620
// ── Pipeline stages execution ───────────────────────────────────────────
640621

641622
async function runPipelineStages(ctx: PipelineContext): Promise<void> {
642-
// Prevent dual-connection WAL corruption during pipeline stages: when both
643-
// better-sqlite3 (ctx.db) and rusqlite (ctx.nativeDb) are open to the same
644-
// WAL-mode file, native writes corrupt the DB. Close nativeDb so stages
645-
// use JS fallback paths. Reopened before runAnalyses for feature modules
646-
// that use suspendJsDb/resumeJsDb WAL checkpoint pattern (#696).
647-
const hadNativeDb = !!ctx.nativeDb;
623+
// NativeDatabase is deferred — not opened during setup. collectFiles and
624+
// detectChanges only need better-sqlite3. If no files changed, we exit
625+
// early without ever opening the native connection, saving ~5ms.
626+
// If nativeDb was opened by tryNativeOrchestrator (which fell through),
627+
// suspend it now to avoid dual-connection WAL corruption during stages.
648628
if (ctx.db && ctx.nativeDb) {
649629
suspendNativeDb(ctx, 'pre-collect');
650630
}
@@ -659,7 +639,7 @@ async function runPipelineStages(ctx: PipelineContext): Promise<void> {
659639
// Temporarily reopen nativeDb for insertNodes — it uses the WAL checkpoint
660640
// guard internally (same pattern as feature modules). Closed again before
661641
// resolveImports/buildEdges which don't yet have the guard (#709).
662-
if (hadNativeDb && ctx.engineName === 'native') {
642+
if (ctx.nativeAvailable && ctx.engineName === 'native') {
663643
reopenNativeDb(ctx, 'insertNodes');
664644
}
665645

@@ -677,13 +657,27 @@ async function runPipelineStages(ctx: PipelineContext): Promise<void> {
677657

678658
// Reopen nativeDb for feature modules (ast, cfg, complexity, dataflow)
679659
// which use suspendJsDb/resumeJsDb WAL checkpoint before native writes.
680-
if (hadNativeDb) {
660+
if (ctx.nativeAvailable) {
681661
reopenNativeDb(ctx, 'analyses');
682662
if (ctx.nativeDb && ctx.engineOpts) {
683663
ctx.engineOpts.nativeDb = ctx.nativeDb;
664+
ctx.engineOpts.suspendJsDb = () => {
665+
ctx.db.pragma('wal_checkpoint(TRUNCATE)');
666+
};
667+
ctx.engineOpts.resumeJsDb = () => {
668+
try {
669+
ctx.nativeDb?.exec('PRAGMA wal_checkpoint(TRUNCATE)');
670+
} catch (e) {
671+
debug(
672+
`resumeJsDb: WAL checkpoint failed (nativeDb may already be closed): ${toErrorMessage(e)}`,
673+
);
674+
}
675+
};
684676
}
685677
if (!ctx.nativeDb && ctx.engineOpts) {
686678
ctx.engineOpts.nativeDb = undefined;
679+
ctx.engineOpts.suspendJsDb = undefined;
680+
ctx.engineOpts.resumeJsDb = undefined;
687681
}
688682
}
689683

src/domain/graph/builder/stages/detect-changes.ts

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -58,26 +58,9 @@ function getChangedFiles(
5858
db: BetterSqlite3Database,
5959
allFiles: string[],
6060
rootDir: string,
61-
nativeDb?: NativeDatabase,
6261
): ChangeResult {
63-
// Batched native path: single napi call for table check + all rows + max mtime
64-
if (nativeDb?.getFileHashData) {
65-
const data = nativeDb.getFileHashData();
66-
if (!data.exists) {
67-
return {
68-
changed: allFiles.map((f) => ({ file: f })),
69-
removed: [],
70-
isFullBuild: true,
71-
};
72-
}
73-
const existing = new Map<string, FileHashRow>(data.rows.map((r) => [r.file, r]));
74-
const removed = detectRemovedFiles(existing, allFiles, rootDir);
75-
const journalResult = tryJournalTier(db, existing, rootDir, removed, data.maxMtime);
76-
if (journalResult) return journalResult;
77-
return mtimeAndHashTiers(existing, allFiles, rootDir, removed);
78-
}
79-
80-
// WASM / fallback path
62+
// NativeDatabase is not open during change detection (deferred to after
63+
// early-exit check). All queries use better-sqlite3 here.
8164
let hasTable = false;
8265
try {
8366
db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
@@ -487,12 +470,7 @@ export async function detectChanges(ctx: PipelineContext): Promise<void> {
487470
}
488471
const increResult =
489472
incremental && !forceFullRebuild
490-
? getChangedFiles(
491-
db,
492-
allFiles,
493-
rootDir,
494-
ctx.engineName === 'native' ? ctx.nativeDb : undefined,
495-
)
473+
? getChangedFiles(db, allFiles, rootDir)
496474
: {
497475
changed: allFiles.map((f): ChangedFile => ({ file: f })),
498476
removed: [] as string[],

0 commit comments

Comments
 (0)