From 54e99fbd3e33005ebb47a7c489532ae094b6d495 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Thu, 26 Mar 2026 20:34:04 +0200 Subject: [PATCH 01/34] Migration feature - Phase 1 - comparison between the target and source DBs --- .claude/settings.local.json | 182 ++++++++ apps/api/src/app.module.ts | 2 + .../src/migration/analysis/analysis-job.ts | 14 + .../migration/analysis/commandlog-analyzer.ts | 55 +++ .../analysis/compatibility-checker.ts | 226 +++++++++ .../src/migration/analysis/hfe-detector.ts | 128 ++++++ .../api/src/migration/analysis/ttl-sampler.ts | 43 ++ .../src/migration/analysis/type-sampler.ts | 50 ++ .../api/src/migration/migration.controller.ts | 45 ++ apps/api/src/migration/migration.module.ts | 10 + apps/api/src/migration/migration.service.ts | 435 ++++++++++++++++++ apps/web/src/App.tsx | 11 + .../src/components/migration/AnalysisForm.tsx | 110 +++++ .../migration/AnalysisProgressBar.tsx | 81 ++++ .../src/components/migration/ExportBar.tsx | 34 ++ .../components/migration/MigrationReport.tsx | 24 + .../migration/sections/CommandSection.tsx | 55 +++ .../migration/sections/DataTypeSection.tsx | 80 ++++ .../migration/sections/HfeSection.tsx | 54 +++ .../migration/sections/SummarySection.tsx | 86 ++++ .../migration/sections/TtlSection.tsx | 50 ++ .../migration/sections/VerdictSection.tsx | 96 ++++ apps/web/src/pages/MigrationPage.tsx | 77 ++++ packages/shared/src/index.ts | 1 + packages/shared/src/license/types.ts | 2 + packages/shared/src/types/migration.ts | 101 ++++ packages/shared/tsconfig.tsbuildinfo | 1 + 27 files changed, 2053 insertions(+) create mode 100644 .claude/settings.local.json create mode 100644 apps/api/src/migration/analysis/analysis-job.ts create mode 100644 apps/api/src/migration/analysis/commandlog-analyzer.ts create mode 100644 apps/api/src/migration/analysis/compatibility-checker.ts create mode 100644 apps/api/src/migration/analysis/hfe-detector.ts create mode 100644 apps/api/src/migration/analysis/ttl-sampler.ts create 
mode 100644 apps/api/src/migration/analysis/type-sampler.ts create mode 100644 apps/api/src/migration/migration.controller.ts create mode 100644 apps/api/src/migration/migration.module.ts create mode 100644 apps/api/src/migration/migration.service.ts create mode 100644 apps/web/src/components/migration/AnalysisForm.tsx create mode 100644 apps/web/src/components/migration/AnalysisProgressBar.tsx create mode 100644 apps/web/src/components/migration/ExportBar.tsx create mode 100644 apps/web/src/components/migration/MigrationReport.tsx create mode 100644 apps/web/src/components/migration/sections/CommandSection.tsx create mode 100644 apps/web/src/components/migration/sections/DataTypeSection.tsx create mode 100644 apps/web/src/components/migration/sections/HfeSection.tsx create mode 100644 apps/web/src/components/migration/sections/SummarySection.tsx create mode 100644 apps/web/src/components/migration/sections/TtlSection.tsx create mode 100644 apps/web/src/components/migration/sections/VerdictSection.tsx create mode 100644 apps/web/src/pages/MigrationPage.tsx create mode 100644 packages/shared/src/types/migration.ts create mode 100644 packages/shared/tsconfig.tsbuildinfo diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 00000000..8d16b3c3 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,182 @@ +{ + "permissions": { + "allow": [ + "Bash(git fetch:*)", + "Bash(fuser:*)", + "Bash(NODE_ENV=development BETTERDB_LICENSE_KEY=asdasd STORAGE_TYPE=postgres STORAGE_URL=\"postgresql://betterdb:devpassword@localhost:5432/betterdb\" DB_HOST=localhost DB_PORT=6380 DB_PASSWORD=devpassword DB_USERNAME=default pnpm dev:*)", + "Bash(curl:*)", + "Bash(__NEW_LINE_69c8496eb0256e29__ echo \"\")", + "Bash(# Get full response to see timestamps curl -s \"\"http://localhost:3001/metrics/slowlog?count=30&excludeMonitor=true\"\")", + "Bash(__NEW_LINE_6cdd3e3918f56f8e__ echo \"\")", + "Bash(pnpm add:*)", + "Bash(NODE_ENV=development 
BETTERDB_LICENSE_KEY=asdasd STORAGE_TYPE=postgres STORAGE_URL=\"postgresql://postgres:postgres@localhost:5433/betterdb\" pnpm dev:*)", + "Bash(NODE_ENV=development BETTERDB_LICENSE_KEY=asdasd DATABASE_HOST=localhost DATABASE_PORT=6380 DATABASE_PASSWORD=devpassword STORAGE_TYPE=postgres STORAGE_URL=\"postgresql://betterdb:devpassword@localhost:5432/betterdb\" pnpm dev:*)", + "Bash(pnpm:*)", + "Bash(valkey-cli:*)", + "Bash(pkill:*)", + "Bash(pgrep:*)", + "Bash(npx tsc:*)", + "Bash(npx ts-node:*)", + "Bash(# Kill all related processes and start fresh pkill -9 -f \"\"ts-node\"\" || true pkill -9 -f \"\"vite\"\" || true sleep 2 cd /home/kristiyan/projects/valkey/monitor && NODE_ENV=development DB_HOST=localhost DB_PORT=6380 DB_PASSWORD=devpassword STORAGE_TYPE=postgres STORAGE_URL=\"\"postgresql://betterdb:devpassword@localhost:5432/betterdb\"\" pnpm dev 2>&1 & sleep 8 grep -E \"\"\\(LOG|ERROR|starting|running\\)\"\" /tmp/server2.log)", + "Bash(PGPASSWORD=devpassword psql:*)", + "Bash(python3:*)", + "Bash(node /tmp/insert-test-data.js:*)", + "Bash(# Test with exact range around yesterday''s timestamp \\(1769538717\\) echo \"\"Querying for timestamp around 1769538717 \\(yesterday\\)\"\" curl -s \"\"http://localhost:3001/commandlog-analytics/entries?type=slow&startTime=1769538000&endTime=1769539000&limit=50\"\" cat /tmp/yesterday.json)", + "Bash(ls:*)", + "Bash(docker run:*)", + "Bash(docker logs:*)", + "Bash(docker rm:*)", + "Bash(docker ps:*)", + "Bash(docker stop:*)", + "Bash(docker network ls:*)", + "Bash(docker exec betterdb-monitor-valkey valkey-cli:*)", + "Bash(# Find the SQLite database in the container docker exec betterdb-monitor-app ls -la /app/data/ || docker exec betterdb-monitor-app find /app -name \"\"*.db\"\")", + "Bash(docker exec:*)", + "Bash(docker pull:*)", + "Bash(git -C /home/kristiyan/projects/valkey/monitor ls-files:*)", + "Bash(git -C /home/kristiyan/projects/valkey/monitor/proprietary ls-files:*)", + "Bash(git -C 
/home/kristiyan/projects/valkey/monitor check-ignore proprietary/)", + "Bash(git add:*)", + "Bash(git check-ignore:*)", + "Bash(docker compose:*)", + "Bash(docker start:*)", + "Bash(docker restart:*)", + "Bash(ss:*)", + "Bash(netstat:*)", + "Bash(# Check current memory usage docker exec betterdb-monitor-valkey valkey-cli -a devpassword --no-auth-warning INFO memory)", + "Bash(# Check what network the existing containers are on docker network ls docker inspect betterdb-monitor-valkey --format ''{{range .NetworkSettings.Networks}}{{.NetworkID}}{{end}}'')", + "Bash(docker inspect:*)", + "Bash(tree:*)", + "Bash(npx jest:*)", + "Bash(NODE_OPTIONS=\"--experimental-vm-modules\" node:*)", + "Bash(echo:*)", + "Bash(SKIP_DOCKER_SETUP=1 npx jest:*)", + "Bash(SKIP_DOCKER_SETUP=true npx jest:*)", + "Bash(for i in {1..20})", + "Bash(do echo \"Pull #$i\")", + "Bash(done)", + "Bash(git pull:*)", + "Bash(sudo lsof:*)", + "Bash(npm run build:*)", + "Bash(npm test:*)", + "Bash(python:*)", + "Bash(source venv/bin/activate)", + "Bash(VALKEY_PASSWORD=devpassword python3:*)", + "Bash(grep:*)", + "Bash(# Check the password from docker-compose or environment grep -r \"\"6380\\\\|password\\\\|VALKEY\"\" /home/kristiyan/projects/valkey/monitor/docker-compose*.yml)", + "Bash(redis-cli:*)", + "Bash(# Check current connected clients echo \"\"Current connected clients:\"\" redis-cli -p 6380 -a devpassword CLIENT LIST)", + "Bash(# Check rejected_connections before echo \"\"Before - rejected connections:\"\" redis-cli -p 6380 -a devpassword INFO clients)", + "Bash(# Kill any lingering background redis-cli processes pkill -f \"\"BLPOP.*flood_queue\"\" pkill -f \"\"BLPOP.*waiting_queue\"\" # Get baseline rejected connections echo \"\"Baseline rejected_connections:\"\" redis-cli -p 6380 -a devpassword INFO clients)", + "Bash(# Check docker-compose for 6381 config grep -A 20 \"\"6381\"\" /home/kristiyan/projects/valkey/monitor/docker-compose.yml)", + "Bash(node -e:*)", + "Bash(do valkey-cli -p 6380 
SET \"test-key-$i\" \"value-$i\")", + "Bash(# Create multiple connections to 6380 to spike connection count for i in {1..10}; do \\(valkey-cli -p 6380 DEBUG SLEEP 5 &\\) done echo \"\"Spiked connections on 6380. Wait 10 seconds for metrics to be collected...\"\" sleep 10 valkey-cli -p 6380 CLIENT LIST)", + "Bash(# Use BLPOP to keep connections open on 6380 for i in {1..15}; do \\(valkey-cli -p 6380 BLPOP nonexistent-key-$i 30 &\\) done echo \"\"Created blocking connections on 6380\"\" sleep 2 valkey-cli -p 6380 CLIENT LIST)", + "Bash(find:*)", + "Bash(__NEW_LINE_f35492c487c1cd20__ echo \"\")", + "Bash(__NEW_LINE_6c920833c1681c54__ echo \"\")", + "Bash(__NEW_LINE_39bd60a6c51894a0__ echo \"\")", + "Bash(__NEW_LINE_66edc211ee499a7e__ echo \"\")", + "Bash(docker-compose ps:*)", + "Bash(xargs:*)", + "Bash(__NEW_LINE_42366b605d3a7b88__ echo \"\")", + "Bash(__NEW_LINE_53e463c78b7eeb92__ echo \"\")", + "Bash(# Generate some data first for i in {1..1000}; do docker exec betterdb-monitor-valkey valkey-cli -a devpassword SET \"\"testkey:$i\"\" \"\"value$i\"\" done echo \"\"Created 1000 keys\"\" # Run expensive KEYS command \\(will be slow and logged\\) docker exec betterdb-monitor-valkey valkey-cli -a devpassword KEYS \"\"*\"\")", + "Bash(# Generate data on 6381 for i in {1..1000}; do docker exec valkey-6381 valkey-cli -a devpassword SET \"\"testkey:$i\"\" \"\"value$i\"\" done echo \"\"Created 1000 keys on 6381\"\" # Run expensive KEYS command docker exec valkey-6381 valkey-cli -a devpassword KEYS \"\"*\"\")", + "Bash(WEBHOOK_ID=\"7e8fd7cc-931a-4485-916e-bb0c519271e9\":*)", + "Bash(__NEW_LINE_d20a33920bc98c2c__ echo \"\")", + "Bash(node /home/kristiyan/projects/valkey/monitor/packages/cli/bin/betterdb.js:*)", + "Bash(npx esbuild:*)", + "Bash(docker network inspect:*)", + "Bash(npm view:*)", + "Bash(node:*)", + "Bash(npm pack:*)", + "Bash(git -C /home/kristiyan/projects/valkey/monitor log --oneline -5)", + "Bash(npm cache clean:*)", + "Bash(npm uninstall:*)", + "Bash(npm 
install:*)", + "Bash(betterdb:*)", + "Bash(gh run list:*)", + "Bash(gh run view:*)", + "Bash(lsof:*)", + "Bash(npm ls:*)", + "Bash(sudo npm uninstall:*)", + "Bash(sudo npm install:*)", + "Bash(npx prisma migrate dev:*)", + "Bash(npx prisma generate:*)", + "Bash(docker build:*)", + "Bash(docker images:*)", + "Bash(cd /home/kristiyan/projects/valkey/monitor/apps/frontend && npx vite build 2>&1)", + "Bash(cd /home/kristiyan/projects/valkey/monitor/apps/web && npx vite build 2>&1)", + "Bash(git:*)", + "Bash(roborev show:*)", + "Bash(roborev review:*)", + "Bash(roborev list:*)", + "Bash(roborev status:*)", + "Bash(roborev:*)", + "mcp__pointer__get-pointed-element", + "Bash(npm list:*)", + "Bash(valkey-benchmark:*)", + "Bash(npx vitest:*)", + "Bash(gh release:*)", + "Bash(gh pr:*)", + "Bash(do echo:*)", + "Bash(psql:*)", + "Read(//usr/bin/**)", + "Read(//proc/567452/**)", + "Bash(kill 599425:*)", + "Bash(kill -9 815654 2>/dev/null; lsof -ti:3001 | xargs kill -9 2>/dev/null; echo \"killed\")", + "Bash(cd /home/kristiyan/projects/valkey/monitor/apps/web && ./node_modules/.bin/tsc --noEmit 2>&1 | head -20)", + "Bash(cd /home/kristiyan/projects/valkey/monitor && npx turbo run test 2>&1 | tail -30)", + "Bash(head -5 apps/api/jest.config.* 2>/dev/null || head -5 apps/api/package.json 2>/dev/null; grep -m1 '\"test\"' apps/api/package.json 2>/dev/null || true)", + "Bash(npm run:*)", + "Bash(kill 1600607 2>/dev/null; kill 1596434 2>/dev/null; sleep 1 && fuser -k 3001/tcp 2>/dev/null; fuser -k 5173/tcp 2>/dev/null; sleep 1 && echo \"All cleared\")", + "Bash(kill 1607901)", + "Bash(find /home/kristiyan/projects/valkey/monitor/proprietary -type f \\\\\\(-name *.ts -o -name *.js -o -name *.md \\\\\\))", + "Skill(update-config)", + "Bash(npx:*)", + "Bash(kill:*)", + "Bash(1 echo curl -s -o /dev/null -w %{http_code} http://localhost:3390/api/health 2)", + "Bash(1 echo ss -tlnp)", + "Bash(BETTERDB_URL=http://localhost:3390 node:*)", + "Bash(/tmp/mcp-stderr.log echo 'EXIT: $?' 
echo '=== STDOUT ===' cat /tmp/mcp-stdout.log echo '=== STDERR ===' cat /tmp/mcp-stderr.log)", + "Bash(printf:*)", + "Bash(/tmp/mcp-stderr2.log echo 'EXIT: $?' echo '=== STDOUT ===' cat /tmp/mcp-stdout2.log echo '=== STDERR ===' cat /tmp/mcp-stderr2.log)", + "Bash(xxd:*)", + "Bash(bash:*)", + "Bash(PORT=3001 npx @betterdb/monitor:*)", + "WebFetch(domain:static.modelcontextprotocol.io)", + "Bash(VALKEY_URL=redis://localhost:6390 pnpm test 2>&1)", + "Bash(VALKEY_URL=redis://localhost:6390 npx tsx index.ts --mock)", + "Bash(ln -sf ../../../../ /home/kristiyan/projects/valkey/monitor/packages/semantic-cache/examples/basic/node_modules/@betterdb/semantic-cache)", + "Bash(cd:*)", + "Bash(VALKEY_URL=redis://localhost:6390 node -e \":*)", + "WebFetch(domain:eclips4.github.io)", + "Bash(node -e \"console.log\\(require\\(''iovalkey/package.json''\\).version\\)\")", + "WebFetch(domain:github.com)", + "Bash(gh api:*)", + "WebFetch(domain:www.npmjs.com)", + "Bash(claude mcp:*)", + "Read(//tmp/**)", + "Bash(tar xzf:*)", + "Bash(npm --version)", + "mcp__betterdb__list_instances", + "mcp__betterdb__select_instance", + "mcp__betterdb__get_health", + "mcp__betterdb__get_slowlog", + "mcp__betterdb__get_slowlog_patterns", + "mcp__betterdb__get_memory", + "mcp__betterdb__get_hot_keys", + "mcp__betterdb__get_info", + "mcp__betterdb__get_anomalies", + "Bash(cat:*)", + "mcp__betterdb-memory__search_context", + "Bash(test:*)" + ], + "deny": [], + "ask": [] + }, + "enableAllProjectMcpServers": true, + "enabledMcpjsonServers": [] +} diff --git a/apps/api/src/app.module.ts b/apps/api/src/app.module.ts index e63e8d3a..10cad571 100644 --- a/apps/api/src/app.module.ts +++ b/apps/api/src/app.module.ts @@ -16,6 +16,7 @@ import { SettingsModule } from './settings/settings.module'; import { WebhooksModule } from './webhooks/webhooks.module'; import { TelemetryModule } from './telemetry/telemetry.module'; import { VectorSearchModule } from './vector-search/vector-search.module'; +import { 
MigrationModule } from './migration/migration.module'; import { CloudAuthModule } from './auth/cloud-auth.module'; import { McpModule } from './mcp/mcp.module'; @@ -119,6 +120,7 @@ const baseImports = [ WebhooksModule, McpModule, VectorSearchModule, + MigrationModule, ]; const proprietaryImports = [ diff --git a/apps/api/src/migration/analysis/analysis-job.ts b/apps/api/src/migration/analysis/analysis-job.ts new file mode 100644 index 00000000..4c21a25c --- /dev/null +++ b/apps/api/src/migration/analysis/analysis-job.ts @@ -0,0 +1,14 @@ +import type { MigrationJobStatus, MigrationAnalysisResult } from '@betterdb/shared'; +import type Valkey from 'iovalkey'; + +export interface AnalysisJob { + id: string; + status: MigrationJobStatus; + progress: number; + createdAt: number; + completedAt?: number; + error?: string; + result: Partial; + cancelled: boolean; + nodeClients: Valkey[]; +} diff --git a/apps/api/src/migration/analysis/commandlog-analyzer.ts b/apps/api/src/migration/analysis/commandlog-analyzer.ts new file mode 100644 index 00000000..ffdfc6f8 --- /dev/null +++ b/apps/api/src/migration/analysis/commandlog-analyzer.ts @@ -0,0 +1,55 @@ +import type { DatabasePort } from '../../common/interfaces/database-port.interface'; +import type { CommandAnalysis } from '@betterdb/shared'; + +export async function analyzeCommands( + adapter: DatabasePort, +): Promise { + const result: CommandAnalysis = { + sourceUsed: 'unavailable', + topCommands: [], + }; + + const capabilities = adapter.getCapabilities(); + let commandNames: string[] = []; + + // Try COMMANDLOG first + if (capabilities.hasCommandLog) { + try { + const entries = await adapter.getCommandLog(200); + commandNames = entries.map(e => { + const args = e.command ?? []; + return args.length > 0 ? 
String(args[0]).toUpperCase() : ''; + }).filter(Boolean); + result.sourceUsed = 'commandlog'; + } catch { + // Fall through to slowlog + } + } + + // Fallback to SLOWLOG + if (result.sourceUsed === 'unavailable') { + try { + const entries = await adapter.getSlowLog(128); + commandNames = entries.map(e => { + const args = e.command ?? []; + return args.length > 0 ? String(args[0]).toUpperCase() : ''; + }).filter(Boolean); + result.sourceUsed = 'slowlog'; + } catch { + // Both unavailable + return result; + } + } + + // Top commands + const counts = new Map(); + for (const cmd of commandNames) { + counts.set(cmd, (counts.get(cmd) ?? 0) + 1); + } + result.topCommands = Array.from(counts.entries()) + .map(([command, count]) => ({ command, count })) + .sort((a, b) => b.count - a.count) + .slice(0, 50); + + return result; +} diff --git a/apps/api/src/migration/analysis/compatibility-checker.ts b/apps/api/src/migration/analysis/compatibility-checker.ts new file mode 100644 index 00000000..dc7e81aa --- /dev/null +++ b/apps/api/src/migration/analysis/compatibility-checker.ts @@ -0,0 +1,226 @@ +import type { Incompatibility } from '@betterdb/shared'; +import type { DatabaseCapabilities } from '../../common/interfaces/database-port.interface'; + +export interface InstanceMeta { + dbType: 'valkey' | 'redis'; + version: string; + capabilities: DatabaseCapabilities; + clusterEnabled: boolean; + databases: number[]; + modules: string[]; + maxmemoryPolicy: string; + hasAclUsers: boolean; + persistenceMode: string; +} + +/** + * Compare two semver strings: returns true if a >= b. + * Handles versions like "8.1.0", "7.2.4", etc. + */ +function semverGte(a: string, b: string): boolean { + const partsA = a.split('.').map(s => parseInt(s, 10) || 0); + const partsB = b.split('.').map(s => parseInt(s, 10) || 0); + const len = Math.max(partsA.length, partsB.length); + for (let i = 0; i < len; i++) { + const va = partsA[i] ?? 0; + const vb = partsB[i] ?? 
0; + if (va > vb) return true; + if (va < vb) return false; + } + return true; // equal +} + +export function buildInstanceMeta( + info: Record, + capabilities: DatabaseCapabilities, + aclUsers: string[], +): InstanceMeta { + // clusterEnabled + const clusterEnabled = String(info['cluster_enabled'] ?? '0') === '1'; + + // databases: parse keys like 'db0', 'db1', etc. + const databases: number[] = []; + for (const key of Object.keys(info)) { + const match = key.match(/^db(\d+)$/); + if (match && typeof info[key] === 'string') { + databases.push(parseInt(match[1], 10)); + } + } + if (databases.length === 0) { + databases.push(0); + } + + // modules: will be populated by caller via client.call('MODULE', 'LIST') + // For now, default to empty — the caller sets this after construction if needed + const modules: string[] = []; + + // maxmemoryPolicy + const maxmemoryPolicy = (info['maxmemory_policy'] as string) ?? 'noeviction'; + + // hasAclUsers: more than just the 'default' user + const hasAclUsers = aclUsers.length > 1; + + // persistenceMode + let hasRdb = false; + let hasAof = false; + + const rdbLastSaveTime = Number(info['rdb_last_save_time'] ?? 0); + if (rdbLastSaveTime > 0) { + hasRdb = true; + } + + const aofEnabled = String(info['aof_enabled'] ?? '0'); + if (aofEnabled === '1') { + hasAof = true; + } + + let persistenceMode: string; + if (hasRdb && hasAof) { + persistenceMode = 'rdb+aof'; + } else if (hasRdb) { + persistenceMode = 'rdb'; + } else if (hasAof) { + persistenceMode = 'aof'; + } else { + persistenceMode = 'none'; + } + + return { + dbType: capabilities.dbType, + version: capabilities.version, + capabilities, + clusterEnabled, + databases, + modules, + maxmemoryPolicy, + hasAclUsers, + persistenceMode, + }; +} + +export function checkCompatibility( + source: InstanceMeta, + target: InstanceMeta, + hfeDetected: boolean, +): Incompatibility[] { + const issues: Incompatibility[] = []; + + // 1. 
Valkey -> Redis direction + if (source.dbType === 'valkey' && target.dbType === 'redis') { + issues.push({ + severity: 'blocking', + category: 'type_direction', + title: 'Valkey \u2192 Redis migration', + detail: + 'Migrating from Valkey to Redis may lose Valkey-specific features and data structures. This direction is not recommended.', + }); + } + + // 2. HFE unsupported on target + if (hfeDetected) { + const targetSupportsHfe = + target.dbType === 'valkey' && semverGte(target.version, '8.1.0'); + if (!targetSupportsHfe) { + issues.push({ + severity: 'blocking', + category: 'hfe', + title: 'Hash Field Expiry unsupported', + detail: + 'Source uses Hash Field Expiry (HFE). Target does not support HFE \u2014 per-field TTLs will be lost during migration. Requires Valkey 8.1+.', + }); + } + } + + // 3. Missing modules + for (const mod of source.modules) { + if (!target.modules.includes(mod)) { + issues.push({ + severity: 'blocking', + category: 'modules', + title: `Missing module: ${mod}`, + detail: `Source uses the '${mod}' module which is not loaded on the target instance.`, + }); + } + } + + // 4. Cluster -> standalone mismatch + if (source.clusterEnabled && !target.clusterEnabled) { + issues.push({ + severity: 'blocking', + category: 'cluster_topology', + title: 'Cluster \u2192 standalone mismatch', + detail: + 'Source runs in cluster mode but target is standalone. Data spread across multiple slots cannot be directly migrated to a single-node instance.', + }); + } + + // 5. Standalone -> cluster migration + if (!source.clusterEnabled && target.clusterEnabled) { + issues.push({ + severity: 'warning', + category: 'cluster_topology', + title: 'Standalone \u2192 cluster migration', + detail: + 'Source is standalone, target is clustered. Migration is possible but keys will be resharded across target slots.', + }); + } + + // 6. 
Multi-DB to cluster unsupported + if (source.databases.some(db => db !== 0) && target.clusterEnabled) { + issues.push({ + severity: 'blocking', + category: 'multi_db', + title: 'Multi-DB to cluster unsupported', + detail: + 'Source uses multiple databases (db indices beyond 0). Cluster mode only supports db0.', + }); + } + + // 7. Multi-DB data may be lost (standalone target without matching DBs) + if ( + source.databases.some(db => db !== 0) && + !target.clusterEnabled && + !target.databases.some(db => db !== 0) + ) { + issues.push({ + severity: 'warning', + category: 'multi_db', + title: 'Multi-DB data may be lost', + detail: + 'Source uses databases beyond db0. Verify the target is configured to accept multiple databases.', + }); + } + + // 8. Eviction policy mismatch + if (source.maxmemoryPolicy !== target.maxmemoryPolicy) { + issues.push({ + severity: 'warning', + category: 'maxmemory_policy', + title: 'Eviction policy mismatch', + detail: `Source uses '${source.maxmemoryPolicy}', target uses '${target.maxmemoryPolicy}'. Mismatched eviction policies may cause unexpected key eviction after migration.`, + }); + } + + // 9. ACL users not configured + if (source.hasAclUsers && !target.hasAclUsers) { + issues.push({ + severity: 'warning', + category: 'acl', + title: 'ACL users not configured', + detail: + 'Source has custom ACL users configured. Target only has the default user. Recreate ACL rules on the target before migrating.', + }); + } + + // 10. Persistence mode differs + if (source.persistenceMode !== target.persistenceMode) { + issues.push({ + severity: 'info', + category: 'persistence', + title: 'Persistence mode differs', + detail: `Source uses '${source.persistenceMode}' persistence, target uses '${target.persistenceMode}'. 
Review target persistence settings to ensure durability requirements are met.`, + }); + } + + return issues; +} diff --git a/apps/api/src/migration/analysis/hfe-detector.ts b/apps/api/src/migration/analysis/hfe-detector.ts new file mode 100644 index 00000000..092edb06 --- /dev/null +++ b/apps/api/src/migration/analysis/hfe-detector.ts @@ -0,0 +1,128 @@ +import type Valkey from 'iovalkey'; + +export interface HfeResult { + hfeDetected: boolean; + hfeSupported: boolean; + hfeKeyCount: number; + hfeOversizedHashesSkipped: number; + sampledHashCount: number; +} + +const MAX_HASH_SAMPLE = 300; +const MAX_HASH_FIELDS = 10_000; + +export async function detectHfe( + client: Valkey, + hashKeys: string[], + totalEstimatedHashKeys: number, +): Promise { + const result: HfeResult = { + hfeDetected: false, + hfeSupported: true, + hfeKeyCount: 0, + hfeOversizedHashesSkipped: 0, + sampledHashCount: 0, + }; + + const candidates = hashKeys.slice(0, MAX_HASH_SAMPLE); + if (candidates.length === 0) { + return result; + } + + // Check HLEN for each candidate, skip oversized ones + const validKeys: string[] = []; + for (let i = 0; i < candidates.length; i += 1000) { + const batch = candidates.slice(i, i + 1000); + const pipeline = client.pipeline(); + for (const key of batch) { + pipeline.hlen(key); + } + const results = await pipeline.exec(); + if (!results) continue; + for (let j = 0; j < batch.length; j++) { + const [err, len] = results[j] ?? 
[]; + if (err || Number(len) > MAX_HASH_FIELDS) { + result.hfeOversizedHashesSkipped++; + } else { + validKeys.push(batch[j]); + } + } + } + + if (validKeys.length === 0) { + result.sampledHashCount = 0; + return result; + } + + // HRANDFIELD to get up to 3 random fields per key + const keyFieldPairs: Array<{ key: string; field: string }> = []; + for (let i = 0; i < validKeys.length; i += 1000) { + const batch = validKeys.slice(i, i + 1000); + const pipeline = client.pipeline(); + for (const key of batch) { + pipeline.call('HRANDFIELD', key, '-3'); + } + const results = await pipeline.exec(); + if (!results) continue; + for (let j = 0; j < batch.length; j++) { + const [err, fields] = results[j] ?? []; + if (err || !fields) continue; + const fieldList = Array.isArray(fields) ? fields : [fields]; + for (const f of fieldList) { + keyFieldPairs.push({ key: batch[j], field: String(f) }); + } + } + } + + result.sampledHashCount = validKeys.length; + + if (keyFieldPairs.length === 0) { + return result; + } + + // Pipeline HEXPIRETIME — wrap in try/catch for Redis (unknown command) + try { + let hfePositiveKeys = 0; + const checkedKeys = new Set(); + + const pipeline = client.pipeline(); + for (const { key, field } of keyFieldPairs) { + pipeline.call('HEXPIRETIME', key, 'FIELDS', '1', field); + } + const results = await pipeline.exec(); + if (results) { + for (let i = 0; i < keyFieldPairs.length; i++) { + const [err, val] = results[i] ?? []; + if (err) { + // If the error indicates unknown command, HFE not supported + const errMsg = String(err); + if (errMsg.includes('unknown command') || errMsg.includes('ERR')) { + result.hfeSupported = false; + result.hfeDetected = false; + return result; + } + continue; + } + // HEXPIRETIME returns an array with the expiry time, >0 means HFE in use + const expiry = Array.isArray(val) ? 
Number(val[0]) : Number(val); + if (expiry > 0 && !checkedKeys.has(keyFieldPairs[i].key)) { + checkedKeys.add(keyFieldPairs[i].key); + hfePositiveKeys++; + } + } + } + + if (hfePositiveKeys > 0) { + result.hfeDetected = true; + result.hfeKeyCount = validKeys.length > 0 + ? Math.round((hfePositiveKeys / validKeys.length) * totalEstimatedHashKeys) + : 0; + } + } catch { + // HEXPIRETIME not supported (Redis) + result.hfeSupported = false; + result.hfeDetected = false; + } + + return result; +} diff --git a/apps/api/src/migration/analysis/ttl-sampler.ts b/apps/api/src/migration/analysis/ttl-sampler.ts new file mode 100644 index 00000000..f3eb6598 --- /dev/null +++ b/apps/api/src/migration/analysis/ttl-sampler.ts @@ -0,0 +1,43 @@ +import type Valkey from 'iovalkey'; +import type { TtlDistribution } from '@betterdb/shared'; + +export async function sampleTtls( + client: Valkey, + keys: string[], +): Promise { + const dist: TtlDistribution = { + noExpiry: 0, + expiresWithin1h: 0, + expiresWithin24h: 0, + expiresWithin7d: 0, + expiresAfter7d: 0, + sampledKeyCount: keys.length, + }; + + for (let i = 0; i < keys.length; i += 1000) { + const batch = keys.slice(i, i + 1000); + const pipeline = client.pipeline(); + for (const key of batch) { + pipeline.pttl(key); + } + const results = await pipeline.exec(); + if (!results) continue; + for (const [err, ttl] of results) { + const ms = err ? 
-1 : Number(ttl); + if (ms < 0) { + // -1 = no expiry, -2 = key gone (count as no expiry) + dist.noExpiry++; + } else if (ms < 3_600_000) { + dist.expiresWithin1h++; + } else if (ms < 86_400_000) { + dist.expiresWithin24h++; + } else if (ms < 604_800_000) { + dist.expiresWithin7d++; + } else { + dist.expiresAfter7d++; + } + } + } + + return dist; +} diff --git a/apps/api/src/migration/analysis/type-sampler.ts b/apps/api/src/migration/analysis/type-sampler.ts new file mode 100644 index 00000000..0d318f1e --- /dev/null +++ b/apps/api/src/migration/analysis/type-sampler.ts @@ -0,0 +1,50 @@ +import type Valkey from 'iovalkey'; + +export interface SampledKey { + key: string; + type: string; +} + +/** + * SCAN each client up to maxKeysPerNode, pipeline TYPE in batches of 1000. + * Returns combined list of sampled keys with types. + */ +export async function sampleKeyTypes( + clients: Valkey[], + maxKeysPerNode: number, + onProgress?: (scannedSoFar: number) => void, +): Promise { + const allKeys: SampledKey[] = []; + + for (const client of clients) { + const nodeKeys: string[] = []; + let cursor = '0'; + do { + const [nextCursor, keys] = await client.scan(cursor, 'COUNT', 1000); + cursor = nextCursor; + for (const k of keys) { + if (nodeKeys.length >= maxKeysPerNode) break; + nodeKeys.push(k); + } + onProgress?.(allKeys.length + nodeKeys.length); + } while (cursor !== '0' && nodeKeys.length < maxKeysPerNode); + + // Pipeline TYPE in batches of 1000 + for (let i = 0; i < nodeKeys.length; i += 1000) { + const batch = nodeKeys.slice(i, i + 1000); + const pipeline = client.pipeline(); + for (const key of batch) { + pipeline.type(key); + } + const results = await pipeline.exec(); + if (results) { + for (let j = 0; j < batch.length; j++) { + const [err, type] = results[j] ?? []; + allKeys.push({ key: batch[j], type: err ? 
'unknown' : String(type) }); + } + } + } + } + + return allKeys; +} diff --git a/apps/api/src/migration/migration.controller.ts b/apps/api/src/migration/migration.controller.ts new file mode 100644 index 00000000..55125aaf --- /dev/null +++ b/apps/api/src/migration/migration.controller.ts @@ -0,0 +1,45 @@ +import { Controller, Get, Post, Delete, Param, Body, NotFoundException, BadRequestException } from '@nestjs/common'; +import type { MigrationAnalysisRequest, StartAnalysisResponse, MigrationAnalysisResult } from '@betterdb/shared'; +import { MigrationService } from './migration.service'; + +@Controller('migration') +export class MigrationController { + constructor(private readonly migrationService: MigrationService) {} + + @Post('analysis') + async startAnalysis(@Body() body: MigrationAnalysisRequest): Promise { + if (!body.sourceConnectionId) { + throw new BadRequestException('sourceConnectionId is required'); + } + if (!body.targetConnectionId) { + throw new BadRequestException('targetConnectionId is required'); + } + if (body.sourceConnectionId === body.targetConnectionId) { + throw new BadRequestException('Source and target must be different connections'); + } + if (body.scanSampleSize !== undefined) { + if (body.scanSampleSize < 1000 || body.scanSampleSize > 50000) { + throw new BadRequestException('scanSampleSize must be between 1000 and 50000'); + } + } + return this.migrationService.startAnalysis(body); + } + + @Get('analysis/:id') + getJob(@Param('id') id: string): MigrationAnalysisResult { + const job = this.migrationService.getJob(id); + if (!job) { + throw new NotFoundException(`Analysis job '${id}' not found`); + } + return job; + } + + @Delete('analysis/:id') + cancelJob(@Param('id') id: string): { cancelled: boolean } { + const success = this.migrationService.cancelJob(id); + if (!success) { + throw new NotFoundException(`Analysis job '${id}' not found`); + } + return { cancelled: true }; + } +} diff --git 
a/apps/api/src/migration/migration.module.ts b/apps/api/src/migration/migration.module.ts new file mode 100644 index 00000000..8178bac4 --- /dev/null +++ b/apps/api/src/migration/migration.module.ts @@ -0,0 +1,10 @@ +import { Module } from '@nestjs/common'; +import { MigrationController } from './migration.controller'; +import { MigrationService } from './migration.service'; + +@Module({ + controllers: [MigrationController], + providers: [MigrationService], + exports: [MigrationService], +}) +export class MigrationModule {} diff --git a/apps/api/src/migration/migration.service.ts b/apps/api/src/migration/migration.service.ts new file mode 100644 index 00000000..b1e1b64c --- /dev/null +++ b/apps/api/src/migration/migration.service.ts @@ -0,0 +1,435 @@ +import { Injectable, Logger, NotFoundException } from '@nestjs/common'; +import { randomUUID } from 'crypto'; +import Valkey from 'iovalkey'; +import type { MigrationAnalysisRequest, MigrationAnalysisResult, StartAnalysisResponse, DataTypeBreakdown, DataTypeCount } from '@betterdb/shared'; +import { ConnectionRegistry } from '../connections/connection-registry.service'; +import type { AnalysisJob } from './analysis/analysis-job'; +import { sampleKeyTypes } from './analysis/type-sampler'; +import { sampleTtls } from './analysis/ttl-sampler'; +import { detectHfe } from './analysis/hfe-detector'; +import { analyzeCommands } from './analysis/commandlog-analyzer'; +import { buildInstanceMeta, checkCompatibility } from './analysis/compatibility-checker'; + +@Injectable() +export class MigrationService { + private readonly logger = new Logger(MigrationService.name); + private jobs = new Map(); + private readonly MAX_JOBS = 20; + private readonly STUCK_JOB_TTL_MS = 30 * 60 * 1000; + + constructor( + private readonly connectionRegistry: ConnectionRegistry, + ) {} + + async startAnalysis(req: MigrationAnalysisRequest): Promise { + // Verify both connections exist before creating job (get() throws NotFoundException if not found) 
+ this.connectionRegistry.get(req.sourceConnectionId); + this.connectionRegistry.get(req.targetConnectionId); + + this.evictOldJobs(); + + const id = randomUUID(); + const job: AnalysisJob = { + id, + status: 'pending', + progress: 0, + createdAt: Date.now(), + result: { id, status: 'pending', progress: 0, createdAt: Date.now() }, + cancelled: false, + nodeClients: [], + }; + + this.jobs.set(id, job); + + // Fire and forget — do not await + this.runAnalysis(job, req).catch(err => { + this.logger.error(`Analysis ${id} failed: ${err.message}`); + }); + + return { id, status: 'pending' }; + } + + getJob(id: string): MigrationAnalysisResult | undefined { + const job = this.jobs.get(id); + if (!job) return undefined; + if (this.isJobStuck(job)) { + this.jobs.delete(id); + return undefined; + } + return { + id: job.id, + status: job.status, + progress: job.progress, + createdAt: job.createdAt, + completedAt: job.completedAt, + error: job.error, + ...job.result, + } as MigrationAnalysisResult; + } + + cancelJob(id: string): boolean { + const job = this.jobs.get(id); + if (!job) return false; + job.cancelled = true; + job.status = 'cancelled'; + // Immediately quit all temporary node clients + for (const client of job.nodeClients) { + client.quit().catch(() => {}); + } + job.nodeClients = []; + return true; + } + + private async runAnalysis(job: AnalysisJob, req: MigrationAnalysisRequest): Promise { + const scanSampleSize = req.scanSampleSize ?? 
10_000; + const tempClients: Valkey[] = []; + + try { + job.status = 'running'; + job.progress = 5; + + // Step 1: Resolve source connection + const adapter = this.connectionRegistry.get(req.sourceConnectionId); + const config = this.connectionRegistry.getConfig(req.sourceConnectionId); + const capabilities = adapter.getCapabilities(); + + job.result.sourceConnectionId = req.sourceConnectionId; + job.result.sourceConnectionName = config?.name; + job.result.sourceDbType = capabilities.dbType; + job.result.sourceDbVersion = capabilities.version; + + if (job.cancelled) return; + job.progress = 10; + + // Step 2: Get source server info (keyspace for total key count, memory) + const info = await adapter.getInfo(['keyspace', 'memory', 'cluster', 'server', 'persistence']); + const keyspaceInfo = info as Record; + + // Parse total keys from keyspace info + let totalKeys = 0; + for (const [key, val] of Object.entries(keyspaceInfo)) { + if (key.startsWith('db') && typeof val === 'string') { + const match = val.match(/keys=(\d+)/); + if (match) totalKeys += parseInt(match[1], 10); + } + } + job.result.totalKeys = totalKeys; + + // Parse used_memory + const usedMemory = Number(keyspaceInfo['used_memory']) || 0; + job.result.totalMemoryBytes = usedMemory; + + if (job.cancelled) return; + job.progress = 12; + + // Step 2b: Read target info + if (job.cancelled) return; + + const targetAdapter = this.connectionRegistry.get(req.targetConnectionId); + const targetConfig = this.connectionRegistry.getConfig(req.targetConnectionId); + const targetInfo = await targetAdapter.getInfo(['server', 'keyspace', 'cluster', 'memory', 'persistence']); + const targetCapabilities = targetAdapter.getCapabilities(); + + let targetAclUsers: string[] = []; + try { + const client = targetAdapter.getClient(); + const result = await client.call('ACL', 'USERS') as string[]; + targetAclUsers = result ?? 
[]; + } catch { /* ignore - ACL not supported or no permission */ } + + job.result.targetConnectionId = req.targetConnectionId; + job.result.targetConnectionName = targetConfig?.name; + job.result.targetDbType = targetCapabilities.dbType; + job.result.targetDbVersion = targetCapabilities.version; + job.result.targetIsCluster = String((targetInfo as Record)['cluster_enabled'] ?? '0') === '1'; + + if (job.cancelled) return; + job.progress = 13; + + // Step 3: Cluster check (source) + let isCluster = false; + let clusterMasterCount = 0; + const scanClients: Valkey[] = []; + let isAdapterClient = false; + + const clusterEnabled = String(keyspaceInfo['cluster_enabled'] ?? '0'); + if (clusterEnabled === '1') { + isCluster = true; + const nodes = await adapter.getClusterNodes(); + const masters = nodes.filter(n => n.flags.includes('master')); + clusterMasterCount = masters.length; + + for (const master of masters) { + // Parse address: 'host:port@clusterport' + const addrPart = master.address?.split('@')[0] ?? ''; + const [host, portStr] = addrPart.split(':'); + const port = parseInt(portStr, 10); + if (!host || isNaN(port)) continue; + + const client = new Valkey({ + host, + port, + username: config?.username || undefined, + password: config?.password || undefined, + tls: config?.tls ? 
{} : undefined, + lazyConnect: true, + connectionName: 'BetterDB-Migration-Analysis', + }); + await client.connect(); + tempClients.push(client); + job.nodeClients.push(client); + scanClients.push(client); + } + } else { + scanClients.push(adapter.getClient()); + isAdapterClient = true; + } + + job.result.isCluster = isCluster; + job.result.clusterMasterCount = clusterMasterCount; + job.result.sampledPerNode = scanSampleSize; + + if (job.cancelled) return; + job.progress = 15; + + // Step 4: Type sampling (SCAN + TYPE) + const sampledKeys = await sampleKeyTypes( + scanClients, + scanSampleSize, + (scanned) => { + const progressRange = 50 - 15; // 15-50% + const totalExpected = scanSampleSize * scanClients.length; + const fraction = Math.min(scanned / totalExpected, 1); + job.progress = Math.round(15 + fraction * progressRange); + }, + ); + + job.result.sampledKeys = sampledKeys.length; + + if (job.cancelled) return; + job.progress = 50; + + // Step 5: Memory sampling + const memoryClient = isAdapterClient ? adapter.getClient() : (tempClients[0] ?? adapter.getClient()); + const memoryByType = new Map(); + + for (let i = 0; i < sampledKeys.length; i += 1000) { + if (job.cancelled) return; + const batch = sampledKeys.slice(i, i + 1000); + const pipeline = memoryClient.pipeline(); + for (const { key } of batch) { + pipeline.call('MEMORY', 'USAGE', key, 'SAMPLES', '0'); + } + const results = await pipeline.exec(); + if (results) { + for (let j = 0; j < batch.length; j++) { + const [err, mem] = results[j] ?? []; + const bytes = err ? 0 : Number(mem) || 0; + const t = batch[j].type; + const entry = memoryByType.get(t) ?? 
{ count: 0, bytes: 0 }; + entry.count++; + entry.bytes += bytes; + memoryByType.set(t, entry); + } + } + job.progress = Math.round(50 + ((i + batch.length) / sampledKeys.length) * 15); + } + + // Build DataTypeBreakdown + const knownTypes = new Set(['string', 'hash', 'list', 'set', 'zset', 'stream']); + let otherCount = 0; + let otherBytes = 0; + + for (const [typeName, data] of memoryByType) { + if (!knownTypes.has(typeName)) { + otherCount += data.count; + otherBytes += data.bytes; + } + } + + const buildDtc = (typeName: string): DataTypeCount => { + const data = memoryByType.get(typeName); + if (!data) return { count: 0, sampledMemoryBytes: 0, estimatedTotalMemoryBytes: 0 }; + return { + count: data.count, + sampledMemoryBytes: data.bytes, + estimatedTotalMemoryBytes: sampledKeys.length > 0 + ? Math.round((data.bytes / sampledKeys.length) * totalKeys) + : 0, + }; + }; + + const breakdown: DataTypeBreakdown = { + string: buildDtc('string'), + hash: buildDtc('hash'), + list: buildDtc('list'), + set: buildDtc('set'), + zset: buildDtc('zset'), + stream: buildDtc('stream'), + other: { + count: otherCount, + sampledMemoryBytes: otherBytes, + estimatedTotalMemoryBytes: sampledKeys.length > 0 + ? Math.round((otherBytes / sampledKeys.length) * totalKeys) + : 0, + }, + }; + + job.result.dataTypeBreakdown = breakdown; + + // Compute estimated total memory + const totalSampledBytes = Array.from(memoryByType.values()).reduce((s, d) => s + d.bytes, 0); + job.result.estimatedTotalMemoryBytes = sampledKeys.length > 0 + ? Math.round((totalSampledBytes / sampledKeys.length) * totalKeys) + : 0; + + if (job.cancelled) return; + job.progress = 65; + + // Step 6: TTL distribution + const allKeyNames = sampledKeys.map(k => k.key); + const ttlClient = isAdapterClient ? adapter.getClient() : (tempClients[0] ?? 
adapter.getClient()); + job.result.ttlDistribution = await sampleTtls(ttlClient, allKeyNames); + + if (job.cancelled) return; + job.progress = 75; + + // Step 7: HFE detection + if (capabilities.dbType === 'valkey') { + const hashKeys = sampledKeys.filter(k => k.type === 'hash').map(k => k.key); + const totalEstimatedHashKeys = totalKeys > 0 && sampledKeys.length > 0 + ? Math.round((hashKeys.length / sampledKeys.length) * totalKeys) + : hashKeys.length; + const hfeClient = isAdapterClient ? adapter.getClient() : (tempClients[0] ?? adapter.getClient()); + const hfeResult = await detectHfe(hfeClient, hashKeys, totalEstimatedHashKeys); + job.result.hfeDetected = hfeResult.hfeDetected; + job.result.hfeSupported = hfeResult.hfeSupported; + job.result.hfeKeyCount = hfeResult.hfeKeyCount; + job.result.hfeOversizedHashesSkipped = hfeResult.hfeOversizedHashesSkipped; + } else { + job.result.hfeSupported = false; + job.result.hfeDetected = false; + } + + if (job.cancelled) return; + job.progress = 85; + + // Step 8: Command analysis + job.result.commandAnalysis = await analyzeCommands(adapter); + + if (job.cancelled) return; + job.progress = 90; + + // Step 9: Compatibility checking + // Fetch source ACL users + let sourceAclUsers: string[] = []; + try { + const sourceClient = adapter.getClient(); + const result = await sourceClient.call('ACL', 'USERS') as string[]; + sourceAclUsers = result ?? 
[]; + } catch { /* ignore - ACL not supported or no permission */ } + + // Build source meta + const sourceMeta = buildInstanceMeta(keyspaceInfo, capabilities, sourceAclUsers); + + // Fetch source modules + try { + const sourceClient = adapter.getClient(); + const moduleResult = await sourceClient.call('MODULE', 'LIST') as unknown[]; + sourceMeta.modules = parseModuleList(moduleResult); + } catch { /* ignore */ } + + // Build target meta + const targetMeta = buildInstanceMeta(targetInfo as Record, targetCapabilities, targetAclUsers); + + // Fetch target modules + try { + const targetClient = targetAdapter.getClient(); + const moduleResult = await targetClient.call('MODULE', 'LIST') as unknown[]; + targetMeta.modules = parseModuleList(moduleResult); + } catch { /* ignore */ } + + const incompatibilities = checkCompatibility(sourceMeta, targetMeta, job.result.hfeDetected ?? false); + job.result.incompatibilities = incompatibilities; + job.result.blockingCount = incompatibilities.filter(i => i.severity === 'blocking').length; + job.result.warningCount = incompatibilities.filter(i => i.severity === 'warning').length; + + if (job.cancelled) return; + job.progress = 95; + + // Done + job.progress = 100; + job.status = 'completed'; + job.completedAt = Date.now(); + job.result.status = 'completed'; + job.result.completedAt = job.completedAt; + + this.logger.log(`Analysis ${job.id} completed: blocking=${job.result.blockingCount}, warnings=${job.result.warningCount}, sampledKeys=${sampledKeys.length}, totalKeys=${totalKeys}`); + + } catch (err: unknown) { + if (!job.cancelled) { + const message = err instanceof Error ? 
err.message : String(err); + job.status = 'failed'; + job.error = message; + job.result.status = 'failed'; + job.result.error = job.error; + job.completedAt = Date.now(); + this.logger.error(`Analysis ${job.id} failed: ${job.error}`); + } + } finally { + // Only quit temporary per-node clients, never the adapter's client + await Promise.allSettled(tempClients.map(c => c.quit())); + job.nodeClients = []; + } + } + + private evictOldJobs(): void { + if (this.jobs.size < this.MAX_JOBS) return; + + // First: evict stuck running jobs + for (const [id, job] of this.jobs) { + if (this.isJobStuck(job)) { + this.jobs.delete(id); + } + } + + // Then: evict oldest completed/failed/cancelled + if (this.jobs.size >= this.MAX_JOBS) { + const terminal = Array.from(this.jobs.entries()) + .filter(([, j]) => j.status === 'completed' || j.status === 'failed' || j.status === 'cancelled') + .sort((a, b) => a[1].createdAt - b[1].createdAt); + + for (const [id] of terminal) { + if (this.jobs.size < this.MAX_JOBS) break; + this.jobs.delete(id); + } + } + } + + private isJobStuck(job: AnalysisJob): boolean { + return job.status === 'running' && Date.now() - job.createdAt > this.STUCK_JOB_TTL_MS; + } +} + +/** + * Parse the result of MODULE LIST command. + * The result is typically an array of arrays, where each inner element + * contains name/value pairs like: [['name', 'modulename', 'ver', 1, ...], ...] + * or in newer versions: [[name, modulename, ver, 1], ...] 
+ */ +function parseModuleList(result: unknown[]): string[] { + if (!Array.isArray(result)) return []; + const modules: string[] = []; + for (const entry of result) { + if (Array.isArray(entry)) { + // Find the 'name' key and take the next element as the value + for (let i = 0; i < entry.length - 1; i++) { + if (String(entry[i]).toLowerCase() === 'name') { + modules.push(String(entry[i + 1])); + break; + } + } + } + } + return modules; +} diff --git a/apps/web/src/App.tsx b/apps/web/src/App.tsx index 042d968e..16c6deb9 100644 --- a/apps/web/src/App.tsx +++ b/apps/web/src/App.tsx @@ -27,6 +27,7 @@ import { KeyAnalytics } from './pages/KeyAnalytics'; import { ClusterDashboard } from './pages/ClusterDashboard'; import { Settings } from './pages/Settings'; import { Webhooks } from './pages/Webhooks'; +import { MigrationPage } from './pages/MigrationPage'; import { VectorSearch } from './pages/VectorSearch'; import { Members } from './pages/Members'; import { workspaceApi, CloudUser } from './api/workspace'; @@ -155,6 +156,9 @@ function AppLayout({ cloudUser }: { cloudUser: CloudUser | null }) { Webhooks + + Migration + {!cloudUser && ( @@ -213,6 +217,7 @@ function AppLayout({ cloudUser }: { cloudUser: CloudUser | null }) { } /> } /> } /> + } /> {cloudUser && ( } /> )} @@ -220,6 +225,12 @@ function AppLayout({ cloudUser }: { cloudUser: CloudUser | null }) { + ); } diff --git a/apps/web/src/components/migration/AnalysisForm.tsx b/apps/web/src/components/migration/AnalysisForm.tsx new file mode 100644 index 00000000..6d6c07d9 --- /dev/null +++ b/apps/web/src/components/migration/AnalysisForm.tsx @@ -0,0 +1,110 @@ +import { useState } from 'react'; +import { useConnection } from '../../hooks/useConnection'; +import { fetchApi } from '../../api/client'; +import type { StartAnalysisResponse } from '@betterdb/shared'; + +interface Props { + onStart: (analysisId: string) => void; +} + +export function AnalysisForm({ onStart }: Props) { + const { connections, currentConnection 
} = useConnection(); + const [sourceConnectionId, setSourceConnectionId] = useState(currentConnection?.id ?? ''); + const [targetConnectionId, setTargetConnectionId] = useState(''); + const [scanSampleSize, setScanSampleSize] = useState(10000); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + const sameConnection = + sourceConnectionId !== '' && + targetConnectionId !== '' && + sourceConnectionId === targetConnectionId; + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + if (!sourceConnectionId || !targetConnectionId || sameConnection) return; + setLoading(true); + setError(null); + try { + const res = await fetchApi('/migration/analysis', { + method: 'POST', + body: JSON.stringify({ sourceConnectionId, targetConnectionId, scanSampleSize }), + }); + onStart(res.id); + } catch (err) { + setError(err instanceof Error ? err.message : 'Failed to start analysis'); + } finally { + setLoading(false); + } + }; + + return ( +
+
+ + +
+ +
+ + + {sameConnection && ( +

+ Source and target must be different connections +

+ )} +
+ +
+ + +

+ Higher sample = more accurate estimates, slower analysis. +

+
+ + {error &&

{error}

} + + +
+ ); +} diff --git a/apps/web/src/components/migration/AnalysisProgressBar.tsx b/apps/web/src/components/migration/AnalysisProgressBar.tsx new file mode 100644 index 00000000..502992a3 --- /dev/null +++ b/apps/web/src/components/migration/AnalysisProgressBar.tsx @@ -0,0 +1,81 @@ +import { useState, useEffect } from 'react'; +import { fetchApi } from '../../api/client'; +import type { MigrationAnalysisResult } from '@betterdb/shared'; + +interface Props { + analysisId: string; + onComplete: (result: MigrationAnalysisResult) => void; + onError: (msg: string) => void; + onCancel: () => void; +} + +function getStepLabel(progress: number): string { + if (progress <= 12) return 'Connecting and reading server info'; + if (progress <= 14) return 'Detecting cluster topology'; + if (progress <= 50) return 'Scanning keyspace'; + if (progress <= 65) return 'Sampling memory usage'; + if (progress <= 75) return 'Analyzing TTL distribution'; + if (progress <= 85) return 'Checking Hash Field Expiry'; + if (progress <= 95) return 'Analyzing command patterns'; + return 'Computing migration verdict'; +} + +export function AnalysisProgressBar({ analysisId, onComplete, onError, onCancel }: Props) { + const [job, setJob] = useState(null); + + useEffect(() => { + const interval = setInterval(async () => { + try { + const result = await fetchApi(`/migration/analysis/${analysisId}`); + setJob(result); + if (result.status === 'completed') { + clearInterval(interval); + onComplete(result); + } else if (result.status === 'failed') { + clearInterval(interval); + onError(result.error ?? 
'Analysis failed'); + } else if (result.status === 'cancelled') { + clearInterval(interval); + onCancel(); + } + } catch { + clearInterval(interval); + onError('Analysis job not found or server error'); + } + }, 2000); + return () => clearInterval(interval); + }, [analysisId, onComplete, onError, onCancel]); + + const handleCancel = async () => { + try { + await fetchApi(`/migration/analysis/${analysisId}`, { method: 'DELETE' }); + } catch { + /* ignore */ + } + onCancel(); + }; + + const currentProgress = job?.progress ?? 0; + + return ( +
+
+ Analyzing... + {currentProgress}% +
+
+
+
+

{getStepLabel(currentProgress)}

+ +
+ ); +} diff --git a/apps/web/src/components/migration/ExportBar.tsx b/apps/web/src/components/migration/ExportBar.tsx new file mode 100644 index 00000000..b6c6b65d --- /dev/null +++ b/apps/web/src/components/migration/ExportBar.tsx @@ -0,0 +1,34 @@ +import type { MigrationAnalysisResult } from '@betterdb/shared'; + +interface Props { + job: MigrationAnalysisResult; +} + +export function ExportBar({ job }: Props) { + const handleExportJson = () => { + const blob = new Blob([JSON.stringify(job, null, 2)], { type: 'application/json' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = `betterdb-migration-${job.sourceConnectionName ?? 'unknown'}-${Date.now()}.json`; + a.click(); + URL.revokeObjectURL(url); + }; + + return ( +
+ + +
+ ); +} diff --git a/apps/web/src/components/migration/MigrationReport.tsx b/apps/web/src/components/migration/MigrationReport.tsx new file mode 100644 index 00000000..099a5919 --- /dev/null +++ b/apps/web/src/components/migration/MigrationReport.tsx @@ -0,0 +1,24 @@ +import type { MigrationAnalysisResult } from '@betterdb/shared'; +import { SummarySection } from './sections/SummarySection'; +import { VerdictSection } from './sections/VerdictSection'; +import { DataTypeSection } from './sections/DataTypeSection'; +import { TtlSection } from './sections/TtlSection'; +import { CommandSection } from './sections/CommandSection'; +import { HfeSection } from './sections/HfeSection'; + +interface Props { + job: MigrationAnalysisResult; +} + +export function MigrationReport({ job }: Props) { + return ( +
+ + + + + + +
+ ); +} diff --git a/apps/web/src/components/migration/sections/CommandSection.tsx b/apps/web/src/components/migration/sections/CommandSection.tsx new file mode 100644 index 00000000..926f184e --- /dev/null +++ b/apps/web/src/components/migration/sections/CommandSection.tsx @@ -0,0 +1,55 @@ +import type { MigrationAnalysisResult } from '@betterdb/shared'; + +interface Props { + job: MigrationAnalysisResult; +} + +const SOURCE_LABELS: Record = { + commandlog: 'COMMANDLOG (Valkey 8.1+)', + slowlog: 'SLOWLOG (fallback)', + unavailable: 'Unavailable — command history not accessible on this instance.', +}; + +export function CommandSection({ job }: Props) { + const cmd = job.commandAnalysis; + + if (!cmd) { + return ( +
+

Command Analysis

+

Not available for this analysis.

+
+ ); + } + + return ( +
+

Command Analysis

+ + {cmd.topCommands.length > 0 && ( +
+ + + + + + + + + {cmd.topCommands.map(({ command, count }) => ( + + + + + ))} + +
CommandOccurrences
{command}{count.toLocaleString()}
+
+ )} + +

+ Command data sourced from: {SOURCE_LABELS[cmd.sourceUsed] ?? cmd.sourceUsed} +

+
+ ); +} diff --git a/apps/web/src/components/migration/sections/DataTypeSection.tsx b/apps/web/src/components/migration/sections/DataTypeSection.tsx new file mode 100644 index 00000000..b25d0f90 --- /dev/null +++ b/apps/web/src/components/migration/sections/DataTypeSection.tsx @@ -0,0 +1,80 @@ +import type { MigrationAnalysisResult } from '@betterdb/shared'; +import { PieChart, Pie, Cell, Tooltip, ResponsiveContainer, Legend } from 'recharts'; + +const COLORS = ['#3b82f6', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6', '#06b6d4', '#6b7280']; +const TYPE_NAMES = ['string', 'hash', 'list', 'set', 'zset', 'stream', 'other'] as const; + +function formatBytes(bytes: number): string { + if (bytes === 0) return '0 B'; + const units = ['B', 'KB', 'MB', 'GB', 'TB']; + const i = Math.floor(Math.log(bytes) / Math.log(1024)); + return `${(bytes / Math.pow(1024, i)).toFixed(1)} ${units[i]}`; +} + +interface Props { + job: MigrationAnalysisResult; +} + +export function DataTypeSection({ job }: Props) { + const breakdown = job.dataTypeBreakdown; + + if (!breakdown) { + return ( +
+

Data Types

+

Not available for this analysis.

+
+ ); + } + + const chartData = TYPE_NAMES + .map(name => ({ name, count: breakdown[name]?.count ?? 0 })) + .filter(d => d.count > 0); + + return ( +
+

Data Types

+
+
+ + + + {chartData.map((_, i) => )} + + + + + +
+
+ + + + + + + + + + + {TYPE_NAMES.map(name => { + const dt = breakdown[name]; + if (!dt || dt.count === 0) return null; + return ( + + + + + + + ); + })} + +
TypeKey CountSampled Memory + Est. Total Memory +
{name}{dt.count.toLocaleString()}{formatBytes(dt.sampledMemoryBytes)}~{formatBytes(dt.estimatedTotalMemoryBytes)}
+
+
+
+ ); +} diff --git a/apps/web/src/components/migration/sections/HfeSection.tsx b/apps/web/src/components/migration/sections/HfeSection.tsx new file mode 100644 index 00000000..030b217c --- /dev/null +++ b/apps/web/src/components/migration/sections/HfeSection.tsx @@ -0,0 +1,54 @@ +import type { MigrationAnalysisResult } from '@betterdb/shared'; +import { AlertTriangle, CheckCircle } from 'lucide-react'; + +interface Props { + job: MigrationAnalysisResult; +} + +export function HfeSection({ job }: Props) { + if (job.hfeSupported === undefined && job.hfeDetected === undefined) { + return ( +
+

Hash Field Expiry

+

Not available for this analysis.

+
+ ); + } + + return ( +
+

Hash Field Expiry

+ + {job.hfeSupported === false ? ( +

+ HFE check not available — source is Redis (Hash Field Expiry is a Valkey-only feature). +

+ ) : job.hfeDetected ? ( +
+ +
+

+ Hash Field Expiry keys detected (~{(job.hfeKeyCount ?? 0).toLocaleString()} estimated). +

+

+ Hash fields with per-field TTLs will lose their expiry metadata during migration + unless the target instance supports HFE (Valkey 8.1+). Verify your target version + before proceeding. +

+
+
+ ) : ( +
+ + Not detected in sample. +
+ )} + + {(job.hfeOversizedHashesSkipped ?? 0) > 0 && ( +

+ Note: {job.hfeOversizedHashesSkipped} hash key(s) with >10,000 fields were skipped during HFE sampling. +

+ )} +
+ ); +} diff --git a/apps/web/src/components/migration/sections/SummarySection.tsx b/apps/web/src/components/migration/sections/SummarySection.tsx new file mode 100644 index 00000000..9507a06b --- /dev/null +++ b/apps/web/src/components/migration/sections/SummarySection.tsx @@ -0,0 +1,86 @@ +import type { MigrationAnalysisResult } from '@betterdb/shared'; +import { AlertTriangle } from 'lucide-react'; + +function formatBytes(bytes: number): string { + if (bytes === 0) return '0 B'; + const units = ['B', 'KB', 'MB', 'GB', 'TB']; + const i = Math.floor(Math.log(bytes) / Math.log(1024)); + return `${(bytes / Math.pow(1024, i)).toFixed(1)} ${units[i]}`; +} + +function DbBadge({ dbType, dbVersion, connectionName }: { + dbType?: 'valkey' | 'redis'; + dbVersion?: string; + connectionName?: string; +}) { + const label = dbType === 'valkey' ? 'Valkey' : dbType === 'redis' ? 'Redis' : 'Unknown'; + const colorClass = dbType === 'valkey' + ? 'bg-teal-100 text-teal-700' + : dbType === 'redis' + ? 'bg-red-100 text-red-700' + : 'bg-gray-100 text-gray-700'; + + return ( +
+ + {label} + + {dbVersion ?? 'Unknown'} + + {connectionName ?? 'Unknown'} + +
+ ); +} + +interface Props { + job: MigrationAnalysisResult; +} + +export function SummarySection({ job }: Props) { + return ( +
+

Summary

+ +
+ + + +
+ +
+
+

Total Keys

+

{(job.totalKeys ?? 0).toLocaleString()}

+
+
+

Est. Memory

+

~{formatBytes(job.estimatedTotalMemoryBytes ?? 0)}

+
+
+ + {job.isCluster && ( +
+ +

+ Cluster mode detected — analysis covers {job.clusterMasterCount} master nodes. + Key count and memory are aggregated across all masters. +

+
+ )} + +

+ {(job.sampledKeys ?? 0).toLocaleString()} keys sampled out of {(job.totalKeys ?? 0).toLocaleString()} total + {job.isCluster ? ` (${(job.sampledPerNode ?? 0).toLocaleString()} per node)` : ''} +

+
+ ); +} diff --git a/apps/web/src/components/migration/sections/TtlSection.tsx b/apps/web/src/components/migration/sections/TtlSection.tsx new file mode 100644 index 00000000..a19a5b93 --- /dev/null +++ b/apps/web/src/components/migration/sections/TtlSection.tsx @@ -0,0 +1,50 @@ +import type { MigrationAnalysisResult } from '@betterdb/shared'; +import { BarChart, Bar, XAxis, YAxis, Tooltip, ResponsiveContainer, Cell } from 'recharts'; + +interface Props { + job: MigrationAnalysisResult; +} + +export function TtlSection({ job }: Props) { + const ttl = job.ttlDistribution; + + if (!ttl) { + return ( +
+

TTL Distribution

+

Not available for this analysis.

+
+ ); + } + + const data = [ + { name: 'No Expiry', value: ttl.noExpiry, color: '#6b7280' }, + { name: '< 1 hour', value: ttl.expiresWithin1h, color: '#f59e0b' }, + { name: '< 24 hours', value: ttl.expiresWithin24h, color: '#3b82f6' }, + { name: '< 7 days', value: ttl.expiresWithin7d, color: '#10b981' }, + { name: '> 7 days', value: ttl.expiresAfter7d, color: '#8b5cf6' }, + ]; + + return ( +
+

TTL Distribution

+
+ + + + + [`${Number(value).toLocaleString()} keys`, 'Count']} + /> + + {data.map((entry, i) => )} + + + +
+

+ Sampled from {ttl.sampledKeyCount.toLocaleString()} keys. +

+
+ ); +} diff --git a/apps/web/src/components/migration/sections/VerdictSection.tsx b/apps/web/src/components/migration/sections/VerdictSection.tsx new file mode 100644 index 00000000..3fab6243 --- /dev/null +++ b/apps/web/src/components/migration/sections/VerdictSection.tsx @@ -0,0 +1,96 @@ +import type { MigrationAnalysisResult, Incompatibility } from '@betterdb/shared'; +import { CheckCircle, AlertTriangle, XCircle, Info } from 'lucide-react'; + +const SEVERITY_ORDER: Record = { + blocking: 0, + warning: 1, + info: 2, +}; + +const SEVERITY_ICON_MAP: Record = { + blocking: { icon: XCircle, color: 'text-red-600' }, + warning: { icon: AlertTriangle, color: 'text-amber-600' }, + info: { icon: Info, color: 'text-blue-600' }, +}; + +interface Props { + job: MigrationAnalysisResult; +} + +export function VerdictSection({ job }: Props) { + if (job.incompatibilities === undefined) { + return ( +
+

Compatibility

+

Not available for this analysis.

+
+ ); + } + + const blockingCount = job.blockingCount ?? 0; + const warningCount = job.warningCount ?? 0; + + let bannerBg: string; + let bannerText: string; + let BannerIcon: typeof CheckCircle; + let bannerMessage: string; + + if (blockingCount > 0) { + bannerBg = 'bg-red-50 border-red-200'; + bannerText = 'text-red-800'; + BannerIcon = XCircle; + bannerMessage = `${blockingCount} blocking issue(s) — resolve before migrating.`; + } else if (warningCount > 0) { + bannerBg = 'bg-amber-50 border-amber-200'; + bannerText = 'text-amber-800'; + BannerIcon = AlertTriangle; + bannerMessage = `No blocking issues. ${warningCount} warning(s) to review.`; + } else { + bannerBg = 'bg-green-50 border-green-200'; + bannerText = 'text-green-800'; + BannerIcon = CheckCircle; + bannerMessage = 'No compatibility issues found. Migration appears safe.'; + } + + const sorted = [...job.incompatibilities].sort( + (a, b) => SEVERITY_ORDER[a.severity] - SEVERITY_ORDER[b.severity], + ); + + return ( +
+
+

Compatibility

+
+ +

{bannerMessage}

+
+
+ + {sorted.length > 0 && ( +
+ {sorted.map((item, idx) => { + const sev = SEVERITY_ICON_MAP[item.severity]; + const SevIcon = sev.icon; + return ( +
+ +
+
+ {item.title} + + {item.category} + +
+

{item.detail}

+
+
+ ); + })} +
+ )} +
+ ); +} diff --git a/apps/web/src/pages/MigrationPage.tsx b/apps/web/src/pages/MigrationPage.tsx new file mode 100644 index 00000000..1299014e --- /dev/null +++ b/apps/web/src/pages/MigrationPage.tsx @@ -0,0 +1,77 @@ +import { useState } from 'react'; +import type { MigrationAnalysisResult } from '@betterdb/shared'; +import { AnalysisForm } from '../components/migration/AnalysisForm'; +import { AnalysisProgressBar } from '../components/migration/AnalysisProgressBar'; +import { MigrationReport } from '../components/migration/MigrationReport'; +import { ExportBar } from '../components/migration/ExportBar'; + +type Phase = 'idle' | 'running' | 'done'; + +export function MigrationPage() { + const [phase, setPhase] = useState('idle'); + const [analysisId, setAnalysisId] = useState(null); + const [job, setJob] = useState(null); + const [error, setError] = useState(null); + + return ( +
+
+

Migration Analysis

+

+ Analyze your source instance to assess migration readiness. +

+
+ + {error && ( +
+ {error} + +
+ )} + + {phase === 'idle' && ( + { + setAnalysisId(id); + setPhase('running'); + setError(null); + }} + /> + )} + + {phase === 'running' && analysisId && ( + { + setJob(result); + setPhase('done'); + }} + onError={(msg) => { + setError(msg); + setPhase('idle'); + }} + onCancel={() => { + setPhase('idle'); + }} + /> + )} + + {phase === 'done' && job && ( + <> + + + + + )} +
+ ); +} diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index 5a418089..d786eb6a 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -13,3 +13,4 @@ export * from './utils/key-patterns'; export * from './license/index'; export * from './webhooks/index'; export * from './types/vector-index-snapshots'; +export * from './types/migration'; diff --git a/packages/shared/src/license/types.ts b/packages/shared/src/license/types.ts index 7f165c6a..20e11905 100644 --- a/packages/shared/src/license/types.ts +++ b/packages/shared/src/license/types.ts @@ -42,6 +42,7 @@ export enum Feature { AI_CLOUD = 'aiCloud', WEBHOOK_COMPLIANCE_EVENTS = 'webhookComplianceEvents', WEBHOOK_DLQ = 'webhookDlq', + MIGRATION_EXECUTION = 'migrationExecution', } export const TIER_FEATURES: Record = { @@ -56,6 +57,7 @@ export const TIER_FEATURES: Record = { Feature.WEBHOOK_CUSTOM_HEADERS, Feature.WEBHOOK_DELIVERY_PAYLOAD, Feature.WEBHOOK_CONFIGURABLE_RETRY, + Feature.MIGRATION_EXECUTION, ], [Tier.enterprise]: Object.values(Feature), }; diff --git a/packages/shared/src/types/migration.ts b/packages/shared/src/types/migration.ts new file mode 100644 index 00000000..93785f41 --- /dev/null +++ b/packages/shared/src/types/migration.ts @@ -0,0 +1,101 @@ +export type MigrationJobStatus = + | 'pending' + | 'running' + | 'completed' + | 'failed' + | 'cancelled'; + +export type IncompatibilitySeverity = 'blocking' | 'warning' | 'info'; + +export interface Incompatibility { + severity: IncompatibilitySeverity; + category: string; + title: string; + detail: string; +} + +export interface MigrationAnalysisRequest { + sourceConnectionId: string; + targetConnectionId: string; + scanSampleSize?: number; // default 10000, range 1000-50000 +} + +export interface DataTypeCount { + count: number; + sampledMemoryBytes: number; + estimatedTotalMemoryBytes: number; +} + +export interface DataTypeBreakdown { + string: DataTypeCount; + hash: DataTypeCount; + list: 
DataTypeCount; + set: DataTypeCount; + zset: DataTypeCount; + stream: DataTypeCount; + other: DataTypeCount; +} + +export interface TtlDistribution { + noExpiry: number; + expiresWithin1h: number; + expiresWithin24h: number; + expiresWithin7d: number; + expiresAfter7d: number; + sampledKeyCount: number; +} + +export interface CommandAnalysis { + sourceUsed: 'commandlog' | 'slowlog' | 'unavailable'; + topCommands: Array<{ command: string; count: number }>; +} + +export interface MigrationAnalysisResult { + id: string; + status: MigrationJobStatus; + progress: number; // 0-100 + createdAt: number; + completedAt?: number; + error?: string; + + // Source metadata + sourceConnectionId?: string; + sourceConnectionName?: string; + sourceDbType?: 'valkey' | 'redis'; + sourceDbVersion?: string; + isCluster?: boolean; + clusterMasterCount?: number; + + // Target metadata + targetConnectionId?: string; + targetConnectionName?: string; + targetDbType?: 'valkey' | 'redis'; + targetDbVersion?: string; + targetIsCluster?: boolean; + + // Key / memory overview + totalKeys?: number; + sampledKeys?: number; + sampledPerNode?: number; // scanSampleSize used + totalMemoryBytes?: number; + estimatedTotalMemoryBytes?: number; + + // Section results + dataTypeBreakdown?: DataTypeBreakdown; + hfeDetected?: boolean; + hfeKeyCount?: number; // estimated from sample ratio + hfeSupported?: boolean; // false on Redis + hfeOversizedHashesSkipped?: number; + ttlDistribution?: TtlDistribution; + commandAnalysis?: CommandAnalysis; + + // Compatibility + incompatibilities?: Incompatibility[]; + blockingCount?: number; + warningCount?: number; +} + +export interface StartAnalysisResponse { + id: string; + status: 'pending'; +} diff --git a/packages/shared/tsconfig.tsbuildinfo b/packages/shared/tsconfig.tsbuildinfo new file mode 100644 index 00000000..f41373ed --- /dev/null +++ b/packages/shared/tsconfig.tsbuildinfo @@ -0,0 +1 @@ 
+{"root":["./src/encryption.ts","./src/index.ts","./src/license/index.ts","./src/license/types.ts","./src/types/agent-protocol.ts","./src/types/ai.ts","./src/types/anomaly.ts","./src/types/audit.ts","./src/types/client-analytics.ts","./src/types/connections.ts","./src/types/health.ts","./src/types/key-analytics.ts","./src/types/migration.ts","./src/types/settings.types.ts","./src/types/slowlog.ts","./src/types/vector-index-snapshots.ts","./src/types/version.types.ts","./src/utils/key-patterns.ts","./src/webhooks/defaults.ts","./src/webhooks/index.ts","./src/webhooks/types.ts"],"version":"5.9.3"} \ No newline at end of file From 55bd637072a3a09e2c38fb0c0ed6d2acf91c3dc5 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Thu, 26 Mar 2026 21:09:18 +0200 Subject: [PATCH 02/34] Fixed roborev findings --- .../src/migration/analysis/type-sampler.ts | 6 +- .../api/src/migration/migration.controller.ts | 2 + apps/api/src/migration/migration.module.ts | 2 + apps/api/src/migration/migration.service.ts | 117 ++++++++++++------ .../migration/AnalysisProgressBar.tsx | 18 ++- 5 files changed, 100 insertions(+), 45 deletions(-) diff --git a/apps/api/src/migration/analysis/type-sampler.ts b/apps/api/src/migration/analysis/type-sampler.ts index 0d318f1e..b6b2602a 100644 --- a/apps/api/src/migration/analysis/type-sampler.ts +++ b/apps/api/src/migration/analysis/type-sampler.ts @@ -3,6 +3,7 @@ import type Valkey from 'iovalkey'; export interface SampledKey { key: string; type: string; + clientIndex: number; } /** @@ -16,7 +17,8 @@ export async function sampleKeyTypes( ): Promise { const allKeys: SampledKey[] = []; - for (const client of clients) { + for (let ci = 0; ci < clients.length; ci++) { + const client = clients[ci]; const nodeKeys: string[] = []; let cursor = '0'; do { @@ -40,7 +42,7 @@ export async function sampleKeyTypes( if (results) { for (let j = 0; j < batch.length; j++) { const [err, type] = results[j] ?? []; - allKeys.push({ key: batch[j], type: err ? 
'unknown' : String(type) }); + allKeys.push({ key: batch[j], type: err ? 'unknown' : String(type), clientIndex: ci }); } } } diff --git a/apps/api/src/migration/migration.controller.ts b/apps/api/src/migration/migration.controller.ts index 55125aaf..c5fe3cb9 100644 --- a/apps/api/src/migration/migration.controller.ts +++ b/apps/api/src/migration/migration.controller.ts @@ -2,6 +2,8 @@ import { Controller, Get, Post, Delete, Param, Body, NotFoundException, BadReque import type { MigrationAnalysisRequest, StartAnalysisResponse, MigrationAnalysisResult } from '@betterdb/shared'; import { MigrationService } from './migration.service'; +// Migration analysis is intentionally community-tier (no license guard). +// MIGRATION_EXECUTION gating applies to the execution phase only. @Controller('migration') export class MigrationController { constructor(private readonly migrationService: MigrationService) {} diff --git a/apps/api/src/migration/migration.module.ts b/apps/api/src/migration/migration.module.ts index 8178bac4..61bf085f 100644 --- a/apps/api/src/migration/migration.module.ts +++ b/apps/api/src/migration/migration.module.ts @@ -1,8 +1,10 @@ import { Module } from '@nestjs/common'; +import { ConnectionsModule } from '../connections/connections.module'; import { MigrationController } from './migration.controller'; import { MigrationService } from './migration.service'; @Module({ + imports: [ConnectionsModule], controllers: [MigrationController], providers: [MigrationService], exports: [MigrationService], diff --git a/apps/api/src/migration/migration.service.ts b/apps/api/src/migration/migration.service.ts index b1e1b64c..593f3235 100644 --- a/apps/api/src/migration/migration.service.ts +++ b/apps/api/src/migration/migration.service.ts @@ -1,7 +1,7 @@ import { Injectable, Logger, NotFoundException } from '@nestjs/common'; import { randomUUID } from 'crypto'; import Valkey from 'iovalkey'; -import type { MigrationAnalysisRequest, MigrationAnalysisResult, 
StartAnalysisResponse, DataTypeBreakdown, DataTypeCount } from '@betterdb/shared'; +import type { MigrationAnalysisRequest, MigrationAnalysisResult, StartAnalysisResponse, DataTypeBreakdown, DataTypeCount, TtlDistribution } from '@betterdb/shared'; import { ConnectionRegistry } from '../connections/connection-registry.service'; import type { AnalysisJob } from './analysis/analysis-job'; import { sampleKeyTypes } from './analysis/type-sampler'; @@ -57,13 +57,13 @@ export class MigrationService { return undefined; } return { + ...job.result, id: job.id, status: job.status, progress: job.progress, createdAt: job.createdAt, completedAt: job.completedAt, error: job.error, - ...job.result, } as MigrationAnalysisResult; } @@ -150,7 +150,6 @@ export class MigrationService { let isCluster = false; let clusterMasterCount = 0; const scanClients: Valkey[] = []; - let isAdapterClient = false; const clusterEnabled = String(keyspaceInfo['cluster_enabled'] ?? '0'); if (clusterEnabled === '1') { @@ -182,7 +181,6 @@ export class MigrationService { } } else { scanClients.push(adapter.getClient()); - isAdapterClient = true; } job.result.isCluster = isCluster; @@ -209,30 +207,41 @@ export class MigrationService { if (job.cancelled) return; job.progress = 50; - // Step 5: Memory sampling - const memoryClient = isAdapterClient ? adapter.getClient() : (tempClients[0] ?? adapter.getClient()); - const memoryByType = new Map(); + // Step 5: Memory sampling (per-node to avoid cross-slot errors in cluster mode) + const keysByClientIndex = new Map(); + for (const sk of sampledKeys) { + const group = keysByClientIndex.get(sk.clientIndex) ?? 
[]; + group.push(sk); + keysByClientIndex.set(sk.clientIndex, group); + } - for (let i = 0; i < sampledKeys.length; i += 1000) { - if (job.cancelled) return; - const batch = sampledKeys.slice(i, i + 1000); - const pipeline = memoryClient.pipeline(); - for (const { key } of batch) { - pipeline.call('MEMORY', 'USAGE', key, 'SAMPLES', '0'); - } - const results = await pipeline.exec(); - if (results) { - for (let j = 0; j < batch.length; j++) { - const [err, mem] = results[j] ?? []; - const bytes = err ? 0 : Number(mem) || 0; - const t = batch[j].type; - const entry = memoryByType.get(t) ?? { count: 0, bytes: 0 }; - entry.count++; - entry.bytes += bytes; - memoryByType.set(t, entry); + const memoryByType = new Map(); + let memoryProcessed = 0; + + for (const [clientIndex, clientKeys] of keysByClientIndex) { + const client = scanClients[clientIndex]; + for (let i = 0; i < clientKeys.length; i += 1000) { + if (job.cancelled) return; + const batch = clientKeys.slice(i, i + 1000); + const pipeline = client.pipeline(); + for (const { key } of batch) { + pipeline.call('MEMORY', 'USAGE', key, 'SAMPLES', '0'); + } + const results = await pipeline.exec(); + if (results) { + for (let j = 0; j < batch.length; j++) { + const [err, mem] = results[j] ?? []; + const bytes = err ? 0 : Number(mem) || 0; + const t = batch[j].type; + const entry = memoryByType.get(t) ?? { count: 0, bytes: 0 }; + entry.count++; + entry.bytes += bytes; + memoryByType.set(t, entry); + } } + memoryProcessed += batch.length; + job.progress = Math.round(50 + (memoryProcessed / sampledKeys.length) * 15); } - job.progress = Math.round(50 + ((i + batch.length) / sampledKeys.length) * 15); } // Build DataTypeBreakdown @@ -286,26 +295,60 @@ export class MigrationService { if (job.cancelled) return; job.progress = 65; - // Step 6: TTL distribution - const allKeyNames = sampledKeys.map(k => k.key); - const ttlClient = isAdapterClient ? adapter.getClient() : (tempClients[0] ?? 
adapter.getClient()); - job.result.ttlDistribution = await sampleTtls(ttlClient, allKeyNames); + // Step 6: TTL distribution (per-node) + const mergedTtl: TtlDistribution = { + noExpiry: 0, expiresWithin1h: 0, expiresWithin24h: 0, + expiresWithin7d: 0, expiresAfter7d: 0, sampledKeyCount: sampledKeys.length, + }; + for (const [clientIndex, clientKeys] of keysByClientIndex) { + const nodeTtl = await sampleTtls(scanClients[clientIndex], clientKeys.map(k => k.key)); + mergedTtl.noExpiry += nodeTtl.noExpiry; + mergedTtl.expiresWithin1h += nodeTtl.expiresWithin1h; + mergedTtl.expiresWithin24h += nodeTtl.expiresWithin24h; + mergedTtl.expiresWithin7d += nodeTtl.expiresWithin7d; + mergedTtl.expiresAfter7d += nodeTtl.expiresAfter7d; + } + job.result.ttlDistribution = mergedTtl; if (job.cancelled) return; job.progress = 75; - // Step 7: HFE detection + // Step 7: HFE detection (per-node) if (capabilities.dbType === 'valkey') { - const hashKeys = sampledKeys.filter(k => k.type === 'hash').map(k => k.key); + const hashKeys = sampledKeys.filter(k => k.type === 'hash'); const totalEstimatedHashKeys = totalKeys > 0 && sampledKeys.length > 0 ? Math.round((hashKeys.length / sampledKeys.length) * totalKeys) : hashKeys.length; - const hfeClient = isAdapterClient ? adapter.getClient() : (tempClients[0] ?? adapter.getClient()); - const hfeResult = await detectHfe(hfeClient, hashKeys, totalEstimatedHashKeys); - job.result.hfeDetected = hfeResult.hfeDetected; - job.result.hfeSupported = hfeResult.hfeSupported; - job.result.hfeKeyCount = hfeResult.hfeKeyCount; - job.result.hfeOversizedHashesSkipped = hfeResult.hfeOversizedHashesSkipped; + + // Group hash keys by originating client + const hashByClient = new Map(); + for (const hk of hashKeys) { + const group = hashByClient.get(hk.clientIndex) ?? 
[]; + group.push(hk.key); + hashByClient.set(hk.clientIndex, group); + } + + let hfeDetected = false; + let hfeSupported = true; + let hfeKeyCount = 0; + let hfeOversizedHashesSkipped = 0; + + for (const [clientIndex, nodeHashKeys] of hashByClient) { + // Each node's estimated share of total hash keys + const nodeEstimatedTotal = hashKeys.length > 0 + ? Math.round((nodeHashKeys.length / hashKeys.length) * totalEstimatedHashKeys) + : 0; + const hfeResult = await detectHfe(scanClients[clientIndex], nodeHashKeys, nodeEstimatedTotal); + if (!hfeResult.hfeSupported) hfeSupported = false; + if (hfeResult.hfeDetected) hfeDetected = true; + hfeKeyCount += hfeResult.hfeKeyCount; + hfeOversizedHashesSkipped += hfeResult.hfeOversizedHashesSkipped; + } + + job.result.hfeDetected = hfeDetected; + job.result.hfeSupported = hfeSupported; + job.result.hfeKeyCount = hfeKeyCount; + job.result.hfeOversizedHashesSkipped = hfeOversizedHashesSkipped; } else { job.result.hfeSupported = false; job.result.hfeDetected = false; diff --git a/apps/web/src/components/migration/AnalysisProgressBar.tsx b/apps/web/src/components/migration/AnalysisProgressBar.tsx index 502992a3..9f6de9aa 100644 --- a/apps/web/src/components/migration/AnalysisProgressBar.tsx +++ b/apps/web/src/components/migration/AnalysisProgressBar.tsx @@ -1,4 +1,4 @@ -import { useState, useEffect } from 'react'; +import { useState, useEffect, useRef } from 'react'; import { fetchApi } from '../../api/client'; import type { MigrationAnalysisResult } from '@betterdb/shared'; @@ -22,6 +22,12 @@ function getStepLabel(progress: number): string { export function AnalysisProgressBar({ analysisId, onComplete, onError, onCancel }: Props) { const [job, setJob] = useState(null); + const onCompleteRef = useRef(onComplete); + const onErrorRef = useRef(onError); + const onCancelRef = useRef(onCancel); + onCompleteRef.current = onComplete; + onErrorRef.current = onError; + onCancelRef.current = onCancel; useEffect(() => { const interval = 
setInterval(async () => { @@ -30,21 +36,21 @@ export function AnalysisProgressBar({ analysisId, onComplete, onError, onCancel setJob(result); if (result.status === 'completed') { clearInterval(interval); - onComplete(result); + onCompleteRef.current(result); } else if (result.status === 'failed') { clearInterval(interval); - onError(result.error ?? 'Analysis failed'); + onErrorRef.current(result.error ?? 'Analysis failed'); } else if (result.status === 'cancelled') { clearInterval(interval); - onCancel(); + onCancelRef.current(); } } catch { clearInterval(interval); - onError('Analysis job not found or server error'); + onErrorRef.current('Analysis job not found or server error'); } }, 2000); return () => clearInterval(interval); - }, [analysisId, onComplete, onError, onCancel]); + }, [analysisId]); const handleCancel = async () => { try { From 5298ec222796fced8c7d163b07fbd55672dbdd2b Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Fri, 27 Mar 2026 15:01:00 +0200 Subject: [PATCH 03/34] Phase 2 - migration with RedisShake and proprietary key by key batch option --- .claude/settings.local.json | 6 +- Dockerfile | 9 + Dockerfile.prod | 9 + .../execution/command-migration-worker.ts | 201 +++++++++++++ .../src/migration/execution/execution-job.ts | 19 ++ .../api/src/migration/execution/log-parser.ts | 71 +++++ .../migration/execution/redisshake-runner.ts | 24 ++ .../src/migration/execution/toml-builder.ts | 40 +++ .../src/migration/execution/type-handlers.ts | 195 +++++++++++++ .../migration/migration-execution.service.ts | 268 ++++++++++++++++++ .../api/src/migration/migration.controller.ts | 58 +++- apps/api/src/migration/migration.module.ts | 5 +- .../src/components/migration/AnalysisForm.tsx | 32 ++- .../migration/ExecutionLogViewer.tsx | 43 +++ .../components/migration/ExecutionPanel.tsx | 137 +++++++++ .../src/components/migration/ExportBar.tsx | 5 +- apps/web/src/pages/MigrationPage.tsx | 137 ++++++++- packages/shared/src/types/migration.ts | 39 +++ 18 
files changed, 1283 insertions(+), 15 deletions(-) create mode 100644 apps/api/src/migration/execution/command-migration-worker.ts create mode 100644 apps/api/src/migration/execution/execution-job.ts create mode 100644 apps/api/src/migration/execution/log-parser.ts create mode 100644 apps/api/src/migration/execution/redisshake-runner.ts create mode 100644 apps/api/src/migration/execution/toml-builder.ts create mode 100644 apps/api/src/migration/execution/type-handlers.ts create mode 100644 apps/api/src/migration/migration-execution.service.ts create mode 100644 apps/web/src/components/migration/ExecutionLogViewer.tsx create mode 100644 apps/web/src/components/migration/ExecutionPanel.tsx diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 8d16b3c3..8569b8ca 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -172,7 +172,11 @@ "mcp__betterdb__get_anomalies", "Bash(cat:*)", "mcp__betterdb-memory__search_context", - "Bash(test:*)" + "Bash(test:*)", + "Bash(tar:*)", + "Bash(chmod:*)", + "Bash(~/.betterdb/bin/redis-shake:*)", + "Bash(wget:*)" ], "deny": [], "ask": [] diff --git a/Dockerfile b/Dockerfile index 74b0d890..5d01c7c9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -76,6 +76,15 @@ ENV NODE_ENV=production ENV PORT=3001 ENV STORAGE_TYPE=memory +# Install RedisShake binary for migration execution +ARG TARGETARCH +ARG REDISSHAKE_VERSION=4.6.0 +RUN apk add --no-cache wget && \ + wget -qO- "https://github.com/tair-opensource/RedisShake/releases/download/v${REDISSHAKE_VERSION}/redis-shake-v${REDISSHAKE_VERSION}-linux-${TARGETARCH}.tar.gz" \ + | tar -xz --strip-components=0 -C /usr/local/bin ./redis-shake && \ + chmod +x /usr/local/bin/redis-shake && \ + apk del wget + # Create non-root user for security (Docker Scout compliance) RUN addgroup --system --gid 1001 nodejs && \ adduser --system --uid 1001 --ingroup nodejs betterdb diff --git a/Dockerfile.prod b/Dockerfile.prod index bc5bbf87..1ce65152 100644 --- 
a/Dockerfile.prod +++ b/Dockerfile.prod @@ -154,6 +154,15 @@ ENV DB_USERNAME=default ENV STORAGE_TYPE=memory ENV AI_ENABLED=false +# Install RedisShake binary for migration execution +ARG TARGETARCH +ARG REDISSHAKE_VERSION=4.6.0 +RUN apk add --no-cache wget && \ + wget -qO- "https://github.com/tair-opensource/RedisShake/releases/download/v${REDISSHAKE_VERSION}/redis-shake-v${REDISSHAKE_VERSION}-linux-${TARGETARCH}.tar.gz" \ + | tar -xz --strip-components=0 -C /usr/local/bin ./redis-shake && \ + chmod +x /usr/local/bin/redis-shake && \ + apk del wget + # Create non-root user for security (Docker Scout compliance) RUN addgroup --system --gid 1001 nodejs && \ adduser --system --uid 1001 --ingroup nodejs betterdb diff --git a/apps/api/src/migration/execution/command-migration-worker.ts b/apps/api/src/migration/execution/command-migration-worker.ts new file mode 100644 index 00000000..c13b0b7a --- /dev/null +++ b/apps/api/src/migration/execution/command-migration-worker.ts @@ -0,0 +1,201 @@ +import Valkey from 'iovalkey'; +import type { DatabaseConnectionConfig } from '@betterdb/shared'; +import type { ExecutionJob } from './execution-job'; +import { migrateKey } from './type-handlers'; + +const SCAN_COUNT = 500; +const TYPE_BATCH = 500; + +export interface CommandMigrationOptions { + sourceConfig: DatabaseConnectionConfig; + targetConfig: DatabaseConnectionConfig; + sourceIsCluster: boolean; + job: ExecutionJob; + maxLogLines: number; +} + +/** + * Run a command-based migration: SCAN source → TYPE → type-specific read/write → TTL. + * Operates entirely in-process using iovalkey. No external binary needed. 
+ */ +export async function runCommandMigration(opts: CommandMigrationOptions): Promise { + const { sourceConfig, targetConfig, sourceIsCluster, job, maxLogLines } = opts; + const sourceClients: Valkey[] = []; + const targetClient = createClient(targetConfig, 'BetterDB-Migration-Target'); + + try { + await targetClient.connect(); + log(job, maxLogLines, 'Connected to target'); + + // Build source clients (one per cluster master, or single standalone) + if (sourceIsCluster) { + const discoveryClient = createClient(sourceConfig, 'BetterDB-Migration-Discovery'); + await discoveryClient.connect(); + try { + const nodesRaw = await discoveryClient.call('CLUSTER', 'NODES') as string; + const masters = parseClusterMasters(nodesRaw); + log(job, maxLogLines, `Cluster mode: ${masters.length} master(s) detected`); + for (const { host, port } of masters) { + const client = new Valkey({ + host, + port, + username: sourceConfig.username || undefined, + password: sourceConfig.password || undefined, + tls: sourceConfig.tls ? 
{} : undefined, + lazyConnect: true, + connectionName: 'BetterDB-Migration-Source', + }); + await client.connect(); + sourceClients.push(client); + } + } finally { + await discoveryClient.quit(); + } + } else { + const client = createClient(sourceConfig, 'BetterDB-Migration-Source'); + await client.connect(); + sourceClients.push(client); + } + + log(job, maxLogLines, `Connected to source (${sourceClients.length} node(s))`); + + // Count total keys across all source nodes for progress tracking + let totalKeys = 0; + for (const client of sourceClients) { + const dbsize = await client.dbsize(); + totalKeys += dbsize; + } + job.totalKeys = totalKeys; + log(job, maxLogLines, `Total keys to migrate: ${totalKeys.toLocaleString()}`); + + if (totalKeys === 0) { + log(job, maxLogLines, 'No keys to migrate'); + job.progress = 100; + return; + } + + // Scan and migrate each source node + let keysProcessed = 0; + let keysSkipped = 0; + + for (let nodeIdx = 0; nodeIdx < sourceClients.length; nodeIdx++) { + const sourceClient = sourceClients[nodeIdx]; + if (isCancelled(job)) return; + + if (sourceClients.length > 1) { + log(job, maxLogLines, `Scanning node ${nodeIdx + 1}/${sourceClients.length}...`); + } + + let cursor = '0'; + do { + if (isCancelled(job)) return; + + const [nextCursor, keys] = await sourceClient.scan(cursor, 'COUNT', SCAN_COUNT); + cursor = nextCursor; + + if (keys.length === 0) continue; + + // Batch TYPE lookup + const types = await batchType(sourceClient, keys); + + // Migrate each key + for (let i = 0; i < keys.length; i++) { + if (isCancelled(job)) return; + + const key = keys[i]; + const type = types[i]; + + if (type === 'none') { + // Key expired between SCAN and TYPE + keysProcessed++; + continue; + } + + const result = await migrateKey(sourceClient, targetClient, key, type); + + if (result.ok) { + job.keysTransferred++; + } else { + keysSkipped++; + job.keysSkipped = keysSkipped; + log(job, maxLogLines, `SKIP ${key} (${type}): ${result.error}`); + } + 
+ keysProcessed++; + job.progress = Math.min(99, Math.round((keysProcessed / totalKeys) * 100)); + } + + // Periodic progress log + if (keysProcessed % 5000 < keys.length) { + log(job, maxLogLines, + `Progress: ${keysProcessed.toLocaleString()}/${totalKeys.toLocaleString()} keys ` + + `(${job.keysTransferred.toLocaleString()} transferred, ${keysSkipped} skipped)`); + } + } while (cursor !== '0'); + } + + job.progress = 100; + log(job, maxLogLines, + `Migration complete: ${job.keysTransferred.toLocaleString()} transferred, ${keysSkipped} skipped out of ${totalKeys.toLocaleString()} total`); + + } finally { + await Promise.allSettled([...sourceClients, targetClient].map(c => c.quit())); + } +} + +// ── Helpers ── + +function createClient(config: DatabaseConnectionConfig, name: string): Valkey { + return new Valkey({ + host: config.host, + port: config.port, + username: config.username || undefined, + password: config.password || undefined, + tls: config.tls ? {} : undefined, + lazyConnect: true, + connectionName: name, + }); +} + +function parseClusterMasters(nodesRaw: string): Array<{ host: string; port: number }> { + const results: Array<{ host: string; port: number }> = []; + for (const line of nodesRaw.split('\n')) { + if (!line.trim()) continue; + const parts = line.split(' '); + const flags = parts[2] ?? ''; + if (!flags.includes('master')) continue; + // address format: host:port@clusterport + const addrPart = (parts[1] ?? '').split('@')[0]; + const [host, portStr] = addrPart.split(':'); + const port = parseInt(portStr, 10); + if (host && !isNaN(port)) { + results.push({ host, port }); + } + } + return results; +} + +async function batchType(client: Valkey, keys: string[]): Promise { + const pipeline = client.pipeline(); + for (const key of keys) { + pipeline.type(key); + } + const results = await pipeline.exec(); + return (results ?? 
[]).map(([err, val]) => { + if (err) return 'none'; + return String(val); + }); +} + +function isCancelled(job: ExecutionJob): boolean { + return (job.status as string) === 'cancelled'; +} + +function log(job: ExecutionJob, maxLines: number, message: string): void { + const timestamp = new Date().toISOString().replace('T', ' ').replace('Z', ''); + const line = `[${timestamp}] ${message}`; + job.logs.push(line); + if (job.logs.length > maxLines) { + job.logs.shift(); + } +} diff --git a/apps/api/src/migration/execution/execution-job.ts b/apps/api/src/migration/execution/execution-job.ts new file mode 100644 index 00000000..e4e38258 --- /dev/null +++ b/apps/api/src/migration/execution/execution-job.ts @@ -0,0 +1,19 @@ +import type { ChildProcess } from 'child_process'; +import type { ExecutionJobStatus, ExecutionMode } from '@betterdb/shared'; + +export interface ExecutionJob { + id: string; + mode: ExecutionMode; + status: ExecutionJobStatus; + startedAt: number; + completedAt?: number; + error?: string; + keysTransferred: number; + bytesTransferred: number; + keysSkipped: number; + totalKeys: number; + logs: string[]; // rolling, capped at MAX_LOG_LINES = 500 + progress: number | null; + process: ChildProcess | null; // redis_shake mode only + tomlPath: string | null; // redis_shake mode only +} diff --git a/apps/api/src/migration/execution/log-parser.ts b/apps/api/src/migration/execution/log-parser.ts new file mode 100644 index 00000000..6d97e2d5 --- /dev/null +++ b/apps/api/src/migration/execution/log-parser.ts @@ -0,0 +1,71 @@ +export interface ParsedLogLine { + keysTransferred: number | null; + bytesTransferred: number | null; + progress: number | null; // 0–100 +} + +const NULL_RESULT: ParsedLogLine = { keysTransferred: null, bytesTransferred: null, progress: null }; + +export function parseLogLine(line: string): ParsedLogLine { + // Strategy 1: Try JSON parse + try { + const obj = JSON.parse(line); + if (typeof obj === 'object' && obj !== null) { + const 
scanned = + obj?.counts?.scanned ?? + obj?.key_counts?.scanned ?? + obj?.scanned ?? + null; + const total = + obj?.counts?.total ?? + obj?.key_counts?.total ?? + obj?.total ?? + null; + const bytes = + obj?.bytes ?? + obj?.bytes_transferred ?? + null; + + const keysTransferred = typeof scanned === 'number' ? scanned : null; + const bytesTransferred = typeof bytes === 'number' ? bytes : null; + let progress: number | null = null; + + if (typeof scanned === 'number' && typeof total === 'number' && total > 0) { + progress = Math.min(100, Math.round((scanned / total) * 100)); + } + + if (keysTransferred !== null || bytesTransferred !== null || progress !== null) { + return { keysTransferred, bytesTransferred, progress }; + } + } + } catch { + // Not JSON — fall through to regex + } + + // Strategy 2: Regex patterns + const result: ParsedLogLine = { keysTransferred: null, bytesTransferred: null, progress: null }; + + const scannedMatch = line.match(/scanned[=: ]+(\d+)/i); + if (scannedMatch) { + result.keysTransferred = parseInt(scannedMatch[1], 10); + } + + const totalMatch = line.match(/total[=: ]+(\d+)/i); + if (totalMatch && result.keysTransferred !== null) { + const total = parseInt(totalMatch[1], 10); + if (total > 0) { + result.progress = Math.min(100, Math.round((result.keysTransferred / total) * 100)); + } + } + + const percentMatch = line.match(/(\d+(?:\.\d+)?)\s*%/); + if (percentMatch && result.progress === null) { + result.progress = Math.min(100, Math.round(parseFloat(percentMatch[1]))); + } + + if (result.keysTransferred !== null || result.bytesTransferred !== null || result.progress !== null) { + return result; + } + + return NULL_RESULT; +} diff --git a/apps/api/src/migration/execution/redisshake-runner.ts b/apps/api/src/migration/execution/redisshake-runner.ts new file mode 100644 index 00000000..68377d83 --- /dev/null +++ b/apps/api/src/migration/execution/redisshake-runner.ts @@ -0,0 +1,24 @@ +import { existsSync } from 'fs'; +import { join } from 
'path'; +import * as os from 'os'; + +export function findRedisShakeBinary(): string { + // 1. Explicit env override + if (process.env.REDIS_SHAKE_PATH && existsSync(process.env.REDIS_SHAKE_PATH)) { + return process.env.REDIS_SHAKE_PATH; + } + // 2. Docker image location + if (existsSync('/usr/local/bin/redis-shake')) { + return '/usr/local/bin/redis-shake'; + } + // 3. npx install location + const npxPath = join(os.homedir(), '.betterdb', 'bin', 'redis-shake'); + if (npxPath && existsSync(npxPath)) { + return npxPath; + } + throw new Error( + 'RedisShake binary not found. ' + + 'Set REDIS_SHAKE_PATH env var, or install it to ~/.betterdb/bin/redis-shake. ' + + 'See https://docs.betterdb.com/migration for instructions.', + ); +} diff --git a/apps/api/src/migration/execution/toml-builder.ts b/apps/api/src/migration/execution/toml-builder.ts new file mode 100644 index 00000000..14645d2e --- /dev/null +++ b/apps/api/src/migration/execution/toml-builder.ts @@ -0,0 +1,40 @@ +import type { DatabaseConnectionConfig } from '@betterdb/shared'; + +function escapeTomlString(value: string): string { + return value.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); +} + +export function buildScanReaderToml( + source: DatabaseConnectionConfig, + target: DatabaseConnectionConfig, + sourceIsCluster: boolean, +): string { + const srcUsername = (!source.username || source.username === 'default') ? '' : source.username; + const srcPassword = source.password ?? ''; + const tgtUsername = (!target.username || target.username === 'default') ? '' : target.username; + const tgtPassword = target.password ?? ''; + + let toml = `[scan_reader] +address = "${source.host}:${source.port}" +username = "${escapeTomlString(srcUsername)}" +password = "${escapeTomlString(srcPassword)}" +tls = ${source.tls ? 
'true' : 'false'} +`; + + if (sourceIsCluster) { + toml += `cluster = true\n`; + } + + toml += ` +[redis_writer] +address = "${target.host}:${target.port}" +username = "${escapeTomlString(tgtUsername)}" +password = "${escapeTomlString(tgtPassword)}" +tls = ${target.tls ? 'true' : 'false'} + +[advanced] +log_level = "info" +`; + + return toml; +} diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts new file mode 100644 index 00000000..a98408bf --- /dev/null +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -0,0 +1,195 @@ +import type Valkey from 'iovalkey'; + +// Threshold above which we use cursor-based reads (HSCAN/SSCAN/ZSCAN) instead of bulk reads +const LARGE_KEY_THRESHOLD = 10_000; +const SCAN_BATCH = 1000; +const LIST_CHUNK = 1000; +const STREAM_CHUNK = 1000; + +export interface MigratedKey { + key: string; + type: string; + ok: boolean; + error?: string; +} + +/** + * Migrate a single key from source to target using type-specific commands. + * Returns success/failure per key. Never throws — errors are captured in the result. + */ +export async function migrateKey( + source: Valkey, + target: Valkey, + key: string, + type: string, +): Promise { + try { + switch (type) { + case 'string': + await migrateString(source, target, key); + break; + case 'hash': + await migrateHash(source, target, key); + break; + case 'list': + await migrateList(source, target, key); + break; + case 'set': + await migrateSet(source, target, key); + break; + case 'zset': + await migrateZset(source, target, key); + break; + case 'stream': + await migrateStream(source, target, key); + break; + default: + return { key, type, ok: false, error: `Unsupported type: ${type}` }; + } + // Preserve TTL + await migrateTtl(source, target, key); + return { key, type, ok: true }; + } catch (err: unknown) { + const message = err instanceof Error ? 
err.message : String(err); + return { key, type, ok: false, error: message }; + } +} + +// ── String ── + +async function migrateString(source: Valkey, target: Valkey, key: string): Promise { + const value = await source.getBuffer(key); + if (value === null) return; // key expired/deleted between SCAN and GET + await target.set(key, value); +} + +// ── Hash ── + +async function migrateHash(source: Valkey, target: Valkey, key: string): Promise { + const len = await source.hlen(key); + if (len === 0) return; + + if (len <= LARGE_KEY_THRESHOLD) { + // Small hash: single HGETALL + const data = await source.hgetallBuffer(key); + if (!data || Object.keys(data).length === 0) return; + const args: (string | Buffer)[] = [key]; + for (const [field, val] of Object.entries(data)) { + args.push(field, val as Buffer); + } + await (target as any).hset(...args); + } else { + // Large hash: HSCAN + let cursor = '0'; + do { + const [next, fields] = await source.hscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); + cursor = String(next); + if (fields.length === 0) continue; + const args: (string | Buffer)[] = [key]; + for (let i = 0; i < fields.length; i += 2) { + args.push(fields[i], fields[i + 1]); + } + await (target as any).hset(...args); + } while (cursor !== '0'); + } +} + +// ── List ── + +async function migrateList(source: Valkey, target: Valkey, key: string): Promise { + const len = await source.llen(key); + if (len === 0) return; + + // Delete target key first to avoid appending to existing data + await target.del(key); + + for (let start = 0; start < len; start += LIST_CHUNK) { + const end = Math.min(start + LIST_CHUNK - 1, len - 1); + const items = await source.lrangeBuffer(key, start, end); + if (items.length === 0) break; + await (target as any).rpush(key, ...items); + } +} + +// ── Set ── + +async function migrateSet(source: Valkey, target: Valkey, key: string): Promise { + const card = await source.scard(key); + if (card === 0) return; + + if (card <= LARGE_KEY_THRESHOLD) 
{ + const members = await source.smembersBuffer(key); + if (members.length === 0) return; + await (target as any).sadd(key, ...members); + } else { + let cursor = '0'; + do { + const [next, members] = await source.sscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); + cursor = String(next); + if (members.length === 0) continue; + await (target as any).sadd(key, ...members); + } while (cursor !== '0'); + } +} + +// ── Sorted Set ── + +async function migrateZset(source: Valkey, target: Valkey, key: string): Promise { + const card = await source.zcard(key); + if (card === 0) return; + + if (card <= LARGE_KEY_THRESHOLD) { + const data = await (source as any).call('ZRANGE', key, '0', '-1', 'WITHSCORES') as string[]; + if (!data || data.length === 0) return; + // data is [member, score, member, score, ...] + const pipeline = target.pipeline(); + for (let i = 0; i < data.length; i += 2) { + pipeline.zadd(key, data[i + 1], data[i]); + } + await pipeline.exec(); + } else { + let cursor = '0'; + do { + const [next, entries] = await source.zscan(key, cursor, 'COUNT', SCAN_BATCH); + cursor = next; + if (entries.length === 0) continue; + const pipeline = target.pipeline(); + for (let i = 0; i < entries.length; i += 2) { + pipeline.zadd(key, entries[i + 1], entries[i]); + } + await pipeline.exec(); + } while (cursor !== '0'); + } +} + +// ── Stream ── + +async function migrateStream(source: Valkey, target: Valkey, key: string): Promise { + let lastId = '-'; + let hasMore = true; + + while (hasMore) { + const entries = await source.xrange(key, lastId === '-' ? 
'-' : `(${lastId}`, '+', 'COUNT', STREAM_CHUNK); + if (!entries || entries.length === 0) { + hasMore = false; + break; + } + for (const [id, fields] of entries) { + // XADD with explicit ID to preserve ordering + await (target as any).xadd(key, id, ...fields); + lastId = id; + } + if (entries.length < STREAM_CHUNK) { + hasMore = false; + } + } +} + +// ── TTL ── + +async function migrateTtl(source: Valkey, target: Valkey, key: string): Promise { + const pttl = await source.pttl(key); + if (pttl > 0) { + await target.pexpire(key, pttl); + } +} diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts new file mode 100644 index 00000000..895a0008 --- /dev/null +++ b/apps/api/src/migration/migration-execution.service.ts @@ -0,0 +1,268 @@ +import { Injectable, Logger, BadRequestException, NotFoundException, ServiceUnavailableException } from '@nestjs/common'; +import { randomUUID } from 'crypto'; +import { spawn } from 'child_process'; +import { writeFileSync, unlinkSync, existsSync } from 'fs'; +import { join } from 'path'; +import * as os from 'os'; +import type { MigrationExecutionRequest, MigrationExecutionResult, StartExecutionResponse, ExecutionMode } from '@betterdb/shared'; +import { ConnectionRegistry } from '../connections/connection-registry.service'; +import type { ExecutionJob } from './execution/execution-job'; +import { findRedisShakeBinary } from './execution/redisshake-runner'; +import { buildScanReaderToml } from './execution/toml-builder'; +import { parseLogLine } from './execution/log-parser'; +import { runCommandMigration } from './execution/command-migration-worker'; + +@Injectable() +export class MigrationExecutionService { + private readonly logger = new Logger(MigrationExecutionService.name); + private jobs = new Map(); + private readonly MAX_JOBS = 10; + private readonly MAX_LOG_LINES = 500; + + constructor( + private readonly connectionRegistry: ConnectionRegistry, + ) {} + + 
async startExecution(req: MigrationExecutionRequest): Promise { + const mode: ExecutionMode = req.mode ?? 'redis_shake'; + + // 1. Resolve both connections (throws NotFoundException if missing) + const sourceAdapter = this.connectionRegistry.get(req.sourceConnectionId); + const sourceConfig = this.connectionRegistry.getConfig(req.sourceConnectionId); + this.connectionRegistry.get(req.targetConnectionId); + const targetConfig = this.connectionRegistry.getConfig(req.targetConnectionId); + + if (!sourceConfig || !targetConfig) { + throw new NotFoundException('Connection config not found'); + } + + // 2. Validate different connections + if (req.sourceConnectionId === req.targetConnectionId) { + throw new BadRequestException('Source and target must be different connections'); + } + + // 3. Detect if source is cluster + const info = await sourceAdapter.getInfo(['cluster']); + const clusterEnabled = String((info as Record)['cluster_enabled'] ?? '0') === '1'; + + // 4. For redis_shake mode, locate the binary upfront + let binaryPath: string | undefined; + if (mode === 'redis_shake') { + try { + binaryPath = findRedisShakeBinary(); + } catch (err: unknown) { + const message = err instanceof Error ? err.message : String(err); + throw new ServiceUnavailableException(message); + } + } + + // 5. Create the job + const id = randomUUID(); + const job: ExecutionJob = { + id, + mode, + status: 'pending', + startedAt: Date.now(), + keysTransferred: 0, + bytesTransferred: 0, + keysSkipped: 0, + totalKeys: 0, + logs: [], + progress: null, + process: null, + tomlPath: null, + }; + this.jobs.set(id, job); + + // 6. Evict old jobs + this.evictOldJobs(); + + // 7. 
Fire and forget based on mode + if (mode === 'redis_shake') { + const tomlContent = buildScanReaderToml(sourceConfig, targetConfig, clusterEnabled); + const tomlPath = join(os.tmpdir(), `${id}.toml`); + writeFileSync(tomlPath, tomlContent, 'utf-8'); + job.tomlPath = tomlPath; + + this.runRedisShake(job, binaryPath!).catch(err => { + this.logger.error(`Execution ${id} failed: ${err.message}`); + }); + } else { + this.runCommandMode(job, sourceConfig, targetConfig, clusterEnabled).catch(err => { + this.logger.error(`Execution ${id} failed: ${err.message}`); + }); + } + + return { id, status: 'pending' }; + } + + // ── RedisShake mode ── + + private async runRedisShake(job: ExecutionJob, binaryPath: string): Promise { + try { + const proc = spawn(binaryPath, [job.tomlPath!], { + stdio: ['ignore', 'pipe', 'pipe'], + }); + job.process = proc; + job.status = 'running'; + + const handleData = (chunk: Buffer) => { + const lines = chunk.toString().split('\n'); + for (const line of lines) { + if (!line) continue; + job.logs.push(line); + if (job.logs.length > this.MAX_LOG_LINES) { + job.logs.shift(); + } + const parsed = parseLogLine(line); + if (parsed.keysTransferred !== null) job.keysTransferred = parsed.keysTransferred; + if (parsed.bytesTransferred !== null) job.bytesTransferred = parsed.bytesTransferred; + if (parsed.progress !== null) job.progress = parsed.progress; + } + }; + + proc.stdout.on('data', handleData); + proc.stderr.on('data', handleData); + + const code = await new Promise((resolve, reject) => { + proc.on('exit', (exitCode) => resolve(exitCode ?? 1)); + proc.on('error', reject); + }); + + const currentStatus = job.status as string; + if (code === 0) { + job.status = 'completed'; + job.progress = 100; + } else if (currentStatus !== 'cancelled') { + job.status = 'failed'; + job.error = `RedisShake exited with code ${code}`; + } + } catch (err: unknown) { + if ((job.status as string) !== 'cancelled') { + const message = err instanceof Error ? 
err.message : String(err); + job.status = 'failed'; + job.error = message; + this.logger.error(`Execution ${job.id} error: ${message}`); + } + } finally { + if (!job.completedAt) { + job.completedAt = Date.now(); + } + if (job.tomlPath) { + try { + if (existsSync(job.tomlPath)) { + unlinkSync(job.tomlPath); + } + } catch { /* ignore cleanup errors */ } + } + job.process = null; + job.tomlPath = null; + } + } + + // ── Command-based mode ── + + private async runCommandMode( + job: ExecutionJob, + sourceConfig: Parameters[0]['sourceConfig'], + targetConfig: Parameters[0]['targetConfig'], + sourceIsCluster: boolean, + ): Promise { + job.status = 'running'; + try { + await runCommandMigration({ + sourceConfig, + targetConfig, + sourceIsCluster, + job, + maxLogLines: this.MAX_LOG_LINES, + }); + + const currentStatus = job.status as string; + if (currentStatus !== 'cancelled') { + job.status = 'completed'; + } + } catch (err: unknown) { + if ((job.status as string) !== 'cancelled') { + const message = err instanceof Error ? 
err.message : String(err); + job.status = 'failed'; + job.error = message; + this.logger.error(`Execution ${job.id} error: ${message}`); + } + } finally { + if (!job.completedAt) { + job.completedAt = Date.now(); + } + } + } + + // ── Shared methods ── + + stopExecution(id: string): boolean { + const job = this.jobs.get(id); + if (!job) return false; + + // Idempotent for terminal states + if (job.status === 'completed' || job.status === 'failed' || job.status === 'cancelled') { + return true; + } + + job.status = 'cancelled'; + + // For redis_shake mode, kill the subprocess + if (job.process) { + const proc = job.process; + try { + proc.kill('SIGTERM'); + } catch { /* process may already be dead */ } + + setTimeout(() => { + if (job.process) { + try { + proc.kill('SIGKILL'); + } catch { /* ignore */ } + } + }, 3000); + } + // For command mode, the worker checks job.status === 'cancelled' between batches + + return true; + } + + getExecution(id: string): MigrationExecutionResult | undefined { + const job = this.jobs.get(id); + if (!job) return undefined; + + return { + id: job.id, + status: job.status, + mode: job.mode, + startedAt: job.startedAt, + completedAt: job.completedAt, + error: job.error, + keysTransferred: job.keysTransferred, + bytesTransferred: job.bytesTransferred, + keysSkipped: job.keysSkipped, + totalKeys: job.totalKeys || undefined, + logs: job.logs, + progress: job.progress, + }; + } + + private evictOldJobs(): void { + if (this.jobs.size < this.MAX_JOBS) return; + + const terminal = Array.from(this.jobs.entries()) + .filter(([, j]) => j.status === 'completed' || j.status === 'failed' || j.status === 'cancelled') + .sort((a, b) => a[1].startedAt - b[1].startedAt); + + for (const [id] of terminal) { + if (this.jobs.size < this.MAX_JOBS) break; + this.jobs.delete(id); + } + + if (this.jobs.size >= this.MAX_JOBS) { + this.logger.warn(`Execution job limit reached (${this.MAX_JOBS}). 
Cannot evict running jobs.`); + } + } +} diff --git a/apps/api/src/migration/migration.controller.ts b/apps/api/src/migration/migration.controller.ts index c5fe3cb9..6d430dbd 100644 --- a/apps/api/src/migration/migration.controller.ts +++ b/apps/api/src/migration/migration.controller.ts @@ -1,12 +1,21 @@ -import { Controller, Get, Post, Delete, Param, Body, NotFoundException, BadRequestException } from '@nestjs/common'; -import type { MigrationAnalysisRequest, StartAnalysisResponse, MigrationAnalysisResult } from '@betterdb/shared'; +import { Controller, Get, Post, Delete, Param, Body, UseGuards, NotFoundException, BadRequestException } from '@nestjs/common'; +import type { MigrationAnalysisRequest, StartAnalysisResponse, MigrationAnalysisResult, MigrationExecutionRequest, StartExecutionResponse, MigrationExecutionResult } from '@betterdb/shared'; +import { Feature } from '@betterdb/shared'; +import { LicenseGuard } from '@proprietary/licenses'; +import { RequiresFeature } from '@proprietary/licenses/requires-feature.decorator'; import { MigrationService } from './migration.service'; +import { MigrationExecutionService } from './migration-execution.service'; // Migration analysis is intentionally community-tier (no license guard). // MIGRATION_EXECUTION gating applies to the execution phase only. 
@Controller('migration') export class MigrationController { - constructor(private readonly migrationService: MigrationService) {} + constructor( + private readonly migrationService: MigrationService, + private readonly executionService: MigrationExecutionService, + ) {} + + // ── Analysis endpoints (community-tier) ── @Post('analysis') async startAnalysis(@Body() body: MigrationAnalysisRequest): Promise { @@ -44,4 +53,47 @@ export class MigrationController { } return { cancelled: true }; } + + // ── Execution endpoints (Pro-tier) ── + + @Post('execution') + @UseGuards(LicenseGuard) + @RequiresFeature(Feature.MIGRATION_EXECUTION) + async startExecution(@Body() body: MigrationExecutionRequest): Promise { + if (!body.sourceConnectionId) { + throw new BadRequestException('sourceConnectionId is required'); + } + if (!body.targetConnectionId) { + throw new BadRequestException('targetConnectionId is required'); + } + if (body.sourceConnectionId === body.targetConnectionId) { + throw new BadRequestException('Source and target must be different connections'); + } + if (body.mode && body.mode !== 'redis_shake' && body.mode !== 'command') { + throw new BadRequestException('mode must be "redis_shake" or "command"'); + } + return this.executionService.startExecution(body); + } + + @Get('execution/:id') + @UseGuards(LicenseGuard) + @RequiresFeature(Feature.MIGRATION_EXECUTION) + getExecution(@Param('id') id: string): MigrationExecutionResult { + const result = this.executionService.getExecution(id); + if (!result) { + throw new NotFoundException(`Execution job '${id}' not found`); + } + return result; + } + + @Delete('execution/:id') + @UseGuards(LicenseGuard) + @RequiresFeature(Feature.MIGRATION_EXECUTION) + stopExecution(@Param('id') id: string): { stopped: true } { + const found = this.executionService.stopExecution(id); + if (!found) { + throw new NotFoundException(`Execution job '${id}' not found`); + } + return { stopped: true }; + } } diff --git 
a/apps/api/src/migration/migration.module.ts b/apps/api/src/migration/migration.module.ts index 61bf085f..6e920e6b 100644 --- a/apps/api/src/migration/migration.module.ts +++ b/apps/api/src/migration/migration.module.ts @@ -2,11 +2,12 @@ import { Module } from '@nestjs/common'; import { ConnectionsModule } from '../connections/connections.module'; import { MigrationController } from './migration.controller'; import { MigrationService } from './migration.service'; +import { MigrationExecutionService } from './migration-execution.service'; @Module({ imports: [ConnectionsModule], controllers: [MigrationController], - providers: [MigrationService], - exports: [MigrationService], + providers: [MigrationService, MigrationExecutionService], + exports: [MigrationService, MigrationExecutionService], }) export class MigrationModule {} diff --git a/apps/web/src/components/migration/AnalysisForm.tsx b/apps/web/src/components/migration/AnalysisForm.tsx index 6d6c07d9..f655ab2b 100644 --- a/apps/web/src/components/migration/AnalysisForm.tsx +++ b/apps/web/src/components/migration/AnalysisForm.tsx @@ -20,6 +20,20 @@ export function AnalysisForm({ onStart }: Props) { targetConnectionId !== '' && sourceConnectionId === targetConnectionId; + // The API returns connectionType on each connection but the Connection + // interface in useConnection doesn't surface it. Cast to access at runtime. 
+ const isAgentConnection = (id: string): boolean => { + if (!id) return false; + const conn = connections.find(c => c.id === id) as + | (typeof connections[number] & { connectionType?: 'direct' | 'agent' }) + | undefined; + return conn?.connectionType === 'agent'; + }; + const hasAgentConnection = + isAgentConnection(sourceConnectionId) || isAgentConnection(targetConnectionId); + + const isCloudMode = import.meta.env.VITE_CLOUD_MODE === 'true'; + const handleSubmit = async (e: React.FormEvent) => { e.preventDefault(); if (!sourceConnectionId || !targetConnectionId || sameConnection) return; @@ -79,6 +93,22 @@ export function AnalysisForm({ onStart }: Props) { )}
+ {hasAgentConnection && ( +

+ One or more selected instances is connected via agent. Contact us at{' '} + support@betterdb.com to + plan your migration — we'll help you do it safely. +

+ )} + + {isCloudMode && ( +

+ Migration execution is not available in BetterDB Cloud. Contact us at{' '} + support@betterdb.com to + plan your migration. +

+ )} +
setExecutionMode(e.target.value as ExecutionMode)} + className="text-sm border rounded-md px-2 py-1 bg-background" + > + + + +
+ )} + {!canExecute ? ( +
+ +

+ Migration execution requires a Pro license. Upgrade at betterdb.com/pricing +

+
+ ) : ( +
+ + {blockingCount > 0 && ( +

+ {blockingCount} blocking issue{blockingCount !== 1 ? 's' : ''} detected — proceed at your own risk. +

+ )} +
+ )} + + + + + )} + + {phase === 'executing' && job && executionId && ( + <> + + setPhase('executed')} + /> + + )} + + {phase === 'executed' && job && executionId && ( <> - + + {/* already stopped */}} + /> +

+ Post-migration validation requires a Pro license. Upgrade at betterdb.com/pricing +

+ + ) : ( + + )} + + + + + )} + + {phase === 'validating' && job && validationId && ( + <> + + {executionId && ( + {/* already stopped */}} + /> + )} + setPhase('validated')} + /> + + )} + + {phase === 'validated' && job && validationId && ( + <> + + + {executionId && ( + {/* already stopped */}} + /> + )} + diff --git a/apps/web/src/components/migration/ExecutionPanel.tsx b/apps/web/src/components/migration/ExecutionPanel.tsx index 09bf9260..4368f265 100644 --- a/apps/web/src/components/migration/ExecutionPanel.tsx +++ b/apps/web/src/components/migration/ExecutionPanel.tsx @@ -116,6 +116,22 @@ export function ExecutionPanel({ executionId, onStopped }: Props) { )} + {/* Progress bar — shown while running */} + {execution.status === 'running' && execution.progress != null && ( +
+
+ Migration progress + {Math.min(100, execution.progress)}% +
+
+
+
+
+ )} + {/* Status banners */} {execution.status === 'failed' && (
diff --git a/apps/web/src/components/migration/ExportBar.tsx b/apps/web/src/components/migration/ExportBar.tsx index 234f76f2..917924f0 100644 --- a/apps/web/src/components/migration/ExportBar.tsx +++ b/apps/web/src/components/migration/ExportBar.tsx @@ -19,19 +19,19 @@ export function ExportBar({ job, phase }: Props) { }; return ( -
+ <> -
+ ); } diff --git a/apps/web/src/components/migration/MigrationReport.tsx b/apps/web/src/components/migration/MigrationReport.tsx index 099a5919..d770e212 100644 --- a/apps/web/src/components/migration/MigrationReport.tsx +++ b/apps/web/src/components/migration/MigrationReport.tsx @@ -15,8 +15,10 @@ export function MigrationReport({ job }: Props) {
- - +
+ + +
diff --git a/apps/web/src/components/migration/sections/CommandSection.tsx b/apps/web/src/components/migration/sections/CommandSection.tsx index 926f184e..6b4bf937 100644 --- a/apps/web/src/components/migration/sections/CommandSection.tsx +++ b/apps/web/src/components/migration/sections/CommandSection.tsx @@ -13,13 +13,8 @@ const SOURCE_LABELS: Record = { export function CommandSection({ job }: Props) { const cmd = job.commandAnalysis; - if (!cmd) { - return ( -
-

Command Analysis

-

Not available for this analysis.

-
- ); + if (!cmd || cmd.topCommands.length === 0) { + return null; } return ( diff --git a/apps/web/src/components/migration/sections/DataTypeSection.tsx b/apps/web/src/components/migration/sections/DataTypeSection.tsx index b25d0f90..370a7f65 100644 --- a/apps/web/src/components/migration/sections/DataTypeSection.tsx +++ b/apps/web/src/components/migration/sections/DataTypeSection.tsx @@ -34,7 +34,7 @@ export function DataTypeSection({ job }: Props) { return (

Data Types

-
+
diff --git a/apps/web/src/pages/MigrationPage.tsx b/apps/web/src/pages/MigrationPage.tsx index 3c17a273..1c3985bc 100644 --- a/apps/web/src/pages/MigrationPage.tsx +++ b/apps/web/src/pages/MigrationPage.tsx @@ -1,4 +1,4 @@ -import { useState } from 'react'; +import { useState, useRef, useEffect } from 'react'; import type { MigrationAnalysisResult, MigrationExecutionResult, ExecutionMode } from '@betterdb/shared'; import { Feature } from '@betterdb/shared'; import { fetchApi } from '../api/client'; @@ -12,6 +12,67 @@ import { ValidationPanel } from '../components/migration/ValidationPanel'; type Phase = 'idle' | 'analyzing' | 'analyzed' | 'executing' | 'executed' | 'validating' | 'validated'; +// ── Helpers ── + +function formatBytes(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; + return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`; +} + +function stepIndex(phase: Phase): number { + if (phase === 'idle') return 0; + if (phase === 'analyzing' || phase === 'analyzed') return 1; + return 2; +} + +// ── Small shared components ── + +function LockIcon() { + return ( + + + + ); +} + +const STEPS = ['Configure', 'Analyse', 'Migrate'] as const; + +function StepIndicator({ phase, onBack }: { phase: Phase; onBack?: () => void }) { + const current = stepIndex(phase); + return ( + + ); +} + +// ── Main page ── + export function MigrationPage() { const [phase, setPhase] = useState('idle'); const [analysisId, setAnalysisId] = useState(null); @@ -26,18 +87,47 @@ export function MigrationPage() { const blockingCount = job?.blockingCount ?? 
0; const [executionMode, setExecutionMode] = useState('redis_shake'); - const handleStartMigration = async () => { - if (!job?.sourceConnectionId || !job?.targetConnectionId) return; + // Issue 1 + 4: confirmation dialog state + const [showConfirmDialog, setShowConfirmDialog] = useState(false); + const [migrationStarting, setMigrationStarting] = useState(false); + + // Scroll target for validation section + const validationRef = useRef(null); + + // Issue 15: history + const [history, setHistory] = useState([]); + const [expandedHistoryId, setExpandedHistoryId] = useState(null); + + // Scroll to validation section when it appears + useEffect(() => { + if (phase === 'validating') { + validationRef.current?.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } + }, [phase]); + + // General cleanup: centralized reset + const resetToIdle = (saveToHistory = true) => { + if (saveToHistory && job) { + setHistory(prev => [job, ...prev].slice(0, 5)); + } + setPhase('idle'); + setJob(null); + setAnalysisId(null); + setExecutionId(null); + setValidationId(null); + setExecutionResult(null); + }; - const modeLabel = executionMode === 'command' ? 'command-based' : 'DUMP/RESTORE (RedisShake)'; - const warning = blockingCount > 0 - ? `\n\nWARNING: There ${blockingCount === 1 ? 'is' : 'are'} ${blockingCount} unresolved blocking issue${blockingCount !== 1 ? 's' : ''}. Proceeding may cause data loss or incompatibility.` - : ''; - const confirmed = window.confirm( - `This will start copying data from ${job.sourceConnectionName ?? 'source'} to ${job.targetConnectionName ?? 'target'} using ${modeLabel} mode. 
The target instance will receive all scanned keys.${warning}\n\nContinue?`, - ); - if (!confirmed) return; + // Issue 1: open dialog instead of window.confirm + const handleStartMigration = () => { + if (!job?.sourceConnectionId || !job?.targetConnectionId) return; + setShowConfirmDialog(true); + }; + // Issue 4: actual API call after user confirms + const handleConfirmMigration = async () => { + if (!job?.sourceConnectionId || !job?.targetConnectionId) return; + setMigrationStarting(true); try { const result = await fetchApi<{ id: string }>('/migration/execution', { method: 'POST', @@ -47,11 +137,15 @@ export function MigrationPage() { mode: executionMode, }), }); + setShowConfirmDialog(false); setExecutionId(result.id); setPhase('executing'); } catch (err: unknown) { const message = err instanceof Error ? err.message : String(err); setError(message); + setShowConfirmDialog(false); + } finally { + setMigrationStarting(false); } }; @@ -79,12 +173,18 @@ export function MigrationPage() { return (
-

Migration Analysis

+

Migration

Analyze your source instance to assess migration readiness.

+ {/* Issue 3: Step indicator */} + resetToIdle() : undefined} + /> + {error && (
{error} @@ -121,7 +221,6 @@ export function MigrationPage() { {phase === 'analyzed' && job && ( <> - {/* Mode selector + Start Migration button */} @@ -139,54 +238,55 @@ export function MigrationPage() {
)} - {!canExecute ? ( -
+ + {/* Issue 8: prominent blocking warning */} + {blockingCount > 0 && ( +
+ + + +
+

{blockingCount} blocking issue{blockingCount !== 1 ? 's' : ''} detected

+

Proceeding may cause data loss or incompatibility on the target instance.

+
+
+ )} + + {!canExecute && ( +

+ Migration execution requires a Pro license. Upgrade at betterdb.com/pricing +

+ )} + +
+ {!canExecute ? ( -

- Migration execution requires a Pro license. Upgrade at betterdb.com/pricing -

-
- ) : ( -
+ ) : ( - {blockingCount > 0 && ( -

- {blockingCount} blocking issue{blockingCount !== 1 ? 's' : ''} detected — proceed at your own risk. -

- )} -
- )} + )} + + +
- - )} @@ -196,7 +296,6 @@ export function MigrationPage() { { - // Fetch final execution result to get startedAt for validation try { const result = await fetchApi(`/migration/execution/${executionId}`); setExecutionResult(result); @@ -209,53 +308,46 @@ export function MigrationPage() { {phase === 'executed' && job && executionId && ( <> - {/* already stopped */}} /> - {/* Run Validation button */} + {/* Run Validation + actions */}
- {!canExecute ? ( -
+ {!canExecute && ( +

+ Post-migration validation requires a Pro license. Upgrade at betterdb.com/pricing +

+ )} + +
+ {!canExecute ? ( -

- Post-migration validation requires a Pro license. Upgrade at betterdb.com/pricing -

-
- ) : ( + ) : ( + + )} + - )} +
- - )} @@ -268,16 +360,17 @@ export function MigrationPage() { onStopped={() => {/* already stopped */}} /> )} - setPhase('validated')} - /> +
+ setPhase('validated')} + /> +
)} {phase === 'validated' && job && validationId && ( <> - {executionId && ( {/* already stopped */}} /> )} - - +
+ +
+ +
+ + +
)} + + {/* Issue 4: Confirmation dialog */} + {showConfirmDialog && job && ( +
{ if (!migrationStarting) setShowConfirmDialog(false); }} + > +
e.stopPropagation()} + > +

Confirm Migration

+
+
+
Source
+
{job.sourceConnectionName ?? 'Unknown'}
+
+
+
Target
+
{job.targetConnectionName ?? 'Unknown'}
+
+
+
Total keys
+
{(job.totalKeys ?? 0).toLocaleString()}
+
+
+
Estimated memory
+
{formatBytes(job.estimatedTotalMemoryBytes ?? job.totalMemoryBytes ?? 0)}
+
+
+
Mode
+
{executionMode === 'command' ? 'Command-based' : 'DUMP/RESTORE (RedisShake)'}
+
+
+ + {blockingCount > 0 && ( +
+ Warning: {blockingCount} blocking issue{blockingCount !== 1 ? 's' : ''} detected. + Proceeding may cause data loss or incompatibility. +
+ )} + +
+ + +
+
+
+ )} + + {/* Issue 15: Past analyses history */} + {history.length > 0 && ( +
+

Past Analyses

+
+ {history.map(entry => ( +
+ + {expandedHistoryId === entry.id && ( +
+ +
+ )} +
+ ))} +
+
+ )}
); } From 852c250c95f421c9e53ce8c96b7c23a83afb830e Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 09:06:19 +0300 Subject: [PATCH 11/34] Fixed PR feedback --- .betterdb_context.md | 298 ------------------ .claude/settings.local.json | 187 ----------- .gitignore | 4 + Dockerfile | 11 +- Dockerfile.prod | 11 +- .../migration/__tests__/type-handlers.spec.ts | 13 +- .../execution/command-migration-worker.ts | 49 +-- .../src/migration/execution/toml-builder.ts | 4 +- .../src/migration/execution/type-handlers.ts | 22 +- .../migration/migration-execution.service.ts | 3 +- .../migration/migration-validation.service.ts | 2 +- packages/shared/src/types/migration.ts | 3 +- 12 files changed, 72 insertions(+), 535 deletions(-) delete mode 100644 .betterdb_context.md delete mode 100644 .claude/settings.local.json diff --git a/.betterdb_context.md b/.betterdb_context.md deleted file mode 100644 index abce5a22..00000000 --- a/.betterdb_context.md +++ /dev/null @@ -1,298 +0,0 @@ -# BetterDB Session Context -_Retrieved 3 memories. Auto-generated — do not edit._ - -## Session Memories -- **[2026-03-19]** Stopped BetterDB monitor, drafted commit message for new autostart lifecycle code, and reviewed branch against master identifying 10 code quality findings (2 high, 5 medium, 3 low severity). 
- - Decision: Stopped persistent BetterDB monitor process (PID 3166397) - - Decision: Used git diff to identify staged and unstaged changes before drafting commit message - - Decision: Focused commit message on the new autostart.ts file as the meaningful change - - Decision: Ran code review against master branch for all commits on current branch - - Decision: Triaged 10 review findings across high, medium, and low severity categories - - Solved: BetterDB monitor running in background needed to be stopped → Identified and stopped process PID 3166397 - - Solved: Need concise commit message describing staged changes → Drafted message focusing on new autostart/stop lifecycle and connection management tools - - Solved: Code quality issues in new MCP autostart implementation → Ran full branch review identifying 10 findings: 2 high severity (TOCTOU race, mutable env), 5 medium, 3 low - - Open: Fix TOCTOU race condition on PID read vs process.kill in autostart.ts:51-58 - - Open: Resolve unlinkSync ENOENT error on concurrent stopMonitor calls (autostart.ts:155) - - Open: Address empty string env var handling using || operator in runner.ts:31-42 - - Open: Add test coverage for new autostart and connection management code - - Open: Consider whether to address setAsDefault in POST body and prefix detection timing findings -- **[2026-03-19]** Added MCP monitor lifecycle management with start/stop tools, environment variable config overrides, and fixes for process orphaning, stale URLs, signal leaks, and resource cleanup. 
- - Decision: Added environment variable overrides for database and storage config (DB_HOST, DB_PORT, STORAGE_TYPE, PORT) to take precedence over saved config - - Decision: Implemented MCP tools for start_monitor and stop_monitor with persist flag and dynamic port/storage selection - - Decision: Created unified apiRequest function to replace separate apiFetch and rawFetch implementations - - Decision: Added PID file guarding with existsSync checks before writes and reads - - Decision: Replaced sleep-based polling with port-release detection polling in monitor startup - - Decision: Implemented ephemeral signal handler registration that cleans up after health check completion - - Decision: Used process.kill(pid, 0) pattern for process existence verification - - Decision: Bundled module-level BETTERDB_URL and detectedPrefix as mutable connection state - - Decision: Added alreadyRunning flag to distinguish between fresh start and reused monitor instances - - Decision: Implemented cmdline validation for PID file verification on process health checks - - Solved: Orphan monitor processes left running on health check timeout → Moved signal handler registration inside health-check try block and removed it in finally clause to prevent ephemeral handler leak - - Solved: Stale URL used when monitor was already running from previous session → Added alreadyRunning detection and always update BETTERDB_URL and process.env.BETTERDB_URL in both fresh start and reuse scenarios - - Solved: Blocking sleep(1000) in monitor startup preventing rapid iteration → Replaced sleep with polling loop checking for port availability release on success - - Solved: Unconsumed stdout pipe from child process causing resource leak → Added explicit stdout.pipe(process.stdout) for monitor subprocess - - Solved: Inconsistent API request handling with duplicate fetch logic → Created unified apiRequest function replacing separate apiFetch and rawFetch methods - - Open: TOCTOU vulnerability: PID can be 
recycled between existsSync check and process.kill(pid, 0) — needs cmdline validation - - Open: Race condition on concurrent start_monitor/stop_monitor calls mutating module-level BETTERDB_URL and detectedPrefix state - - Open: stopMonitor may throw if PID file already deleted by concurrent call — needs atomic unlink with error suppression - - Open: process.kill(pid, 0) succeeds on Windows regardless of actual process state — cross-platform validation needed - - Open: No timeout enforcement on initial port availability polling — could hang indefinitely if port never releases -- **[2026-03-19]** Added MCP monitor autostart/stop lifecycle management with environment variable override support and fixed PID write guards and polling logic. - - Decision: Add environment variable override support in mapConfigToEnv with || operator fallback pattern - - Decision: Implement MCP tool handlers for start_monitor and stop_monitor lifecycle management - - Decision: Use process.once() for signal handling in autostart lifecycle - - Decision: Guard PID file writes with existence checks before writing - - Decision: Replace sleep-based port release polling with active port-check polling - - Decision: Module-level BETTERDB_URL and detectedPrefix variables for connection state - - Decision: Deduplicate apiFetch calls by moving to shared utility - - Decision: Autostart mode triggered by CLI flag with persist option for background monitoring - - Solved: PID file writes not guarded against errors or existing files → Added existence checks before writing PID file (commit b833ae3) - - Solved: Sleep-based polling for port release was inefficient and brittle → Replaced with active port-release polling mechanism (commit b833ae3) - - Solved: apiFetch logic duplicated across multiple code paths → Deduplicated into single utility function (commit b833ae3) - - Solved: Config values overriding environment variables in runner setup → Reversed precedence to allow environment variable override of saved 
config - - Solved: No way to manage monitor lifecycle from MCP tools → Added start_monitor and stop_monitor MCP tool handlers with persistence support - - Open: Signal handler accumulation risk in ephemeral mode if startMonitor called multiple times (handlers not deduplicated by closure) - - Open: Race condition possible on mutable module-level BETTERDB_URL when tool calls execute concurrently - - Open: stopMonitor does not await process exit before returning, port may still be in use - - Open: Missing test coverage for env variable override precedence in mapConfigToEnv - - Open: No documented guarantee about concurrent tool call safety for start_monitor with other tools - -## Files with History -- packages/mcp/src/autostart.ts -- packages/mcp/src/index.ts -- packages/cli/src/runner.ts - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/database-port.interface.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. (2026-03-12) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/database-port.interface.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. 
(2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. (2026-03-12) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/storage-port.interface.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/shared/src/index.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/database/adapters/unified.adapter.ts -- Implemented Valkey Search index stats backend (capability detection, on-demand FT.INFO parsing, controller endpoints) and frontend page, but FT.INFO parser needs debugging to correctly extract vector metadata from the flat array response. (2026-03-11) -- Fixed missing capability guard in vectorSearch and prevented binary arg replacement from corrupting command names. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/database-port.interface.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. 
(2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. (2026-03-12) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/shared/src/index.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/app.module.ts -- Scaffolded @betterdb/mcp as a thin wrapper MCP server that authenticates via agent tokens and proxies Valkey observability queries through a dedicated guarded API controller. (2026-03-13) -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Scaffolded packages/mcp as a production-ready MCP server, secured it with agent token authentication, fixed three security/correctness bugs in workflows and services, and cleaned up unused dependencies. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/mcp/mcp.module.ts -- Scaffolded @betterdb/mcp as a thin wrapper MCP server that authenticates via agent tokens and proxies Valkey observability queries through a dedicated guarded API controller. 
(2026-03-13) -- Scaffolded a complete MCP server for BetterDB with JWT token auth, 6 observability endpoints, and integrated it into Claude Code for end-user testing. (2026-03-12) -- Scaffolded packages/mcp as a production-ready MCP server, secured it with agent token authentication, fixed three security/correctness bugs in workflows and services, and cleaned up unused dependencies. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/mcp/mcp.controller.ts -- Scaffolded @betterdb/mcp as a thin wrapper MCP server that authenticates via agent tokens and proxies Valkey observability queries through a dedicated guarded API controller. (2026-03-13) -- Scaffolded a complete MCP server for BetterDB with JWT token auth, 6 observability endpoints, and integrated it into Claude Code for end-user testing. (2026-03-12) -- Scaffolded packages/mcp as a production-ready MCP server, secured it with agent token authentication, fixed three security/correctness bugs in workflows and services, and cleaned up unused dependencies. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/app.module.ts -- Scaffolded @betterdb/mcp as a thin wrapper MCP server that authenticates via agent tokens and proxies Valkey observability queries through a dedicated guarded API controller. (2026-03-13) -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Scaffolded packages/mcp as a production-ready MCP server, secured it with agent token authentication, fixed three security/correctness bugs in workflows and services, and cleaned up unused dependencies. 
(2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/mcp/src/index.ts -- Scaffolded @betterdb/mcp as a thin wrapper MCP server that authenticates via agent tokens and proxies Valkey observability queries through a dedicated guarded API controller. (2026-03-13) -- Scaffolded a complete MCP server for BetterDB with JWT token auth, 6 observability endpoints, and integrated it into Claude Code for end-user testing. (2026-03-12) -- Scaffolded packages/mcp as a production-ready MCP server, secured it with agent token authentication, fixed three security/correctness bugs in workflows and services, and cleaned up unused dependencies. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/mcp/src/autostart.ts -- Fixed environment variable precedence in CLI config resolution so MCP can override the monitor port via process.env, and updated MCP autostart to use local CLI bin for testing before npm release. (2026-03-19) -- Fixed three high/medium severity issues in MCP autostart and monitor: ephemeral signal handler leak, async process exit waiting, and stdout pipe blocking. (2026-03-19) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/agent/README.md -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/agent/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. 
(2026-03-18) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/agent/src/index.ts -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/agent/src/command-executor.ts -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. (2026-03-12) -- Implemented Vector Search index stats support with on-demand FT.INFO/FT.SEARCH querying and fixed binary arg corruption in agent executor. (2026-03-13) -- Fixed missing capability guard in vectorSearch and prevented binary arg replacement from corrupting command names. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/agent/src/command-executor.ts -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. (2026-03-12) -- Implemented Vector Search index stats support with on-demand FT.INFO/FT.SEARCH querying and fixed binary arg corruption in agent executor. (2026-03-13) -- Fixed missing capability guard in vectorSearch and prevented binary arg replacement from corrupting command names. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/shared/src/types/health.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. 
(2026-03-13) -- Implemented Vector Search index stats support with on-demand FT.INFO/FT.SEARCH querying and fixed binary arg corruption in agent executor. (2026-03-13) -- Implemented Valkey Search index stats backend (capability detection, on-demand FT.INFO parsing, controller endpoints) and frontend page, but FT.INFO parser needs debugging to correctly extract vector metadata from the flat array response. (2026-03-11) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/shared/src/index.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/storage-port.interface.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/database-port.interface.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. 
(2026-03-12) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/types/metrics.types.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented Vector Search index stats support with on-demand FT.INFO/FT.SEARCH querying and fixed binary arg corruption in agent executor. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/types/metrics.types.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented Vector Search index stats support with on-demand FT.INFO/FT.SEARCH querying and fixed binary arg corruption in agent executor. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/types/metrics.types.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. 
(2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented Vector Search index stats support with on-demand FT.INFO/FT.SEARCH querying and fixed binary arg corruption in agent executor. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/types/metrics.types.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented Vector Search index stats support with on-demand FT.INFO/FT.SEARCH querying and fixed binary arg corruption in agent executor. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/types/metrics.types.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented Vector Search index stats support with on-demand FT.INFO/FT.SEARCH querying and fixed binary arg corruption in agent executor. 
(2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/shared/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. (2026-03-18) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/storage-port.interface.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/database-port.interface.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. (2026-03-12) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/storage-port.interface.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. 
(2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/database-port.interface.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. (2026-03-12) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/types/metrics.types.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented Vector Search index stats support with on-demand FT.INFO/FT.SEARCH querying and fixed binary arg corruption in agent executor. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/storage/adapters/sqlite.adapter.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. 
(2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/storage/adapters/sqlite.adapter.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/storage/adapters/postgres.adapter.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/storage/adapters/postgres.adapter.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/storage/adapters/memory.adapter.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/database/adapters/unified.adapter.ts -- Implemented Valkey Search index stats backend (capability detection, on-demand FT.INFO parsing, controller endpoints) and frontend page, but FT.INFO parser needs debugging to correctly extract vector metadata from the flat array response. (2026-03-11) -- Fixed missing capability guard in vectorSearch and prevented binary arg replacement from corrupting command names. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/database/adapters/unified.adapter.ts -- Implemented Valkey Search index stats backend (capability detection, on-demand FT.INFO parsing, controller endpoints) and frontend page, but FT.INFO parser needs debugging to correctly extract vector metadata from the flat array response. 
(2026-03-11) -- Fixed missing capability guard in vectorSearch and prevented binary arg replacement from corrupting command names. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/storage/adapters/memory.adapter.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/storage/adapters/memory.adapter.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/storage-port.interface.ts -- Added vector index snapshot tracking with 30-second polling, storage adapters, API endpoint, and frontend Sparkline visualization component. (2026-03-14) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/database-port.interface.ts -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. (2026-03-11) -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. (2026-03-12) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/web/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. 
(2026-03-02) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. (2026-03-18) - -## File History: /home/kristiyan/projects/valkey/monitor/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. (2026-03-18) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. (2026-03-18) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/web/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. 
(2026-03-02) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. (2026-03-18) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/shared/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. (2026-03-18) - -## File History: /home/kristiyan/projects/valkey/monitor/packages/semantic-cache/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. 
(2026-03-18) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/web/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. (2026-03-18) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/web/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. (2026-03-18) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/api/src/common/interfaces/database-port.interface.ts -- Implemented full-stack vector search with FT.SEARCH integration, agent binary protocol support, input validation, and React UI for KNN similarity queries. (2026-03-12) -- Implemented comprehensive vector search index stats and live KNN search tester with multi-version support for Valkey Search and RediSearch, including HNSW parameters and field metadata extraction. 
(2026-03-11) -- Implemented Valkey vector search index stats API endpoints with capability detection, resolved agent connection ID instability, and identified hot key data loss was due to in-memory-only tracking before persistent storage was added. (2026-03-13) - -## File History: /home/kristiyan/projects/valkey/monitor/apps/web/package.json -- Converted BetterDB Memory from Docker-based project to standalone npm package with compile-on-install flow, validated packaging pipeline, and prepared GitHub Actions for automated npm publishing. (2026-03-02) -- Implemented SSH tunnel support for remote Valkey connections with password/key auth, TLS SNI passthrough, and fixed SSH client/secret lifecycle management issues. (2026-03-04) -- Implemented Valkey Search (FT) index browser as a collapsible tree in VS Code extension with schema awareness, field type badges, and FT.INFO parsing for HASH/JSON indexed keys. (2026-03-18) \ No newline at end of file diff --git a/.claude/settings.local.json b/.claude/settings.local.json deleted file mode 100644 index c10ba685..00000000 --- a/.claude/settings.local.json +++ /dev/null @@ -1,187 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(git fetch:*)", - "Bash(fuser:*)", - "Bash(NODE_ENV=development BETTERDB_LICENSE_KEY=asdasd STORAGE_TYPE=postgres STORAGE_URL=\"postgresql://betterdb:devpassword@localhost:5432/betterdb\" DB_HOST=localhost DB_PORT=6380 DB_PASSWORD=devpassword DB_USERNAME=default pnpm dev:*)", - "Bash(curl:*)", - "Bash(__NEW_LINE_69c8496eb0256e29__ echo \"\")", - "Bash(# Get full response to see timestamps curl -s \"\"http://localhost:3001/metrics/slowlog?count=30&excludeMonitor=true\"\")", - "Bash(__NEW_LINE_6cdd3e3918f56f8e__ echo \"\")", - "Bash(pnpm add:*)", - "Bash(NODE_ENV=development BETTERDB_LICENSE_KEY=asdasd STORAGE_TYPE=postgres STORAGE_URL=\"postgresql://postgres:postgres@localhost:5433/betterdb\" pnpm dev:*)", - "Bash(NODE_ENV=development BETTERDB_LICENSE_KEY=asdasd DATABASE_HOST=localhost 
DATABASE_PORT=6380 DATABASE_PASSWORD=devpassword STORAGE_TYPE=postgres STORAGE_URL=\"postgresql://betterdb:devpassword@localhost:5432/betterdb\" pnpm dev:*)", - "Bash(pnpm:*)", - "Bash(valkey-cli:*)", - "Bash(pkill:*)", - "Bash(pgrep:*)", - "Bash(npx tsc:*)", - "Bash(npx ts-node:*)", - "Bash(# Kill all related processes and start fresh pkill -9 -f \"\"ts-node\"\" || true pkill -9 -f \"\"vite\"\" || true sleep 2 cd /home/kristiyan/projects/valkey/monitor && NODE_ENV=development DB_HOST=localhost DB_PORT=6380 DB_PASSWORD=devpassword STORAGE_TYPE=postgres STORAGE_URL=\"\"postgresql://betterdb:devpassword@localhost:5432/betterdb\"\" pnpm dev 2>&1 & sleep 8 grep -E \"\"\\(LOG|ERROR|starting|running\\)\"\" /tmp/server2.log)", - "Bash(PGPASSWORD=devpassword psql:*)", - "Bash(python3:*)", - "Bash(node /tmp/insert-test-data.js:*)", - "Bash(# Test with exact range around yesterday''s timestamp \\(1769538717\\) echo \"\"Querying for timestamp around 1769538717 \\(yesterday\\)\"\" curl -s \"\"http://localhost:3001/commandlog-analytics/entries?type=slow&startTime=1769538000&endTime=1769539000&limit=50\"\" cat /tmp/yesterday.json)", - "Bash(ls:*)", - "Bash(docker run:*)", - "Bash(docker logs:*)", - "Bash(docker rm:*)", - "Bash(docker ps:*)", - "Bash(docker stop:*)", - "Bash(docker network ls:*)", - "Bash(docker exec betterdb-monitor-valkey valkey-cli:*)", - "Bash(# Find the SQLite database in the container docker exec betterdb-monitor-app ls -la /app/data/ || docker exec betterdb-monitor-app find /app -name \"\"*.db\"\")", - "Bash(docker exec:*)", - "Bash(docker pull:*)", - "Bash(git -C /home/kristiyan/projects/valkey/monitor ls-files:*)", - "Bash(git -C /home/kristiyan/projects/valkey/monitor/proprietary ls-files:*)", - "Bash(git -C /home/kristiyan/projects/valkey/monitor check-ignore proprietary/)", - "Bash(git add:*)", - "Bash(git check-ignore:*)", - "Bash(docker compose:*)", - "Bash(docker start:*)", - "Bash(docker restart:*)", - "Bash(ss:*)", - "Bash(netstat:*)", - "Bash(# 
Check current memory usage docker exec betterdb-monitor-valkey valkey-cli -a devpassword --no-auth-warning INFO memory)", - "Bash(# Check what network the existing containers are on docker network ls docker inspect betterdb-monitor-valkey --format ''{{range .NetworkSettings.Networks}}{{.NetworkID}}{{end}}'')", - "Bash(docker inspect:*)", - "Bash(tree:*)", - "Bash(npx jest:*)", - "Bash(NODE_OPTIONS=\"--experimental-vm-modules\" node:*)", - "Bash(echo:*)", - "Bash(SKIP_DOCKER_SETUP=1 npx jest:*)", - "Bash(SKIP_DOCKER_SETUP=true npx jest:*)", - "Bash(for i in {1..20})", - "Bash(do echo \"Pull #$i\")", - "Bash(done)", - "Bash(git pull:*)", - "Bash(sudo lsof:*)", - "Bash(npm run build:*)", - "Bash(npm test:*)", - "Bash(python:*)", - "Bash(source venv/bin/activate)", - "Bash(VALKEY_PASSWORD=devpassword python3:*)", - "Bash(grep:*)", - "Bash(# Check the password from docker-compose or environment grep -r \"\"6380\\\\|password\\\\|VALKEY\"\" /home/kristiyan/projects/valkey/monitor/docker-compose*.yml)", - "Bash(redis-cli:*)", - "Bash(# Check current connected clients echo \"\"Current connected clients:\"\" redis-cli -p 6380 -a devpassword CLIENT LIST)", - "Bash(# Check rejected_connections before echo \"\"Before - rejected connections:\"\" redis-cli -p 6380 -a devpassword INFO clients)", - "Bash(# Kill any lingering background redis-cli processes pkill -f \"\"BLPOP.*flood_queue\"\" pkill -f \"\"BLPOP.*waiting_queue\"\" # Get baseline rejected connections echo \"\"Baseline rejected_connections:\"\" redis-cli -p 6380 -a devpassword INFO clients)", - "Bash(# Check docker-compose for 6381 config grep -A 20 \"\"6381\"\" /home/kristiyan/projects/valkey/monitor/docker-compose.yml)", - "Bash(node -e:*)", - "Bash(do valkey-cli -p 6380 SET \"test-key-$i\" \"value-$i\")", - "Bash(# Create multiple connections to 6380 to spike connection count for i in {1..10}; do \\(valkey-cli -p 6380 DEBUG SLEEP 5 &\\) done echo \"\"Spiked connections on 6380. 
Wait 10 seconds for metrics to be collected...\"\" sleep 10 valkey-cli -p 6380 CLIENT LIST)", - "Bash(# Use BLPOP to keep connections open on 6380 for i in {1..15}; do \\(valkey-cli -p 6380 BLPOP nonexistent-key-$i 30 &\\) done echo \"\"Created blocking connections on 6380\"\" sleep 2 valkey-cli -p 6380 CLIENT LIST)", - "Bash(find:*)", - "Bash(__NEW_LINE_f35492c487c1cd20__ echo \"\")", - "Bash(__NEW_LINE_6c920833c1681c54__ echo \"\")", - "Bash(__NEW_LINE_39bd60a6c51894a0__ echo \"\")", - "Bash(__NEW_LINE_66edc211ee499a7e__ echo \"\")", - "Bash(docker-compose ps:*)", - "Bash(xargs:*)", - "Bash(__NEW_LINE_42366b605d3a7b88__ echo \"\")", - "Bash(__NEW_LINE_53e463c78b7eeb92__ echo \"\")", - "Bash(# Generate some data first for i in {1..1000}; do docker exec betterdb-monitor-valkey valkey-cli -a devpassword SET \"\"testkey:$i\"\" \"\"value$i\"\" done echo \"\"Created 1000 keys\"\" # Run expensive KEYS command \\(will be slow and logged\\) docker exec betterdb-monitor-valkey valkey-cli -a devpassword KEYS \"\"*\"\")", - "Bash(# Generate data on 6381 for i in {1..1000}; do docker exec valkey-6381 valkey-cli -a devpassword SET \"\"testkey:$i\"\" \"\"value$i\"\" done echo \"\"Created 1000 keys on 6381\"\" # Run expensive KEYS command docker exec valkey-6381 valkey-cli -a devpassword KEYS \"\"*\"\")", - "Bash(WEBHOOK_ID=\"7e8fd7cc-931a-4485-916e-bb0c519271e9\":*)", - "Bash(__NEW_LINE_d20a33920bc98c2c__ echo \"\")", - "Bash(node /home/kristiyan/projects/valkey/monitor/packages/cli/bin/betterdb.js:*)", - "Bash(npx esbuild:*)", - "Bash(docker network inspect:*)", - "Bash(npm view:*)", - "Bash(node:*)", - "Bash(npm pack:*)", - "Bash(git -C /home/kristiyan/projects/valkey/monitor log --oneline -5)", - "Bash(npm cache clean:*)", - "Bash(npm uninstall:*)", - "Bash(npm install:*)", - "Bash(betterdb:*)", - "Bash(gh run list:*)", - "Bash(gh run view:*)", - "Bash(lsof:*)", - "Bash(npm ls:*)", - "Bash(sudo npm uninstall:*)", - "Bash(sudo npm install:*)", - "Bash(npx prisma migrate 
dev:*)", - "Bash(npx prisma generate:*)", - "Bash(docker build:*)", - "Bash(docker images:*)", - "Bash(cd /home/kristiyan/projects/valkey/monitor/apps/frontend && npx vite build 2>&1)", - "Bash(cd /home/kristiyan/projects/valkey/monitor/apps/web && npx vite build 2>&1)", - "Bash(git:*)", - "Bash(roborev show:*)", - "Bash(roborev review:*)", - "Bash(roborev list:*)", - "Bash(roborev status:*)", - "Bash(roborev:*)", - "mcp__pointer__get-pointed-element", - "Bash(npm list:*)", - "Bash(valkey-benchmark:*)", - "Bash(npx vitest:*)", - "Bash(gh release:*)", - "Bash(gh pr:*)", - "Bash(do echo:*)", - "Bash(psql:*)", - "Read(//usr/bin/**)", - "Read(//proc/567452/**)", - "Bash(kill 599425:*)", - "Bash(kill -9 815654 2>/dev/null; lsof -ti:3001 | xargs kill -9 2>/dev/null; echo \"killed\")", - "Bash(cd /home/kristiyan/projects/valkey/monitor/apps/web && ./node_modules/.bin/tsc --noEmit 2>&1 | head -20)", - "Bash(cd /home/kristiyan/projects/valkey/monitor && npx turbo run test 2>&1 | tail -30)", - "Bash(head -5 apps/api/jest.config.* 2>/dev/null || head -5 apps/api/package.json 2>/dev/null; grep -m1 '\"test\"' apps/api/package.json 2>/dev/null || true)", - "Bash(npm run:*)", - "Bash(kill 1600607 2>/dev/null; kill 1596434 2>/dev/null; sleep 1 && fuser -k 3001/tcp 2>/dev/null; fuser -k 5173/tcp 2>/dev/null; sleep 1 && echo \"All cleared\")", - "Bash(kill 1607901)", - "Bash(find /home/kristiyan/projects/valkey/monitor/proprietary -type f \\\\\\(-name *.ts -o -name *.js -o -name *.md \\\\\\))", - "Skill(update-config)", - "Bash(npx:*)", - "Bash(kill:*)", - "Bash(1 echo curl -s -o /dev/null -w %{http_code} http://localhost:3390/api/health 2)", - "Bash(1 echo ss -tlnp)", - "Bash(BETTERDB_URL=http://localhost:3390 node:*)", - "Bash(/tmp/mcp-stderr.log echo 'EXIT: $?' echo '=== STDOUT ===' cat /tmp/mcp-stdout.log echo '=== STDERR ===' cat /tmp/mcp-stderr.log)", - "Bash(printf:*)", - "Bash(/tmp/mcp-stderr2.log echo 'EXIT: $?' 
echo '=== STDOUT ===' cat /tmp/mcp-stdout2.log echo '=== STDERR ===' cat /tmp/mcp-stderr2.log)", - "Bash(xxd:*)", - "Bash(bash:*)", - "Bash(PORT=3001 npx @betterdb/monitor:*)", - "WebFetch(domain:static.modelcontextprotocol.io)", - "Bash(VALKEY_URL=redis://localhost:6390 pnpm test 2>&1)", - "Bash(VALKEY_URL=redis://localhost:6390 npx tsx index.ts --mock)", - "Bash(ln -sf ../../../../ /home/kristiyan/projects/valkey/monitor/packages/semantic-cache/examples/basic/node_modules/@betterdb/semantic-cache)", - "Bash(cd:*)", - "Bash(VALKEY_URL=redis://localhost:6390 node -e \":*)", - "WebFetch(domain:eclips4.github.io)", - "Bash(node -e \"console.log\\(require\\(''iovalkey/package.json''\\).version\\)\")", - "WebFetch(domain:github.com)", - "Bash(gh api:*)", - "WebFetch(domain:www.npmjs.com)", - "Bash(claude mcp:*)", - "Read(//tmp/**)", - "Bash(tar xzf:*)", - "Bash(npm --version)", - "mcp__betterdb__list_instances", - "mcp__betterdb__select_instance", - "mcp__betterdb__get_health", - "mcp__betterdb__get_slowlog", - "mcp__betterdb__get_slowlog_patterns", - "mcp__betterdb__get_memory", - "mcp__betterdb__get_hot_keys", - "mcp__betterdb__get_info", - "mcp__betterdb__get_anomalies", - "Bash(cat:*)", - "mcp__betterdb-memory__search_context", - "Bash(test:*)", - "Bash(tar:*)", - "Bash(chmod:*)", - "Bash(~/.betterdb/bin/redis-shake:*)", - "Bash(wget:*)", - "Bash(wc:*)" - ], - "deny": [], - "ask": [] - }, - "enableAllProjectMcpServers": true, - "enabledMcpjsonServers": [] -} diff --git a/.gitignore b/.gitignore index 9ea451ac..bc2ca2d8 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,10 @@ pnpm-debug.log* # Claude Code .claude/settings.local.json +# BetterDB context +.betterdb_context.md +**/.betterdb_context.md + # Turbo .turbo diff --git a/Dockerfile b/Dockerfile index 5d01c7c9..0ad6165e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -76,13 +76,18 @@ ENV NODE_ENV=production ENV PORT=3001 ENV STORAGE_TYPE=memory -# Install RedisShake binary for migration execution +# Install 
RedisShake binary for migration execution (with checksum verification) ARG TARGETARCH ARG REDISSHAKE_VERSION=4.6.0 RUN apk add --no-cache wget && \ - wget -qO- "https://github.com/tair-opensource/RedisShake/releases/download/v${REDISSHAKE_VERSION}/redis-shake-v${REDISSHAKE_VERSION}-linux-${TARGETARCH}.tar.gz" \ - | tar -xz --strip-components=0 -C /usr/local/bin ./redis-shake && \ + REDISSHAKE_SHA256_AMD64="6ccab1ff2ba3c200950f8ada811f0c6fe6e2f5e6bd3b8e92b4d9444dc0aff4df" && \ + REDISSHAKE_SHA256_ARM64="653298efa83ef3d495ae2ec21b40c773f36eb15e507f8b3f2931660509d09690" && \ + if [ "${TARGETARCH}" = "amd64" ]; then EXPECTED_SHA256="${REDISSHAKE_SHA256_AMD64}"; else EXPECTED_SHA256="${REDISSHAKE_SHA256_ARM64}"; fi && \ + wget -qO /tmp/redis-shake.tar.gz "https://github.com/tair-opensource/RedisShake/releases/download/v${REDISSHAKE_VERSION}/redis-shake-v${REDISSHAKE_VERSION}-linux-${TARGETARCH}.tar.gz" && \ + echo "${EXPECTED_SHA256} /tmp/redis-shake.tar.gz" | sha256sum -c - && \ + tar -xzf /tmp/redis-shake.tar.gz --strip-components=0 -C /usr/local/bin ./redis-shake && \ chmod +x /usr/local/bin/redis-shake && \ + rm /tmp/redis-shake.tar.gz && \ apk del wget # Create non-root user for security (Docker Scout compliance) diff --git a/Dockerfile.prod b/Dockerfile.prod index 1ce65152..c843ecdd 100644 --- a/Dockerfile.prod +++ b/Dockerfile.prod @@ -154,13 +154,18 @@ ENV DB_USERNAME=default ENV STORAGE_TYPE=memory ENV AI_ENABLED=false -# Install RedisShake binary for migration execution +# Install RedisShake binary for migration execution (with checksum verification) ARG TARGETARCH ARG REDISSHAKE_VERSION=4.6.0 RUN apk add --no-cache wget && \ - wget -qO- "https://github.com/tair-opensource/RedisShake/releases/download/v${REDISSHAKE_VERSION}/redis-shake-v${REDISSHAKE_VERSION}-linux-${TARGETARCH}.tar.gz" \ - | tar -xz --strip-components=0 -C /usr/local/bin ./redis-shake && \ + REDISSHAKE_SHA256_AMD64="6ccab1ff2ba3c200950f8ada811f0c6fe6e2f5e6bd3b8e92b4d9444dc0aff4df" && \ + 
REDISSHAKE_SHA256_ARM64="653298efa83ef3d495ae2ec21b40c773f36eb15e507f8b3f2931660509d09690" && \ + if [ "${TARGETARCH}" = "amd64" ]; then EXPECTED_SHA256="${REDISSHAKE_SHA256_AMD64}"; else EXPECTED_SHA256="${REDISSHAKE_SHA256_ARM64}"; fi && \ + wget -qO /tmp/redis-shake.tar.gz "https://github.com/tair-opensource/RedisShake/releases/download/v${REDISSHAKE_VERSION}/redis-shake-v${REDISSHAKE_VERSION}-linux-${TARGETARCH}.tar.gz" && \ + echo "${EXPECTED_SHA256} /tmp/redis-shake.tar.gz" | sha256sum -c - && \ + tar -xzf /tmp/redis-shake.tar.gz --strip-components=0 -C /usr/local/bin ./redis-shake && \ chmod +x /usr/local/bin/redis-shake && \ + rm /tmp/redis-shake.tar.gz && \ apk del wget # Create non-root user for security (Docker Scout compliance) diff --git a/apps/api/src/migration/__tests__/type-handlers.spec.ts b/apps/api/src/migration/__tests__/type-handlers.spec.ts index 3dbf28fe..ac82e052 100644 --- a/apps/api/src/migration/__tests__/type-handlers.spec.ts +++ b/apps/api/src/migration/__tests__/type-handlers.spec.ts @@ -27,12 +27,9 @@ function createMockSource(overrides: Record = {}) { function createMockTarget() { return { set: jest.fn().mockResolvedValue('OK'), - hset: jest.fn().mockResolvedValue(1), - rpush: jest.fn().mockResolvedValue(1), - sadd: jest.fn().mockResolvedValue(1), del: jest.fn().mockResolvedValue(1), - xadd: jest.fn().mockResolvedValue('1-0'), pexpire: jest.fn().mockResolvedValue(1), + call: jest.fn().mockResolvedValue('OK'), pipeline: jest.fn().mockReturnValue({ zadd: jest.fn().mockReturnThis(), exec: jest.fn().mockResolvedValue([]), @@ -76,7 +73,7 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.hgetallBuffer).toHaveBeenCalledWith('hash:1'); - expect(target.hset).toHaveBeenCalled(); + expect(target.call).toHaveBeenCalledWith('HSET', 'hash:1', expect.any(String), expect.any(String), expect.any(String), expect.any(String)); }); it('should use HSCAN for large hashes (>10K fields)', async () => { @@ -95,7 
+92,7 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.lrangeBuffer).toHaveBeenCalled(); - expect(target.rpush).toHaveBeenCalled(); + expect(target.call).toHaveBeenCalledWith('RPUSH', 'list:1', 'a', 'b'); }); it('should delete target key first to avoid appending', async () => { @@ -114,7 +111,7 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.smembersBuffer).toHaveBeenCalledWith('set:1'); - expect(target.sadd).toHaveBeenCalled(); + expect(target.call).toHaveBeenCalledWith('SADD', 'set:1', 'm1', 'm2'); }); it('should use SSCAN for large sets (>10K members)', async () => { @@ -153,7 +150,7 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.xrange).toHaveBeenCalled(); - expect(target.xadd).toHaveBeenCalledWith('stream:1', '1-0', 'field', 'value'); + expect(target.call).toHaveBeenCalledWith('XADD', 'stream:1', '1-0', 'field', 'value'); }); }); diff --git a/apps/api/src/migration/execution/command-migration-worker.ts b/apps/api/src/migration/execution/command-migration-worker.ts index c13b0b7a..f5d3ed4f 100644 --- a/apps/api/src/migration/execution/command-migration-worker.ts +++ b/apps/api/src/migration/execution/command-migration-worker.ts @@ -5,6 +5,7 @@ import { migrateKey } from './type-handlers'; const SCAN_COUNT = 500; const TYPE_BATCH = 500; +const MIGRATE_BATCH = 50; export interface CommandMigrationOptions { sourceConfig: DatabaseConnectionConfig; @@ -98,30 +99,38 @@ export async function runCommandMigration(opts: CommandMigrationOptions): Promis // Batch TYPE lookup const types = await batchType(sourceClient, keys); - // Migrate each key - for (let i = 0; i < keys.length; i++) { + // Migrate keys in parallel batches for throughput + for (let batchStart = 0; batchStart < keys.length; batchStart += MIGRATE_BATCH) { if (isCancelled(job)) return; - const key = keys[i]; - const type = types[i]; - - if (type === 'none') { - // Key 
expired between SCAN and TYPE - keysProcessed++; - continue; - } - - const result = await migrateKey(sourceClient, targetClient, key, type); - - if (result.ok) { - job.keysTransferred++; - } else { - keysSkipped++; - job.keysSkipped = keysSkipped; - log(job, maxLogLines, `SKIP ${key} (${type}): ${result.error}`); + const batchEnd = Math.min(batchStart + MIGRATE_BATCH, keys.length); + const batchPromises: Promise[] = []; + + for (let i = batchStart; i < batchEnd; i++) { + const key = keys[i]; + const type = types[i]; + + if (type === 'none') { + // Key expired between SCAN and TYPE + keysProcessed++; + continue; + } + + batchPromises.push( + migrateKey(sourceClient, targetClient, key, type).then(result => { + if (result.ok) { + job.keysTransferred++; + } else { + keysSkipped++; + job.keysSkipped = keysSkipped; + log(job, maxLogLines, `SKIP ${key} (${type}): ${result.error}`); + } + keysProcessed++; + }), + ); } - keysProcessed++; + await Promise.all(batchPromises); job.progress = Math.min(99, Math.round((keysProcessed / totalKeys) * 100)); } diff --git a/apps/api/src/migration/execution/toml-builder.ts b/apps/api/src/migration/execution/toml-builder.ts index 83122bf8..cad09c62 100644 --- a/apps/api/src/migration/execution/toml-builder.ts +++ b/apps/api/src/migration/execution/toml-builder.ts @@ -22,7 +22,7 @@ export function buildScanReaderToml( const tgtPassword = target.password ?? ''; let toml = `[scan_reader] -address = "${source.host}:${source.port}" +address = "${escapeTomlString(source.host)}:${source.port}" username = "${escapeTomlString(srcUsername)}" password = "${escapeTomlString(srcPassword)}" tls = ${source.tls ? 'true' : 'false'} @@ -34,7 +34,7 @@ tls = ${source.tls ? 'true' : 'false'} toml += ` [redis_writer] -address = "${target.host}:${target.port}" +address = "${escapeTomlString(target.host)}:${target.port}" username = "${escapeTomlString(tgtUsername)}" password = "${escapeTomlString(tgtPassword)}" tls = ${target.tls ? 
'true' : 'false'} diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index a98408bf..5b62efc4 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -73,11 +73,11 @@ async function migrateHash(source: Valkey, target: Valkey, key: string): Promise // Small hash: single HGETALL const data = await source.hgetallBuffer(key); if (!data || Object.keys(data).length === 0) return; - const args: (string | Buffer)[] = [key]; + const args: string[] = [key]; for (const [field, val] of Object.entries(data)) { - args.push(field, val as Buffer); + args.push(field, String(val)); } - await (target as any).hset(...args); + await target.call('HSET', ...args); } else { // Large hash: HSCAN let cursor = '0'; @@ -85,11 +85,11 @@ async function migrateHash(source: Valkey, target: Valkey, key: string): Promise const [next, fields] = await source.hscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); cursor = String(next); if (fields.length === 0) continue; - const args: (string | Buffer)[] = [key]; + const args: string[] = [key]; for (let i = 0; i < fields.length; i += 2) { - args.push(fields[i], fields[i + 1]); + args.push(String(fields[i]), String(fields[i + 1])); } - await (target as any).hset(...args); + await target.call('HSET', ...args); } while (cursor !== '0'); } } @@ -107,7 +107,7 @@ async function migrateList(source: Valkey, target: Valkey, key: string): Promise const end = Math.min(start + LIST_CHUNK - 1, len - 1); const items = await source.lrangeBuffer(key, start, end); if (items.length === 0) break; - await (target as any).rpush(key, ...items); + await target.call('RPUSH', key, ...items.map(String)); } } @@ -120,14 +120,14 @@ async function migrateSet(source: Valkey, target: Valkey, key: string): Promise< if (card <= LARGE_KEY_THRESHOLD) { const members = await source.smembersBuffer(key); if (members.length === 0) return; - await (target as 
any).sadd(key, ...members); + await target.call('SADD', key, ...members.map(String)); } else { let cursor = '0'; do { const [next, members] = await source.sscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); cursor = String(next); if (members.length === 0) continue; - await (target as any).sadd(key, ...members); + await target.call('SADD', key, ...members.map(String)); } while (cursor !== '0'); } } @@ -139,7 +139,7 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise if (card === 0) return; if (card <= LARGE_KEY_THRESHOLD) { - const data = await (source as any).call('ZRANGE', key, '0', '-1', 'WITHSCORES') as string[]; + const data = await source.call('ZRANGE', key, '0', '-1', 'WITHSCORES') as string[]; if (!data || data.length === 0) return; // data is [member, score, member, score, ...] const pipeline = target.pipeline(); @@ -176,7 +176,7 @@ async function migrateStream(source: Valkey, target: Valkey, key: string): Promi } for (const [id, fields] of entries) { // XADD with explicit ID to preserve ordering - await (target as any).xadd(key, id, ...fields); + await target.call('XADD', key, id, ...fields); lastId = id; } if (entries.length < STREAM_CHUNK) { diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index 5bef3337..8f3b5aa4 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -43,7 +43,8 @@ export class MigrationExecutionService { // 3. Detect if source is cluster const info = await sourceAdapter.getInfo(['cluster']); - const clusterEnabled = String((info as Record)['cluster_enabled'] ?? '0') === '1'; + const clusterSection = (info as Record>).cluster ?? {}; + const clusterEnabled = String(clusterSection['cluster_enabled'] ?? '0') === '1'; // 4. 
For redis_shake mode, locate the binary upfront let binaryPath: string | undefined; diff --git a/apps/api/src/migration/migration-validation.service.ts b/apps/api/src/migration/migration-validation.service.ts index 55845d51..262002a8 100644 --- a/apps/api/src/migration/migration-validation.service.ts +++ b/apps/api/src/migration/migration-validation.service.ts @@ -233,7 +233,7 @@ export class MigrationValidationService { if (this.jobs.size < this.MAX_JOBS) return; const terminal = Array.from(this.jobs.entries()) - .filter(([, j]) => j.status === 'completed' || j.status === 'failed') + .filter(([, j]) => j.status === 'completed' || j.status === 'failed' || j.status === 'cancelled') .sort((a, b) => a[1].createdAt - b[1].createdAt); for (const [id] of terminal) { diff --git a/packages/shared/src/types/migration.ts b/packages/shared/src/types/migration.ts index d9d045db..a2b18509 100644 --- a/packages/shared/src/types/migration.ts +++ b/packages/shared/src/types/migration.ts @@ -145,7 +145,8 @@ export type ValidationJobStatus = | 'pending' | 'running' | 'completed' - | 'failed'; + | 'failed' + | 'cancelled'; export interface KeyCountComparison { sourceKeys: number; From 48ce84fd22b8a96c6b1eada0febfff40e5be7dc8 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 09:58:49 +0300 Subject: [PATCH 12/34] Fixed PR feedback --- .../migration/__tests__/type-handlers.spec.ts | 9 +++++--- .../src/migration/analysis/hfe-detector.ts | 6 ++++- .../execution/command-migration-worker.ts | 7 +++--- .../src/migration/execution/type-handlers.ts | 23 +++++++++++++------ apps/api/src/migration/migration.service.ts | 7 +++--- 5 files changed, 35 insertions(+), 17 deletions(-) diff --git a/apps/api/src/migration/__tests__/type-handlers.spec.ts b/apps/api/src/migration/__tests__/type-handlers.spec.ts index ac82e052..cf00c6f7 100644 --- a/apps/api/src/migration/__tests__/type-handlers.spec.ts +++ b/apps/api/src/migration/__tests__/type-handlers.spec.ts @@ -73,7 +73,8 @@ 
describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.hgetallBuffer).toHaveBeenCalledWith('hash:1'); - expect(target.call).toHaveBeenCalledWith('HSET', 'hash:1', expect.any(String), expect.any(String), expect.any(String), expect.any(String)); + expect(target.del).toHaveBeenCalledWith('hash:1'); + expect(target.call).toHaveBeenCalledWith('HSET', 'hash:1', 'f1', expect.any(Buffer), 'f2', expect.any(Buffer)); }); it('should use HSCAN for large hashes (>10K fields)', async () => { @@ -92,7 +93,7 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.lrangeBuffer).toHaveBeenCalled(); - expect(target.call).toHaveBeenCalledWith('RPUSH', 'list:1', 'a', 'b'); + expect(target.call).toHaveBeenCalledWith('RPUSH', 'list:1', expect.any(Buffer), expect.any(Buffer)); }); it('should delete target key first to avoid appending', async () => { @@ -111,7 +112,8 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.smembersBuffer).toHaveBeenCalledWith('set:1'); - expect(target.call).toHaveBeenCalledWith('SADD', 'set:1', 'm1', 'm2'); + expect(target.del).toHaveBeenCalledWith('set:1'); + expect(target.call).toHaveBeenCalledWith('SADD', 'set:1', expect.any(Buffer), expect.any(Buffer)); }); it('should use SSCAN for large sets (>10K members)', async () => { @@ -131,6 +133,7 @@ describe('type-handlers / migrateKey', () => { const result = await migrateKey(source, target, 'zset:1', 'zset'); expect(result.ok).toBe(true); + expect(target.del).toHaveBeenCalledWith('zset:1'); expect(source.call).toHaveBeenCalledWith('ZRANGE', 'zset:1', '0', '-1', 'WITHSCORES'); }); diff --git a/apps/api/src/migration/analysis/hfe-detector.ts b/apps/api/src/migration/analysis/hfe-detector.ts index 092edb06..887745f9 100644 --- a/apps/api/src/migration/analysis/hfe-detector.ts +++ b/apps/api/src/migration/analysis/hfe-detector.ts @@ -41,7 +41,11 @@ export async function detectHfe( if (!results) 
continue; for (let j = 0; j < batch.length; j++) { const [err, len] = results[j] ?? []; - if (err || Number(len) > MAX_HASH_FIELDS) { + if (err) { + // Pipeline error (key expired, permission denied, etc.) — skip without counting as oversized + continue; + } + if (Number(len) > MAX_HASH_FIELDS) { result.hfeOversizedHashesSkipped++; } else { validKeys.push(batch[j]); diff --git a/apps/api/src/migration/execution/command-migration-worker.ts b/apps/api/src/migration/execution/command-migration-worker.ts index f5d3ed4f..b980e8da 100644 --- a/apps/api/src/migration/execution/command-migration-worker.ts +++ b/apps/api/src/migration/execution/command-migration-worker.ts @@ -173,10 +173,11 @@ function parseClusterMasters(nodesRaw: string): Array<{ host: string; port: numb const parts = line.split(' '); const flags = parts[2] ?? ''; if (!flags.includes('master')) continue; - // address format: host:port@clusterport + // address format: host:port@clusterport (host may be IPv6, e.g. ::1:6379@16379) const addrPart = (parts[1] ?? '').split('@')[0]; - const [host, portStr] = addrPart.split(':'); - const port = parseInt(portStr, 10); + const lastColon = addrPart.lastIndexOf(':'); + const host = lastColon > 0 ? addrPart.substring(0, lastColon) : ''; + const port = lastColon > 0 ? 
parseInt(addrPart.substring(lastColon + 1), 10) : NaN; if (host && !isNaN(port)) { results.push({ host, port }); } diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index 5b62efc4..8cc23705 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -69,13 +69,16 @@ async function migrateHash(source: Valkey, target: Valkey, key: string): Promise const len = await source.hlen(key); if (len === 0) return; + // Delete target key first to avoid merging with stale data + await target.del(key); + if (len <= LARGE_KEY_THRESHOLD) { // Small hash: single HGETALL const data = await source.hgetallBuffer(key); if (!data || Object.keys(data).length === 0) return; - const args: string[] = [key]; + const args: (string | Buffer | number)[] = [key]; for (const [field, val] of Object.entries(data)) { - args.push(field, String(val)); + args.push(field, val as Buffer); } await target.call('HSET', ...args); } else { @@ -85,9 +88,9 @@ async function migrateHash(source: Valkey, target: Valkey, key: string): Promise const [next, fields] = await source.hscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); cursor = String(next); if (fields.length === 0) continue; - const args: string[] = [key]; + const args: (string | Buffer | number)[] = [key]; for (let i = 0; i < fields.length; i += 2) { - args.push(String(fields[i]), String(fields[i + 1])); + args.push(fields[i], fields[i + 1]); } await target.call('HSET', ...args); } while (cursor !== '0'); @@ -107,7 +110,7 @@ async function migrateList(source: Valkey, target: Valkey, key: string): Promise const end = Math.min(start + LIST_CHUNK - 1, len - 1); const items = await source.lrangeBuffer(key, start, end); if (items.length === 0) break; - await target.call('RPUSH', key, ...items.map(String)); + await target.call('RPUSH', key, ...items); } } @@ -117,17 +120,20 @@ async function migrateSet(source: Valkey, target: Valkey, key: 
string): Promise< const card = await source.scard(key); if (card === 0) return; + // Delete target key first to avoid merging with stale data + await target.del(key); + if (card <= LARGE_KEY_THRESHOLD) { const members = await source.smembersBuffer(key); if (members.length === 0) return; - await target.call('SADD', key, ...members.map(String)); + await target.call('SADD', key, ...members); } else { let cursor = '0'; do { const [next, members] = await source.sscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); cursor = String(next); if (members.length === 0) continue; - await target.call('SADD', key, ...members.map(String)); + await target.call('SADD', key, ...members); } while (cursor !== '0'); } } @@ -138,6 +144,9 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise const card = await source.zcard(key); if (card === 0) return; + // Delete target key first to avoid merging with stale data + await target.del(key); + if (card <= LARGE_KEY_THRESHOLD) { const data = await source.call('ZRANGE', key, '0', '-1', 'WITHSCORES') as string[]; if (!data || data.length === 0) return; diff --git a/apps/api/src/migration/migration.service.ts b/apps/api/src/migration/migration.service.ts index 7098ef22..e231dbb2 100644 --- a/apps/api/src/migration/migration.service.ts +++ b/apps/api/src/migration/migration.service.ts @@ -162,10 +162,11 @@ export class MigrationService { clusterMasterCount = masters.length; for (const master of masters) { - // Parse address: 'host:port@clusterport' + // Parse address: 'host:port@clusterport' (host may be IPv6) const addrPart = master.address?.split('@')[0] ?? ''; - const [host, portStr] = addrPart.split(':'); - const port = parseInt(portStr, 10); + const lastColon = addrPart.lastIndexOf(':'); + const host = lastColon > 0 ? addrPart.substring(0, lastColon) : ''; + const port = lastColon > 0 ? 
parseInt(addrPart.substring(lastColon + 1), 10) : NaN; if (!host || isNaN(port)) continue; const client = new Valkey({ From 91fdce1a7e5b4bc12ec4687e49e0bcfa4edfb0ba Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 13:10:17 +0300 Subject: [PATCH 13/34] Fix review feedback: eviction ordering, credential sanitization, stream DEL, cancelled status - Move evictOldJobs() before jobs.set() to prevent exceeding MAX_JOBS - Add sanitizeLogLine() to redact credentials from RedisShake logs - Add target.del(key) in migrateStream before XADD to prevent duplicates - Handle cancelled validation status in runValidation finally block Co-Authored-By: Claude --- .../src/migration/execution/type-handlers.ts | 3 +++ .../migration/migration-execution.service.ts | 25 ++++++++++++++++--- .../migration/migration-validation.service.ts | 11 +++++--- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index 8cc23705..f297623a 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -174,6 +174,9 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise // ── Stream ── async function migrateStream(source: Valkey, target: Valkey, key: string): Promise { + // Delete target key first to avoid duplicates on re-migration + await target.del(key); + let lastId = '-'; let hasMore = true; diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index 8f3b5aa4..8d7be527 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -73,11 +73,11 @@ export class MigrationExecutionService { process: null, tomlPath: null, }; - this.jobs.set(id, job); - - // 6. Evict old jobs + // 6. 
Evict old jobs before inserting the new one this.evictOldJobs(); + this.jobs.set(id, job); + // 7. Fire and forget based on mode if (mode === 'redis_shake') { const tomlContent = buildScanReaderToml(sourceConfig, targetConfig, clusterEnabled); @@ -245,7 +245,7 @@ export class MigrationExecutionService { bytesTransferred: job.bytesTransferred, keysSkipped: job.keysSkipped, totalKeys: job.totalKeys || undefined, - logs: job.logs, + logs: job.logs.map(sanitizeLogLine), progress: job.progress, }; } @@ -267,3 +267,20 @@ export class MigrationExecutionService { } } } + +// Redact credentials from RedisShake log lines before serving to the frontend +const CREDENTIAL_PATTERNS = [ + /password\s*[=:]\s*"[^"]*"/gi, + /password\s*[=:]\s*\S+/gi, + /\/\/[^:]+:[^@]+@/g, // redis://user:pass@host +]; + +function sanitizeLogLine(line: string): string { + let sanitized = line; + for (const pattern of CREDENTIAL_PATTERNS) { + sanitized = sanitized.replace(pattern, (match) => + match.replace(/(?<=[=:"\/])[^"@\s]+/, '***'), + ); + } + return sanitized; +} diff --git a/apps/api/src/migration/migration-validation.service.ts b/apps/api/src/migration/migration-validation.service.ts index 262002a8..ec785da5 100644 --- a/apps/api/src/migration/migration-validation.service.ts +++ b/apps/api/src/migration/migration-validation.service.ts @@ -66,11 +66,11 @@ export class MigrationValidationService { }, cancelled: false, }; - this.jobs.set(id, job); - - // 5. Evict old jobs + // 5. Evict old jobs before inserting the new one this.evictOldJobs(); + this.jobs.set(id, job); + // 6. 
Fire and forget const targetAdapter = this.connectionRegistry.get(req.targetConnectionId); this.runValidation(job, sourceConfig, targetConfig, targetAdapter, req.migrationStartedAt, analysisResult).catch(err => { @@ -192,6 +192,11 @@ export class MigrationValidationService { this.logger.error(`Validation ${job.id} error: ${message}`); } } finally { + // Ensure cancelled jobs get a terminal status + if (job.cancelled && job.status === 'running') { + job.status = 'cancelled'; + job.error = job.error ?? 'Cancelled by user'; + } job.completedAt = Date.now(); // Graceful cleanup — never Promise.all, never disconnect() const clients = [sourceClient, targetClient].filter((c): c is Valkey => c !== null); From d1a6be4b2cc3d211aa17419a6831fc69a235567c Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 13:22:33 +0300 Subject: [PATCH 14/34] Fix Claude Code review action to post inline comments Add --allowedTools with the inline comment MCP tool, gh pr commands, and code exploration tools. Update prompt with repo/PR context and explicit instructions to use create_inline_comment for per-line feedback. Co-Authored-By: Claude --- .github/workflows/claude-review.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/claude-review.yml b/.github/workflows/claude-review.yml index 71f43303..cde703d5 100644 --- a/.github/workflows/claude-review.yml +++ b/.github/workflows/claude-review.yml @@ -24,15 +24,20 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 + fetch-depth: 1 - uses: anthropics/claude-code-action@v1 with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} trigger_phrase: "@claude" use_sticky_comment: false + claude_args: | + --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Read,Glob,Grep" prompt: | - Review this pull request thoroughly. 
For every issue you find, post an inline review comment on the relevant line. Focus on: + REPO: ${{ github.repository }} + PR NUMBER: ${{ github.event.pull_request.number }} + + Review this pull request thoroughly. Focus on: - **Correctness**: Logic errors, off-by-one errors, race conditions, null/undefined access - **Security**: Injection vulnerabilities (SQL, XSS, command, TOML), auth issues, secrets exposure @@ -42,4 +47,8 @@ jobs: For each issue, state the severity (Critical / High / Medium / Low), explain why it's a problem, and suggest a fix with a code snippet when possible. + Use `mcp__github_inline_comment__create_inline_comment` (with `confirmed: true`) to post inline comments on specific lines. + Use `gh pr comment` for top-level summary feedback. + Only post GitHub comments — don't submit review text as messages. + Do NOT flag stylistic preferences, minor naming choices, or missing documentation unless they cause a real problem. From 7c6fc1703f5b5cf0c13ffff9fce33fff6c90ae81 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 13:35:38 +0300 Subject: [PATCH 15/34] Fix totalKeys falsy check and credential sanitization regex - Use ?? 
instead of || for totalKeys so 0 is not coerced to undefined - Rewrite sanitizeLogLine to avoid overlapping patterns and ensure full redaction of quoted/unquoted passwords and URL credentials Co-Authored-By: Claude --- .../migration/migration-execution.service.ts | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index 8d7be527..6faa94cb 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -244,7 +244,7 @@ export class MigrationExecutionService { keysTransferred: job.keysTransferred, bytesTransferred: job.bytesTransferred, keysSkipped: job.keysSkipped, - totalKeys: job.totalKeys || undefined, + totalKeys: job.totalKeys ?? undefined, logs: job.logs.map(sanitizeLogLine), progress: job.progress, }; @@ -269,18 +269,19 @@ export class MigrationExecutionService { } // Redact credentials from RedisShake log lines before serving to the frontend -const CREDENTIAL_PATTERNS = [ - /password\s*[=:]\s*"[^"]*"/gi, - /password\s*[=:]\s*\S+/gi, - /\/\/[^:]+:[^@]+@/g, // redis://user:pass@host -]; - function sanitizeLogLine(line: string): string { let sanitized = line; - for (const pattern of CREDENTIAL_PATTERNS) { - sanitized = sanitized.replace(pattern, (match) => - match.replace(/(?<=[=:"\/])[^"@\s]+/, '***'), - ); - } + // 1. Quoted passwords: password = "secret" or password:"secret" + sanitized = sanitized.replace(/password\s*[=:]\s*"[^"]*"/gi, (match) => { + const eqIdx = match.search(/[=:]/); + return match.slice(0, eqIdx + 1) + ' "***"'; + }); + // 2. Unquoted passwords (skip already-redacted quoted ones): password = secret + sanitized = sanitized.replace(/password\s*[=:]\s*(?!["*])\S+/gi, (match) => { + const eqIdx = match.search(/[=:]/); + return match.slice(0, eqIdx + 1) + ' ***'; + }); + // 3. 
URL credentials: redis://user:pass@host + sanitized = sanitized.replace(/\/\/[^:]+:[^@]+@/g, '//***:***@'); return sanitized; } From 4a88b3246b7c81052bb52495abe9767612ab81d3 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 13:48:20 +0300 Subject: [PATCH 16/34] Fix claude-review action: set base_branch to master, fix PR number for issue_comment events Co-Authored-By: Claude --- .github/workflows/claude-review.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/claude-review.yml b/.github/workflows/claude-review.yml index cde703d5..80507dd7 100644 --- a/.github/workflows/claude-review.yml +++ b/.github/workflows/claude-review.yml @@ -30,12 +30,13 @@ jobs: with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} trigger_phrase: "@claude" + base_branch: master use_sticky_comment: false claude_args: | --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Read,Glob,Grep" prompt: | REPO: ${{ github.repository }} - PR NUMBER: ${{ github.event.pull_request.number }} + PR NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }} Review this pull request thoroughly. 
Focus on: From b27035e76bfb4f05b128c85de3c9007dfad4089e Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 14:28:37 +0300 Subject: [PATCH 17/34] Address Claude code review feedback - Fix quoted password regex to handle TOML-escaped quotes (\\") - Remove ghost keys from target when source key expires mid-migration (PTTL -2) - Use HSCAN for all hash sizes to preserve binary field names - Skip expired keys in TTL sampler instead of counting as noExpiry - Set cancelValidation status to 'cancelled' instead of 'failed' Co-Authored-By: Claude --- .../migration/__tests__/type-handlers.spec.ts | 24 +++++++------- .../api/src/migration/analysis/ttl-sampler.ts | 8 +++-- .../src/migration/execution/type-handlers.ts | 33 ++++++++----------- .../migration/migration-execution.service.ts | 2 +- .../migration/migration-validation.service.ts | 4 +-- 5 files changed, 33 insertions(+), 38 deletions(-) diff --git a/apps/api/src/migration/__tests__/type-handlers.spec.ts b/apps/api/src/migration/__tests__/type-handlers.spec.ts index cf00c6f7..1cd57b32 100644 --- a/apps/api/src/migration/__tests__/type-handlers.spec.ts +++ b/apps/api/src/migration/__tests__/type-handlers.spec.ts @@ -66,24 +66,15 @@ describe('type-handlers / migrateKey', () => { }); describe('hash', () => { - it('should use HGETALL for small hashes', async () => { + it('should use HSCAN and preserve binary field names', async () => { source.hlen.mockResolvedValue(5); const result = await migrateKey(source, target, 'hash:1', 'hash'); - expect(result.ok).toBe(true); - expect(source.hgetallBuffer).toHaveBeenCalledWith('hash:1'); - expect(target.del).toHaveBeenCalledWith('hash:1'); - expect(target.call).toHaveBeenCalledWith('HSET', 'hash:1', 'f1', expect.any(Buffer), 'f2', expect.any(Buffer)); - }); - - it('should use HSCAN for large hashes (>10K fields)', async () => { - source.hlen.mockResolvedValue(15_000); - - const result = await migrateKey(source, target, 'hash:big', 'hash'); - 
expect(result.ok).toBe(true); expect(source.hscanBuffer).toHaveBeenCalled(); + expect(target.del).toHaveBeenCalledWith('hash:1'); + expect(target.call).toHaveBeenCalledWith('HSET', 'hash:1', expect.any(Buffer), expect.any(Buffer)); }); }); @@ -175,6 +166,15 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(target.pexpire).not.toHaveBeenCalled(); }); + + it('should delete ghost key from target when source TTL is -2 (expired)', async () => { + source.pttl.mockResolvedValue(-2); + + const result = await migrateKey(source, target, 'str:expired', 'string'); + + expect(result.ok).toBe(true); + expect(target.del).toHaveBeenCalledWith('str:expired'); + }); }); describe('error handling', () => { diff --git a/apps/api/src/migration/analysis/ttl-sampler.ts b/apps/api/src/migration/analysis/ttl-sampler.ts index f3eb6598..c3b7ed2e 100644 --- a/apps/api/src/migration/analysis/ttl-sampler.ts +++ b/apps/api/src/migration/analysis/ttl-sampler.ts @@ -23,10 +23,12 @@ export async function sampleTtls( const results = await pipeline.exec(); if (!results) continue; for (const [err, ttl] of results) { - const ms = err ? -1 : Number(ttl); - if (ms < 0) { - // -1 = no expiry, -2 = key gone (count as no expiry) + const ms = err ? 
-2 : Number(ttl); + if (ms === -1) { dist.noExpiry++; + } else if (ms < 0) { + // ms === -2: key expired between SCAN and PTTL — skip + continue; } else if (ms < 3_600_000) { dist.expiresWithin1h++; } else if (ms < 86_400_000) { diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index f297623a..26a0292d 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -72,29 +72,19 @@ async function migrateHash(source: Valkey, target: Valkey, key: string): Promise // Delete target key first to avoid merging with stale data await target.del(key); - if (len <= LARGE_KEY_THRESHOLD) { - // Small hash: single HGETALL - const data = await source.hgetallBuffer(key); - if (!data || Object.keys(data).length === 0) return; + // Use HSCAN for all sizes so binary field names are preserved as Buffers + // (hgetallBuffer returns Record which coerces field names to UTF-8) + let cursor = '0'; + do { + const [next, fields] = await source.hscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); + cursor = String(next); + if (fields.length === 0) continue; const args: (string | Buffer | number)[] = [key]; - for (const [field, val] of Object.entries(data)) { - args.push(field, val as Buffer); + for (let i = 0; i < fields.length; i += 2) { + args.push(fields[i], fields[i + 1]); } await target.call('HSET', ...args); - } else { - // Large hash: HSCAN - let cursor = '0'; - do { - const [next, fields] = await source.hscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); - cursor = String(next); - if (fields.length === 0) continue; - const args: (string | Buffer | number)[] = [key]; - for (let i = 0; i < fields.length; i += 2) { - args.push(fields[i], fields[i + 1]); - } - await target.call('HSET', ...args); - } while (cursor !== '0'); - } + } while (cursor !== '0'); } // ── List ── @@ -203,5 +193,8 @@ async function migrateTtl(source: Valkey, target: Valkey, key: string): Promise< const 
pttl = await source.pttl(key); if (pttl > 0) { await target.pexpire(key, pttl); + } else if (pttl === -2) { + // Key expired between copy and TTL check — remove ghost copy from target + await target.del(key); } } diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index 6faa94cb..8f3e077d 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -272,7 +272,7 @@ export class MigrationExecutionService { function sanitizeLogLine(line: string): string { let sanitized = line; // 1. Quoted passwords: password = "secret" or password:"secret" - sanitized = sanitized.replace(/password\s*[=:]\s*"[^"]*"/gi, (match) => { + sanitized = sanitized.replace(/password\s*[=:]\s*"(?:[^"\\]|\\.)*"/gi, (match) => { const eqIdx = match.search(/[=:]/); return match.slice(0, eqIdx + 1) + ' "***"'; }); diff --git a/apps/api/src/migration/migration-validation.service.ts b/apps/api/src/migration/migration-validation.service.ts index ec785da5..c9a165f4 100644 --- a/apps/api/src/migration/migration-validation.service.ts +++ b/apps/api/src/migration/migration-validation.service.ts @@ -208,12 +208,12 @@ export class MigrationValidationService { const job = this.jobs.get(id); if (!job) return false; - if (job.status === 'completed' || job.status === 'failed') { + if (job.status === 'completed' || job.status === 'failed' || job.status === 'cancelled') { return true; // Already terminal } job.cancelled = true; - job.status = 'failed'; + job.status = 'cancelled'; job.error = 'Cancelled by user'; job.completedAt = Date.now(); return true; From ebf3b2d5a661d206706a234eecce28e3fae25e62 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 14:41:15 +0300 Subject: [PATCH 18/34] Fix tests for TTL sampler skip behavior and cancelled validation status Co-Authored-By: Claude --- .../__tests__/migration-validation.service.spec.ts | 2 +- 
apps/api/src/migration/__tests__/ttl-sampler.spec.ts | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/api/src/migration/__tests__/migration-validation.service.spec.ts b/apps/api/src/migration/__tests__/migration-validation.service.spec.ts index 93a09ce1..d3db6096 100644 --- a/apps/api/src/migration/__tests__/migration-validation.service.spec.ts +++ b/apps/api/src/migration/__tests__/migration-validation.service.spec.ts @@ -162,7 +162,7 @@ describe('MigrationValidationService', () => { expect(result).toBe(true); const validation = service.getValidation(id); - expect(validation!.status).toBe('failed'); + expect(validation!.status).toBe('cancelled'); expect(validation!.error).toBe('Cancelled by user'); }); diff --git a/apps/api/src/migration/__tests__/ttl-sampler.spec.ts b/apps/api/src/migration/__tests__/ttl-sampler.spec.ts index 9759d25e..6fcb4b36 100644 --- a/apps/api/src/migration/__tests__/ttl-sampler.spec.ts +++ b/apps/api/src/migration/__tests__/ttl-sampler.spec.ts @@ -69,16 +69,16 @@ describe('sampleTtls', () => { expect(result.sampledKeyCount).toBe(0); }); - it('should treat TTL = -2 (key gone) as no expiry', async () => { + it('should skip TTL = -2 (key gone) instead of counting as noExpiry', async () => { const ttls = [-2, -1]; const client = createMockClient(ttls); const result = await sampleTtls(client, ['gone', 'persistent']); - expect(result.noExpiry).toBe(2); + expect(result.noExpiry).toBe(1); }); - it('should treat pipeline errors as no expiry', async () => { + it('should skip pipeline errors instead of counting as noExpiry', async () => { const client = { pipeline: jest.fn().mockReturnValue({ pttl: jest.fn().mockReturnThis(), @@ -91,7 +91,7 @@ describe('sampleTtls', () => { const result = await sampleTtls(client, ['k1', 'k2']); - expect(result.noExpiry).toBe(1); + expect(result.noExpiry).toBe(0); expect(result.expiresWithin1h).toBe(1); }); }); From 0988deb7286437ecd41d64996f2960d0e5c98930 Mon Sep 17 00:00:00 2001 From: 
Kristiyan Ivanov Date: Tue, 31 Mar 2026 15:06:27 +0300 Subject: [PATCH 19/34] Fix critical and high severity issues from Claude code review - Reject control characters in TOML values instead of silently stripping - Add host and port validation in toml-builder to prevent injection - Sanitize credentials at write-time in job logs, not just read-time - Fix race condition with stale status check after process exit - Make type handlers return whether data was written, skip migrateTtl when no data was copied to avoid deleting pre-existing target keys Co-Authored-By: Claude --- .../migration/__tests__/toml-builder.spec.ts | 28 ++++++++++ .../migration/__tests__/type-handlers.spec.ts | 5 +- .../src/migration/execution/toml-builder.ts | 38 +++++++++++-- .../src/migration/execution/type-handlers.ts | 53 +++++++++++-------- .../migration/migration-execution.service.ts | 18 ++++--- 5 files changed, 106 insertions(+), 36 deletions(-) diff --git a/apps/api/src/migration/__tests__/toml-builder.spec.ts b/apps/api/src/migration/__tests__/toml-builder.spec.ts index d9f69383..49aa3e90 100644 --- a/apps/api/src/migration/__tests__/toml-builder.spec.ts +++ b/apps/api/src/migration/__tests__/toml-builder.spec.ts @@ -99,4 +99,32 @@ describe('buildScanReaderToml', () => { expect(toml).toContain('[advanced]'); expect(toml).toContain('log_level = "info"'); }); + + it('should reject control characters in password instead of silently stripping', () => { + const source = makeConfig({ password: 'pass\x00word' }); + const target = makeConfig(); + + expect(() => buildScanReaderToml(source, target, false)).toThrow('control characters'); + }); + + it('should reject invalid port', () => { + const source = makeConfig({ port: 99999 as any }); + const target = makeConfig(); + + expect(() => buildScanReaderToml(source, target, false)).toThrow('Invalid port'); + }); + + it('should reject host with whitespace', () => { + const source = makeConfig({ host: 'host name' }); + const target = makeConfig(); + + 
expect(() => buildScanReaderToml(source, target, false)).toThrow('Invalid host'); + }); + + it('should reject empty host', () => { + const source = makeConfig({ host: '' }); + const target = makeConfig(); + + expect(() => buildScanReaderToml(source, target, false)).toThrow('Invalid host'); + }); }); diff --git a/apps/api/src/migration/__tests__/type-handlers.spec.ts b/apps/api/src/migration/__tests__/type-handlers.spec.ts index 1cd57b32..0586841c 100644 --- a/apps/api/src/migration/__tests__/type-handlers.spec.ts +++ b/apps/api/src/migration/__tests__/type-handlers.spec.ts @@ -55,13 +55,14 @@ describe('type-handlers / migrateKey', () => { expect(target.set).toHaveBeenCalledWith('str:1', expect.any(Buffer)); }); - it('should handle deleted key gracefully', async () => { + it('should handle deleted key gracefully and skip TTL', async () => { source.getBuffer.mockResolvedValue(null); const result = await migrateKey(source, target, 'gone', 'string'); - // migrateString returns early without setting, then migrateTtl runs — no error expect(result.ok).toBe(true); expect(target.set).not.toHaveBeenCalled(); + // migrateTtl should be skipped when no data was written + expect(source.pttl).not.toHaveBeenCalled(); }); }); diff --git a/apps/api/src/migration/execution/toml-builder.ts b/apps/api/src/migration/execution/toml-builder.ts index cad09c62..7e5b9f10 100644 --- a/apps/api/src/migration/execution/toml-builder.ts +++ b/apps/api/src/migration/execution/toml-builder.ts @@ -1,14 +1,37 @@ import type { DatabaseConnectionConfig } from '@betterdb/shared'; +// eslint-disable-next-line no-control-regex +const CONTROL_CHAR_RE = /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/; + function escapeTomlString(value: string): string { + if (CONTROL_CHAR_RE.test(value)) { + throw new Error(`Value contains disallowed control characters`); + } return value .replace(/\\/g, '\\\\') .replace(/"/g, '\\"') .replace(/\n/g, '\\n') .replace(/\r/g, '\\r') - .replace(/\t/g, '\\t') - // eslint-disable-next-line 
no-control-regex - .replace(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ''); + .replace(/\t/g, '\\t'); +} + +function validatePort(port: unknown): number { + const n = Number(port); + if (!Number.isInteger(n) || n < 1 || n > 65535) { + throw new Error(`Invalid port: ${port}`); + } + return n; +} + +function validateHost(host: string): string { + if (!host || host.length > 253) { + throw new Error('Invalid host: empty or too long'); + } + // Allow hostname, IPv4, or bracketed IPv6 + if (/[\s"\\]/.test(host)) { + throw new Error('Invalid host: contains whitespace, quotes, or backslashes'); + } + return host; } export function buildScanReaderToml( @@ -16,13 +39,18 @@ export function buildScanReaderToml( target: DatabaseConnectionConfig, sourceIsCluster: boolean, ): string { + const srcHost = validateHost(source.host); + const srcPort = validatePort(source.port); + const tgtHost = validateHost(target.host); + const tgtPort = validatePort(target.port); + const srcUsername = (!source.username || source.username === 'default') ? '' : source.username; const srcPassword = source.password ?? ''; const tgtUsername = (!target.username || target.username === 'default') ? '' : target.username; const tgtPassword = target.password ?? ''; let toml = `[scan_reader] -address = "${escapeTomlString(source.host)}:${source.port}" +address = "${escapeTomlString(srcHost)}:${srcPort}" username = "${escapeTomlString(srcUsername)}" password = "${escapeTomlString(srcPassword)}" tls = ${source.tls ? 'true' : 'false'} @@ -34,7 +62,7 @@ tls = ${source.tls ? 'true' : 'false'} toml += ` [redis_writer] -address = "${escapeTomlString(target.host)}:${target.port}" +address = "${escapeTomlString(tgtHost)}:${tgtPort}" username = "${escapeTomlString(tgtUsername)}" password = "${escapeTomlString(tgtPassword)}" tls = ${target.tls ? 
'true' : 'false'} diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index 26a0292d..089369cc 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -24,30 +24,33 @@ export async function migrateKey( type: string, ): Promise { try { + let wrote: boolean; switch (type) { case 'string': - await migrateString(source, target, key); + wrote = await migrateString(source, target, key); break; case 'hash': - await migrateHash(source, target, key); + wrote = await migrateHash(source, target, key); break; case 'list': - await migrateList(source, target, key); + wrote = await migrateList(source, target, key); break; case 'set': - await migrateSet(source, target, key); + wrote = await migrateSet(source, target, key); break; case 'zset': - await migrateZset(source, target, key); + wrote = await migrateZset(source, target, key); break; case 'stream': - await migrateStream(source, target, key); + wrote = await migrateStream(source, target, key); break; default: return { key, type, ok: false, error: `Unsupported type: ${type}` }; } - // Preserve TTL - await migrateTtl(source, target, key); + // Only set TTL if data was actually written to avoid deleting pre-existing target keys + if (wrote) { + await migrateTtl(source, target, key); + } return { key, type, ok: true }; } catch (err: unknown) { const message = err instanceof Error ? 
err.message : String(err); @@ -57,17 +60,18 @@ export async function migrateKey( // ── String ── -async function migrateString(source: Valkey, target: Valkey, key: string): Promise { +async function migrateString(source: Valkey, target: Valkey, key: string): Promise { const value = await source.getBuffer(key); - if (value === null) return; // key expired/deleted between SCAN and GET + if (value === null) return false; // key expired/deleted between SCAN and GET await target.set(key, value); + return true; } // ── Hash ── -async function migrateHash(source: Valkey, target: Valkey, key: string): Promise { +async function migrateHash(source: Valkey, target: Valkey, key: string): Promise { const len = await source.hlen(key); - if (len === 0) return; + if (len === 0) return false; // Delete target key first to avoid merging with stale data await target.del(key); @@ -85,13 +89,14 @@ async function migrateHash(source: Valkey, target: Valkey, key: string): Promise } await target.call('HSET', ...args); } while (cursor !== '0'); + return true; } // ── List ── -async function migrateList(source: Valkey, target: Valkey, key: string): Promise { +async function migrateList(source: Valkey, target: Valkey, key: string): Promise { const len = await source.llen(key); - if (len === 0) return; + if (len === 0) return false; // Delete target key first to avoid appending to existing data await target.del(key); @@ -102,20 +107,21 @@ async function migrateList(source: Valkey, target: Valkey, key: string): Promise if (items.length === 0) break; await target.call('RPUSH', key, ...items); } + return true; } // ── Set ── -async function migrateSet(source: Valkey, target: Valkey, key: string): Promise { +async function migrateSet(source: Valkey, target: Valkey, key: string): Promise { const card = await source.scard(key); - if (card === 0) return; + if (card === 0) return false; // Delete target key first to avoid merging with stale data await target.del(key); if (card <= LARGE_KEY_THRESHOLD) 
{ const members = await source.smembersBuffer(key); - if (members.length === 0) return; + if (members.length === 0) return true; // del already ran await target.call('SADD', key, ...members); } else { let cursor = '0'; @@ -126,20 +132,21 @@ async function migrateSet(source: Valkey, target: Valkey, key: string): Promise< await target.call('SADD', key, ...members); } while (cursor !== '0'); } + return true; } // ── Sorted Set ── -async function migrateZset(source: Valkey, target: Valkey, key: string): Promise { +async function migrateZset(source: Valkey, target: Valkey, key: string): Promise { const card = await source.zcard(key); - if (card === 0) return; + if (card === 0) return false; // Delete target key first to avoid merging with stale data await target.del(key); if (card <= LARGE_KEY_THRESHOLD) { const data = await source.call('ZRANGE', key, '0', '-1', 'WITHSCORES') as string[]; - if (!data || data.length === 0) return; + if (!data || data.length === 0) return true; // del already ran // data is [member, score, member, score, ...] const pipeline = target.pipeline(); for (let i = 0; i < data.length; i += 2) { @@ -159,16 +166,18 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise await pipeline.exec(); } while (cursor !== '0'); } + return true; } // ── Stream ── -async function migrateStream(source: Valkey, target: Valkey, key: string): Promise { +async function migrateStream(source: Valkey, target: Valkey, key: string): Promise { // Delete target key first to avoid duplicates on re-migration await target.del(key); let lastId = '-'; let hasMore = true; + let wrote = false; while (hasMore) { const entries = await source.xrange(key, lastId === '-' ? 
'-' : `(${lastId}`, '+', 'COUNT', STREAM_CHUNK); @@ -180,11 +189,13 @@ async function migrateStream(source: Valkey, target: Valkey, key: string): Promi // XADD with explicit ID to preserve ordering await target.call('XADD', key, id, ...fields); lastId = id; + wrote = true; } if (entries.length < STREAM_CHUNK) { hasMore = false; } } + return wrote; } // ── TTL ── diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index 8f3e077d..401e5d8e 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -111,7 +111,7 @@ export class MigrationExecutionService { const lines = chunk.toString().split('\n'); for (const line of lines) { if (!line) continue; - job.logs.push(line); + job.logs.push(sanitizeLogLine(line)); if (job.logs.length > this.MAX_LOG_LINES) { job.logs.shift(); } @@ -130,11 +130,14 @@ export class MigrationExecutionService { proc.on('error', reject); }); - const currentStatus = job.status as string; + // Status may have been set to 'cancelled' by stopExecution() while the process was running + const statusAfterExit = job.status as string; if (code === 0) { - job.status = 'completed'; - job.progress = 100; - } else if (currentStatus !== 'cancelled') { + if (statusAfterExit !== 'cancelled') { + job.status = 'completed'; + job.progress = 100; + } + } else if (statusAfterExit !== 'cancelled') { job.status = 'failed'; job.error = `RedisShake exited with code ${code}`; } @@ -179,8 +182,7 @@ export class MigrationExecutionService { maxLogLines: this.MAX_LOG_LINES, }); - const currentStatus = job.status as string; - if (currentStatus !== 'cancelled') { + if ((job.status as string) !== 'cancelled') { job.status = 'completed'; } } catch (err: unknown) { @@ -245,7 +247,7 @@ export class MigrationExecutionService { bytesTransferred: job.bytesTransferred, keysSkipped: job.keysSkipped, totalKeys: job.totalKeys ?? 
undefined, - logs: job.logs.map(sanitizeLogLine), + logs: [...job.logs], progress: job.progress, }; } From a5326147cec5d431bb06008d910ff50dc2e0f57d Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 15:34:03 +0300 Subject: [PATCH 20/34] Fix TTL sampler denominator, HFE false-negative, sample validator, and client timeouts - TTL sampler: compute sampledKeyCount from actually categorized keys, excluding expired/errored keys so bucket percentages are accurate - HFE detector: drop overly broad errMsg.includes('ERR') clause that matched transient errors; keep only unknown command/subcommand check - Sample validator: skip keys with sourceType 'none' instead of counting as matched, and derive sampledKeys from actual outcomes - Add connectTimeout (10s) and commandTimeout (15s) to validation and command-mode migration clients to prevent indefinite hangs Co-Authored-By: Claude --- apps/api/src/migration/__tests__/ttl-sampler.spec.ts | 6 ++++-- apps/api/src/migration/analysis/hfe-detector.ts | 5 +++-- apps/api/src/migration/analysis/ttl-sampler.ts | 10 ++++++---- .../migration/execution/command-migration-worker.ts | 2 ++ apps/api/src/migration/migration-validation.service.ts | 2 ++ apps/api/src/migration/validation/sample-validator.ts | 5 ++--- 6 files changed, 19 insertions(+), 11 deletions(-) diff --git a/apps/api/src/migration/__tests__/ttl-sampler.spec.ts b/apps/api/src/migration/__tests__/ttl-sampler.spec.ts index 6fcb4b36..e9a89b22 100644 --- a/apps/api/src/migration/__tests__/ttl-sampler.spec.ts +++ b/apps/api/src/migration/__tests__/ttl-sampler.spec.ts @@ -69,16 +69,17 @@ describe('sampleTtls', () => { expect(result.sampledKeyCount).toBe(0); }); - it('should skip TTL = -2 (key gone) instead of counting as noExpiry', async () => { + it('should skip TTL = -2 (key gone) and exclude from sampledKeyCount', async () => { const ttls = [-2, -1]; const client = createMockClient(ttls); const result = await sampleTtls(client, ['gone', 'persistent']); 
expect(result.noExpiry).toBe(1); + expect(result.sampledKeyCount).toBe(1); }); - it('should skip pipeline errors instead of counting as noExpiry', async () => { + it('should skip pipeline errors and exclude from sampledKeyCount', async () => { const client = { pipeline: jest.fn().mockReturnValue({ pttl: jest.fn().mockReturnThis(), @@ -93,5 +94,6 @@ describe('sampleTtls', () => { expect(result.noExpiry).toBe(0); expect(result.expiresWithin1h).toBe(1); + expect(result.sampledKeyCount).toBe(1); }); }); diff --git a/apps/api/src/migration/analysis/hfe-detector.ts b/apps/api/src/migration/analysis/hfe-detector.ts index 887745f9..8b4bdb5e 100644 --- a/apps/api/src/migration/analysis/hfe-detector.ts +++ b/apps/api/src/migration/analysis/hfe-detector.ts @@ -98,13 +98,14 @@ export async function detectHfe( for (let i = 0; i < keyFieldPairs.length; i++) { const [err, val] = results[i] ?? []; if (err) { - // If the error indicates unknown command, HFE not supported const errMsg = String(err); - if (errMsg.includes('unknown command') || errMsg.includes('ERR')) { + // Only mark unsupported for genuinely unknown command errors + if (errMsg.includes('unknown command') || errMsg.includes('unknown subcommand')) { result.hfeSupported = false; result.hfeDetected = false; return result; } + // Transient errors (overload, permission, etc.) 
— skip this field continue; } // HEXPIRETIME returns an array with the expiry time, >0 means HFE in use diff --git a/apps/api/src/migration/analysis/ttl-sampler.ts b/apps/api/src/migration/analysis/ttl-sampler.ts index c3b7ed2e..eb183547 100644 --- a/apps/api/src/migration/analysis/ttl-sampler.ts +++ b/apps/api/src/migration/analysis/ttl-sampler.ts @@ -11,7 +11,7 @@ export async function sampleTtls( expiresWithin24h: 0, expiresWithin7d: 0, expiresAfter7d: 0, - sampledKeyCount: keys.length, + sampledKeyCount: 0, }; for (let i = 0; i < keys.length; i += 1000) { @@ -24,11 +24,13 @@ export async function sampleTtls( if (!results) continue; for (const [err, ttl] of results) { const ms = err ? -2 : Number(ttl); + if (ms < 0 && ms !== -1) { + // ms === -2: key expired between SCAN and PTTL, or pipeline error — skip + continue; + } + dist.sampledKeyCount++; if (ms === -1) { dist.noExpiry++; - } else if (ms < 0) { - // ms === -2: key expired between SCAN and PTTL — skip - continue; } else if (ms < 3_600_000) { dist.expiresWithin1h++; } else if (ms < 86_400_000) { diff --git a/apps/api/src/migration/execution/command-migration-worker.ts b/apps/api/src/migration/execution/command-migration-worker.ts index b980e8da..d27f2e42 100644 --- a/apps/api/src/migration/execution/command-migration-worker.ts +++ b/apps/api/src/migration/execution/command-migration-worker.ts @@ -162,6 +162,8 @@ function createClient(config: DatabaseConnectionConfig, name: string): Valkey { password: config.password || undefined, tls: config.tls ? 
{} : undefined, lazyConnect: true, + connectTimeout: 10_000, + commandTimeout: 15_000, connectionName: name, }); } diff --git a/apps/api/src/migration/migration-validation.service.ts b/apps/api/src/migration/migration-validation.service.ts index c9a165f4..67d30867 100644 --- a/apps/api/src/migration/migration-validation.service.ts +++ b/apps/api/src/migration/migration-validation.service.ts @@ -262,6 +262,8 @@ function createClient(config: DatabaseConnectionConfig, name: string): Valkey { password: config.password || undefined, tls: config.tls ? {} : undefined, lazyConnect: true, + connectTimeout: 10_000, + commandTimeout: 15_000, connectionName: name, }); } diff --git a/apps/api/src/migration/validation/sample-validator.ts b/apps/api/src/migration/validation/sample-validator.ts index 86491834..ebc04f1e 100644 --- a/apps/api/src/migration/validation/sample-validator.ts +++ b/apps/api/src/migration/validation/sample-validator.ts @@ -36,8 +36,7 @@ export async function validateSample( const sourceType = sourceTypes[i]; if (sourceType === 'none') { - // Key expired between SCAN and TYPE - matched++; + // Key expired between SCAN and TYPE — skip, don't count toward any outcome continue; } @@ -71,7 +70,7 @@ export async function validateSample( } return { - sampledKeys: keys.length, + sampledKeys: matched + missing + typeMismatches + valueMismatches, matched, missing, typeMismatches, From 9ebe03a37427ce6ada2fddc2ec515bed708dcc75 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 15:57:14 +0300 Subject: [PATCH 21/34] Fix Dockerfile healthcheck, atomic string TTL, binary-safe validation, IPv6 brackets, and PID tracking - Dockerfile: keep wget installed (needed by HEALTHCHECK), don't apk del it - type-handlers: use atomic SET PX for strings to eliminate TTL gap on crash; compound types still use separate migrateTtl - sample-validator: use hgetallBuffer instead of hgetall for binary-safe hash field comparison - command-migration-worker + 
migration.service: strip IPv6 brackets from CLUSTER NODES addresses so iovalkey can connect - migration-execution.service: write PID file alongside TOML for orphan detection on server restart; clean up both in finally block Co-Authored-By: Claude --- Dockerfile | 6 ++---- .../migration/__tests__/type-handlers.spec.ts | 20 ++++++++++++------ .../execution/command-migration-worker.ts | 8 +++++-- .../src/migration/execution/execution-job.ts | 1 + .../src/migration/execution/type-handlers.ts | 21 +++++++++++++++---- .../migration/migration-execution.service.ts | 21 +++++++++++++------ apps/api/src/migration/migration.service.ts | 6 +++++- .../migration/validation/sample-validator.ts | 10 +++++---- 8 files changed, 66 insertions(+), 27 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0ad6165e..eb6a506f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -79,16 +79,14 @@ ENV STORAGE_TYPE=memory # Install RedisShake binary for migration execution (with checksum verification) ARG TARGETARCH ARG REDISSHAKE_VERSION=4.6.0 -RUN apk add --no-cache wget && \ - REDISSHAKE_SHA256_AMD64="6ccab1ff2ba3c200950f8ada811f0c6fe6e2f5e6bd3b8e92b4d9444dc0aff4df" && \ +RUN REDISSHAKE_SHA256_AMD64="6ccab1ff2ba3c200950f8ada811f0c6fe6e2f5e6bd3b8e92b4d9444dc0aff4df" && \ REDISSHAKE_SHA256_ARM64="653298efa83ef3d495ae2ec21b40c773f36eb15e507f8b3f2931660509d09690" && \ if [ "${TARGETARCH}" = "amd64" ]; then EXPECTED_SHA256="${REDISSHAKE_SHA256_AMD64}"; else EXPECTED_SHA256="${REDISSHAKE_SHA256_ARM64}"; fi && \ wget -qO /tmp/redis-shake.tar.gz "https://github.com/tair-opensource/RedisShake/releases/download/v${REDISSHAKE_VERSION}/redis-shake-v${REDISSHAKE_VERSION}-linux-${TARGETARCH}.tar.gz" && \ echo "${EXPECTED_SHA256} /tmp/redis-shake.tar.gz" | sha256sum -c - && \ tar -xzf /tmp/redis-shake.tar.gz --strip-components=0 -C /usr/local/bin ./redis-shake && \ chmod +x /usr/local/bin/redis-shake && \ - rm /tmp/redis-shake.tar.gz && \ - apk del wget + rm /tmp/redis-shake.tar.gz # Create non-root user for 
security (Docker Scout compliance) RUN addgroup --system --gid 1001 nodejs && \ diff --git a/apps/api/src/migration/__tests__/type-handlers.spec.ts b/apps/api/src/migration/__tests__/type-handlers.spec.ts index 0586841c..97dc92c1 100644 --- a/apps/api/src/migration/__tests__/type-handlers.spec.ts +++ b/apps/api/src/migration/__tests__/type-handlers.spec.ts @@ -55,14 +55,12 @@ describe('type-handlers / migrateKey', () => { expect(target.set).toHaveBeenCalledWith('str:1', expect.any(Buffer)); }); - it('should handle deleted key gracefully and skip TTL', async () => { + it('should handle deleted key gracefully and skip write', async () => { source.getBuffer.mockResolvedValue(null); const result = await migrateKey(source, target, 'gone', 'string'); expect(result.ok).toBe(true); expect(target.set).not.toHaveBeenCalled(); - // migrateTtl should be skipped when no data was written - expect(source.pttl).not.toHaveBeenCalled(); }); }); @@ -150,13 +148,23 @@ describe('type-handlers / migrateKey', () => { }); describe('TTL preservation', () => { - it('should call pexpire on target when source TTL > 0', async () => { + it('should use atomic SET PX for strings when source TTL > 0', async () => { source.pttl.mockResolvedValue(60000); const result = await migrateKey(source, target, 'str:ttl', 'string'); expect(result.ok).toBe(true); - expect(target.pexpire).toHaveBeenCalledWith('str:ttl', 60000); + expect(target.set).toHaveBeenCalledWith('str:ttl', expect.any(Buffer), 'PX', 60000); + expect(target.pexpire).not.toHaveBeenCalled(); + }); + + it('should call pexpire for compound types when source TTL > 0', async () => { + source.pttl.mockResolvedValue(60000); + + const result = await migrateKey(source, target, 'hash:ttl', 'hash'); + + expect(result.ok).toBe(true); + expect(target.pexpire).toHaveBeenCalledWith('hash:ttl', 60000); }); it('should not call pexpire when source TTL is -1', async () => { @@ -168,7 +176,7 @@ describe('type-handlers / migrateKey', () => { 
expect(target.pexpire).not.toHaveBeenCalled(); }); - it('should delete ghost key from target when source TTL is -2 (expired)', async () => { + it('should return ok: false for string when source TTL is -2 (expired)', async () => { source.pttl.mockResolvedValue(-2); const result = await migrateKey(source, target, 'str:expired', 'string'); diff --git a/apps/api/src/migration/execution/command-migration-worker.ts b/apps/api/src/migration/execution/command-migration-worker.ts index d27f2e42..157a9746 100644 --- a/apps/api/src/migration/execution/command-migration-worker.ts +++ b/apps/api/src/migration/execution/command-migration-worker.ts @@ -175,11 +175,15 @@ function parseClusterMasters(nodesRaw: string): Array<{ host: string; port: numb const parts = line.split(' '); const flags = parts[2] ?? ''; if (!flags.includes('master')) continue; - // address format: host:port@clusterport (host may be IPv6, e.g. ::1:6379@16379) + // address format: host:port@clusterport (host may be IPv6, e.g. [::1]:6379@16379) const addrPart = (parts[1] ?? '').split('@')[0]; const lastColon = addrPart.lastIndexOf(':'); - const host = lastColon > 0 ? addrPart.substring(0, lastColon) : ''; + let host = lastColon > 0 ? addrPart.substring(0, lastColon) : ''; const port = lastColon > 0 ? 
parseInt(addrPart.substring(lastColon + 1), 10) : NaN; + // Strip IPv6 brackets — iovalkey expects bare addresses + if (host.startsWith('[') && host.endsWith(']')) { + host = host.slice(1, -1); + } if (host && !isNaN(port)) { results.push({ host, port }); } diff --git a/apps/api/src/migration/execution/execution-job.ts b/apps/api/src/migration/execution/execution-job.ts index e4e38258..20dc3d3f 100644 --- a/apps/api/src/migration/execution/execution-job.ts +++ b/apps/api/src/migration/execution/execution-job.ts @@ -16,4 +16,5 @@ export interface ExecutionJob { progress: number | null; process: ChildProcess | null; // redis_shake mode only tomlPath: string | null; // redis_shake mode only + pidPath: string | null; // redis_shake mode only — for orphan detection } diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index 089369cc..5bc34de4 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -27,6 +27,7 @@ export async function migrateKey( let wrote: boolean; switch (type) { case 'string': + // String handles TTL atomically via SET PX wrote = await migrateString(source, target, key); break; case 'hash': @@ -47,8 +48,8 @@ export async function migrateKey( default: return { key, type, ok: false, error: `Unsupported type: ${type}` }; } - // Only set TTL if data was actually written to avoid deleting pre-existing target keys - if (wrote) { + // String handles TTL atomically; compound types need a separate PEXPIRE + if (wrote && type !== 'string') { await migrateTtl(source, target, key); } return { key, type, ok: true }; @@ -61,9 +62,21 @@ export async function migrateKey( // ── String ── async function migrateString(source: Valkey, target: Valkey, key: string): Promise { - const value = await source.getBuffer(key); + const [value, pttl] = await Promise.all([ + source.getBuffer(key), + source.pttl(key), + ]); if (value === null) return false; 
// key expired/deleted between SCAN and GET - await target.set(key, value); + if (pttl > 0) { + // Atomic SET with PX — no window where key exists without TTL + await target.set(key, value, 'PX', pttl); + } else if (pttl === -2) { + // Key expired between GET and PTTL — remove any ghost copy + await target.del(key); + return false; + } else { + await target.set(key, value); + } return true; } diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index 401e5d8e..90c15edf 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -72,6 +72,7 @@ export class MigrationExecutionService { progress: null, process: null, tomlPath: null, + pidPath: null, }; // 6. Evict old jobs before inserting the new one this.evictOldJobs(); @@ -107,6 +108,13 @@ export class MigrationExecutionService { job.process = proc; job.status = 'running'; + // Write PID file for orphan detection on server restart + const pidPath = join(os.tmpdir(), `${job.id}.pid`); + try { + writeFileSync(pidPath, String(proc.pid), { encoding: 'utf-8', mode: 0o600 }); + job.pidPath = pidPath; + } catch { /* non-fatal — orphan detection is best-effort */ } + const handleData = (chunk: Buffer) => { const lines = chunk.toString().split('\n'); for (const line of lines) { @@ -152,15 +160,16 @@ export class MigrationExecutionService { if (!job.completedAt) { job.completedAt = Date.now(); } - if (job.tomlPath) { - try { - if (existsSync(job.tomlPath)) { - unlinkSync(job.tomlPath); - } - } catch { /* ignore cleanup errors */ } + for (const path of [job.tomlPath, job.pidPath]) { + if (path) { + try { + if (existsSync(path)) unlinkSync(path); + } catch { /* ignore cleanup errors */ } + } } job.process = null; job.tomlPath = null; + job.pidPath = null; } } diff --git a/apps/api/src/migration/migration.service.ts b/apps/api/src/migration/migration.service.ts index e231dbb2..5edd30fa 
100644 --- a/apps/api/src/migration/migration.service.ts +++ b/apps/api/src/migration/migration.service.ts @@ -165,8 +165,12 @@ export class MigrationService { // Parse address: 'host:port@clusterport' (host may be IPv6) const addrPart = master.address?.split('@')[0] ?? ''; const lastColon = addrPart.lastIndexOf(':'); - const host = lastColon > 0 ? addrPart.substring(0, lastColon) : ''; + let host = lastColon > 0 ? addrPart.substring(0, lastColon) : ''; const port = lastColon > 0 ? parseInt(addrPart.substring(lastColon + 1), 10) : NaN; + // Strip IPv6 brackets — iovalkey expects bare addresses + if (host.startsWith('[') && host.endsWith(']')) { + host = host.slice(1, -1); + } if (!host || isNaN(port)) continue; const client = new Valkey({ diff --git a/apps/api/src/migration/validation/sample-validator.ts b/apps/api/src/migration/validation/sample-validator.ts index ebc04f1e..52409929 100644 --- a/apps/api/src/migration/validation/sample-validator.ts +++ b/apps/api/src/migration/validation/sample-validator.ts @@ -227,8 +227,8 @@ async function compareHash(source: Valkey, target: Valkey, key: string): Promise } const [sourceData, targetData] = await Promise.all([ - source.hgetall(key), - target.hgetall(key), + source.hgetallBuffer(key), + target.hgetallBuffer(key), ]); const sourceFields = Object.keys(sourceData).sort(); @@ -238,13 +238,15 @@ async function compareHash(source: Valkey, target: Valkey, key: string): Promise return `field count differs (source: ${sourceFields.length}, target: ${targetFields.length})`; } - // Compare first 10 sorted fields + // Compare first 10 sorted fields (binary-safe via Buffer.equals) const checkCount = Math.min(10, sourceFields.length); for (let i = 0; i < checkCount; i++) { if (sourceFields[i] !== targetFields[i]) { return `field names differ at index ${i}`; } - if (sourceData[sourceFields[i]] !== targetData[targetFields[i]]) { + const srcVal = sourceData[sourceFields[i]]; + const tgtVal = targetData[targetFields[i]]; + if 
(!srcVal || !tgtVal || !srcVal.equals(tgtVal)) { return `field "${sourceFields[i]}" value differs`; } } From b4d43f439e602487ffb69d60625ddf45e9383b19 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 16:28:55 +0300 Subject: [PATCH 22/34] Fix binary data corruption in sorted-set and stream migration - migrateZset: use callBuffer('ZRANGE') and callBuffer('ZSCAN') instead of call/zscan to preserve binary member data as raw Buffers. call() decodes non-UTF-8 bytes with the replacement character, silently corrupting msgpack/protobuf/null-byte members. - migrateStream: use callBuffer('XRANGE') and callBuffer('XADD') to preserve binary field names and values. Same UTF-8 decode issue as sorted sets. - Both fixes mirror the binary-safe approach already used in migrateHash (hscanBuffer) and migrateSet (smembersBuffer/sscanBuffer). Co-Authored-By: Claude --- .../migration/__tests__/type-handlers.spec.ts | 32 +++++++++++---- .../src/migration/execution/type-handlers.ts | 41 ++++++++++++------- 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/apps/api/src/migration/__tests__/type-handlers.spec.ts b/apps/api/src/migration/__tests__/type-handlers.spec.ts index 97dc92c1..32368d9a 100644 --- a/apps/api/src/migration/__tests__/type-handlers.spec.ts +++ b/apps/api/src/migration/__tests__/type-handlers.spec.ts @@ -12,10 +12,21 @@ function createMockSource(overrides: Record = {}) { smembersBuffer: jest.fn().mockResolvedValue([Buffer.from('m1'), Buffer.from('m2')]), sscanBuffer: jest.fn().mockResolvedValue(['0', [Buffer.from('m1')]]), zcard: jest.fn().mockResolvedValue(2), - zscan: jest.fn().mockResolvedValue(['0', ['m1', '1', 'm2', '2']]), - xrange: jest.fn().mockResolvedValue([['1-0', ['field', 'value']]]), pttl: jest.fn().mockResolvedValue(-1), call: jest.fn().mockResolvedValue(['m1', '1', 'm2', '2']), + // callBuffer returns Buffers for binary-safe zset/stream migration + callBuffer: jest.fn().mockImplementation((cmd: string) => { + if (cmd === 
'ZRANGE') { + return Promise.resolve([Buffer.from('m1'), Buffer.from('1'), Buffer.from('m2'), Buffer.from('2')]); + } + if (cmd === 'ZSCAN') { + return Promise.resolve([Buffer.from('0'), [Buffer.from('m1'), Buffer.from('1')]]); + } + if (cmd === 'XRANGE') { + return Promise.resolve([[Buffer.from('1-0'), [Buffer.from('field'), Buffer.from('value')]]]); + } + return Promise.resolve(null); + }), pipeline: jest.fn().mockReturnValue({ zadd: jest.fn().mockReturnThis(), exec: jest.fn().mockResolvedValue([]), @@ -30,6 +41,7 @@ function createMockTarget() { del: jest.fn().mockResolvedValue(1), pexpire: jest.fn().mockResolvedValue(1), call: jest.fn().mockResolvedValue('OK'), + callBuffer: jest.fn().mockResolvedValue(Buffer.from('OK')), pipeline: jest.fn().mockReturnValue({ zadd: jest.fn().mockReturnThis(), exec: jest.fn().mockResolvedValue([]), @@ -117,33 +129,35 @@ describe('type-handlers / migrateKey', () => { }); describe('zset', () => { - it('should use ZRANGE WITHSCORES for small sorted sets', async () => { + it('should use callBuffer ZRANGE WITHSCORES for small sorted sets (binary-safe)', async () => { source.zcard.mockResolvedValue(5); const result = await migrateKey(source, target, 'zset:1', 'zset'); expect(result.ok).toBe(true); expect(target.del).toHaveBeenCalledWith('zset:1'); - expect(source.call).toHaveBeenCalledWith('ZRANGE', 'zset:1', '0', '-1', 'WITHSCORES'); + expect(source.callBuffer).toHaveBeenCalledWith('ZRANGE', 'zset:1', '0', '-1', 'WITHSCORES'); }); - it('should use ZSCAN for large sorted sets (>10K members)', async () => { + it('should use callBuffer ZSCAN for large sorted sets (>10K members)', async () => { source.zcard.mockResolvedValue(15_000); const result = await migrateKey(source, target, 'zset:big', 'zset'); expect(result.ok).toBe(true); - expect(source.zscan).toHaveBeenCalled(); + expect(source.callBuffer).toHaveBeenCalledWith('ZSCAN', 'zset:big', '0', 'COUNT', '1000'); }); }); describe('stream', () => { - it('should XRANGE and XADD with 
preserved IDs', async () => { + it('should use callBuffer XRANGE and XADD with preserved binary data', async () => { const result = await migrateKey(source, target, 'stream:1', 'stream'); expect(result.ok).toBe(true); - expect(source.xrange).toHaveBeenCalled(); - expect(target.call).toHaveBeenCalledWith('XADD', 'stream:1', '1-0', 'field', 'value'); + expect(source.callBuffer).toHaveBeenCalledWith('XRANGE', 'stream:1', '-', '+', 'COUNT', '1000'); + expect(target.callBuffer).toHaveBeenCalledWith( + 'XADD', 'stream:1', '1-0', Buffer.from('field'), Buffer.from('value'), + ); }); }); diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index 5bc34de4..1455c27c 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -158,23 +158,28 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise await target.del(key); if (card <= LARGE_KEY_THRESHOLD) { - const data = await source.call('ZRANGE', key, '0', '-1', 'WITHSCORES') as string[]; - if (!data || data.length === 0) return true; // del already ran - // data is [member, score, member, score, ...] + // Use callBuffer to preserve binary member data (call() decodes as UTF-8) + const raw = await source.callBuffer('ZRANGE', key, '0', '-1', 'WITHSCORES') as Buffer[]; + if (!raw || raw.length === 0) return true; // del already ran + // raw is [member, score, member, score, ...] 
as Buffers const pipeline = target.pipeline(); - for (let i = 0; i < data.length; i += 2) { - pipeline.zadd(key, data[i + 1], data[i]); + for (let i = 0; i < raw.length; i += 2) { + // Score is always ASCII-safe, member stays as Buffer + pipeline.zadd(key, raw[i + 1].toString(), raw[i]); } await pipeline.exec(); } else { + // zscanBuffer not available — use callBuffer for ZSCAN to preserve binary members let cursor = '0'; do { - const [next, entries] = await source.zscan(key, cursor, 'COUNT', SCAN_BATCH); - cursor = next; - if (entries.length === 0) continue; + const result = await source.callBuffer('ZSCAN', key, cursor, 'COUNT', String(SCAN_BATCH)) as [Buffer, Buffer[]]; + cursor = result[0].toString(); + const entries = result[1]; + if (!entries || entries.length === 0) continue; + // entries is [member, score, member, score, ...] as Buffers const pipeline = target.pipeline(); for (let i = 0; i < entries.length; i += 2) { - pipeline.zadd(key, entries[i + 1], entries[i]); + pipeline.zadd(key, entries[i + 1].toString(), entries[i]); } await pipeline.exec(); } while (cursor !== '0'); @@ -193,18 +198,24 @@ async function migrateStream(source: Valkey, target: Valkey, key: string): Promi let wrote = false; while (hasMore) { - const entries = await source.xrange(key, lastId === '-' ? '-' : `(${lastId}`, '+', 'COUNT', STREAM_CHUNK); - if (!entries || entries.length === 0) { + const start = lastId === '-' ? '-' : `(${lastId}`; + // Use callBuffer to preserve binary field names and values + const raw = await source.callBuffer( + 'XRANGE', key, start, '+', 'COUNT', String(STREAM_CHUNK), + ) as Buffer[][]; + if (!raw || raw.length === 0) { hasMore = false; break; } - for (const [id, fields] of entries) { - // XADD with explicit ID to preserve ordering - await target.call('XADD', key, id, ...fields); + for (const entry of raw) { + // entry[0] = stream ID (always ASCII), entry[1] = [field, value, field, value, ...] 
+ const id = entry[0].toString(); + const fields = entry[1] as unknown as Buffer[]; + await target.callBuffer('XADD', key, id, ...fields); lastId = id; wrote = true; } - if (entries.length < STREAM_CHUNK) { + if (raw.length < STREAM_CHUNK) { hasMore = false; } } From 4fe1529987b729b8b8a0e6dfd6dc292bdbfc018c Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 16:47:34 +0300 Subject: [PATCH 23/34] Fix IPv6 TOML bracketing, multi-word password redaction, and binary-safe validation - Wrap bare IPv6 addresses in brackets for Go's net.Dial in TOML builder - Fix unquoted password regex to consume full multi-word passwords - Use lrangeBuffer + Buffer.equals for binary-safe list comparison - Replace hgetallBuffer with hscanBuffer to preserve binary hash field names - Add IPv6 bracketing tests to TOML builder spec Co-Authored-By: Claude --- .../migration/__tests__/toml-builder.spec.ts | 29 +++++++++++ .../src/migration/execution/toml-builder.ts | 14 ++++-- .../migration/migration-execution.service.ts | 4 +- .../migration/validation/sample-validator.ts | 50 ++++++++++++------- 4 files changed, 73 insertions(+), 24 deletions(-) diff --git a/apps/api/src/migration/__tests__/toml-builder.spec.ts b/apps/api/src/migration/__tests__/toml-builder.spec.ts index 49aa3e90..09fab359 100644 --- a/apps/api/src/migration/__tests__/toml-builder.spec.ts +++ b/apps/api/src/migration/__tests__/toml-builder.spec.ts @@ -127,4 +127,33 @@ describe('buildScanReaderToml', () => { expect(() => buildScanReaderToml(source, target, false)).toThrow('Invalid host'); }); + + it('should wrap bare IPv6 addresses in brackets for Go net.Dial', () => { + const source = makeConfig({ host: '::1', port: 6379 }); + const target = makeConfig({ host: '2001:db8::1', port: 6380 }); + + const toml = buildScanReaderToml(source, target, false); + + expect(toml).toContain('address = "[::1]:6379"'); + expect(toml).toContain('address = "[2001:db8::1]:6380"'); + }); + + it('should not double-bracket 
already-bracketed IPv6 addresses', () => { + const source = makeConfig({ host: '[::1]', port: 6379 }); + const target = makeConfig(); + + const toml = buildScanReaderToml(source, target, false); + + expect(toml).toContain('address = "[::1]:6379"'); + expect(toml).not.toContain('[['); + }); + + it('should not bracket IPv4 addresses containing no colons', () => { + const source = makeConfig({ host: '127.0.0.1', port: 6379 }); + const target = makeConfig(); + + const toml = buildScanReaderToml(source, target, false); + + expect(toml).toContain('address = "127.0.0.1:6379"'); + }); }); diff --git a/apps/api/src/migration/execution/toml-builder.ts b/apps/api/src/migration/execution/toml-builder.ts index 7e5b9f10..30aaffcc 100644 --- a/apps/api/src/migration/execution/toml-builder.ts +++ b/apps/api/src/migration/execution/toml-builder.ts @@ -27,13 +27,21 @@ function validateHost(host: string): string { if (!host || host.length > 253) { throw new Error('Invalid host: empty or too long'); } - // Allow hostname, IPv4, or bracketed IPv6 if (/[\s"\\]/.test(host)) { throw new Error('Invalid host: contains whitespace, quotes, or backslashes'); } return host; } +function formatAddress(host: string, port: number): string { + // Bare IPv6 addresses must be wrapped in brackets for Go's net.Dial + // e.g. "::1" → "[::1]:6379" + if (host.includes(':') && !host.startsWith('[')) { + return `[${host}]:${port}`; + } + return `${host}:${port}`; +} + export function buildScanReaderToml( source: DatabaseConnectionConfig, target: DatabaseConnectionConfig, @@ -50,7 +58,7 @@ export function buildScanReaderToml( const tgtPassword = target.password ?? ''; let toml = `[scan_reader] -address = "${escapeTomlString(srcHost)}:${srcPort}" +address = "${escapeTomlString(formatAddress(srcHost, srcPort))}" username = "${escapeTomlString(srcUsername)}" password = "${escapeTomlString(srcPassword)}" tls = ${source.tls ? 'true' : 'false'} @@ -62,7 +70,7 @@ tls = ${source.tls ? 
'true' : 'false'} toml += ` [redis_writer] -address = "${escapeTomlString(tgtHost)}:${tgtPort}" +address = "${escapeTomlString(formatAddress(tgtHost, tgtPort))}" username = "${escapeTomlString(tgtUsername)}" password = "${escapeTomlString(tgtPassword)}" tls = ${target.tls ? 'true' : 'false'} diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index 90c15edf..7d8148d9 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -287,8 +287,8 @@ function sanitizeLogLine(line: string): string { const eqIdx = match.search(/[=:]/); return match.slice(0, eqIdx + 1) + ' "***"'; }); - // 2. Unquoted passwords (skip already-redacted quoted ones): password = secret - sanitized = sanitized.replace(/password\s*[=:]\s*(?!["*])\S+/gi, (match) => { + // 2. Unquoted passwords (skip already-redacted quoted ones): password = my secret password + sanitized = sanitized.replace(/password\s*[=:]\s*(?!["*])\S.*/gi, (match) => { const eqIdx = match.search(/[=:]/); return match.slice(0, eqIdx + 1) + ' ***'; }); diff --git a/apps/api/src/migration/validation/sample-validator.ts b/apps/api/src/migration/validation/sample-validator.ts index 52409929..5697d3ae 100644 --- a/apps/api/src/migration/validation/sample-validator.ts +++ b/apps/api/src/migration/validation/sample-validator.ts @@ -226,28 +226,27 @@ async function compareHash(source: Valkey, target: Valkey, key: string): Promise return null; } - const [sourceData, targetData] = await Promise.all([ - source.hgetallBuffer(key), - target.hgetallBuffer(key), - ]); - - const sourceFields = Object.keys(sourceData).sort(); - const targetFields = Object.keys(targetData).sort(); + // Use HSCAN to preserve binary field names as raw Buffers + // (hgetallBuffer returns Record which coerces field names to UTF-8 strings) + const sourceEntries = await scanAllHashFields(source, key); + const targetEntries = await 
scanAllHashFields(target, key); - if (sourceFields.length !== targetFields.length) { - return `field count differs (source: ${sourceFields.length}, target: ${targetFields.length})`; + if (sourceEntries.length !== targetEntries.length) { + return `field count differs (source: ${sourceEntries.length}, target: ${targetEntries.length})`; } - // Compare first 10 sorted fields (binary-safe via Buffer.equals) - const checkCount = Math.min(10, sourceFields.length); + // Sort by field name bytes for deterministic comparison + sourceEntries.sort((a, b) => a.field.compare(b.field)); + targetEntries.sort((a, b) => a.field.compare(b.field)); + + // Compare first 10 sorted fields (fully binary-safe) + const checkCount = Math.min(10, sourceEntries.length); for (let i = 0; i < checkCount; i++) { - if (sourceFields[i] !== targetFields[i]) { + if (!sourceEntries[i].field.equals(targetEntries[i].field)) { return `field names differ at index ${i}`; } - const srcVal = sourceData[sourceFields[i]]; - const tgtVal = targetData[targetFields[i]]; - if (!srcVal || !tgtVal || !srcVal.equals(tgtVal)) { - return `field "${sourceFields[i]}" value differs`; + if (!sourceEntries[i].value.equals(targetEntries[i].value)) { + return `field "${sourceEntries[i].field.toString()}" value differs`; } } @@ -268,8 +267,8 @@ async function compareList(source: Valkey, target: Valkey, key: string): Promise } const [sourceItems, targetItems] = await Promise.all([ - source.lrange(key, 0, -1), - target.lrange(key, 0, -1), + source.lrangeBuffer(key, 0, -1), + target.lrangeBuffer(key, 0, -1), ]); if (sourceItems.length !== targetItems.length) { @@ -277,7 +276,7 @@ async function compareList(source: Valkey, target: Valkey, key: string): Promise } for (let i = 0; i < sourceItems.length; i++) { - if (sourceItems[i] !== targetItems[i]) { + if (!sourceItems[i].equals(targetItems[i])) { return `list element differs at index ${i}`; } } @@ -352,6 +351,19 @@ async function compareZset(source: Valkey, target: Valkey, key: 
string): Promise return null; } +async function scanAllHashFields(client: Valkey, key: string): Promise> { + const entries: Array<{ field: Buffer; value: Buffer }> = []; + let cursor = '0'; + do { + const [next, fields] = await client.hscanBuffer(key, cursor, 'COUNT', 100); + cursor = String(next); + for (let i = 0; i < fields.length; i += 2) { + entries.push({ field: fields[i], value: fields[i + 1] }); + } + } while (cursor !== '0'); + return entries; +} + async function compareStream(source: Valkey, target: Valkey, key: string): Promise { const [sourceLen, targetLen] = await Promise.all([ source.xlen(key), From e6d8187a73a8615d8a4e71358b7a3bf98fdcbc8c Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 17:27:28 +0300 Subject: [PATCH 24/34] Fix 7 review issues: binary-safe validation, atomic migration, job limits, and more High: - Use smembersBuffer for binary-safe set comparison in sample-validator - Use callBuffer for binary-safe zset comparison in sample-validator - Throw ServiceUnavailableException when evictOldJobs cannot free slots Medium: - Extend sanitizeLogLine to redact username, auth, requirepass, and token fields - Fix unreliable RDB detection: use CONFIG GET save instead of rdb_last_save_time - Use write-to-temp-key + atomic RENAME for compound type migration - Add @UseGuards(LicenseGuard) to GET /migration/analysis/:id endpoint Co-Authored-By: Claude --- .../__tests__/compatibility-checker.spec.ts | 27 ++- .../migration/__tests__/type-handlers.spec.ts | 37 ++- .../analysis/compatibility-checker.ts | 15 +- .../src/migration/execution/type-handlers.ts | 218 +++++++++++------- .../migration/migration-execution.service.ts | 32 ++- .../api/src/migration/migration.controller.ts | 1 + apps/api/src/migration/migration.service.ts | 18 +- .../migration/validation/sample-validator.ts | 25 +- 8 files changed, 233 insertions(+), 140 deletions(-) diff --git a/apps/api/src/migration/__tests__/compatibility-checker.spec.ts 
b/apps/api/src/migration/__tests__/compatibility-checker.spec.ts index 8dac270b..411bb59c 100644 --- a/apps/api/src/migration/__tests__/compatibility-checker.spec.ts +++ b/apps/api/src/migration/__tests__/compatibility-checker.spec.ts @@ -24,7 +24,6 @@ describe('compatibility-checker', () => { db0: 'keys=100,expires=10', db3: 'keys=50,expires=5', maxmemory_policy: 'allkeys-lru', - rdb_last_save_time: '1700000000', aof_enabled: '1', }; const capabilities: DatabaseCapabilities = { @@ -40,7 +39,7 @@ describe('compatibility-checker', () => { hasVectorSearch: false, }; - const meta = buildInstanceMeta(info, capabilities, ['default', 'admin']); + const meta = buildInstanceMeta(info, capabilities, ['default', 'admin'], '3600 1 300 100'); expect(meta.dbType).toBe('valkey'); expect(meta.version).toBe('8.1.0'); @@ -60,9 +59,29 @@ describe('compatibility-checker', () => { expect(meta.databases).toEqual([0]); }); - it('should detect RDB-only persistence', () => { + it('should detect RDB-only persistence via CONFIG save schedule', () => { const meta = buildInstanceMeta( - { rdb_last_save_time: '1700000000', aof_enabled: '0' }, + { aof_enabled: '0' }, + { dbType: 'valkey', version: '8.0.0' } as DatabaseCapabilities, + [], + '3600 1 300 100', + ); + expect(meta.persistenceMode).toBe('rdb'); + }); + + it('should not detect RDB when CONFIG save is empty', () => { + const meta = buildInstanceMeta( + { aof_enabled: '0' }, + { dbType: 'valkey', version: '8.0.0' } as DatabaseCapabilities, + [], + '', + ); + expect(meta.persistenceMode).toBe('none'); + }); + + it('should fall back to rdb_bgsave_in_progress when CONFIG not available', () => { + const meta = buildInstanceMeta( + { rdb_bgsave_in_progress: '1', aof_enabled: '0' }, { dbType: 'valkey', version: '8.0.0' } as DatabaseCapabilities, [], ); diff --git a/apps/api/src/migration/__tests__/type-handlers.spec.ts b/apps/api/src/migration/__tests__/type-handlers.spec.ts index 32368d9a..65dcd34f 100644 --- 
a/apps/api/src/migration/__tests__/type-handlers.spec.ts +++ b/apps/api/src/migration/__tests__/type-handlers.spec.ts @@ -39,6 +39,7 @@ function createMockTarget() { return { set: jest.fn().mockResolvedValue('OK'), del: jest.fn().mockResolvedValue(1), + rename: jest.fn().mockResolvedValue('OK'), pexpire: jest.fn().mockResolvedValue(1), call: jest.fn().mockResolvedValue('OK'), callBuffer: jest.fn().mockResolvedValue(Buffer.from('OK')), @@ -77,45 +78,40 @@ describe('type-handlers / migrateKey', () => { }); describe('hash', () => { - it('should use HSCAN and preserve binary field names', async () => { + it('should use HSCAN, write to temp key, and RENAME', async () => { source.hlen.mockResolvedValue(5); const result = await migrateKey(source, target, 'hash:1', 'hash'); expect(result.ok).toBe(true); expect(source.hscanBuffer).toHaveBeenCalled(); - expect(target.del).toHaveBeenCalledWith('hash:1'); - expect(target.call).toHaveBeenCalledWith('HSET', 'hash:1', expect.any(Buffer), expect.any(Buffer)); + // Writes to temp key then renames atomically + expect(target.call).toHaveBeenCalledWith('HSET', expect.stringContaining('__betterdb_mig_'), expect.any(Buffer), expect.any(Buffer)); + expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'hash:1'); }); }); describe('list', () => { - it('should LRANGE and RPUSH to target', async () => { + it('should LRANGE, RPUSH to temp key, and RENAME', async () => { const result = await migrateKey(source, target, 'list:1', 'list'); expect(result.ok).toBe(true); expect(source.lrangeBuffer).toHaveBeenCalled(); - expect(target.call).toHaveBeenCalledWith('RPUSH', 'list:1', expect.any(Buffer), expect.any(Buffer)); - }); - - it('should delete target key first to avoid appending', async () => { - const result = await migrateKey(source, target, 'list:1', 'list'); - - expect(result.ok).toBe(true); - expect(target.del).toHaveBeenCalledWith('list:1'); + expect(target.call).toHaveBeenCalledWith('RPUSH', 
expect.stringContaining('__betterdb_mig_'), expect.any(Buffer), expect.any(Buffer)); + expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'list:1'); }); }); describe('set', () => { - it('should use SMEMBERS for small sets', async () => { + it('should use SMEMBERS for small sets, write to temp key, and RENAME', async () => { source.scard.mockResolvedValue(5); const result = await migrateKey(source, target, 'set:1', 'set'); expect(result.ok).toBe(true); expect(source.smembersBuffer).toHaveBeenCalledWith('set:1'); - expect(target.del).toHaveBeenCalledWith('set:1'); - expect(target.call).toHaveBeenCalledWith('SADD', 'set:1', expect.any(Buffer), expect.any(Buffer)); + expect(target.call).toHaveBeenCalledWith('SADD', expect.stringContaining('__betterdb_mig_'), expect.any(Buffer), expect.any(Buffer)); + expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'set:1'); }); it('should use SSCAN for large sets (>10K members)', async () => { @@ -125,18 +121,19 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.sscanBuffer).toHaveBeenCalled(); + expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'set:big'); }); }); describe('zset', () => { - it('should use callBuffer ZRANGE WITHSCORES for small sorted sets (binary-safe)', async () => { + it('should use callBuffer ZRANGE WITHSCORES, write to temp key, and RENAME', async () => { source.zcard.mockResolvedValue(5); const result = await migrateKey(source, target, 'zset:1', 'zset'); expect(result.ok).toBe(true); - expect(target.del).toHaveBeenCalledWith('zset:1'); expect(source.callBuffer).toHaveBeenCalledWith('ZRANGE', 'zset:1', '0', '-1', 'WITHSCORES'); + expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'zset:1'); }); it('should use callBuffer ZSCAN for large sorted sets (>10K members)', async () => { @@ -146,18 +143,20 @@ describe('type-handlers / 
migrateKey', () => { expect(result.ok).toBe(true); expect(source.callBuffer).toHaveBeenCalledWith('ZSCAN', 'zset:big', '0', 'COUNT', '1000'); + expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'zset:big'); }); }); describe('stream', () => { - it('should use callBuffer XRANGE and XADD with preserved binary data', async () => { + it('should use callBuffer XRANGE, XADD to temp key, and RENAME', async () => { const result = await migrateKey(source, target, 'stream:1', 'stream'); expect(result.ok).toBe(true); expect(source.callBuffer).toHaveBeenCalledWith('XRANGE', 'stream:1', '-', '+', 'COUNT', '1000'); expect(target.callBuffer).toHaveBeenCalledWith( - 'XADD', 'stream:1', '1-0', Buffer.from('field'), Buffer.from('value'), + 'XADD', expect.stringContaining('__betterdb_mig_'), '1-0', Buffer.from('field'), Buffer.from('value'), ); + expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'stream:1'); }); }); diff --git a/apps/api/src/migration/analysis/compatibility-checker.ts b/apps/api/src/migration/analysis/compatibility-checker.ts index dc7e81aa..322cae85 100644 --- a/apps/api/src/migration/analysis/compatibility-checker.ts +++ b/apps/api/src/migration/analysis/compatibility-checker.ts @@ -34,6 +34,7 @@ export function buildInstanceMeta( info: Record, capabilities: DatabaseCapabilities, aclUsers: string[], + rdbSaveConfig?: string, ): InstanceMeta { // clusterEnabled const clusterEnabled = String(info['cluster_enabled'] ?? '0') === '1'; @@ -64,9 +65,17 @@ export function buildInstanceMeta( let hasRdb = false; let hasAof = false; - const rdbLastSaveTime = Number(info['rdb_last_save_time'] ?? 0); - if (rdbLastSaveTime > 0) { - hasRdb = true; + // rdb_last_save_time > 0 is unreliable — it's set to server start time even when RDB is disabled. + // Use the CONFIG GET save schedule when available; fall back to rdb_bgsave_in_progress as a weak signal. 
+ if (rdbSaveConfig !== undefined) { + // CONFIG GET save returns "" when RDB is disabled, non-empty when a schedule is set + hasRdb = rdbSaveConfig.length > 0; + } else { + // Fallback: if a BGSAVE is actively running, RDB is clearly configured + const bgsaveInProgress = String(info['rdb_bgsave_in_progress'] ?? '0'); + if (bgsaveInProgress === '1') { + hasRdb = true; + } } const aofEnabled = String(info['aof_enabled'] ?? '0'); diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index 1455c27c..88efec03 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -1,4 +1,5 @@ import type Valkey from 'iovalkey'; +import { randomBytes } from 'crypto'; // Threshold above which we use cursor-based reads (HSCAN/SSCAN/ZSCAN) instead of bulk reads const LARGE_KEY_THRESHOLD = 10_000; @@ -6,6 +7,11 @@ const SCAN_BATCH = 1000; const LIST_CHUNK = 1000; const STREAM_CHUNK = 1000; +/** Generate a unique temporary key name to write into before atomic RENAME. 
*/ +function tempKey(key: string): string { + return `__betterdb_mig_${randomBytes(8).toString('hex')}:{${key}}`; +} + export interface MigratedKey { key: string; type: string; @@ -86,22 +92,27 @@ async function migrateHash(source: Valkey, target: Valkey, key: string): Promise const len = await source.hlen(key); if (len === 0) return false; - // Delete target key first to avoid merging with stale data - await target.del(key); - - // Use HSCAN for all sizes so binary field names are preserved as Buffers - // (hgetallBuffer returns Record which coerces field names to UTF-8) - let cursor = '0'; - do { - const [next, fields] = await source.hscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); - cursor = String(next); - if (fields.length === 0) continue; - const args: (string | Buffer | number)[] = [key]; - for (let i = 0; i < fields.length; i += 2) { - args.push(fields[i], fields[i + 1]); - } - await target.call('HSET', ...args); - } while (cursor !== '0'); + // Write to a temp key then atomically RENAME to avoid data loss on crash + const tmp = tempKey(key); + try { + // Use HSCAN for all sizes so binary field names are preserved as Buffers + // (hgetallBuffer returns Record which coerces field names to UTF-8) + let cursor = '0'; + do { + const [next, fields] = await source.hscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); + cursor = String(next); + if (fields.length === 0) continue; + const args: (string | Buffer | number)[] = [tmp]; + for (let i = 0; i < fields.length; i += 2) { + args.push(fields[i], fields[i + 1]); + } + await target.call('HSET', ...args); + } while (cursor !== '0'); + await target.rename(tmp, key); + } catch (err) { + try { await target.del(tmp); } catch { /* best-effort cleanup */ } + throw err; + } return true; } @@ -111,14 +122,19 @@ async function migrateList(source: Valkey, target: Valkey, key: string): Promise const len = await source.llen(key); if (len === 0) return false; - // Delete target key first to avoid appending to existing data - await 
target.del(key); - - for (let start = 0; start < len; start += LIST_CHUNK) { - const end = Math.min(start + LIST_CHUNK - 1, len - 1); - const items = await source.lrangeBuffer(key, start, end); - if (items.length === 0) break; - await target.call('RPUSH', key, ...items); + // Write to a temp key then atomically RENAME to avoid data loss on crash + const tmp = tempKey(key); + try { + for (let start = 0; start < len; start += LIST_CHUNK) { + const end = Math.min(start + LIST_CHUNK - 1, len - 1); + const items = await source.lrangeBuffer(key, start, end); + if (items.length === 0) break; + await target.call('RPUSH', tmp, ...items); + } + await target.rename(tmp, key); + } catch (err) { + try { await target.del(tmp); } catch { /* best-effort cleanup */ } + throw err; } return true; } @@ -129,21 +145,29 @@ async function migrateSet(source: Valkey, target: Valkey, key: string): Promise< const card = await source.scard(key); if (card === 0) return false; - // Delete target key first to avoid merging with stale data - await target.del(key); - - if (card <= LARGE_KEY_THRESHOLD) { - const members = await source.smembersBuffer(key); - if (members.length === 0) return true; // del already ran - await target.call('SADD', key, ...members); - } else { - let cursor = '0'; - do { - const [next, members] = await source.sscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); - cursor = String(next); - if (members.length === 0) continue; - await target.call('SADD', key, ...members); - } while (cursor !== '0'); + // Write to a temp key then atomically RENAME to avoid data loss on crash + const tmp = tempKey(key); + try { + if (card <= LARGE_KEY_THRESHOLD) { + const members = await source.smembersBuffer(key); + if (members.length === 0) { + try { await target.del(tmp); } catch { /* best-effort cleanup */ } + return true; + } + await target.call('SADD', tmp, ...members); + } else { + let cursor = '0'; + do { + const [next, members] = await source.sscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); + 
cursor = String(next); + if (members.length === 0) continue; + await target.call('SADD', tmp, ...members); + } while (cursor !== '0'); + } + await target.rename(tmp, key); + } catch (err) { + try { await target.del(tmp); } catch { /* best-effort cleanup */ } + throw err; } return true; } @@ -154,35 +178,43 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise const card = await source.zcard(key); if (card === 0) return false; - // Delete target key first to avoid merging with stale data - await target.del(key); - - if (card <= LARGE_KEY_THRESHOLD) { - // Use callBuffer to preserve binary member data (call() decodes as UTF-8) - const raw = await source.callBuffer('ZRANGE', key, '0', '-1', 'WITHSCORES') as Buffer[]; - if (!raw || raw.length === 0) return true; // del already ran - // raw is [member, score, member, score, ...] as Buffers - const pipeline = target.pipeline(); - for (let i = 0; i < raw.length; i += 2) { - // Score is always ASCII-safe, member stays as Buffer - pipeline.zadd(key, raw[i + 1].toString(), raw[i]); - } - await pipeline.exec(); - } else { - // zscanBuffer not available — use callBuffer for ZSCAN to preserve binary members - let cursor = '0'; - do { - const result = await source.callBuffer('ZSCAN', key, cursor, 'COUNT', String(SCAN_BATCH)) as [Buffer, Buffer[]]; - cursor = result[0].toString(); - const entries = result[1]; - if (!entries || entries.length === 0) continue; - // entries is [member, score, member, score, ...] as Buffers + // Write to a temp key then atomically RENAME to avoid data loss on crash + const tmp = tempKey(key); + try { + if (card <= LARGE_KEY_THRESHOLD) { + // Use callBuffer to preserve binary member data (call() decodes as UTF-8) + const raw = await source.callBuffer('ZRANGE', key, '0', '-1', 'WITHSCORES') as Buffer[]; + if (!raw || raw.length === 0) { + try { await target.del(tmp); } catch { /* best-effort cleanup */ } + return true; + } + // raw is [member, score, member, score, ...] 
as Buffers const pipeline = target.pipeline(); - for (let i = 0; i < entries.length; i += 2) { - pipeline.zadd(key, entries[i + 1].toString(), entries[i]); + for (let i = 0; i < raw.length; i += 2) { + // Score is always ASCII-safe, member stays as Buffer + pipeline.zadd(tmp, raw[i + 1].toString(), raw[i]); } await pipeline.exec(); - } while (cursor !== '0'); + } else { + // zscanBuffer not available — use callBuffer for ZSCAN to preserve binary members + let cursor = '0'; + do { + const result = await source.callBuffer('ZSCAN', key, cursor, 'COUNT', String(SCAN_BATCH)) as [Buffer, Buffer[]]; + cursor = result[0].toString(); + const entries = result[1]; + if (!entries || entries.length === 0) continue; + // entries is [member, score, member, score, ...] as Buffers + const pipeline = target.pipeline(); + for (let i = 0; i < entries.length; i += 2) { + pipeline.zadd(tmp, entries[i + 1].toString(), entries[i]); + } + await pipeline.exec(); + } while (cursor !== '0'); + } + await target.rename(tmp, key); + } catch (err) { + try { await target.del(tmp); } catch { /* best-effort cleanup */ } + throw err; } return true; } @@ -190,34 +222,42 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise // ── Stream ── async function migrateStream(source: Valkey, target: Valkey, key: string): Promise { - // Delete target key first to avoid duplicates on re-migration - await target.del(key); - - let lastId = '-'; - let hasMore = true; + // Write to a temp key then atomically RENAME to avoid data loss on crash + const tmp = tempKey(key); let wrote = false; - while (hasMore) { - const start = lastId === '-' ? 
'-' : `(${lastId}`; - // Use callBuffer to preserve binary field names and values - const raw = await source.callBuffer( - 'XRANGE', key, start, '+', 'COUNT', String(STREAM_CHUNK), - ) as Buffer[][]; - if (!raw || raw.length === 0) { - hasMore = false; - break; - } - for (const entry of raw) { - // entry[0] = stream ID (always ASCII), entry[1] = [field, value, field, value, ...] - const id = entry[0].toString(); - const fields = entry[1] as unknown as Buffer[]; - await target.callBuffer('XADD', key, id, ...fields); - lastId = id; - wrote = true; + try { + let lastId = '-'; + let hasMore = true; + + while (hasMore) { + const start = lastId === '-' ? '-' : `(${lastId}`; + // Use callBuffer to preserve binary field names and values + const raw = await source.callBuffer( + 'XRANGE', key, start, '+', 'COUNT', String(STREAM_CHUNK), + ) as Buffer[][]; + if (!raw || raw.length === 0) { + hasMore = false; + break; + } + for (const entry of raw) { + // entry[0] = stream ID (always ASCII), entry[1] = [field, value, field, value, ...] + const id = entry[0].toString(); + const fields = entry[1] as unknown as Buffer[]; + await target.callBuffer('XADD', tmp, id, ...fields); + lastId = id; + wrote = true; + } + if (raw.length < STREAM_CHUNK) { + hasMore = false; + } } - if (raw.length < STREAM_CHUNK) { - hasMore = false; + if (wrote) { + await target.rename(tmp, key); } + } catch (err) { + try { await target.del(tmp); } catch { /* best-effort cleanup */ } + throw err; } return wrote; } diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index 7d8148d9..c33bf38f 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -274,24 +274,34 @@ export class MigrationExecutionService { } if (this.jobs.size >= this.MAX_JOBS) { - this.logger.warn(`Execution job limit reached (${this.MAX_JOBS}). 
Cannot evict running jobs.`); + throw new ServiceUnavailableException( + `Execution job limit reached (${this.MAX_JOBS}). All slots occupied by running jobs — try again later.`, + ); } } } // Redact credentials from RedisShake log lines before serving to the frontend +const SENSITIVE_KEYS = /(?:password|username|auth|requirepass|masterauth|token)/i; + function sanitizeLogLine(line: string): string { let sanitized = line; - // 1. Quoted passwords: password = "secret" or password:"secret" - sanitized = sanitized.replace(/password\s*[=:]\s*"(?:[^"\\]|\\.)*"/gi, (match) => { - const eqIdx = match.search(/[=:]/); - return match.slice(0, eqIdx + 1) + ' "***"'; - }); - // 2. Unquoted passwords (skip already-redacted quoted ones): password = my secret password - sanitized = sanitized.replace(/password\s*[=:]\s*(?!["*])\S.*/gi, (match) => { - const eqIdx = match.search(/[=:]/); - return match.slice(0, eqIdx + 1) + ' ***'; - }); + // 1. Quoted sensitive fields: password = "secret" or username:"admin" + sanitized = sanitized.replace( + new RegExp(`(${SENSITIVE_KEYS.source})\\s*[=:]\\s*"(?:[^"\\\\]|\\\\.)*"`, 'gi'), + (match) => { + const eqIdx = match.search(/[=:]/); + return match.slice(0, eqIdx + 1) + ' "***"'; + }, + ); + // 2. Unquoted sensitive fields (skip already-redacted quoted ones) + sanitized = sanitized.replace( + new RegExp(`(${SENSITIVE_KEYS.source})\\s*[=:]\\s*(?!["*])\\S.*`, 'gi'), + (match) => { + const eqIdx = match.search(/[=:]/); + return match.slice(0, eqIdx + 1) + ' ***'; + }, + ); // 3. 
URL credentials: redis://user:pass@host sanitized = sanitized.replace(/\/\/[^:]+:[^@]+@/g, '//***:***@'); return sanitized; diff --git a/apps/api/src/migration/migration.controller.ts b/apps/api/src/migration/migration.controller.ts index 747b7590..17653077 100644 --- a/apps/api/src/migration/migration.controller.ts +++ b/apps/api/src/migration/migration.controller.ts @@ -39,6 +39,7 @@ export class MigrationController { } @Get('analysis/:id') + @UseGuards(LicenseGuard) getJob(@Param('id') id: string): MigrationAnalysisResult { const job = this.migrationService.getJob(id); if (!job) { diff --git a/apps/api/src/migration/migration.service.ts b/apps/api/src/migration/migration.service.ts index 5edd30fa..27518494 100644 --- a/apps/api/src/migration/migration.service.ts +++ b/apps/api/src/migration/migration.service.ts @@ -380,9 +380,23 @@ export class MigrationService { sourceAclUsers = result ?? []; } catch { /* ignore - ACL not supported or no permission */ } + // Fetch RDB save config from both instances for reliable persistence detection + let sourceRdbSaveConfig: string | undefined; + let targetRdbSaveConfig: string | undefined; + try { + const sourceClient = adapter.getClient(); + const result = await sourceClient.call('CONFIG', 'GET', 'save') as string[]; + if (result && result.length >= 2) sourceRdbSaveConfig = result[1]; + } catch { /* ignore - CONFIG not permitted */ } + try { + const targetClient = targetAdapter.getClient(); + const result = await targetClient.call('CONFIG', 'GET', 'save') as string[]; + if (result && result.length >= 2) targetRdbSaveConfig = result[1]; + } catch { /* ignore - CONFIG not permitted */ } + // Build source meta (buildInstanceMeta expects a flat key-value object) const flatSourceInfo = flattenInfo(info); - const sourceMeta = buildInstanceMeta(flatSourceInfo, capabilities, sourceAclUsers); + const sourceMeta = buildInstanceMeta(flatSourceInfo, capabilities, sourceAclUsers, sourceRdbSaveConfig); // Fetch source modules try { @@ 
-393,7 +407,7 @@ export class MigrationService { // Build target meta const flatTargetInfo = flattenInfo(targetInfo); - const targetMeta = buildInstanceMeta(flatTargetInfo, targetCapabilities, targetAclUsers); + const targetMeta = buildInstanceMeta(flatTargetInfo, targetCapabilities, targetAclUsers, targetRdbSaveConfig); // Fetch target modules try { diff --git a/apps/api/src/migration/validation/sample-validator.ts b/apps/api/src/migration/validation/sample-validator.ts index 5697d3ae..d231c9c6 100644 --- a/apps/api/src/migration/validation/sample-validator.ts +++ b/apps/api/src/migration/validation/sample-validator.ts @@ -298,19 +298,20 @@ async function compareSet(source: Valkey, target: Valkey, key: string): Promise< } const [sourceMembers, targetMembers] = await Promise.all([ - source.smembers(key), - target.smembers(key), + source.smembersBuffer(key), + target.smembersBuffer(key), ]); - const sourceSet = new Set(sourceMembers); - const targetSet = new Set(targetMembers); - - if (sourceSet.size !== targetSet.size) { - return `set cardinality differs (source: ${sourceSet.size}, target: ${targetSet.size})`; + if (sourceMembers.length !== targetMembers.length) { + return `set cardinality differs (source: ${sourceMembers.length}, target: ${targetMembers.length})`; } - for (const member of sourceSet) { - if (!targetSet.has(member)) { + // Sort by raw bytes for deterministic comparison + sourceMembers.sort((a, b) => a.compare(b)); + targetMembers.sort((a, b) => a.compare(b)); + + for (let i = 0; i < sourceMembers.length; i++) { + if (!sourceMembers[i].equals(targetMembers[i])) { return 'set members differ'; } } @@ -332,8 +333,8 @@ async function compareZset(source: Valkey, target: Valkey, key: string): Promise } const [sourceData, targetData] = await Promise.all([ - (source as any).call('ZRANGE', key, '0', '-1', 'WITHSCORES') as Promise, - (target as any).call('ZRANGE', key, '0', '-1', 'WITHSCORES') as Promise, + source.callBuffer('ZRANGE', key, '0', '-1', 
'WITHSCORES') as Promise, + target.callBuffer('ZRANGE', key, '0', '-1', 'WITHSCORES') as Promise, ]); if (!sourceData && !targetData) return null; @@ -343,7 +344,7 @@ async function compareZset(source: Valkey, target: Valkey, key: string): Promise } for (let i = 0; i < sourceData.length; i++) { - if (sourceData[i] !== targetData[i]) { + if (!sourceData[i].equals(targetData[i])) { return 'zset member or score differs'; } } From 10d0f8e4e59526c172e0f7ce83df8ee2093a0e35 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 18:09:13 +0300 Subject: [PATCH 25/34] Fix 5 review findings: community-tier guard, key count edge case, race conditions - Remove @UseGuards(LicenseGuard) from GET /migration/analysis/:id to match community-tier design (POST is unguarded, GET must be too) - Fix discrepancyPercent when sourceKeys=0 and targetKeys>0 (was 0, now 100) and add warning about stale target keys - Return false in migrateSet when smembersBuffer returns empty after scard>0 (key expired between the two calls) - Return false in migrateZset when ZRANGE WITHSCORES returns empty after zcard>0 (same race condition) - Push 'none' for every key in batch when pipeline.exec() returns null to prevent index misalignment in batchType Co-Authored-By: Claude --- apps/api/src/migration/execution/type-handlers.ts | 4 ++-- apps/api/src/migration/migration.controller.ts | 1 - apps/api/src/migration/validation/key-count-comparator.ts | 8 +++++++- apps/api/src/migration/validation/sample-validator.ts | 6 +++++- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index 88efec03..fe88c5bb 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -152,7 +152,7 @@ async function migrateSet(source: Valkey, target: Valkey, key: string): Promise< const members = await source.smembersBuffer(key); if (members.length 
=== 0) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } - return true; + return false; // key expired between SCARD and SMEMBERS } await target.call('SADD', tmp, ...members); } else { @@ -186,7 +186,7 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise const raw = await source.callBuffer('ZRANGE', key, '0', '-1', 'WITHSCORES') as Buffer[]; if (!raw || raw.length === 0) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } - return true; + return false; // key expired between ZCARD and ZRANGE } // raw is [member, score, member, score, ...] as Buffers const pipeline = target.pipeline(); diff --git a/apps/api/src/migration/migration.controller.ts b/apps/api/src/migration/migration.controller.ts index 17653077..747b7590 100644 --- a/apps/api/src/migration/migration.controller.ts +++ b/apps/api/src/migration/migration.controller.ts @@ -39,7 +39,6 @@ export class MigrationController { } @Get('analysis/:id') - @UseGuards(LicenseGuard) getJob(@Param('id') id: string): MigrationAnalysisResult { const job = this.migrationService.getJob(id); if (!job) { diff --git a/apps/api/src/migration/validation/key-count-comparator.ts b/apps/api/src/migration/validation/key-count-comparator.ts index b26a7009..23bd4fea 100644 --- a/apps/api/src/migration/validation/key-count-comparator.ts +++ b/apps/api/src/migration/validation/key-count-comparator.ts @@ -17,7 +17,7 @@ export async function compareKeyCounts( const discrepancy = targetKeys - sourceKeys; const discrepancyPercent = sourceKeys === 0 - ? 0 + ? (targetKeys > 0 ? 100 : 0) : Math.abs(discrepancy / sourceKeys) * 100; const result: KeyCountComparison = { @@ -27,6 +27,12 @@ export async function compareKeyCounts( discrepancyPercent: Math.round(discrepancyPercent * 100) / 100, }; + // Flag when source is empty but target has stale keys + if (sourceKeys === 0 && targetKeys > 0) { + result.warning = + 'Source has 0 keys but target has data. 
Target may contain stale keys from a previous migration or other writes.'; + } + // Risk #4: DBSIZE counts all databases, SCAN only covers db0 by default. // If source is standalone but target is cluster, key count may be misleading. if (analysisResult?.isCluster === false && analysisResult?.targetIsCluster === true) { diff --git a/apps/api/src/migration/validation/sample-validator.ts b/apps/api/src/migration/validation/sample-validator.ts index d231c9c6..7ce86a7d 100644 --- a/apps/api/src/migration/validation/sample-validator.ts +++ b/apps/api/src/migration/validation/sample-validator.ts @@ -137,7 +137,11 @@ async function batchType(client: Valkey, keys: string[]): Promise { pipeline.type(key); } const pipelineResults = await pipeline.exec(); - for (const [err, val] of (pipelineResults ?? [])) { + if (!pipelineResults) { + for (let j = 0; j < batch.length; j++) results.push('none'); + continue; + } + for (const [err, val] of pipelineResults) { results.push(err ? 'none' : String(val)); } } From 19b5a2330217725d178e758c8acd3e580bfd61a9 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 19:13:37 +0300 Subject: [PATCH 26/34] Fix cluster fail-master filtering, CROSSSLOT temp keys, stuck-job handling, atomic TTL, and list drift check High: - Filter out failed/noaddr masters in parseClusterMasters to avoid 10s connect timeout hangs per failed node - Preserve hash tags in tempKey to prevent CROSSSLOT errors on RENAME in cluster mode (extract existing {tag} or wrap key as {key}) - Stuck-job eviction now cancels jobs (closing Valkey connections) instead of silently deleting them; TTL raised from 30min to 2h; getJob returns cancelled status instead of 404 Medium: - Atomic RENAME+PEXPIRE via Lua script eliminates TTL race window where compound-type keys are briefly live without expiry after rename - Post-migration LLEN check warns when source list length changed during migration (items appended/removed between LLEN and last LRANGE chunk) Co-Authored-By: 
Claude --- .../migration/__tests__/type-handlers.spec.ts | 23 +++-- .../execution/command-migration-worker.ts | 1 + .../src/migration/execution/type-handlers.ts | 99 ++++++++++++++++--- apps/api/src/migration/migration.service.ts | 13 ++- 4 files changed, 110 insertions(+), 26 deletions(-) diff --git a/apps/api/src/migration/__tests__/type-handlers.spec.ts b/apps/api/src/migration/__tests__/type-handlers.spec.ts index 65dcd34f..01090915 100644 --- a/apps/api/src/migration/__tests__/type-handlers.spec.ts +++ b/apps/api/src/migration/__tests__/type-handlers.spec.ts @@ -40,6 +40,7 @@ function createMockTarget() { set: jest.fn().mockResolvedValue('OK'), del: jest.fn().mockResolvedValue(1), rename: jest.fn().mockResolvedValue('OK'), + llen: jest.fn().mockResolvedValue(2), pexpire: jest.fn().mockResolvedValue(1), call: jest.fn().mockResolvedValue('OK'), callBuffer: jest.fn().mockResolvedValue(Buffer.from('OK')), @@ -87,7 +88,7 @@ describe('type-handlers / migrateKey', () => { expect(source.hscanBuffer).toHaveBeenCalled(); // Writes to temp key then renames atomically expect(target.call).toHaveBeenCalledWith('HSET', expect.stringContaining('__betterdb_mig_'), expect.any(Buffer), expect.any(Buffer)); - expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'hash:1'); + expect(target.call).toHaveBeenCalledWith('EVAL', expect.any(String), '2', expect.stringContaining('__betterdb_mig_'), 'hash:1', '-1'); }); }); @@ -98,7 +99,7 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.lrangeBuffer).toHaveBeenCalled(); expect(target.call).toHaveBeenCalledWith('RPUSH', expect.stringContaining('__betterdb_mig_'), expect.any(Buffer), expect.any(Buffer)); - expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'list:1'); + expect(target.call).toHaveBeenCalledWith('EVAL', expect.any(String), '2', expect.stringContaining('__betterdb_mig_'), 'list:1', '-1'); }); }); @@ -111,7 +112,7 @@ 
describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.smembersBuffer).toHaveBeenCalledWith('set:1'); expect(target.call).toHaveBeenCalledWith('SADD', expect.stringContaining('__betterdb_mig_'), expect.any(Buffer), expect.any(Buffer)); - expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'set:1'); + expect(target.call).toHaveBeenCalledWith('EVAL', expect.any(String), '2', expect.stringContaining('__betterdb_mig_'), 'set:1', '-1'); }); it('should use SSCAN for large sets (>10K members)', async () => { @@ -121,7 +122,7 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.sscanBuffer).toHaveBeenCalled(); - expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'set:big'); + expect(target.call).toHaveBeenCalledWith('EVAL', expect.any(String), '2', expect.stringContaining('__betterdb_mig_'), 'set:big', '-1'); }); }); @@ -133,7 +134,7 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.callBuffer).toHaveBeenCalledWith('ZRANGE', 'zset:1', '0', '-1', 'WITHSCORES'); - expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'zset:1'); + expect(target.call).toHaveBeenCalledWith('EVAL', expect.any(String), '2', expect.stringContaining('__betterdb_mig_'), 'zset:1', '-1'); }); it('should use callBuffer ZSCAN for large sorted sets (>10K members)', async () => { @@ -143,7 +144,7 @@ describe('type-handlers / migrateKey', () => { expect(result.ok).toBe(true); expect(source.callBuffer).toHaveBeenCalledWith('ZSCAN', 'zset:big', '0', 'COUNT', '1000'); - expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'zset:big'); + expect(target.call).toHaveBeenCalledWith('EVAL', expect.any(String), '2', expect.stringContaining('__betterdb_mig_'), 'zset:big', '-1'); }); }); @@ -156,7 +157,7 @@ describe('type-handlers / migrateKey', () => { 
expect(target.callBuffer).toHaveBeenCalledWith( 'XADD', expect.stringContaining('__betterdb_mig_'), '1-0', Buffer.from('field'), Buffer.from('value'), ); - expect(target.rename).toHaveBeenCalledWith(expect.stringContaining('__betterdb_mig_'), 'stream:1'); + expect(target.call).toHaveBeenCalledWith('EVAL', expect.any(String), '2', expect.stringContaining('__betterdb_mig_'), 'stream:1', '-1'); }); }); @@ -171,13 +172,17 @@ describe('type-handlers / migrateKey', () => { expect(target.pexpire).not.toHaveBeenCalled(); }); - it('should call pexpire for compound types when source TTL > 0', async () => { + it('should apply TTL atomically via Lua EVAL for compound types when source TTL > 0', async () => { source.pttl.mockResolvedValue(60000); const result = await migrateKey(source, target, 'hash:ttl', 'hash'); expect(result.ok).toBe(true); - expect(target.pexpire).toHaveBeenCalledWith('hash:ttl', 60000); + // TTL is passed to Lua EVAL as the ARGV[1] parameter + expect(target.call).toHaveBeenCalledWith( + 'EVAL', expect.any(String), '2', + expect.stringContaining('__betterdb_mig_'), 'hash:ttl', '60000', + ); }); it('should not call pexpire when source TTL is -1', async () => { diff --git a/apps/api/src/migration/execution/command-migration-worker.ts b/apps/api/src/migration/execution/command-migration-worker.ts index 157a9746..a0d15c1e 100644 --- a/apps/api/src/migration/execution/command-migration-worker.ts +++ b/apps/api/src/migration/execution/command-migration-worker.ts @@ -175,6 +175,7 @@ function parseClusterMasters(nodesRaw: string): Array<{ host: string; port: numb const parts = line.split(' '); const flags = parts[2] ?? ''; if (!flags.includes('master')) continue; + if (flags.includes('fail') || flags.includes('noaddr')) continue; // address format: host:port@clusterport (host may be IPv6, e.g. [::1]:6379@16379) const addrPart = (parts[1] ?? 
'').split('@')[0]; const lastColon = addrPart.lastIndexOf(':'); diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index fe88c5bb..e7dec6c8 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -7,9 +7,26 @@ const SCAN_BATCH = 1000; const LIST_CHUNK = 1000; const STREAM_CHUNK = 1000; -/** Generate a unique temporary key name to write into before atomic RENAME. */ +/** + * Generate a unique temporary key that hashes to the same slot as the original key. + * In cluster mode, RENAME requires both keys to be in the same slot. + * We preserve the original key's hash tag if present, or wrap the key itself + * as the hash tag so Redis routes both to the same slot. + */ function tempKey(key: string): string { - return `__betterdb_mig_${randomBytes(8).toString('hex')}:{${key}}`; + const suffix = randomBytes(8).toString('hex'); + // Extract existing hash tag: first {…} pair where content is non-empty + const openBrace = key.indexOf('{'); + if (openBrace !== -1) { + const closeBrace = key.indexOf('}', openBrace + 1); + if (closeBrace > openBrace + 1) { + // Key already has a hash tag — reuse it verbatim + const tag = key.substring(openBrace, closeBrace + 1); + return `__betterdb_mig_${suffix}:${tag}`; + } + } + // No hash tag — wrap the whole key as the tag + return `__betterdb_mig_${suffix}:{${key}}`; } export interface MigratedKey { @@ -17,6 +34,7 @@ export interface MigratedKey { type: string; ok: boolean; error?: string; + warning?: string; } /** @@ -54,11 +72,23 @@ export async function migrateKey( default: return { key, type, ok: false, error: `Unsupported type: ${type}` }; } - // String handles TTL atomically; compound types need a separate PEXPIRE - if (wrote && type !== 'string') { - await migrateTtl(source, target, key); + // TTL is handled atomically in each handler: + // - String: SET PX + // - Compound types: Lua RENAME+PEXPIRE via 
atomicRenameWithTtl + const result: MigratedKey = { key, type, ok: true }; + + // Post-migration list length check: source list may have changed during migration + if (wrote && type === 'list') { + try { + const targetLen = await target.llen(key); + const sourceLen = await source.llen(key); + if (targetLen !== sourceLen) { + result.warning = `list length changed during migration (migrated: ${targetLen}, current source: ${sourceLen})`; + } + } catch { /* non-fatal check */ } } - return { key, type, ok: true }; + + return result; } catch (err: unknown) { const message = err instanceof Error ? err.message : String(err); return { key, type, ok: false, error: message }; @@ -108,7 +138,8 @@ async function migrateHash(source: Valkey, target: Valkey, key: string): Promise } await target.call('HSET', ...args); } while (cursor !== '0'); - await target.rename(tmp, key); + const pttl = await source.pttl(key); + await atomicRenameWithTtl(target, tmp, key, pttl); } catch (err) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } throw err; @@ -131,7 +162,8 @@ async function migrateList(source: Valkey, target: Valkey, key: string): Promise if (items.length === 0) break; await target.call('RPUSH', tmp, ...items); } - await target.rename(tmp, key); + const pttl = await source.pttl(key); + await atomicRenameWithTtl(target, tmp, key, pttl); } catch (err) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } throw err; @@ -164,7 +196,8 @@ async function migrateSet(source: Valkey, target: Valkey, key: string): Promise< await target.call('SADD', tmp, ...members); } while (cursor !== '0'); } - await target.rename(tmp, key); + const pttl = await source.pttl(key); + await atomicRenameWithTtl(target, tmp, key, pttl); } catch (err) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } throw err; @@ -211,7 +244,8 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise await pipeline.exec(); } while (cursor !== '0'); } - 
await target.rename(tmp, key); + const pttl = await source.pttl(key); + await atomicRenameWithTtl(target, tmp, key, pttl); } catch (err) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } throw err; @@ -253,7 +287,8 @@ async function migrateStream(source: Valkey, target: Valkey, key: string): Promi } } if (wrote) { - await target.rename(tmp, key); + const pttl = await source.pttl(key); + await atomicRenameWithTtl(target, tmp, key, pttl); } } catch (err) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } @@ -264,7 +299,24 @@ async function migrateStream(source: Valkey, target: Valkey, key: string): Promi // ── TTL ── -async function migrateTtl(source: Valkey, target: Valkey, key: string): Promise { +// Lua script: atomically RENAME tmp→key and PEXPIRE in one round-trip. +// KEYS[1] = tmp, KEYS[2] = final key, ARGV[1] = pttl (or "-1" for no expiry, "-2" for expired) +const RENAME_WITH_TTL_LUA = ` +redis.call('RENAME', KEYS[1], KEYS[2]) +local pttl = tonumber(ARGV[1]) +if pttl > 0 then + redis.call('PEXPIRE', KEYS[2], pttl) +elseif pttl == -2 then + redis.call('DEL', KEYS[2]) +end +return 1 +`; + +/** + * Read PTTL from source, then atomically RENAME tmp→key + PEXPIRE on target. + * Eliminates the window where the key exists on target without its TTL. + */ +async function migrateTtlAtomic(source: Valkey, target: Valkey, key: string): Promise { const pttl = await source.pttl(key); if (pttl > 0) { await target.pexpire(key, pttl); @@ -273,3 +325,26 @@ async function migrateTtl(source: Valkey, target: Valkey, key: string): Promise< await target.del(key); } } + +/** + * Atomically RENAME tmp→key and apply PTTL in a single Lua eval. + * Falls back to separate RENAME + PEXPIRE if EVAL is blocked (e.g. by ACL). 
+ */ +async function atomicRenameWithTtl( + target: Valkey, + tmp: string, + key: string, + pttl: number, +): Promise { + try { + await target.call('EVAL', RENAME_WITH_TTL_LUA, '2', tmp, key, String(pttl)); + } catch { + // Fallback: separate commands (e.g. EVAL blocked by ACL) + await target.rename(tmp, key); + if (pttl > 0) { + await target.pexpire(key, pttl); + } else if (pttl === -2) { + await target.del(key); + } + } +} diff --git a/apps/api/src/migration/migration.service.ts b/apps/api/src/migration/migration.service.ts index 27518494..1dbbde01 100644 --- a/apps/api/src/migration/migration.service.ts +++ b/apps/api/src/migration/migration.service.ts @@ -15,7 +15,7 @@ export class MigrationService { private readonly logger = new Logger(MigrationService.name); private jobs = new Map(); private readonly MAX_JOBS = 20; - private readonly STUCK_JOB_TTL_MS = 30 * 60 * 1000; + private readonly STUCK_JOB_TTL_MS = 2 * 60 * 60 * 1000; // 2 hours constructor( private readonly connectionRegistry: ConnectionRegistry, @@ -53,8 +53,9 @@ export class MigrationService { const job = this.jobs.get(id); if (!job) return undefined; if (this.isJobStuck(job)) { - this.jobs.delete(id); - return undefined; + this.logger.warn(`Analysis ${id} exceeded stuck-job TTL — cancelling`); + this.cancelJob(id); + // Return the job with its cancelled/failed status rather than 404 } return { ...job.result, @@ -63,7 +64,7 @@ export class MigrationService { progress: job.progress, createdAt: job.createdAt, completedAt: job.completedAt, - error: job.error, + error: job.error ?? (job.status === 'cancelled' ? 
'Analysis timed out' : undefined), } as MigrationAnalysisResult; } @@ -453,9 +454,11 @@ export class MigrationService { private evictOldJobs(): void { if (this.jobs.size < this.MAX_JOBS) return; - // First: evict stuck running jobs + // First: cancel and evict stuck running jobs for (const [id, job] of this.jobs) { if (this.isJobStuck(job)) { + this.logger.warn(`Evicting stuck analysis ${id}`); + this.cancelJob(id); this.jobs.delete(id); } } From afce537d28e7e9554404ba241f3d86a4b2907196 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 20:53:58 +0300 Subject: [PATCH 27/34] Support cluster target in command-mode migration and validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detect target cluster topology at execution/validation time and create an iovalkey Cluster client instead of standalone Valkey when the target reports cluster_enabled=1. Extract shared client-factory to eliminate duplicated createClient helpers. Add migration topology e2e test that spins up 2 standalone + 2 cluster (3-master) Valkey instances via Docker and verifies all four migration combos: standalone→standalone, standalone→cluster, cluster→standalone, cluster→cluster. 
Co-Authored-By: Claude --- apps/api/package.json | 1 + .../migration-execution.service.spec.ts | 61 +++- .../migration-validation.service.spec.ts | 36 ++- .../src/migration/execution/client-factory.ts | 53 ++++ .../execution/command-migration-worker.ts | 22 +- .../migration/migration-execution.service.ts | 18 +- .../migration/migration-validation.service.ts | 23 +- apps/api/test/migration-topology.e2e-spec.ts | 298 ++++++++++++++++++ docker-compose.migration-e2e.yml | 186 +++++++++++ package.json | 1 + 10 files changed, 652 insertions(+), 47 deletions(-) create mode 100644 apps/api/src/migration/execution/client-factory.ts create mode 100644 apps/api/test/migration-topology.e2e-spec.ts create mode 100644 docker-compose.migration-e2e.yml diff --git a/apps/api/package.json b/apps/api/package.json index 2e77ae07..b6f75f86 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -26,6 +26,7 @@ "test:integration:redis": "TEST_DB_PORT=6382 jest test/database-compatibility.e2e-spec.ts", "test:integration:valkey": "TEST_DB_PORT=6380 jest --testRegex='.e2e-spec.ts$'", "test:cluster": "jest test/api-cluster.e2e-spec.ts", + "test:migration-topology": "RUN_TOPOLOGY_TESTS=true jest test/migration-topology.e2e-spec.ts", "test:cluster:unit": "jest src/cluster/*.spec.ts", "test:integration:cluster": "TEST_DB_HOST=localhost TEST_DB_PORT=7001 jest test/api-cluster.e2e-spec.ts", "test:unit:parsers": "jest src/database/parsers/*.spec.ts", diff --git a/apps/api/src/migration/__tests__/migration-execution.service.spec.ts b/apps/api/src/migration/__tests__/migration-execution.service.spec.ts index baee6170..bafca5e4 100644 --- a/apps/api/src/migration/__tests__/migration-execution.service.spec.ts +++ b/apps/api/src/migration/__tests__/migration-execution.service.spec.ts @@ -31,14 +31,28 @@ jest.mock('../execution/command-migration-worker', () => ({ runCommandMigration: jest.fn().mockResolvedValue(undefined), })); -function createMockRegistry() { - const mockAdapter = { +function 
createMockRegistry(overrides?: { sourceClusterEnabled?: boolean; targetClusterEnabled?: boolean }) { + const sourceCluster = overrides?.sourceClusterEnabled ?? false; + const targetCluster = overrides?.targetClusterEnabled ?? false; + + const mockSourceAdapter = { + getCapabilities: jest.fn().mockReturnValue({ dbType: 'valkey', version: '8.1.0' }), + getInfo: jest.fn().mockResolvedValue({ cluster: { cluster_enabled: sourceCluster ? '1' : '0' } }), + getClient: jest.fn().mockReturnValue({ quit: jest.fn() }), + }; + const mockTargetAdapter = { getCapabilities: jest.fn().mockReturnValue({ dbType: 'valkey', version: '8.1.0' }), - getInfo: jest.fn().mockResolvedValue({ cluster_enabled: '0' }), + getInfo: jest.fn().mockResolvedValue({ cluster: { cluster_enabled: targetCluster ? '1' : '0' } }), getClient: jest.fn().mockReturnValue({ quit: jest.fn() }), }; + + const adapters: Record = { + 'conn-1': mockSourceAdapter, + 'conn-2': mockTargetAdapter, + }; + return { - get: jest.fn().mockReturnValue(mockAdapter), + get: jest.fn().mockImplementation((id: string) => adapters[id] ?? 
mockSourceAdapter), getConfig: jest.fn().mockReturnValue({ id: 'conn-1', name: 'Test', @@ -46,6 +60,8 @@ function createMockRegistry() { port: 6379, createdAt: Date.now(), }), + mockSourceAdapter, + mockTargetAdapter, }; } @@ -102,6 +118,43 @@ describe('MigrationExecutionService', () => { }), ).rejects.toThrow(NotFoundException); }); + + it('should pass targetIsCluster: true when target reports cluster_enabled=1', async () => { + const { runCommandMigration } = require('../execution/command-migration-worker'); + + const clusterRegistry = createMockRegistry({ targetClusterEnabled: true }); + const clusterService = new MigrationExecutionService(clusterRegistry as any); + + await clusterService.startExecution({ + sourceConnectionId: 'conn-1', + targetConnectionId: 'conn-2', + mode: 'command', + }); + + // Wait a tick for the async runCommandMode to call runCommandMigration + await new Promise(r => setTimeout(r, 20)); + + expect(runCommandMigration).toHaveBeenCalledWith( + expect.objectContaining({ targetIsCluster: true }), + ); + }); + + it('should pass targetIsCluster: false when target is standalone', async () => { + const { runCommandMigration } = require('../execution/command-migration-worker'); + (runCommandMigration as jest.Mock).mockClear(); + + await service.startExecution({ + sourceConnectionId: 'conn-1', + targetConnectionId: 'conn-2', + mode: 'command', + }); + + await new Promise(r => setTimeout(r, 20)); + + expect(runCommandMigration).toHaveBeenCalledWith( + expect.objectContaining({ targetIsCluster: false }), + ); + }); }); describe('stopExecution', () => { diff --git a/apps/api/src/migration/__tests__/migration-validation.service.spec.ts b/apps/api/src/migration/__tests__/migration-validation.service.spec.ts index d3db6096..09af6881 100644 --- a/apps/api/src/migration/__tests__/migration-validation.service.spec.ts +++ b/apps/api/src/migration/__tests__/migration-validation.service.spec.ts @@ -2,7 +2,7 @@ import { MigrationValidationService } from 
'../migration-validation.service'; import { BadRequestException, NotFoundException } from '@nestjs/common'; jest.mock('iovalkey', () => { - return jest.fn().mockImplementation(() => ({ + const mockClient = () => ({ connect: jest.fn().mockResolvedValue(undefined), ping: jest.fn().mockResolvedValue('PONG'), quit: jest.fn().mockResolvedValue(undefined), @@ -12,7 +12,10 @@ jest.mock('iovalkey', () => { type: jest.fn().mockReturnThis(), exec: jest.fn().mockResolvedValue([]), }), - })); + }); + const Valkey = jest.fn().mockImplementation(mockClient); + (Valkey as any).Cluster = jest.fn().mockImplementation(mockClient); + return Valkey; }); jest.mock('../validation/key-count-comparator', () => ({ @@ -44,10 +47,11 @@ jest.mock('../validation/baseline-comparator', () => ({ }), })); -function createMockRegistry() { +function createMockRegistry(overrides?: { targetClusterEnabled?: boolean }) { + const targetCluster = overrides?.targetClusterEnabled ?? false; const mockAdapter = { getCapabilities: jest.fn().mockReturnValue({ dbType: 'valkey', version: '8.1.0' }), - getInfo: jest.fn().mockResolvedValue({}), + getInfo: jest.fn().mockResolvedValue({ cluster: { cluster_enabled: targetCluster ? 
'1' : '0' } }), getClient: jest.fn().mockReturnValue({ quit: jest.fn() }), }; return { @@ -134,6 +138,30 @@ describe('MigrationValidationService', () => { ).rejects.toThrow(NotFoundException); }); + it('should detect cluster target and complete validation', async () => { + const clusterRegistry = createMockRegistry({ targetClusterEnabled: true }); + const clusterStorage = createMockStorage(); + const clusterMigrationService = createMockMigrationService(); + const clusterService = new MigrationValidationService( + clusterRegistry as any, + clusterStorage, + clusterMigrationService, + ); + + const { id } = await clusterService.startValidation({ + sourceConnectionId: 'conn-1', + targetConnectionId: 'conn-2', + }); + + // Wait for async validation to complete + await new Promise(r => setTimeout(r, 100)); + + const validation = clusterService.getValidation(id); + expect(validation).toBeDefined(); + // Should query target adapter for cluster info + expect(clusterRegistry.mockAdapter.getInfo).toHaveBeenCalledWith(['cluster']); + }); + it('should use Phase 1 analysis result when analysisId provided', async () => { migrationService.getJob.mockReturnValue({ status: 'completed', diff --git a/apps/api/src/migration/execution/client-factory.ts b/apps/api/src/migration/execution/client-factory.ts new file mode 100644 index 00000000..9ea4fe2b --- /dev/null +++ b/apps/api/src/migration/execution/client-factory.ts @@ -0,0 +1,53 @@ +import Valkey, { Cluster } from 'iovalkey'; +import type { DatabaseConnectionConfig } from '@betterdb/shared'; + +/** + * Create a standalone Valkey client from a connection config. + */ +export function createClient(config: DatabaseConnectionConfig, name: string): Valkey { + return new Valkey({ + host: config.host, + port: config.port, + username: config.username || undefined, + password: config.password || undefined, + tls: config.tls ? 
{} : undefined, + lazyConnect: true, + connectTimeout: 10_000, + commandTimeout: 15_000, + connectionName: name, + }); +} + +/** + * Create a target client — Cluster or standalone depending on the topology. + * The Cluster client is cast to Valkey so callers can use the same Commander + * interface without branching. + */ +export function createTargetClient( + config: DatabaseConnectionConfig, + name: string, + isCluster: boolean, +): Valkey { + if (!isCluster) { + return createClient(config, name); + } + + const cluster = new Cluster( + [{ host: config.host, port: config.port }], + { + redisOptions: { + username: config.username || undefined, + password: config.password || undefined, + tls: config.tls ? {} : undefined, + connectTimeout: 10_000, + commandTimeout: 15_000, + connectionName: name, + }, + lazyConnect: true, + enableReadyCheck: true, + ...(config.tls ? { dnsLookup: (address: string, callback: (err: NodeJS.ErrnoException | null, address: string, family: number) => void) => callback(null, address, 4) } : {}), + }, + ); + + return cluster as unknown as Valkey; +} diff --git a/apps/api/src/migration/execution/command-migration-worker.ts b/apps/api/src/migration/execution/command-migration-worker.ts index a0d15c1e..0d6b75db 100644 --- a/apps/api/src/migration/execution/command-migration-worker.ts +++ b/apps/api/src/migration/execution/command-migration-worker.ts @@ -2,6 +2,7 @@ import Valkey from 'iovalkey'; import type { DatabaseConnectionConfig } from '@betterdb/shared'; import type { ExecutionJob } from './execution-job'; import { migrateKey } from './type-handlers'; +import { createClient, createTargetClient } from './client-factory'; const SCAN_COUNT = 500; const TYPE_BATCH = 500; @@ -11,6 +12,7 @@ export interface CommandMigrationOptions { sourceConfig: DatabaseConnectionConfig; targetConfig: DatabaseConnectionConfig; sourceIsCluster: boolean; + targetIsCluster: boolean; job: ExecutionJob; maxLogLines: number; } @@ -20,13 +22,13 @@ export interface 
CommandMigrationOptions { * Operates entirely in-process using iovalkey. No external binary needed. */ export async function runCommandMigration(opts: CommandMigrationOptions): Promise { - const { sourceConfig, targetConfig, sourceIsCluster, job, maxLogLines } = opts; + const { sourceConfig, targetConfig, sourceIsCluster, targetIsCluster, job, maxLogLines } = opts; const sourceClients: Valkey[] = []; - const targetClient = createClient(targetConfig, 'BetterDB-Migration-Target'); + const targetClient = createTargetClient(targetConfig, 'BetterDB-Migration-Target', targetIsCluster); try { await targetClient.connect(); - log(job, maxLogLines, 'Connected to target'); + log(job, maxLogLines, `Connected to target${targetIsCluster ? ' (cluster mode)' : ''}`); // Build source clients (one per cluster master, or single standalone) if (sourceIsCluster) { @@ -154,20 +156,6 @@ export async function runCommandMigration(opts: CommandMigrationOptions): Promis // ── Helpers ── -function createClient(config: DatabaseConnectionConfig, name: string): Valkey { - return new Valkey({ - host: config.host, - port: config.port, - username: config.username || undefined, - password: config.password || undefined, - tls: config.tls ? {} : undefined, - lazyConnect: true, - connectTimeout: 10_000, - commandTimeout: 15_000, - connectionName: name, - }); -} - function parseClusterMasters(nodesRaw: string): Array<{ host: string; port: number }> { const results: Array<{ host: string; port: number }> = []; for (const line of nodesRaw.split('\n')) { diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index c33bf38f..ec7bba37 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -29,7 +29,7 @@ export class MigrationExecutionService { // 1. 
Resolve both connections (throws NotFoundException if missing) const sourceAdapter = this.connectionRegistry.get(req.sourceConnectionId); const sourceConfig = this.connectionRegistry.getConfig(req.sourceConnectionId); - this.connectionRegistry.get(req.targetConnectionId); + const targetAdapter = this.connectionRegistry.get(req.targetConnectionId); const targetConfig = this.connectionRegistry.getConfig(req.targetConnectionId); if (!sourceConfig || !targetConfig) { @@ -41,10 +41,14 @@ export class MigrationExecutionService { throw new BadRequestException('Source and target must be different connections'); } - // 3. Detect if source is cluster - const info = await sourceAdapter.getInfo(['cluster']); - const clusterSection = (info as Record>).cluster ?? {}; - const clusterEnabled = String(clusterSection['cluster_enabled'] ?? '0') === '1'; + // 3. Detect if source/target is cluster + const sourceInfo = await sourceAdapter.getInfo(['cluster']); + const sourceClusterSection = (sourceInfo as Record>).cluster ?? {}; + const clusterEnabled = String(sourceClusterSection['cluster_enabled'] ?? '0') === '1'; + + const targetInfo = await targetAdapter.getInfo(['cluster']); + const targetClusterSection = (targetInfo as Record>).cluster ?? {}; + const targetIsCluster = String(targetClusterSection['cluster_enabled'] ?? '0') === '1'; // 4. 
For redis_shake mode, locate the binary upfront let binaryPath: string | undefined; @@ -90,7 +94,7 @@ export class MigrationExecutionService { this.logger.error(`Execution ${id} failed: ${err.message}`); }); } else { - this.runCommandMode(job, sourceConfig, targetConfig, clusterEnabled).catch(err => { + this.runCommandMode(job, sourceConfig, targetConfig, clusterEnabled, targetIsCluster).catch(err => { this.logger.error(`Execution ${id} failed: ${err.message}`); }); } @@ -180,6 +184,7 @@ export class MigrationExecutionService { sourceConfig: Parameters[0]['sourceConfig'], targetConfig: Parameters[0]['targetConfig'], sourceIsCluster: boolean, + targetIsCluster: boolean, ): Promise { job.status = 'running'; try { @@ -187,6 +192,7 @@ export class MigrationExecutionService { sourceConfig, targetConfig, sourceIsCluster, + targetIsCluster, job, maxLogLines: this.MAX_LOG_LINES, }); diff --git a/apps/api/src/migration/migration-validation.service.ts b/apps/api/src/migration/migration-validation.service.ts index 67d30867..14c9db4b 100644 --- a/apps/api/src/migration/migration-validation.service.ts +++ b/apps/api/src/migration/migration-validation.service.ts @@ -15,6 +15,7 @@ import { compareKeyCounts } from './validation/key-count-comparator'; import { validateSample } from './validation/sample-validator'; import { compareBaseline } from './validation/baseline-comparator'; import { MigrationService } from './migration.service'; +import { createClient, createTargetClient } from './execution/client-factory'; @Injectable() export class MigrationValidationService { @@ -94,9 +95,14 @@ export class MigrationValidationService { try { job.status = 'running'; + // Detect if target is a cluster + const targetInfo = await targetAdapter.getInfo(['cluster']); + const targetClusterSection = (targetInfo as Record>).cluster ?? {}; + const targetIsCluster = String(targetClusterSection['cluster_enabled'] ?? 
'0') === '1'; + // Create temporary iovalkey clients — same pattern as command-migration-worker.ts sourceClient = createClient(sourceConfig, 'BetterDB-Validation-Source'); - targetClient = createClient(targetConfig, 'BetterDB-Validation-Target'); + targetClient = createTargetClient(targetConfig, 'BetterDB-Validation-Target', targetIsCluster); // Step 1: Connect check (5%) try { @@ -252,18 +258,3 @@ export class MigrationValidationService { } } -// ── Helpers ── - -function createClient(config: DatabaseConnectionConfig, name: string): Valkey { - return new Valkey({ - host: config.host, - port: config.port, - username: config.username || undefined, - password: config.password || undefined, - tls: config.tls ? {} : undefined, - lazyConnect: true, - connectTimeout: 10_000, - commandTimeout: 15_000, - connectionName: name, - }); -} diff --git a/apps/api/test/migration-topology.e2e-spec.ts b/apps/api/test/migration-topology.e2e-spec.ts new file mode 100644 index 00000000..280aa2dd --- /dev/null +++ b/apps/api/test/migration-topology.e2e-spec.ts @@ -0,0 +1,298 @@ +import { NestFastifyApplication } from '@nestjs/platform-fastify'; +import request from 'supertest'; +import Valkey, { Cluster } from 'iovalkey'; +import { execSync } from 'child_process'; +import { join } from 'path'; +import { createTestApp } from './test-utils'; + +/** + * Migration Topology E2E — verifies command-mode migration across all four + * topology combinations: + * + * standalone → standalone + * standalone → cluster + * cluster → standalone + * cluster → cluster + * + * Requires Docker. Skipped unless RUN_TOPOLOGY_TESTS=true is set. 
+ * Run via: pnpm test:migration-topology + */ + +const RUN = process.env.RUN_TOPOLOGY_TESTS === 'true'; + +const PROJECT_ROOT = join(__dirname, '..', '..', '..'); +const COMPOSE_FILE = join(PROJECT_ROOT, 'docker-compose.migration-e2e.yml'); +const COMPOSE_PROJECT = 'migration-e2e'; + +const SRC_STANDALONE_PORT = 6990; +const TGT_STANDALONE_PORT = 6991; +const SRC_CLUSTER_PORT = 7301; // seed node +const TGT_CLUSTER_PORT = 7401; // seed node + +// ── Docker helpers ────────────────────────────────────────────────── + +function compose(cmd: string): string { + return execSync( + `docker compose -p ${COMPOSE_PROJECT} -f "${COMPOSE_FILE}" ${cmd}`, + { encoding: 'utf-8', timeout: 120_000, stdio: ['pipe', 'pipe', 'pipe'] }, + ); +} + +// ── Connection helpers ────────────────────────────────────────────── + +function sleep(ms: number): Promise { + return new Promise(r => setTimeout(r, ms)); +} + +async function waitForStandalone(port: number, timeoutMs = 30_000): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + try { + const c = new Valkey({ host: '127.0.0.1', port, lazyConnect: true, connectTimeout: 2_000 }); + await c.connect(); + await c.ping(); + await c.quit(); + return; + } catch { /* retry */ } + await sleep(500); + } + throw new Error(`Standalone on port ${port} not ready after ${timeoutMs}ms`); +} + +async function waitForCluster(port: number, timeoutMs = 60_000): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + try { + const c = new Valkey({ host: '127.0.0.1', port, lazyConnect: true, connectTimeout: 2_000 }); + await c.connect(); + const info = (await c.call('CLUSTER', 'INFO')) as string; + await c.quit(); + if (info.includes('cluster_state:ok')) return; + } catch { /* retry */ } + await sleep(1_000); + } + throw new Error(`Cluster on port ${port} not ready after ${timeoutMs}ms`); +} + +// ── Client factories ──────────────────────────────────────────────── + +async function 
openClient(port: number, isCluster: boolean): Promise { + if (isCluster) { + const cluster = new Cluster( + [{ host: '127.0.0.1', port }], + { lazyConnect: true }, + ); + await cluster.connect(); + return cluster as unknown as Valkey; + } + const client = new Valkey({ host: '127.0.0.1', port, lazyConnect: true }); + await client.connect(); + return client; +} + +// ── Key seed / verify helpers ─────────────────────────────────────── + +async function seedKeys(client: Valkey, prefix: string): Promise { + await client.set(`${prefix}:str1`, 'value1'); + await client.set(`${prefix}:str2`, 'value2'); + await client.set(`${prefix}:str3`, 'value3'); + await client.hset(`${prefix}:hash1`, 'f1', 'v1', 'f2', 'v2'); + await client.hset(`${prefix}:hash2`, 'field', 'data'); + await client.rpush(`${prefix}:list1`, 'a', 'b', 'c'); + await client.sadd(`${prefix}:set1`, 'm1', 'm2', 'm3'); + await client.zadd(`${prefix}:zset1`, 1, 'z1', 2, 'z2'); + await client.set(`${prefix}:str4`, 'value4'); + await client.set(`${prefix}:str5`, 'value5'); +} + +async function verifyKeys(client: Valkey, prefix: string): Promise { + // 5 strings + expect(await client.get(`${prefix}:str1`)).toBe('value1'); + expect(await client.get(`${prefix}:str2`)).toBe('value2'); + expect(await client.get(`${prefix}:str3`)).toBe('value3'); + expect(await client.get(`${prefix}:str4`)).toBe('value4'); + expect(await client.get(`${prefix}:str5`)).toBe('value5'); + + // 2 hashes + expect(await client.hgetall(`${prefix}:hash1`)).toEqual({ f1: 'v1', f2: 'v2' }); + expect(await client.hgetall(`${prefix}:hash2`)).toEqual({ field: 'data' }); + + // list + expect(await client.lrange(`${prefix}:list1`, 0, -1)).toEqual(['a', 'b', 'c']); + + // set (order is non-deterministic) + const members = await client.smembers(`${prefix}:set1`); + expect(members.sort()).toEqual(['m1', 'm2', 'm3']); + + // sorted set (ordered by score) + const zset = await client.zrange(`${prefix}:zset1`, '0', '-1'); + expect(zset).toEqual(['z1', 'z2']); 
+} + +// ── Migration runner ──────────────────────────────────────────────── + +async function runMigration( + app: NestFastifyApplication, + sourceId: string, + targetId: string, +): Promise<{ status: string; keysTransferred?: number; error?: string } | 'skipped'> { + const startRes = await request(app.getHttpServer()) + .post('/migration/execution') + .send({ sourceConnectionId: sourceId, targetConnectionId: targetId, mode: 'command' }); + + if (startRes.status === 403) return 'skipped'; + expect([200, 201]).toContain(startRes.status); + + const execId = startRes.body.id; + + let result: any; + for (let i = 0; i < 120; i++) { + const poll = await request(app.getHttpServer()).get(`/migration/execution/${execId}`); + if (poll.status === 403) return 'skipped'; + result = poll.body; + if (result.status === 'completed' || result.status === 'failed') break; + await sleep(500); + } + return result; +} + +// ── Tests ─────────────────────────────────────────────────────────── + +(RUN ? describe : describe.skip)('Migration Topology E2E', () => { + let app: NestFastifyApplication; + const connIds: Record = {}; + let licenseLocked = false; + + beforeAll(async () => { + // 1. Start topology containers (clean slate) + try { compose('down --remove-orphans --volumes'); } catch { /* ok */ } + compose('up -d'); + + // 2. Wait for standalone instances + await Promise.all([ + waitForStandalone(SRC_STANDALONE_PORT), + waitForStandalone(TGT_STANDALONE_PORT), + ]); + + // 3. Wait for both clusters to form + await Promise.all([ + waitForCluster(SRC_CLUSTER_PORT), + waitForCluster(TGT_CLUSTER_PORT), + ]); + + // 4. 
Flush all instances to ensure clean state (no leftover data from prior runs) + for (const { port, isCluster } of [ + { port: SRC_STANDALONE_PORT, isCluster: false }, + { port: TGT_STANDALONE_PORT, isCluster: false }, + { port: SRC_CLUSTER_PORT, isCluster: true }, + { port: TGT_CLUSTER_PORT, isCluster: true }, + ]) { + const c = await openClient(port, isCluster); + await c.flushall(); + await c.quit(); + } + + // 5. Seed source standalone (10 keys, prefix "mig:sa") + const sa = await openClient(SRC_STANDALONE_PORT, false); + await seedKeys(sa, 'mig:sa'); + await sa.quit(); + + // 6. Seed source cluster (10 keys, prefix "mig:cl") + const cl = await openClient(SRC_CLUSTER_PORT, true); + await seedKeys(cl, 'mig:cl'); + await cl.quit(); + + // 6. Boot NestJS app (default DB comes from global-setup on port 6380) + app = await createTestApp(); + + // 7. Register four connections via the API + const defs = [ + { key: 'srcSA', name: 'Topo Source Standalone', port: SRC_STANDALONE_PORT }, + { key: 'tgtSA', name: 'Topo Target Standalone', port: TGT_STANDALONE_PORT }, + { key: 'srcCL', name: 'Topo Source Cluster', port: SRC_CLUSTER_PORT }, + { key: 'tgtCL', name: 'Topo Target Cluster', port: TGT_CLUSTER_PORT }, + ]; + for (const d of defs) { + const res = await request(app.getHttpServer()) + .post('/connections') + .send({ name: d.name, host: '127.0.0.1', port: d.port }); + if (res.status === 200 || res.status === 201) { + connIds[d.key] = res.body.id; + } + } + }, 120_000); + + afterAll(async () => { + // Clean up connections + for (const id of Object.values(connIds)) { + try { await request(app.getHttpServer()).delete(`/connections/${id}`); } catch { /* ok */ } + } + if (app) await app.close(); + + // Tear down Docker topology + try { compose('down --remove-orphans --volumes'); } catch { /* ok */ } + }, 60_000); + + // ── Shared scenario runner ── + + async function scenario( + sourceKey: string, + targetKey: string, + sourcePrefix: string, + targetPort: number, + 
targetIsCluster: boolean, + ): Promise { + const srcId = connIds[sourceKey]; + const tgtId = connIds[targetKey]; + if (!srcId || !tgtId) { + throw new Error(`Connection not registered: ${sourceKey} / ${targetKey}`); + } + + // Flush target before migration + const flushClient = await openClient(targetPort, targetIsCluster); + await flushClient.flushall(); + await flushClient.quit(); + + // Run the migration + const result = await runMigration(app, srcId, tgtId); + if (result === 'skipped') { + licenseLocked = true; + return; + } + + expect(result.status).toBe('completed'); + expect(result.keysTransferred).toBeGreaterThanOrEqual(10); + + // Verify all 10 keys arrived on the target + const target = await openClient(targetPort, targetIsCluster); + try { + await verifyKeys(target, sourcePrefix); + } finally { + await target.quit(); + } + } + + // ── 4 topology combinations ── + + it('standalone → standalone', async () => { + await scenario('srcSA', 'tgtSA', 'mig:sa', TGT_STANDALONE_PORT, false); + if (licenseLocked) { + console.warn('Execution requires Pro license — remaining topology tests will be skipped'); + } + }, 60_000); + + it('standalone → cluster', async () => { + if (licenseLocked) return; + await scenario('srcSA', 'tgtCL', 'mig:sa', TGT_CLUSTER_PORT, true); + }, 60_000); + + it('cluster → standalone', async () => { + if (licenseLocked) return; + await scenario('srcCL', 'tgtSA', 'mig:cl', TGT_STANDALONE_PORT, false); + }, 60_000); + + it('cluster → cluster', async () => { + if (licenseLocked) return; + await scenario('srcCL', 'tgtCL', 'mig:cl', TGT_CLUSTER_PORT, true); + }, 60_000); +}); diff --git a/docker-compose.migration-e2e.yml b/docker-compose.migration-e2e.yml new file mode 100644 index 00000000..182ae31a --- /dev/null +++ b/docker-compose.migration-e2e.yml @@ -0,0 +1,186 @@ +# Self-contained topology for migration e2e tests. +# 2 standalone Valkey instances + 2 clusters (3 masters each, no replicas). 
+# All services use host networking so cluster nodes advertise 127.0.0.1. +# +# Usage: +# docker compose -p migration-e2e -f docker-compose.migration-e2e.yml up -d +# docker compose -p migration-e2e -f docker-compose.migration-e2e.yml down -v + +services: + # ── Standalone instances ────────────────────────────────────────── + + source-standalone: + image: valkey/valkey:8-alpine + container_name: mig-source-standalone + network_mode: host + command: valkey-server --port 6990 --save "" --appendonly no + healthcheck: + test: ["CMD", "valkey-cli", "-p", "6990", "ping"] + interval: 2s + timeout: 3s + retries: 15 + restart: "no" + + target-standalone: + image: valkey/valkey:8-alpine + container_name: mig-target-standalone + network_mode: host + command: valkey-server --port 6991 --save "" --appendonly no + healthcheck: + test: ["CMD", "valkey-cli", "-p", "6991", "ping"] + interval: 2s + timeout: 3s + retries: 15 + restart: "no" + + # ── Source cluster (3 masters, no replicas) ─────────────────────── + + src-node-1: + image: valkey/valkey:8-alpine + container_name: mig-src-node-1 + network_mode: host + command: > + valkey-server --port 7301 + --cluster-enabled yes + --cluster-config-file /data/nodes.conf + --cluster-node-timeout 5000 + --cluster-announce-ip 127.0.0.1 + --cluster-announce-port 7301 + --cluster-announce-bus-port 17301 + --save "" --appendonly no + healthcheck: + test: ["CMD", "valkey-cli", "-p", "7301", "ping"] + interval: 2s + timeout: 3s + retries: 15 + restart: "no" + + src-node-2: + image: valkey/valkey:8-alpine + container_name: mig-src-node-2 + network_mode: host + command: > + valkey-server --port 7302 + --cluster-enabled yes + --cluster-config-file /data/nodes.conf + --cluster-node-timeout 5000 + --cluster-announce-ip 127.0.0.1 + --cluster-announce-port 7302 + --cluster-announce-bus-port 17302 + --save "" --appendonly no + healthcheck: + test: ["CMD", "valkey-cli", "-p", "7302", "ping"] + interval: 2s + timeout: 3s + retries: 15 + restart: "no" + + 
src-node-3: + image: valkey/valkey:8-alpine + container_name: mig-src-node-3 + network_mode: host + command: > + valkey-server --port 7303 + --cluster-enabled yes + --cluster-config-file /data/nodes.conf + --cluster-node-timeout 5000 + --cluster-announce-ip 127.0.0.1 + --cluster-announce-port 7303 + --cluster-announce-bus-port 17303 + --save "" --appendonly no + healthcheck: + test: ["CMD", "valkey-cli", "-p", "7303", "ping"] + interval: 2s + timeout: 3s + retries: 15 + restart: "no" + + src-cluster-init: + image: valkey/valkey:8-alpine + container_name: mig-src-cluster-init + network_mode: host + depends_on: + src-node-1: { condition: service_healthy } + src-node-2: { condition: service_healthy } + src-node-3: { condition: service_healthy } + command: > + sh -c "valkey-cli --cluster create + 127.0.0.1:7301 127.0.0.1:7302 127.0.0.1:7303 + --cluster-replicas 0 --cluster-yes" + restart: "no" + + # ── Target cluster (3 masters, no replicas) ─────────────────────── + + tgt-node-1: + image: valkey/valkey:8-alpine + container_name: mig-tgt-node-1 + network_mode: host + command: > + valkey-server --port 7401 + --cluster-enabled yes + --cluster-config-file /data/nodes.conf + --cluster-node-timeout 5000 + --cluster-announce-ip 127.0.0.1 + --cluster-announce-port 7401 + --cluster-announce-bus-port 17401 + --save "" --appendonly no + healthcheck: + test: ["CMD", "valkey-cli", "-p", "7401", "ping"] + interval: 2s + timeout: 3s + retries: 15 + restart: "no" + + tgt-node-2: + image: valkey/valkey:8-alpine + container_name: mig-tgt-node-2 + network_mode: host + command: > + valkey-server --port 7402 + --cluster-enabled yes + --cluster-config-file /data/nodes.conf + --cluster-node-timeout 5000 + --cluster-announce-ip 127.0.0.1 + --cluster-announce-port 7402 + --cluster-announce-bus-port 17402 + --save "" --appendonly no + healthcheck: + test: ["CMD", "valkey-cli", "-p", "7402", "ping"] + interval: 2s + timeout: 3s + retries: 15 + restart: "no" + + tgt-node-3: + image: 
valkey/valkey:8-alpine + container_name: mig-tgt-node-3 + network_mode: host + command: > + valkey-server --port 7403 + --cluster-enabled yes + --cluster-config-file /data/nodes.conf + --cluster-node-timeout 5000 + --cluster-announce-ip 127.0.0.1 + --cluster-announce-port 7403 + --cluster-announce-bus-port 17403 + --save "" --appendonly no + healthcheck: + test: ["CMD", "valkey-cli", "-p", "7403", "ping"] + interval: 2s + timeout: 3s + retries: 15 + restart: "no" + + tgt-cluster-init: + image: valkey/valkey:8-alpine + container_name: mig-tgt-cluster-init + network_mode: host + depends_on: + tgt-node-1: { condition: service_healthy } + tgt-node-2: { condition: service_healthy } + tgt-node-3: { condition: service_healthy } + command: > + sh -c "valkey-cli --cluster create + 127.0.0.1:7401 127.0.0.1:7402 127.0.0.1:7403 + --cluster-replicas 0 --cluster-yes" + restart: "no" diff --git a/package.json b/package.json index 657213c8..58141850 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "test:integration": "pnpm --filter api test:integration", "test:integration:redis": "TEST_DB_PORT=6382 pnpm --filter api test:integration:redis", "test:integration:valkey": "TEST_DB_PORT=6380 pnpm --filter api test:integration:valkey", + "test:migration-topology": "pnpm --filter api test:migration-topology", "lint": "turbo lint", "clean": "turbo clean && rm -rf node_modules", "cli:build": "pnpm --filter @betterdb/monitor build", From c5f3b73d823717ff8ef0e9499339ab25a46c5510 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Tue, 31 Mar 2026 20:56:04 +0300 Subject: [PATCH 28/34] cleanup --- apps/api/src/migration/execution/type-handlers.ts | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index e7dec6c8..9204fc27 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -312,19 +312,6 @@ end 
return 1 `; -/** - * Read PTTL from source, then atomically RENAME tmp→key + PEXPIRE on target. - * Eliminates the window where the key exists on target without its TTL. - */ -async function migrateTtlAtomic(source: Valkey, target: Valkey, key: string): Promise { - const pttl = await source.pttl(key); - if (pttl > 0) { - await target.pexpire(key, pttl); - } else if (pttl === -2) { - // Key expired between copy and TTL check — remove ghost copy from target - await target.del(key); - } -} /** * Atomically RENAME tmp→key and apply PTTL in a single Lua eval. From 31ad55d8d88dd07e4a97fa9b14269c00f7564754 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Wed, 1 Apr 2026 09:03:56 +0300 Subject: [PATCH 29/34] Add analysis compatibility tests to migration topology e2e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verify that the compatibility checker correctly flags: - cluster → standalone as a blocking incompatibility - standalone → cluster as a warning (non-blocking) Also fix: handle 402 (license) status in execution runner, and use valid scanSampleSize (min 1000) for analysis requests. 
Co-Authored-By: Claude --- apps/api/test/migration-topology.e2e-spec.ts | 75 +++++++++++++++++++- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/apps/api/test/migration-topology.e2e-spec.ts b/apps/api/test/migration-topology.e2e-spec.ts index 280aa2dd..95bbf271 100644 --- a/apps/api/test/migration-topology.e2e-spec.ts +++ b/apps/api/test/migration-topology.e2e-spec.ts @@ -129,6 +129,32 @@ async function verifyKeys(client: Valkey, prefix: string): Promise { expect(zset).toEqual(['z1', 'z2']); } +// ── Analysis runner ───────────────────────────────────────────────── + +async function runAnalysis( + app: NestFastifyApplication, + sourceId: string, + targetId: string, +): Promise { + const startRes = await request(app.getHttpServer()) + .post('/migration/analysis') + .send({ sourceConnectionId: sourceId, targetConnectionId: targetId, scanSampleSize: 1000 }); + + expect([200, 201]).toContain(startRes.status); + + const analysisId = startRes.body.id; + + let result: any; + for (let i = 0; i < 60; i++) { + const poll = await request(app.getHttpServer()).get(`/migration/analysis/${analysisId}`); + expect(poll.status).toBe(200); + result = poll.body; + if (result.status === 'completed' || result.status === 'failed') break; + await sleep(500); + } + return result; +} + // ── Migration runner ──────────────────────────────────────────────── async function runMigration( @@ -140,7 +166,7 @@ async function runMigration( .post('/migration/execution') .send({ sourceConnectionId: sourceId, targetConnectionId: targetId, mode: 'command' }); - if (startRes.status === 403) return 'skipped'; + if (startRes.status === 402 || startRes.status === 403) return 'skipped'; expect([200, 201]).toContain(startRes.status); const execId = startRes.body.id; @@ -148,7 +174,7 @@ async function runMigration( let result: any; for (let i = 0; i < 120; i++) { const poll = await request(app.getHttpServer()).get(`/migration/execution/${execId}`); - if (poll.status === 403) return 'skipped'; 
+ if (poll.status === 402 || poll.status === 403) return 'skipped'; result = poll.body; if (result.status === 'completed' || result.status === 'failed') break; await sleep(500); @@ -295,4 +321,49 @@ async function runMigration( if (licenseLocked) return; await scenario('srcCL', 'tgtCL', 'mig:cl', TGT_CLUSTER_PORT, true); }, 60_000); + + // ── Compatibility analysis ── + + it('analysis: cluster → standalone should report a blocking incompatibility', async () => { + const srcId = connIds['srcCL']; + const tgtId = connIds['tgtSA']; + if (!srcId || !tgtId) throw new Error('Connections not registered'); + + const result = await runAnalysis(app, srcId, tgtId); + + expect(result.status).toBe('completed'); + expect(result.incompatibilities).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + severity: 'blocking', + category: 'cluster_topology', + }), + ]), + ); + expect(result.blockingCount).toBeGreaterThanOrEqual(1); + }, 60_000); + + it('analysis: standalone → cluster should report a warning incompatibility', async () => { + const srcId = connIds['srcSA']; + const tgtId = connIds['tgtCL']; + if (!srcId || !tgtId) throw new Error('Connections not registered'); + + const result = await runAnalysis(app, srcId, tgtId); + + expect(result.status).toBe('completed'); + expect(result.incompatibilities).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + severity: 'warning', + category: 'cluster_topology', + }), + ]), + ); + expect(result.warningCount).toBeGreaterThanOrEqual(1); + // Should NOT be blocking — migration is still possible + const clusterBlocking = (result.incompatibilities ?? 
[]).filter( + (i: any) => i.category === 'cluster_topology' && i.severity === 'blocking', + ); + expect(clusterBlocking).toHaveLength(0); + }, 60_000); }); From 22dea6d656ac3e98d0e8d3671e65357c031adf43 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Wed, 1 Apr 2026 09:17:30 +0300 Subject: [PATCH 30/34] Mock Pro license in topology e2e so execution tests actually run The 4 migration execution tests were silently skipping due to 402 (license guard). Mock global.fetch to return pro tier and remove the licenseLocked skip logic so tests fail loudly if the mock breaks. Co-Authored-By: Claude --- apps/api/test/migration-topology.e2e-spec.ts | 27 ++++++++++---------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/apps/api/test/migration-topology.e2e-spec.ts b/apps/api/test/migration-topology.e2e-spec.ts index 95bbf271..46125c1a 100644 --- a/apps/api/test/migration-topology.e2e-spec.ts +++ b/apps/api/test/migration-topology.e2e-spec.ts @@ -187,7 +187,6 @@ async function runMigration( (RUN ? describe : describe.skip)('Migration Topology E2E', () => { let app: NestFastifyApplication; const connIds: Record = {}; - let licenseLocked = false; beforeAll(async () => { // 1. Start topology containers (clean slate) @@ -228,10 +227,16 @@ async function runMigration( await seedKeys(cl, 'mig:cl'); await cl.quit(); - // 6. Boot NestJS app (default DB comes from global-setup on port 6380) + // 7. Boot NestJS app with Pro license so execution endpoints are unlocked + process.env.BETTERDB_LICENSE_KEY = 'test-topology-key'; + jest.spyOn(global, 'fetch').mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ valid: true, tier: 'pro', expiresAt: null }), + } as Response); + app = await createTestApp(); - // 7. Register four connections via the API + // 8. 
Register four connections via the API const defs = [ { key: 'srcSA', name: 'Topo Source Standalone', port: SRC_STANDALONE_PORT }, { key: 'tgtSA', name: 'Topo Target Standalone', port: TGT_STANDALONE_PORT }, @@ -255,6 +260,10 @@ async function runMigration( } if (app) await app.close(); + // Restore license env / mocks + delete process.env.BETTERDB_LICENSE_KEY; + jest.restoreAllMocks(); + // Tear down Docker topology try { compose('down --remove-orphans --volumes'); } catch { /* ok */ } }, 60_000); @@ -281,10 +290,8 @@ async function runMigration( // Run the migration const result = await runMigration(app, srcId, tgtId); - if (result === 'skipped') { - licenseLocked = true; - return; - } + expect(result).not.toBe('skipped'); + if (result === 'skipped') return; // type guard expect(result.status).toBe('completed'); expect(result.keysTransferred).toBeGreaterThanOrEqual(10); @@ -302,23 +309,17 @@ async function runMigration( it('standalone → standalone', async () => { await scenario('srcSA', 'tgtSA', 'mig:sa', TGT_STANDALONE_PORT, false); - if (licenseLocked) { - console.warn('Execution requires Pro license — remaining topology tests will be skipped'); - } }, 60_000); it('standalone → cluster', async () => { - if (licenseLocked) return; await scenario('srcSA', 'tgtCL', 'mig:sa', TGT_CLUSTER_PORT, true); }, 60_000); it('cluster → standalone', async () => { - if (licenseLocked) return; await scenario('srcCL', 'tgtSA', 'mig:cl', TGT_STANDALONE_PORT, false); }, 60_000); it('cluster → cluster', async () => { - if (licenseLocked) return; await scenario('srcCL', 'tgtCL', 'mig:cl', TGT_CLUSTER_PORT, true); }, 60_000); From a998853d37ec7204d2c8a24f7f5f11e7486e6ae7 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Wed, 1 Apr 2026 09:37:08 +0300 Subject: [PATCH 31/34] Add migration feature documentation Covers the three-phase workflow (analysis, execution, validation), supported topologies, compatibility checks, and known limitations including hash-tag slot routing, binary 
data, TTL race conditions, HFE, large keys, and multi-database constraints. Co-Authored-By: Claude --- docs/migration.md | 238 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 docs/migration.md diff --git a/docs/migration.md b/docs/migration.md new file mode 100644 index 00000000..6db49458 --- /dev/null +++ b/docs/migration.md @@ -0,0 +1,238 @@ +# Migration + +BetterDB can migrate data between Valkey and Redis instances using a three-phase +workflow: **analysis**, **execution**, and **validation**. Each phase is +independent — you can run analysis without committing to a migration, and +validation is optional after execution completes. + +## Phases + +### 1. Analysis (Community tier) + +Scans the source instance and compares it against the target to produce a +compatibility report. No data is written. + +What it checks: + +- **Key sampling** — SCAN + TYPE on a configurable sample (1,000–50,000 keys). + In cluster mode each master node is sampled independently. +- **Memory estimation** — `MEMORY USAGE` per sampled key, extrapolated to the + full keyspace. +- **TTL distribution** — Groups keys into buckets (no expiry, <1 h, <24 h, + <7 d, >7 d). +- **Hash Field Expiry (HFE)** — Detects per-field TTLs on Valkey 8.1+ via + `HEXPIRETIME`. Skipped on Redis or older Valkey. +- **Compatibility** — Produces a list of incompatibilities with severity levels + (`blocking`, `warning`, `info`). See [Compatibility checks](#compatibility-checks). +- **Command distribution** — Top commands by frequency from `COMMANDLOG` (Valkey + 8+) or `SLOWLOG`. + +### 2. Execution (Pro tier) + +Transfers keys from source to target. 
Two modes are available: + +| Mode | Mechanism | Best for | +|------|-----------|----------| +| **redis_shake** (default) | External Go binary ([redis-shake](https://github.com/tair-opensource/RedisShake)) | Large datasets, production workloads | +| **command** | In-process Node.js via iovalkey | Simpler deployments, smaller datasets, easier debugging | + +#### Command mode + +Connects directly to the source and target using the iovalkey library. For each +key it reads the value with a type-specific command, writes it to the target, +and preserves the TTL. + +Supported data types: + +| Type | Read | Write | TTL | +|------|------|-------|-----| +| string | `GET` (binary) | `SET PX` | Atomic — single `SET` with `PX` flag | +| hash | `HSCAN` (binary fields) | `HSET` to temp key, then `RENAME` | Lua `RENAME` + `PEXPIRE` | +| list | `LRANGE` in 1,000-element chunks | `RPUSH` to temp key, then `RENAME` | Lua `RENAME` + `PEXPIRE` | +| set | `SMEMBERS` or `SSCAN` (>10 K) | `SADD` to temp key, then `RENAME` | Lua `RENAME` + `PEXPIRE` | +| sorted set | `ZRANGE` or `ZSCAN` (>10 K) | `ZADD` to temp key, then `RENAME` | Lua `RENAME` + `PEXPIRE` | +| stream | `XRANGE` in 1,000-entry chunks | `XADD` to temp key, then `RENAME` | Lua `RENAME` + `PEXPIRE` | + +Compound types (everything except string) are written to a temporary key first, +then atomically renamed to the final key. This avoids partial writes if the +process crashes mid-transfer. If `EVAL` is blocked by ACL on the target, the +rename and TTL are applied as separate commands with a small race window. + +#### RedisShake mode + +Spawns the redis-shake binary as a child process. BetterDB generates the TOML +configuration, manages the process lifecycle, and streams progress from its +stdout. RedisShake auto-discovers cluster topology on both sides, so no special +handling is needed for cluster targets. + +The binary is found in this order: +1. `$REDIS_SHAKE_PATH` environment variable +2. 
`/usr/local/bin/redis-shake` (Docker image) +3. `~/.betterdb/bin/redis-shake` (npx install) + +### 3. Validation (Pro tier) + +Spot-checks the target after migration to verify data integrity. + +Steps: + +1. **Key count** — `DBSIZE` on both sides. Computes discrepancy percentage. +2. **Sample validation** — SCAN ~500 random keys and compare type + value. + Large keys (>100 elements) are compared by element count only to avoid + timeouts. +3. **Baseline comparison** (optional) — If a migration start time is provided + and BetterDB has >= 5 pre-migration memory snapshots, compares opsPerSec, + usedMemory, fragmentation ratio, and CPU usage against the pre-migration + baseline. + +A validation **passes** when the issue count is 0 and the key count discrepancy +is below 1%. + +## Topology support + +| Source | Target | Status | Notes | +|--------|--------|--------|-------| +| Standalone | Standalone | Supported | Direct key transfer | +| Standalone | Cluster | Supported | Keys are resharded across target slots. Analysis reports a warning. | +| Cluster | Cluster | Supported | Per-master scanning, slot-aware writes | +| Cluster | Standalone | **Blocked** | Analysis reports a blocking incompatibility. The data is spread across slots and cannot be safely collapsed into a single node. 
| + +## Compatibility checks + +Analysis detects the following incompatibilities: + +| Category | Severity | Condition | +|----------|----------|-----------| +| `cluster_topology` | blocking | Cluster source, standalone target | +| `cluster_topology` | warning | Standalone source, cluster target (keys will be resharded) | +| `type_direction` | blocking | Valkey source, Redis target (Valkey-specific features may be lost) | +| `hfe` | blocking | Hash Field Expiry detected on source, target does not support it | +| `modules` | blocking | Source uses a module not present on target (one entry per module) | +| `multi_db` | blocking | Source uses multiple databases and target is a cluster (clusters only support db0) | +| `multi_db` | warning | Source uses multiple databases, target is standalone but may not be configured for it | +| `maxmemory_policy` | warning | Eviction policy differs between source and target | +| `acl` | warning | Source has custom ACL users that do not exist on target | +| `persistence` | info | Persistence configuration differs | + +Blocking incompatibilities are advisory — the execution endpoint does not +currently enforce them. A future release will reject execution when blocking +incompatibilities exist. + +## Limitations + +### Keys containing `{` (hash tags in cluster mode) + +In cluster mode, Valkey determines which slot a key belongs to by hashing the +substring between the first `{` and the next `}`. This is called a **hash tag**. + +During command-mode migration, compound types are written to a temporary key and +then renamed to the final key. `RENAME` requires both keys to hash to the same +slot. 
To satisfy this, the temp key reuses the original key's hash tag: + +``` +Original key: user:{12345}:profile +Temp key: __betterdb_mig_a1b2c3d4:{12345} + +Original key: plain-key-no-braces +Temp key: __betterdb_mig_a1b2c3d4:{plain-key-no-braces} +``` + +**Edge case**: If a key contains `{` but no matching `}`, or the content between +the braces is empty (e.g., `foo{}bar`), Valkey hashes the entire key. BetterDB +handles this correctly — the `tempKey()` function only extracts a hash tag when +`{...}` contains at least one character. Otherwise it wraps the full key name as +the tag. + +**Impact on key names with literal braces**: If your keys use `{` as part of +their name rather than as a hash tag (e.g., `json:{data}`), the migration still +works correctly. The content between the first `{…}` pair is reused as the tag, +which guarantees the temp key lands in the same slot. The key's value and name +are preserved exactly. + +### Binary data + +Command-mode migrations use `*Buffer` variants of commands (`getBuffer`, `lrangeBuffer`, +`hscanBuffer`, etc.) so binary values are never coerced to UTF-8. Hash field +names are read via `HSCAN` (not `HGETALL`) specifically because `hgetallBuffer` +coerces field names to strings. + +**RedisShake mode**: The TOML configuration builder rejects values (passwords, +connection strings) containing control characters (`\x00–\x08`, `\x0b`, `\x0c`, +`\x0e–\x1f`, `\x7f`) to prevent TOML injection. + +### TTL precision and race conditions + +- **String keys**: TTL is applied atomically via `SET key value PX pttl` — no + window where the key exists without its TTL. +- **Compound types**: A Lua script performs `RENAME` + `PEXPIRE` in a single + `EVAL` call. If the target blocks `EVAL` via ACL, BetterDB falls back to + separate `RENAME` and `PEXPIRE` commands. In this fallback path there is a + brief window where the key exists with no expiry. 
+- **Expired between read and TTL fetch**: If `PTTL` returns `-2` (key expired), + the target copy is deleted. + +### Hash Field Expiry (HFE) + +Valkey 8.1+ supports per-field TTLs within a hash. Analysis detects HFE usage +via `HEXPIRETIME`, but **command-mode migration does not transfer per-field +expirations**. Only the overall key-level TTL is preserved. If the target does +not support HFE, analysis flags this as a blocking incompatibility. + +### Large keys + +Keys with more than 10,000 elements use cursor-based reads (`HSCAN`, `SSCAN`, +`ZSCAN`) instead of bulk commands to avoid blocking the server. Lists and +streams are always read in 1,000-element chunks regardless of size. + +During validation, keys with more than 100 elements are compared by **element +count only** — full value comparison is skipped to avoid timeouts. + +### Multi-database + +Command-mode migration and cluster mode only operate on database 0. If the +source uses multiple databases (`db0`, `db1`, etc.) and the target is a cluster, +analysis flags this as a blocking incompatibility. For standalone targets, +analysis issues a warning. + +### ACL users and modules + +ACL rules and loaded modules are **not migrated** — they are analyzed and +reported. If the source has custom ACL users missing from the target, analysis +issues a warning. If the source uses modules not loaded on the target, analysis +flags a blocking incompatibility. + +### DBSIZE accuracy in cluster mode + +`DBSIZE` on a cluster client is sent to a single random node, returning a +partial count. This means the key count comparison in validation may be +inaccurate for cluster targets. This is a known limitation. + +### Concurrent writes on the source + +The migration reads a point-in-time snapshot per key but does not freeze the +source. If keys are modified on the source during migration: + +- **Lists** may have different lengths. A post-migration length check warns if + the list grew or shrank. 
+- **Keys created after SCAN started** are missed entirely. +- **Keys deleted after SCAN** are skipped with no error (the read returns nil). + +For a consistent migration, quiesce writes to the source before starting. + +## Batching and concurrency + +| Parameter | Value | +|-----------|-------| +| SCAN batch size | 500 keys per iteration | +| TYPE lookup batch | 500 keys per pipeline | +| Migration batch | 50 keys in parallel | +| List/stream chunk | 1,000 elements per read | +| Max concurrent analysis jobs | 20 | +| Max concurrent execution jobs | 10 | +| Stuck job timeout | 2 hours (auto-cancelled) | + +## Credential handling + +RedisShake log output is sanitized before being served to the frontend. Patterns +like `password = "secret"` and `redis://user:pass@host` are redacted. Source +passwords are never included in API responses. From c2a53ad2046e5f9724107373e0399bc7ea12ad84 Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Wed, 1 Apr 2026 10:18:51 +0300 Subject: [PATCH 32/34] Fix 5 review findings: pttl===0, catch scope, regex, hash compare, HFE denominator - Treat PTTL 0 (sub-ms remaining) as expired in both migrateString and atomicRenameWithTtl Lua script, preventing ghost permanent keys - Narrow atomicRenameWithTtl catch to NOSCRIPT/ACL errors only so transient failures (OOM, timeout) propagate instead of silently widening the TTL race window - Change sanitizeLogLine unquoted-credential regex from \S.* to \S+ so it stops at the first whitespace instead of eating the whole line - Remove Math.min(10, ...) 
cap in compareHash so all fields under the large-key threshold are validated, not just the first 10 - Use candidates.length instead of validKeys.length as denominator in HFE extrapolation to account for oversized hashes excluded from sample Co-Authored-By: Claude --- .../api/src/migration/analysis/hfe-detector.ts | 4 ++-- .../src/migration/execution/type-handlers.ts | 18 ++++++++++++------ .../migration/migration-execution.service.ts | 2 +- .../migration/validation/sample-validator.ts | 5 ++--- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/apps/api/src/migration/analysis/hfe-detector.ts b/apps/api/src/migration/analysis/hfe-detector.ts index 8b4bdb5e..5864f643 100644 --- a/apps/api/src/migration/analysis/hfe-detector.ts +++ b/apps/api/src/migration/analysis/hfe-detector.ts @@ -119,8 +119,8 @@ export async function detectHfe( if (hfePositiveKeys > 0) { result.hfeDetected = true; - result.hfeKeyCount = validKeys.length > 0 - ? Math.round((hfePositiveKeys / validKeys.length) * totalEstimatedHashKeys) + result.hfeKeyCount = candidates.length > 0 + ? 
Math.round((hfePositiveKeys / candidates.length) * totalEstimatedHashKeys) : 0; } } catch { diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index 9204fc27..3c328606 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -106,11 +106,12 @@ async function migrateString(source: Valkey, target: Valkey, key: string): Promi if (pttl > 0) { // Atomic SET with PX — no window where key exists without TTL await target.set(key, value, 'PX', pttl); - } else if (pttl === -2) { - // Key expired between GET and PTTL — remove any ghost copy + } else if (pttl === -2 || pttl === 0) { + // pttl -2: expired between GET and PTTL; pttl 0: sub-ms remaining — treat as expired await target.del(key); return false; } else { + // pttl -1: no expiry (persistent key) await target.set(key, value); } return true; @@ -306,7 +307,7 @@ redis.call('RENAME', KEYS[1], KEYS[2]) local pttl = tonumber(ARGV[1]) if pttl > 0 then redis.call('PEXPIRE', KEYS[2], pttl) -elseif pttl == -2 then +elseif pttl == -2 or pttl == 0 then redis.call('DEL', KEYS[2]) end return 1 @@ -325,12 +326,17 @@ async function atomicRenameWithTtl( ): Promise { try { await target.call('EVAL', RENAME_WITH_TTL_LUA, '2', tmp, key, String(pttl)); - } catch { - // Fallback: separate commands (e.g. EVAL blocked by ACL) + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + // Only fall back for NOSCRIPT / unknown-command / ACL-denied errors. + // Transient errors (OOM, timeouts) should propagate. 
+ if (!/NOSCRIPT|unknown command|DENIED|NOPERM/i.test(msg)) { + throw err; + } await target.rename(tmp, key); if (pttl > 0) { await target.pexpire(key, pttl); - } else if (pttl === -2) { + } else if (pttl === -2 || pttl === 0) { await target.del(key); } } diff --git a/apps/api/src/migration/migration-execution.service.ts b/apps/api/src/migration/migration-execution.service.ts index ec7bba37..fec4fef9 100644 --- a/apps/api/src/migration/migration-execution.service.ts +++ b/apps/api/src/migration/migration-execution.service.ts @@ -302,7 +302,7 @@ function sanitizeLogLine(line: string): string { ); // 2. Unquoted sensitive fields (skip already-redacted quoted ones) sanitized = sanitized.replace( - new RegExp(`(${SENSITIVE_KEYS.source})\\s*[=:]\\s*(?!["*])\\S.*`, 'gi'), + new RegExp(`(${SENSITIVE_KEYS.source})\\s*[=:]\\s*(?!["*])\\S+`, 'gi'), (match) => { const eqIdx = match.search(/[=:]/); return match.slice(0, eqIdx + 1) + ' ***'; diff --git a/apps/api/src/migration/validation/sample-validator.ts b/apps/api/src/migration/validation/sample-validator.ts index 7ce86a7d..1feb241e 100644 --- a/apps/api/src/migration/validation/sample-validator.ts +++ b/apps/api/src/migration/validation/sample-validator.ts @@ -243,9 +243,8 @@ async function compareHash(source: Valkey, target: Valkey, key: string): Promise sourceEntries.sort((a, b) => a.field.compare(b.field)); targetEntries.sort((a, b) => a.field.compare(b.field)); - // Compare first 10 sorted fields (fully binary-safe) - const checkCount = Math.min(10, sourceEntries.length); - for (let i = 0; i < checkCount; i++) { + // Compare all sorted fields (fully binary-safe) + for (let i = 0; i < sourceEntries.length; i++) { if (!sourceEntries[i].field.equals(targetEntries[i].field)) { return `field names differ at index ${i}`; } From f9dccdb6cd5556dd656c13b3bd3d7481a8af0d8f Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Wed, 1 Apr 2026 10:51:39 +0300 Subject: [PATCH 33/34] Fix cluster fail-master filtering, CROSSSLOT temp 
keys, stuck-job handling, atomic TTL, and list drift check - tempKey() returns null for keys with empty/invalid hash tags (e.g. `user:{}:1`) to avoid CROSSSLOT errors in cluster mode; all type handlers fall back to direct writes with applyTtl when no safe temp key is possible - Validation evictOldJobs() now throws ServiceUnavailableException instead of silently logging when all job slots are occupied - Cross-user analysisId access blocked by verifying source/target connection IDs match the analysis job Co-Authored-By: Claude --- .../src/migration/execution/type-handlers.ts | 111 +++++++++++++----- .../migration/migration-validation.service.ts | 13 +- 2 files changed, 91 insertions(+), 33 deletions(-) diff --git a/apps/api/src/migration/execution/type-handlers.ts b/apps/api/src/migration/execution/type-handlers.ts index 3c328606..1e059048 100644 --- a/apps/api/src/migration/execution/type-handlers.ts +++ b/apps/api/src/migration/execution/type-handlers.ts @@ -10,22 +10,28 @@ const STREAM_CHUNK = 1000; /** * Generate a unique temporary key that hashes to the same slot as the original key. * In cluster mode, RENAME requires both keys to be in the same slot. - * We preserve the original key's hash tag if present, or wrap the key itself - * as the hash tag so Redis routes both to the same slot. + * + * Returns null for keys that contain braces but have no valid hash tag (e.g. + * `user:{}:1`). Valkey hashes the full key name for these, and we can't + * construct a temp key in the same slot without embedding `}` which would + * create a different hash tag. Callers must write directly to the final key + * when null is returned. 
*/ -function tempKey(key: string): string { +function tempKey(key: string): string | null { const suffix = randomBytes(8).toString('hex'); - // Extract existing hash tag: first {…} pair where content is non-empty const openBrace = key.indexOf('{'); if (openBrace !== -1) { const closeBrace = key.indexOf('}', openBrace + 1); if (closeBrace > openBrace + 1) { - // Key already has a hash tag — reuse it verbatim + // Key has a valid hash tag — reuse it so temp key lands in the same slot const tag = key.substring(openBrace, closeBrace + 1); return `__betterdb_mig_${suffix}:${tag}`; } + // Braces present but no valid tag (empty `{}` or unclosed `{`). + // Cannot safely construct a same-slot temp key. + return null; } - // No hash tag — wrap the whole key as the tag + // No braces — wrap the whole key as the tag return `__betterdb_mig_${suffix}:{${key}}`; } @@ -123,26 +129,33 @@ async function migrateHash(source: Valkey, target: Valkey, key: string): Promise const len = await source.hlen(key); if (len === 0) return false; - // Write to a temp key then atomically RENAME to avoid data loss on crash const tmp = tempKey(key); + const writeKey = tmp ?? 
key; + try { + // DEL the target when writing directly (no temp key) + if (!tmp) await target.del(key); + // Use HSCAN for all sizes so binary field names are preserved as Buffers - // (hgetallBuffer returns Record which coerces field names to UTF-8) let cursor = '0'; do { const [next, fields] = await source.hscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); cursor = String(next); if (fields.length === 0) continue; - const args: (string | Buffer | number)[] = [tmp]; + const args: (string | Buffer | number)[] = [writeKey]; for (let i = 0; i < fields.length; i += 2) { args.push(fields[i], fields[i + 1]); } await target.call('HSET', ...args); } while (cursor !== '0'); const pttl = await source.pttl(key); - await atomicRenameWithTtl(target, tmp, key, pttl); + if (tmp) { + await atomicRenameWithTtl(target, tmp, key, pttl); + } else { + await applyTtl(target, key, pttl); + } } catch (err) { - try { await target.del(tmp); } catch { /* best-effort cleanup */ } + if (tmp) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } } throw err; } return true; @@ -154,19 +167,26 @@ async function migrateList(source: Valkey, target: Valkey, key: string): Promise const len = await source.llen(key); if (len === 0) return false; - // Write to a temp key then atomically RENAME to avoid data loss on crash const tmp = tempKey(key); + const writeKey = tmp ?? 
key; + try { + if (!tmp) await target.del(key); + for (let start = 0; start < len; start += LIST_CHUNK) { const end = Math.min(start + LIST_CHUNK - 1, len - 1); const items = await source.lrangeBuffer(key, start, end); if (items.length === 0) break; - await target.call('RPUSH', tmp, ...items); + await target.call('RPUSH', writeKey, ...items); } const pttl = await source.pttl(key); - await atomicRenameWithTtl(target, tmp, key, pttl); + if (tmp) { + await atomicRenameWithTtl(target, tmp, key, pttl); + } else { + await applyTtl(target, key, pttl); + } } catch (err) { - try { await target.del(tmp); } catch { /* best-effort cleanup */ } + if (tmp) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } } throw err; } return true; @@ -178,29 +198,36 @@ async function migrateSet(source: Valkey, target: Valkey, key: string): Promise< const card = await source.scard(key); if (card === 0) return false; - // Write to a temp key then atomically RENAME to avoid data loss on crash const tmp = tempKey(key); + const writeKey = tmp ?? 
key; + try { + if (!tmp) await target.del(key); + if (card <= LARGE_KEY_THRESHOLD) { const members = await source.smembersBuffer(key); if (members.length === 0) { - try { await target.del(tmp); } catch { /* best-effort cleanup */ } + if (tmp) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } } return false; // key expired between SCARD and SMEMBERS } - await target.call('SADD', tmp, ...members); + await target.call('SADD', writeKey, ...members); } else { let cursor = '0'; do { const [next, members] = await source.sscanBuffer(key, cursor, 'COUNT', SCAN_BATCH); cursor = String(next); if (members.length === 0) continue; - await target.call('SADD', tmp, ...members); + await target.call('SADD', writeKey, ...members); } while (cursor !== '0'); } const pttl = await source.pttl(key); - await atomicRenameWithTtl(target, tmp, key, pttl); + if (tmp) { + await atomicRenameWithTtl(target, tmp, key, pttl); + } else { + await applyTtl(target, key, pttl); + } } catch (err) { - try { await target.del(tmp); } catch { /* best-effort cleanup */ } + if (tmp) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } } throw err; } return true; @@ -212,21 +239,24 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise const card = await source.zcard(key); if (card === 0) return false; - // Write to a temp key then atomically RENAME to avoid data loss on crash const tmp = tempKey(key); + const writeKey = tmp ?? 
key; + try { + if (!tmp) await target.del(key); + if (card <= LARGE_KEY_THRESHOLD) { // Use callBuffer to preserve binary member data (call() decodes as UTF-8) const raw = await source.callBuffer('ZRANGE', key, '0', '-1', 'WITHSCORES') as Buffer[]; if (!raw || raw.length === 0) { - try { await target.del(tmp); } catch { /* best-effort cleanup */ } + if (tmp) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } } return false; // key expired between ZCARD and ZRANGE } // raw is [member, score, member, score, ...] as Buffers const pipeline = target.pipeline(); for (let i = 0; i < raw.length; i += 2) { // Score is always ASCII-safe, member stays as Buffer - pipeline.zadd(tmp, raw[i + 1].toString(), raw[i]); + pipeline.zadd(writeKey, raw[i + 1].toString(), raw[i]); } await pipeline.exec(); } else { @@ -240,15 +270,19 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise // entries is [member, score, member, score, ...] as Buffers const pipeline = target.pipeline(); for (let i = 0; i < entries.length; i += 2) { - pipeline.zadd(tmp, entries[i + 1].toString(), entries[i]); + pipeline.zadd(writeKey, entries[i + 1].toString(), entries[i]); } await pipeline.exec(); } while (cursor !== '0'); } const pttl = await source.pttl(key); - await atomicRenameWithTtl(target, tmp, key, pttl); + if (tmp) { + await atomicRenameWithTtl(target, tmp, key, pttl); + } else { + await applyTtl(target, key, pttl); + } } catch (err) { - try { await target.del(tmp); } catch { /* best-effort cleanup */ } + if (tmp) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } } throw err; } return true; @@ -257,11 +291,13 @@ async function migrateZset(source: Valkey, target: Valkey, key: string): Promise // ── Stream ── async function migrateStream(source: Valkey, target: Valkey, key: string): Promise { - // Write to a temp key then atomically RENAME to avoid data loss on crash const tmp = tempKey(key); + const writeKey = tmp ?? 
key; let wrote = false; try { + if (!tmp) await target.del(key); + let lastId = '-'; let hasMore = true; @@ -279,7 +315,7 @@ async function migrateStream(source: Valkey, target: Valkey, key: string): Promi // entry[0] = stream ID (always ASCII), entry[1] = [field, value, field, value, ...] const id = entry[0].toString(); const fields = entry[1] as unknown as Buffer[]; - await target.callBuffer('XADD', tmp, id, ...fields); + await target.callBuffer('XADD', writeKey, id, ...fields); lastId = id; wrote = true; } @@ -289,10 +325,14 @@ async function migrateStream(source: Valkey, target: Valkey, key: string): Promi } if (wrote) { const pttl = await source.pttl(key); - await atomicRenameWithTtl(target, tmp, key, pttl); + if (tmp) { + await atomicRenameWithTtl(target, tmp, key, pttl); + } else { + await applyTtl(target, key, pttl); + } } } catch (err) { - try { await target.del(tmp); } catch { /* best-effort cleanup */ } + if (tmp) { try { await target.del(tmp); } catch { /* best-effort cleanup */ } } throw err; } return wrote; @@ -314,6 +354,15 @@ return 1 `; +/** Apply TTL directly to a key (used when temp-key RENAME is not possible). */ +async function applyTtl(target: Valkey, key: string, pttl: number): Promise { + if (pttl > 0) { + await target.pexpire(key, pttl); + } else if (pttl === -2 || pttl === 0) { + await target.del(key); + } +} + /** * Atomically RENAME tmp→key and apply PTTL in a single Lua eval. * Falls back to separate RENAME + PEXPIRE if EVAL is blocked (e.g. by ACL). 
diff --git a/apps/api/src/migration/migration-validation.service.ts b/apps/api/src/migration/migration-validation.service.ts index 14c9db4b..e3bbca60 100644 --- a/apps/api/src/migration/migration-validation.service.ts +++ b/apps/api/src/migration/migration-validation.service.ts @@ -1,4 +1,4 @@ -import { Injectable, Inject, Logger, NotFoundException, BadRequestException } from '@nestjs/common'; +import { Injectable, Inject, Logger, NotFoundException, BadRequestException, ServiceUnavailableException } from '@nestjs/common'; import { randomUUID } from 'crypto'; import Valkey from 'iovalkey'; import type { @@ -50,6 +50,13 @@ export class MigrationValidationService { if (req.analysisId) { const job = this.migrationService.getJob(req.analysisId); if (job && job.status === 'completed') { + // Verify the analysis belongs to the same source/target pair + if ( + (job.sourceConnectionId && job.sourceConnectionId !== req.sourceConnectionId) || + (job.targetConnectionId && job.targetConnectionId !== req.targetConnectionId) + ) { + throw new BadRequestException('Analysis does not match the provided source/target connections'); + } analysisResult = job; } } @@ -253,7 +260,9 @@ export class MigrationValidationService { } if (this.jobs.size >= this.MAX_JOBS) { - this.logger.warn(`Validation job limit reached (${this.MAX_JOBS}). Cannot evict running jobs.`); + throw new ServiceUnavailableException( + `Validation job limit reached (${this.MAX_JOBS}). All slots occupied by running jobs — try again later.`, + ); } } } From f2bcab9de584993ff107c5650ac1876fc8a29e1c Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Wed, 1 Apr 2026 12:34:03 +0300 Subject: [PATCH 34/34] Fix migration e2e tests: use DB_PORT instead of hardcoded 6380 The test docker-compose.test.yml maps Valkey to port 6390, but the migration e2e tests had port 6380 hardcoded, causing "Connection is closed" failures in CI. 
Co-Authored-By: Claude --- apps/api/test/api-migration.e2e-spec.ts | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/apps/api/test/api-migration.e2e-spec.ts b/apps/api/test/api-migration.e2e-spec.ts index 9092e730..1234474a 100644 --- a/apps/api/test/api-migration.e2e-spec.ts +++ b/apps/api/test/api-migration.e2e-spec.ts @@ -5,7 +5,7 @@ import { createTestApp } from './test-utils'; /** * E2E tests for the /migration API endpoints. - * Requires Valkey on 6380 (Docker) available. + * Requires Valkey available on DB_PORT (default 6390 from docker-compose.test.yml). */ describe('Migration API (e2e)', () => { let app: NestFastifyApplication; @@ -13,12 +13,13 @@ describe('Migration API (e2e)', () => { let targetConnectionId: string; let createdConnectionIds: string[] = []; + const dbPort = Number(process.env.DB_PORT) || 6390; + const dbPassword = process.env.DB_PASSWORD || 'devpassword'; + beforeAll(async () => { app = await createTestApp(); - // Seed a handful of test keys on port 6380 - const dbPassword = process.env.DB_PASSWORD || 'devpassword'; - const seedClient = new Valkey({ host: 'localhost', port: 6380, password: dbPassword, lazyConnect: true }); + const seedClient = new Valkey({ host: 'localhost', port: dbPort, password: dbPassword, lazyConnect: true }); try { await seedClient.connect(); await seedClient.set('migration:test:string', 'hello'); @@ -30,10 +31,10 @@ describe('Migration API (e2e)', () => { await seedClient.quit(); } - // Create two connections both pointing to 6380 + // Create two connections both pointing to the test Valkey instance const res1 = await request(app.getHttpServer()) .post('/connections') - .send({ name: 'Migration Source', host: 'localhost', port: 6380, password: dbPassword }); + .send({ name: 'Migration Source', host: 'localhost', port: dbPort, password: dbPassword }); if (res1.status === 200 || res1.status === 201) { sourceConnectionId = res1.body.id; createdConnectionIds.push(sourceConnectionId); 
@@ -41,7 +42,7 @@ describe('Migration API (e2e)', () => { const res2 = await request(app.getHttpServer()) .post('/connections') - .send({ name: 'Migration Target', host: 'localhost', port: 6380, password: dbPassword }); + .send({ name: 'Migration Target', host: 'localhost', port: dbPort, password: dbPassword }); if (res2.status === 200 || res2.status === 201) { targetConnectionId = res2.body.id; createdConnectionIds.push(targetConnectionId); @@ -50,7 +51,7 @@ describe('Migration API (e2e)', () => { afterAll(async () => { // Clean up test keys - const cleanupClient = new Valkey({ host: 'localhost', port: 6380, password: process.env.DB_PASSWORD || 'devpassword', lazyConnect: true }); + const cleanupClient = new Valkey({ host: 'localhost', port: dbPort, password: dbPassword, lazyConnect: true }); try { await cleanupClient.connect(); await cleanupClient.del(