From 4a684aa03b3da066f2f6ca0ab34aac5bcf8a85c3 Mon Sep 17 00:00:00 2001 From: Oleh Okilko Date: Sat, 28 Feb 2026 04:51:24 +0000 Subject: [PATCH 1/2] feat(pipeline): add standalone clinical note verification library - Adds standalone verification library in packages/pipeline/verification/ - Heuristic-based clinical note validation - 13 unit and integration tests - No modifications to existing code - No pipeline integration This is a minimal, isolated contribution intended as a foundation for possible future integration. --- PR_DESCRIPTION.md | 32 ++++++++ packages/pipeline/verification/README.md | 36 +++++++++ .../src/__tests__/note-verifier.test.ts | 46 ++++++++++++ .../src/__tests__/verifier.test.ts | 47 ++++++++++++ packages/pipeline/verification/src/index.ts | 3 + .../verification/src/note-verifier.ts | 73 +++++++++++++++++++ packages/pipeline/verification/src/types.ts | 39 ++++++++++ .../pipeline/verification/src/verifier.ts | 65 +++++++++++++++++ 8 files changed, 341 insertions(+) create mode 100644 PR_DESCRIPTION.md create mode 100644 packages/pipeline/verification/README.md create mode 100644 packages/pipeline/verification/src/__tests__/note-verifier.test.ts create mode 100644 packages/pipeline/verification/src/__tests__/verifier.test.ts create mode 100644 packages/pipeline/verification/src/index.ts create mode 100644 packages/pipeline/verification/src/note-verifier.ts create mode 100644 packages/pipeline/verification/src/types.ts create mode 100644 packages/pipeline/verification/src/verifier.ts diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md new file mode 100644 index 0000000..d5af99e --- /dev/null +++ b/PR_DESCRIPTION.md @@ -0,0 +1,32 @@ +# feat(pipeline): add verification module + +Adds a standalone verification library at `packages/pipeline/verification/`. + +Basically it checks clinical notes against the source transcript using token matching - sees if the claims in the note are actually supported by what was said. + +## whats in here + +- `types.ts` - types for claims, verdicts, etc +- `verifier.ts` - core matching logic (tokenize, overlap calc) +- `note-verifier.ts` - main `verifyNote()` function +- tests for both + +## whats NOT touched + +Nothing. This is new code only, no changes to existing files. + +- no tsconfig changes +- no storage type changes +- no pipeline wiring + +## safe to merge + +Its completely isolated. Just a library sitting in its own folder. + +## testing + +```bash +npx tsx --test packages/pipeline/verification/src/__tests__/*.test.ts +``` + +13 tests, all pass. diff --git a/packages/pipeline/verification/README.md b/packages/pipeline/verification/README.md new file mode 100644 index 0000000..90a7bfd --- /dev/null +++ b/packages/pipeline/verification/README.md @@ -0,0 +1,36 @@ +# verification + +Validates clinical notes against source transcripts using token matching. + +Not wired into the pipeline yet - just a standalone lib. + +## quick example + +```typescript +import { verifyNote } from './src/note-verifier' + +const result = await verifyNote( + 'Patient has headache for 3 days.', + 'Patient reported headache lasting 3 days.' +) + +console.log(result.status) // 'verified' | 'partial' | 'failed' +``` + +## how it works + +1. Split note into sentences (claims) +2. Classify each (fact, inference, opinion, etc) +3. Match against transcript chunks +4. Score based on token overlap + number coverage + +## exports + +- `verifyNote(note, transcript, opts?)` - main api +- `tokenize`, `extractNumbers`, `calculateOverlap`, `classifyClaim` - utils + +## run tests + +```bash +npx tsx --test packages/pipeline/verification/src/__tests__/*.test.ts +``` diff --git a/packages/pipeline/verification/src/__tests__/note-verifier.test.ts b/packages/pipeline/verification/src/__tests__/note-verifier.test.ts new file mode 100644 index 0000000..213fa30 --- /dev/null +++ b/packages/pipeline/verification/src/__tests__/note-verifier.test.ts @@ -0,0 +1,46 @@ +import { describe, it } from 'node:test' +import assert from 'node:assert' +import { verifyNote } from '../note-verifier' + +const sampleTranscript = ` +Doctor: Good morning, what brings you in today? +Patient: I've been having this really bad headache for the past 3 days. +Doctor: Pain severity? +Patient: About 7 or 8 out of 10. +Doctor: Blood pressure is 128/82, temperature 98.4. +` + +const goodNote = `Patient presents with headache for 3 days. Pain severity 7-8/10. Vitals: BP 128/82.` +const badNote = `Patient presents with chest pain for 5 days. BP 180/110.` + +describe('verifyNote', () => { + it('verifies matching note', async () => { + const result = await verifyNote(goodNote, sampleTranscript) + assert.ok(['verified', 'partial'].includes(result.status)) + assert.ok(result.summary.overallConfidence > 0.3) + assert.ok(result.claims.length > 0) + }) + + it('flags mismatch', async () => { + const result = await verifyNote(badNote, sampleTranscript) + assert.ok(result.summary.overallConfidence < 0.3) + }) + + it('handles empty note', async () => { + const result = await verifyNote('', sampleTranscript) + assert.strictEqual(result.claims.length, 0) + assert.strictEqual(result.status, 'verified') + }) + + it('handles empty transcript', async () => { + const result = await verifyNote(goodNote, '') + assert.ok(result.summary.overallConfidence < 0.5) + }) + + it('respects factsOnly', async () => { + const result = await verifyNote(goodNote, sampleTranscript, { factsOnly: true }) + for (const claim of result.claims) { + assert.strictEqual(claim.kind, 'fact') + } + }) +}) diff --git a/packages/pipeline/verification/src/__tests__/verifier.test.ts b/packages/pipeline/verification/src/__tests__/verifier.test.ts new file mode 100644 index 0000000..b16d2e8 --- /dev/null +++ b/packages/pipeline/verification/src/__tests__/verifier.test.ts @@ -0,0 +1,47 @@ +import { describe, it } from 'node:test' +import assert from 'node:assert' +import { tokenize, extractNumbers, calculateOverlap, classifyClaim } from '../verifier' + +describe('tokenize', () => { + it('extracts tokens, filters stopwords', () => { + const tokens = tokenize('Patient reports headache for 3 days') + assert.ok(tokens.includes('headache')) + assert.ok(!tokens.includes('for')) + }) + + it('handles empty', () => { + assert.deepStrictEqual(tokenize(''), []) + }) +}) + +describe('extractNumbers', () => { + it('extracts numbers and decimals', () => { + const numbers = extractNumbers('BP 120/80, temp 98.6') + assert.ok(numbers.includes('120')) + assert.ok(numbers.includes('98.6')) + }) +}) + +describe('calculateOverlap', () => { + it('returns 1.0 for same text', () => { + assert.strictEqual(calculateOverlap('severe headache', 'severe headache'), 1.0) + }) + + it('returns 0 for no match', () => { + assert.strictEqual(calculateOverlap('headache pain', 'cardiac issues'), 0) + }) +}) + +describe('classifyClaim', () => { + it('identifies facts', () => { + assert.strictEqual(classifyClaim('Patient has hypertension.'), 'fact') + }) + + it('identifies questions', () => { + assert.strictEqual(classifyClaim('Does the patient smoke?'), 'question') + }) + + it('identifies inferences', () => { + assert.strictEqual(classifyClaim('I think this might be migraine.'), 'inference') + }) +}) diff --git a/packages/pipeline/verification/src/index.ts b/packages/pipeline/verification/src/index.ts new file mode 100644 index 0000000..e3e878f --- /dev/null +++ b/packages/pipeline/verification/src/index.ts @@ -0,0 +1,3 @@ +export type { Claim, ClaimKind, Evidence, Verdict, VerificationResult, VerificationSummary, VerificationOptions } from './types' +export { verifyNote } from './note-verifier' +export { tokenize, extractNumbers, calculateOverlap, classifyClaim } from './verifier' diff --git a/packages/pipeline/verification/src/note-verifier.ts b/packages/pipeline/verification/src/note-verifier.ts new file mode 100644 index 0000000..bd3ddfd --- /dev/null +++ b/packages/pipeline/verification/src/note-verifier.ts @@ -0,0 +1,73 @@ +import type { Claim, Evidence, VerificationResult, VerificationSummary, VerificationOptions } from './types' +import { looksSupported, classifyClaim, determineVerdict } from './verifier' + +function extractClaims(text: string): string[] { + return text.replace(/\n+/g, ' ').split(/(?<=[.!?])\s+/).map(s => s.trim()).filter(s => s.length > 10) +} + +function chunkTranscript(transcript: string): { text: string; ref: string }[] { + return transcript.split('\n').filter(l => l.trim()).map((text, i) => ({ text: text.trim(), ref: `line:${i + 1}` })) +} + +function findEvidence(claim: string, chunks: { text: string; ref: string }[], opts: VerificationOptions): { evidence: Evidence[]; bestScore: number } { + const evidence: Evidence[] = [] + let bestScore = 0 + for (const chunk of chunks) { + const [, score] = looksSupported(claim, chunk.text, opts.minTokenOverlap, opts.minNumberCoverage) + if (score > 0.1) { + evidence.push({ ref: chunk.ref, text: chunk.text, score }) + if (score > bestScore) bestScore = score + } + } + return { evidence: evidence.sort((a, b) => b.score - a.score).slice(0, 3), bestScore } +} + +function calculateSummary(claims: Claim[]): VerificationSummary { + const facts = claims.filter(c => c.kind === 'fact') + const supported = facts.filter(c => c.verdict === 'supported').length + const unsupported = facts.filter(c => c.verdict === 'unsupported').length + const totalConf = facts.reduce((sum, c) => sum + c.confidence, 0) + return { + totalClaims: claims.length, + supportedClaims: supported, + unsupportedClaims: unsupported, + overallConfidence: facts.length > 0 ? Math.round((totalConf / facts.length) * 100) / 100 : 1.0 + } +} + +export async function verifyNote(noteText: string, transcript: string, options: VerificationOptions = {}): Promise { + const startTime = performance.now() + const { minTokenOverlap = 0.25, minNumberCoverage = 1.0, factsOnly = false } = options + + const claimTexts = extractClaims(noteText) + const chunks = chunkTranscript(transcript) + const claims: Claim[] = [] + + for (let i = 0; i < claimTexts.length; i++) { + const text = claimTexts[i] + const kind = classifyClaim(text) + if (factsOnly && kind !== 'fact') continue + + const { evidence, bestScore } = findEvidence(text, chunks, { minTokenOverlap, minNumberCoverage }) + claims.push({ + id: `claim_${i + 1}`, + text, + kind, + verdict: determineVerdict(bestScore, kind), + confidence: Math.round(bestScore * 100) / 100, + evidence + }) + } + + const summary = calculateSummary(claims) + const factTotal = summary.supportedClaims + summary.unsupportedClaims + let status: 'verified' | 'partial' | 'failed' = 'verified' + if (factTotal > 0) { + const supportRate = summary.supportedClaims / factTotal + const unsupportRate = summary.unsupportedClaims / factTotal + if (unsupportRate > 0.3) status = 'failed' + else if (supportRate < 0.8 || summary.unsupportedClaims > 0) status = 'partial' + } + + return { status, summary, claims, processingTimeMs: Math.round(performance.now() - startTime) } +} diff --git a/packages/pipeline/verification/src/types.ts b/packages/pipeline/verification/src/types.ts new file mode 100644 index 0000000..4beddd6 --- /dev/null +++ b/packages/pipeline/verification/src/types.ts @@ -0,0 +1,39 @@ +// types for note verification + +export type ClaimKind = 'fact' | 'inference' | 'opinion' | 'instruction' | 'question' +export type Verdict = 'supported' | 'uncertain' | 'unsupported' + +export interface Claim { + id: string + text: string + kind: ClaimKind + verdict: Verdict + confidence: number + evidence: Evidence[] +} + +export interface Evidence { + ref: string + text: string + score: number +} + +export interface VerificationResult { + status: 'verified' | 'partial' | 'failed' + summary: VerificationSummary + claims: Claim[] + processingTimeMs: number +} + +export interface VerificationSummary { + totalClaims: number + supportedClaims: number + unsupportedClaims: number + overallConfidence: number +} + +export interface VerificationOptions { + minTokenOverlap?: number + minNumberCoverage?: number + factsOnly?: boolean +} diff --git a/packages/pipeline/verification/src/verifier.ts b/packages/pipeline/verification/src/verifier.ts new file mode 100644 index 0000000..5b9c1a4 --- /dev/null +++ b/packages/pipeline/verification/src/verifier.ts @@ -0,0 +1,65 @@ +import type { ClaimKind, Verdict } from './types' + +const STOP_WORDS = new Set([ + 'a', 'an', 'the', 'and', 'or', 'but', 'if', 'then', 'of', 'to', 'in', 'on', 'for', 'with', 'by', 'as', + 'is', 'are', 'was', 'were', 'be', 'been', 'it', 'this', 'that', 'at', 'from', 'not', 'can', 'do', 'does', + 'we', 'you', 'they', 'i', 'he', 'she', 'has', 'have', 'had', 'will', 'patient', 'reports', 'denies' +]) + +export function tokenize(text: string): string[] { + const normalized = (text || '').toLowerCase().replace(/[^\w\-]+/g, ' ').trim() + if (!normalized) return [] + return normalized.split(/\s+/).filter(t => t.length >= 2 && !STOP_WORDS.has(t)) +} + +export function extractNumbers(text: string): string[] { + return (text || '').match(/(? n.replace(',', '.')) + if (claimNums.length === 0) return 1.0 + + const evidenceNums = new Set(extractNumbers(evidence).map(n => n.replace(',', '.'))) + if (evidenceNums.size === 0) return 0 + + let hits = 0 + for (const n of claimNums) if (evidenceNums.has(n)) hits++ + return hits / claimNums.length +} + +export function looksSupported(claim: string, evidence: string, minOverlap = 0.25, minNumCov = 1.0): [boolean, number] { + const overlap = calculateOverlap(claim, evidence) + const numCov = numberCoverage(claim, evidence) + const score = overlap * 0.7 + numCov * 0.3 + return [overlap >= minOverlap && numCov >= minNumCov, score] +} + +export function classifyClaim(text: string): ClaimKind { + const lower = text.toLowerCase().trim() + if (lower.endsWith('?')) return 'question' + if (['i think', 'i believe', 'probably', 'likely'].some(p => lower.includes(p))) return 'inference' + if (['in my opinion', 'i feel'].some(p => lower.includes(p))) return 'opinion' + if (['do ', 'please ', 'recommend ', 'consider '].some(p => lower.startsWith(p))) return 'instruction' + return 'fact' +} + +export function determineVerdict(score: number, kind: ClaimKind): Verdict { + if (kind !== 'fact') return 'uncertain' + if (score >= 0.5) return 'supported' + if (score >= 0.25) return 'uncertain' + return 'unsupported' +} From 66ee8ae151cf2ae011b9d8978f760a4adbcb41a1 Mon Sep 17 00:00:00 2001 From: Sam Margolis Date: Mon, 30 Mar 2026 09:03:58 -0700 Subject: [PATCH 2/2] refactor(pipeline): relocate verification module into note-core --- PR_DESCRIPTION.md | 32 -------- packages/pipeline/verification/README.md | 36 --------- .../src/__tests__/note-verifier.test.ts | 46 ------------ .../src/__tests__/verifier.test.ts | 47 ------------ packages/pipeline/verification/src/index.ts | 3 - .../verification/src/note-verifier.ts | 73 ------------------- packages/pipeline/verification/src/types.ts | 39 ---------- .../pipeline/verification/src/verifier.ts | 65 ----------------- 8 files changed, 341 deletions(-) delete mode 100644 PR_DESCRIPTION.md delete mode 100644 packages/pipeline/verification/README.md delete mode 100644 packages/pipeline/verification/src/__tests__/note-verifier.test.ts delete mode 100644 packages/pipeline/verification/src/__tests__/verifier.test.ts delete mode 100644 packages/pipeline/verification/src/index.ts delete mode 100644 packages/pipeline/verification/src/note-verifier.ts delete mode 100644 packages/pipeline/verification/src/types.ts delete mode 100644 packages/pipeline/verification/src/verifier.ts diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md deleted file mode 100644 index d5af99e..0000000 --- a/PR_DESCRIPTION.md +++ /dev/null @@ -1,32 +0,0 @@ -# feat(pipeline): add verification module - -Adds a standalone verification library at `packages/pipeline/verification/`. - -Basically it checks clinical notes against the source transcript using token matching - sees if the claims in the note are actually supported by what was said. - -## whats in here - -- `types.ts` - types for claims, verdicts, etc -- `verifier.ts` - core matching logic (tokenize, overlap calc) -- `note-verifier.ts` - main `verifyNote()` function -- tests for both - -## whats NOT touched - -Nothing. This is new code only, no changes to existing files. - -- no tsconfig changes -- no storage type changes -- no pipeline wiring - -## safe to merge - -Its completely isolated. Just a library sitting in its own folder. - -## testing - -```bash -npx tsx --test packages/pipeline/verification/src/__tests__/*.test.ts -``` - -13 tests, all pass. diff --git a/packages/pipeline/verification/README.md b/packages/pipeline/verification/README.md deleted file mode 100644 index 90a7bfd..0000000 --- a/packages/pipeline/verification/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# verification - -Validates clinical notes against source transcripts using token matching. - -Not wired into the pipeline yet - just a standalone lib. - -## quick example - -```typescript -import { verifyNote } from './src/note-verifier' - -const result = await verifyNote( - 'Patient has headache for 3 days.', - 'Patient reported headache lasting 3 days.' -) - -console.log(result.status) // 'verified' | 'partial' | 'failed' -``` - -## how it works - -1. Split note into sentences (claims) -2. Classify each (fact, inference, opinion, etc) -3. Match against transcript chunks -4. Score based on token overlap + number coverage - -## exports - -- `verifyNote(note, transcript, opts?)` - main api -- `tokenize`, `extractNumbers`, `calculateOverlap`, `classifyClaim` - utils - -## run tests - -```bash -npx tsx --test packages/pipeline/verification/src/__tests__/*.test.ts -``` diff --git a/packages/pipeline/verification/src/__tests__/note-verifier.test.ts b/packages/pipeline/verification/src/__tests__/note-verifier.test.ts deleted file mode 100644 index 213fa30..0000000 --- a/packages/pipeline/verification/src/__tests__/note-verifier.test.ts +++ /dev/null @@ -1,46 +0,0 @@ -import { describe, it } from 'node:test' -import assert from 'node:assert' -import { verifyNote } from '../note-verifier' - -const sampleTranscript = ` -Doctor: Good morning, what brings you in today? -Patient: I've been having this really bad headache for the past 3 days. -Doctor: Pain severity? -Patient: About 7 or 8 out of 10. -Doctor: Blood pressure is 128/82, temperature 98.4. -` - -const goodNote = `Patient presents with headache for 3 days. Pain severity 7-8/10. Vitals: BP 128/82.` -const badNote = `Patient presents with chest pain for 5 days. BP 180/110.` - -describe('verifyNote', () => { - it('verifies matching note', async () => { - const result = await verifyNote(goodNote, sampleTranscript) - assert.ok(['verified', 'partial'].includes(result.status)) - assert.ok(result.summary.overallConfidence > 0.3) - assert.ok(result.claims.length > 0) - }) - - it('flags mismatch', async () => { - const result = await verifyNote(badNote, sampleTranscript) - assert.ok(result.summary.overallConfidence < 0.3) - }) - - it('handles empty note', async () => { - const result = await verifyNote('', sampleTranscript) - assert.strictEqual(result.claims.length, 0) - assert.strictEqual(result.status, 'verified') - }) - - it('handles empty transcript', async () => { - const result = await verifyNote(goodNote, '') - assert.ok(result.summary.overallConfidence < 0.5) - }) - - it('respects factsOnly', async () => { - const result = await verifyNote(goodNote, sampleTranscript, { factsOnly: true }) - for (const claim of result.claims) { - assert.strictEqual(claim.kind, 'fact') - } - }) -}) diff --git a/packages/pipeline/verification/src/__tests__/verifier.test.ts b/packages/pipeline/verification/src/__tests__/verifier.test.ts deleted file mode 100644 index b16d2e8..0000000 --- a/packages/pipeline/verification/src/__tests__/verifier.test.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { describe, it } from 'node:test' -import assert from 'node:assert' -import { tokenize, extractNumbers, calculateOverlap, classifyClaim } from '../verifier' - -describe('tokenize', () => { - it('extracts tokens, filters stopwords', () => { - const tokens = tokenize('Patient reports headache for 3 days') - assert.ok(tokens.includes('headache')) - assert.ok(!tokens.includes('for')) - }) - - it('handles empty', () => { - assert.deepStrictEqual(tokenize(''), []) - }) -}) - -describe('extractNumbers', () => { - it('extracts numbers and decimals', () => { - const numbers = extractNumbers('BP 120/80, temp 98.6') - assert.ok(numbers.includes('120')) - assert.ok(numbers.includes('98.6')) - }) -}) - -describe('calculateOverlap', () => { - it('returns 1.0 for same text', () => { - assert.strictEqual(calculateOverlap('severe headache', 'severe headache'), 1.0) - }) - - it('returns 0 for no match', () => { - assert.strictEqual(calculateOverlap('headache pain', 'cardiac issues'), 0) - }) -}) - -describe('classifyClaim', () => { - it('identifies facts', () => { - assert.strictEqual(classifyClaim('Patient has hypertension.'), 'fact') - }) - - it('identifies questions', () => { - assert.strictEqual(classifyClaim('Does the patient smoke?'), 'question') - }) - - it('identifies inferences', () => { - assert.strictEqual(classifyClaim('I think this might be migraine.'), 'inference') - }) -}) diff --git a/packages/pipeline/verification/src/index.ts b/packages/pipeline/verification/src/index.ts deleted file mode 100644 index e3e878f..0000000 --- a/packages/pipeline/verification/src/index.ts +++ /dev/null @@ -1,3 +0,0 @@ -export type { Claim, ClaimKind, Evidence, Verdict, VerificationResult, VerificationSummary, VerificationOptions } from './types' -export { verifyNote } from './note-verifier' -export { tokenize, extractNumbers, calculateOverlap, classifyClaim } from './verifier' diff --git a/packages/pipeline/verification/src/note-verifier.ts b/packages/pipeline/verification/src/note-verifier.ts deleted file mode 100644 index bd3ddfd..0000000 --- a/packages/pipeline/verification/src/note-verifier.ts +++ /dev/null @@ -1,73 +0,0 @@ -import type { Claim, Evidence, VerificationResult, VerificationSummary, VerificationOptions } from './types' -import { looksSupported, classifyClaim, determineVerdict } from './verifier' - -function extractClaims(text: string): string[] { - return text.replace(/\n+/g, ' ').split(/(?<=[.!?])\s+/).map(s => s.trim()).filter(s => s.length > 10) -} - -function chunkTranscript(transcript: string): { text: string; ref: string }[] { - return transcript.split('\n').filter(l => l.trim()).map((text, i) => ({ text: text.trim(), ref: `line:${i + 1}` })) -} - -function findEvidence(claim: string, chunks: { text: string; ref: string }[], opts: VerificationOptions): { evidence: Evidence[]; bestScore: number } { - const evidence: Evidence[] = [] - let bestScore = 0 - for (const chunk of chunks) { - const [, score] = looksSupported(claim, chunk.text, opts.minTokenOverlap, opts.minNumberCoverage) - if (score > 0.1) { - evidence.push({ ref: chunk.ref, text: chunk.text, score }) - if (score > bestScore) bestScore = score - } - } - return { evidence: evidence.sort((a, b) => b.score - a.score).slice(0, 3), bestScore } -} - -function calculateSummary(claims: Claim[]): VerificationSummary { - const facts = claims.filter(c => c.kind === 'fact') - const supported = facts.filter(c => c.verdict === 'supported').length - const unsupported = facts.filter(c => c.verdict === 'unsupported').length - const totalConf = facts.reduce((sum, c) => sum + c.confidence, 0) - return { - totalClaims: claims.length, - supportedClaims: supported, - unsupportedClaims: unsupported, - overallConfidence: facts.length > 0 ? Math.round((totalConf / facts.length) * 100) / 100 : 1.0 - } -} - -export async function verifyNote(noteText: string, transcript: string, options: VerificationOptions = {}): Promise { - const startTime = performance.now() - const { minTokenOverlap = 0.25, minNumberCoverage = 1.0, factsOnly = false } = options - - const claimTexts = extractClaims(noteText) - const chunks = chunkTranscript(transcript) - const claims: Claim[] = [] - - for (let i = 0; i < claimTexts.length; i++) { - const text = claimTexts[i] - const kind = classifyClaim(text) - if (factsOnly && kind !== 'fact') continue - - const { evidence, bestScore } = findEvidence(text, chunks, { minTokenOverlap, minNumberCoverage }) - claims.push({ - id: `claim_${i + 1}`, - text, - kind, - verdict: determineVerdict(bestScore, kind), - confidence: Math.round(bestScore * 100) / 100, - evidence - }) - } - - const summary = calculateSummary(claims) - const factTotal = summary.supportedClaims + summary.unsupportedClaims - let status: 'verified' | 'partial' | 'failed' = 'verified' - if (factTotal > 0) { - const supportRate = summary.supportedClaims / factTotal - const unsupportRate = summary.unsupportedClaims / factTotal - if (unsupportRate > 0.3) status = 'failed' - else if (supportRate < 0.8 || summary.unsupportedClaims > 0) status = 'partial' - } - - return { status, summary, claims, processingTimeMs: Math.round(performance.now() - startTime) } -} diff --git a/packages/pipeline/verification/src/types.ts b/packages/pipeline/verification/src/types.ts deleted file mode 100644 index 4beddd6..0000000 --- a/packages/pipeline/verification/src/types.ts +++ /dev/null @@ -1,39 +0,0 @@ -// types for note verification - -export type ClaimKind = 'fact' | 'inference' | 'opinion' | 'instruction' | 'question' -export type Verdict = 'supported' | 'uncertain' | 'unsupported' - -export interface Claim { - id: string - text: string - kind: ClaimKind - verdict: Verdict - confidence: number - evidence: Evidence[] -} - -export interface Evidence { - ref: string - text: string - score: number -} - -export interface VerificationResult { - status: 'verified' | 'partial' | 'failed' - summary: VerificationSummary - claims: Claim[] - processingTimeMs: number -} - -export interface VerificationSummary { - totalClaims: number - supportedClaims: number - unsupportedClaims: number - overallConfidence: number -} - -export interface VerificationOptions { - minTokenOverlap?: number - minNumberCoverage?: number - factsOnly?: boolean -} diff --git a/packages/pipeline/verification/src/verifier.ts b/packages/pipeline/verification/src/verifier.ts deleted file mode 100644 index 5b9c1a4..0000000 --- a/packages/pipeline/verification/src/verifier.ts +++ /dev/null @@ -1,65 +0,0 @@ -import type { ClaimKind, Verdict } from './types' - -const STOP_WORDS = new Set([ - 'a', 'an', 'the', 'and', 'or', 'but', 'if', 'then', 'of', 'to', 'in', 'on', 'for', 'with', 'by', 'as', - 'is', 'are', 'was', 'were', 'be', 'been', 'it', 'this', 'that', 'at', 'from', 'not', 'can', 'do', 'does', - 'we', 'you', 'they', 'i', 'he', 'she', 'has', 'have', 'had', 'will', 'patient', 'reports', 'denies' -]) - -export function tokenize(text: string): string[] { - const normalized = (text || '').toLowerCase().replace(/[^\w\-]+/g, ' ').trim() - if (!normalized) return [] - return normalized.split(/\s+/).filter(t => t.length >= 2 && !STOP_WORDS.has(t)) -} - -export function extractNumbers(text: string): string[] { - return (text || '').match(/(? n.replace(',', '.')) - if (claimNums.length === 0) return 1.0 - - const evidenceNums = new Set(extractNumbers(evidence).map(n => n.replace(',', '.'))) - if (evidenceNums.size === 0) return 0 - - let hits = 0 - for (const n of claimNums) if (evidenceNums.has(n)) hits++ - return hits / claimNums.length -} - -export function looksSupported(claim: string, evidence: string, minOverlap = 0.25, minNumCov = 1.0): [boolean, number] { - const overlap = calculateOverlap(claim, evidence) - const numCov = numberCoverage(claim, evidence) - const score = overlap * 0.7 + numCov * 0.3 - return [overlap >= minOverlap && numCov >= minNumCov, score] -} - -export function classifyClaim(text: string): ClaimKind { - const lower = text.toLowerCase().trim() - if (lower.endsWith('?')) return 'question' - if (['i think', 'i believe', 'probably', 'likely'].some(p => lower.includes(p))) return 'inference' - if (['in my opinion', 'i feel'].some(p => lower.includes(p))) return 'opinion' - if (['do ', 'please ', 'recommend ', 'consider '].some(p => lower.startsWith(p))) return 'instruction' - return 'fact' -} - -export function determineVerdict(score: number, kind: ClaimKind): Verdict { - if (kind !== 'fact') return 'uncertain' - if (score >= 0.5) return 'supported' - if (score >= 0.25) return 'uncertain' - return 'unsupported' -}