From fa1881bf6cbc80534c1942050980dad5bf20f69f Mon Sep 17 00:00:00 2001 From: Prateek Jain <49508975+Prateek32177@users.noreply.github.com> Date: Thu, 19 Feb 2026 14:48:35 +0530 Subject: [PATCH] Add AI-assisted semantic normalization with manual fallbacks --- README.md | 48 ++++++++- src/index.ts | 2 +- src/normalization/semantic.ts | 183 ++++++++++++++++++++++++++++++++++ src/normalization/simple.ts | 34 ++++++- src/test.ts | 53 ++++++++++ src/types.ts | 33 ++++++ 6 files changed, 346 insertions(+), 7 deletions(-) create mode 100644 src/normalization/semantic.ts diff --git a/README.md b/README.md index 6cf5a91..9531412 100644 --- a/README.md +++ b/README.md @@ -369,6 +369,53 @@ export default { }; ``` +## AI-assisted semantic normalization + +Tern now supports layered normalization: + +1. Built-in category normalization (payment/auth/infrastructure) +2. Raw payload retention (`_raw`) for zero data loss +3. Semantic extraction with manual fallbacks (`_semantic`) + +```typescript +const result = await WebhookVerificationService.verifyWithPlatformConfig( + request, + 'stripe', + process.env.STRIPE_WEBHOOK_SECRET!, + 300, + { + includeRaw: true, + semantic: { + ai: { + // Provider failover order + providers: ['groq', 'cohere', 'openai', 'anthropic', 'google'], + minimumConfidence: 0.75, + }, + fields: { + customer_email: { + description: 'email of the customer who paid', + // fallback dot-path if AI is unavailable / low confidence + fallback: 'data.object.billing_details.email', + }, + }, + }, + }, +); + +const email = (result.payload as any)._semantic?.fields.customer_email; +const meta = (result.payload as any)._semantic?.meta.customer_email; +// meta.source: 'ai' | 'manual' | 'missing' +``` + +Provider env vars supported for AI extraction: +- `GROQ_API_KEY` +- `COHERE_API_KEY` +- `OPENAI_API_KEY` +- `ANTHROPIC_API_KEY` +- `GOOGLE_GENERATIVE_AI_API_KEY` + +If no provider is configured (or AI confidence is too low), Tern automatically falls back to manual field mapping paths. + ## API Reference ### WebhookVerificationService @@ -545,4 +592,3 @@ export const POST = createWebhookHandler({ handler: async (payload) => ({ received: true, event: payload.event ?? payload.type }), }); ``` - diff --git a/src/index.ts b/src/index.ts index fb2d168..c67224c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -29,7 +29,7 @@ export class WebhookVerificationService { result.platform = config.platform; if (config.normalize) { - result.payload = normalizePayload(config.platform, result.payload, config.normalize); + result.payload = await normalizePayload(config.platform, result.payload, config.normalize); } } diff --git a/src/normalization/semantic.ts b/src/normalization/semantic.ts new file mode 100644 index 0000000..6b1494d --- /dev/null +++ b/src/normalization/semantic.ts @@ -0,0 +1,183 @@ +import { + SemanticAIProvider, + SemanticFieldConfig, + SemanticFieldMeta, + SemanticNormalizationResult, + SemanticNormalizeOptions, +} from '../types'; + +interface ResolvedFieldDefinition { + key: string; + description: string; + fallbackPaths: string[]; +} + +interface AIExtractionCandidate { + key: string; + value: unknown; + sourceField?: string; + confidence?: number; + reasoning?: string; +} + +function readPath(payload: Record, path: string): any { + return path.split('.').reduce((acc, key) => { + if (acc === undefined || acc === null) { + return undefined; + } + return acc[key]; + }, payload as any); +} + +function toFieldDefinition(key: string, config: string | SemanticFieldConfig): ResolvedFieldDefinition { + if (typeof config === 'string') { + return { + key, + description: config, + fallbackPaths: [], + }; + } + + const fallback = config.fallback + ? (Array.isArray(config.fallback) ? config.fallback : [config.fallback]) + : []; + + return { + key, + description: config.description || key, + fallbackPaths: fallback, + }; +} + +function parseJSONResponse(content: string): AIExtractionCandidate[] { + const clean = content.trim().replace(/^```json\s*/i, '').replace(/```$/i, ''); + const parsed = JSON.parse(clean) as { fields?: AIExtractionCandidate[] }; + return parsed.fields || []; +} + +async function extractWithProvider( + provider: SemanticAIProvider, + fields: ResolvedFieldDefinition[], + payload: unknown, +): Promise { + const prompt = `Extract semantic webhook fields from a raw payload.\nReturn strict JSON with shape: {"fields":[{"key":"...","value":...,"sourceField":"dot.path","confidence":0-1,"reasoning":"..."}]}.\nOnly include keys requested below.\nRequested fields: ${JSON.stringify(fields)}\nPayload: ${JSON.stringify(payload)}`; + + try { + const { generateText } = await loadModule('ai'); + + if (provider === 'groq' && process.env.GROQ_API_KEY) { + const { createGroq } = await loadModule('@ai-sdk/groq'); + const groq = createGroq({ apiKey: process.env.GROQ_API_KEY }); + const output = await generateText({ model: groq('llama-3.1-8b-instant'), prompt }); + return parseJSONResponse(output.text); + } + + if (provider === 'cohere' && process.env.COHERE_API_KEY) { + const { createCohere } = await loadModule('@ai-sdk/cohere'); + const cohere = createCohere({ apiKey: process.env.COHERE_API_KEY }); + const output = await generateText({ model: cohere('command-r-plus'), prompt }); + return parseJSONResponse(output.text); + } + + if (provider === 'openai' && process.env.OPENAI_API_KEY) { + const { createOpenAI } = await loadModule('@ai-sdk/openai'); + const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY }); + const output = await generateText({ model: openai('gpt-4o-mini'), prompt }); + return parseJSONResponse(output.text); + } + + if (provider === 'anthropic' && process.env.ANTHROPIC_API_KEY) { + const { createAnthropic } = await loadModule('@ai-sdk/anthropic'); + const anthropic = createAnthropic({ apiKey: process.env.ANTHROPIC_API_KEY }); + const output = await generateText({ model: anthropic('claude-3-5-haiku-latest'), prompt }); + return parseJSONResponse(output.text); + } + + if (provider === 'google' && process.env.GOOGLE_GENERATIVE_AI_API_KEY) { + const { createGoogleGenerativeAI } = await loadModule('@ai-sdk/google'); + const google = createGoogleGenerativeAI({ apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY }); + const output = await generateText({ model: google('gemini-1.5-flash'), prompt }); + return parseJSONResponse(output.text); + } + } catch { + return null; + } + + return null; +} + +async function loadModule(moduleName: string): Promise { + const dynamicImporter = new Function('moduleName', 'return import(moduleName);') as (name: string) => Promise; + return dynamicImporter(moduleName); +} + +export async function runSemanticNormalization( + payload: unknown, + options: SemanticNormalizeOptions, +): Promise { + const definitions = Object.entries(options.fields).map(([key, value]) => toFieldDefinition(key, value)); + const preferredProviders = options.ai?.providers || ['groq', 'cohere', 'openai', 'anthropic', 'google']; + const minimumConfidence = options.ai?.minimumConfidence ?? 0.7; + + let extractedByAI: AIExtractionCandidate[] = []; + + if (options.ai?.enabled !== false) { + for (const provider of preferredProviders) { + const extracted = await extractWithProvider(provider, definitions, payload); + if (extracted && extracted.length > 0) { + extractedByAI = extracted; + break; + } + } + } + + const fields: Record = {}; + const meta: Record = {}; + const payloadObj = (payload && typeof payload === 'object') ? payload as Record : {}; + + for (const definition of definitions) { + const aiMatch = extractedByAI.find((entry) => entry.key === definition.key); + const confidence = aiMatch?.confidence ?? 0; + + if (aiMatch && aiMatch.value !== undefined && confidence >= minimumConfidence) { + fields[definition.key] = aiMatch.value; + meta[definition.key] = { + source: 'ai', + sourceField: aiMatch.sourceField, + confidence, + reasoning: aiMatch.reasoning, + }; + continue; + } + + let resolvedFallback: unknown; + let resolvedPath: string | undefined; + for (const path of definition.fallbackPaths) { + const candidate = readPath(payloadObj, path); + if (candidate !== undefined) { + resolvedFallback = candidate; + resolvedPath = path; + break; + } + } + + if (resolvedFallback !== undefined) { + fields[definition.key] = resolvedFallback; + meta[definition.key] = { + source: 'manual', + sourceField: resolvedPath, + confidence: 1, + }; + continue; + } + + fields[definition.key] = null; + meta[definition.key] = { + source: 'missing', + confidence, + reasoning: aiMatch?.reasoning || 'Field not found in AI extraction or manual fallback paths.', + }; + } + + return { fields, meta }; +} diff --git a/src/normalization/simple.ts b/src/normalization/simple.ts index d13d21b..9258afb 100644 --- a/src/normalization/simple.ts +++ b/src/normalization/simple.ts @@ -7,7 +7,9 @@ import { AuthWebhookNormalized, InfrastructureWebhookNormalized, UnknownNormalizedWebhook, + SemanticNormalizationResult, } from '../types'; +import { runSemanticNormalization } from './semantic'; type PlatformNormalizationFn = (payload: any) => Omit; @@ -149,11 +151,11 @@ function buildUnknownNormalizedPayload( }; } -export function normalizePayload( +export async function normalizePayload( platform: WebhookPlatform, payload: any, normalize?: boolean | NormalizeOptions, -): AnyNormalizedWebhook | unknown { +): Promise { const options = resolveNormalizeOptions(normalize); if (!options.enabled) { return payload; @@ -163,24 +165,46 @@ export function normalizePayload( const inferredCategory = spec?.category; if (!spec) { - return buildUnknownNormalizedPayload(platform, payload, options.category, options.includeRaw); + const unknownPayload = buildUnknownNormalizedPayload(platform, payload, options.category, options.includeRaw); + return attachSemanticIfNeeded(unknownPayload, payload, normalize); } if (options.category && options.category !== inferredCategory) { - return buildUnknownNormalizedPayload( + const unknownPayload = buildUnknownNormalizedPayload( platform, payload, inferredCategory, options.includeRaw, `Requested normalization category '${options.category}' does not match platform category '${inferredCategory}'`, ); + return attachSemanticIfNeeded(unknownPayload, payload, normalize); } const normalized = spec.normalize(payload); - return { + const basePayload = { ...normalized, _platform: platform, _raw: options.includeRaw ? payload : undefined, } as AnyNormalizedWebhook; + + return attachSemanticIfNeeded(basePayload, payload, normalize); +} + +async function attachSemanticIfNeeded( + normalizedPayload: AnyNormalizedWebhook, + rawPayload: unknown, + normalize?: boolean | NormalizeOptions, +): Promise { + const semanticOptions = typeof normalize === 'object' ? normalize.semantic : undefined; + if (!semanticOptions) { + return normalizedPayload; + } + + const semanticResult: SemanticNormalizationResult = await runSemanticNormalization(rawPayload, semanticOptions); + + return { + ...normalizedPayload, + _semantic: semanticResult, + }; } diff --git a/src/test.ts b/src/test.ts index af3d80a..3ab150b 100644 --- a/src/test.ts +++ b/src/test.ts @@ -393,6 +393,59 @@ try { console.log(' āŒ Category registry test failed:', error); } + // Test 13: Semantic normalization with manual fallback + console.log('\n13. Testing semantic normalization fallback...'); + try { + const semanticStripeBody = JSON.stringify({ + type: 'payment_intent.succeeded', + data: { + object: { + id: 'pi_789', + billing_details: { + email: 'buyer@example.com', + }, + }, + }, + }); + + const timestamp = Math.floor(Date.now() / 1000); + const stripeSignature = createStripeSignature(semanticStripeBody, testSecret, timestamp); + + const request = createMockRequest( + { + 'stripe-signature': stripeSignature, + 'content-type': 'application/json', + }, + semanticStripeBody, + ); + + const result = await WebhookVerificationService.verifyWithPlatformConfig( + request, + 'stripe', + testSecret, + 300, + { + semantic: { + fields: { + customer_email: { + description: 'email of the paying customer', + fallback: 'data.object.billing_details.email', + }, + }, + }, + }, + ); + + const payload = result.payload as Record; + const semanticEmail = payload._semantic?.fields?.customer_email; + const semanticSource = payload._semantic?.meta?.customer_email?.source; + const passed = result.isValid && semanticEmail === 'buyer@example.com' && semanticSource === 'manual'; + + console.log(' āœ… Semantic fallback:', passed ? 'PASSED' : 'FAILED'); + } catch (error) { + console.log(' āŒ Semantic fallback test failed:', error); + } + console.log('\nšŸŽ‰ All tests completed!'); } diff --git a/src/types.ts b/src/types.ts index 07da81d..ad5b75c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -63,6 +63,38 @@ export interface BaseNormalizedWebhook { _platform: WebhookPlatform | string; _raw: unknown; occurred_at?: string; + _semantic?: SemanticNormalizationResult; +} + +export type SemanticAIProvider = 'groq' | 'cohere' | 'openai' | 'anthropic' | 'google'; + +export interface SemanticFieldConfig { + description?: string; + fallback?: string | string[]; + required?: boolean; +} + +export interface SemanticFieldMeta { + source: 'ai' | 'manual' | 'missing'; + sourceField?: string; + confidence: number; + reasoning?: string; +} + +export interface SemanticNormalizationResult { + fields: Record; + meta: Record; +} + +export interface SemanticAIOptions { + enabled?: boolean; + minimumConfidence?: number; + providers?: SemanticAIProvider[]; +} + +export interface SemanticNormalizeOptions { + fields: Record; + ai?: SemanticAIOptions; } export type PaymentWebhookEvent = @@ -145,6 +177,7 @@ export interface NormalizeOptions { enabled?: boolean; category?: NormalizationCategory; includeRaw?: boolean; + semantic?: SemanticNormalizeOptions; } export interface WebhookVerificationResult {