Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@
"vitest": "^3.1.4"
},
"dependencies": {
"@supabase/supabase-js": "^2.58.0",
"dotenv": "^16.5.0",
"jsonwebtoken": "^9.0.2",
"openai": "^5.23.1",
"pino": "^9.7.0",
"pino-pretty": "^13.0.0",
"sqlite": "^5.1.1",
Expand Down
34 changes: 28 additions & 6 deletions src/lib/iMessages.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { settings } from '$lib/config'
import type { MessageRow } from '$lib/types'
import type { Message, MessageEmbeddingInput, MessageRow } from '$lib/types'
import os from 'node:os'
import path from 'node:path'
import { open } from 'sqlite'
Expand All @@ -22,7 +22,8 @@ const buildQuery = (startDate: string, endDate: string) => {
datetime(message.date / 1000000000 + strftime('%s', '2001-01-01'), 'unixepoch') AS timestamp,
message.text AS text,
handle.id AS contact_id,
message.is_from_me
message.is_from_me,
message.guid AS guid
FROM message
JOIN handle ON message.handle_id = handle.ROWID
WHERE message.text IS NOT NULL
Expand Down Expand Up @@ -50,10 +51,27 @@ const formatMessages = (rows: MessageRow[]) => {
.reverse() // Reverse to get chronological order
}

export const queryMessagesDb = async (startDate: string, endDate: string) => {
/**
 * Converts a chat.db timestamp into a canonical ISO-8601 UTC string.
 *
 * The query's `datetime(..., 'unixepoch')` expression yields UTC text shaped like
 * `YYYY-MM-DD HH:MM:SS` without a zone designator. The space separator is swapped
 * for `T` and a trailing `Z` is appended so the value is in the ISO format that
 * `Date` is specified to parse, instead of relying on engine-specific fallbacks.
 *
 * @param raw - Timestamp text as read from the database row.
 * @returns The normalized ISO string, or `null` when the value cannot be parsed.
 */
const toIsoUtcTimestamp = (raw: string): string | null => {
  if (typeof raw !== 'string' || raw.length === 0) {
    return null
  }

  const isoLike = raw.replace(/^(\d{4}-\d{2}-\d{2}) (?=\d)/, '$1T')
  const parsed = new Date(isoLike.endsWith('Z') ? isoLike : `${isoLike}Z`)

  // `toISOString()` throws a RangeError on an invalid date, so check validity first.
  return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString()
}

/**
 * Maps raw message rows into the shape used for embedding and indexing.
 *
 * Rows whose timestamp cannot be parsed are skipped, so a single malformed row
 * no longer throws and discards the rest of the batch.
 *
 * @param rows - Rows returned by the messages query.
 * @returns One embedding input per row that has a usable timestamp, in input order.
 */
const mapEmbeddableMessages = (rows: MessageRow[]): MessageEmbeddingInput[] => {
  return rows.flatMap((row): MessageEmbeddingInput[] => {
    const ts = toIsoUtcTimestamp(row.timestamp)
    if (ts === null) {
      return []
    }

    // 'me' for outgoing messages, 'them' for the configured contact,
    // otherwise the raw contact id (or 'unknown' when it is missing).
    const sender = row.is_from_me
      ? 'me'
      : row.contact_id === settings.CONTACT_PHONE
        ? 'them'
        : row.contact_id || 'unknown'

    return [
      {
        message_id: row.guid,
        thread_id: row.contact_id ?? settings.CONTACT_PHONE ?? 'unknown',
        ts,
        sender,
        text: row.text || '',
      },
    ]
  })
}

export const queryMessagesDb = async (
startDate: string,
endDate: string
): Promise<{ messages: Message[]; embeddableMessages: MessageEmbeddingInput[] }> => {
if (!settings.CONTACT_PHONE) {
logger.warn('CONTACT_PHONE setting not configured')
return { messages: [] }
return { messages: [], embeddableMessages: [] }
}

const db = await open({ filename: CHAT_DB_PATH, driver: sqlite3.Database })
Expand All @@ -64,12 +82,16 @@ export const queryMessagesDb = async (startDate: string, endDate: string) => {
logger.info({ count: rows.length, handleId: settings.CONTACT_PHONE }, 'Fetched messages')

const formattedMessages = formatMessages(rows)
const embeddableMessages = mapEmbeddableMessages(rows)

// Return empty array if there are no messages from the contact
return { messages: hasContactMessages(formattedMessages) ? formattedMessages : [] }
return {
messages: hasContactMessages(formattedMessages) ? formattedMessages : [],
embeddableMessages,
}
} catch (error) {
logger.error({ error }, 'Error querying messages database')
return { messages: [] }
return { messages: [], embeddableMessages: [] }
} finally {
await db.close()
}
Expand Down
63 changes: 63 additions & 0 deletions src/lib/server/getSimilarMessages.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import { logger } from '$lib/logger'
import type { SimilarMessageSnippet } from '$lib/types'
import { getOpenAIClient } from './openai'
import { supabase } from './supabase'
import type { SupabaseClient } from '@supabase/supabase-js'

const EMBEDDING_MODEL = 'text-embedding-3-small'

/**
 * Looks up previously indexed messages that are semantically close to `query`
 * within a single thread.
 *
 * The query text is embedded with the OpenAI client and the resulting vector is
 * passed to the `match_message_embeddings` Supabase RPC. The lookup is best
 * effort: any missing dependency or failure is logged and yields an empty list.
 *
 * @param query - Free text to search for; surrounding whitespace is ignored.
 * @param threadId - Thread to restrict the search to.
 * @param k - Maximum number of matches requested from the RPC.
 * @returns The matching snippets (text, timestamp, sender), or `[]` when nothing
 *   could be retrieved.
 */
export const getSimilarMessages = async (
  query: string,
  threadId: string,
  k = 5
): Promise<SimilarMessageSnippet[]> => {
  const searchText = query.trim()
  if (searchText.length === 0 || !threadId) {
    return []
  }

  if (!supabase) {
    logger.debug('Supabase client unavailable; skipping semantic recall lookup')
    return []
  }

  const openai = getOpenAIClient()
  if (!openai) {
    logger.debug('OpenAI client unavailable; skipping semantic recall lookup')
    return []
  }

  try {
    const embedded = await openai.embeddings.create({
      model: EMBEDDING_MODEL,
      input: searchText,
    })

    const vector = embedded.data[0]?.embedding
    if (!vector) {
      logger.warn('Failed to compute query embedding for semantic recall lookup')
      return []
    }

    const db = supabase as SupabaseClient
    const rpcArgs = {
      query_embedding: vector,
      thread_filter: threadId,
      match_count: k,
    }
    const { data, error } = await db.rpc('match_message_embeddings', rpcArgs)

    if (error) {
      logger.error({ error }, 'Failed to fetch similar messages from Supabase')
      return []
    }

    // Keep only the fields exposed to callers; any extra RPC columns are dropped.
    const matches = data || []
    return matches.map((row) => ({
      text: row.text as string,
      ts: row.ts as string,
      sender: row.sender as string,
    }))
  } catch (error) {
    logger.error({ error }, 'Semantic recall lookup failed')
    return []
  }
}
111 changes: 111 additions & 0 deletions src/lib/server/indexMessages.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import { logger } from '$lib/logger'
import type { MessageEmbeddingInput } from '$lib/types'
import { getOpenAIClient } from './openai'
import { supabase } from './supabase'
import type { SupabaseClient } from '@supabase/supabase-js'

const EMBEDDING_MODEL = 'text-embedding-3-small'

/**
 * Maximum number of messages embedded and upserted per round trip.
 *
 * The embeddings endpoint limits how many inputs (and how many total tokens) a
 * single request may carry, so a large date range must not be sent in one call.
 * The value is deliberately conservative.
 */
const EMBEDDING_BATCH_SIZE = 100

/**
 * Embeds messages that are not yet indexed and stores them in the
 * `message_embeddings` table.
 *
 * Indexing is best effort: when Supabase or OpenAI is unavailable the call is a
 * no-op, and failures are logged rather than thrown. Work is done in batches of
 * `EMBEDDING_BATCH_SIZE`; processing stops at the first failing batch, and
 * batches already upserted stay in place (a later call skips them through the
 * existing-id check).
 *
 * @param messages - Candidate messages; invalid and duplicate entries are
 *   filtered out before embedding.
 * @returns Resolves once indexing has finished or been abandoned.
 */
export const indexMessages = async (messages: MessageEmbeddingInput[]): Promise<void> => {
  if (!messages.length) {
    return
  }

  if (!supabase) {
    logger.debug('Supabase client unavailable; skipping message indexing')
    return
  }

  const client = getOpenAIClient()
  if (!client) {
    logger.debug('OpenAI client unavailable; skipping message indexing')
    return
  }

  const uniqueMessages = dedupeMessages(messages)
  if (!uniqueMessages.length) {
    return
  }

  try {
    const supabaseClient = supabase as SupabaseClient
    const messagesToEmbed = await filterExisting(supabaseClient, uniqueMessages)
    if (!messagesToEmbed.length) {
      logger.debug('No new messages to embed')
      return
    }

    for (let start = 0; start < messagesToEmbed.length; start += EMBEDDING_BATCH_SIZE) {
      const batch = messagesToEmbed.slice(start, start + EMBEDDING_BATCH_SIZE)

      const embeddingResponse = await client.embeddings.create({
        model: EMBEDDING_MODEL,
        input: batch.map((msg) => msg.text),
      })

      // Embeddings are paired with messages by position, so the counts must agree.
      if (embeddingResponse.data.length !== batch.length) {
        logger.warn(
          {
            expected: batch.length,
            received: embeddingResponse.data.length,
          },
          'Mismatch between embeddings and messages'
        )
        return
      }

      const rows = batch.map((message, index) => ({
        message_id: message.message_id,
        thread_id: message.thread_id,
        ts: message.ts,
        sender: message.sender,
        text: message.text,
        embedding: embeddingResponse.data[index].embedding,
      }))

      const { error } = await supabaseClient.from('message_embeddings').upsert(rows, {
        onConflict: 'message_id',
      })

      if (error) {
        logger.error({ error }, 'Failed to upsert message embeddings')
        // Stop here rather than paying for embeddings that likely cannot be stored.
        return
      }
    }
  } catch (error) {
    logger.error({ error }, 'Failed to index messages')
  }
}

/**
 * Drops messages that cannot be indexed and collapses duplicates.
 *
 * A message is discarded when it has no `message_id`, no `thread_id`, or only
 * whitespace for text. Duplicates are detected on `message_id` alone, because
 * that is the key used both by the existing-row lookup and by the upsert's
 * `onConflict` target; two rows sharing a `message_id` in one upsert batch
 * would make the statement fail. The first occurrence wins.
 *
 * @param messages - Candidate messages in arrival order.
 * @returns The valid messages with at most one entry per `message_id`.
 */
const dedupeMessages = (messages: MessageEmbeddingInput[]) => {
  const seenIds = new Set<string>()

  return messages.filter((message) => {
    if (!message.message_id || !message.thread_id || !message.text.trim()) {
      return false
    }

    if (seenIds.has(message.message_id)) {
      return false
    }

    seenIds.add(message.message_id)
    return true
  })
}

/**
 * Number of message ids checked per lookup query. The `in` filter carries every
 * id in the request, so the list is kept bounded instead of growing with the
 * size of the date range being indexed.
 */
const EXISTING_LOOKUP_CHUNK_SIZE = 100

/**
 * Removes messages that already have a row in `message_embeddings`.
 *
 * Ids are looked up in chunks of `EXISTING_LOOKUP_CHUNK_SIZE`. If any lookup
 * fails, the error is logged and the full input is returned unchanged, so the
 * caller re-embeds everything and relies on the upsert to stay idempotent.
 *
 * @param client - Supabase client used for the lookup.
 * @param messages - Candidate messages.
 * @returns The messages whose `message_id` is not stored yet, in input order.
 */
const filterExisting = async (
  client: SupabaseClient,
  messages: MessageEmbeddingInput[]
): Promise<MessageEmbeddingInput[]> => {
  const existingIds = new Set<string>()

  for (let start = 0; start < messages.length; start += EXISTING_LOOKUP_CHUNK_SIZE) {
    const ids = messages
      .slice(start, start + EXISTING_LOOKUP_CHUNK_SIZE)
      .map((message) => message.message_id)

    const { data, error } = await client.from('message_embeddings').select('message_id').in('message_id', ids)

    if (error) {
      logger.error({ error }, 'Failed to fetch existing message embeddings')
      return messages
    }

    for (const row of data || []) {
      existingIds.add(row.message_id)
    }
  }

  return messages.filter((message) => !existingIds.has(message.message_id))
}

// TODO: Consider moving indexing into a background job for batch backfilling older history.
19 changes: 19 additions & 0 deletions src/lib/server/openai.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { settings } from '$lib/config'
import { logger } from '$lib/logger'
import OpenAI from 'openai'

// Lazily created client, reused by every caller once it exists.
let cachedClient: OpenAI | null = null

/**
 * Returns the shared OpenAI client, creating it on first use.
 *
 * @returns The cached client, or `null` (after logging a warning) when no
 *   client exists yet and `settings.OPENAI_API_KEY` is not set.
 */
export const getOpenAIClient = (): OpenAI | null => {
  if (!cachedClient) {
    const apiKey = settings.OPENAI_API_KEY
    if (!apiKey) {
      logger.warn('OpenAI API key missing; semantic recall disabled')
      return null
    }

    cachedClient = new OpenAI({ apiKey })
  }

  return cachedClient
}
21 changes: 21 additions & 0 deletions src/lib/server/supabase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { env } from '$env/dynamic/private'
import { createClient, type SupabaseClient } from '@supabase/supabase-js'
import { logger } from '$lib/logger'

/**
 * Builds the Supabase client from `SUPABASE_URL` and `SUPABASE_SERVICE_ROLE_KEY`.
 *
 * The client is created with token auto-refresh and session persistence switched
 * off. When either variable is missing a warning is logged and no client is
 * created.
 *
 * @returns The configured client, or `null` when the environment is incomplete.
 */
const createServiceClient = (): SupabaseClient | null => {
  const url = env.SUPABASE_URL
  const serviceRoleKey = env.SUPABASE_SERVICE_ROLE_KEY

  if (url && serviceRoleKey) {
    return createClient(url, serviceRoleKey, {
      auth: {
        autoRefreshToken: false,
        persistSession: false,
      },
    })
  }

  logger.warn('Supabase environment variables missing; semantic recall disabled')
  return null
}

/** Shared Supabase client, resolved once at module load; `null` when not configured. */
export const supabase = createServiceClient()
15 changes: 15 additions & 0 deletions src/lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export interface MessageRow {
date: string
contact_id?: string
timestamp: string
guid: string
}

export interface PageData {
Expand All @@ -35,3 +36,17 @@ export interface OpenAIConfig {
apiUrl: string
apiKey?: string
}

/**
 * A single message in the shape used for embedding and for rows written to the
 * `message_embeddings` table.
 */
export interface MessageEmbeddingInput {
/** Message identifier (the iMessage row's `guid`); also the upsert conflict key. */
message_id: string
/** Conversation the message belongs to: the contact id, else the configured contact phone, else 'unknown'. */
thread_id: string
/** Message time as an ISO-8601 string. */
ts: string
/** 'me' for outgoing messages, 'them' for the configured contact, otherwise the contact id or 'unknown'. */
sender: string
/** Message body; this is the text that gets embedded. */
text: string
}

/**
 * One match returned by the semantic recall lookup, reduced to the `text`, `ts`
 * and `sender` fields of a row from the `match_message_embeddings` RPC.
 */
export interface SimilarMessageSnippet {
/** Body of the matched message. */
text: string
/** Timestamp of the matched message, as returned by the RPC. */
ts: string
/** Sender label of the matched message, as returned by the RPC. */
sender: string
}
Loading