@@ -102,7 +102,10 @@ interface EmbeddingResponse {
102102 [ key : string ] : any ;
103103}
104104
105- async function embedWithRetry ( texts : string [ ] , retryCount = 0 ) : Promise < EmbeddingResponse > {
105+ async function embedWithRetry (
106+ texts : string [ ] ,
107+ retryCount = 0
108+ ) : Promise < EmbeddingResponse > {
106109 try {
107110 // Set a timeout promise
108111 const timeoutPromise = new Promise < never > ( ( _ , reject ) => {
@@ -116,7 +119,7 @@ async function embedWithRetry(texts: string[], retryCount = 0): Promise<Embeddin
116119 input : texts ,
117120 inputType : 'document' ,
118121 } ) as Promise < EmbeddingResponse > ,
119- timeoutPromise
122+ timeoutPromise ,
120123 ] ) ;
121124 } catch ( error : any ) {
122125 // Check if we still have retries left
@@ -131,26 +134,31 @@ async function embedWithRetry(texts: string[], retryCount = 0): Promise<Embeddin
131134 ) ;
132135 await wait ( delay ) ;
133136 return embedWithRetry ( texts , retryCount + 1 ) ;
134- }
137+ }
135138 // Handle timeouts and other transient errors
136- else if ( error . message === 'Request timed out' ||
137- error . message ?. includes ( 'timeout' ) ||
138- error . message ?. includes ( 'network' ) ||
139- error . code === 'ECONNRESET' ||
140- error . code === 'ETIMEDOUT' ) {
139+ else if (
140+ error . message === 'Request timed out' ||
141+ error . message ?. includes ( 'timeout' ) ||
142+ error . message ?. includes ( 'network' ) ||
143+ error . code === 'ECONNRESET' ||
144+ error . code === 'ETIMEDOUT'
145+ ) {
141146 const delay = INITIAL_RETRY_DELAY * Math . pow ( 2 , retryCount ) ;
142147 console . log (
143- `Request failed with error: ${ error . message } . Waiting ${ delay } ms before retry ${
144- retryCount + 1
145- } /${ MAX_RETRIES } `
148+ `Request failed with error: ${
149+ error . message
150+ } . Waiting ${ delay } ms before retry ${ retryCount + 1 } /${ MAX_RETRIES } `
146151 ) ;
147152 await wait ( delay ) ;
148153 return embedWithRetry ( texts , retryCount + 1 ) ;
149154 }
150155 }
151-
156+
152157 // No more retries or non-retriable error
153- console . error ( `Embedding failed after ${ retryCount } retries:` , error . message ) ;
158+ console . error (
159+ `Embedding failed after ${ retryCount } retries:` ,
160+ error . message
161+ ) ;
154162 throw error ;
155163 }
156164}
@@ -165,49 +173,88 @@ function extractPostDate(filePath: string, frontmatter: any): Date {
165173 // Approach 1: Direct Date constructor (handles ISO formats and many common formats)
166174 const parsedDate = new Date ( frontmatter . date ) ;
167175 if ( ! isNaN ( parsedDate . getTime ( ) ) ) {
168- console . log ( `Date from frontmatter (direct): ${ parsedDate . toISOString ( ) } for ${ filePath } ` ) ;
176+ console . log (
177+ `Date from frontmatter (direct): ${ parsedDate . toISOString ( ) } for ${ filePath } `
178+ ) ;
169179 return parsedDate ;
170180 }
171-
181+
172182 // Approach 2: Handle month name formats like "Jun 1, 2024" or "June 1 2024"
173- const monthNameMatch = String ( frontmatter . date ) . match ( / ( [ A - Z a - z ] + ) \s + ( \d { 1 , 2 } ) (?: , ? \s + ) ? ( \d { 4 } ) / ) ;
183+ const monthNameMatch = String ( frontmatter . date ) . match (
184+ / ( [ A - Z a - z ] + ) \s + ( \d { 1 , 2 } ) (?: , ? \s + ) ? ( \d { 4 } ) /
185+ ) ;
174186 if ( monthNameMatch ) {
175187 const [ _ , month , day , year ] = monthNameMatch ;
176- const monthMap : { [ key : string ] : number } = {
177- jan : 0 , january : 0 , feb : 1 , february : 1 , mar : 2 , march : 2 ,
178- apr : 3 , april : 3 , may : 4 , jun : 5 , june : 5 , jul : 6 , july : 6 ,
179- aug : 7 , august : 7 , sep : 8 , september : 8 , oct : 9 , october : 9 ,
180- nov : 10 , november : 10 , dec : 11 , december : 11
188+ const monthMap : { [ key : string ] : number } = {
189+ jan : 0 ,
190+ january : 0 ,
191+ feb : 1 ,
192+ february : 1 ,
193+ mar : 2 ,
194+ march : 2 ,
195+ apr : 3 ,
196+ april : 3 ,
197+ may : 4 ,
198+ jun : 5 ,
199+ june : 5 ,
200+ jul : 6 ,
201+ july : 6 ,
202+ aug : 7 ,
203+ august : 7 ,
204+ sep : 8 ,
205+ september : 8 ,
206+ oct : 9 ,
207+ october : 9 ,
208+ nov : 10 ,
209+ november : 10 ,
210+ dec : 11 ,
211+ december : 11 ,
181212 } ;
182-
213+
183214 const monthIndex = monthMap [ month . toLowerCase ( ) ] ;
184215 if ( monthIndex !== undefined ) {
185- const formattedDate = new Date ( parseInt ( year ) , monthIndex , parseInt ( day ) ) ;
216+ const formattedDate = new Date (
217+ parseInt ( year ) ,
218+ monthIndex ,
219+ parseInt ( day )
220+ ) ;
186221 if ( ! isNaN ( formattedDate . getTime ( ) ) ) {
187- console . log ( `Date from frontmatter (month name): ${ formattedDate . toISOString ( ) } for ${ filePath } ` ) ;
222+ console . log (
223+ `Date from frontmatter (month name): ${ formattedDate . toISOString ( ) } for ${ filePath } `
224+ ) ;
188225 return formattedDate ;
189226 }
190227 }
191228 }
192-
229+
193230 // Log warning if we have a date field but couldn't parse it
194- console . warn ( `Warning: Could not parse date '${ frontmatter . date } ' from frontmatter in ${ filePath } ` ) ;
231+ console . warn (
232+ `Warning: Could not parse date '${ frontmatter . date } ' from frontmatter in ${ filePath } `
233+ ) ;
195234 }
196235
197236 // Try to parse from filename as fallback (e.g., MMDDYY.md format)
198237 const filenameMatch = filePath . match ( / ( \d { 2 } ) ( \d { 2 } ) ( \d { 2 } ) \. m d $ / ) ;
199238 if ( filenameMatch ) {
200239 const [ _ , month , day , year ] = filenameMatch ;
201240 const fullYear = parseInt ( `20${ year } ` ) ; // Assuming 20xx years
202- const dateFromFilename = new Date ( fullYear , parseInt ( month ) - 1 , parseInt ( day ) ) ;
203- console . log ( `Date from filename: ${ dateFromFilename . toISOString ( ) } for ${ filePath } ` ) ;
241+ const dateFromFilename = new Date (
242+ fullYear ,
243+ parseInt ( month ) - 1 ,
244+ parseInt ( day )
245+ ) ;
246+ console . log (
247+ `Date from filename: ${ dateFromFilename . toISOString ( ) } for ${ filePath } `
248+ ) ;
204249 return dateFromFilename ;
205250 }
206251
207252 // Use a stable default date for posts with no date instead of current date
208253 // Using January 1, 2020 as a reasonable default that will still sort correctly
209254 const defaultDate = new Date ( 2020 , 0 , 1 ) ;
210- console . warn ( `Warning: No date found for ${ filePath } , using default date ${ defaultDate . toISOString ( ) } ` ) ;
255+ console . warn (
256+ `Warning: No date found for ${ filePath } , using default date ${ defaultDate . toISOString ( ) } `
257+ ) ;
211258 return defaultDate ;
212259}
213260
@@ -240,7 +287,7 @@ async function generateEmbeddingsForSingleFile(
240287 const { frontmatter, chunks } = post ;
241288 let successfulChunks = 0 ;
242289 let failedChunks = 0 ;
243-
290+
244291 // Create a chunk-level progress bar
245292 function updateChunkProgress ( ) {
246293 const total = chunks . length ;
@@ -341,7 +388,7 @@ async function generateEmbeddingsForSingleFile(
341388 } catch ( error ) {
342389 console . error ( 'Error inserting whole post chunk:' , error ) ;
343390 failedChunks ++ ;
344-
391+
345392 // Update chunk progress after error
346393 console . log ( updateChunkProgress ( ) ) ;
347394 }
@@ -358,7 +405,11 @@ async function generateEmbeddingsForSingleFile(
358405 const batchChunks = chunks . slice ( i , i + BATCH_SIZE ) ;
359406 const batchEnd = Math . min ( i + BATCH_SIZE , chunks . length ) ;
360407
361- console . log ( `\nProcessing batch ${ i } -${ batchEnd } of ${ chunks . length } (${ Math . ceil ( ( batchEnd - i ) / BATCH_SIZE ) } /${ Math . ceil ( chunks . length / BATCH_SIZE ) } batches)` ) ;
408+ console . log (
409+ `\nProcessing batch ${ i } -${ batchEnd } of ${ chunks . length } (${ Math . ceil (
410+ ( batchEnd - i ) / BATCH_SIZE
411+ ) } /${ Math . ceil ( chunks . length / BATCH_SIZE ) } batches)`
412+ ) ;
362413
363414 try {
364415 // Format chunks with more context
@@ -383,38 +434,43 @@ async function generateEmbeddingsForSingleFile(
383434 message ?: string ;
384435 code ?: string ;
385436 } ;
386-
387- if ( inputTexts . length > 3 && (
388- error . message ?. includes ( 'timeout' ) ||
389- error . message ?. includes ( 'network' ) ||
390- error . code === 'ECONNRESET' ||
391- error . code === 'ETIMEDOUT' ) ) {
392-
437+
438+ if (
439+ inputTexts . length > 3 &&
440+ ( error . message ?. includes ( 'timeout' ) ||
441+ error . message ?. includes ( 'network' ) ||
442+ error . code === 'ECONNRESET' ||
443+ error . code === 'ETIMEDOUT' )
444+ ) {
393445 console . log ( `Error processing full batch: ${ error . message } ` ) ;
394446 console . log ( `Splitting batch into smaller chunks and retrying...` ) ;
395-
447+
396448 // Split the batch in half
397449 const midpoint = Math . floor ( inputTexts . length / 2 ) ;
398450 const firstHalf = inputTexts . slice ( 0 , midpoint ) ;
399451 const secondHalf = inputTexts . slice ( midpoint ) ;
400-
452+
401453 // Process first half
402454 console . log ( `Processing first half (${ firstHalf . length } chunks)...` ) ;
403455 const firstResponse = await embedWithRetry ( firstHalf ) ;
404-
456+
405457 // Add delay between sub-batches
406458 await wait ( DELAY_BETWEEN_BATCHES ) ;
407-
459+
408460 // Process second half
409- console . log ( `Processing second half (${ secondHalf . length } chunks)...` ) ;
461+ console . log (
462+ `Processing second half (${ secondHalf . length } chunks)...`
463+ ) ;
410464 const secondResponse = await embedWithRetry ( secondHalf ) ;
411-
465+
412466 // Merge responses
413467 response = {
414- data : [ ...firstResponse . data , ...secondResponse . data ]
468+ data : [ ...firstResponse . data , ...secondResponse . data ] ,
415469 } ;
416-
417- console . log ( `Successfully processed split batch with ${ response . data . length } embeddings` ) ;
470+
471+ console . log (
472+ `Successfully processed split batch with ${ response . data . length } embeddings`
473+ ) ;
418474 } else {
419475 // If not a timeout or the batch is already small, rethrow
420476 throw embeddingError ;
@@ -552,6 +608,10 @@ async function generateEmbeddingsForSingleFile(
552608 const results = await Promise . all ( insertPromises ) ;
553609 const successCount = results . filter ( Boolean ) . length ;
554610
611+ // Update counters based on results
612+ successfulChunks += successCount ;
613+ failedChunks += batchChunks . length - successCount ;
614+
555615 // Process overlaps after all inserts completed
556616 if ( successCount > 1 ) {
557617 // Only process overlaps if we have at least 2 chunks
@@ -561,7 +621,7 @@ async function generateEmbeddingsForSingleFile(
561621
562622 // Update chunk progress
563623 console . log ( updateChunkProgress ( ) ) ;
564-
624+
565625 console . log (
566626 `✅ Batch complete: ${ successCount } /${ batchChunks . length } chunks successful with sliding window overlaps`
567627 ) ;
@@ -571,22 +631,26 @@ async function generateEmbeddingsForSingleFile(
571631 } catch ( error ) {
572632 console . error ( 'Error processing batch:' , error ) ;
573633 failedChunks += batchChunks . length ;
574-
634+
575635 // Update chunk progress after error
576636 console . log ( updateChunkProgress ( ) ) ;
577637 }
578638 }
579639
580640 return { successfulChunks, failedChunks } ;
581641}
582-
/**
 * Creates a simple ASCII progress bar.
 *
 * The bar is drawn with `█` for the completed portion and `░` for the
 * remainder, followed by the rounded percentage and the raw counts, e.g.
 * `[█████░░░░░] 50% (5/10)`.
 *
 * @param current - Number of items processed so far.
 * @param total - Total number of items. A value of 0 or less is rendered as
 *   0% progress instead of producing `NaN`/`Infinity`.
 * @param width - Width of the bar in characters (defaults to 30).
 * @returns The formatted progress bar string.
 */
function createProgressBar(
  current: number,
  total: number,
  width: number = 30
): string {
  // A non-positive total would make the ratio NaN or Infinity; treat it as
  // "no progress" so the bar still renders.
  const ratio = total > 0 ? current / total : 0;
  const percentage = Math.round(ratio * 100);

  // Clamp the filled segment to [0, width]. String.prototype.repeat throws a
  // RangeError for negative counts, which the unclamped arithmetic produced
  // whenever `current` exceeded `total`. The percentage and counts are left
  // unclamped so over-counting stays visible in the output.
  const safeWidth = Math.max(0, width);
  const filledChars = Math.min(Math.max(Math.round(ratio * width), 0), safeWidth);
  const emptyChars = Math.max(0, safeWidth - filledChars);

  const progressBar = '█'.repeat(filledChars) + '░'.repeat(emptyChars);
  return `[${progressBar}] ${percentage}% (${current}/${total})`;
}
592656
@@ -605,7 +669,9 @@ async function generateEmbeddingsForAllFiles() {
605669
606670 for ( let i = 0 ; i < nonDraftPosts . length ; i ++ ) {
607671 const post = nonDraftPosts [ i ] ;
608- console . log ( `\n=== Processing file ${ i + 1 } /${ totalFiles } : ${ post . filePath } ===` ) ;
672+ console . log (
673+ `\n=== Processing file ${ i + 1 } /${ totalFiles } : ${ post . filePath } ===`
674+ ) ;
609675
610676 const { successfulChunks, failedChunks } =
611677 await generateEmbeddingsForSingleFile ( post . filePath ) ;
@@ -614,8 +680,8 @@ async function generateEmbeddingsForAllFiles() {
614680 totalFailed += failedChunks ;
615681
616682 // Update progress bar
617- console . log ( createProgressBar ( i + 1 , totalFiles ) ) ;
618-
683+ console . log ( createProgressBar ( i + 1 , totalFiles ) ) ;
684+
619685 // Add delay between files
620686 if ( i < nonDraftPosts . length - 1 ) {
621687 console . log ( `Waiting ${ DELAY_BETWEEN_FILES } ms before next file...` ) ;
0 commit comments