@@ -342,6 +342,7 @@ def process(
342342 json_output = False ,
343343 markdown_output = False
344344 ):
345+ start_time = time .time ()
345346 batch_size_pdf = self .config ["batch_size" ]
346347
347348 # First pass: count all eligible files
@@ -433,11 +434,19 @@ def process(
433434 errors_files_count += batch_errors
434435 skipped_files_count += batch_skipped
435436
437+ runtime = time .time () - start_time
438+ docs_per_second = processed_files_count / runtime if runtime > 0 else 0
439+ seconds_per_doc = runtime / processed_files_count if processed_files_count > 0 else 0
440+
436441 # Log final statistics - always visible
437442 print (f"Processing completed: { processed_files_count } out of { total_files } files processed" )
438443 print (f"Errors: { errors_files_count } out of { total_files } files processed" )
439444 if skipped_files_count > 0 :
440445 print (f"Skipped: { skipped_files_count } out of { total_files } files (already existed, use --force to reprocess)" )
446+
447+ print (f"⏱️ Total runtime: { runtime :.2f} seconds" )
448+ print (f"🚀 Speed: { docs_per_second :.2f} documents/second" )
449+ print (f" Throughput: { seconds_per_doc :.2f} seconds/document" )
441450
442451 def process_batch (
443452 self ,
@@ -459,6 +468,7 @@ def process_batch(
459468 json_output = False ,
460469 markdown_output = False
461470 ):
471+ batch_start_time = time .time ()
462472 if verbose :
463473 self .logger .info (f"{ len (input_files )} files to process in current batch" )
464474
@@ -613,6 +623,16 @@ def process_batch(
613623 except OSError as e :
614624 self .logger .error (f"Failed to write TEI XML file { filename } : { str (e )} " )
615625
626+ # Calculate batch statistics
627+ batch_runtime = time .time () - batch_start_time
628+ batch_docs_per_second = processed_count / batch_runtime if batch_runtime > 0 else 0
629+ batch_seconds_per_docs = batch_runtime / processed_count if processed_count > 0 else 0
630+
631+ if verbose :
632+ self .logger .info (f"⏱️ Runtime: { batch_runtime :.2f} seconds" )
633+ self .logger .info (f"🚀 Speed: { batch_docs_per_second :.2f} documents/second" )
634+ self .logger .info (f" Throughput: { batch_seconds_per_docs :.2f} seconds/document" )
635+
616636 return processed_count , error_count , skipped_count
617637
618638 def process_pdf (
0 commit comments