3737from logging .handlers import RotatingFileHandler
3838import asyncio
3939from importlib .metadata import version , PackageNotFoundError
40- import tempfile
40+ import warnings
41+ from typing import Optional
4142
4243# Third-Party Library Imports
4344import gradio as gr
5051
5152# Local Module Imports
5253from utils .function_call import send_airtime , send_message , search_news , translate_text
53- from typing import Optional
54- from utils .models import ReceiptData , LineItem
5554from utils .constants import VISION_SYSTEM_PROMPT , API_SYSTEM_PROMPT
5655
5756# ------------------------------------------------------------------------------------
# Initialise tracing and the Groq client from environment-provided API keys.
langtrace.init(api_key=os.getenv("LANGTRACE_API_KEY"))
groq_client = groq.Client(api_key=os.getenv("GROQ_API_KEY"))

# Suppress Pydantic UserWarning from autogen.
# NOTE(review): a warnings filter only affects warnings raised after this
# statement runs. If the warning is emitted while importing autogen, confirm
# that import happens after this point (or move the filter above it).
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message=r".*Field.*in.*has conflict with protected namespace.*",
)

# Set up the logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)  # Set the logger to handle all levels DEBUG and above
131137
132138
async def text_to_speech(text: str) -> None:
    """
    Generate speech from text using edge-tts.

    The synthesized audio is written to the module-level ``OUTPUT_FILE``
    path using the module-level ``VOICE``.

    Parameters
    ----------
    text : str
        The text to convert to speech.

    Raises
    ------
    Exception
        Any error raised while creating or saving the audio is logged
        and then re-raised unchanged to the caller.
    """
    try:
        communicate = edge_tts.Communicate(text, VOICE)
        await communicate.save(OUTPUT_FILE)
        # Lazy %-style arguments: the message is only formatted if emitted.
        logger.info("Generated speech output: %s", OUTPUT_FILE)
    except Exception as e:
        # "%s" already applies str() to the argument, so no explicit str(e).
        logger.error("TTS Error: %s", e)
        raise
141155
142156
@@ -248,7 +262,7 @@ async def text_to_speech(text: str) -> None:
248262@with_langtrace_root_span ()
249263async def process_user_message (
250264 message : str ,
251- history : list ,
265+ history : list , # pylint: disable=unused-argument
252266 use_vision : bool = False ,
253267 image_path : Optional [str ] = None ,
254268) -> str :
@@ -286,15 +300,18 @@ async def process_user_message(
286300 messages .append ({"role" : "user" , "content" : message })
287301
288302 try :
289- model_name = "llama3.2-vision" if use_vision else "qwen2.5:0.5b"
303+ # Use 'llava' as it's a common Ollama vision model.
304+ # Ensure you have pulled the model with `ollama pull llava`.
305+ # You can use llama3.2-vision as well
306+ model_name = "llava" if use_vision else "qwen2.5:0.5b"
290307 response = await client .chat (
291308 model = model_name ,
292309 messages = messages ,
293310 tools = None if use_vision else tools ,
294311 format = "json" if use_vision else None ,
295312 options = {"temperature" : 0 },
296313 )
297- except Exception as e :
314+ except Exception :
298315 logger .exception ("Failed to get response from Ollama client." )
299316 return "An unexpected error occurred while communicating with the assistant."
300317
@@ -353,16 +370,11 @@ async def process_user_message(
353370 }
354371 )
355372
356- return f"Function `{ tool_name } ` executed successfully. Response:\n { function_response } "
357- except (
358- send_airtime .ErrorType ,
359- send_message .ErrorType ,
360- search_news .ErrorType ,
361- translate_text .ErrorType ,
362- ) as e :
363- logger .error ("Handled error in tool `%s`: %s" , tool_name , e )
364- return f"Error executing `{ tool_name } `: { str (e )} "
365- except Exception as e : # pylint: disable=broad-exception-caught
373+ return (
374+ f"Function `{ tool_name } ` executed successfully. Response:\n "
375+ f"{ function_response } "
376+ )
377+ except Exception as e :
366378 logger .exception ("Unexpected error in tool `%s`: %s" , tool_name , e )
367379 return f"An unexpected error occurred while executing `{ tool_name } `."
368380 else :
@@ -420,12 +432,12 @@ async def process_audio_and_llm(audio):
420432 response = await process_user_message (transcription , [])
421433 return f"Transcription: { transcription } \n LLM Response: { response } "
422434
423- except Exception as e :
424- logger .exception ("Error during transcription or LLM processing: %s" , e )
425- return f"Error: { str (e )} "
426- except Exception as e :
427- logger .exception ("Error in audio processing: %s" , e )
428- return f"Error: { str (e )} "
435+ except Exception as exc :
436+ logger .exception ("Error during transcription or LLM processing: %s" , exc )
437+ return f"Error: { str (exc )} "
438+ except Exception as exc :
439+ logger .exception ("Error in audio processing: %s" , exc )
440+ return f"Error: { str (exc )} "
429441
430442
431443def gradio_interface (message : str , history : list ) -> str :
@@ -447,8 +459,8 @@ def gradio_interface(message: str, history: list) -> str:
447459 try :
448460 response = asyncio .run (process_user_message (message , history ))
449461 return response
450- except Exception as e : # pylint: disable=broad-exception-caught
451- logger .exception ("Error in gradio_interface: %s" , e )
462+ except Exception as exc :
463+ logger .exception ("Error in gradio_interface: %s" , exc )
452464 return "An unexpected error occurred while processing your message."
453465
454466
@@ -540,9 +552,9 @@ def show_transcription(audio):
540552 )
541553 logger .info ("Audio transcribed successfully: %s" , transcription )
542554 return transcription
543- except Exception as e :
544- logger .exception ("Error during transcription: %s" , e )
545- return f"Error: { str (e )} "
555+ except Exception as exc :
556+ logger .exception ("Error during transcription: %s" , exc )
557+ return f"Error: { str (exc )} "
546558
547559 # Define TTS Function
548560 async def generate_tts (text : str ) -> str :
@@ -552,28 +564,28 @@ async def generate_tts(text: str) -> str:
552564 try :
553565 communicate = edge_tts .Communicate (text , VOICE )
554566 await communicate .save (OUTPUT_FILE )
555- logger .info (f "TTS audio generated successfully: { OUTPUT_FILE } " )
567+ logger .info ("TTS audio generated successfully: %s" , OUTPUT_FILE )
556568 return OUTPUT_FILE
557- except Exception as e :
558- logger .error (f "TTS Generation Error: { str (e ) } " )
569+ except Exception as exc :
570+ logger .error ("TTS Generation Error: %s" , str (exc ) )
559571 return None
560572
561573 # Wire up the components
562- transcribe_button .click (
574+ transcribe_button .click ( # pylint: disable=no-member
563575 fn = show_transcription , inputs = audio_input , outputs = transcription_preview
564576 )
565577
566578 # Process the edited text
567- process_button .click (
579+ process_button .click ( # pylint: disable=no-member
568580 fn = lambda x : asyncio .run (process_user_message (x , [])),
569581 inputs = transcription_preview ,
570582 outputs = audio_output ,
571583 )
572584
573585 # Connect TTS Button to Function
574- tts_button .click (
586+ tts_button .click ( # pylint: disable=no-member
575587 fn = lambda txt : asyncio .run (generate_tts (txt )),
576- inputs = audio_output , # Replace with the component holding the final text
588+ inputs = audio_output ,
577589 outputs = tts_audio ,
578590 )
579591
@@ -596,17 +608,20 @@ async def generate_tts(text: str) -> str:
596608 result_text = gr .Textbox (label = "Analysis Result" )
597609
async def process_with_speech(image):
    """
    Process image with vision model and return analysis.

    Despite the name, no speech is synthesized here: the function only
    forwards the image to ``process_user_message`` in vision mode and
    returns the resulting text.

    Parameters
    ----------
    image
        Value passed through as ``image_path`` to ``process_user_message``
        (presumably a file path supplied by the image input component --
        confirm against the component's configuration).

    Returns
    -------
    str
        The text returned by ``process_user_message``, or the string form
        of the exception if the call fails.
    """
    try:
        # Get text result first
        text_result = await process_user_message(
            "Analyze this receipt", [], use_vision=True, image_path=image
        )
        return text_result
    except Exception as exc:
        # Errors are returned as text rather than raised, so the caller
        # always receives a string.
        logger.error("Processing error: %s", exc)
        return str(exc)
608623
609- scan_button .click (
624+ scan_button .click ( # pylint: disable=no-member
610625 fn = lambda img : asyncio .run (process_with_speech (img )),
611626 inputs = image_input ,
612627 outputs = result_text ,
@@ -621,6 +636,6 @@ async def process_with_speech(image):
621636 logger .info ("Launching Gradio interface..." )
622637 demo .launch (inbrowser = True , server_name = "0.0.0.0" , server_port = 7860 )
623638 logger .info ("Gradio interface launched successfully." )
624- except Exception as e :
625- logger .exception ("Failed to launch Gradio interface: %s" , e )
639+ except Exception as exc :
640+ logger .exception ("Failed to launch Gradio interface: %s" , exc )
626641 logger .info ("Script execution completed" )
0 commit comments