Commit 5f4a240

Merge pull request #20 from Shuyib/ag2
Ag2 from pyautogen update
2 parents 1bc8e09 + 6a703fc commit 5f4a240

File tree: 3 files changed (+130 -57 lines)

requirements.txt
utils/function_call.py
voice_stt_mode.py

requirements.txt
Lines changed: 3 additions & 2 deletions

@@ -2,7 +2,7 @@ africastalking==1.2.8
 black==24.8.0
 pylint==3.2.6
 ipython==8.30.0
-ollama==0.4.4
+ollama==0.5.1
 black==24.8.0
 pyment==0.3.3
 codecarbon==2.7.1
@@ -16,11 +16,12 @@ nltk==3.9.1
 soundfile==0.12.1
 groq==0.13.1
 numpy==2.2.1
-pyautogen==0.2.18
+ag2==0.9.6
 flaml[automl]
 edge-tts==7.0.0
 deprecated==1.2.18
 pydantic==2.9.2
 flask==3.0.0
 flask-cors==6.0.0
 requests==2.32.4
+ag2[ollama]
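Note on the ag2 migration: the project swaps pyautogen==0.2.18 for ag2==0.9.6 (plus the ag2[ollama] extra for local models), while the code below keeps importing from the autogen namespace, which the ag2 package still provides. A minimal sketch of the agent wiring this enables, mirroring the joe/zoe translation agents in utils/function_call.py; the "ollama" api_type entry, the system message, and the model name are assumptions for illustration, not part of this commit:

# Sketch only: assumes the ag2[ollama] extra accepts an "ollama" api_type in
# config_list and that the named model has been pulled into a local Ollama server.
from autogen.agentchat.conversable_agent import ConversableAgent

llm_config = {
    "config_list": [
        {"api_type": "ollama", "model": "qwen2.5:0.5b"},  # illustrative local model
    ]
}

zoe = ConversableAgent(
    name="zoe",
    system_message="You are a helpful translator.",
    llm_config=llm_config,
    human_input_mode="NEVER",
)
joe = ConversableAgent(name="joe", llm_config=False, human_input_mode="NEVER")

result = joe.initiate_chat(zoe, message="Zoe, translate 'hello' to French", max_turns=2)
print(result.summary)  # ChatResult.summary carries the final reply text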

utils/function_call.py
Lines changed: 70 additions & 13 deletions

@@ -27,15 +27,52 @@
 from logging.handlers import RotatingFileHandler
 from importlib.metadata import version
 import asyncio
-import africastalking
+import re
+import warnings
+from typing import Optional, Union
+
 import ollama
 import requests
-from autogen import ConversableAgent
+
+# Suppress Pydantic UserWarning from autogen
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message=r".*Field.*in.*has conflict with protected namespace.*",
+)
+
+
+# Monkey-patch for pydantic issue with autogen
+# See: https://github.com/microsoft/autogen/issues/1996
+try:
+    from pydantic._internal import _typing_extra
+except ImportError:
+    pass  # not a pydantic v2.7.0+ installation, no issue
+else:
+    try:
+        # pydantic v2.7.0+
+        from pydantic._internal._typing_extra import try_eval_type
+    except ImportError:
+        # autogen is not yet compatible with pydantic v2.7.0+
+        # see: https://github.com/microsoft/autogen/issues/1996
+        # monkey-patch pydantic
+        from typing import Any, Dict, Type
+
+        def try_eval_type(t: Type[Any]) -> Type[Any]:
+            try:
+                return _typing_extra._eval_type(
+                    t, globalns=None, localns=None, type_aliases=None
+                )
+            except (NameError, TypeError):
+                return t
+
+        _typing_extra.try_eval_type = try_eval_type
+
+
+from autogen.agentchat.conversable_agent import ConversableAgent
 from pydantic import BaseModel, field_validator, ValidationError
-from typing import Union
-from typing import Optional
-import re
-from .communication_apis import send_mobile_data_wrapper, send_mobile_data_original
+
+from .communication_apis import send_mobile_data_wrapper
 
 # from codecarbon import EmissionsTracker # Import the EmissionsTracker
 from duckduckgo_search import DDGS
@@ -454,7 +491,7 @@ def send_message(phone_number: str, message: str, username: str, **kwargs) -> str:
 
     try:
         # Use absolute import for communication_apis to avoid relative import errors
-        from communication_apis import send_message as comm_send_message
+        from .communication_apis import send_message as comm_send_message
 
         masked_number = mask_phone_number(phone_number)
         logger.info("Delegating message sending to %s", masked_number)
@@ -512,7 +549,10 @@ def send_ussd(phone_number: str, code: str, **kwargs) -> str:
 
 
 def get_wallet_balance(**kwargs) -> str:
-    """Fetch the current wallet balance from Africa's Talking account using the documented API endpoint."""
+    """
+    Fetch the current wallet balance from Africa's Talking account using the
+    documented API endpoint.
+    """
     try:
         username = os.getenv("AT_USERNAME")
         api_key = os.getenv("AT_API_KEY")
@@ -842,7 +882,7 @@ def search_news(query: str, max_results: int = 5, **kwargs) -> str:
 
     Returns
     -------
-    str : The search results.
+    str : The search results, formatted for readability.
 
     Examples
    --------
@@ -860,8 +900,24 @@ def search_news(query: str, max_results: int = 5, **kwargs) -> str:
             max_results=max_results,
             **kwargs,
         )
-        logger.debug("The search results are: %s", results)
-        return json.dumps(results)
+        logger.debug("The raw search results are: %s", results)
+
+        if not results:
+            return "No news found for your query."
+
+        formatted_results = []
+        for article in results:
+            title = article.get("title", "No Title")
+            source = article.get("source", "No Source")
+            body = article.get("body", "No Summary")
+            url = article.get("url", "No URL")
+
+            formatted_article = (
+                f"Title: {title}\n" f"Source: {source}\n" f"Summary: {body}\n" f"URL: {url}"
+            )
+            formatted_results.append(formatted_article)
+
+        return "\n\n---\n\n".join(formatted_results)
 
 
 def translate_text(text: str, target_language: str) -> str:
@@ -938,7 +994,8 @@ def translate_text(text: str, target_language: str) -> str:
 
     message = f"Zoe, translate '{text}' to {normalized_language}"
     result = joe.initiate_chat(zoe, message=message, max_turns=2)
-    return result
+    # Extract the last message from the chat history, which is the translation
+    return result.summary
 
 
 # Asynchronous function to handle the conversation with the model
@@ -1426,7 +1483,7 @@ async def run(model: str, user_input: str):
         if not user_prompt:
             logger.info("No input provided. Exiting...")
             break
-        elif user_prompt.lower() == "exit":
+        if user_prompt.lower() == "exit":
             break
 
    # Run the asynchronous function with tracker
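Note on the search_news change: the function now returns a readable block per article instead of raw JSON. A small illustration of the output shape, using a made-up DDGS-style record; the field names (title, source, body, url) are the ones the function reads, the values are placeholders:

# Placeholder record shaped like one duckduckgo_search DDGS().news() result.
article = {
    "title": "Example headline",
    "source": "Example Wire",
    "body": "One-sentence summary of the story.",
    "url": "https://example.com/story",
}

formatted = (
    f"Title: {article.get('title', 'No Title')}\n"
    f"Source: {article.get('source', 'No Source')}\n"
    f"Summary: {article.get('body', 'No Summary')}\n"
    f"URL: {article.get('url', 'No URL')}"
)
print(formatted)
# Title: Example headline
# Source: Example Wire
# Summary: One-sentence summary of the story.
# URL: https://example.com/story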

voice_stt_mode.py
Lines changed: 57 additions & 42 deletions

@@ -37,7 +37,8 @@
 from logging.handlers import RotatingFileHandler
 import asyncio
 from importlib.metadata import version, PackageNotFoundError
-import tempfile
+import warnings
+from typing import Optional
 
 # Third-Party Library Imports
 import gradio as gr
@@ -50,8 +51,6 @@
 
 # Local Module Imports
 from utils.function_call import send_airtime, send_message, search_news, translate_text
-from typing import Optional
-from utils.models import ReceiptData, LineItem
 from utils.constants import VISION_SYSTEM_PROMPT, API_SYSTEM_PROMPT
 
 # ------------------------------------------------------------------------------------
@@ -62,6 +61,13 @@
 langtrace.init(api_key=os.getenv("LANGTRACE_API_KEY"))
 groq_client = groq.Client(api_key=os.getenv("GROQ_API_KEY"))
 
+# Suppress Pydantic UserWarning from autogen
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message=r".*Field.*in.*has conflict with protected namespace.*",
+)
+
 # Set up the logger
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)  # Set the logger to handle all levels DEBUG and above
@@ -131,12 +137,20 @@
 
 
 async def text_to_speech(text: str) -> None:
+    """
+    Generate speech from text using edge-tts.
+
+    Parameters
+    ----------
+    text : str
+        The text to convert to speech.
+    """
     try:
         communicate = edge_tts.Communicate(text, VOICE)
         await communicate.save(OUTPUT_FILE)
-        logger.info(f"Generated speech output: {OUTPUT_FILE}")
+        logger.info("Generated speech output: %s", OUTPUT_FILE)
     except Exception as e:
-        logger.error(f"TTS Error: {str(e)}")
+        logger.error("TTS Error: %s", str(e))
         raise
 
 
@@ -248,7 +262,7 @@ async def text_to_speech(text: str) -> None:
 @with_langtrace_root_span()
 async def process_user_message(
     message: str,
-    history: list,
+    history: list,  # pylint: disable=unused-argument
     use_vision: bool = False,
     image_path: Optional[str] = None,
 ) -> str:
@@ -286,15 +300,18 @@ async def process_user_message(
     messages.append({"role": "user", "content": message})
 
     try:
-        model_name = "llama3.2-vision" if use_vision else "qwen2.5:0.5b"
+        # Use 'llava' as it's a common Ollama vision model.
+        # Ensure you have pulled the model with `ollama pull llava`.
+        # You can use llama3.2-vision as well
+        model_name = "llava" if use_vision else "qwen2.5:0.5b"
         response = await client.chat(
             model=model_name,
             messages=messages,
             tools=None if use_vision else tools,
             format="json" if use_vision else None,
             options={"temperature": 0},
         )
-    except Exception as e:
+    except Exception:
         logger.exception("Failed to get response from Ollama client.")
         return "An unexpected error occurred while communicating with the assistant."
 
@@ -353,16 +370,11 @@ async def process_user_message(
                     }
                 )
 
-                return f"Function `{tool_name}` executed successfully. Response:\n{function_response}"
-            except (
-                send_airtime.ErrorType,
-                send_message.ErrorType,
-                search_news.ErrorType,
-                translate_text.ErrorType,
-            ) as e:
-                logger.error("Handled error in tool `%s`: %s", tool_name, e)
-                return f"Error executing `{tool_name}`: {str(e)}"
-            except Exception as e:  # pylint: disable=broad-exception-caught
+                return (
+                    f"Function `{tool_name}` executed successfully. Response:\n"
+                    f"{function_response}"
+                )
+            except Exception as e:
                 logger.exception("Unexpected error in tool `%s`: %s", tool_name, e)
                 return f"An unexpected error occurred while executing `{tool_name}`."
             else:
@@ -420,12 +432,12 @@ async def process_audio_and_llm(audio):
             response = await process_user_message(transcription, [])
             return f"Transcription: {transcription}\nLLM Response: {response}"
 
-        except Exception as e:
-            logger.exception("Error during transcription or LLM processing: %s", e)
-            return f"Error: {str(e)}"
-    except Exception as e:
-        logger.exception("Error in audio processing: %s", e)
-        return f"Error: {str(e)}"
+        except Exception as exc:
+            logger.exception("Error during transcription or LLM processing: %s", exc)
+            return f"Error: {str(exc)}"
+    except Exception as exc:
+        logger.exception("Error in audio processing: %s", exc)
+        return f"Error: {str(exc)}"
 
 
 def gradio_interface(message: str, history: list) -> str:
@@ -447,8 +459,8 @@ def gradio_interface(message: str, history: list) -> str:
     try:
         response = asyncio.run(process_user_message(message, history))
         return response
-    except Exception as e:  # pylint: disable=broad-exception-caught
-        logger.exception("Error in gradio_interface: %s", e)
+    except Exception as exc:
+        logger.exception("Error in gradio_interface: %s", exc)
         return "An unexpected error occurred while processing your message."
 
 
@@ -540,9 +552,9 @@ def show_transcription(audio):
                 )
                 logger.info("Audio transcribed successfully: %s", transcription)
                 return transcription
-            except Exception as e:
-                logger.exception("Error during transcription: %s", e)
-                return f"Error: {str(e)}"
+            except Exception as exc:
+                logger.exception("Error during transcription: %s", exc)
+                return f"Error: {str(exc)}"
 
         # Define TTS Function
         async def generate_tts(text: str) -> str:
@@ -552,28 +564,28 @@ async def generate_tts(text: str) -> str:
             try:
                 communicate = edge_tts.Communicate(text, VOICE)
                 await communicate.save(OUTPUT_FILE)
-                logger.info(f"TTS audio generated successfully: {OUTPUT_FILE}")
+                logger.info("TTS audio generated successfully: %s", OUTPUT_FILE)
                 return OUTPUT_FILE
-            except Exception as e:
-                logger.error(f"TTS Generation Error: {str(e)}")
+            except Exception as exc:
+                logger.error("TTS Generation Error: %s", str(exc))
                 return None
 
         # Wire up the components
-        transcribe_button.click(
+        transcribe_button.click(  # pylint: disable=no-member
             fn=show_transcription, inputs=audio_input, outputs=transcription_preview
         )
 
         # Process the edited text
-        process_button.click(
+        process_button.click(  # pylint: disable=no-member
             fn=lambda x: asyncio.run(process_user_message(x, [])),
             inputs=transcription_preview,
             outputs=audio_output,
         )
 
         # Connect TTS Button to Function
-        tts_button.click(
+        tts_button.click(  # pylint: disable=no-member
             fn=lambda txt: asyncio.run(generate_tts(txt)),
-            inputs=audio_output,  # Replace with the component holding the final text
+            inputs=audio_output,
             outputs=tts_audio,
         )
 
@@ -596,17 +608,20 @@ async def generate_tts(text: str) -> str:
             result_text = gr.Textbox(label="Analysis Result")
 
             async def process_with_speech(image):
+                """
+                Process image with vision model and return analysis.
+                """
                 try:
                     # Get text result first
                     text_result = await process_user_message(
                         "Analyze this receipt", [], use_vision=True, image_path=image
                     )
                     return text_result
-                except Exception as e:
-                    logger.error(f"Processing error: {str(e)}")
-                    return str(e)
+                except Exception as exc:
+                    logger.error("Processing error: %s", str(exc))
+                    return str(exc)
 
-            scan_button.click(
+            scan_button.click(  # pylint: disable=no-member
                 fn=lambda img: asyncio.run(process_with_speech(img)),
                 inputs=image_input,
                 outputs=result_text,
@@ -621,6 +636,6 @@ async def process_with_speech(image):
         logger.info("Launching Gradio interface...")
         demo.launch(inbrowser=True, server_name="0.0.0.0", server_port=7860)
         logger.info("Gradio interface launched successfully.")
-    except Exception as e:
-        logger.exception("Failed to launch Gradio interface: %s", e)
+    except Exception as exc:
+        logger.exception("Failed to launch Gradio interface: %s", exc)
     logger.info("Script execution completed")
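Note on the vision model swap: process_user_message now defaults to llava for image inputs (pull it first with `ollama pull llava`; llama3.2-vision still works, per the new comments). A compact sketch of the same vision-versus-text switch against ollama.AsyncClient; the client setup, the images field, and the prompt are illustrative and not taken from this commit:

# Sketch: pick a vision model or a small tool-capable text model per request.
# Assumes `ollama pull llava` and `ollama pull qwen2.5:0.5b` have been run.
import asyncio
from typing import Optional

import ollama

client = ollama.AsyncClient()

async def ask(message: str, use_vision: bool = False, image_path: Optional[str] = None) -> str:
    user_msg = {"role": "user", "content": message}
    if use_vision and image_path:
        user_msg["images"] = [image_path]  # attach the image for the vision model
    response = await client.chat(
        model="llava" if use_vision else "qwen2.5:0.5b",
        messages=[user_msg],
        format="json" if use_vision else None,
        options={"temperature": 0},  # deterministic output, as in the diff above
    )
    return response.message.content

if __name__ == "__main__":
    print(asyncio.run(ask("Say hello in one short sentence.")))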
