Commit 5f4a240

Merge pull request #20 from Shuyib/ag2
Ag2 from pyautogen update
2 parents 1bc8e09 + 6a703fc commit 5f4a240

File tree: 3 files changed (+130 -57 lines)

requirements.txt
utils/function_call.py
voice_stt_mode.py

requirements.txt
Lines changed: 3 additions & 2 deletions

@@ -2,7 +2,7 @@ africastalking==1.2.8
 black==24.8.0
 pylint==3.2.6
 ipython==8.30.0
-ollama==0.4.4
+ollama==0.5.1
 black==24.8.0
 pyment==0.3.3
 codecarbon==2.7.1
@@ -16,11 +16,12 @@ nltk==3.9.1
 soundfile==0.12.1
 groq==0.13.1
 numpy==2.2.1
-pyautogen==0.2.18
+ag2==0.9.6
 flaml[automl]
 edge-tts==7.0.0
 deprecated==1.2.18
 pydantic==2.9.2
 flask==3.0.0
 flask-cors==6.0.0
 requests==2.32.4
+ag2[ollama]
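Note on the ag2 migration: the project swaps pyautogen==0.2.18 for ag2==0.9.6 (plus the ag2[ollama] extra for local models), while the code below keeps importing from the autogen namespace, which the ag2 package still provides. A minimal sketch of the agent wiring this enables, mirroring the joe/zoe translation agents in utils/function_call.py; the "ollama" api_type entry, the system message, and the model name are assumptions for illustration, not part of this commit:

# Sketch only: assumes the ag2[ollama] extra accepts an "ollama" api_type in
# config_list and that the named model has been pulled into a local Ollama server.
from autogen.agentchat.conversable_agent import ConversableAgent

llm_config = {
    "config_list": [
        {"api_type": "ollama", "model": "qwen2.5:0.5b"},  # illustrative local model
    ]
}

zoe = ConversableAgent(
    name="zoe",
    system_message="You are a helpful translator.",
    llm_config=llm_config,
    human_input_mode="NEVER",
)
joe = ConversableAgent(name="joe", llm_config=False, human_input_mode="NEVER")

result = joe.initiate_chat(zoe, message="Zoe, translate 'hello' to French", max_turns=2)
print(result.summary)  # ChatResult.summary carries the final reply text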

utils/function_call.py
Lines changed: 70 additions & 13 deletions

@@ -27,15 +27,52 @@
 from logging.handlers import RotatingFileHandler
 from importlib.metadata import version
 import asyncio
-import africastalking
+import re
+import warnings
+from typing import Optional, Union
+
 import ollama
 import requests
-from autogen import ConversableAgent
+
+# Suppress Pydantic UserWarning from autogen
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message=r".*Field.*in.*has conflict with protected namespace.*",
+)
+
+
+# Monkey-patch for pydantic issue with autogen
+# See: https://github.com/microsoft/autogen/issues/1996
+try:
+    from pydantic._internal import _typing_extra
+except ImportError:
+    pass  # not a pydantic v2.7.0+ installation, no issue
+else:
+    try:
+        # pydantic v2.7.0+
+        from pydantic._internal._typing_extra import try_eval_type
+    except ImportError:
+        # autogen is not yet compatible with pydantic v2.7.0+
+        # see: https://github.com/microsoft/autogen/issues/1996
+        # monkey-patch pydantic
+        from typing import Any, Dict, Type
+
+        def try_eval_type(t: Type[Any]) -> Type[Any]:
+            try:
+                return _typing_extra._eval_type(
+                    t, globalns=None, localns=None, type_aliases=None
+                )
+            except (NameError, TypeError):
+                return t
+
+        _typing_extra.try_eval_type = try_eval_type
+
+
+from autogen.agentchat.conversable_agent import ConversableAgent
 from pydantic import BaseModel, field_validator, ValidationError
-from typing import Union
-from typing import Optional
-import re
-from .communication_apis import send_mobile_data_wrapper, send_mobile_data_original
+
+from .communication_apis import send_mobile_data_wrapper
 
 # from codecarbon import EmissionsTracker # Import the EmissionsTracker
 from duckduckgo_search import DDGS
@@ -454,7 +491,7 @@ def send_message(phone_number: str, message: str, username: str, **kwargs) -> str:
 
     try:
         # Use absolute import for communication_apis to avoid relative import errors
-        from communication_apis import send_message as comm_send_message
+        from .communication_apis import send_message as comm_send_message
 
         masked_number = mask_phone_number(phone_number)
         logger.info("Delegating message sending to %s", masked_number)
@@ -512,7 +549,10 @@ def send_ussd(phone_number: str, code: str, **kwargs) -> str:
 
 
 def get_wallet_balance(**kwargs) -> str:
-    """Fetch the current wallet balance from Africa's Talking account using the documented API endpoint."""
+    """
+    Fetch the current wallet balance from Africa's Talking account using the
+    documented API endpoint.
+    """
     try:
         username = os.getenv("AT_USERNAME")
         api_key = os.getenv("AT_API_KEY")
@@ -842,7 +882,7 @@ def search_news(query: str, max_results: int = 5, **kwargs) -> str:
 
     Returns
     -------
-    str : The search results.
+    str : The search results, formatted for readability.
 
     Examples
    --------
@@ -860,8 +900,24 @@ def search_news(query: str, max_results: int = 5, **kwargs) -> str:
             max_results=max_results,
             **kwargs,
         )
-        logger.debug("The search results are: %s", results)
-        return json.dumps(results)
+        logger.debug("The raw search results are: %s", results)
+
+        if not results:
+            return "No news found for your query."
+
+        formatted_results = []
+        for article in results:
+            title = article.get("title", "No Title")
+            source = article.get("source", "No Source")
+            body = article.get("body", "No Summary")
+            url = article.get("url", "No URL")
+
+            formatted_article = (
+                f"Title: {title}\n" f"Source: {source}\n" f"Summary: {body}\n" f"URL: {url}"
+            )
+            formatted_results.append(formatted_article)
+
+        return "\n\n---\n\n".join(formatted_results)
 
 
 def translate_text(text: str, target_language: str) -> str:
@@ -938,7 +994,8 @@ def translate_text(text: str, target_language: str) -> str:
 
     message = f"Zoe, translate '{text}' to {normalized_language}"
     result = joe.initiate_chat(zoe, message=message, max_turns=2)
-    return result
+    # Extract the last message from the chat history, which is the translation
+    return result.summary
 
 
 # Asynchronous function to handle the conversation with the model
@@ -1426,7 +1483,7 @@ async def run(model: str, user_input: str):
         if not user_prompt:
             logger.info("No input provided. Exiting...")
             break
-        elif user_prompt.lower() == "exit":
+        if user_prompt.lower() == "exit":
             break
 
    # Run the asynchronous function with tracker
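Note on the search_news change: the function now returns a readable block per article instead of raw JSON. A small illustration of the output shape, using a made-up DDGS-style record; the field names (title, source, body, url) are the ones the function reads, the values are placeholders:

# Placeholder record shaped like one duckduckgo_search DDGS().news() result.
article = {
    "title": "Example headline",
    "source": "Example Wire",
    "body": "One-sentence summary of the story.",
    "url": "https://example.com/story",
}

formatted = (
    f"Title: {article.get('title', 'No Title')}\n"
    f"Source: {article.get('source', 'No Source')}\n"
    f"Summary: {article.get('body', 'No Summary')}\n"
    f"URL: {article.get('url', 'No URL')}"
)
print(formatted)
# Title: Example headline
# Source: Example Wire
# Summary: One-sentence summary of the story.
# URL: https://example.com/story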

voice_stt_mode.py
Lines changed: 57 additions & 42 deletions

@@ -37,7 +37,8 @@
 from logging.handlers import RotatingFileHandler
 import asyncio
 from importlib.metadata import version, PackageNotFoundError
-import tempfile
+import warnings
+from typing import Optional
 
 # Third-Party Library Imports
 import gradio as gr
@@ -50,8 +51,6 @@
 
 # Local Module Imports
 from utils.function_call import send_airtime, send_message, search_news, translate_text
-from typing import Optional
-from utils.models import ReceiptData, LineItem
 from utils.constants import VISION_SYSTEM_PROMPT, API_SYSTEM_PROMPT
 
 # ------------------------------------------------------------------------------------
@@ -62,6 +61,13 @@
 langtrace.init(api_key=os.getenv("LANGTRACE_API_KEY"))
 groq_client = groq.Client(api_key=os.getenv("GROQ_API_KEY"))
 
+# Suppress Pydantic UserWarning from autogen
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message=r".*Field.*in.*has conflict with protected namespace.*",
+)
+
 # Set up the logger
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)  # Set the logger to handle all levels DEBUG and above
@@ -131,12 +137,20 @@
 
 
 async def text_to_speech(text: str) -> None:
+    """
+    Generate speech from text using edge-tts.
+
+    Parameters
+    ----------
+    text : str
+        The text to convert to speech.
+    """
     try:
         communicate = edge_tts.Communicate(text, VOICE)
         await communicate.save(OUTPUT_FILE)
-        logger.info(f"Generated speech output: {OUTPUT_FILE}")
+        logger.info("Generated speech output: %s", OUTPUT_FILE)
     except Exception as e:
-        logger.error(f"TTS Error: {str(e)}")
+        logger.error("TTS Error: %s", str(e))
         raise
 
 
@@ -248,7 +262,7 @@ async def text_to_speech(text: str) -> None:
 @with_langtrace_root_span()
 async def process_user_message(
     message: str,
-    history: list,
+    history: list,  # pylint: disable=unused-argument
     use_vision: bool = False,
     image_path: Optional[str] = None,
 ) -> str:
@@ -286,15 +300,18 @@ async def process_user_message(
     messages.append({"role": "user", "content": message})
 
     try:
-        model_name = "llama3.2-vision" if use_vision else "qwen2.5:0.5b"
+        # Use 'llava' as it's a common Ollama vision model.
+        # Ensure you have pulled the model with `ollama pull llava`.
+        # You can use llama3.2-vision as well
+        model_name = "llava" if use_vision else "qwen2.5:0.5b"
         response = await client.chat(
             model=model_name,
             messages=messages,
             tools=None if use_vision else tools,
             format="json" if use_vision else None,
             options={"temperature": 0},
         )
-    except Exception as e:
+    except Exception:
         logger.exception("Failed to get response from Ollama client.")
         return "An unexpected error occurred while communicating with the assistant."
 
@@ -353,16 +370,11 @@ async def process_user_message(
                     }
                 )
 
-                return f"Function `{tool_name}` executed successfully. Response:\n{function_response}"
-            except (
-                send_airtime.ErrorType,
-                send_message.ErrorType,
-                search_news.ErrorType,
-                translate_text.ErrorType,
-            ) as e:
-                logger.error("Handled error in tool `%s`: %s", tool_name, e)
-                return f"Error executing `{tool_name}`: {str(e)}"
-            except Exception as e:  # pylint: disable=broad-exception-caught
+                return (
+                    f"Function `{tool_name}` executed successfully. Response:\n"
+                    f"{function_response}"
+                )
+            except Exception as e:
                 logger.exception("Unexpected error in tool `%s`: %s", tool_name, e)
                 return f"An unexpected error occurred while executing `{tool_name}`."
             else:
@@ -420,12 +432,12 @@ async def process_audio_and_llm(audio):
             response = await process_user_message(transcription, [])
             return f"Transcription: {transcription}\nLLM Response: {response}"
 
-        except Exception as e:
-            logger.exception("Error during transcription or LLM processing: %s", e)
-            return f"Error: {str(e)}"
-    except Exception as e:
-        logger.exception("Error in audio processing: %s", e)
-        return f"Error: {str(e)}"
+        except Exception as exc:
+            logger.exception("Error during transcription or LLM processing: %s", exc)
+            return f"Error: {str(exc)}"
+    except Exception as exc:
+        logger.exception("Error in audio processing: %s", exc)
+        return f"Error: {str(exc)}"
 
 
 def gradio_interface(message: str, history: list) -> str:
@@ -447,8 +459,8 @@ def gradio_interface(message: str, history: list) -> str:
     try:
         response = asyncio.run(process_user_message(message, history))
         return response
-    except Exception as e:  # pylint: disable=broad-exception-caught
-        logger.exception("Error in gradio_interface: %s", e)
+    except Exception as exc:
+        logger.exception("Error in gradio_interface: %s", exc)
         return "An unexpected error occurred while processing your message."
 
 
@@ -540,9 +552,9 @@ def show_transcription(audio):
                 )
                 logger.info("Audio transcribed successfully: %s", transcription)
                 return transcription
-            except Exception as e:
-                logger.exception("Error during transcription: %s", e)
-                return f"Error: {str(e)}"
+            except Exception as exc:
+                logger.exception("Error during transcription: %s", exc)
+                return f"Error: {str(exc)}"
 
         # Define TTS Function
         async def generate_tts(text: str) -> str:
@@ -552,28 +564,28 @@ async def generate_tts(text: str) -> str:
             try:
                 communicate = edge_tts.Communicate(text, VOICE)
                 await communicate.save(OUTPUT_FILE)
-                logger.info(f"TTS audio generated successfully: {OUTPUT_FILE}")
+                logger.info("TTS audio generated successfully: %s", OUTPUT_FILE)
                 return OUTPUT_FILE
-            except Exception as e:
-                logger.error(f"TTS Generation Error: {str(e)}")
+            except Exception as exc:
+                logger.error("TTS Generation Error: %s", str(exc))
                 return None
 
         # Wire up the components
-        transcribe_button.click(
+        transcribe_button.click(  # pylint: disable=no-member
             fn=show_transcription, inputs=audio_input, outputs=transcription_preview
         )
 
         # Process the edited text
-        process_button.click(
+        process_button.click(  # pylint: disable=no-member
             fn=lambda x: asyncio.run(process_user_message(x, [])),
             inputs=transcription_preview,
             outputs=audio_output,
         )
 
         # Connect TTS Button to Function
-        tts_button.click(
+        tts_button.click(  # pylint: disable=no-member
             fn=lambda txt: asyncio.run(generate_tts(txt)),
-            inputs=audio_output,  # Replace with the component holding the final text
+            inputs=audio_output,
             outputs=tts_audio,
         )
 
@@ -596,17 +608,20 @@ async def generate_tts(text: str) -> str:
             result_text = gr.Textbox(label="Analysis Result")
 
             async def process_with_speech(image):
+                """
+                Process image with vision model and return analysis.
+                """
                 try:
                     # Get text result first
                     text_result = await process_user_message(
                         "Analyze this receipt", [], use_vision=True, image_path=image
                     )
                     return text_result
-                except Exception as e:
-                    logger.error(f"Processing error: {str(e)}")
-                    return str(e)
+                except Exception as exc:
+                    logger.error("Processing error: %s", str(exc))
+                    return str(exc)
 
-            scan_button.click(
+            scan_button.click(  # pylint: disable=no-member
                 fn=lambda img: asyncio.run(process_with_speech(img)),
                 inputs=image_input,
                 outputs=result_text,
@@ -621,6 +636,6 @@ async def process_with_speech(image):
         logger.info("Launching Gradio interface...")
         demo.launch(inbrowser=True, server_name="0.0.0.0", server_port=7860)
         logger.info("Gradio interface launched successfully.")
-    except Exception as e:
-        logger.exception("Failed to launch Gradio interface: %s", e)
+    except Exception as exc:
+        logger.exception("Failed to launch Gradio interface: %s", exc)
     logger.info("Script execution completed")
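Note on the vision model swap: process_user_message now defaults to llava for image inputs (pull it first with `ollama pull llava`; llama3.2-vision still works, per the new comments). A compact sketch of the same vision-versus-text switch against ollama.AsyncClient; the client setup, the images field, and the prompt are illustrative and not taken from this commit:

# Sketch: pick a vision model or a small tool-capable text model per request.
# Assumes `ollama pull llava` and `ollama pull qwen2.5:0.5b` have been run.
import asyncio
from typing import Optional

import ollama

client = ollama.AsyncClient()

async def ask(message: str, use_vision: bool = False, image_path: Optional[str] = None) -> str:
    user_msg = {"role": "user", "content": message}
    if use_vision and image_path:
        user_msg["images"] = [image_path]  # attach the image for the vision model
    response = await client.chat(
        model="llava" if use_vision else "qwen2.5:0.5b",
        messages=[user_msg],
        format="json" if use_vision else None,
        options={"temperature": 0},  # deterministic output, as in the diff above
    )
    return response.message.content

if __name__ == "__main__":
    print(asyncio.run(ask("Say hello in one short sentence.")))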
