cw-academy-agent windows 11 fix and improvements #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

gpdl-0 wants to merge 3 commits into cloudwalk:main from gpdl-0:develop

.gitignore

-Original file line number
+Diff line change
@@ Expand Up / @@ -81,4 +81,7 @@ dmypy.json @@
     .cache/
     # Project specific
-    memory.json
+    memory.json
+    # Recorded audio
+    *.wav

agent-memory/main.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -8,60 +8,112 @@
  
    from datetime import datetime

    import json

    print("Iniciando o Agente de Memória...")

    # Carrega as variáveis de ambiente (ex: OPENAI_API_KEY) do arquivo .env

    load_dotenv(find_dotenv())

    # Inicializa o cliente da OpenAI. É aqui que a API Key é usada.

    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Loop principal do agente

    while True:

        memory = {

            "events": [],

            "interactions": []

        } if not os.path.exists("memory.json") else json.load(open("memory.json"))

        print("\n----- Novo Ciclo de Interação -----")

        # Tenta carregar a memória existente. Se não existir, cria uma estrutura vazia.

        try:

            memory = {

                "events": [],

                "interactions": []

            } if not os.path.exists("memory.json") else json.load(open("memory.json"))

            # print(f"Memória carregada. {len(memory['events'])} eventos, {len(memory['interactions'])} interações.")

        except Exception as e:

            print(f"Erro ao carregar 'memory.json': {e}. Usando memória vazia.")

            memory = {"events": [], "interactions": []}

        # 1. GRAVAÇÃO DE ÁUDIO

        # Chama a função que aguarda o usuário pressionar Enter para gravar

        filename_audio = record_audio()

        print(f"Áudio gravado e salvo como: {filename_audio}")

        # 2. TRANSCRIÇÃO (Whisper API)

        # Abre o arquivo de áudio gravado em modo binário ("rb")

        audio_file = open(filename_audio, "rb")

        # PONTO CRÍTICO DE ERRO (COMO VIMOS)

        # Esta linha faz a chamada para a API do Whisper da OpenAI para transcrever o áudio.

        # Se você receber um erro 'insufficient_quota' (429), é um problema de faturamento/créditos

        # na sua conta OpenAI, e não um erro de código.

        print("Enviando áudio para transcrição (API Whisper)...")

        transcription = client.audio.transcriptions.create(

            model="whisper-1", 

            file=audio_file,

            language="pt"

        )

        # Fecha o arquivo de áudio após a transcrição

        audio_file.close() 

        # 3. LIMPEZA

        # Remove o arquivo .wav para não ocupar espaço

        print(f"Removendo arquivo de áudio local: {filename_audio}")

        os.remove(filename_audio)

        # Extrai o texto da resposta de transcrição

        text = transcription.text

        print(f"Texto transcrito: '{text}'")

        print(text)

        # 4. PROCESSAMENTO (GPT-4o)

        actual_date = datetime.now().strftime("%d/%m/%Y")

        # Prepara a chamada para o modelo de chat (GPT-4o)

        # Note que estamos enviando a 'memory' atual para dar contexto ao assistente

        print("Enviando texto para processamento (API GPT-4o)...")

        completion = client.chat.completions.create(

        model="gpt-4o",

        messages=[

            {"role": "developer", "content": f"You are a helpful assistant. You are responsible for remembering events of my life. Today is {actual_date} use this as a reference to remember events. If the event occurred in the past, you should use the date to remember the event using today's date as a reference."},

            {"role": "developer", "content": f"You are a helpful assistant. You are responsible for remembering events of my life. Today is {actual_date} use this as a reference to remember events. If the event occurred in a past, you should use the date to remember the event using today's date as a reference."},

            {"role": "assistant", "content": json.dumps(memory)},

            {"role": "user", "content": text}

        ],

        tool_choice="auto",

        tool_choice="auto", # Deixa a IA decidir se usa uma ferramenta (DailyEvents)

        tools=[

            base_model2tool(DailyEvents)

            base_model2tool(DailyEvents) # Converte o Pydantic Model em uma "tool"

            ]

        )

        # 5. ANÁLISE E RESPOSTA

        # Verifica se a IA decidiu usar a ferramenta 'DailyEvents'

        if completion.choices[0].message.tool_calls:

            print("A IA decidiu usar uma ferramenta (DailyEvents).")

            for tool_call in completion.choices[0].message.tool_calls:

                if tool_call.function.name == "DailyEvents":

                    daily_events = DailyEvents(**json.loads(tool_call.function.arguments))

                    # Se a ferramenta for 'DailyEvents', extrai os argumentos

                    tool_args = json.loads(tool_call.function.arguments)

                    daily_events = DailyEvents(**tool_args)

                    # Salva os eventos na memória

                    memory["events"].append(f"Day: {daily_events.date} - {daily_events.events}")

                    # Prepara a resposta de confirmação

                    response_text = f"Evento do dia {daily_events.date} registrado com sucesso, posso te ajudar com mais alguma coisa?"

                    memory['interactions'].append(f"Human: {text}")

                    memory['interactions'].append(f"Assistant: {response_text}")

                    print(f"Resposta da IA: {response_text}")

        # Se a IA não usou uma ferramenta, ela provavelmente deu uma resposta em texto

        elif completion.choices[0].message.content:

            print("A IA respondeu com texto (sem usar ferramenta).")

            response_text = completion.choices[0].message.content

            memory['interactions'].append(f"Human: {text}")

            memory['interactions'].append(f"Assistant: Evento do dia {daily_events.date} registrado com sucesso, posso te ajudar com mais alguma coisa?")

            print(f"Evento do dia {daily_events.date} registrado com sucesso, posso te ajudar com mais alguma coisa?")

        if completion.choices[0].message.content:

            memory['interactions'].append(f"Human: {text}")

            memory['interactions'].append(f"Assistant: {completion.choices[0].message.content}")

            print(completion.choices[0].message.content)

            memory['interactions'].append(f"Assistant: {response_text}")

            print(f"Resposta da IA: {response_text}")

        else:

            print("A IA não retornou nem ferramenta, nem conteúdo.")

        with open("memory.json", "w") as f:

            json.dump(memory, f)

        # 6. SALVAMENTO

        # Salva o estado atualizado da memória de volta no arquivo .json

        print("Salvando memória atualizada em 'memory.json'...")

        with open("memory.json", "w", encoding='utf-8') as f:

            # 'ensure_ascii=False' garante que caracteres PT-BR (como 'ç' ou 'ã')

            # sejam salvos corretamente em vez de códigos unicode.

            json.dump(memory, f, ensure_ascii=False, indent=2)

agent-memory/tools/daily_events.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -5,19 +5,17 @@ class DailyEvents(BaseModel): @@
         """
         Identifica e registra eventos diários para salvar em uma planilha.
-        Args:
-            date (str): Data em que os eventos devem ser identificados, no formato YYYY-MM-DD
-            events (List[Event]): Lista de eventos identificados no dia, cada um contendo título,
-                descrição e horário
-        Returns:
-            str: Confirmação do registro dos eventos
+        O 'docstring' (esta descrição) é MUITO IMPORTANTE.
+        O GPT-4o lê esta descrição para entender O QUE esta ferramenta faz e QUANDO usá-la.
         """
+        # O 'Field' (campo) também usa sua descrição para guiar a IA
         date: str = Field(
             description="Data em que os eventos ocorreram no formato DD/MM/YYYY"
+            # A descrição diz à IA qual formato de data ela deve extrair do texto.
         )
         events: List[str] = Field(
             description="Lista de eventos identificados no dia"
-        )
+            # A IA entenderá que deve agrupar os eventos em uma lista de strings.
+        )

agent-memory/utils/basemodel2tool.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -1,3 +1,12 @@
  
    """

    Este módulo é um "tradutor" que converte um Pydantic BaseModel (como o DailyEvents)

    para o formato de esquema JSON que a API da OpenAI espera para sua funcionalidade

    de "Tool Calling" (Chamada de Ferramenta).

    A IA não pode ler o código Python do DailyEvents, mas pode ler o JSON que

    este script gera a partir dele.

    """

    from enum import Enum

    from types import UnionType

    from typing import (

    @@ -13,20 +22,14 @@
  
    def get_field_type(field: FieldInfo) -> str:
        """Return the JSON type name that corresponds to a model field.

        Args:
            field: Field metadata whose ``annotation`` attribute holds the
                Python type hint declared for the field.

        Returns:
            ``"any"`` when the field has no annotation; otherwise the name
            produced by ``get_simple_type_name`` for that annotation.
        """
        annotation = field.annotation
        return "any" if annotation is None else get_simple_type_name(annotation)

    def get_simple_type_name(type_hint: type) -> str:

        """Get simplified OpenAI-compatible type name from Python type hint.

        Args:

            type_hint (type): Python type hint to convert

        Returns:

            str: OpenAI-compatible type name

        """

        """Traduz tipos Python complexos (List, Union, etc.) para nomes de tipo JSON."""

        # Add support for | type hint E.g. str | None

        if get_origin(type_hint) in (Union, UnionType) or isinstance(type_hint, UnionType):

            types = [t for t in get_args(type_hint) if t is not type(None)]

    @@ -53,19 +56,21 @@ def get_simple_type_name(type_hint: type) -> str:
  
    def base_model2tool(model: type[BaseModel]) -> dict:

        """Convert a Pydantic BaseModel to OpenAI function format.

        """

        Função principal. Converte um Pydantic BaseModel para o formato de ferramenta OpenAI.

        Args:

            model (type[BaseModel]): Pydantic model class to convert

            model (type[BaseModel]): A classe Pydantic (ex: DailyEvents)

        Returns:

            dict: OpenAI function-calling format dictionary

            dict: Um dicionário formatado como a OpenAI espera.

        """

        # Monta a estrutura base que a API da OpenAI requer

        json_output = {

            "type": "function",

            "function": {

                "name": model.__name__,

                "description": model.__doc__ or "",

                "name": model.__name__, # Pega o nome da classe (ex: "DailyEvents")

                "description": model.__doc__ or "", # Pega o docstring (descrição)

                "parameters": {

                    "type": "object",

                    "properties": {}

    @@ -75,27 +80,33 @@ def base_model2tool(model: type[BaseModel]) -> dict:
  
        required_properties = []

        # Itera sobre todos os campos definidos no Pydantic Model (ex: 'date', 'events')

        for name, field in model.model_fields.items():

            # Traduz o tipo do campo (ex: str -> "string")

            prop_dict = {

                "type": get_field_type(field),

                "description": field.description or "",

                "description": field.description or "", # Pega a descrição do campo

            }

            # Trata casos especiais, como Listas (Arrays)

            if get_origin(field.annotation) in (list, List):

                prop_dict["type"] = "array"

                args = get_args(field.annotation)

                if args:

                    prop_dict["items"] = {"type": get_simple_type_name(args[0])}

            # Marca quais campos são obrigatórios

            if field.is_required():

                required_properties.append(name)

            if isinstance(field.annotation, type) and issubclass(field.annotation, Enum):

                prop_dict["enum"] = [str(e.value) for e in field.annotation]

            # Adiciona o campo traduzido ao JSON final

            json_output["function"]["parameters"]["properties"][name] = prop_dict

        if required_properties:

            json_output["function"]["parameters"]["required"] = required_properties

        return json_output

        # Retorna o dicionário/JSON completo

        return json_output

agent-memory/utils/record_audio.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -3,8 +3,9 @@ @@
     import time
     from datetime import datetime
     from typing import Optional
-    import sys
-    import select
+    # Modules 'sys' and 'select' are no longer used in this logic
+    # import sys
+    # import select
     def record_audio() -> Optional[str]:
         """Records audio from microphone when user presses Enter and stops when Enter is pressed again.
@@ Expand Down Expand Up / @@ -38,18 +39,24 @@ def callback(in_data, frame_count, time_info, status): @@
             stream.start_stream()
             print("Press Enter to start recording, press Enter again to stop...")
+            # This block was changed because select.select() on sys.stdin fails on Windows 11 (there it only accepts sockets):
+            #   if select.select([sys.stdin], [], [], 0.1)[0]:
+            #   OSError: [WinError 10038] An operation was attempted on something that is not a socket
+            # My solution, worked out with Gemini:
+            # 1 Wait (blocking) for 1st 'enter'
+            input()
+            # 'recording' flag
+            recording = True
+            print("Recording...")
+            # 2 Wait (blocking) for 2nd 'enter'
+            input()
-            while True:
-                # Check if Enter key was pressed
-                if select.select([sys.stdin], [], [], 0.1)[0]:
-                    sys.stdin.readline()
-                    if not recording:
-                        recording = True
-                        print("Recording...")
-                    else:
-                        break
-                time.sleep(0.1)  # Reduce CPU usage
+            # 3 Stop recording
+            # Clear the 'recording' flag
+            recording = False # optional but best practice
+            # Stop the stream here
             stream.stop_stream()
             stream.close()
@@ Expand All / @@ -67,4 +74,4 @@ def callback(in_data, frame_count, time_info, status): @@
         finally:
             audio.terminate()
-        return None
+        return None

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

cw-academy-agent windows 11 fix and improvements #5

Uh oh!

Diff view

Diff view

There are no files selected for viewing

cw-academy-agent windows 11 fix and improvements #5

Are you sure you want to change the base?

Uh oh!

cw-academy-agent windows 11 fix and improvements #5

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing