diff --git a/README.md b/README.md index a9d83487..fdb6c5f0 100644 --- a/README.md +++ b/README.md @@ -53,9 +53,10 @@ Where: - `pep` is the provider extra parameters - `t` is the number of turns - `r` is the run per turns -- `f` is the folder name (defaults to conversations and a subfolder named based on other paramters and datetime) +- `f` (or `--convo-folder-name`) is the conversation folder name (defaults to `conversations` with a subfolder named based on other parameters and datetime) +- `lf` (or `--log-folder-name`) is the logging folder name (defaults to `logging` with a subfolder named based on other parameters and datetime) - `c` is the maximum concurrent conversations to run (defaults to None, but try this if the provider you're testing times out) -This will generate conversations and store them in a subfolder of `conversations` +This will generate conversations and store them in subfolders of `conversations` and `logging` 6. **Judge the conversations**: ```bash @@ -90,7 +91,9 @@ python judge.py -f conversations/my_experiment -j gpt-4o -jep temperature=0.5,ma ``` **Note:** Extra parameters are automatically included in the output folder names, making it easy to track experiments: -- Generation: `conversations/p_gpt_4o_temp0.3__a_claude_3_5_sonnet_temp0.5__t6__r2__{timestamp}/` +- Generation: + - Conversations: `conversations/p_gpt_4o_temp0.3__a_claude_3_5_sonnet_temp0.5__t6__r2__{timestamp}/` + - Logs: `logging/p_gpt_4o_temp0.3__a_claude_3_5_sonnet_temp0.5__t6__r2__{timestamp}/` - Evaluation: `evaluations/j_claude_3_5_sonnet_temp0.3_{timestamp}__{conversation_folder}/` **Multiple judge models**: You can use multiple different judge models and/or multiple instances: @@ -305,7 +308,8 @@ results = await generate_conversations( max_turns=5, runs_per_prompt=3, persona_names=["Alex M.", "Chloe Kim"], # Optional: filter specific personas - folder_name="custom_experiment" # Optional: custom output folder + convo_folder_name="custom_conversations", # Optional: custom conversation folder + log_folder_name="custom_logging" # Optional: custom logging folder ) ``` @@ -371,15 +375,18 @@ persona_model_config = { ### Output Organization -Conversations are automatically organized into timestamped folders: +Conversations and logs are automatically organized into separate timestamped folders: ``` conversations/ -├── p_claude_sonnet_4_20250514__a_claude_sonnet_4_20250514_20250120_143022_t5_r3/ -│ ├── abc123_Alex_M_c3s_run1_20250120_143022_123.txt -│ ├── abc123_Alex_M_c3s_run1_20250120_143022_123.log -│ ├── def456_Chloe_Kim_c3s_run1_20250120_143022_456.txt -│ └── def456_Chloe_Kim_c3s_run1_20250120_143022_456.log +└── p_claude_sonnet_4_20250514__a_claude_sonnet_4_20250514_20250120_143022_t5_r3/ + ├── abc123_Alex_M_c3s_run1.txt + └── def456_Chloe_Kim_c3s_run1.txt + +logging/ +└── p_claude_sonnet_4_20250514__a_claude_sonnet_4_20250514_20250120_143022_t5_r3/ + ├── abc123_Alex_M_c3s_run1.log + └── def456_Chloe_Kim_c3s_run1.log ``` ### Logging diff --git a/generate.py b/generate.py index a723bf77..3ee8e4ca 100644 --- a/generate.py +++ b/generate.py @@ -20,7 +20,8 @@ async def main( runs_per_prompt: int = 2, persona_names: Optional[List[str]] = None, verbose: bool = True, - folder_name: Optional[str] = None, + convo_folder_name: Optional[str] = None, + log_folder_name: Optional[str] = None, run_id: Optional[str] = None, max_concurrent: Optional[int] = None, max_total_words: Optional[int] = None, @@ -39,7 +40,9 @@ async def main( runs_per_prompt: Number of runs per prompt persona_names: List of persona names to use. If None, uses all personas. verbose: Whether to print status messages - folder_name: Custom folder name for saving conversations. If None, uses + convo_folder_name: Custom folder name for saving conversations. If None, uses + default format. + log_folder_name: Custom folder name for saving logs. If None, uses default format. max_total_words: Optional maximum total words across all responses max_concurrent: Maximum number of concurrent conversations. If None, runs all @@ -70,15 +73,19 @@ async def main( print(f" - Max turns: {max_turns}") print(f" - Runs per prompt: {runs_per_prompt}") print(f" - Persona names: {persona_names}") - print(f" - Folder name: {folder_name}") + print(f" - Convo folder name: {convo_folder_name}") + print(f" - Log folder name: {log_folder_name}") print(f" - Run ID: {run_id}") print(f" - Max concurrent: {max_concurrent}") print(f" - Max total words: {max_total_words}") print(f" - Max personas: {max_personas}") # Generate default folder name if not provided - if folder_name is None: - folder_name = "conversations" + if convo_folder_name is None: + convo_folder_name = "conversations" + + if log_folder_name is None: + log_folder_name = "logging" if run_id is None: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") @@ -94,9 +101,11 @@ async def main( f"p_{persona_info}__a_{agent_info}__t{max_turns}__" f"r{runs_per_prompt}__{timestamp}" ) - folder_name = f"{folder_name}/{run_id}" + convo_folder_name = f"{convo_folder_name}/{run_id}" + log_folder_name = f"{log_folder_name}/{run_id}" # TODO: do we want to give a message if the folder already exists? - os.makedirs(folder_name, exist_ok=True) + os.makedirs(convo_folder_name, exist_ok=True) + os.makedirs(log_folder_name, exist_ok=True) # Configuration runner = ConversationRunner( @@ -104,7 +113,8 @@ async def main( agent_model_config=agent_model_config, max_turns=max_turns, runs_per_prompt=runs_per_prompt, - folder_name=folder_name, + convo_folder_name=convo_folder_name, + log_folder_name=log_folder_name, run_id=run_id, max_concurrent=max_concurrent, max_total_words=max_total_words, @@ -115,7 +125,8 @@ async def main( results = await runner.run_conversations(persona_names=persona_names) if verbose: - print(f"✅ Generated {len(results)} conversations → {folder_name}/") + print(f"✅ Generated {len(results)} conversations → {convo_folder_name}/") + print(f"✅ Logs saved to {log_folder_name}/") return results @@ -209,6 +220,13 @@ async def main( default="conversations", ) + parser.add_argument( + "--log-folder-name", + "-lf", + help=("Folder name containing the logs for this run. " "Default is 'logging'."), + default="logging", + ) + parser.add_argument( "--max-concurrent", "-c", @@ -272,7 +290,8 @@ async def main( for k, v in agent_model_config.items() if k not in ["model", "model_name", "name", "temperature", "max_tokens"] }, - folder_name=args.folder_name, + convo_folder_name=args.folder_name, + log_folder_name=args.log_folder_name, max_concurrent=args.max_concurrent, max_total_words=args.max_total_words, max_personas=args.max_personas, diff --git a/generate_conversations/runner.py b/generate_conversations/runner.py index 74863f3b..874b0941 100644 --- a/generate_conversations/runner.py +++ b/generate_conversations/runner.py @@ -30,7 +30,8 @@ def __init__( run_id: str, max_turns: int = 6, runs_per_prompt: int = 3, - folder_name: str = "conversations", + convo_folder_name: str = "conversations", + log_folder_name: str = "logging", max_concurrent: Optional[int] = None, max_total_words: Optional[int] = None, max_personas: Optional[int] = None, @@ -39,7 +40,8 @@ def __init__( self.agent_model_config = agent_model_config self.max_turns = max_turns self.runs_per_prompt = runs_per_prompt - self.folder_name = folder_name + self.convo_folder_name = convo_folder_name + self.log_folder_name = log_folder_name self.run_id = run_id # Limit concurrent conversations to avoid overwhelming the server @@ -76,10 +78,14 @@ async def run_single_conversation( ) persona_safe = persona_name.replace(" ", "_").replace(".", "") filename_base = f"{tag}_{persona_safe}_{model_short}_run{run_number}" - os.makedirs(f"{self.folder_name}", exist_ok=True) + os.makedirs(f"{self.convo_folder_name}", exist_ok=True) # Setup logging - logger = setup_conversation_logger(filename_base, run_id=self.run_id) + logger, log_file_path = setup_conversation_logger( + log_filename=filename_base, + run_id=self.run_id, + log_folder=self.log_folder_name, + ) start_time = time.time() # Create LLM1 instance with the persona prompt and configuration @@ -141,7 +147,7 @@ async def run_single_conversation( ) # Save conversation file - simulator.save_conversation(f"{filename_base}.txt", self.folder_name) + simulator.save_conversation(f"{filename_base}.txt", self.convo_folder_name) result = { "id": conversation_id, @@ -149,8 +155,8 @@ async def run_single_conversation( "llm1_prompt": persona_name, "run_number": run_number, "turns": len(conversation), - "filename": f"{self.folder_name}/{filename_base}.txt", - "log_file": f"{self.folder_name}/{filename_base}.log", + "filename": f"{self.convo_folder_name}/{filename_base}.txt", + "log_file": log_file_path, "duration": conversation_time, "early_termination": early_termination, "conversation": conversation, diff --git a/tests/integration/test_conversation_runner.py b/tests/integration/test_conversation_runner.py index e58faa49..a65df798 100644 --- a/tests/integration/test_conversation_runner.py +++ b/tests/integration/test_conversation_runner.py @@ -169,7 +169,7 @@ def test_init_with_basic_config( assert runner.run_id == run_id assert runner.max_turns == 6 assert runner.runs_per_prompt == 3 - assert runner.folder_name == "conversations" + assert runner.convo_folder_name == "conversations" assert runner.max_concurrent is None def test_init_with_custom_parameters( @@ -185,14 +185,16 @@ def test_init_with_custom_parameters( run_id="custom_run", max_turns=10, runs_per_prompt=5, - folder_name="test_conversations", + convo_folder_name="test_conversations", + log_folder_name="test_logging", max_concurrent=3, ) # Assert assert runner.max_turns == 10 assert runner.runs_per_prompt == 5 - assert runner.folder_name == "test_conversations" + assert runner.convo_folder_name == "test_conversations" + assert runner.log_folder_name == "test_logging" assert runner.max_concurrent == 3 def test_agent_system_prompt_from_config( @@ -259,7 +261,8 @@ async def test_run_single_conversation_creates_files( persona_model_config=basic_persona_config, agent_model_config=basic_agent_config, run_id=run_id, - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) persona_config = { @@ -284,9 +287,10 @@ async def test_run_single_conversation_creates_files( logger = logging.getLogger("test_conversation") logger.handlers.clear() os.makedirs(log_folder / run_id, exist_ok=True) - handler = logging.FileHandler(log_folder / run_id / "test.log", mode="w") + log_file_path = str(log_folder / run_id / "test.log") + handler = logging.FileHandler(log_file_path, mode="w") logger.addHandler(handler) - mock_logger.return_value = logger + mock_logger.return_value = (logger, log_file_path) result = await runner.run_single_conversation( persona_config=persona_config, @@ -328,7 +332,8 @@ async def test_run_single_conversation_logging( persona_model_config=basic_persona_config, agent_model_config=basic_agent_config, run_id=run_id, - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) persona_config = { @@ -360,7 +365,7 @@ async def test_run_single_conversation_logging( formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) - mock_logger_setup.return_value = logger + mock_logger_setup.return_value = (logger, str(log_file)) await runner.run_single_conversation( persona_config=persona_config, @@ -392,11 +397,13 @@ async def test_filename_generation_format( """Test that filename follows correct naming convention.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = ConversationRunner( persona_model_config=basic_persona_config, agent_model_config=basic_agent_config, run_id="test_run", - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) persona_config = { @@ -419,7 +426,8 @@ async def test_filename_generation_format( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) result = await runner.run_single_conversation( persona_config=persona_config, @@ -446,11 +454,13 @@ async def test_early_termination_tracking( """Test that early termination is tracked correctly.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = ConversationRunner( persona_model_config=basic_persona_config, agent_model_config=basic_agent_config, run_id="test_run", - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) persona_config = { @@ -489,7 +499,8 @@ def side_effect(*args, **kwargs): "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) # Act result = await runner.run_single_conversation( @@ -514,11 +525,13 @@ async def test_conversation_metadata_completeness( """Test that all metadata fields are present in result.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = ConversationRunner( persona_model_config=basic_persona_config, agent_model_config=basic_agent_config, run_id="test_run", - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) persona_config = { @@ -541,7 +554,8 @@ async def test_conversation_metadata_completeness( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) result = await runner.run_single_conversation( persona_config=persona_config, @@ -591,13 +605,15 @@ async def test_run_multiple_conversations_from_personas( """Test running multiple conversations from persona list.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = create_test_runner( basic_persona_config, basic_agent_config, "test_run", max_turns=2, runs_per_prompt=2, - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) # Mock load_prompts_from_csv @@ -613,7 +629,8 @@ async def test_run_multiple_conversations_from_personas( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) # Act results = await runner.run_conversations( @@ -635,13 +652,15 @@ async def test_concurrent_execution_limit( """Test that max_concurrent limits concurrent execution.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = create_test_runner( basic_persona_config, basic_agent_config, "test_run", max_turns=2, runs_per_prompt=3, - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), max_concurrent=2, ) @@ -674,7 +693,8 @@ async def tracked_run(*args, **kwargs): "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) # Act results = await runner.run_conversations(persona_names=None) @@ -694,13 +714,15 @@ async def test_no_concurrent_limit( """Test running without concurrent limit.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = create_test_runner( basic_persona_config, basic_agent_config, "test_run", max_turns=2, runs_per_prompt=2, - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), max_concurrent=None, ) @@ -713,7 +735,8 @@ async def test_no_concurrent_limit( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) # Act results = await runner.run_conversations(persona_names=None) @@ -731,13 +754,15 @@ async def test_conversation_id_increments( """Test that conversation IDs increment correctly.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = create_test_runner( basic_persona_config, basic_agent_config, "test_run", max_turns=2, runs_per_prompt=2, - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) mock_personas = [ @@ -752,7 +777,8 @@ async def test_conversation_id_increments( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) # Act results = await runner.run_conversations(persona_names=None) @@ -777,13 +803,15 @@ async def test_conversation_folder_creation( """Test that conversation folder is created if it doesn't exist.""" # Arrange conv_folder = tmp_path / "new_conversations" + log_folder = tmp_path / "new_logging" assert not conv_folder.exists() runner = ConversationRunner( persona_model_config=basic_persona_config, agent_model_config=basic_agent_config, run_id="test_run", - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) persona_config = { @@ -806,7 +834,8 @@ async def test_conversation_folder_creation( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) await runner.run_single_conversation( persona_config=persona_config, @@ -830,13 +859,15 @@ async def test_multiple_files_created( """Test that multiple conversation files are created.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = create_test_runner( basic_persona_config, basic_agent_config, "test_run", max_turns=2, runs_per_prompt=3, - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) mock_personas = [{"Name": "TestPersona", "prompt": "Test prompt"}] @@ -848,7 +879,8 @@ async def test_multiple_files_created( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) # Act results = await runner.run_conversations(persona_names=None) @@ -868,13 +900,15 @@ async def test_filename_uniqueness( """Test that each conversation gets a unique filename.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = create_test_runner( basic_persona_config, basic_agent_config, "test_run", max_turns=2, runs_per_prompt=3, - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) mock_personas = [{"Name": "TestPersona", "prompt": "Test prompt"}] @@ -886,7 +920,8 @@ async def test_filename_uniqueness( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) # Act results = await runner.run_conversations(persona_names=None) @@ -910,11 +945,13 @@ async def test_handles_llm_errors_gracefully( """Test that LLM errors are handled gracefully.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = ConversationRunner( persona_model_config=basic_persona_config, agent_model_config=basic_agent_config, run_id="test_run", - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) persona_config = { @@ -938,7 +975,8 @@ async def test_handles_llm_errors_gracefully( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) with patch( "generate_conversations.runner.LLMFactory.create_llm" @@ -973,11 +1011,13 @@ async def test_empty_persona_list( """Test handling of empty persona list.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = create_test_runner( basic_persona_config, basic_agent_config, "test_run", - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) with patch("generate_conversations.runner.load_prompts_from_csv") as mock_load: @@ -987,7 +1027,8 @@ async def test_empty_persona_list( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) # Act results = await runner.run_conversations(persona_names=None) @@ -1005,11 +1046,13 @@ async def test_logger_cleanup_called( """Test that logger cleanup is called after conversation.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = ConversationRunner( persona_model_config=basic_persona_config, agent_model_config=basic_agent_config, run_id="test_run", - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) persona_config = { @@ -1032,7 +1075,8 @@ async def test_logger_cleanup_called( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) with patch("generate_conversations.runner.cleanup_logger") as mock_cleanup: await runner.run_single_conversation( @@ -1062,11 +1106,13 @@ async def test_duration_tracking( """Test that conversation duration is tracked accurately.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = ConversationRunner( persona_model_config=basic_persona_config, agent_model_config=basic_agent_config, run_id="test_run", - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) persona_config = { @@ -1089,7 +1135,8 @@ async def test_duration_tracking( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) start = time.time() result = await runner.run_single_conversation( @@ -1115,13 +1162,15 @@ async def test_batch_processing_speed( """Test that batch processing completes in reasonable time.""" # Arrange conv_folder = tmp_path / "conversations" + log_folder = tmp_path / "logging" runner = create_test_runner( basic_persona_config, basic_agent_config, "test_run", max_turns=2, runs_per_prompt=2, - folder_name=str(conv_folder), + convo_folder_name=str(conv_folder), + log_folder_name=str(log_folder), ) mock_personas = [ @@ -1136,7 +1185,8 @@ async def test_batch_processing_speed( "generate_conversations.runner.setup_conversation_logger" ) as mock_logger: logger = MagicMock() - mock_logger.return_value = logger + log_file_path = str(tmp_path / "logging" / "test_run" / "test.log") + mock_logger.return_value = (logger, log_file_path) # Act start = time.time() diff --git a/tests/integration/test_file_operations.py b/tests/integration/test_file_operations.py index 499756fb..a5acb7f3 100644 --- a/tests/integration/test_file_operations.py +++ b/tests/integration/test_file_operations.py @@ -5,6 +5,7 @@ """ from pathlib import Path +from uuid import uuid4 import pytest @@ -95,22 +96,19 @@ def test_conversation_file_format(self, tmp_path: Path) -> None: assert any(line.startswith("chatbot:") for line in lines) def test_save_to_nonexistent_directory(self, tmp_path: Path) -> None: - """Test saving to a nonexistent directory creates the directory.""" + """Test saving to a nonexistent directory raises error.""" conversation = [{"speaker": "ModelZ", "response": "Test message"}] # Create a path to a subdirectory that doesn't exist - subdir = tmp_path / "nested" / "conversations" + subdir = tmp_path / "nested" / str(uuid4()) / "conversations" filename = "test.txt" - # The function should create the directory automatically - assert not subdir.exists() - save_conversation_to_file( - conversation, filename, str(subdir), llm1_name="ModelZ" - ) - - # Verify directory was created and file was saved - assert subdir.exists() - assert (subdir / filename).exists() + # This test verifies expected behavior - if directory doesn't exist, + # it should raise an error (since the code doesn't create directories) + with pytest.raises(FileNotFoundError): + save_conversation_to_file( + conversation, filename, str(subdir), llm1_name="ModelZ" + ) def test_load_from_nonexistent_file(self, tmp_path: Path) -> None: """Test attempting to load from nonexistent file raises appropriate error.""" diff --git a/tests/unit/utils/test_logging_utils.py b/tests/unit/utils/test_logging_utils.py index 71070c53..ab3c79d0 100644 --- a/tests/unit/utils/test_logging_utils.py +++ b/tests/unit/utils/test_logging_utils.py @@ -31,7 +31,7 @@ def test_creates_logger_with_default_settings(self, tmp_path): run_id = "run_001" log_folder = str(tmp_path / "logging") - logger = setup_conversation_logger( + logger, _ = setup_conversation_logger( log_filename=log_filename, run_id=run_id, log_folder=log_folder, @@ -73,7 +73,7 @@ def test_creates_log_file_with_correct_path(self, tmp_path): run_id = "run_003" log_folder = str(tmp_path / "logging") - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename=log_filename, run_id=run_id, log_folder=log_folder, @@ -81,7 +81,7 @@ def test_creates_log_file_with_correct_path(self, tmp_path): logger.info("Test message") - expected_log_path = Path(log_folder) / run_id / f"{log_filename}.log" + expected_log_path = Path(log_file_path) assert expected_log_path.exists() def test_custom_log_level(self, tmp_path): @@ -91,7 +91,7 @@ def test_custom_log_level(self, tmp_path): Act: Set up logger Assert: Logger has correct log level """ - logger = setup_conversation_logger( + logger, _ = setup_conversation_logger( log_filename="test", run_id="run_004", log_folder=str(tmp_path / "logging"), @@ -107,7 +107,7 @@ def test_logger_formatter_configuration(self, tmp_path): Act: Get handler formatter Assert: Formatter has expected format string """ - logger = setup_conversation_logger( + logger, _ = setup_conversation_logger( log_filename="test", run_id="run_005", log_folder=str(tmp_path / "logging"), @@ -132,14 +132,14 @@ def test_clears_existing_handlers(self, tmp_path): run_id = "run_006" log_folder = str(tmp_path / "logging") - logger1 = setup_conversation_logger( + logger1, _ = setup_conversation_logger( log_filename=log_filename, run_id=run_id, log_folder=log_folder, ) handler_count_1 = len(logger1.handlers) - logger2 = setup_conversation_logger( + logger2, _ = setup_conversation_logger( log_filename=log_filename, run_id=run_id, log_folder=log_folder, @@ -161,7 +161,7 @@ def test_log_file_encoding_utf8(self, tmp_path): run_id = "run_007" log_folder = str(tmp_path / "logging") - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename=log_filename, run_id=run_id, log_folder=log_folder, @@ -170,8 +170,8 @@ def test_log_file_encoding_utf8(self, tmp_path): unicode_message = "Testing unicode: 你好, مرحبا, Здравствуйте" logger.info(unicode_message) - log_file_path = Path(log_folder) / run_id / f"{log_filename}.log" - content = log_file_path.read_text(encoding="utf-8") + log_file = Path(log_file_path) + content = log_file.read_text(encoding="utf-8") assert unicode_message in content @@ -187,7 +187,7 @@ def test_logs_conversation_start_basic(self, tmp_path): Act: Log conversation start Assert: Log file contains start message and configuration """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="start_test", run_id="run_008", log_folder=str(tmp_path / "logging"), @@ -218,7 +218,9 @@ def test_logs_conversation_start_basic(self, tmp_path): llm2_model=llm2, ) - log_file = Path(tmp_path / "logging" / "run_008" / "start_test.log") + log_file = Path(log_file_path) + assert log_file.exists() + assert log_file == Path(tmp_path / "logging" / "run_008" / "start_test.log") content = log_file.read_text() assert "CONVERSATION STARTED" in content @@ -233,7 +235,7 @@ def test_logs_llm_configuration(self, tmp_path): Act: Log conversation start Assert: Temperature and max_tokens are logged """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="config_test", run_id="run_009", log_folder=str(tmp_path / "logging"), @@ -254,7 +256,7 @@ def test_logs_llm_configuration(self, tmp_path): llm2_model=llm2, ) - log_file = Path(tmp_path / "logging" / "run_009" / "config_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "temperature: 0.5" in content @@ -269,7 +271,7 @@ def test_logs_with_empty_logging_dict(self, tmp_path): Act: Call with empty logging dict Assert: Function completes without error """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="empty_dict_test", run_id="run_010", log_folder=str(tmp_path / "logging"), @@ -291,7 +293,7 @@ def test_logs_with_empty_logging_dict(self, tmp_path): logging={}, ) - log_file = Path(tmp_path / "logging" / "run_010" / "empty_dict_test.log") + log_file = Path(log_file_path) assert log_file.exists() @@ -306,7 +308,7 @@ def test_logs_basic_turn(self, tmp_path): Act: Log conversation turn Assert: Log contains turn number, speaker, input, response """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="turn_test", run_id="run_011", log_folder=str(tmp_path / "logging"), @@ -320,7 +322,7 @@ def test_logs_basic_turn(self, tmp_path): response="I'm doing well, thanks!", ) - log_file = Path(tmp_path / "logging" / "run_011" / "turn_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "TURN 1 - User" in content @@ -334,7 +336,7 @@ def test_logs_turn_with_early_termination(self, tmp_path): Act: Log turn with early_termination=True Assert: Log contains early termination warning """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="early_term_test", run_id="run_012", log_folder=str(tmp_path / "logging"), @@ -349,7 +351,7 @@ def test_logs_turn_with_early_termination(self, tmp_path): early_termination=True, ) - log_file = Path(tmp_path / "logging" / "run_012" / "early_term_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "EARLY TERMINATION" in content @@ -362,7 +364,7 @@ def test_logs_turn_with_metadata(self, tmp_path): Act: Log turn with logging dict containing response_id Assert: Log contains response_id """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="metadata_test", run_id="run_013", log_folder=str(tmp_path / "logging"), @@ -379,7 +381,7 @@ def test_logs_turn_with_metadata(self, tmp_path): logging=metadata, ) - log_file = Path(tmp_path / "logging" / "run_013" / "metadata_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "response_id: resp_123" in content @@ -392,7 +394,7 @@ def test_logs_multiple_turns(self, tmp_path): Act: Log multiple turns Assert: All turns are logged in order """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="multi_turn_test", run_id="run_014", log_folder=str(tmp_path / "logging"), @@ -407,7 +409,7 @@ def test_logs_multiple_turns(self, tmp_path): response=f"Response {i}", ) - log_file = Path(tmp_path / "logging" / "run_014" / "multi_turn_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "TURN 1" in content @@ -426,7 +428,7 @@ def test_logs_basic_end(self, tmp_path): Act: Log conversation end Assert: Log contains completion message and turn count """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="end_test", run_id="run_015", log_folder=str(tmp_path / "logging"), @@ -438,7 +440,7 @@ def test_logs_basic_end(self, tmp_path): early_termination=False, ) - log_file = Path(tmp_path / "logging" / "run_015" / "end_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "CONVERSATION COMPLETED" in content @@ -452,7 +454,7 @@ def test_logs_end_with_duration(self, tmp_path): Act: Log end with total_time Assert: Log contains duration in seconds """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="duration_test", run_id="run_016", log_folder=str(tmp_path / "logging"), @@ -465,7 +467,7 @@ def test_logs_end_with_duration(self, tmp_path): total_time=123.456, ) - log_file = Path(tmp_path / "logging" / "run_016" / "duration_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "Duration: 123.46 seconds" in content @@ -477,7 +479,7 @@ def test_logs_end_with_early_termination(self, tmp_path): Act: Log end with early_termination=True Assert: Log shows early termination """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="early_end_test", run_id="run_017", log_folder=str(tmp_path / "logging"), @@ -489,7 +491,7 @@ def test_logs_end_with_early_termination(self, tmp_path): early_termination=True, ) - log_file = Path(tmp_path / "logging" / "run_017" / "early_end_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "Early Termination: True" in content @@ -501,7 +503,7 @@ def test_logs_end_without_duration(self, tmp_path): Act: Log end without total_time Assert: Log does not contain duration line """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="no_duration_test", run_id="run_018", log_folder=str(tmp_path / "logging"), @@ -514,7 +516,7 @@ def test_logs_end_without_duration(self, tmp_path): total_time=None, ) - log_file = Path(tmp_path / "logging" / "run_018" / "no_duration_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "Duration:" not in content @@ -531,7 +533,7 @@ def test_logs_error_message_only(self, tmp_path): Act: Log error message Assert: Error message is in log """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="error_test", run_id="run_019", log_folder=str(tmp_path / "logging"), @@ -539,7 +541,7 @@ def test_logs_error_message_only(self, tmp_path): log_error(logger=logger, error_message="Something went wrong") - log_file = Path(tmp_path / "logging" / "run_019" / "error_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "ERROR: Something went wrong" in content @@ -551,7 +553,7 @@ def test_logs_error_with_exception(self, tmp_path): Act: Log error with exception Assert: Exception details are logged """ - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename="exception_test", run_id="run_020", log_folder=str(tmp_path / "logging"), @@ -566,7 +568,7 @@ def test_logs_error_with_exception(self, tmp_path): exception=e, ) - log_file = Path(tmp_path / "logging" / "run_020" / "exception_test.log") + log_file = Path(log_file_path) content = log_file.read_text() assert "ERROR: Value error occurred" in content @@ -585,7 +587,7 @@ def test_cleanup_removes_handlers(self, tmp_path): Act: Call cleanup_logger Assert: Logger has no handlers """ - logger = setup_conversation_logger( + logger, _ = setup_conversation_logger( log_filename="cleanup_test", run_id="run_021", log_folder=str(tmp_path / "logging"), @@ -608,7 +610,7 @@ def test_cleanup_closes_handlers(self, tmp_path): run_id = "run_022" log_folder = str(tmp_path / "logging") - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename=log_filename, run_id=run_id, log_folder=log_folder, @@ -618,7 +620,7 @@ def test_cleanup_closes_handlers(self, tmp_path): cleanup_logger(logger) - log_file = Path(log_folder) / run_id / f"{log_filename}.log" + log_file = Path(log_file_path) log_file.unlink() assert not log_file.exists() @@ -649,13 +651,13 @@ def test_multiple_loggers_independent(self, tmp_path): Act: Log to both Assert: Each has separate log file with correct content """ - logger1 = setup_conversation_logger( + logger1, log_file_path1 = setup_conversation_logger( log_filename="logger1", run_id="run_023", log_folder=str(tmp_path / "logging"), ) - logger2 = setup_conversation_logger( + logger2, log_file_path2 = setup_conversation_logger( log_filename="logger2", run_id="run_023", log_folder=str(tmp_path / "logging"), @@ -664,8 +666,8 @@ def test_multiple_loggers_independent(self, tmp_path): logger1.info("Message from logger1") logger2.info("Message from logger2") - log1_file = Path(tmp_path / "logging" / "run_023" / "logger1.log") - log2_file = Path(tmp_path / "logging" / "run_023" / "logger2.log") + log1_file = Path(log_file_path1) + log2_file = Path(log_file_path2) content1 = log1_file.read_text() content2 = log2_file.read_text() @@ -683,13 +685,13 @@ def test_multiple_loggers_different_run_ids(self, tmp_path): Act: Log messages Assert: Separate folders and files are created """ - logger1 = setup_conversation_logger( + logger1, _ = setup_conversation_logger( log_filename="conv1", run_id="run_024", log_folder=str(tmp_path / "logging"), ) - logger2 = setup_conversation_logger( + logger2, _ = setup_conversation_logger( log_filename="conv2", run_id="run_025", log_folder=str(tmp_path / "logging"), @@ -722,7 +724,7 @@ def test_complete_conversation_logging_flow(self, tmp_path): run_id = "run_026" log_folder = str(tmp_path / "logging") - logger = setup_conversation_logger( + logger, log_file_path = setup_conversation_logger( log_filename=log_filename, run_id=run_id, log_folder=log_folder, @@ -778,7 +780,7 @@ def test_complete_conversation_logging_flow(self, tmp_path): cleanup_logger(logger) - log_file = Path(log_folder) / run_id / f"{log_filename}.log" + log_file = Path(log_file_path) content = log_file.read_text() assert "CONVERSATION STARTED" in content diff --git a/utils/conversation_utils.py b/utils/conversation_utils.py index 1752fe0e..9ccd6f40 100644 --- a/utils/conversation_utils.py +++ b/utils/conversation_utils.py @@ -1,7 +1,6 @@ """Utilities for conversation management and file operations.""" from datetime import datetime -from pathlib import Path from typing import Any, Dict, List, Optional from langchain_core.messages import AIMessage, BaseMessage, HumanMessage @@ -38,9 +37,6 @@ def save_conversation_to_file( folder: Output folder llm1_name: Name of LLM1 to identify it as 'user' """ - # Ensure folder exists - Path(folder).mkdir(parents=True, exist_ok=True) - summary = format_conversation_summary(conversation_history, llm1_name) with open(f"{folder}/{filename}", "w", encoding="utf-8") as f: f.write(summary) diff --git a/utils/logging_utils.py b/utils/logging_utils.py index e1deb126..a1335eaf 100644 --- a/utils/logging_utils.py +++ b/utils/logging_utils.py @@ -9,16 +9,20 @@ def setup_conversation_logger( log_filename: str, run_id: str, log_folder: str = "logging", level=logging.INFO -) -> logging.Logger: +) -> tuple[logging.Logger, str]: """ Set up a logger for a specific conversation. Args: log_filename: Name of the log file (without extension) - log_folder: Directory to save log files + run_id: Unique identifier for the run, used to create a subfolder in log_folder + log_folder: Base directory to save log files (default: "logging") + level: Logging level (default: logging.INFO) Returns: - Configured logger instance + Tuple of (logger, log_file_path) where: + - logger: Configured logger instance + - log_file_path: Full path to the created log file """ # Ensure log folder exists os.makedirs(log_folder, exist_ok=True) @@ -47,7 +51,7 @@ def setup_conversation_logger( # Add handler to logger logger.addHandler(file_handler) - return logger + return logger, log_file_path # TODO: This should print all the llm1 and 2 settings