Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ Where:
- `pep` is the provider extra parameters
- `t` is the number of turns
- `r` is the number of runs per prompt
- `f` is the folder name (defaults to conversations and a subfolder named based on other parameters and datetime)
- `f` (or `--convo-folder-name`) is the conversation folder name (defaults to `conversations` with a subfolder named based on other parameters and datetime)
- `lf` (or `--log-folder-name`) is the logging folder name (defaults to `logging` with a subfolder named based on other parameters and datetime)
- `c` is the maximum number of concurrent conversations to run (defaults to None; try setting it if the provider you're testing times out)
This will generate conversations and store them in a subfolder of `conversations`
This will generate conversations and store them in a subfolder of `conversations`, with the corresponding logs stored in a subfolder of `logging`.

6. **Judge the conversations**:
```bash
Expand Down Expand Up @@ -90,7 +91,9 @@ python judge.py -f conversations/my_experiment -j gpt-4o -jep temperature=0.5,ma
```

**Note:** Extra parameters are automatically included in the output folder names, making it easy to track experiments:
- Generation: `conversations/p_gpt_4o_temp0.3__a_claude_3_5_sonnet_temp0.5__t6__r2__{timestamp}/`
- Generation:
- Conversations: `conversations/p_gpt_4o_temp0.3__a_claude_3_5_sonnet_temp0.5__t6__r2__{timestamp}/`
- Logs: `logging/p_gpt_4o_temp0.3__a_claude_3_5_sonnet_temp0.5__t6__r2__{timestamp}/`
- Evaluation: `evaluations/j_claude_3_5_sonnet_temp0.3_{timestamp}__{conversation_folder}/`

**Multiple judge models**: You can use multiple different judge models and/or multiple instances:
Expand Down Expand Up @@ -305,7 +308,8 @@ results = await generate_conversations(
max_turns=5,
runs_per_prompt=3,
persona_names=["Alex M.", "Chloe Kim"], # Optional: filter specific personas
folder_name="custom_experiment" # Optional: custom output folder
convo_folder_name="custom_conversations", # Optional: custom conversation folder
log_folder_name="custom_logging" # Optional: custom logging folder
)
```

Expand Down Expand Up @@ -371,15 +375,18 @@ persona_model_config = {

### Output Organization

Conversations are automatically organized into timestamped folders:
Conversations and logs are automatically organized into separate timestamped folders:

```
conversations/
├── p_claude_sonnet_4_20250514__a_claude_sonnet_4_20250514_20250120_143022_t5_r3/
│ ├── abc123_Alex_M_c3s_run1_20250120_143022_123.txt
│ ├── abc123_Alex_M_c3s_run1_20250120_143022_123.log
│ ├── def456_Chloe_Kim_c3s_run1_20250120_143022_456.txt
│ └── def456_Chloe_Kim_c3s_run1_20250120_143022_456.log
└── p_claude_sonnet_4_20250514__a_claude_sonnet_4_20250514_20250120_143022_t5_r3/
├── abc123_Alex_M_c3s_run1.txt
└── def456_Chloe_Kim_c3s_run1.txt

logging/
└── p_claude_sonnet_4_20250514__a_claude_sonnet_4_20250514_20250120_143022_t5_r3/
├── abc123_Alex_M_c3s_run1.log
└── def456_Chloe_Kim_c3s_run1.log
```

### Logging
Expand Down
39 changes: 29 additions & 10 deletions generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ async def main(
runs_per_prompt: int = 2,
persona_names: Optional[List[str]] = None,
verbose: bool = True,
folder_name: Optional[str] = None,
convo_folder_name: Optional[str] = None,
log_folder_name: Optional[str] = None,
run_id: Optional[str] = None,
max_concurrent: Optional[int] = None,
max_total_words: Optional[int] = None,
Expand All @@ -39,7 +40,9 @@ async def main(
runs_per_prompt: Number of runs per prompt
persona_names: List of persona names to use. If None, uses all personas.
verbose: Whether to print status messages
folder_name: Custom folder name for saving conversations. If None, uses
convo_folder_name: Custom folder name for saving conversations. If None, uses
default format.
log_folder_name: Custom folder name for saving logs. If None, uses
default format.
max_total_words: Optional maximum total words across all responses
max_concurrent: Maximum number of concurrent conversations. If None, runs all
Expand Down Expand Up @@ -70,15 +73,19 @@ async def main(
print(f" - Max turns: {max_turns}")
print(f" - Runs per prompt: {runs_per_prompt}")
print(f" - Persona names: {persona_names}")
print(f" - Folder name: {folder_name}")
print(f" - Convo folder name: {convo_folder_name}")
print(f" - Log folder name: {log_folder_name}")
print(f" - Run ID: {run_id}")
print(f" - Max concurrent: {max_concurrent}")
print(f" - Max total words: {max_total_words}")
print(f" - Max personas: {max_personas}")

# Generate default folder name if not provided
if folder_name is None:
folder_name = "conversations"
if convo_folder_name is None:
convo_folder_name = "conversations"

if log_folder_name is None:
log_folder_name = "logging"

if run_id is None:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
Expand All @@ -94,17 +101,20 @@ async def main(
f"p_{persona_info}__a_{agent_info}__t{max_turns}__"
f"r{runs_per_prompt}__{timestamp}"
)
folder_name = f"{folder_name}/{run_id}"
convo_folder_name = f"{convo_folder_name}/{run_id}"
log_folder_name = f"{log_folder_name}/{run_id}"
# TODO: do we want to give a message if the folder already exists?
os.makedirs(folder_name, exist_ok=True)
os.makedirs(convo_folder_name, exist_ok=True)
os.makedirs(log_folder_name, exist_ok=True)

# Configuration
runner = ConversationRunner(
persona_model_config=persona_model_config,
agent_model_config=agent_model_config,
max_turns=max_turns,
runs_per_prompt=runs_per_prompt,
folder_name=folder_name,
convo_folder_name=convo_folder_name,
log_folder_name=log_folder_name,
run_id=run_id,
max_concurrent=max_concurrent,
max_total_words=max_total_words,
Expand All @@ -115,7 +125,8 @@ async def main(
results = await runner.run_conversations(persona_names=persona_names)

if verbose:
print(f"✅ Generated {len(results)} conversations → {folder_name}/")
print(f"✅ Generated {len(results)} conversations → {convo_folder_name}/")
print(f"✅ Logs saved to {log_folder_name}/")

return results

Expand Down Expand Up @@ -209,6 +220,13 @@ async def main(
default="conversations",
)

parser.add_argument(
"--log-folder-name",
"-lf",
help=("Folder name containing the logs for this run. " "Default is 'logging'."),
default="logging",
)

parser.add_argument(
"--max-concurrent",
"-c",
Expand Down Expand Up @@ -272,7 +290,8 @@ async def main(
for k, v in agent_model_config.items()
if k not in ["model", "model_name", "name", "temperature", "max_tokens"]
},
folder_name=args.folder_name,
convo_folder_name=args.folder_name,
log_folder_name=args.log_folder_name,
max_concurrent=args.max_concurrent,
max_total_words=args.max_total_words,
max_personas=args.max_personas,
Expand Down
20 changes: 13 additions & 7 deletions generate_conversations/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def __init__(
run_id: str,
max_turns: int = 6,
runs_per_prompt: int = 3,
folder_name: str = "conversations",
convo_folder_name: str = "conversations",
log_folder_name: str = "logging",
max_concurrent: Optional[int] = None,
max_total_words: Optional[int] = None,
max_personas: Optional[int] = None,
Expand All @@ -39,7 +40,8 @@ def __init__(
self.agent_model_config = agent_model_config
self.max_turns = max_turns
self.runs_per_prompt = runs_per_prompt
self.folder_name = folder_name
self.convo_folder_name = convo_folder_name
self.log_folder_name = log_folder_name
self.run_id = run_id

# Limit concurrent conversations to avoid overwhelming the server
Expand Down Expand Up @@ -76,10 +78,14 @@ async def run_single_conversation(
)
persona_safe = persona_name.replace(" ", "_").replace(".", "")
filename_base = f"{tag}_{persona_safe}_{model_short}_run{run_number}"
os.makedirs(f"{self.folder_name}", exist_ok=True)
os.makedirs(f"{self.convo_folder_name}", exist_ok=True)

# Setup logging
logger = setup_conversation_logger(filename_base, run_id=self.run_id)
logger, log_file_path = setup_conversation_logger(
log_filename=filename_base,
run_id=self.run_id,
log_folder=self.log_folder_name,
)
start_time = time.time()

# Create LLM1 instance with the persona prompt and configuration
Expand Down Expand Up @@ -141,16 +147,16 @@ async def run_single_conversation(
)

# Save conversation file
simulator.save_conversation(f"{filename_base}.txt", self.folder_name)
simulator.save_conversation(f"{filename_base}.txt", self.convo_folder_name)

result = {
"id": conversation_id,
"llm1_model": model_name,
"llm1_prompt": persona_name,
"run_number": run_number,
"turns": len(conversation),
"filename": f"{self.folder_name}/{filename_base}.txt",
"log_file": f"{self.folder_name}/{filename_base}.log",
"filename": f"{self.convo_folder_name}/{filename_base}.txt",
"log_file": log_file_path,
"duration": conversation_time,
"early_termination": early_termination,
"conversation": conversation,
Expand Down
Loading