Skip to content

Commit d9c1feb

Browse files
author
EgonBot
committed
fix: per-archetype PremortemTask decomposition for small-model compatibility
PremortemAnalysis required the LLM to emit a deeply nested schema in one call: 3 AssumptionItem + 3 FailureModeItem, 11+ required fields each, with linked cross-reference IDs. Local small models (Qwen 3.5-35B, GLM 4.7 Flash) echoed the schema structure back instead of producing values, exhausting all retries. Fix: decompose into one independent LLM call per archetype using ArchetypeNarrative (6 plain text fields, no IDs). Code assembles AssumptionItem + FailureModeItem from the narrative and assigns all IDs and cross-references. Changes (3 hunks): 1. Add ArchetypeNarrative schema (after PremortemAnalysis). Includes an 'archetype' field so the LLM can adapt the category name to the specific project rather than being locked to hardcoded labels. 2. Rewrite execute() to run num_rounds × 3 archetypes (3×3=9 calls in ALL_DETAILS, 1×3=3 in FAST mode), restoring the original 9+9 assumption/failure-mode volume. Archetype suggestions guide the LLM; the returned narrative.archetype is used in the output (LLM may rename/adapt it per project). Failed archetype calls are skipped gracefully; first call failure raises. 3. Fix _calculate_risk_level_verbose: return 'Not Scored' when likelihood or impact is None (was rendering 'Likelihood None/5, Impact None/5'). Validated: PremortemTask PASSED on GLM 4.7 Flash (HVT_minimal run).
1 parent 70918f9 commit d9c1feb

1 file changed

Lines changed: 66 additions & 65 deletions

File tree

worker_plan/worker_plan_internal/diagnostics/premortem.py

Lines changed: 66 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,15 @@ class PremortemAnalysis(BaseModel):
7171
assumptions_to_kill: List[AssumptionItem] = Field(description="A list of 3 new, critical, underlying assumptions to test immediately.")
7272
failure_modes: List[FailureModeItem] = Field(description="A list containing exactly 3 distinct failure failure_modes, one for each archetype.")
7373

74+
class ArchetypeNarrative(BaseModel):
    """One archetype's premortem narrative, kept deliberately flat.

    Contains only plain free-text fields so that small local models can fill
    it in reliably. All IDs and cross-references (assumption_id,
    failure_mode_index, root-cause links) are assigned by the calling code
    afterwards — the LLM never has to emit linked identifiers.
    """

    # Category label for this failure; the LLM may rename/adapt it per project.
    archetype: str = Field(description="The failure archetype category most relevant to this project and scenario (e.g. 'Process/Financial', 'Technical/Logistical', 'Market/Human', or a more specific variant).")
    # The single load-bearing assumption behind this failure scenario.
    assumption: str = Field(description="One critical assumption the project is making that, if false, would cause this failure.")
    # Immediate, concrete validation step for the assumption above.
    test_now: str = Field(description="One concrete action to immediately test if this assumption holds.")
    # Headline for the failure scenario.
    failure_title: str = Field(description="A short, compelling title for this failure scenario (e.g. 'The Gridlock Gamble').")
    # Cause → chain of events → impact, as narrative prose.
    failure_story: str = Field(description="A detailed narrative of how this failure unfolds. Explain causes, chain of events, and impact.")
    # Observable leading indicators (description asks for 2-4; not enforced by schema).
    warning_signs: List[str] = Field(description="2-4 observable signals that this failure is beginning to occur.")
82+
7483
PREMORTEM_SYSTEM_PROMPT = """
7584
Persona: You are a senior project analyst. Your primary goal is to write compelling, detailed, and distinct failure stories that are also operationally actionable.
7685
@@ -119,92 +128,84 @@ def execute(cls, llm_executor: LLMExecutor, speed_vs_detail: SpeedVsDetailEnum,
119128
logger.debug(f"User Prompt:\n{user_prompt}")
120129
system_prompt = PREMORTEM_SYSTEM_PROMPT.strip()
121130

122-
accumulated_chat_message_list = [
123-
ChatMessage(
124-
role=MessageRole.SYSTEM,
125-
content=system_prompt,
126-
)
127-
]
128-
129-
user_prompt_list = [
130-
user_prompt,
131-
"Generate 3 new assumptions that are thematically different from the previous ones. Start assumption_id at A4.",
132-
"Generate 3 new assumptions that are thematically different from the previous ones and covers different archetypes. Start assumption_id at A7.",
133-
]
131+
# Archetype suggestions guide the LLM; the LLM writes the actual archetype name
132+
# in ArchetypeNarrative.archetype, adapting it to the specific project if needed.
133+
archetype_suggestions = ["Process/Financial", "Technical/Logistical", "Market/Human"]
134+
num_rounds = 1 if speed_vs_detail == SpeedVsDetailEnum.FAST_BUT_SKIP_DETAILS else 3
134135
if speed_vs_detail == SpeedVsDetailEnum.FAST_BUT_SKIP_DETAILS:
135-
user_prompt_list = user_prompt_list[:1]
136-
logger.info("Running in FAST_BUT_SKIP_DETAILS mode. Omitting some assumptions.")
136+
logger.info("Running in FAST_BUT_SKIP_DETAILS mode. 1 round × 3 archetypes = 3 calls.")
137137
else:
138-
logger.info("Running in ALL_DETAILS_BUT_SLOW mode. Processing all assumptions.")
138+
logger.info("Running in ALL_DETAILS_BUT_SLOW mode. 3 rounds × 3 archetypes = 9 calls.")
139139

140-
responses: list[PremortemAnalysis] = []
140+
assumptions_to_kill: list[AssumptionItem] = []
141+
failure_modes: list[FailureModeItem] = []
141142
metadata_list: list[dict] = []
142-
for user_prompt_index, user_prompt_item in enumerate(user_prompt_list):
143-
logger.info(f"Processing user_prompt_index: {user_prompt_index+1} of {len(user_prompt_list)}")
144-
chat_message_list = accumulated_chat_message_list.copy()
145-
chat_message_list.append(
146-
ChatMessage(
147-
role=MessageRole.USER,
148-
content=user_prompt_item,
143+
144+
call_index = 0
145+
first_call = True
146+
for round_index in range(num_rounds):
147+
for archetype_suggestion in archetype_suggestions:
148+
call_index += 1
149+
assumption_id = f"A{len(assumptions_to_kill) + 1}"
150+
failure_mode_index = len(failure_modes) + 1
151+
logger.info(f"Call {call_index}: round={round_index+1}, suggestion={archetype_suggestion!r}")
152+
153+
archetype_user_prompt = (
154+
f"{user_prompt}\n\n"
155+
f"Suggested archetype: {archetype_suggestion}\n"
156+
f"Write one assumption and one failure scenario. "
157+
f"Adapt the archetype name if a more specific label fits the project."
149158
)
150-
)
159+
chat_message_list = [
160+
ChatMessage(role=MessageRole.SYSTEM, content=system_prompt),
161+
ChatMessage(role=MessageRole.USER, content=archetype_user_prompt),
162+
]
151163

152164
def execute_function(llm: LLM) -> dict:
153-
sllm = llm.as_structured_llm(PremortemAnalysis)
165+
sllm = llm.as_structured_llm(ArchetypeNarrative)
154166
start_time = time.perf_counter()
155-
156167
chat_response = sllm.chat(chat_message_list)
157-
pydantic_response = chat_response.raw
158-
168+
narrative = require_raw(chat_response, ArchetypeNarrative)
159169
end_time = time.perf_counter()
160170
duration = int(ceil(end_time - start_time))
161-
162171
metadata = dict(llm.metadata)
163172
metadata["llm_classname"] = llm.class_name()
164173
metadata["duration"] = duration
165-
166-
return {
167-
"pydantic_response": pydantic_response,
168-
"metadata": metadata,
169-
"duration": duration
170-
}
174+
return {"narrative": narrative, "metadata": metadata}
171175

172176
try:
173177
result = llm_executor.run(execute_function)
174178
except PipelineStopRequested:
175-
# Re-raise PipelineStopRequested without wrapping it
176179
raise
177180
except Exception as e:
178-
logger.debug(f"LLM chat interaction failed: {e}")
179-
logger.error("LLM chat interaction failed.", exc_info=True)
180-
if user_prompt_index == 0:
181-
logger.error("The first user prompt failed. This is a critical error. Please check the system prompt and user prompt.")
182-
raise ValueError("LLM chat interaction failed.") from e
183-
else:
184-
logger.error(f"User prompt {user_prompt_index+1} failed. Continuing with next user prompt.")
185-
continue
186-
187-
assistant_content_raw: dict = result["pydantic_response"].model_dump()
188-
# Compact JSON without newlines and spaces, since it's going to be parsed by the LLM. Pretty printing wastes input tokens for the LLM.
189-
assistant_content: str = json.dumps(assistant_content_raw, separators=(',', ':'))
190-
191-
chat_message_list.append(
192-
ChatMessage(
193-
role=MessageRole.ASSISTANT,
194-
content=assistant_content,
195-
)
196-
)
197-
198-
responses.append(result["pydantic_response"])
181+
logger.error(f"Call {call_index} failed: {e}", exc_info=True)
182+
if first_call:
183+
raise ValueError(f"First archetype call failed: {e}") from e
184+
logger.warning(f"Skipping suggestion {archetype_suggestion!r} due to failure.")
185+
continue
186+
187+
first_call = False
188+
narrative: ArchetypeNarrative = result["narrative"]
199189
metadata_list.append(result["metadata"])
200-
accumulated_chat_message_list = chat_message_list.copy()
201190

202-
# Use the last response as the primary result
203-
assumptions_to_kill: list[AssumptionItem] = []
204-
failure_modes: list[FailureModeItem] = []
205-
for response in responses:
206-
assumptions_to_kill.extend(response.assumptions_to_kill)
207-
failure_modes.extend(response.failure_modes)
191+
# Code assigns IDs and cross-references — the LLM only provides narrative text.
192+
# The actual archetype name comes from narrative.archetype (LLM adapts to the project).
193+
assumption = AssumptionItem(
194+
assumption_id=assumption_id,
195+
statement=narrative.assumption,
196+
test_now=narrative.test_now,
197+
falsifier=f"Result of: {narrative.test_now} — reveals the assumption does not hold.",
198+
)
199+
failure_mode = FailureModeItem(
200+
failure_mode_index=failure_mode_index,
201+
root_cause_assumption_id=assumption_id,
202+
failure_mode_archetype=narrative.archetype,
203+
failure_mode_title=narrative.failure_title,
204+
risk_analysis=narrative.failure_story,
205+
early_warning_signs=narrative.warning_signs,
206+
)
207+
assumptions_to_kill.append(assumption)
208+
failure_modes.append(failure_mode)
208209

209210
final_response = PremortemAnalysis(
210211
assumptions_to_kill=assumptions_to_kill,
@@ -286,7 +287,7 @@ def _calculate_risk_level_brief(likelihood: Optional[int], impact: Optional[int]
286287
def _calculate_risk_level_verbose(likelihood: Optional[int], impact: Optional[int]) -> str:
287288
"""Calculates a qualitative risk level from likelihood and impact scores."""
288289
if likelihood is None or impact is None:
289-
return f"Likelihood {likelihood}/5, Impact {impact}/5"
290+
return "Not Scored"
290291

291292
score = likelihood * impact
292293
if score >= 15:

0 commit comments

Comments
 (0)