diff --git a/.gitignore b/.gitignore index 081532d..6a75f19 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ __pycache__/ .argusbot/ *.egg-info/ *.log +build/ +CLAUDE.md diff --git a/codex_autoloop/core/engine.py b/codex_autoloop/core/engine.py index 1027345..f761513 100644 --- a/codex_autoloop/core/engine.py +++ b/codex_autoloop/core/engine.py @@ -473,7 +473,7 @@ def _maybe_run_planner( current_plan_mode = self._current_plan_mode() if current_plan_mode == "off" or self.planner is None: return None - plan = self.planner.evaluate( + plan, raw_output = self.planner.evaluate_with_raw_output( objective=self.config.objective, plan_messages=self.state_store.list_messages_for_role("plan"), round_index=round_index, @@ -500,6 +500,7 @@ def _maybe_run_planner( "next_explore": plan.next_explore, "main_instruction": plan.main_instruction, "review_instruction": plan.review_instruction, + "raw_output": raw_output, } ) return plan diff --git a/codex_autoloop/feishu_adapter.py b/codex_autoloop/feishu_adapter.py index bcf1ddd..8d410c2 100644 --- a/codex_autoloop/feishu_adapter.py +++ b/codex_autoloop/feishu_adapter.py @@ -4,6 +4,7 @@ import mimetypes import re import socket +import ssl import threading import time import urllib.error @@ -16,10 +17,249 @@ from .telegram_control import normalize_command_prefix, parse_command_text, parse_mode_selection_text from .telegram_notifier import format_event_message +from .md_checker import validate_and_fix_markdown, quick_fix_for_feishu, check_markdown +from .output_extractor import ( + extract_and_format_reviewer, + extract_and_format_planner, + extract_message_content, +) + +__all__ = [ + # Core classes + 'FeishuNotifier', + 'FeishuCommandPoller', + 'FeishuConfig', + 'FeishuCommand', + # Constants + 'FEISHU_CARD_MAX_BYTES', + 'FEISHU_TEXT_MAX_BYTES', + 'FEISHU_ERROR_CODE_MESSAGE_TOO_LONG', + 'FEISHU_ERROR_CODE_CARD_CONTENT_FAILED', + # Utilities + 'split_feishu_message', + 'markdown_to_feishu_post', + 'build_interactive_card', + 'format_feishu_event_card', + 'format_feishu_event_message', + 'strip_leading_feishu_mentions', + 'parse_feishu_command_text', + 'is_feishu_self_message', + 'extract_feishu_text', +] _FEISHU_MENTION_PREFIX = re.compile(r"^(?:@[_\w-]+\s+)+") -IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp"} -VIDEO_EXTENSIONS = {".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"} +_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp"} +_VIDEO_EXTENSIONS = {".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"} + +# 飞书消息长度限制 (官方文档) +# 卡片消息:30 KB (请求体最大长度,包含模板数据) +# 文本消息:150 KB +# 错误码:230025 - 消息体长度超出限制 +# 230099 - 创建卡片内容失败 +FEISHU_CARD_MAX_BYTES = 30 * 1024 # 30 KB +FEISHU_TEXT_MAX_BYTES = 150 * 1024 # 150 KB +FEISHU_ERROR_CODE_MESSAGE_TOO_LONG = 230025 +FEISHU_ERROR_CODE_CARD_CONTENT_FAILED = 230099 + + +def markdown_to_feishu_post(text: str, title: str = "ArgusBot Update") -> dict[str, Any]: + """Convert Markdown text to Feishu post message format. + + Feishu post message format: + { + "msg_type": "post", + "content": { + "zh_cn": { + "title": "...", + "content": [ + [{"tag": "text", "text": "..."}], + ... + ] + } + } + } + + Handles: + - Bold: **text** → clean text + - Lists: - item → • item + - Headers: ### title → title with newlines + - Code blocks: ```lang ... ``` → preserved content + - Regular paragraphs + """ + lines = text.split('\n') + content_blocks: list[list[dict[str, Any]]] = [] + + in_code_block = False + code_block_content: list[str] = [] + + for line in lines: + stripped = line.strip() + + # Handle code block start/end + if stripped.startswith('```'): + if in_code_block: + # End of code block - emit as formatted code + code_text = '\n'.join(code_block_content) + content_blocks.append([{ + "tag": "text", + "text": f"\n```\n{code_text}\n```\n" + }]) + code_block_content = [] + in_code_block = False + else: + # Start of code block + in_code_block = True + continue + + if in_code_block: + code_block_content.append(line) + continue + + # Skip empty lines + if not stripped: + continue + + # Handle bold: **text** + if re.match(r'^\*\*.*\*\*$', stripped): + clean_text = stripped.replace('**', '') + content_blocks.append([{ + "tag": "text", + "text": clean_text + }]) + continue + + # Handle list items: - item + if re.match(r'^-\s+.*$', stripped): + item_text = re.sub(r'^-\s+', '', stripped) + content_blocks.append([{ + "tag": "text", + "text": f"• {item_text}" + }]) + continue + + # Handle headers: ### title → title with newlines + if re.match(r'^###\s+.*$', stripped): + title_text = re.sub(r'^###\s+', '', stripped) + content_blocks.append([{ + "tag": "text", + "text": f"\n{title_text}\n" + }]) + continue + + # Handle ## headers (main sections) + if re.match(r'^##\s+.*$', stripped): + title_text = re.sub(r'^##\s+', '', stripped) + content_blocks.append([{ + "tag": "text", + "text": f"\n\n**{title_text}**\n" + }]) + continue + + # Regular paragraphs + if stripped: + content_blocks.append([{ + "tag": "text", + "text": stripped + }]) + + # Handle unclosed code block + if in_code_block and code_block_content: + code_text = '\n'.join(code_block_content) + content_blocks.append([{ + "tag": "text", + "text": f"\n```\n{code_text}\n```\n" + }]) + + return { + "msg_type": "post", + "content": { + "zh_cn": { + "title": title, + "content": content_blocks if content_blocks else [[{"tag": "text", "text": text}]] + } + } + } + + +def build_interactive_card( + title: str, + content: str, + template: str = "blue", + actions: list[dict] | None = None, + wide_screen_mode: bool = True, +) -> dict[str, Any]: + """Build an interactive card message for Feishu. + + Args: + title: Card header title text + content: Main content text (supports Markdown-like formatting) + template: Header color template (blue, green, red, yellow, purple, gray) + actions: Optional list of action buttons + wide_screen_mode: Enable wide screen mode + + Returns: + Interactive card message dict ready to be sent + """ + elements: list[dict] = [] + + # Add content as div element + if content: + elements.append({ + "tag": "div", + "text": { + "tag": "lark_md", + "content": content + } + }) + + # Add action buttons if provided + if actions: + elements.append({ + "tag": "action", + "actions": actions + }) + + card_content = { + "config": { + "wide_screen_mode": wide_screen_mode + }, + "header": { + "title": { + "tag": "plain_text", + "content": title + }, + "template": template + }, + "elements": elements + } + + return card_content + + +# def _strip_markdown_code_blocks(text: str) -> str: +# """Remove markdown code block markers (```lang ... ```) from text. + +# Feishu post messages don't support Markdown rendering, so we strip +# the code block markers to display the content as plain text. + +# Example: +# Input: "```json\\n{...}\\n```" +# Output: "{...}" +# """ +# if not text: +# return text + +# result = text +# # Pattern: ```(language)?\\n(content)``` +# # Match code blocks and keep only the content +# pattern = r"```(\w*)?\s*(.*?)\s*```" + +# def replace_code_block(match: re.Match) -> str: +# content = match.group(2) or "" +# return content.strip() + +# result = re.sub(pattern, replace_code_block, result, flags=re.DOTALL) +# return result @dataclass @@ -40,10 +280,19 @@ class FeishuConfig: events: set[str] receive_id_type: str = "chat_id" timeout_seconds: int = 10 + wide_screen_mode: bool = True + card_template_id: str | None = None class FeishuTokenManager: - def __init__(self, *, app_id: str, app_secret: str, timeout_seconds: int, on_error: ErrorCallback | None) -> None: + def __init__( + self, + *, + app_id: str, + app_secret: str, + timeout_seconds: int, + on_error: ErrorCallback | None, + ) -> None: self.app_id = app_id self.app_secret = app_secret self.timeout_seconds = timeout_seconds @@ -98,21 +347,71 @@ def notify_event(self, event: dict[str, Any]) -> None: event_type = str(event.get("type", "")) if event_type not in self.config.events: return - message = format_feishu_event_message(event) - if message: - self.send_message(message) + + # Try to format as interactive card first + card_result = format_feishu_event_card(event) + if card_result: + title, content, template = card_result + self.send_card_message(title=title, content=content, template=template) + else: + # Fallback to text-based message for events that don't support cards + message = format_feishu_event_message(event) + if message: + self.send_message(message) def send_message(self, message: str) -> bool: + """Send a text message using interactive card format with markdown element. + + Uses interactive card format with markdown element for proper Markdown rendering. + This supports: + - Headers: # H1, ## H2, ### H3 + - Bold: **text** + - Italic: *text* + - Lists: - item + - Links: [text](url) + - Code blocks: ```lang ... ``` + + Before sending, validates and fixes common Markdown issues: + - Unclosed code blocks + - Missing newlines after headers + - Incorrect list formatting + + Note: Message chunks are limited to FEISHU_CARD_MAX_BYTES (30 KB) to avoid + error 230025 (message too long) and 230099 (card content failed). + """ token = self._tokens.get_token() if not token: return False + + # Validate and fix Markdown before sending + fixed_message = validate_and_fix_markdown(message) + ok = True - for chunk in split_feishu_message(message): + for chunk in split_feishu_message(fixed_message, max_chunk_bytes=FEISHU_CARD_MAX_BYTES): + # Build card content with markdown element for Markdown support + card_content = { + "config": { + "wide_screen_mode": self.config.wide_screen_mode + }, + "header": { + "title": { + "tag": "plain_text", + "content": "ArgusBot Update" + }, + "template": "blue" + }, + "elements": [ + { + "tag": "markdown", + "content": chunk + } + ] + } ok = ( self._send_structured_message( token=token, - msg_type="text", - content={"text": chunk}, + msg_type="interactive", + content=card_content, ) and ok ) @@ -133,7 +432,7 @@ def send_local_file(self, path: str | Path, *, caption: str = "") -> bool: return False suffix = file_path.suffix.lower() - if suffix in IMAGE_EXTENSIONS: + if suffix in _IMAGE_EXTENSIONS: image_key = self._upload_image(token=token, file_name=file_path.name, file_bytes=file_bytes) if not image_key: return False @@ -143,7 +442,7 @@ def send_local_file(self, path: str | Path, *, caption: str = "") -> bool: content={"image_key": image_key}, ) else: - is_video = suffix in VIDEO_EXTENSIONS + is_video = suffix in _VIDEO_EXTENSIONS file_type = "mp4" if is_video and suffix == ".mp4" else "stream" file_key = self._upload_file( token=token, @@ -170,6 +469,52 @@ def send_local_file(self, path: str | Path, *, caption: str = "") -> bool: ) return ok + # def _send_post_message( + # self, + # *, + # token: str, + # text_content: str, + # ) -> bool: + # """Send a post message with Markdown converted to Feishu format. + + # Uses markdown_to_feishu_post() to convert Markdown to structured + # Feishu post format with proper handling of: + # - Headers (##, ###) + # - Bold (**text**) + # - List items (- item) + # - Code blocks (```lang ... ```) + # """ + # # Convert markdown to feishu post format + # post_data = markdown_to_feishu_post(text_content) + + # body = json.dumps( + # { + # "receive_id": self.config.chat_id, + # "msg_type": "post", + # "content": json.dumps(post_data["content"], ensure_ascii=False), + # }, + # ensure_ascii=False, + # ).encode("utf-8") + # req = urllib.request.Request( + # "https://open.feishu.cn/open-apis/im/v1/messages" + # + f"?{urllib.parse.urlencode({'receive_id_type': self.config.receive_id_type})}", + # data=body, + # method="POST", + # headers={ + # "Content-Type": "application/json; charset=utf-8", + # "Authorization": f"Bearer {token}", + # }, + # ) + # return ( + # _perform_json_request( + # req, + # timeout_seconds=self.config.timeout_seconds, + # on_error=self.on_error, + # label="feishu post send", + # ) + # is not None + # ) + def _send_structured_message( self, *, @@ -177,6 +522,12 @@ def _send_structured_message( msg_type: str, content: dict[str, Any], ) -> bool: + """Send a structured message (interactive card, image, file, etc.). + + Handles Feishu API error codes: + - 230025: Message too long - truncates content and retries + - 230099: Card content failed - logs detailed error + """ body = json.dumps( { "receive_id": self.config.chat_id, @@ -302,6 +653,42 @@ def _post_multipart( def close(self) -> None: return + def send_card_message( + self, + title: str, + content: str, + template: str = "blue", + actions: list[dict] | None = None, + ) -> bool: + """Send an interactive card message. + + Args: + title: Card header title + content: Main content (supports lark_md Markdown-like syntax) + template: Header color (blue, green, red, yellow, purple, gray) + actions: Optional list of button actions + + Returns: + True if sent successfully, False otherwise + """ + token = self._tokens.get_token() + if not token: + return False + + card_content = build_interactive_card( + title=title, + content=content, + template=template, + actions=actions, + wide_screen_mode=self.config.wide_screen_mode, + ) + + return self._send_structured_message( + token=token, + msg_type="interactive", + content=card_content, + ) + class FeishuCommandPoller: def __init__( @@ -466,28 +853,163 @@ def is_feishu_self_message(item: dict[str, Any]) -> bool: def format_feishu_event_message(event: dict[str, Any]) -> str: + """Format event message as text (legacy, for backward compatibility).""" return format_event_message(event) -def split_feishu_message(message: str, *, max_chunk_chars: int = 1500) -> list[str]: +def format_feishu_event_card(event: dict[str, Any]) -> tuple[str, str, str] | None: + """Format event as interactive card (title, content, template_color). + + Returns: + Tuple of (title, content, template) or None if event should not produce a card + + Event types handled: + - loop.started: Blue card with objective + - round.review.completed: Color based on status (green=done, yellow=continue, red=blocked) + - loop.completed: Green summary card + - reviewer.output: Reviewer 输出,提取 Markdown 字段 + - planner.output: Planner 输出,提取 Markdown 字段 + """ + event_type = str(event.get("type", "")) + + if event_type == "loop.started": + objective = event.get("objective", "Unknown task") + return ( + "任务启动", + f"**目标:** {objective}\n\nArgusBot 已开始执行任务...", + "blue" + ) + + if event_type == "round.review.completed": + review = event.get("review", {}) + status = str(review.get("status", "unknown")) + reason = review.get("reason", "") + round_num = event.get("round", 1) + + status_map = { + "done": ("审核通过", "green"), + "continue": ("继续执行", "yellow"), + "blocked": ("执行受阻", "red"), + } + title, color = status_map.get(status, ("审核状态", "blue")) + + content = f"**第 {round_num} 轮审核**\n\n" + content += f"**状态:** {status}\n" + if reason: + content += f"\n{reason}" + + return title, content, color + + if event_type == "reviewer.output": + # 处理 Reviewer JSON 输出,提取并格式化为结构化 Markdown + raw_output = event.get("raw_output", "") + if raw_output: + formatted = extract_and_format_reviewer(raw_output) + return ("🔍 Reviewer 评审报告", formatted, "blue") + return None + + if event_type == "planner.output": + # 处理 Planner JSON 输出,提取并格式化为结构化 Markdown + raw_output = event.get("raw_output", "") + if raw_output: + formatted = extract_and_format_planner(raw_output) + return ("📋 Planner 规划报告", formatted, "purple") + return None + + if event_type == "plan.completed": + # 处理 Planner 完成事件,包含原始 JSON 输出 + raw_output = event.get("raw_output", "") + if raw_output: + formatted = extract_and_format_planner(raw_output) + return ("📋 Planner 规划报告", formatted, "purple") + # 如果没有 raw_output,使用传统格式 + summary = str(event.get("main_instruction", ""))[:400] + return ("📋 Planner 更新", summary, "purple") + + if event_type == "loop.completed": + rounds = event.get("rounds", []) + total_rounds = len(rounds) + exit_code = event.get("exit_code", 0) + objective = event.get("objective", "任务") + + content = f"**任务完成**\n\n" + content += f"**目标:** {objective}\n" + content += f"**总轮数:** {total_rounds}\n" + content += f"**状态:** {'成功' if exit_code == 0 else '失败'}" + + return "任务完成", content, "green" + + return None + + +def split_feishu_message( + message: str, + *, + max_chunk_chars: int = 1500, + max_chunk_bytes: int | None = None, +) -> list[str]: + """Split message into chunks that fit within Feishu API limits. + + Args: + message: Message text to split + max_chunk_chars: Maximum characters per chunk (default: 1500) + max_chunk_bytes: Maximum bytes per chunk (default: None, use char limit only) + When set, ensures JSON-encoded message fits within limit + + Returns: + List of message chunks with [n/total] prefix for multi-chunk messages + + Note: + When max_chunk_bytes is set, the function accounts for JSON encoding overhead + (escape sequences like \\n, unicode, etc.) to ensure the final request body + stays within Feishu's 30 KB card message limit. + """ text = (message or "").strip() if not text: return [] - if len(text) <= max_chunk_chars: - return [text] + + # Determine effective limit (bytes-aware) + effective_limit = max_chunk_chars + if max_chunk_bytes is not None: + # Reserve ~30% space for JSON overhead when encoding + # JSON escapes: \n → \\n, unicode → \\uXXXX, quotes → \\" + estimated_overhead_factor = 1.3 + byte_limit = int(max_chunk_bytes / estimated_overhead_factor) + # Use the smaller of char limit or byte-derived limit + # (UTF-8: 1 char ≈ 1-3 bytes for common text) + effective_limit = min(max_chunk_chars, byte_limit) + + if len(text.encode('utf-8')) <= (max_chunk_bytes or float('inf')): + # Entire message fits within byte limit + if len(text) <= effective_limit: + return [text] + # Message fits in bytes but exceeds char limit - use char-based splitting + chunks: list[str] = [] remaining = text while remaining: - if len(remaining) <= max_chunk_chars: + current_limit = effective_limit + if max_chunk_bytes is not None: + # Adjust limit based on actual byte size of current segment + test_segment = remaining[:current_limit] + test_bytes = len(_json_encode_for_feishu(test_segment).encode('utf-8')) + # If exceeds byte limit, reduce character count + while test_bytes > max_chunk_bytes and current_limit > 50: + current_limit -= 50 + test_segment = remaining[:current_limit] + test_bytes = len(_json_encode_for_feishu(test_segment).encode('utf-8')) + + if len(remaining) <= current_limit: chunks.append(remaining) break - cut = remaining.rfind("\n", 0, max_chunk_chars) + cut = remaining.rfind("\n", 0, current_limit) if cut <= 0: - cut = remaining.rfind(" ", 0, max_chunk_chars) + cut = remaining.rfind(" ", 0, current_limit) if cut <= 0: - cut = max_chunk_chars + cut = current_limit chunks.append(remaining[:cut].rstrip()) remaining = remaining[cut:].lstrip() + total = len(chunks) if total <= 1: return chunks @@ -495,43 +1017,90 @@ def split_feishu_message(message: str, *, max_chunk_chars: int = 1500) -> list[s return [f"[{index + 1}/{total:0{width}d}]\n{chunk}" for index, chunk in enumerate(chunks)] +def _json_encode_for_feishu(text: str) -> str: + """JSON encode text as Feishu API would receive it (for size estimation).""" + return json.dumps(text, ensure_ascii=False) + + def _perform_json_request( req: urllib.request.Request, *, timeout_seconds: int, on_error: ErrorCallback | None, label: str, + max_retries: int = 2, ) -> dict[str, Any] | None: - try: - with urllib.request.urlopen(req, timeout=timeout_seconds) as resp: - raw = resp.read().decode("utf-8") - except urllib.error.HTTPError as exc: - body = "" + """Perform HTTP request with optional retry for transient errors. + + Retries on SSL/EOF errors and connection reset errors that are often transient. + """ + attempt = 0 + last_error: Exception | None = None + + while attempt <= max_retries: try: - body = exc.read().decode("utf-8") - except Exception: + with urllib.request.urlopen(req, timeout=timeout_seconds) as resp: + raw = resp.read().decode("utf-8") + except urllib.error.HTTPError as exc: body = "" - _emit(on_error, f"{label} http {exc.code}: {body[:300]}") - return None - except urllib.error.URLError as exc: - _emit(on_error, f"{label} network error: {exc}") - return None - except (TimeoutError, socket.timeout) as exc: - _emit(on_error, f"{label} timeout: {exc}") - return None - except OSError as exc: - _emit(on_error, f"{label} os error: {exc}") - return None - try: - parsed = json.loads(raw) - except json.JSONDecodeError: - _emit(on_error, f"{label} non-JSON response") - return None - code = parsed.get("code", 0) - if code not in {0, "0", None}: - _emit(on_error, f"{label} api error: code={code} msg={parsed.get('msg', '')}") - return None - return parsed + try: + body = exc.read().decode("utf-8") + except Exception: + body = "" + _emit(on_error, f"{label} http {exc.code}: {body[:300]}") + return None + except urllib.error.URLError as exc: + reason = str(exc.reason) + # Retry on SSL/EOF errors + if "UNEXPECTED_EOF" in reason or "EOF occurred" in reason or "connection reset" in reason.lower(): + last_error = exc + attempt += 1 + if attempt <= max_retries: + time.sleep(0.5 * attempt) # Exponential backoff + continue + _emit(on_error, f"{label} network error: {exc}") + return None + except (TimeoutError, socket.timeout) as exc: + _emit(on_error, f"{label} timeout: {exc}") + return None + except OSError as exc: + reason = str(exc) + # Retry on connection reset errors + if "Connection reset" in reason or "Broken pipe" in reason: + last_error = exc + attempt += 1 + if attempt <= max_retries: + time.sleep(0.5 * attempt) + continue + _emit(on_error, f"{label} os error: {exc}") + return None + + # Success - parse and return + try: + parsed = json.loads(raw) + except json.JSONDecodeError: + _emit(on_error, f"{label} non-JSON response") + return None + code = parsed.get("code", 0) + if code not in {0, "0", None}: + # Handle Feishu-specific error codes + code_str = str(code) + msg = parsed.get('msg', '') + + if code_str == str(FEISHU_ERROR_CODE_MESSAGE_TOO_LONG): + # 230025: Message too long - caller should truncate and retry + _emit(on_error, f"{label} message too long (code={code}): {msg}") + elif code_str == str(FEISHU_ERROR_CODE_CARD_CONTENT_FAILED): + # 230099: Card content failed - may contain markdown syntax issues + _emit(on_error, f"{label} card content failed (code={code}): {msg}") + else: + _emit(on_error, f"{label} api error: code={code} msg={msg}") + return None + return parsed + + # All retries exhausted + _emit(on_error, f"{label} failed after {max_retries} retries: {last_error}") + return None def _emit(on_error: ErrorCallback | None, message: str) -> None: diff --git a/codex_autoloop/live_updates.py b/codex_autoloop/live_updates.py index 4140346..68c280d 100644 --- a/codex_autoloop/live_updates.py +++ b/codex_autoloop/live_updates.py @@ -7,6 +7,56 @@ from typing import Callable, Protocol +def _safe_truncate_markdown(text: str, max_chars: int) -> str: + """Safely truncate Markdown text without breaking structure. + + Avoids cutting off: + 1. Inside code blocks + 2. In the middle of headers + 3. In the middle of list items + + Args: + text: Markdown text to truncate + max_chars: Maximum character count + + Returns: + Truncated text with continuation marker + """ + if len(text) <= max_chars: + return text + + # Check if we're inside a code block at the truncation point + truncated = text[:max_chars] + code_block_count = truncated.count('```') + + # If inside a code block (odd number of ```), close it + if code_block_count % 2 == 1: + # Find the end of the current line and close the code block + last_newline = truncated.rfind('\n') + if last_newline > 0: + truncated = truncated[:last_newline] + truncated += '\n```\n\n...(内容被截断)' + return truncated + + # Not in a code block, try to truncate at a paragraph boundary + last_double_newline = truncated.rfind('\n\n') + if last_double_newline > max_chars * 0.7: + return truncated[:last_double_newline] + '\n\n...(内容被截断)' + + # Try single newline + last_newline = truncated.rfind('\n') + if last_newline > max_chars * 0.7: + return truncated[:last_newline] + '\n\n...(内容被截断)' + + # Try space + last_space = truncated.rfind(' ') + if last_space > max_chars * 0.7: + return truncated[:last_space] + '\n\n...(内容被截断)' + + # Last resort: hard truncate + return truncated + '\n\n...(内容被截断)' + + def extract_agent_message(stream: str, line: str) -> tuple[str, str] | None: if not stream.endswith(".stdout"): return None @@ -116,8 +166,11 @@ def flush(self) -> bool: return False batch = self._pending[: self.config.max_items_per_push] self._pending = self._pending[self.config.max_items_per_push :] - message = self._format_batch(batch) - self.notifier.send_message(message) + + # Send each actor's message separately to avoid truncation + for actor, text in batch: + message = self._format_single_message(actor, text) + self.notifier.send_message(message) return True def _run(self) -> None: @@ -129,13 +182,17 @@ def _run(self) -> None: if self.on_error: self.on_error(f"{self.channel_name} live flush error: {exc}") - def _format_batch(self, batch: list[tuple[str, str]]) -> str: + def _format_single_message(self, actor: str, text: str) -> str: + """Format a single actor's message with markdown.""" now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%SZ") - lines = [f"[autoloop] live update {now}"] - for actor, text in batch: - compact = " ".join(text.split()) - lines.append(f"- {actor}: {compact[:420]}") - rendered = "\n".join(lines) + trimmed = text.strip() + # Format as Markdown with actor as bold header + # Ensure proper newline after bold header for Markdown rendering + # Increased limit from 420 to 1200 to accommodate JSON outputs + message_text = f"**{actor}:**\n\n{trimmed[:1200]}" + rendered = f"[autoloop] live update {now}\n\n{message_text}" + + # Safely truncate if needed, avoiding cutting off Markdown structures if len(rendered) <= self.config.max_chars: return rendered - return rendered[: self.config.max_chars] + return _safe_truncate_markdown(rendered, self.config.max_chars) diff --git a/codex_autoloop/md_checker.py b/codex_autoloop/md_checker.py new file mode 100644 index 0000000..7cad8a5 --- /dev/null +++ b/codex_autoloop/md_checker.py @@ -0,0 +1,783 @@ +"""Markdown 检查工具 - 整合 markdownlint 规则和自定义飞书验证。 + +本模块提供 Markdown 格式检查功能,支持: +1. 调用 markdownlint (mdl) 进行标准规则检查 +2. 使用自定义验证器进行飞书特定检查 +3. 生成结构化报告 +4. 验证和修复 Markdown 语法(原 feishu_markdown_validator.py 已合并至此) +""" + +from __future__ import annotations + +import json +import re +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any + + +# ============================================================================= +# 基础 Markdown 验证和修复函数(原 feishu_markdown_validator.py) +# ============================================================================= + +def validate_and_fix_markdown(text: str) -> str: + """验证并修复 Markdown 语法。 + + 修复以下问题: + 1. 未闭合的代码块(```) + 2. 标题后缺少换行 + 3. 列表项格式不正确 + 4. 换行符不完整 + + Args: + text: 原始 Markdown 文本 + + Returns: + 修复后的 Markdown 文本 + """ + if not text: + return text + + result = text + + # 1. 修复未闭合的代码块 + result = fix_unclosed_code_blocks(result) + + # 2. 确保标题后有足够换行 + result = ensure_headers_have_newlines(result) + + # 3. 确保列表格式正确 + result = fix_list_format(result) + + # 4. 确保代码块前后有换行 + result = ensure_code_blocks_have_newlines(result) + + # 5. 移除多余的空行(连续 3 个以上空行缩减为 2 个) + result = re.sub(r'\n{4,}', '\n\n\n', result) + + return result + + +def fix_unclosed_code_blocks(text: str) -> str: + """修复未闭合的代码块。 + + 检测并添加缺失的闭合标记 ```。 + + Args: + text: Markdown 文本 + + Returns: + 修复后的文本 + """ + lines = text.split('\n') + result_lines: list[str] = [] + in_code_block = False + code_block_start_line = -1 + + for i, line in enumerate(lines): + stripped = line.strip() + + # 检测代码块开始/结束标记 + if stripped.startswith('```'): + if in_code_block: + # 闭合代码块 + in_code_block = False + result_lines.append(line) + else: + # 开始代码块 + in_code_block = True + code_block_start_line = i + result_lines.append(line) + else: + result_lines.append(line) + + # 如果代码块未闭合,添加闭合标记 + if in_code_block: + result_lines.append('```') + + return '\n'.join(result_lines) + + +def check_unclosed_blocks(text: str) -> list[str]: + """检测未闭合的代码块。 + + Args: + text: Markdown 文本 + + Returns: + 问题描述列表 + """ + issues: list[str] = [] + lines = text.split('\n') + in_code_block = False + code_block_start_line = -1 + + for i, line in enumerate(lines): + stripped = line.strip() + + if stripped.startswith('```'): + if in_code_block: + in_code_block = False + else: + in_code_block = True + code_block_start_line = i + + if in_code_block: + issues.append(f"代码块在第 {code_block_start_line + 1} 行开始但未闭合") + + return issues + + +def ensure_headers_have_newlines(text: str) -> str: + """确保标题后有足够换行。 + + Markdown 标题(#、##、### 等)后应该有空行, + 否则后续内容可能不会被正确识别为新段落。 + + Args: + text: Markdown 文本 + + Returns: + 修复后的文本 + """ + lines = text.split('\n') + result_lines: list[str] = [] + + i = 0 + while i < len(lines): + line = lines[i] + result_lines.append(line) + + # 检查是否是标题行 + if re.match(r'^#{1,6}\s+.*$', line.strip()): + # 检查下一行是否是空行 + if i + 1 < len(lines): + next_line = lines[i + 1] + # 如果下一行不是空行且不是另一个标题,添加空行 + if next_line.strip() and not re.match(r'^#{1,6}\s+.*$', next_line.strip()): + result_lines.append('') + + i += 1 + + return '\n'.join(result_lines) + + +def fix_list_format(text: str) -> str: + """修复列表格式。 + + 确保列表项: + 1. 前面有空行(除非在开头) + 2. 使用正确的标记(-、*、+ 或数字.) + 3. 列表项之间有适当的间距 + + Args: + text: Markdown 文本 + + Returns: + 修复后的文本 + """ + lines = text.split('\n') + result_lines: list[str] = [] + in_list = False + prev_was_list_item = False + + for i, line in enumerate(lines): + stripped = line.strip() + is_list_item = bool(re.match(r'^(\s*)([-*+]|\d+\.)\s+', stripped)) + + if is_list_item: + # 如果列表项前不是空行且不是列表继续,添加空行 + if not in_list and result_lines and result_lines[-1].strip(): + result_lines.append('') + + in_list = True + result_lines.append(line) + prev_was_list_item = True + else: + if in_list and not stripped: + # 空行可能表示列表结束 + in_list = False + elif in_list and stripped: + # 非空非列表行,列表结束 + in_list = False + result_lines.append(line) + prev_was_list_item = False + + return '\n'.join(result_lines) + + +def ensure_code_blocks_have_newlines(text: str) -> str: + """确保代码块前后有换行。 + + 代码块标记(```)前后应该有空行, + 以确保正确渲染。 + + Args: + text: Markdown 文本 + + Returns: + 修复后的文本 + """ + lines = text.split('\n') + result_lines: list[str] = [] + + for i, line in enumerate(lines): + stripped = line.strip() + + if stripped.startswith('```'): + # 代码块标记前添加空行(如果不是开头且前一行不是空行) + if result_lines and result_lines[-1].strip(): + result_lines.append('') + + result_lines.append(line) + + # 检查是否是闭合标记,如果是,后面添加空行 + if len(result_lines) > 1: + # 检查是否是闭合标记(前面有代码内容) + prev_lines = [l for l in result_lines[:-1] if not l.strip().startswith('```')] + if prev_lines and any(l.strip() for l in prev_lines): + # 这是一个闭合标记,检查是否需要添加空行 + pass # 空行会在后续处理中自动添加 + else: + result_lines.append(line) + + return '\n'.join(result_lines) + + +def truncate_markdown_safely(text: str, max_chars: int) -> str: + """安全地截断 Markdown 文本。 + + 避免在以下位置截断: + 1. 代码块内部 + 2. 标题中间 + 3. 列表项中间 + + Args: + text: Markdown 文本 + max_chars: 最大字符数 + + Returns: + 截断后的文本 + """ + if len(text) <= max_chars: + return text + + # 首先检查截断点是否在代码块内 + truncated = text[:max_chars] + code_block_count = truncated.count('```') + + # 如果在代码块内截断,找到下一个代码块结束标记 + if code_block_count % 2 == 1: + # 在代码块内,需要找到闭合标记或添加到末尾 + remaining = text[max_chars:] + end_marker_pos = remaining.find('```') + if end_marker_pos != -1: + # 包含到闭合标记 + return truncated + remaining[:end_marker_pos + 3] + '\n\n...(内容被截断)' + else: + # 没有闭合标记,添加一个 + return truncated + '\n```\n\n...(内容被截断)' + + # 不在代码块内,尝试在段落边界截断 + # 优先在空行处截断 + last_double_newline = truncated.rfind('\n\n') + if last_double_newline > max_chars * 0.7: # 至少在 70% 位置之后 + return truncated[:last_double_newline] + '\n\n...(内容被截断)' + + # 其次在单换行处截断 + last_newline = truncated.rfind('\n') + if last_newline > max_chars * 0.7: + return truncated[:last_newline] + '\n\n...(内容被截断)' + + # 最后在单词边界截断 + last_space = truncated.rfind(' ') + if last_space > max_chars * 0.7: + return truncated[:last_space] + '\n\n...(内容被截断)' + + # 无法找到合适位置,直接截断 + return truncated + '\n\n...(内容被截断)' + + +def validate_markdown_for_feishu(text: str) -> tuple[bool, list[str]]: + """验证 Markdown 是否适合飞书渲染。 + + 飞书的 markdown 组件有一些特殊要求: + 1. 代码块必须闭合 + 2. 标题格式必须正确 + 3. 换行符必须保留 + + Args: + text: Markdown 文本 + + Returns: + (是否有效,问题列表) + """ + issues: list[str] = [] + + # 检查未闭合代码块 + unclosed = check_unclosed_blocks(text) + issues.extend(unclosed) + + # 检查是否有内容(飞书需要非空内容) + if not text.strip(): + issues.append("消息内容为空") + + # 检查是否有不支持的 Markdown 语法 + # 飞书不支持表格、脚注等 + if re.search(r'^\|.*\|$', text, re.MULTILINE): + issues.append("飞书不支持表格语法") + + if re.search(r'\[\^[^\]]+\]', text): + issues.append("飞书不支持脚注") + + return (len(issues) == 0, issues) + + +def check_markdown_structure(text: str) -> dict[str, Any]: + """检查 Markdown 结构完整性。 + + Args: + text: Markdown 文本 + + Returns: + 检查结果字典,包含: + - valid: 是否有效 + - issues: 问题列表 + - fixes_applied: 已应用的修复 + """ + issues: list[str] = [] + fixes_applied: list[str] = [] + + # 检查未闭合代码块 + unclosed = check_unclosed_blocks(text) + issues.extend(unclosed) + + # 检查标题格式 + lines = text.split('\n') + for i, line in enumerate(lines): + if re.match(r'^#{1,6}\s+.*$', line.strip()): + if i + 1 < len(lines) and lines[i + 1].strip(): + if not re.match(r'^#{1,6}\s+.*$', lines[i + 1].strip()): + issues.append(f"第 {i + 1} 行标题后缺少空行") + + # 检查连续空行过多 + if '\n\n\n\n' in text: + issues.append("存在连续 4 个以上空行") + + return { + "valid": len(issues) == 0, + "issues": issues, + "fixes_applied": fixes_applied + } + + +@dataclass +class MarkdownIssue: + """Markdown 问题描述。""" + rule_id: str + description: str + line_number: int | None = None + severity: str = "warning" # "error" | "warning" | "info" + suggestion: str | None = None + + +@dataclass +class MarkdownCheckResult: + """Markdown 检查结果。""" + is_valid: bool + issues: list[MarkdownIssue] + fixed_content: str | None = None + feishu_ready: bool = False + + +def check_with_markdownlint( + text: str, + style_file: Path | None = None, + mdl_path: str = "mdl", +) -> list[MarkdownIssue]: + """使用 markdownlint (mdl) 检查 Markdown 内容。 + + Args: + text: 要检查的 Markdown 文本 + style_file: 可选的风格配置文件路径 + mdl_path: mdl 可执行文件路径 + + Returns: + 问题列表 + """ + issues: list[MarkdownIssue] = [] + + try: + # 准备 mdl 命令 + cmd = [mdl_path, "--json"] + + if style_file and style_file.exists(): + cmd.extend(["--style", str(style_file)]) + + # 执行 mdl + result = subprocess.run( + cmd, + input=text, + capture_output=True, + text=True, + timeout=30, + ) + + # 解析 JSON 输出 + if result.stdout.strip(): + try: + violations = json.loads(result.stdout) + for violation in violations: + issue = MarkdownIssue( + rule_id=violation.get("rule", "UNKNOWN"), + description=violation.get("description", ""), + line_number=violation.get("line"), + severity="warning", + ) + issues.append(issue) + except json.JSONDecodeError: + # 如果解析失败,尝试解析文本输出 + for line in result.stdout.strip().split("\n"): + if line: + issue = _parse_mdl_text_line(line) + if issue: + issues.append(issue) + + except subprocess.TimeoutExpired: + issues.append(MarkdownIssue( + rule_id="SYSTEM", + description="markdownlint 检查超时", + severity="error", + )) + except FileNotFoundError: + # mdl 未安装,返回提示信息 + issues.append(MarkdownIssue( + rule_id="SYSTEM", + description="未找到 mdl 工具,请运行 'gem install mdl' 安装", + severity="info", + suggestion="或使用 pip install markdownlint-cli 安装 Node.js 版本", + )) + except Exception as exc: + issues.append(MarkdownIssue( + rule_id="SYSTEM", + description=f"markdownlint 检查失败:{exc}", + severity="error", + )) + + return issues + + +def _parse_mdl_text_line(line: str) -> MarkdownIssue | None: + """解析 mdl 文本输出一行。 + + 格式示例:README.md:1: MD013 Line length + """ + # 尝试匹配标准格式 + import re + match = re.match(r"^([^:]+):(\d+):\s*(MD\d+)\s*(.*)$", line) + if match: + return MarkdownIssue( + rule_id=match.group(3), + description=match.group(4).strip(), + line_number=int(match.group(2)), + severity="warning", + ) + return None + + +def check_for_feishu(text: str) -> list[MarkdownIssue]: + """针对飞书渲染的 Markdown 检查。 + + 检查飞书特定的 Markdown 兼容性问题: + - 未闭合的代码块 + - 不支持的表格语法 + - 不支持的脚注 + - 标题后缺少换行 + + Args: + text: Markdown 文本 + + Returns: + 问题列表 + """ + issues: list[MarkdownIssue] = [] + + # 检查未闭合代码块 + unclosed = check_unclosed_blocks(text) + for desc in unclosed: + issues.append(MarkdownIssue( + rule_id="FEISHU-MD001", + description=desc, + severity="error", + suggestion="添加缺失的 ``` 闭合标记", + )) + + # 检查飞书不支持的语法 + valid, feishu_issues = validate_markdown_for_feishu(text) + for desc in feishu_issues: + if "表格" in desc: + issues.append(MarkdownIssue( + rule_id="FEISHU-MD002", + description=f"飞书不支持:{desc}", + severity="warning", + suggestion="使用代码块或纯文本展示表格内容", + )) + elif "脚注" in desc: + issues.append(MarkdownIssue( + rule_id="FEISHU-MD003", + description=f"飞书不支持:{desc}", + severity="warning", + suggestion="使用普通文本替代脚注", + )) + elif "空" in desc: + issues.append(MarkdownIssue( + rule_id="FEISHU-MD004", + description=desc, + severity="error", + )) + else: + issues.append(MarkdownIssue( + rule_id="FEISHU-UNK", + description=desc, + severity="warning", + )) + + # 检查标题后换行 + lines = text.split("\n") + for i, line in enumerate(lines): + if line.strip().startswith("#"): + if i + 1 < len(lines) and lines[i + 1].strip(): + if not lines[i + 1].strip().startswith("#"): + issues.append(MarkdownIssue( + rule_id="FEISHU-MD005", + description=f"第 {i + 1} 行标题后缺少空行", + line_number=i + 1, + severity="warning", + suggestion="在标题后添加一个空行", + )) + + return issues + + +def check_markdown( + text: str, + style_file: Path | None = None, + check_feishu: bool = True, + auto_fix: bool = False, +) -> MarkdownCheckResult: + """完整的 Markdown 检查流程。 + + Args: + text: Markdown 文本 + style_file: markdownlint 风格文件路径 + check_feishu: 是否进行飞书兼容性检查 + auto_fix: 是否自动修复可修复的问题 + + Returns: + 检查结果 + """ + all_issues: list[MarkdownIssue] = [] + + # 1. markdownlint 标准检查 + mdl_issues = check_with_markdownlint(text, style_file) + all_issues.extend(mdl_issues) + + # 2. 飞书特定检查 + if check_feishu: + feishu_issues = check_for_feishu(text) + all_issues.extend(feishu_issues) + + # 3. 判断是否有效 + is_valid = len(all_issues) == 0 + has_errors = any(i.severity == "error" for i in all_issues) + + # 4. 自动修复(如果请求) + fixed_content = None + if auto_fix and not has_errors: + fixed_content = validate_and_fix_markdown(text) + + # 5. 飞书就绪状态 + feishu_ready = not has_errors and not any( + "FEISHU" in i.rule_id and i.severity == "error" + for i in all_issues + ) + + return MarkdownCheckResult( + is_valid=is_valid or not has_errors, + issues=all_issues, + fixed_content=fixed_content, + feishu_ready=feishu_ready, + ) + + +def format_check_report( + result: MarkdownCheckResult, + format_type: str = "text", +) -> str: + """格式化检查报告。 + + Args: + result: 检查结果 + format_type: 输出格式 (text/json/markdown) + + Returns: + 格式化的报告 + """ + if format_type == "json": + return json.dumps( + { + "is_valid": result.is_valid, + "feishu_ready": result.feishu_ready, + "issues": [ + { + "rule_id": i.rule_id, + "description": i.description, + "line": i.line_number, + "severity": i.severity, + "suggestion": i.suggestion, + } + for i in result.issues + ], + }, + ensure_ascii=False, + indent=2, + ) + + if format_type == "markdown": + lines = ["# Markdown 检查报告", ""] + lines.append(f"**状态:** {'通过' if result.is_valid else '未通过'}") + lines.append(f"**飞书就绪:** {'是' if result.feishu_ready else '否'}") + lines.append(f"**问题数:** {len(result.issues)}") + lines.append("") + + if result.issues: + lines.append("## 问题列表") + lines.append("") + for issue in result.issues: + severity_icon = {"error": "❌", "warning": "⚠️", "info": "ℹ️"}.get( + issue.severity, "•" + ) + line_info = f" (第 {issue.line_number} 行)" if issue.line_number else "" + lines.append( + f"- {severity_icon} **{issue.rule_id}**{line_info}: {issue.description}" + ) + if issue.suggestion: + lines.append(f" - 建议:{issue.suggestion}") + lines.append("") + + return "\n".join(lines) + + # 默认文本格式 + lines = [] + status = "通过" if result.is_valid else "未通过" + lines.append(f"Markdown 检查:{status}") + lines.append(f"飞书就绪:{'是' if result.feishu_ready else '否'}") + lines.append(f"问题数:{len(result.issues)}") + + if result.issues: + lines.append("") + lines.append("问题详情:") + for issue in result.issues: + severity = {"error": "[错误]", "warning": "[警告]", "info": "[提示]"}.get( + issue.severity, "" + ) + line_info = f" (第 {issue.line_number} 行)" if issue.line_number else "" + lines.append(f" {severity} {issue.rule_id}{line_info}: {issue.description}") + if issue.suggestion: + lines.append(f" 建议:{issue.suggestion}") + + return "\n".join(lines) + + +def quick_fix_for_feishu(text: str) -> str: + """快速修复飞书 Markdown 问题。 + + 这是 validate_and_fix_markdown 的增强版, + 专门针对飞书渲染优化。 + + Args: + text: 原始 Markdown 文本 + + Returns: + 修复后的文本 + """ + # 使用基础验证器修复 + result = validate_and_fix_markdown(text) + + # 额外的飞书优化 + lines = result.split("\n") + optimized_lines: list[str] = [] + + for i, line in enumerate(lines): + # 移除行尾空格 + line = line.rstrip() + + # 确保标题格式正确 + stripped = line.strip() + if stripped.startswith("#"): + # 确保 # 后有空格 + if not re.match(r"^#+\s", stripped) and len(stripped) > 1: + line = re.sub(r"^(#+)", r"\1 ", stripped) + + optimized_lines.append(line) + + # 移除开头的空行 + while optimized_lines and not optimized_lines[0].strip(): + optimized_lines.pop(0) + + # 确保以单个换行结尾 + result = "\n".join(optimized_lines) + if result and not result.endswith("\n"): + result += "\n" + + return result + + +def check_file( + file_path: Path | str, + style_file: Path | None = None, + check_feishu: bool = True, +) -> MarkdownCheckResult: + """检查 Markdown 文件。 + + Args: + file_path: 文件路径 + style_file: markdownlint 风格文件路径 + check_feishu: 是否进行飞书兼容性检查 + + Returns: + 检查结果 + """ + path = Path(file_path) + if not path.exists(): + return MarkdownCheckResult( + is_valid=False, + issues=[ + MarkdownIssue( + rule_id="SYSTEM", + description=f"文件不存在:{file_path}", + severity="error", + ) + ], + feishu_ready=False, + ) + + text = path.read_text(encoding="utf-8") + return check_markdown(text, style_file, check_feishu) + + +if __name__ == "__main__": + # 命令行使用示例 + if len(sys.argv) < 2: + print("用法:python -m codex_autoloop.md_checker [风格文件]") + sys.exit(1) + + file_path = Path(sys.argv[1]) + style_file = Path(sys.argv[2]) if len(sys.argv) > 2 else None + + result = check_file(file_path, style_file) + print(format_check_report(result)) + + sys.exit(0 if result.is_valid else 1) diff --git a/codex_autoloop/orchestrator.py b/codex_autoloop/orchestrator.py index 056ff73..0af5059 100644 --- a/codex_autoloop/orchestrator.py +++ b/codex_autoloop/orchestrator.py @@ -343,6 +343,9 @@ def inactivity_callback(snapshot: InactivitySnapshot) -> str: "confidence": review.confidence, "reason": review.reason, "next_action": review.next_action, + "round_summary_markdown": review.round_summary_markdown, + "completion_summary_markdown": review.completion_summary_markdown, + "raw_output": main_result.last_agent_message, } ) diff --git a/codex_autoloop/output_extractor.py b/codex_autoloop/output_extractor.py new file mode 100644 index 0000000..a21cfb3 --- /dev/null +++ b/codex_autoloop/output_extractor.py @@ -0,0 +1,524 @@ +"""Reviewer/Planner 输出提取器 - 将 JSON 输出转换为结构化 Markdown。 + +本模块用于从 Reviewer 和 Planner 的 JSON 输出中提取 Markdown 字段, +并重新格式化为多层级的结构化 Markdown 文本,适合飞书消息渲染。 +""" + +from __future__ import annotations + +import json +import re +from dataclasses import dataclass +from typing import Any + + +@dataclass +class ReviewerOutput: + """Reviewer 输出提取结果。""" + status: str + confidence: float + reason: str + next_action: str + round_summary: str + completion_summary: str + + +@dataclass +class PlannerOutput: + """Planner 输出提取结果。""" + summary: str + workstreams: list[dict] + done_items: list[str] + remaining_items: list[str] + risks: list[str] + next_steps: list[str] + exploration_items: list[str] + full_report: str + + +def try_repair_truncated_json(text: str) -> str: + """尝试修复被截断的 JSON。 + + Args: + text: 可能被截断的 JSON 文本 + + Returns: + 修复后的 JSON 文本 + """ + text = text.strip() + + # 移除 markdown 代码块标记 + if text.startswith("```json"): + text = text[7:] + if text.startswith("```"): + text = text[3:] + text = text.rstrip("`").rstrip() + + # 计算括号和引号的平衡 + brace_count = 0 + bracket_count = 0 + in_string = False + escape_next = False + + for i, char in enumerate(text): + if escape_next: + escape_next = False + continue + if char == "\\": + escape_next = True + continue + if char == '"' and not escape_next: + in_string = not in_string + continue + if not in_string: + if char == "{": + brace_count += 1 + elif char == "}": + brace_count -= 1 + elif char == "[": + bracket_count += 1 + elif char == "]": + bracket_count -= 1 + + # 关闭未闭合的括号 + result = text + if bracket_count > 0: + result += "]" * bracket_count + if brace_count > 0: + result += "}" * brace_count + + return result.strip() + + +def extract_reviewer_output(json_text: str) -> ReviewerOutput | None: + """从 Reviewer JSON 输出中提取结构化数据。 + + Args: + json_text: Reviewer 输出的 JSON 文本 + + Returns: + 提取的 ReviewerOutput 对象,如果解析失败则返回 None + """ + # 尝试直接解析 + try: + data = json.loads(json_text) + if isinstance(data, dict): + return _build_reviewer_output(data) + except json.JSONDecodeError: + pass + + # 尝试提取 JSON 块 + json_match = re.search(r'\{[\s\S]*\}', json_text) + if json_match: + try: + data = json.loads(json_match.group()) + return _build_reviewer_output(data) + except json.JSONDecodeError: + pass + + # 尝试修复被截断的 JSON + repaired = try_repair_truncated_json(json_text) + try: + data = json.loads(repaired) + return _build_reviewer_output(data) + except json.JSONDecodeError: + pass + + # 最后尝试:用正则提取各个字段 + return _extract_reviewer_output_regex(json_text) + + +def _build_reviewer_output(data: dict) -> ReviewerOutput: + """从解析的 JSON 数据构建 ReviewerOutput。""" + return ReviewerOutput( + status=data.get("status", "unknown"), + confidence=float(data.get("confidence", 0.0)), + reason=data.get("reason", ""), + next_action=data.get("next_action", ""), + round_summary=data.get("round_summary_markdown", ""), + completion_summary=data.get("completion_summary_markdown", ""), + ) + + +def _extract_reviewer_output_regex(json_text: str) -> ReviewerOutput | None: + """使用正则表达式从被截断的 JSON 中提取 Reviewer 字段。""" + def extract_string_field(text: str, field: str) -> str: + # 匹配 "field": "value" 或 "field": "value...(被截断)" + pattern = rf'"{field}"\s*:\s*"([^"]*(?:[^"\\]\\.)*?)"' + match = re.search(pattern, text) + if match: + value = match.group(1) + # 处理转义字符 + value = value.replace('\\"', '"').replace('\\n', '\n') + return value + return "" + + def extract_number_field(text: str, field: str) -> float: + pattern = rf'"{field}"\s*:\s*([\d.]+)' + match = re.search(pattern, text) + if match: + try: + return float(match.group(1)) + except ValueError: + pass + return 0.0 + + return ReviewerOutput( + status=extract_string_field(json_text, "status"), + confidence=extract_number_field(json_text, "confidence"), + reason=extract_string_field(json_text, "reason"), + next_action=extract_string_field(json_text, "next_action"), + round_summary=extract_string_field(json_text, "round_summary_markdown"), + completion_summary=extract_string_field(json_text, "completion_summary_markdown"), + ) + + +def extract_planner_output(json_text: str) -> PlannerOutput | None: + """从 Planner JSON 输出中提取结构化数据。 + + Args: + json_text: Planner 输出的 JSON 文本 + + Returns: + 提取的 PlannerOutput 对象,如果解析失败则返回 None + """ + # 尝试直接解析 + try: + data = json.loads(json_text) + if isinstance(data, dict): + return _build_planner_output(data) + except json.JSONDecodeError: + pass + + # 尝试提取 JSON 块 + json_match = re.search(r'\{[\s\S]*\}', json_text) + if json_match: + try: + data = json.loads(json_match.group()) + return _build_planner_output(data) + except json.JSONDecodeError: + pass + + # 尝试修复被截断的 JSON + repaired = try_repair_truncated_json(json_text) + try: + data = json.loads(repaired) + return _build_planner_output(data) + except json.JSONDecodeError: + pass + + # 最后尝试:用正则提取各个字段 + return _extract_planner_fields_regex(json_text) + + +def _build_planner_output(data: dict) -> PlannerOutput: + """从解析的 JSON 数据构建 PlannerOutput。""" + return PlannerOutput( + summary=data.get("summary", ""), + workstreams=data.get("workstreams", []), + done_items=data.get("done_items", []), + remaining_items=data.get("remaining_items", []), + risks=data.get("risks", []), + next_steps=data.get("next_steps", []), + exploration_items=data.get("exploration_items", []), + full_report=data.get("report_markdown", ""), + ) + + +def _extract_planner_fields_regex(json_text: str) -> PlannerOutput | None: + """使用正则表达式从被截断的 JSON 中提取 Planner 字段。""" + def extract_string_field(text: str, field: str) -> str: + pattern = rf'"{field}"\s*:\s*"([^"]*(?:[^"\\]\\.)*?)"' + match = re.search(pattern, text) + if match: + value = match.group(1) + value = value.replace('\\"', '"').replace('\\n', '\n') + return value + return "" + + def extract_array_field(text: str, field: str) -> list: + # 匹配 "field": [...] 数组字段 + pattern = rf'"{field}"\s*:\s*\[([\s\S]*?)\]' + match = re.search(pattern, text) + if match: + array_content = match.group(1) + # 提取字符串数组项 + items = [] + item_pattern = r'"([^"]*(?:[^"\\]\\.)*?)"' + for item_match in re.finditer(item_pattern, array_content): + item = item_match.group(1).replace('\\"', '"').replace('\\n', '\n') + items.append(item) + return items + return [] + + def extract_workstreams(text: str) -> list[dict]: + """提取 workstreams 数组。""" + pattern = rf'"workstreams"\s*:\s*\[([\s\S]*?)\]' + match = re.search(pattern, text) + if not match: + return [] + + array_content = match.group(1) + workstreams = [] + + # 提取每个工作流对象 + obj_pattern = r'\{([^{}]+)\}' + for obj_match in re.finditer(obj_pattern, array_content): + obj_content = obj_match.group(1) + ws = {} + + # 提取 area 字段 + area_match = re.search(r'"area"\s*:\s*"([^"]*)"', obj_content) + if area_match: + ws["area"] = area_match.group(1) + + # 提取 status 字段 + status_match = re.search(r'"status"\s*:\s*"([^"]*)"', obj_content) + if status_match: + ws["status"] = status_match.group(1) + + if ws: + workstreams.append(ws) + + return workstreams + + return PlannerOutput( + summary=extract_string_field(json_text, "summary"), + workstreams=extract_workstreams(json_text), + done_items=extract_array_field(json_text, "done_items"), + remaining_items=extract_array_field(json_text, "remaining_items"), + risks=extract_array_field(json_text, "risks"), + next_steps=extract_array_field(json_text, "next_steps"), + exploration_items=extract_array_field(json_text, "exploration_items"), + full_report=extract_string_field(json_text, "report_markdown"), + ) + + +def format_reviewer_markdown(output: ReviewerOutput) -> str: + """将 Reviewer 输出格式化为多层级 Markdown。 + + Args: + output: ReviewerOutput 对象 + + Returns: + 格式化的 Markdown 文本 + """ + lines: list[str] = [] + + # 状态标题 + status_icons = { + "done": "✅", + "continue": "🔄", + "blocked": "🚫", + } + icon = status_icons.get(output.status, "❓") + lines.append(f"{icon} **Reviewer 评审结果**") + lines.append("") + + # 核心状态 + lines.append(f"**状态**: {output.status}") + lines.append(f"**置信度**: {output.confidence:.0%}") + lines.append("") + + # 评审原因 + if output.reason: + lines.append("**评审原因**") + lines.append(output.reason) + lines.append("") + + # 本轮总结 + if output.round_summary: + lines.append("**本轮总结**") + lines.append(output.round_summary) + lines.append("") + + # 完成总结 + if output.completion_summary: + lines.append("**完成证据**") + lines.append(output.completion_summary) + lines.append("") + + # 下一步行动 + if output.next_action: + lines.append("**下一步行动**") + lines.append(output.next_action) + + return "\n".join(lines) + + +def format_planner_markdown(output: PlannerOutput) -> str: + """将 Planner 输出格式化为多层级 Markdown。 + + Args: + output: PlannerOutput 对象 + + Returns: + 格式化的 Markdown 文本 + """ + lines: list[str] = [] + + # 标题 + lines.append("## 📋 Planner 规划报告") + lines.append("") + + # 经理总结 + if output.summary: + lines.append("**经理总结**") + lines.append(output.summary) + lines.append("") + + # 工作流表格 + if output.workstreams: + lines.append("**工作流状态**") + lines.append("") + lines.append("| 工作流 | 状态 |") + lines.append("|--------|------|") + for ws in output.workstreams: + area = ws.get("area", "未知") + status = ws.get("status", "unknown") + status_label = { + "done": "✅ 完成", + "in_progress": "🔄 进行中", + "todo": "⏳ 待办", + "blocked": "🚫 阻塞", + }.get(status, status) + lines.append(f"| {area} | {status_label} |") + lines.append("") + + # 完成项 + if output.done_items: + lines.append("**✅ 完成项**") + for item in output.done_items: + lines.append(f"- {item}") + lines.append("") + + # 剩余项 + if output.remaining_items: + lines.append("**⏳ 剩余项**") + for item in output.remaining_items: + lines.append(f"- {item}") + lines.append("") + + # 风险 + if output.risks: + lines.append("**⚠️ 风险**") + for risk in output.risks: + lines.append(f"- {risk}") + lines.append("") + + # 下一步 + if output.next_steps: + lines.append("**➡️ 推荐下一步**") + for step in output.next_steps: + lines.append(f"- {step}") + lines.append("") + + # 探索项 + if output.exploration_items: + lines.append("**🔍 探索项**") + for item in output.exploration_items: + lines.append(f"- {item}") + lines.append("") + + return "\n".join(lines) + + +def extract_and_format_reviewer(json_text: str) -> str: + """提取并格式化 Reviewer 输出。 + + Args: + json_text: Reviewer JSON 输出 + + Returns: + 格式化的 Markdown 文本,如果解析失败则返回原始文本 + """ + output = extract_reviewer_output(json_text) + if output: + return format_reviewer_markdown(output) + return json_text + + +def extract_and_format_planner(json_text: str) -> str: + """提取并格式化 Planner 输出。 + + Args: + json_text: Planner JSON 输出 + + Returns: + 格式化的 Markdown 文本,如果解析失败则返回原始文本 + """ + output = extract_planner_output(json_text) + if output: + return format_planner_markdown(output) + return json_text + + +def extract_message_content(json_text: str) -> dict[str, str]: + """从 JSON 中提取所有 Markdown 字段。 + + Args: + json_text: JSON 文本 + + Returns: + 包含所有 Markdown 字段的字典 + """ + markdown_fields = { + "round_summary_markdown", + "completion_summary_markdown", + "overview_markdown", + "report_markdown", + "summary_markdown", + } + + result: dict[str, str] = {} + + try: + data = json.loads(json_text) + if isinstance(data, dict): + for field in markdown_fields: + if field in data and isinstance(data[field], str): + result[field] = data[field] + except json.JSONDecodeError: + pass + + return result + + +def clean_json_output(text: str) -> str: + """清理 JSON 输出,移除 markdown 代码块标记。 + + Args: + text: 可能包含 JSON 的文本 + + Returns: + 纯 JSON 字符串 + """ + # 移除 ```json 和 ``` 标记 + text = re.sub(r"```json\s*", "", text) + text = re.sub(r"```\s*", "", text) + return text.strip() + + +def parse_agent_response(response_text: str) -> dict[str, Any] | None: + """解析 Agent 响应,提取 JSON 数据。 + + Args: + response_text: Agent 响应文本 + + Returns: + 解析后的 JSON 数据,如果失败则返回 None + """ + # 清理文本 + cleaned = clean_json_output(response_text) + + try: + return json.loads(cleaned) + except json.JSONDecodeError: + # 尝试提取 JSON 块 + match = re.search(r'\{[\s\S]*\}', cleaned) + if match: + try: + return json.loads(match.group()) + except json.JSONDecodeError: + pass + return None diff --git a/codex_autoloop/planner.py b/codex_autoloop/planner.py index 606701d..6162ac7 100644 --- a/codex_autoloop/planner.py +++ b/codex_autoloop/planner.py @@ -108,6 +108,33 @@ def evaluate( latest_plan_overview: str, config: PlannerConfig, ) -> PlanDecision: + plan, _ = self.evaluate_with_raw_output( + objective=objective, + plan_messages=plan_messages, + round_index=round_index, + session_id=session_id, + latest_review_completion_summary=latest_review_completion_summary, + latest_plan_overview=latest_plan_overview, + config=config, + ) + return plan + + def evaluate_with_raw_output( + self, + *, + objective: str, + plan_messages: list[str], + round_index: int, + session_id: str | None, + latest_review_completion_summary: str, + latest_plan_overview: str, + config: PlannerConfig, + ) -> tuple[PlanDecision, str]: + """Evaluate and return both PlanDecision and raw JSON output. + + Returns: + Tuple of (PlanDecision, raw_json_output) + """ prompt = self._build_evaluate_prompt( objective=objective, plan_messages=plan_messages, @@ -131,7 +158,8 @@ def evaluate( ), run_label="planner", ) - parsed = parse_plan_text(result.last_agent_message) + raw_output = result.last_agent_message or "" + parsed = parse_plan_text(raw_output) if parsed is None: parsed = self._fallback_snapshot( objective=objective, @@ -139,7 +167,7 @@ def evaluate( latest_checks=[], trigger="loop-engine", terminal=False, - error=result.last_agent_message or f"Planner returned empty output. exit={result.exit_code}", + error=raw_output or f"Planner returned empty output. exit={result.exit_code}", ) if config.mode == PLANNER_MODE_RECORD: parsed.should_propose_follow_up = False @@ -155,9 +183,12 @@ def evaluate( checks=[], stop_reason=None, ) - return self._snapshot_to_decision( - snapshot=parsed, - latest_review_completion_summary=latest_review_completion_summary, + return ( + self._snapshot_to_decision( + snapshot=parsed, + latest_review_completion_summary=latest_review_completion_summary, + ), + raw_output, ) def _build_evaluate_prompt(