TashanGKD · FZR95 · Mar 8, 2026 · Mar 8, 2026
diff --git a/app/core/config.py b/app/core/config.py
@@ -150,6 +150,11 @@ def get_profile_helper_root() -> Path:
     return _libs_root() / "profile_helper"
 
 
+def get_profile_helper_profiles_dir() -> Path:
+    """Return the `profile_helper/profiles` directory under the workspace base."""
+    return get_workspace_base().joinpath("profile_helper", "profiles")
+
+
 def get_prompts_dir() -> Path:
     """Return prompts directory: builtin libs/prompts/ > primary (mount) > app/prompts/. Mount only supplement."""
     builtin = get_libs_builtin_root()

diff --git a/app/services/profile_helper/agent.py b/app/services/profile_helper/agent.py
@@ -1,15 +1,19 @@
 """LLM tool calling + agentic loop for profile helper."""
+
 import json
+from datetime import date
 
 from app.services.profile_helper.llm_client import create_client, get_default_model
 from app.services.profile_helper.prompts import META_SYSTEM_PROMPT
+from app.services.profile_helper.sessions import save_forum_profile, save_profile
 from app.services.profile_helper.tools import (
     list_doc_names,
     list_skill_names,
     read_doc,
     read_skill,
 )
 
+
 def _build_tools() -> list[dict]:
     return [
         {
@@ -52,21 +56,21 @@ def _build_tools() -> list[dict]:
             "type": "function",
             "function": {
                 "name": "read_profile",
-                "description": "获取当前会话中的画像内容。每次开始任务前先调用，了解当前填写进度和采集阶段。",
+                "description": "获取当前会话中的科研数字分身内容。每次开始任务前先调用，了解当前填写进度和采集阶段。",
                 "parameters": {"type": "object", "properties": {}},
             },
         },
         {
             "type": "function",
             "function": {
                 "name": "write_profile",
-                "description": "将发展画像内容写入会话。采集到数据后必须调用此工具保存，不要只在对话中展示而不保存。",
+                "description": "将科研数字分身内容写入会话并同步保存到 profiles 目录。创建和更新过程中每获得一轮可保存信息后都应立即调用此工具。",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "content": {
                             "type": "string",
-                            "description": "完整的发展画像 Markdown 内容",
+                            "description": "完整的科研数字分身 Markdown 内容",
                         }
                     },
                     "required": ["content"],
@@ -77,13 +81,13 @@ def _build_tools() -> list[dict]:
             "type": "function",
             "function": {
                 "name": "write_forum_profile",
-                "description": "将论坛画像（数字分身）写入会话。当用户确认「生成论坛画像」并完成隐私设置后，用此工具保存论坛画像内容。",
+                "description": "将他山论坛分身写入会话并同步保存到 profiles 目录。当用户确认生成后，用此工具保存内容。",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "content": {
                             "type": "string",
-                            "description": "完整的论坛画像 Markdown（Identity/Expertise/Thinking Style/Discussion Style 四节格式）",
+                            "description": "完整的他山论坛分身 Markdown（Identity/Expertise/Thinking Style/Discussion Style 四节格式）",
                         }
                     },
                     "required": ["content"],
@@ -94,7 +98,7 @@ def _build_tools() -> list[dict]:
 
 
 def _execute_tool(name: str, args: dict, session: dict) -> str:
-    """Execute a single tool, return result string."""
+    """Execute a single tool and return its result."""
     if name == "read_skill":
         return read_skill(args.get("skill_name", ""))
     if name == "read_doc":
@@ -103,12 +107,12 @@ def _execute_tool(name: str, args: dict, session: dict) -> str:
         return session["profile"]
     if name == "write_profile":
         content = args.get("content", "")
-        session["profile"] = content
-        return f"已写入发展画像，共 {len(content)} 字符。"
+        path = save_profile(session, content)
+        return f"已写入科研数字分身并保存到 {path.name}，共 {len(content)} 字符。"
     if name == "write_forum_profile":
         content = args.get("content", "")
-        session["forum_profile"] = content
-        return f"已写入论坛画像，共 {len(content)} 字符。"
+        path = save_forum_profile(session, content)
+        return f"已写入他山论坛分身并保存到 {path.name}，共 {len(content)} 字符。"
     return f"未知工具: {name}"
 
 
@@ -130,14 +134,19 @@ def run_agent(
         return
 
     model = model or get_default_model()
+    today_str = date.today().strftime("%Y-%m-%d")
+    system_content = (
+        META_SYSTEM_PROMPT
+        + f"\n\n**当前日期**：{today_str}（写入画像时，创建时间、最后更新、unnamed 文件名等请使用此日期）"
+    )
     messages = session["messages"].copy()
     messages.append({"role": "user", "content": user_message})
 
     max_iterations = 20
     for _ in range(max_iterations):
         response = client.chat.completions.create(
             model=model,
-            messages=[{"role": "system", "content": META_SYSTEM_PROMPT}] + messages,
+            messages=[{"role": "system", "content": system_content}] + messages,
             tools=_build_tools(),
             tool_choice="auto",
         )

diff --git a/app/services/profile_helper/prompts.py b/app/services/profile_helper/prompts.py
@@ -1,19 +1,21 @@
 """Meta system prompt for profile helper agent."""
-META_SYSTEM_PROMPT = """# 科研数字分身助手
 
-你是「他山画像系统」的科研数字分身采集助手。你的核心任务是通过结构化对话，帮助科研人员建立、完善并维护他们的多维度发展画像。
+META_SYSTEM_PROMPT = """# 科研数字分身采集助手
+
+你是「他山数字分身系统」的科研数字分身采集助手。你的核心任务是通过结构化对话，帮助科研人员建立、完善并维护他们的多维度科研数字分身。
 
 ## 隐私与安全声明（必读）
 
 在对话开始或用户首次询问时，主动告知以下安全细则：
-- **用户画像仅在本次对话中临时生成**，系统不会在任何位置保存该画像或您的任何隐私信息
-- 您可以自行下载并本地保存
+- 您在本系统中提供的所有信息仅用于构建和更新您的数字分身。平台不会向任何第三方泄露您的数据，也不会将您的数据用于模型训练或其他用途。
+- 您的数字分身仅在平台内部运行，用于与系统中的其他智能体进行信息交流与协作，不会在平台之外使用。
+- 您可以自行决定该数字分身是否公开。当选择公开时，其他用户在发起讨论或协作任务时可以选择您的数字分身参与；当选择不公开时，该数字分身仅对您本人可见和使用。
 
 ## 角色定位
 
 - 语言：全程使用**中文**，语气专业、温暖、不评判
-- 身份：既是采访者（问问题），也是分析师（解读数据），也是记录者（写入画像）
-- 画像数据存储在会话中，用户可随时下载为 .md 文件
+- 身份：既是采访者（问问题），也是分析师（解读数据），也是记录者（写入数字分身）
+- 数字分身数据存储在会话中，并会同步保存到 `profiles/` 目录，用户也可随时下载为 .md 文件
 
 ## 画像维度说明
 
@@ -32,34 +34,36 @@
 
 | 用户说的话 | 应调用的 read_skill |
 |:---|:---|
-| 「帮我建立画像」「新建档案」「开始收集信息」 | collect-basic-info |
+| 「帮我建立分身」「新建档案」「开始收集信息」 | collect-basic-info |
 | 「我想填量表」「用标准量表测量」「施测」 | 根据画像中哪个量表未完成，依次 administer-ams、administer-rcss、administer-mini-ipip |
 | 「帮我推断」「不想填量表」「快速估算」 | infer-profile-dimensions |
 | 「查看画像」「审核」「给我看结果」 | review-profile |
 | 「修改」「更新」「补充」「我想改一下」 | update-profile |
 | 「新增维度」「删除维度」「修改维度」「调整画像结构」 | modify-profile-schema |
 | 「从 AI 记忆导入」「根据 ChatGPT 记忆」「我有 AI 记忆」「生成提示词」 | generate-ai-memory-prompt |
 | 「整合 AI 回复」「导入 AI 的回答」「把这段内容写进画像」（用户粘贴了 AI 的回复内容） | import-ai-memory |
-| 「生成论坛画像」「数字分身」「导出论坛档案」 | generate-forum-profile |
+| 「生成论坛画像」「生成他山论坛分身」「他山论坛分身」「数字分身」「导出论坛档案」 | generate-forum-profile |
 
 ## 工具使用说明
 
 - **read_skill(skill_name)**：获取具体任务的操作指南。执行任务前必须先调用此工具。
 - **read_doc(doc_name)**：获取量表原题等参考文档。施测时用 read_doc 读取题目，如 academic-motivation-scale、mini-ipip-scale、researcher-cognitive-style。
 - **read_profile()**：获取当前会话中的画像内容。每次开始任务前先调用，了解当前填写进度。
-- **write_profile(content)**：将发展画像内容写入会话。采集到数据后必须调用此工具保存。
-- **write_forum_profile(content)**：将论坛画像（数字分身）写入会话。当用户确认「生成论坛画像」并完成隐私设置后，用此工具保存论坛画像（Identity/Expertise/Thinking Style/Discussion Style 四节格式）。
+- **write_profile(content)**：将科研数字分身内容写入会话，并同步保存到 `profiles/` 目录。创建和更新过程中，每获得一轮可保存信息后都应立即调用。
+- **write_forum_profile(content)**：将他山论坛分身写入会话，并同步保存到 `profiles/` 目录。当用户确认「生成他山论坛分身」并完成隐私设置后，用此工具保存（Identity/Expertise/Thinking Style/Discussion Style 四节格式）。
 
 ## 通用操作规则
 
-1. **每次开始任务前**，先调用 read_profile 了解当前画像状态和采集阶段。
-2. **写入数据时**：发展画像用 write_profile，论坛画像（数字分身）用 write_forum_profile，不要只在对话中展示而不保存。
+1. **每次开始任务前**，先调用 read_profile 了解当前数字分身状态和采集阶段。
+2. **写入数据时**：科研数字分身用 write_profile，他山论坛分身用 write_forum_profile，不要只在对话中展示而不保存；在创建和更新过程中要边采集边保存。
 3. **推断数据** 须标注 `（AI推断，置信度：高/中/低）`，与用户实测数据区分。
-4. 若用户没有提供姓名，画像标题使用 `unnamed-[日期]`。
+4. **不要在对话开始时询问姓名或标识**。AI 记忆导入等流程直接从任务内容开始；仅在需要保存画像（write_profile）时，若尚未确定姓名，再单独询问用户。若用户未提供，数字分身标题使用 `unnamed-[日期]`。
 5. **当前需求**是用户自述字段，不使用 AI 推断标注，但综合解读中应结合此信息给出贴近现实的近期建议。
-6. **AI 记忆导入安全原则**：
-   - 生成提示词时，只询问与画像维度直接相关的信息，**严禁**涉及财务、健康、家庭关系、政治观点等隐私内容
-   - 来自 AI 记忆的所有信息，**必须经用户逐条确认**后才能写入画像，不得自动批量写入
+6. **问答话术**：提问时使用「依次回答」，不要使用「请随意回答」。
+7. **AI 记忆导入安全原则**：
+   - 生成提示词时，只询问与画像维度直接相关的信息，**严禁**涉及财务、健康、家庭关系、政治观点等隐私内容；**不询问用户使用哪个 AI 工具**，提示词对所有 AI 平台通用
+   - 有据可查且无冲突的条目：内部整合后直接写入，**不向用户展示**；只有模糊、冲突或缺关键字段时才以**选择题**形式向用户提问，必要时再用填空或开放问答，并给出回答样例
+   - 完整画像仅在用户「查看画像」「审核」时**一次性展示**供确认
    - AI 记忆来源的数据须标注 `（来源：AI记忆，已用户确认）`
    - 当 AI 记忆信息与用户已填写数据存在冲突时，**以用户自述为准**
 """
diff --git a/app/services/profile_helper/sessions.py b/app/services/profile_helper/sessions.py
@@ -1,25 +1,112 @@
-"""In-memory session management with lightweight cleanup."""
+"""In-memory session management with cleanup and profile auto-save."""
+
+from __future__ import annotations
+
 import os
+import re
 import time
 import uuid
+from datetime import date
+from pathlib import Path
 
+from app.core.config import get_profile_helper_profiles_dir
 from app.services.profile_helper.tools import load_template
 
 _sessions: dict[str, dict] = {}
 SESSION_TTL_SECONDS = max(60, int(os.getenv("PROFILE_HELPER_SESSION_TTL_SECONDS", "3600")))
 SESSION_MAX_COUNT = max(10, int(os.getenv("PROFILE_HELPER_SESSION_MAX_COUNT", "1000")))
+PLACEHOLDER_IDENTIFIERS = {"[姓名/标识]", "姓名/标识"}  # template placeholders treated as "no name given"
+PROFILE_TITLE_PREFIXES = (  # H1 prefixes removed before the identifier is read from a profile title
+    "# 科研人员画像 — ",
+    "# 科研数字分身 — ",
+)
 
 
 def _now() -> float:
     return time.time()
 
 
-def _new_session() -> dict:
+def _load_template_with_date() -> str:
+    """Load the profile template with every `YYYY-MM-DD` placeholder set to today's date."""
+    return load_template().replace("YYYY-MM-DD", date.today().isoformat())
+
+
+def _today_unnamed() -> str:
+    return "unnamed-" + date.today().isoformat()  # fallback identifier, e.g. unnamed-2026-03-08
+
+
+def _sanitize_identifier(identifier: str) -> str:
+    """Make a title fragment filename-safe; blank or placeholder input gives today's unnamed id."""
+    text = identifier.strip()
+    if not text or text in PLACEHOLDER_IDENTIFIERS:
+        return _today_unnamed()
+    text = re.sub(r"\s+", " ", re.sub(r'[\\/:*?"<>|]+', "-", text))
+    return text.strip(" .") or _today_unnamed()
+
+
+def _extract_profile_identifier(content: str) -> str:
+    """Derive the file identifier from the first non-blank line of the profile Markdown.
+
+    A title prefix or `# ` heading gives the sanitized rest of the line; otherwise `unnamed-<date>`.
+    """
+    heading = next((text for text in map(str.strip, content.splitlines()) if text), "")
+    for prefix in PROFILE_TITLE_PREFIXES:
+        if heading.startswith(prefix):
+            return _sanitize_identifier(heading[len(prefix) :])
+    if heading.startswith("# "):
+        return _sanitize_identifier(heading[2:])
+    return _today_unnamed()
+
+
+def _normalize_existing_path(path_value: str | None) -> Path | None:
+    """Turn a stored path string into a `Path`; `None` or an empty string gives `None`."""
+    # Despite the name, this does not check whether the path exists on disk.
+    return Path(path_value) if path_value else None
+
+
+def _profiles_dir() -> Path:
+    return get_profile_helper_profiles_dir()  # local alias for the config-level directory lookup
+
+
+def _session_suffix(session: dict) -> str:
+    """Return up to 8 filename-safe characters from the session id, or 8 random hex digits."""
+    # Ids can be caller-supplied: keep ASCII letters/digits only so no "/" or ".." reaches the path.
+    sid = re.sub(r"[^0-9A-Za-z]", "", str(session.get("session_id") or ""))
+    return sid[:8] or uuid.uuid4().hex[:8]
+
+
+def _target_profile_path(content: str, session: dict) -> Path:
+    """Build `<profiles dir>/<identifier>-<session suffix>.md` for the given profile content."""
+    stem = "-".join((_extract_profile_identifier(content), _session_suffix(session)))
+    return _profiles_dir() / (stem + ".md")
+
+
+def _target_forum_profile_path(session: dict) -> Path:
+    """Return the forum profile path: the profile file's name with `-论坛画像.md` after its stem."""
+    base = _normalize_existing_path(session.get("profile_path"))
+    base = base or _target_profile_path(session.get("profile", ""), session)
+    return base.with_name(base.stem + "-论坛画像.md")
+
+
+def _relocate_file_if_needed(current_path: Path | None, target_path: Path) -> None:
+    """Move an earlier saved file to `target_path`, or delete it if the target already exists."""
+    if current_path and current_path != target_path and current_path.exists():
+        if target_path.exists():
+            current_path.unlink()  # target already present; only the old copy is removed
+        else:
+            target_path.parent.mkdir(parents=True, exist_ok=True)
+            current_path.rename(target_path)
+
+
+def _new_session(session_id: str) -> dict:  # session_id is stored so saved file names can use it
     now = _now()
     return {
+        "session_id": session_id,
         "messages": [],
-        "profile": load_template(),
+        "profile": _load_template_with_date(),  # template text with YYYY-MM-DD set to today
         "forum_profile": "",
+        "profile_path": None,  # filled in by save_profile after the first write to disk
+        "forum_profile_path": None,  # filled in once a forum profile file has been written
         "created_at": now,
         "updated_at": now,
     }
@@ -51,17 +138,67 @@ def _cleanup() -> None:
             _sessions.pop(sid, None)
 
 
+def save_profile(session: dict, content: str) -> Path:
+    """Write the development profile to disk and mirror it into the session.
+
+    Forum-profile text already in the session is re-saved beside it. Returns the profile path.
+    """
+    _profiles_dir().mkdir(parents=True, exist_ok=True)
+
+    destination = _target_profile_path(content, session)
+    previous = _normalize_existing_path(session.get("profile_path"))
+    _relocate_file_if_needed(previous, destination)  # follow a changed title or identifier
+    destination.write_text(content, encoding="utf-8")
+    session.update(profile=content, profile_path=str(destination))
+
+    forum_text = session.get("forum_profile", "")
+    if forum_text:
+        # Resolved only now: the forum file name derives from the profile_path stored above.
+        forum_destination = _target_forum_profile_path(session)
+        forum_previous = _normalize_existing_path(session.get("forum_profile_path"))
+        _relocate_file_if_needed(forum_previous, forum_destination)
+        forum_destination.write_text(forum_text, encoding="utf-8")
+        session["forum_profile_path"] = str(forum_destination)
+    _touch(session)
+    return destination
+
+
+def save_forum_profile(session: dict, content: str) -> Path:
+    """Write the forum profile to disk and mirror it into the session.
+
+    A non-empty main profile in the session is saved first. Returns the forum file path.
+    """
+    _profiles_dir().mkdir(parents=True, exist_ok=True)
+
+    main_profile = session.get("profile", "")
+    if main_profile:
+        # Saving the main profile first settles profile_path, which names the forum file.
+        save_profile(session, main_profile)
+    destination = _target_forum_profile_path(session)
+    previous = _normalize_existing_path(session.get("forum_profile_path"))
+    _relocate_file_if_needed(previous, destination)
+    destination.write_text(content, encoding="utf-8")
+    session.update(forum_profile=content, forum_profile_path=str(destination))
+    _touch(session)
+    return destination
+
+
 def get_or_create(session_id: str | None = None) -> tuple[str, dict]:
     """Get or create session. Returns (session_id, session_data)."""
     _cleanup()
     if session_id and session_id in _sessions:
         s = _sessions[session_id]
+        s["session_id"] = session_id  # keep the id on the session; _session_suffix reads it
         if "forum_profile" not in s:
             s["forum_profile"] = ""
+        if "profile_path" not in s:  # presumably for sessions created before this key existed
+            s["profile_path"] = None
+        if "forum_profile_path" not in s:  # same backfill for the forum profile file path
+            s["forum_profile_path"] = None
         _touch(s)
         return session_id, s
     sid = session_id or str(uuid.uuid4())
-    _sessions[sid] = _new_session()
+    _sessions[sid] = _new_session(sid)
     _cleanup()
     return sid, _sessions[sid]
 
@@ -80,5 +217,5 @@ def get(session_id: str) -> dict | None:
 
 def reset(session_id: str) -> dict:
     """Reset session: clear messages and restore template profile."""
-    _sessions[session_id] = _new_session()
+    _sessions[session_id] = _new_session(session_id)  # files already saved on disk are not removed
     return _sessions[session_id]