From 6297f180a40f276f303f93cba28b9b692c04a805 Mon Sep 17 00:00:00 2001 From: ComBba Date: Tue, 10 Feb 2026 00:23:21 +0900 Subject: [PATCH 1/3] feat(sse): add progress events for enrichment nodes (RAG, Web Search, Code Analysis) Users can now see real-time progress during the enrichment phase: - RAG context enrichment - Web search grounding - Code analysis Added ENRICHMENT_START/COMPLETE/ERROR event types to EventType enum and emit events at start, completion, and error states for all three enrichment nodes. --- .../app/graph/nodes/code_analysis_enrich.py | 61 +++++++++++++++++++ backend/app/graph/nodes/rag_enrich.py | 60 ++++++++++++++++++ backend/app/graph/nodes/web_search_enrich.py | 60 ++++++++++++++++++ backend/app/services/event_channel.py | 4 ++ 4 files changed, 185 insertions(+) diff --git a/backend/app/graph/nodes/code_analysis_enrich.py b/backend/app/graph/nodes/code_analysis_enrich.py index a2937fa..7f76881 100644 --- a/backend/app/graph/nodes/code_analysis_enrich.py +++ b/backend/app/graph/nodes/code_analysis_enrich.py @@ -5,6 +5,7 @@ from langchain_core.runnables import RunnableConfig from app.graph.state import EvaluationState +from app.services.event_channel import create_sommelier_event, get_event_channel from app.services.repo_clone_service import clone_and_analyze logger = logging.getLogger(__name__) @@ -14,8 +15,33 @@ async def code_analysis_enrich( state: EvaluationState, config: Optional[RunnableConfig] = None ) -> Dict[str, Any]: started_at = datetime.now(timezone.utc).isoformat() + evaluation_id = state.get("evaluation_id") + event_channel = get_event_channel() + + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="code_analysis", + event_type="enrichment_start", + progress_percent=0, + message="Code analysis starting...", + ), + ) if existing := state.get("code_analysis"): + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="code_analysis", + event_type="enrichment_complete", + progress_percent=100, + message="Code analysis complete (cached)", + ), + ) return {"code_analysis": existing} repo_url = state.get("repo_url", "") @@ -25,6 +51,17 @@ async def code_analysis_enrich( github_token = state.get("github_token") if not repo_url: + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="code_analysis", + event_type="enrichment_complete", + progress_percent=100, + message="Code analysis skipped (no repo URL)", + ), + ) return { "code_analysis": { "status": "skipped", @@ -61,6 +98,19 @@ async def code_analysis_enrich( "summary": clone_result.summary, } + if evaluation_id: + files_count = len(clone_result.main_files) + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="code_analysis", + event_type="enrichment_complete", + progress_percent=100, + message=f"Code analysis complete ({files_count} files)", + ), + ) + result: Dict[str, Any] = { "code_analysis": code_analysis, "trace_metadata": { @@ -80,6 +130,17 @@ async def code_analysis_enrich( except Exception as e: logger.exception("code_analysis_enrich failed") + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="code_analysis", + event_type="enrichment_error", + progress_percent=100, + message=f"Code analysis failed: {e}", + ), + ) return { "code_analysis": { "status": "error", diff --git a/backend/app/graph/nodes/rag_enrich.py b/backend/app/graph/nodes/rag_enrich.py index 84fa5d0..c3d9182 100644 --- a/backend/app/graph/nodes/rag_enrich.py +++ b/backend/app/graph/nodes/rag_enrich.py @@ -7,6 +7,7 @@ from app.core.config import settings from app.graph.state import EvaluationState +from app.services.event_channel import create_sommelier_event, get_event_channel logger = logging.getLogger(__name__) @@ -101,14 +102,50 @@ async def rag_enrich( state: EvaluationState, config: Optional[RunnableConfig] = None ) -> Dict[str, Any]: started_at = datetime.now(timezone.utc).isoformat() + evaluation_id = state.get("evaluation_id") + event_channel = get_event_channel() + + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="rag", + event_type="enrichment_start", + progress_percent=0, + message="RAG context enrichment starting...", + ), + ) if existing := state.get("rag_context"): + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="rag", + event_type="enrichment_complete", + progress_percent=100, + message="RAG context enrichment complete (cached)", + ), + ) return {"rag_context": existing} repo_context = state.get("repo_context", {}) query = _create_query(state) if not settings.VERTEX_API_KEY: + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="rag", + event_type="enrichment_complete", + progress_percent=100, + message="RAG enrichment skipped (no API key)", + ), + ) return { "rag_context": { "query": query, @@ -145,6 +182,18 @@ async def rag_enrich( min(settings.RAG_TOP_K, len(docs)), ) + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="rag", + event_type="enrichment_complete", + progress_percent=100, + message=f"RAG enrichment complete ({len(chunks)} chunks)", + ), + ) + return { "rag_context": {"query": query, "chunks": chunks, "error": None}, "trace_metadata": { @@ -157,6 +206,17 @@ async def rag_enrich( except Exception as e: logger.warning(f"RAG embedding failed: {e}") + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="rag", + event_type="enrichment_error", + progress_percent=100, + message=f"RAG enrichment failed: {e}", + ), + ) return { "rag_context": {"query": query, "chunks": [], "error": str(e)}, "errors": [f"rag_enrich failed: {e!s}"], diff --git a/backend/app/graph/nodes/web_search_enrich.py b/backend/app/graph/nodes/web_search_enrich.py index 616fbd1..ef31a15 100644 --- a/backend/app/graph/nodes/web_search_enrich.py +++ b/backend/app/graph/nodes/web_search_enrich.py @@ -6,6 +6,7 @@ from app.core.config import settings from app.graph.state import EvaluationState +from app.services.event_channel import create_sommelier_event, get_event_channel logger = logging.getLogger(__name__) @@ -23,11 +24,47 @@ async def web_search_enrich( state: EvaluationState, config: Optional[RunnableConfig] = None ) -> Dict[str, Any]: started_at = datetime.now(timezone.utc).isoformat() + evaluation_id = state.get("evaluation_id") + event_channel = get_event_channel() + + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="web_search", + event_type="enrichment_start", + progress_percent=0, + message="Web search enrichment starting...", + ), + ) if existing := state.get("web_search_context"): + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="web_search", + event_type="enrichment_complete", + progress_percent=100, + message="Web search enrichment complete (cached)", + ), + ) return {"web_search_context": existing} if not settings.VERTEX_API_KEY: + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="web_search", + event_type="enrichment_complete", + progress_percent=100, + message="Web search skipped (no API key)", + ), + ) return { "web_search_context": { "query": "", @@ -85,6 +122,18 @@ async def web_search_enrich( } ) + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="web_search", + event_type="enrichment_complete", + progress_percent=100, + message=f"Web search complete ({len(sources)} sources)", + ), + ) + return { "web_search_context": { "query": query, @@ -103,6 +152,17 @@ async def web_search_enrich( except Exception as e: logger.warning(f"Web search grounding failed: {e}") + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="web_search", + event_type="enrichment_error", + progress_percent=100, + message=f"Web search failed: {e}", + ), + ) return { "web_search_context": { "query": query, diff --git a/backend/app/services/event_channel.py b/backend/app/services/event_channel.py index 7380eb8..1bc20f0 100644 --- a/backend/app/services/event_channel.py +++ b/backend/app/services/event_channel.py @@ -66,6 +66,10 @@ class EventType(str, Enum): DEEP_SYNTHESIS_COMPLETE = "deep_synthesis_complete" QUALITY_GATE_COMPLETE = "quality_gate_complete" METRICS_UPDATE = "metrics_update" + # Enrichment phase events (RAG, Web Search, Code Analysis) + ENRICHMENT_START = "enrichment_start" + ENRICHMENT_COMPLETE = "enrichment_complete" + ENRICHMENT_ERROR = "enrichment_error" @dataclass From 86d39efd468f41352b27c9e59c2011ce25801d3c Mon Sep 17 00:00:00 2001 From: ComBba Date: Tue, 10 Feb 2026 00:35:47 +0900 Subject: [PATCH 2/3] fix(sse): address review feedback - security and missing events Fixes based on Gemini and CodeRabbit reviews: 1. [CRITICAL] Add 'enrichment_error' to CRITICAL_EVENT_TYPES - Prevents error events from being silently dropped when buffer full 2. [SECURITY] Remove raw exception from SSE error messages (3 files) - Prevents leaking sensitive info (e.g., GitHub tokens in git clone errors) - Generic 'internal error' message sent to client, full error logged server-side 3. [BUG] Add missing complete event for empty docs case in rag_enrich.py - Previously SSE subscribers would remain stuck in 'in progress' state 4. [IMPROVE] code_analysis_enrich.py: reflect actual status in message - Message now shows 'complete', 'partial', or 'skipped' accurately --- backend/app/graph/nodes/code_analysis_enrich.py | 4 ++-- backend/app/graph/nodes/rag_enrich.py | 13 ++++++++++++- backend/app/graph/nodes/web_search_enrich.py | 2 +- backend/app/services/event_channel.py | 1 + 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/backend/app/graph/nodes/code_analysis_enrich.py b/backend/app/graph/nodes/code_analysis_enrich.py index 7f76881..607c772 100644 --- a/backend/app/graph/nodes/code_analysis_enrich.py +++ b/backend/app/graph/nodes/code_analysis_enrich.py @@ -107,7 +107,7 @@ async def code_analysis_enrich( sommelier="code_analysis", event_type="enrichment_complete", progress_percent=100, - message=f"Code analysis complete ({files_count} files)", + message=f"Code analysis {status} ({files_count} files analyzed)", ), ) @@ -138,7 +138,7 @@ async def code_analysis_enrich( sommelier="code_analysis", event_type="enrichment_error", progress_percent=100, - message=f"Code analysis failed: {e}", + message="Code analysis failed due to an internal error.", ), ) return { diff --git a/backend/app/graph/nodes/rag_enrich.py b/backend/app/graph/nodes/rag_enrich.py index c3d9182..bffd77b 100644 --- a/backend/app/graph/nodes/rag_enrich.py +++ b/backend/app/graph/nodes/rag_enrich.py @@ -164,6 +164,17 @@ async def rag_enrich( try: docs = _build_documents_from_context(repo_context) if not docs: + if evaluation_id: + event_channel.emit_sync( + evaluation_id, + create_sommelier_event( + evaluation_id=evaluation_id, + sommelier="rag", + event_type="enrichment_complete", + progress_percent=100, + message="RAG enrichment complete (no documents)", + ), + ) return { "rag_context": {"query": query, "chunks": [], "error": None}, } @@ -214,7 +225,7 @@ async def rag_enrich( sommelier="rag", event_type="enrichment_error", progress_percent=100, - message=f"RAG enrichment failed: {e}", + message="RAG enrichment failed due to an internal error.", ), ) return { diff --git a/backend/app/graph/nodes/web_search_enrich.py b/backend/app/graph/nodes/web_search_enrich.py index ef31a15..1580168 100644 --- a/backend/app/graph/nodes/web_search_enrich.py +++ b/backend/app/graph/nodes/web_search_enrich.py @@ -160,7 +160,7 @@ async def web_search_enrich( sommelier="web_search", event_type="enrichment_error", progress_percent=100, - message=f"Web search failed: {e}", + message="Web search failed due to an internal error.", ), ) return { diff --git a/backend/app/services/event_channel.py b/backend/app/services/event_channel.py index 1bc20f0..6b55a01 100644 --- a/backend/app/services/event_channel.py +++ b/backend/app/services/event_channel.py @@ -40,6 +40,7 @@ "evaluation_complete", "evaluation_error", "technique_error", + "enrichment_error", } TRANSFER_LOOP_INTERVAL_SECONDS = 0.01 TRANSFER_BATCH_SIZE = 10 From 32c9860c940d5a58d71bdc41bf98669c9a0b9af4 Mon Sep 17 00:00:00 2001 From: ComBba Date: Tue, 10 Feb 2026 00:42:04 +0900 Subject: [PATCH 3/3] fix(config): allow extra env vars and make OAuth fields optional Pre-existing issues fixed: - Add extra='ignore' to Settings model_config for backward compatibility with legacy env vars (GEMINI_API_KEY, OPENAI_API_KEY) - Add default empty values to GITHUB_CLIENT_ID and GITHUB_CLIENT_SECRET for testing/development environments These changes allow tests to run without requiring full production config. --- backend/app/core/config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 57eec5e..fd4bb52 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -20,6 +20,7 @@ class Settings(BaseSettings): env_file=".env", env_file_encoding="utf-8", case_sensitive=True, + extra="ignore", ) APP_NAME: str = "Somm.dev API" @@ -31,9 +32,9 @@ class Settings(BaseSettings): JWT_ALGORITHM: str = "HS256" JWT_EXPIRATION_DAYS: int = 7 - # GitHub OAuth - GITHUB_CLIENT_ID: str - GITHUB_CLIENT_SECRET: str + # GitHub OAuth (required for production, optional for testing) + GITHUB_CLIENT_ID: str = "" + GITHUB_CLIENT_SECRET: str = "" # URLs FRONTEND_URL: str = "https://www.somm.dev"