Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Settings(BaseSettings):
env_file=".env",
env_file_encoding="utf-8",
case_sensitive=True,
extra="ignore",
)

APP_NAME: str = "Somm.dev API"
Expand All @@ -31,9 +32,9 @@ class Settings(BaseSettings):
JWT_ALGORITHM: str = "HS256"
JWT_EXPIRATION_DAYS: int = 7

# GitHub OAuth
GITHUB_CLIENT_ID: str
GITHUB_CLIENT_SECRET: str
# GitHub OAuth (required for production, optional for testing)
GITHUB_CLIENT_ID: str = ""
GITHUB_CLIENT_SECRET: str = ""

# URLs
FRONTEND_URL: str = "https://www.somm.dev"
Expand Down
61 changes: 61 additions & 0 deletions backend/app/graph/nodes/code_analysis_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from langchain_core.runnables import RunnableConfig

from app.graph.state import EvaluationState
from app.services.event_channel import create_sommelier_event, get_event_channel
from app.services.repo_clone_service import clone_and_analyze

logger = logging.getLogger(__name__)
Expand All @@ -14,8 +15,33 @@ async def code_analysis_enrich(
state: EvaluationState, config: Optional[RunnableConfig] = None
) -> Dict[str, Any]:
started_at = datetime.now(timezone.utc).isoformat()
evaluation_id = state.get("evaluation_id")
event_channel = get_event_channel()

if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="code_analysis",
event_type="enrichment_start",
progress_percent=0,
message="Code analysis starting...",
),
)
Comment on lines +21 to +31

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There is significant code duplication in how SSE events are emitted. This block, and four other similar blocks in this file, repeat the `if evaluation_id:` check and the call to `event_channel.emit_sync`. This pattern is also present in `rag_enrich.py` and `web_search_enrich.py`.

To improve maintainability and reduce redundancy, consider refactoring this logic into a local helper function within `code_analysis_enrich`. For example:

def _emit_event(event_type: str, progress: int, message: str):
    """Emit one "code_analysis" sommelier event for the current evaluation.

    Does nothing when ``evaluation_id`` is falsy. ``evaluation_id`` and
    ``event_channel`` are not defined here; they are read from the
    surrounding scope.

    Args:
        event_type: Passed through as the event's ``event_type``.
        progress: Passed through as the event's ``progress_percent``.
        message: Passed through as the event's ``message``.
    """
    # Guard clause: without an evaluation ID there is nothing to emit to.
    if not evaluation_id:
        return
    payload = create_sommelier_event(
        evaluation_id=evaluation_id,
        sommelier="code_analysis",
        event_type=event_type,
        progress_percent=progress,
        message=message,
    )
    event_channel.emit_sync(evaluation_id, payload)

# Then you can call it like this:
_emit_event("enrichment_start", 0, "Code analysis starting...")


if existing := state.get("code_analysis"):
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="code_analysis",
event_type="enrichment_complete",
progress_percent=100,
message="Code analysis complete (cached)",
),
)
return {"code_analysis": existing}

repo_url = state.get("repo_url", "")
Expand All @@ -25,6 +51,17 @@ async def code_analysis_enrich(
github_token = state.get("github_token")

if not repo_url:
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="code_analysis",
event_type="enrichment_complete",
progress_percent=100,
message="Code analysis skipped (no repo URL)",
),
)
return {
"code_analysis": {
"status": "skipped",
Expand Down Expand Up @@ -61,6 +98,19 @@ async def code_analysis_enrich(
"summary": clone_result.summary,
}

if evaluation_id:
files_count = len(clone_result.main_files)
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="code_analysis",
event_type="enrichment_complete",
progress_percent=100,
message=f"Code analysis {status} ({files_count} files analyzed)",
),
)

result: Dict[str, Any] = {
"code_analysis": code_analysis,
"trace_metadata": {
Expand All @@ -80,6 +130,17 @@ async def code_analysis_enrich(

except Exception as e:
logger.exception("code_analysis_enrich failed")
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="code_analysis",
event_type="enrichment_error",
progress_percent=100,
message="Code analysis failed due to an internal error.",
),
)
return {
"code_analysis": {
"status": "error",
Expand Down
71 changes: 71 additions & 0 deletions backend/app/graph/nodes/rag_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from app.core.config import settings
from app.graph.state import EvaluationState
from app.services.event_channel import create_sommelier_event, get_event_channel

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -101,14 +102,50 @@ async def rag_enrich(
state: EvaluationState, config: Optional[RunnableConfig] = None
) -> Dict[str, Any]:
started_at = datetime.now(timezone.utc).isoformat()
evaluation_id = state.get("evaluation_id")
event_channel = get_event_channel()

if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="rag",
event_type="enrichment_start",
progress_percent=0,
message="RAG context enrichment starting...",
),
)
Comment on lines +108 to +118

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Similar to other enrichment nodes in this PR, there's significant code duplication in the event-emitting logic. The `if evaluation_id: ...` pattern is repeated multiple times.

To improve maintainability, this could be refactored into a helper function that encapsulates creating and emitting the event. This would make the main function body cleaner and easier to read.

Example helper:

def _emit_event(event_type: str, progress: int, message: str):
    """Emit one "rag" sommelier event for the current evaluation.

    Does nothing when ``evaluation_id`` is falsy. ``evaluation_id`` and
    ``event_channel`` are not defined here; they are read from the
    surrounding scope.

    Args:
        event_type: Passed through as the event's ``event_type``.
        progress: Passed through as the event's ``progress_percent``.
        message: Passed through as the event's ``message``.
    """
    # Guard clause: without an evaluation ID there is nothing to emit to.
    if not evaluation_id:
        return
    payload = create_sommelier_event(
        evaluation_id=evaluation_id,
        sommelier="rag",
        event_type=event_type,
        progress_percent=progress,
        message=message,
    )
    event_channel.emit_sync(evaluation_id, payload)


if existing := state.get("rag_context"):
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="rag",
event_type="enrichment_complete",
progress_percent=100,
message="RAG context enrichment complete (cached)",
),
)
return {"rag_context": existing}

repo_context = state.get("repo_context", {})
query = _create_query(state)

if not settings.VERTEX_API_KEY:
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="rag",
event_type="enrichment_complete",
progress_percent=100,
message="RAG enrichment skipped (no API key)",
),
)
return {
"rag_context": {
"query": query,
Expand All @@ -127,6 +164,17 @@ async def rag_enrich(
try:
docs = _build_documents_from_context(repo_context)
if not docs:
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="rag",
event_type="enrichment_complete",
progress_percent=100,
message="RAG enrichment complete (no documents)",
),
)
return {
"rag_context": {"query": query, "chunks": [], "error": None},
}
Expand All @@ -145,6 +193,18 @@ async def rag_enrich(
min(settings.RAG_TOP_K, len(docs)),
)

if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="rag",
event_type="enrichment_complete",
progress_percent=100,
message=f"RAG enrichment complete ({len(chunks)} chunks)",
),
)

return {
"rag_context": {"query": query, "chunks": chunks, "error": None},
"trace_metadata": {
Expand All @@ -157,6 +217,17 @@ async def rag_enrich(

except Exception as e:
logger.warning(f"RAG embedding failed: {e}")
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="rag",
event_type="enrichment_error",
progress_percent=100,
message="RAG enrichment failed due to an internal error.",
),
)
return {
"rag_context": {"query": query, "chunks": [], "error": str(e)},
"errors": [f"rag_enrich failed: {e!s}"],
Expand Down
60 changes: 60 additions & 0 deletions backend/app/graph/nodes/web_search_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from app.core.config import settings
from app.graph.state import EvaluationState
from app.services.event_channel import create_sommelier_event, get_event_channel

logger = logging.getLogger(__name__)

Expand All @@ -23,11 +24,47 @@ async def web_search_enrich(
state: EvaluationState, config: Optional[RunnableConfig] = None
) -> Dict[str, Any]:
started_at = datetime.now(timezone.utc).isoformat()
evaluation_id = state.get("evaluation_id")
event_channel = get_event_channel()

if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="web_search",
event_type="enrichment_start",
progress_percent=0,
message="Web search enrichment starting...",
),
)
Comment on lines +30 to +40

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This block for emitting an SSE event is repeated multiple times throughout the function, with slight variations. This pattern of `if evaluation_id: event_channel.emit_sync(...)` is also duplicated in the other enrichment nodes (`rag_enrich.py`, `code_analysis_enrich.py`).

Consider creating a small helper function to handle event creation and emission. This will reduce code duplication and make the logic more maintainable.

Example helper:

def _emit_event(event_type: str, progress: int, message: str):
    """Emit one "web_search" sommelier event for the current evaluation.

    Does nothing when ``evaluation_id`` is falsy. ``evaluation_id`` and
    ``event_channel`` are not defined here; they are read from the
    surrounding scope.

    Args:
        event_type: Passed through as the event's ``event_type``.
        progress: Passed through as the event's ``progress_percent``.
        message: Passed through as the event's ``message``.
    """
    # Guard clause: without an evaluation ID there is nothing to emit to.
    if not evaluation_id:
        return
    payload = create_sommelier_event(
        evaluation_id=evaluation_id,
        sommelier="web_search",
        event_type=event_type,
        progress_percent=progress,
        message=message,
    )
    event_channel.emit_sync(evaluation_id, payload)


if existing := state.get("web_search_context"):
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="web_search",
event_type="enrichment_complete",
progress_percent=100,
message="Web search enrichment complete (cached)",
),
)
return {"web_search_context": existing}

if not settings.VERTEX_API_KEY:
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="web_search",
event_type="enrichment_complete",
progress_percent=100,
message="Web search skipped (no API key)",
),
)
return {
"web_search_context": {
"query": "",
Expand Down Expand Up @@ -85,6 +122,18 @@ async def web_search_enrich(
}
)

if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="web_search",
event_type="enrichment_complete",
progress_percent=100,
message=f"Web search complete ({len(sources)} sources)",
),
)

return {
"web_search_context": {
"query": query,
Expand All @@ -103,6 +152,17 @@ async def web_search_enrich(

except Exception as e:
logger.warning(f"Web search grounding failed: {e}")
if evaluation_id:
event_channel.emit_sync(
evaluation_id,
create_sommelier_event(
evaluation_id=evaluation_id,
sommelier="web_search",
event_type="enrichment_error",
progress_percent=100,
message="Web search failed due to an internal error.",
),
)
Comment on lines +155 to +165
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

에러 메시지에 민감한 정보가 포함될 수 있습니다.

f"Web search failed: {e}" (Line 163)에서 예외 메시지를 그대로 SSE 이벤트에 포함시키고 있습니다. API 호출 실패 시 예외 메시지에 API 키, 내부 URL, 인증 토큰 등이 포함될 수 있으며, 이는 SSE를 통해 클라이언트에게 직접 전달됩니다. 이 패턴은 rag_enrich.py (Line 217)와 code_analysis_enrich.py (Line 141)에도 동일하게 적용됩니다.

에러 이벤트의 메시지에는 일반화된 문자열을 사용하고, 상세 예외 정보는 서버 측 로그에만 남기는 것이 안전합니다.

🤖 Prompt for AI Agents
In `@backend/app/graph/nodes/web_search_enrich.py` around lines 155 - 165, Replace
the sensitive inline exception message sent to clients with a generic error
string and log the full exception server-side: in web_search_enrich.py where
event_channel.emit_sync(...) creates the sommelier "enrichment_error" event
(using create_sommelier_event and evaluation_id), change the message payload to
something non-sensitive like "Web search failed" and ensure you call the server
logger (e.g., processLogger or the module logger) to record the full exception
details and stacktrace; apply the same fix pattern to the analogous occurrences
in rag_enrich.py (around the create_sommelier_event call) and
code_analysis_enrich.py so client SSE events never contain raw exception text
while full errors remain in server logs.

return {
"web_search_context": {
"query": query,
Expand Down
5 changes: 5 additions & 0 deletions backend/app/services/event_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"evaluation_complete",
"evaluation_error",
"technique_error",
"enrichment_error",
}
TRANSFER_LOOP_INTERVAL_SECONDS = 0.01
TRANSFER_BATCH_SIZE = 10
Expand All @@ -66,6 +67,10 @@ class EventType(str, Enum):
DEEP_SYNTHESIS_COMPLETE = "deep_synthesis_complete"
QUALITY_GATE_COMPLETE = "quality_gate_complete"
METRICS_UPDATE = "metrics_update"
# Enrichment phase events (RAG, Web Search, Code Analysis)
ENRICHMENT_START = "enrichment_start"
ENRICHMENT_COMPLETE = "enrichment_complete"
ENRICHMENT_ERROR = "enrichment_error"


@dataclass
Expand Down
Loading