From d248199001c9c5a8e191864509234c3970a73f28 Mon Sep 17 00:00:00 2001 From: Charith Nuwan Bimsara <59943919+nuwangeek@users.noreply.github.com> Date: Wed, 18 Feb 2026 13:10:48 +0530 Subject: [PATCH 1/4] Tool classifier planning (#301) * prompt coniguration backend to be testing * custom prompt configuration update and fixed Pyright issues * fixed copilot reviews * pre validation step added when user query is inserted * added more validation cases * fixed review comments * added spec document to newly updated tool classification --- docs/TOOL_CLASSIFIER_EXTENSION_SPEC.md | 1940 ++++++++++++++++++++++++ 1 file changed, 1940 insertions(+) create mode 100644 docs/TOOL_CLASSIFIER_EXTENSION_SPEC.md diff --git a/docs/TOOL_CLASSIFIER_EXTENSION_SPEC.md b/docs/TOOL_CLASSIFIER_EXTENSION_SPEC.md new file mode 100644 index 0000000..469f809 --- /dev/null +++ b/docs/TOOL_CLASSIFIER_EXTENSION_SPEC.md @@ -0,0 +1,1940 @@ +# Tool Classifier Extension - System Specification + +**Version**: 1.0 +**Date**: February 13, 2026 +**Status**: Design Specification + +--- + +## 1. Overview + +This document specifies the extension of the existing RAG Module with a **Tool Classifier** that implements layer-wise workflow routing. The classifier determines whether a user query should be handled by: + +1. **Service Workflow** - External service/API calls +2. **Context Workflow** - Conversation history-based responses +3. **RAG Workflow** - Knowledge base retrieval (existing) +4. 
**OOD Response** - Out of domain fallback + +### 1.1 Current State + +**Existing Flow:** +``` +User Query → Input Guardrails → Prompt Refiner → Contextual Retrieval → Response Generator → Output Guardrails +``` + +**Entry Points:** +- `POST /orchestrate` - Non-streaming orchestration +- `POST /orchestrate/test` - Testing environment with simplified input +- `POST /orchestrate/stream` - Server-sent events streaming + +### 1.2 Proposed Extension + +**New Flow:** +``` +User Query → Input Guardrails → Tool Classifier → [Service | Context | RAG | OOD] + ↓ + Layer 1: Service Check + ↓ (no match) + Layer 2: Context Check + ↓ (no match) + Layer 3: RAG Retrieval + ↓ (no chunks) + Layer 4: OOD Response +``` + +--- + +## 2. Architecture Changes + +### 2.1 Component Integration + +The Tool Classifier will be integrated into the existing `LLMOrchestrationService` with minimal disruption: + +```python +# Location: src/llm_orchestration_service.py + +def process_orchestration_request(self, request: OrchestrationRequest): + """ + Modified orchestration pipeline with tool classifier. + + Pipeline: + 1. Language Detection (existing) + 2. Query Validation (existing) + 3. Input Guardrails (existing, relocated) + 4. Tool Classifier (NEW) + 5. Workflow Routing (NEW) + """ + + # Existing: Step 0, 0.5 + detected_language = detect_language(request.message) + validation_result = validate_query_basic(request.message) + + # Existing: Component initialization + components = self._initialize_service_components(request) + + # Existing: Step 1 - Input Guardrails (RELOCATED before classifier) + if components["guardrails_adapter"]: + input_blocked = self.handle_input_guardrails(...) 
+ if input_blocked: + return input_blocked + + # NEW: Step 2 - Tool Classifier + classifier_result = self.tool_classifier.classify( + query=request.message, + conversation_history=request.conversationHistory, + language=detected_language + ) + + # NEW: Step 3 - Workflow Routing + if classifier_result.workflow == WorkflowType.SERVICE: + return self._execute_service_workflow(request, classifier_result) + elif classifier_result.workflow == WorkflowType.CONTEXT: + return self._execute_context_workflow(request, classifier_result) + elif classifier_result.workflow == WorkflowType.RAG: + return self._execute_rag_workflow(request, classifier_result) + else: + return self._create_out_of_scope_response(request, detected_language) +``` + +### 2.2 New Components + +| Component | Location | Purpose | +|-----------|----------|---------| +| `ToolClassifier` | `src/tool_classifier/classifier.py` | Main classifier logic | +| `ServiceWorkflowExecutor` | `src/tool_classifier/service_workflow.py` | Service discovery and triggering | +| `ContextWorkflowExecutor` | `src/tool_classifier/context_workflow.py` | LLM-based conversation history analysis | +| `IntentEntityExtractor` | `src/tool_classifier/intent_extractor.py` | LLM-based intent/entity detection | +| `ServiceDiscoveryManager` | `src/tool_classifier/service_discovery.py` | Qdrant semantic search for services | +| `IntentCollectionSync` | `src/tool_classifier/intent_sync_service.py` | Database → Qdrant synchronization | +| `ContextAnalyzer` | `src/tool_classifier/context_analyzer.py` | LLM-based context availability checker | + +### 2.3 LLM Config Module Integration + +The existing LLM Config Module (`src/llm_config_module/`) is reused by the tool classifier for all LLM-based operations. No modifications to the core module are required. 
+ +**Current LLM Config Module Capabilities:** +- **Multi-Provider Support**: Azure OpenAI, AWS Bedrock, OpenAI, Anthropic +- **Vault Integration**: Secure credential management via HashiCorp Vault +- **Connection Management**: Dynamic LLM connection selection based on `connection_id` from requests +- **Usage Tracking**: Token counting and cost calculation across providers + +**Tool Classifier LLM Usage:** + +| Workflow | LLM Operation | Config Usage | Temperature | +|----------|---------------|--------------|-------------| +| **Service (Layer 1)** | Intent & entity extraction | `llm_manager.call_llm_async()` | 0.0 (deterministic) | +| **Context (Layer 2)** | Context availability check | `llm_manager.call_llm_async()` | 0.0 (deterministic) | +| **RAG (Layer 3)** | Response generation | Existing integration | 0.7 (default) | +| **OOD (Layer 4)** | No LLM call | N/A | N/A | + +**Integration Pattern:** + +```python +# Tool classifier workflows use the same LLMManager instance +class ToolClassifier: + def __init__(self, llm_manager: LLMManager, ...): + self.llm_manager = llm_manager # Reuse existing instance + + async def detect_intent(self, query: str, services: List[Service]): + """Use LLM Config Module for intent detection.""" + response = await self.llm_manager.call_llm_async( + prompt=INTENT_DETECTION_PROMPT.format(...), + temperature=0.0, # Deterministic for classification + max_tokens=200 + ) + return parse_intent(response) +``` + +**Configuration Reuse:** +- Same connection selection logic (`connection_id` from `OrchestrationRequest`) +- Same Vault credential retrieval +- Same cost tracking pattern (`get_lm_usage_since()`) +- Same error handling and retry logic +- Same provider-specific implementations + +**No Changes Required**: The LLM Config Module is provider-agnostic and supports all tool classifier LLM calls out of the box. + +--- + +## 3. 
Layer 1: Service Workflow + +### 3.1 Workflow Logic + +When a user query is received, the system determines if it's a service-related request through the following steps: + +``` +1. Service Count Check → 2. Service Discovery → 3. Intent Detection → 4. Service Validation → 5. Entity Transformation → 6. Service Triggering +``` + +### 3.2 Step-by-Step Implementation + +#### Step 1: Service Count Check + +**Purpose**: Optimize performance based on service catalog size + +```python +# Query: SELECT COUNT(*) FROM services WHERE current_state = 'active' AND deleted = FALSE + +if service_count <= 50: + # Use all services for LLM context + services = get_all_active_services() +else: + # Use semantic search for top 20 most relevant + services = semantic_search_services(user_query, top_k=20) +``` + +**Database Query:** +```sql +SELECT COUNT(*) FROM public.services +WHERE current_state = 'active' AND deleted = FALSE; +``` + +#### Step 2: Semantic Search (When Service Count > 50) + +**Tool**: Qdrant vector database +**Collection**: `intent_collection` +**Vector Dimension**: 3072 (text-embedding-3-large) + +**Search Configuration:** +```python +search_params = { + "collection_name": "intent_collection", + "query_vector": embed_query(user_query), + "limit": 20, + "score_threshold": 0.5, # Higher threshold for service matching +} +``` + +**Output Format:** +```json +[ + { + "service_id": "exchange-rate-001", + "service_name": "ExchangeRateService", + "description": "Provides currency exchange rates", + "entities": ["fromCurrency", "toCurrency"], + "score": 0.87 + }, + ... +] +``` + +#### Step 3: LLM Intent Detection + +**Action**: Call LLM with user query and service context to extract: +- `intent`: Service name to trigger +- `entities`: Key-value pairs of extracted parameters + +**Prompt Template:** +```python +INTENT_DETECTION_PROMPT = """ +You are an intent classifier for government services. Analyze the user query and determine which service should handle the request. 
+ +Available Services: +{service_list} + +User Query: "{user_query}" + +Task: +1. If the query matches a service, extract: + - intent: The exact service name to trigger + - entities: Key-value pairs of required parameters + +2. If NO service matches, respond with: {{"intent": null, "entities": null}} + +Response Format (JSON only, no explanation): +{{"intent": "ServiceName", "entities": {{"param1": "value1", "param2": "value2"}}}} +""" +``` + +**Expected LLM Response:** +```json +{ + "choices": [ + { + "message": { + "content": "{\"intent\": \"ExchangeRateService\", \"entities\": {\"fromCurrency\": \"EUR\", \"toCurrency\": \"USD\"}}" + } + } + ] +} +``` + +**Parsing Logic:** +```python +# Parse LLM response +content = response["choices"][0]["message"]["content"] +parsed = json.loads(content) + +if parsed["intent"] is None: + # No service match - move to Layer 2 (Context Workflow) + return WorkflowType.CONTEXT +``` + +#### Step 4: Service Validation + +**Action**: Validate the detected service against the database + +**Validation Query:** +```sql +SELECT service_id, name, ruuter_type, endpoints, structure, entities +FROM public.services +WHERE service_id = %(detected_service_id)s + AND current_state = 'active' + AND deleted = FALSE; +``` + +**Validation Checks:** +- Service exists in database +- `current_state = 'active'` +- `deleted = FALSE` + +**Failure Handling:** +```python +if not service_exists or not service_active: + logger.warning(f"Service validation failed: {detected_service_id}") + # Fallback to Layer 2 (Context Workflow) + return WorkflowType.CONTEXT +``` + +#### Step 5: Entity Transformation + +**Purpose**: Convert LLM entity object to array format for service payload + +**Input (from LLM):** +```json +{ + "fromCurrency": "EUR", + "toCurrency": "USD" +} +``` + +**Output (for service call):** +```json +["EUR", "USD"] +``` + +**Transformation Logic:** +```python +def transform_entities(entities: Optional[Dict[str, str]], + entity_order: List[str]) -> 
List[str]: + """ + Transform entity dictionary to ordered array. + + Args: + entities: LLM-extracted entity key-value pairs + entity_order: Expected entity order from service schema + + Returns: + Ordered list of entity values + """ + if not entities or entities is None: + return [] + + # Maintain order defined in service schema + return [entities.get(key, "") for key in entity_order] +``` + +**Example:** +```python +# Service schema defines: entities = ["fromCurrency", "toCurrency"] +transform_entities( + {"fromCurrency": "EUR", "toCurrency": "USD"}, + ["fromCurrency", "toCurrency"] +) +# Output: ["EUR", "USD"] +``` + +#### Step 6: Service Triggering + +**Purpose**: Call the external service endpoint with formatted payload + +**URL Construction:** +```python +# From database field 'endpoints' +base_url = "http://ruuter:8086" # From environment or service config +service_endpoint = f"{base_url}/services/active{service_name}" + +# Example: http://ruuter:8086/services/activeExchangeRateService +``` + +**HTTP Method:** +```python +# Retrieved from database field 'ruuter_type' +method = service.ruuter_type # 'GET' or 'POST' (ENUM) +``` + +**Payload Format:** +```json +{ + "input": ["EUR", "USD"], + "authorId": "user-67890", + "chatId": "chat-12345" +} +``` + +**Implementation:** +```python +async def trigger_service( + service: ServiceRecord, + entities: List[str], + request: OrchestrationRequest +) -> Dict[str, Any]: + """ + Trigger external service via Ruuter. 
+ + Args: + service: Validated service record from database + entities: Transformed entity array + request: Original orchestration request + + Returns: + Service response or error + """ + url = f"{RUUTER_BASE_URL}/services/active{service.name}" + payload = { + "input": entities, + "authorId": request.authorId, + "chatId": request.chatId + } + + try: + if service.ruuter_type == "GET": + response = await http_client.get(url, params=payload, timeout=10) + else: # POST + response = await http_client.post(url, json=payload, timeout=10) + + response.raise_for_status() + return response.json() + + except httpx.TimeoutException: + logger.error(f"Service timeout: {service.service_id}") + raise ServiceTimeoutError() + except httpx.HTTPStatusError as e: + logger.error(f"Service error: {e.response.status_code}") + raise ServiceExecutionError() +``` + +**Response Handling:** + +**Non-Streaming:** +```python +service_response = await trigger_service(service, entities, request) +formatted_content = format_service_response(service_response) + +# Apply output guardrails +if guardrails_adapter: + output_check = await guardrails_adapter.check_output_async(formatted_content) + costs_dict["output_guardrails"] = output_check.usage + + if not output_check.allowed: + logger.warning(f"Service response blocked by guardrails: {output_check.reason}") + return create_guardrail_violation_response(request) + +# Return validated service response +return OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=False, + inputGuardFailed=False, + content=formatted_content +) +``` + +**Streaming:** +```python +service_response = await trigger_service(service, entities, request) +formatted_content = format_service_response(service_response) + +# Apply output guardrails validation +if guardrails_adapter: + output_check = await guardrails_adapter.check_output_async(formatted_content) + costs_dict["output_guardrails"] = output_check.usage + + if not 
output_check.allowed: + logger.warning(f"Service response blocked by guardrails") + yield format_sse(request.chatId, OUTPUT_GUARDRAIL_VIOLATION_MESSAGE) + yield format_sse(request.chatId, "END") + return + +# Stream validated response token-by-token +for token in split_into_tokens(formatted_content, chunk_size=5): + yield format_sse(request.chatId, token) + await asyncio.sleep(0.01) # Maintain streaming UX + +yield format_sse(request.chatId, "END") +``` + +### 3.3 Failure Scenarios + +| Scenario | Action | +|----------|--------| +| No intent detected | Move to Layer 2 (Context Workflow) | +| Service validation failed | Move to Layer 2 (Context Workflow) | +| Service call timeout | Return `SERVICE_TIMEOUT_ERROR` message | +| Service returns error | Return `SERVICE_EXECUTION_ERROR` message | +| Entity extraction incomplete | Attempt service call with partial entities, or fallback to Layer 2 | +| Output guardrails blocked | Return `OUTPUT_GUARDRAIL_VIOLATION_MESSAGE` or fallback to Layer 2 | + +### 3.4 Output Guardrails for Service Responses + +**Why Service Responses Need Guardrails:** +- External services may return PII (personal identifiable information) +- Service errors could expose sensitive system details +- Third-party API responses are untrusted content +- Ensures consistent safety across all workflows + +**Integration Pattern:** + +Both non-streaming and streaming modes validate service responses before sending to users: + +```python +# Get service response +service_response = await trigger_service(...) + +# Apply output guardrails (validation-first) +if guardrails_adapter: + output_check = await guardrails_adapter.check_output_async(service_response) + if not output_check.allowed: + # Blocked - return error or fallback + return create_guardrail_violation_response(request) + +# Validated - return/stream to user +return/stream service_response +``` + +--- + +## 4. 
Layer 2: Context Workflow + +### 4.1 Workflow Logic + +If Layer 1 fails (no service match), use LLM to determine if the query is a greeting or can be answered from conversation history. + +**Trigger Conditions:** +- No service intent detected in Layer 1 +- Query is a greeting (hello, hi, good morning, etc.) **OR** +- Conversation history exists (at least 1 previous turn) and query references it + +### 4.2 Greeting Detection + +Greetings and conversational pleasantries are handled by the Context Workflow to provide natural, friendly responses without triggering service discovery or RAG retrieval. + +**Greeting Patterns (Multilingual):** + +```python +# Estonian greetings +ESTONIAN_GREETINGS = [ + "tere", "tervist", "tere hommikust", "tere päevast", "tere õhtust", + "hei", "hommikust", "õhtust", "päevast", "nägemist", + "tsau", "moi", "moikka" +] + +# English greetings +ENGLISH_GREETINGS = [ + "hello", "hi", "hey", "good morning", "good afternoon", "good evening", + "greetings", "howdy", "morning", "afternoon", "evening" +] + +# Farewell patterns +FAREWELL_PATTERNS = [ + "goodbye", "bye", "see you", "talk to you later", "ttyl", + "nägemist", "head aega", "kuni", "tsau" +] +``` + +**LLM-Based Greeting Detection:** + +Instead of rigid pattern matching, the LLM analyzes whether the query is a greeting or conversational message: + +```python +async def detect_greeting( + query: str, + llm_manager: LLMManager, + language: str +) -> GreetingResult: + """ + Use LLM to detect if query is a greeting/conversational message. 
+ + Args: + query: User's message + llm_manager: LLM manager instance + language: Detected language (et/en) + + Returns: + GreetingResult with is_greeting flag and optional response + """ + prompt = GREETING_DETECTION_PROMPT.format( + user_query=query, + language=language + ) + + response = await llm_manager.call_llm_async( + prompt=prompt, + temperature=0.0, + max_tokens=150 + ) + + content = response["choices"][0]["message"]["content"] + result = json.loads(content) + + return GreetingResult( + is_greeting=result["is_greeting"], + greeting_type=result.get("greeting_type"), # 'hello', 'goodbye', 'thanks', etc. + suggested_response=result.get("suggested_response") + ) +``` + +**Greeting Detection Prompt:** + +```python +GREETING_DETECTION_PROMPT = """ +You are a greeting classifier. Determine if the user's message is a greeting, farewell, or conversational pleasantry. + +User Message: "{user_query}" +Language: {language} + +Task: +1. Identify if this is a greeting/conversational message (hello, hi, goodbye, thanks, etc.) +2. If YES: Classify the type and suggest an appropriate response +3. If NO: Indicate it's not a greeting + +Response Format (JSON only): +{{ + "is_greeting": true/false, + "greeting_type": "hello" | "goodbye" | "thanks" | "casual" | null, + "suggested_response": "friendly response in same language" | null +}} + +Examples of greetings: +- "Tere!" → {"is_greeting": true, "greeting_type": "hello"} +- "Good morning" → {"is_greeting": true, "greeting_type": "hello"} +- "Thanks for your help" → {"is_greeting": true, "greeting_type": "thanks"} +- "What are digital signatures?" 
→ {"is_greeting": false} +""" +``` + +**Response Generation:** + +```python +if greeting_result.is_greeting: + # Use LLM-suggested response or fallback to predefined messages + response = greeting_result.suggested_response or get_default_greeting_response( + greeting_type=greeting_result.greeting_type, + language=language + ) + + return OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=False, + inputGuardFailed=False, + content=response + ) +``` + +### 4.3 LLM-Based Context Analysis + +Instead of using regex patterns, we use the LLM to intelligently determine if the query references conversation history and can be answered from it. + +**Conversation Window:** +```python +# Consider last 10 conversation turns (5 user + 5 bot pairs) +CONTEXT_WINDOW_SIZE = 10 + +def get_recent_history(history: List[ConversationItem]) -> List[ConversationItem]: + """Get recent conversation history for context analysis.""" + return history[-CONTEXT_WINDOW_SIZE:] if history else [] +``` + +**LLM Context Check Prompt:** +```python +CONTEXT_CHECK_PROMPT = """ +You are a conversation context analyzer. Analyze if the user's current query can be answered using ONLY the conversation history provided. + +Conversation History: +{conversation_history} + +Current User Query: "{user_query}" + +Task: +1. First check if this is a greeting/conversational message (hi, hello, thanks, goodbye, etc.) +2. If it's a greeting: Provide an appropriate friendly response +3. If NOT a greeting: Determine if the query references or can be answered from the conversation history above +4. If YES: Extract and provide the answer from the conversation history +5. 
If NO: Indicate that it cannot be answered from conversation history + +Response Format (JSON only, no explanation): +{{ + "is_greeting": true/false, + "can_answer_from_context": true/false, + "answer": "extracted answer from history OR greeting response" OR null, + "reasoning": "brief explanation of why it can/cannot be answered" +}} + +Examples of GREETINGS (handle with friendly response): +- "Tere!" → {"is_greeting": true, "answer": "Tere! Kuidas saan teid aidata?"} +- "Hello" → {"is_greeting": true, "answer": "Hello! How can I help you?"} +- "Thanks!" → {"is_greeting": true, "answer": "You're welcome!"} +- "Good morning" → {"is_greeting": true, "answer": "Good morning! What can I do for you?"} + +Examples of queries that CAN be answered from context: +- "What did you say earlier about that?" +- "Can you repeat that?" +- "What was the rate you mentioned?" +- "Tell me more about what you just said" + +Examples of queries that CANNOT be answered from context: +- Completely new topics +- Requests for real-time data +- Questions requiring external knowledge +""" +``` + +**Implementation:** +```python +async def check_context_availability( + query: str, + conversation_history: List[ConversationItem], + llm_manager: LLMManager +) -> ContextCheckResult: + """ + Use LLM to check if query can be answered from conversation history. 
+ + Args: + query: Current user query + conversation_history: Recent conversation turns + llm_manager: LLM manager for making calls + + Returns: + ContextCheckResult with can_answer flag and optional answer + """ + # Get recent history + recent_history = get_recent_history(conversation_history) + + if not recent_history: + # No conversation history available + return ContextCheckResult( + can_answer_from_context=False, + answer=None, + reasoning="No conversation history available" + ) + + # Format conversation history for prompt + history_text = format_conversation_history(recent_history) + + # Call LLM with structured output request + prompt = CONTEXT_CHECK_PROMPT.format( + conversation_history=history_text, + user_query=query + ) + + try: + response = await llm_manager.call_llm_async( + prompt=prompt, + temperature=0.0, # Deterministic for classification + max_tokens=300 + ) + + # Parse structured JSON response + content = response["choices"][0]["message"]["content"] + result = json.loads(content) + + return ContextCheckResult( + is_greeting=result.get("is_greeting", False), + can_answer_from_context=result["can_answer_from_context"], + answer=result.get("answer"), + reasoning=result.get("reasoning", "") + ) + + except (json.JSONDecodeError, KeyError) as e: + logger.error(f"Failed to parse LLM context check response: {e}") + # Fallback: assume cannot answer from context + return ContextCheckResult( + can_answer_from_context=False, + answer=None, + reasoning="Failed to parse LLM response" + ) + +def format_conversation_history(history: List[ConversationItem]) -> str: + """Format conversation history for LLM prompt.""" + formatted = [] + for i, item in enumerate(history, 1): + role = "User" if item.authorRole == "user" else "Assistant" + formatted.append(f"{i}. 
{role}: {item.message}") + return "\n".join(formatted) +``` + +**Response Models:** +```python +from pydantic import BaseModel + +class ContextCheckResult(BaseModel): + """Result from LLM context availability check.""" + is_greeting: bool = False + can_answer_from_context: bool + answer: Optional[str] = None + reasoning: str = "" + +class GreetingResult(BaseModel): + """Result from greeting detection.""" + is_greeting: bool + greeting_type: Optional[str] = None # 'hello', 'goodbye', 'thanks', 'casual' + suggested_response: Optional[str] = None +``` + +### 4.3 Workflow Execution + +**Non-Streaming Response:** +```python +async def execute_context_workflow( + request: OrchestrationRequest, + llm_manager: LLMManager, + guardrails_adapter: Optional[NeMoRailsAdapter], + costs_dict: Dict +) -> Optional[OrchestrationResponse]: + """ + Execute context-based response workflow with output guardrails. + + Returns: + OrchestrationResponse with context-based answer or None to fallback to next layer + """ + # Check if query can be answered from conversation history + context_result = await check_context_availability( + query=request.message, + conversation_history=request.conversationHistory, + llm_manager=llm_manager + ) + + # Track costs + costs_dict["context_check"] = get_lm_usage_since(history_before) + + if (context_result.is_greeting or context_result.can_answer_from_context) and context_result.answer: + logger.info( + f"[{request.chatId}] Query answered from context " + f"(greeting: {context_result.is_greeting})" + ) + + # Apply output guardrails validation + if guardrails_adapter: + output_check = await guardrails_adapter.check_output_async( + context_result.answer + ) + costs_dict["output_guardrails"] = output_check.usage + + if not output_check.allowed: + logger.warning( + f"[{request.chatId}] Context response blocked by guardrails: " + f"{output_check.reason}" + ) + return create_guardrail_violation_response(request) + + # Return validated context-based response + 
return OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=False, + inputGuardFailed=False, + content=context_result.answer + ) + + else: + logger.info( + f"[{request.chatId}] Cannot answer from context: {context_result.reasoning}" + ) + # Fallback to Layer 3 (RAG Workflow) + return None # Signal to move to next layer +``` + +**Streaming Response:** +```python +async def execute_context_workflow_streaming( + request: OrchestrationRequest, + llm_manager: LLMManager, + guardrails_adapter: Optional[NeMoRailsAdapter], + costs_dict: Dict +) -> Optional[AsyncIterator[str]]: + """ + Execute context workflow with streaming support and output guardrails. + + Yields: + SSE-formatted strings with validated context-based response + + Returns: + None if cannot answer from context (signals fallback to next layer) + """ + # Check context availability (non-streaming, fast) + context_result = await check_context_availability( + query=request.message, + conversation_history=request.conversationHistory, + llm_manager=llm_manager + ) + + # Track costs + costs_dict["context_check"] = get_lm_usage_since(history_before) + + if (context_result.is_greeting or context_result.can_answer_from_context) and context_result.answer: + logger.info( + f"[{request.chatId}] Validating and streaming context-based response " + f"(greeting: {context_result.is_greeting})" + ) + + # Apply output guardrails validation BEFORE streaming + if guardrails_adapter: + output_check = await guardrails_adapter.check_output_async( + context_result.answer + ) + costs_dict["output_guardrails"] = output_check.usage + + if not output_check.allowed: + logger.warning( + f"[{request.chatId}] Context response blocked by guardrails (streaming)" + ) + yield format_sse(request.chatId, OUTPUT_GUARDRAIL_VIOLATION_MESSAGE) + yield format_sse(request.chatId, "END") + return + + # Response validated - stream token by token for consistent UX + for token in 
split_into_tokens(context_result.answer, chunk_size=5): + yield format_sse(request.chatId, token) + await asyncio.sleep(0.01) # Maintain streaming pace + + # Signal completion + yield format_sse(request.chatId, "END") + + else: + logger.info(f"[{request.chatId}] No context match, falling back to RAG") + # Return None to signal fallback to next layer + # Caller will handle RAG workflow + return None + +def split_into_tokens(text: str, chunk_size: int = 5) -> List[str]: + """Split text into token-like chunks for streaming simulation.""" + words = text.split() + tokens = [] + for i in range(0, len(words), chunk_size): + chunk = " ".join(words[i:i + chunk_size]) + tokens.append(chunk + " " if i + chunk_size < len(words) else chunk) + return tokens +``` + +### 4.4 Advantages of LLM-Based Approach + + **No Regex Pattern Maintenance**: LLM understands semantic context references naturally + **Handles Edge Cases**: Can detect implicit references that regex would miss + **Multilingual Support**: Works across Estonian, English, and other languages + **Structured Output**: Consistent JSON format for easy parsing + **Reasoning Transparency**: Includes explanation of decision + **Streaming Compatible**: Fast context check + token-by-token answer delivery + **Greeting Detection**: Automatically handles greetings, farewells, and conversational pleasantries + **Natural Responses**: LLM generates contextually appropriate greeting responses + +### 4.7 Fallback Strategy + +**Fallback to Layer 3 (RAG):** +- If `is_greeting = false` AND `can_answer_from_context = false` +- If LLM response parsing fails +- If conversation history is empty (and not a greeting) +- If output guardrails block the response (fallback to RAG for alternative answer) + +**Error Handling:** +```python +try: + result = await execute_context_workflow( + request, llm_manager, guardrails_adapter, costs_dict + ) + if result: + return result # Context-based answer (validated) + else: + # Move to Layer 3 (RAG) + return 
await execute_rag_workflow(request, components, costs_dict) +except Exception as e: + logger.error(f"Context workflow failed: {e}") + # Fallback to RAG workflow + return await execute_rag_workflow(request, components, costs_dict) +``` + +**Guardrail Violation Fallback:** +```python +# Option 1: Return error message (current approach) +if not output_check.allowed: + return create_guardrail_violation_response(request) + +# Option 2: Fallback to RAG (alternative approach) +if not output_check.allowed: + logger.warning("Context response blocked, trying RAG workflow") + return await execute_rag_workflow(request, components, costs_dict) +``` + +--- + +## 5. Layer 3: RAG Workflow + +### 5.1 Integration with Existing System + +**Trigger**: When both Layer 1 (Service) and Layer 2 (Context) fail to match + +**Implementation:** +```python +# Reuse existing RAG pipeline +return self._execute_orchestration_pipeline( + request, components, costs_dict, timing_dict +) +``` + +**Existing Flow (No Changes Required):** +1. Prompt Refinement +2. Contextual Retrieval (Qdrant + BM25) +3. Rank Fusion (RRF) +4. Response Generation +5. 
Output Guardrails (validation-first streaming already implemented) + +**Streaming with Output Guardrails (Current Implementation):** +```python +# RAG workflow uses validation-first approach +async for validated_chunk in guardrails_adapter.stream_with_guardrails( + user_message=refined_query, + bot_message_generator=llm_streaming_generator +): + # NeMo buffers tokens (chunk_size=200) + # Validates each buffer before yielding + yield format_sse(chatId, validated_chunk) + +yield format_sse(chatId, "END") +``` + +**Fallback:** +- If no chunks found (`len(relevant_chunks) == 0`) → Layer 4 (OOD) +- If response confidence low → Layer 4 (OOD) + +--- + +## 5.2 Streaming + Output Guardrails Comparison + +### Summary: How Each Workflow Handles Streaming + Validation + +| Workflow | Response Source | Validation Approach | Streaming Method | +|----------|----------------|---------------------|------------------| +| **RAG** | LLM streaming generation | NeMo buffers + validates chunks (chunk_size=200) | `stream_with_guardrails()` wraps bot generator | +| **Service** | External service (complete) | Validate complete response | Stream validated response token-by-token | +| **Context** | LLM structured output (complete) | Validate complete response | Stream validated response token-by-token | +| **OOD** | Fixed message | No validation needed | Stream fixed message token-by-token | + +### Technical Flow for Each Workflow + +#### RAG Workflow (Existing - Validation-First) + +**Non-Streaming:** +```python +response = await response_generator.generate(...) 
+output_check = await guardrails_adapter.check_output_async(response) +if output_check.allowed: + return OrchestrationResponse(content=response) +``` + +**Streaming:** +```python +# LLM generates via streaming +async def bot_generator(): + async for token in llm.stream(): + yield token + +# NeMo validates in real-time (buffers chunks) +async for validated_chunk in guardrails_adapter.stream_with_guardrails( + user_message=query, + bot_message_generator=bot_generator +): + yield format_sse(chatId, validated_chunk) # Already validated +``` + +#### Service Workflow (New - Validate Then Stream) + +**Non-Streaming:** +```python +service_response = await call_external_service(...) # Complete response +output_check = await guardrails_adapter.check_output_async(service_response) +if output_check.allowed: + return OrchestrationResponse(content=service_response) +else: + return GuardrailViolationResponse() +``` + +**Streaming:** +```python +service_response = await call_external_service(...) # Complete response + +# Validate complete response FIRST +output_check = await guardrails_adapter.check_output_async(service_response) +if not output_check.allowed: + yield format_sse(chatId, OUTPUT_GUARDRAIL_VIOLATION_MESSAGE) + yield format_sse(chatId, "END") + return + +# Validated - now stream to client token-by-token +for token in split_into_tokens(service_response, chunk_size=5): + yield format_sse(chatId, token) + await asyncio.sleep(0.01) +yield format_sse(chatId, "END") +``` + +#### Context Workflow (New - Validate Then Stream) + +**Non-Streaming:** +```python +context_result = await llm.check_context(query, history) # Complete answer +if context_result.can_answer_from_context: + output_check = await guardrails_adapter.check_output_async(context_result.answer) + if output_check.allowed: + return OrchestrationResponse(content=context_result.answer) + else: + return GuardrailViolationResponse() +``` + +**Streaming:** +```python +context_result = await llm.check_context(query, 
history) # Complete answer + +if context_result.can_answer_from_context: + # Validate complete answer FIRST + output_check = await guardrails_adapter.check_output_async(context_result.answer) + if not output_check.allowed: + yield format_sse(chatId, OUTPUT_GUARDRAIL_VIOLATION_MESSAGE) + yield format_sse(chatId, "END") + return + + # Validated - stream to client token-by-token + for token in split_into_tokens(context_result.answer, chunk_size=5): + yield format_sse(chatId, token) + await asyncio.sleep(0.01) + yield format_sse(chatId, "END") +``` + +### Key Differences + +**RAG Workflow:** +- **Real-time validation**: LLM generates → NeMo validates chunks → Stream to client +- **Buffered approach**: Tokens buffered in chunks of 200 characters +- **Bi-directional**: Generator feeding into NeMo, NeMo yielding validated chunks +- **Cost**: Inline (no separate validation call) + +**Service/Context Workflows:** +- **Pre-validation**: Get complete response → Validate → Stream to client +- **Complete response**: Already have full text before streaming starts +- **Uni-directional**: Simply chunk and send validated response +- **Cost**: Separate validation call tracked in `costs_dict["output_guardrails"]` +- **UX Consistency**: Simulates streaming to match RAG workflow behavior + +### Why Different Approaches? + +1. **RAG**: LLM streaming is inherently token-by-token, so NeMo can validate in real-time +2. **Service**: External API returns complete response, no streaming generation occurs +3. **Context**: LLM returns structured JSON with complete answer, not streaming + +### Common Pattern: Validation-First + +All three workflows share the **validation-first principle**: +- Content is validated BEFORE reaching the user +- Blocked content never sent to client +- Consistent safety guarantees across all workflows +- Streaming provides smooth UX even with complete responses (Service/Context) + +--- + +## 6. 
Layer 4: OOD (Out of Domain) Response + +### 6.1 Trigger Conditions + +- No service detected (Layer 1 failed) +- No context match (Layer 2 failed) +- No relevant knowledge chunks (Layer 3 failed) + +### 6.2 Response Generation + +**Return localized OOD message:** +```python +return OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=True, # Flag as out of scope + inputGuardFailed=False, + content=get_localized_message(OUT_OF_SCOPE_MESSAGES, detected_language) +) +``` + +**Existing Constants (Reuse):** +```python +# From: src/llm_orchestrator_config/llm_ochestrator_constants.py +OUT_OF_SCOPE_MESSAGES = { + "et": "Vabandust, ma ei suuda sellele küsimusele vastata...", + "en": "I apologize, but I cannot answer this question..." +} +``` + +--- + +## 7. Data Schemas + +### 7.1 Database Schema + +**Table: `services`** + +```sql +-- Location: DSL/Liquibase/changelog/rag-search-script-v6-services.sql + +-- Custom ENUM types +CREATE TYPE ruuter_request_type AS ENUM ('GET', 'POST'); +CREATE TYPE service_state AS ENUM ('active', 'inactive', 'draft'); + +CREATE TABLE public.services ( + -- Primary key + id BIGINT PRIMARY KEY, + + -- Basic service information + name TEXT NOT NULL, -- Service name (e.g., "ExchangeRateService") + description TEXT NOT NULL, -- Human-readable description + service_id TEXT NOT NULL UNIQUE, -- Unique identifier (e.g., "exchange-rate-001") + + -- Service classification + ruuter_type ruuter_request_type DEFAULT 'GET', -- HTTP method: 'GET' or 'POST' + current_state service_state DEFAULT 'draft', -- State: 'active', 'inactive', 'draft' + is_common BOOLEAN NOT NULL DEFAULT FALSE, -- Is this a common/shared service? 
+ deleted BOOLEAN NOT NULL DEFAULT FALSE, -- Soft delete flag + + -- Intent classification data (for LLM) + slot TEXT NOT NULL DEFAULT '', -- Reserved for future use + entities text[] NOT NULL DEFAULT '{}', -- Expected entity names ["entity1", "entity2"] + examples text[] NOT NULL DEFAULT '{}', -- Example queries + + -- Service configuration + structure JSON NOT NULL DEFAULT '{}', -- Service schema/structure + endpoints JSON NOT NULL DEFAULT '[]', -- Endpoint configurations + + -- Timestamps + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Indexes for performance +CREATE UNIQUE INDEX idx_services_service_id ON public.services(service_id); +CREATE INDEX idx_services_active ON public.services(current_state, deleted) + WHERE deleted = FALSE; +CREATE INDEX idx_services_name ON public.services(name); +``` + +**Update Master Changelog:** +```yaml +# Location: DSL/Liquibase/master.yml + +databaseChangeLog: + - include: + file: changelog/rag-search-script-v1-llm-connections.sql + - include: + file: changelog/rag-search-script-v2-user-management.sql + - include: + file: changelog/rag-search-script-v3-configuration.sql + - include: + file: changelog/rag-search-script-v4-authority-data.xml + - include: + file: changelog/rag-search-script-v5-prompt-config.sql + - include: + file: changelog/rag-search-script-v6-services.sql # NEW +``` + +### 7.2 Qdrant Collection Schema + +**Collection Name:** `intent_collection` + +**Configuration:** +```python +{ + "collection_name": "intent_collection", + "vectors_config": { + "size": 3072, # text-embedding-3-large + "distance": "Cosine" + } +} +``` + +**Document Schema:** +```json +{ + "id": "common_service_companies_workforce_taxes", + "name": "Ettevõtte tööjõumaksud", + "description": "Kasutaja soovib infot ettevõtte poolt tasutud tööjõumaksude kohta, näiteks palgamaksud ja sotsiaalmaks.", + "examples": [ + "ettevõtte tasutud tööjõumaksud", + "kui 
palju maksis ettevõte tööjõumakse", + "firma poolt tasutud tööjõumaksud" + ], + "entities": ["company_name"], + "text_for_embedding": "Kasutaja soovib infot ettevõtte poolt tasutud tööjõumaksude kohta, näiteks palgamaksud ja sotsiaalmaks.\nettevõtte tasutud tööjõumaksud\nkui palju maksis ettevõte tööjõumakse\nfirma poolt tasutud tööjõumaksud", + + "service_id": "common_service_companies_workforce_taxes", + "ruuter_type": "POST", + "current_state": "active" +} +``` + +**Field Mapping:** +| Qdrant Field | Source | Purpose | +|--------------|--------|---------| +| `id` | `services.service_id` | Unique identifier | +| `name` | `services.name` | Service display name | +| `description` | `services.description` | Service description | +| `examples` | `services.examples` | Example queries | +| `entities` | `services.entities` | Expected parameters | +| `text_for_embedding` | Computed | Concatenated text for vector embedding | +| `service_id` | `services.service_id` | Link to database record | +| `ruuter_type` | `services.ruuter_type` | HTTP method | +| `current_state` | `services.current_state` | Service status | + +**Embedding Text Construction:** +```python +def construct_embedding_text(service: ServiceRecord) -> str: + """ + Construct text for embedding from service data. 
+ Format: description + examples (newline-separated) + """ + parts = [service.description] + parts.extend(service.examples) + return "\n".join(parts) +``` + +### 7.3 Database → Qdrant Synchronization + +**Trigger Mechanism:** +```sql +-- PostgreSQL NOTIFY/LISTEN pattern or polling +CREATE OR REPLACE FUNCTION notify_service_change() +RETURNS TRIGGER AS $$ +BEGIN + IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN + PERFORM pg_notify( + 'service_sync', + json_build_object( + 'action', TG_OP, + 'service_id', NEW.service_id, + 'current_state', NEW.current_state + )::text + ); + ELSIF TG_OP = 'DELETE' THEN + PERFORM pg_notify( + 'service_sync', + json_build_object( + 'action', 'DELETE', + 'service_id', OLD.service_id + )::text + ); + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER service_sync_trigger +AFTER INSERT OR UPDATE OR DELETE ON services +FOR EACH ROW EXECUTE FUNCTION notify_service_change(); +``` + +**Sync Service:** +```python +# Location: src/tool_classifier/intent_sync_service.py + +class IntentCollectionSyncService: + """Synchronizes services table with Qdrant intent_collection.""" + + async def handle_service_change(self, event: Dict): + action = event['action'] + service_id = event['service_id'] + + if action in ['INSERT', 'UPDATE']: + # Fetch service from database + service = await self.db.fetch_service(service_id) + + # Generate embedding + embedding_text = self.construct_embedding_text(service) + embedding_vector = await self.embed(embedding_text) + + # Upsert to Qdrant + await self.qdrant_client.upsert( + collection_name="intent_collection", + points=[{ + "id": service.service_id, + "vector": embedding_vector, + "payload": { + "name": service.name, + "description": service.description, + "examples": service.examples, + "entities": service.entities, + "text_for_embedding": embedding_text, + "service_id": service.service_id, + "ruuter_type": service.ruuter_type, + "current_state": service.current_state + } + }] + ) + + elif action == 
'DELETE': + await self.qdrant_client.delete( + collection_name="intent_collection", + points_selector={"points": [service_id]} + ) +``` + +--- + +## 8. Error Messages & Constants + +### 8.1 New Error Messages + +**Location:** `src/llm_orchestrator_config/llm_ochestrator_constants.py` + +```python +# Service Workflow Errors +SERVICE_NOT_FOUND_MESSAGES = { + "et": "Vabandust, ma ei leidnud sobivat teenust teie päringu jaoks.", + "en": "Sorry, I couldn't find a matching service for your request.", +} + +SERVICE_VALIDATION_FAILED_MESSAGES = { + "et": "Teenus ei ole hetkel saadaval.", + "en": "The requested service is currently unavailable.", +} + +SERVICE_TIMEOUT_ERROR_MESSAGES = { + "et": "Teenuse vastus võttis liiga kaua aega. Palun proovige hiljem uuesti.", + "en": "The service took too long to respond. Please try again later.", +} + +SERVICE_EXECUTION_ERROR_MESSAGES = { + "et": "Teenuse kutsumine ebaõnnestus. Palun proovige hiljem uuesti.", + "en": "Service execution failed. Please try again later.", +} + +ENTITY_EXTRACTION_FAILED_MESSAGES = { + "et": "Ma ei suutnud teie päringust vajalikku infot tuvastada.", + "en": "I couldn't extract the required information from your query.", +} + +# Context Workflow Errors +INSUFFICIENT_CONTEXT_MESSAGES = { + "et": "Ma ei leia vastust meie eelmisest vestlusest. Kas saate täpsustada?", + "en": "I can't find the answer in our previous conversation. Can you clarify?", +} + +NO_CONTEXT_AVAILABLE_MESSAGES = { + "et": "Mul pole piisavalt konteksti teie küsimusele vastamiseks.", + "en": "I don't have enough context to answer your question.", +} + +# Greeting Responses +GREETING_HELLO_MESSAGES = { + "et": "Tere! Kuidas saan teid aidata?", + "en": "Hello! How can I help you?", +} + +GREETING_GOODBYE_MESSAGES = { + "et": "Head aega! Kui vajate abi, olen siin.", + "en": "Goodbye! If you need help, I'm here.", +} + +GREETING_THANKS_MESSAGES = { + "et": "Pole tänu väärt! Kas saan veel kuidagi aidata?", + "en": "You're welcome! 
Can I help you with anything else?", +} + +GREETING_CASUAL_MESSAGES = { + "et": "Tere! Mida te soovite teada?", + "en": "Hi there! What would you like to know?", +} +``` + +**Helper Function for Default Greeting Responses:** + +```python +def get_default_greeting_response(greeting_type: str, language: str) -> str: + """ + Get default greeting response based on type and language. + + Args: + greeting_type: Type of greeting ('hello', 'goodbye', 'thanks', 'casual') + language: Language code ('et', 'en') + + Returns: + Localized greeting response + """ + greeting_map = { + "hello": GREETING_HELLO_MESSAGES, + "goodbye": GREETING_GOODBYE_MESSAGES, + "thanks": GREETING_THANKS_MESSAGES, + "casual": GREETING_CASUAL_MESSAGES + } + + messages = greeting_map.get(greeting_type, GREETING_HELLO_MESSAGES) + return messages.get(language, messages["en"]) +``` + +### 8.2 Reused Constants + +```python +# Already defined - reuse for consistency +OUT_OF_SCOPE_MESSAGE +TECHNICAL_ISSUE_MESSAGE +INPUT_GUARDRAIL_VIOLATION_MESSAGE +OUTPUT_GUARDRAIL_VIOLATION_MESSAGE +``` + +--- + +## 9. API Integration + +### 9.1 Entry Points (No Changes) + +The tool classifier is transparent to API consumers. 
All existing endpoints continue to work: + +**Non-Streaming:** +```http +POST /orchestrate +Content-Type: application/json + +{ + "chatId": "session-123", + "message": "What is the EUR to USD exchange rate?", + "authorId": "user-456", + "conversationHistory": [], + "url": "https://example.com", + "environment": "production", + "connection_id": "conn-789" +} +``` + +**Streaming:** +```http +POST /orchestrate/stream +Content-Type: application/json + +(Same request body as /orchestrate) +``` + +**Testing:** +```http +POST /orchestrate/test +Content-Type: application/json + +{ + "message": "Convert 100 EUR to USD", + "environment": "testing", + "connectionId": 1 +} +``` + +### 9.2 Response Format (No Changes) + +**Success Response:** +```json +{ + "chatId": "session-123", + "llmServiceActive": true, + "questionOutOfLLMScope": false, + "inputGuardFailed": false, + "content": "The current EUR to USD exchange rate is 1.08." +} +``` + +**Service Workflow Response:** +```json +{ + "chatId": "session-123", + "llmServiceActive": true, + "questionOutOfLLMScope": false, + "inputGuardFailed": false, + "content": "Based on the ExchangeRateService: EUR/USD = 1.0850" +} +``` + +The response format remains unchanged. The workflow selection is internal and transparent to the API consumer. + +--- + +## 10. 
Implementation Considerations

### 10.1 Performance Optimization

**Service Discovery Caching:**
```python
# Cache active service count for 5 minutes
@cached(ttl=300)
async def get_active_service_count() -> int:
    return await db.count_active_services()
```

**Intent Collection Warm-up:**
```python
# Pre-load intent collection on startup
async def warmup_intent_collection():
    """Ensure intent_collection is ready before processing requests."""
    collection_info = await qdrant_client.get_collection("intent_collection")
    logger.info(f"Intent collection ready: {collection_info.points_count} services")
```

### 10.2 Monitoring & Analytics

**Tool Classifier Decisions Table:**
```sql
-- Track classifier decisions for analytics
CREATE TABLE tool_classifier_decisions (
    id SERIAL PRIMARY KEY,
    chat_id TEXT NOT NULL,
    author_id TEXT,
    user_query TEXT NOT NULL,
    detected_workflow VARCHAR(20) NOT NULL, -- 'service', 'context', 'rag', 'ood'
    classifier_confidence NUMERIC(5,4),
    service_id VARCHAR(100), -- If service workflow
    execution_time_ms INTEGER,
    created_at TIMESTAMP DEFAULT NOW()
);

CREATE INDEX idx_classifier_decisions_workflow
    ON tool_classifier_decisions(detected_workflow);
```

### 10.3 Cost Tracking

**Add tracking for new LLM calls:**
```python
# Service workflow - intent detection
costs_dict["intent_detection"] = {
    "total_prompt_tokens": usage.prompt_tokens,
    "total_completion_tokens": usage.completion_tokens,
    "total_cost": calculate_cost(usage)
}

# Context workflow - context availability check
costs_dict["context_check"] = {
    "total_prompt_tokens": usage.prompt_tokens,
    "total_completion_tokens": usage.completion_tokens,
    "total_cost": calculate_cost(usage)
}
```

### 10.4 Guardrails Strategy

**Output Guardrails Application:**
```python
# Apply output guardrails to ALL workflows for consistency
WORKFLOWS_WITH_OUTPUT_GUARDRAILS = [
    WorkflowType.SERVICE, # Check service
responses (may contain PII/sensitive data) + WorkflowType.CONTEXT, # Check context-based responses (conversation history may have PII) + WorkflowType.RAG # Existing behavior (knowledge base responses) +] + +# OOD responses skip guardrails (fixed message) +WORKFLOWS_WITHOUT_OUTPUT_GUARDRAILS = [ + WorkflowType.OOD +] +``` + +**Validation-First Approach:** + +All workflows use the **validation-first** approach where content is validated BEFORE streaming to the client: + +1. **RAG Workflow** (existing): + - LLM generates response via streaming + - NeMo buffers tokens (chunk_size=200) + - Each buffer validated before yielding + - Uses `stream_with_guardrails()` method + +2. **Service Workflow** (new): + - External service returns complete response + - Apply output guardrails validation + - Stream validated response token-by-token to client + - Consistent UX with RAG workflow + +3. **Context Workflow** (new): + - LLM returns complete answer from history + - Apply output guardrails validation + - Stream validated response token-by-token to client + - Consistent UX with RAG workflow + +**Streaming + Output Guardrails Integration:** + +```python +# For Service and Context workflows +async def stream_validated_response( + response_text: str, + guardrails_adapter: NeMoRailsAdapter, + request: OrchestrationRequest, + costs_dict: Dict +) -> AsyncIterator[str]: + """ + Apply output guardrails and stream validated response. + + Flow: + 1. Validate complete response with guardrails + 2. If allowed: Stream token-by-token to client + 3. 
If blocked: Send guardrail violation message + """ + # Check output guardrails (non-streaming validation) + output_check = await guardrails_adapter.check_output_async(response_text) + + # Track costs + costs_dict["output_guardrails"] = output_check.usage + + if not output_check.allowed: + logger.warning(f"[{request.chatId}] Output blocked by guardrails") + # Send violation message + yield format_sse(request.chatId, OUTPUT_GUARDRAIL_VIOLATION_MESSAGE) + yield format_sse(request.chatId, "END") + return + + # Response validated - stream to client + logger.info(f"[{request.chatId}] Streaming validated response") + for token in split_into_tokens(response_text): + yield format_sse(request.chatId, token) + await asyncio.sleep(0.01) # Maintain streaming pace + + yield format_sse(request.chatId, "END") +``` + +**Utility Function for Token Streaming:** +```python +def split_into_tokens(text: str, chunk_size: int = 5) -> List[str]: + """ + Split text into token-like chunks for streaming simulation. + + Used by Service and Context workflows to provide streaming UX + even though the complete response is already available. 
+ + Args: + text: Complete response text + chunk_size: Number of words per chunk + + Returns: + List of text chunks + """ + words = text.split() + tokens = [] + for i in range(0, len(words), chunk_size): + chunk = " ".join(words[i:i + chunk_size]) + tokens.append(chunk + " " if i + chunk_size < len(words) else chunk) + return tokens +``` + +### 10.5 Streaming Implementation Summary + +| Aspect | RAG Workflow | Service Workflow | Context Workflow | +|--------|--------------|------------------|------------------| +| **Response Type** | Streaming (token-by-token) | Complete (all at once) | Complete (all at once) | +| **Validation Timing** | Real-time (buffered chunks) | Pre-validation | Pre-validation | +| **Guardrail Method** | `stream_with_guardrails()` | `check_output_async()` | `check_output_async()` | +| **Streaming Reason** | Natural (LLM streams) | UX consistency | UX consistency | +| **Token Buffering** | NeMo 200-char chunks | Manual 5-word chunks | Manual 5-word chunks | +| **Cost Tracking** | Inline (timing = 0.0) | Separate call | Separate call | +| **Blocked Handling** | Stop mid-stream | Pre-check, don't stream | Pre-check, don't stream | +| **Client Experience** | Progressive reveal | Progressive reveal | Progressive reveal | + +**Implementation Status:** +- RAG streaming + guardrails: **Already implemented** (production-ready) +- Service streaming + guardrails: **To be implemented** (spec complete) +- Context streaming + guardrails: **To be implemented** (spec complete) + +--- + +## 11. Testing Strategy + +### 11.1 Unit Tests +async def test_context_detection_with_llm(): + query = "What did you say earlier?" 
    history = [
        ConversationItem(authorRole="bot", message="The EUR to USD rate is 1.08"),
        ConversationItem(authorRole="user", message="Thanks")
    ]
    result = await context_analyzer.check_context_availability(query, history)
    assert result.can_answer_from_context == True
    assert "1.08" in result.answer

async def test_context_detection_no_reference():
    query = "What are digital signatures?"
    history = [ConversationItem(message="The rate is 1.08", ...)]
    result = await context_analyzer.check_context_availability(query, history)
    assert result.can_answer_from_context == False

def test_context_classification():
    query = "What did you say earlier?"
    history = [ConversationItem(message="The rate is 1.08", ...)]
    result = classifier.classify(query, history)
    assert result.workflow == WorkflowType.CONTEXT

def test_rag_fallback():
    query = "What are digital signatures?"
    result = classifier.classify(query, [])
    assert result.workflow == WorkflowType.RAG

async def test_context_streaming():
    """Test that context workflow supports streaming."""
    query = "What was the rate?"
    history = [ConversationItem(message="The rate is 1.08", ...)]

    tokens = []
    async for token in context_workflow.execute_streaming(query, history):
        tokens.append(token)

    assert len(tokens) > 0
    assert tokens[-1] == "END"
```

### 11.2 Integration Tests

```python
# tests/integration_tests/test_service_workflow.py
async def test_full_service_workflow():
    request = OrchestrationRequest(
        message="Convert 100 EUR to USD",
        chatId="test-123",
        ...
    )
    response = await orchestration_service.process_orchestration_request(request)
    assert response.llmServiceActive == True
    assert "exchange rate" in response.content.lower()
```

### 11.3 Load Testing

```bash
# Load test the classifier with >50 services
locust -f tests/load/test_classifier_load.py --users 100 --spawn-rate 10
```

---

## 12. Migration Path

### 12.1 Phase 1:
- Create database migration for `services` table
- Create Qdrant `intent_collection`
- Relocate input guardrails before tool classifier
- Define error message constants

### 12.2 Phase 2:
- Implement `ToolClassifier` with rule-based logic
- Implement workflow routing in `LLMOrchestrationService`
- Add classifier decision logging
- Unit tests for classifier

### 12.3 Phase 3: Service Workflow
- Implement `ServiceDiscoveryManager` (Qdrant semantic search)
- Implement `IntentEntityExtractor` (LLM-based)
- Implement `ServiceWorkflowExecutor` (validation & triggering)
- Implement `IntentCollectionSyncService` (DB → Qdrant)
- Integration tests for service workflow

### 12.4 Phase 4: Context Workflow
- ✅ Implement `ContextAnalyzer` with LLM-based context detection (CONTEXT_CHECK_TEMPERATURE=0.0 for deterministic classification, CONTEXT_CHECK_MAX_TOKENS=300)
- Create context check prompt template with structured output
- Implement conversation history semantic search
- Implement `ContextWorkflowExecutor` with streaming support
- Add conversation history formatting utilities
- Integration tests for context workflow (streaming + non-streaming)
- Cost tracking for context check LLM calls

### 12.5 Phase 5: Finalization
- Extend output guardrails to service & context workflows
- Implement fallback chain (service → context → rag → ood)
- Add comprehensive error handling
- Performance optimization (caching, async)
- End-to-end testing
- Production deployment

---

## 13.
Configuration + +### 13.1 Environment Variables + +```bash +# Service Workflow Configuration +RUUTER_BASE_URL=http://ruuter:8086 +SERVICE_DISCOVERY_TIMEOUT=2 # seconds +SERVICE_CALL_TIMEOUT=10 # seconds +MAX_SERVICES_FOR_LLM_CONTEXT=50 + +# Qdrant Configuration +QDRANT_INTENT_COLLECTION=intent_collection +INTENT_SEARCH_TOP_K=20 +INTENT_SEARCH_THRESHOLD=0.5 + +# Context Workflow Configuration +CONTEXT_WINDOW_SIZE=10 +CONTEXT_CONFIDENCE_THRESHOLD=0.7 +``` + +### 13.2 Feature Flags + +```python +# src/llm_orchestrator_config/feature_flags.py + +class FeatureFlags: + # Enable/disable tool classifier (rollback switch) + TOOL_CLASSIFIER_ENABLED = os.getenv("TOOL_CLASSIFIER_ENABLED", "true").lower() == "true" + + # Enable/disable specific workflows + SERVICE_WORKFLOW_ENABLED = os.getenv("SERVICE_WORKFLOW_ENABLED", "true").lower() == "true" + CONTEXT_WORKFLOW_ENABLED = os.getenv("CONTEXT_WORKFLOW_ENABLED", "true").lower() == "true" + + # Fallback to RAG if tool classifier fails + FALLBACK_TO_RAG_ON_ERROR = True +``` + +--- + +## 14. Rollback Strategy + +### 14.1 Graceful Degradation + +```python +def process_orchestration_request(self, request: OrchestrationRequest): + """Process with tool classifier or fallback to RAG.""" + + if not FeatureFlags.TOOL_CLASSIFIER_ENABLED: + # Fallback: Use existing RAG-only pipeline + logger.info("Tool classifier disabled - using RAG pipeline") + return self._execute_rag_workflow(request, None) + + try: + # New: Tool classifier routing + classifier_result = self.tool_classifier.classify(...) + return self._route_to_workflow(request, classifier_result) + + except Exception as e: + logger.error(f"Tool classifier failed: {e}") + if FeatureFlags.FALLBACK_TO_RAG_ON_ERROR: + logger.info("Falling back to RAG workflow") + return self._execute_rag_workflow(request, None) + raise +``` + +## 15. 
Success Metrics + +### 15.1 Performance Metrics + +| Metric | Target | Measurement | +|--------|--------|-------------| +| Tool Classifier Latency | < 200ms | p95 response time | +| Service Discovery (>50 services) | < 500ms | Qdrant search + LLM intent | +| Service Call Success Rate | > 95% | Successful service executions | +| Context Match Accuracy | > 80% | Correct context-based responses | +| End-to-End Latency | < 3s | Request to response | + +### 15.2 Quality Metrics + +| Metric | Target | Measurement | +|--------|--------|-------------| +| Workflow Classification Accuracy | > 90% | Manual evaluation sample | +| Service Intent Accuracy | > 85% | Correct service selection | +| Entity Extraction Accuracy | > 90% | Correct entity values | +| False Positive Rate (Service) | < 5% | Incorrect service routing | +| User Satisfaction | > 4.0/5.0 | User feedback surveys | + +--- From c1df52df3dc9472c556864c6bad040c71118a4e6 Mon Sep 17 00:00:00 2001 From: erangi-ar <111747955+erangi-ar@users.noreply.github.com> Date: Thu, 19 Feb 2026 18:09:56 +0530 Subject: [PATCH 2/4] Show the response in markdown in test LLM page (#317) * prompt coniguration backend to be testing * custom prompt configuration update and fixed Pyright issues * fixed copilot reviews * pre validation step added when user query is inserted * added more validation cases * fixed review comments * feat: add react-markdown and remark-gfm for rendering markdown content in TestModel page --------- Co-authored-by: nuwangeek Co-authored-by: Charith Nuwan Bimsara <59943919+nuwangeek@users.noreply.github.com> Co-authored-by: erangi-ar --- GUI/package-lock.json | 1564 +++++++++++++++++++++++++++-- GUI/package.json | 2 + GUI/src/pages/TestModel/index.tsx | 10 +- 3 files changed, 1514 insertions(+), 62 deletions(-) diff --git a/GUI/package-lock.json b/GUI/package-lock.json index 436ec9c..c0f45b1 100644 --- a/GUI/package-lock.json +++ b/GUI/package-lock.json @@ -50,6 +50,7 @@ "react-i18next": "^12.1.1", "react-icons": 
"^4.10.1", "react-idle-timer": "^5.5.2", + "react-markdown": "^10.1.0", "react-modal": "^3.16.1", "react-redux": "^8.1.1", "react-router-dom": "^6.5.0", @@ -58,6 +59,7 @@ "react-textarea-autosize": "^8.4.0", "reactflow": "^11.4.0", "regexify-string": "^1.0.19", + "remark-gfm": "^4.0.1", "rxjs": "^7.8.1", "timeago.js": "^4.0.2", "usehooks-ts": "^2.9.1", @@ -6847,7 +6849,6 @@ "version": "4.1.12", "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz", "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==", - "dev": true, "dependencies": { "@types/ms": "*" } @@ -6855,14 +6856,31 @@ "node_modules/@types/estree": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz", - "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==", - "dev": true + "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==" + }, + "node_modules/@types/estree-jsx": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@types/estree-jsx/-/estree-jsx-1.0.5.tgz", + "integrity": "sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==", + "license": "MIT", + "dependencies": { + "@types/estree": "*" + } }, "node_modules/@types/geojson": { "version": "7946.0.14", "resolved": "https://registry.npmjs.org/@types/geojson/-/geojson-7946.0.14.tgz", "integrity": "sha512-WCfD5Ht3ZesJUsONdhvm84dmzWOiOzOAqOncN0++w0lBw1o8OuDNJF2McvvCef/yBqb/HYRahp1BYtODFQ8bRg==" }, + "node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/@types/hoist-non-react-statics": { "version": "3.3.5", 
"resolved": "https://registry.npmjs.org/@types/hoist-non-react-statics/-/hoist-non-react-statics-3.3.5.tgz", @@ -6911,11 +6929,19 @@ "@types/lodash": "*" } }, + "node_modules/@types/mdast": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", + "integrity": "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/@types/ms": { "version": "0.7.34", "resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz", - "integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g==", - "dev": true + "integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g==" }, "node_modules/@types/node": { "version": "18.19.34", @@ -7008,6 +7034,12 @@ "@types/node": "*" } }, + "node_modules/@types/unist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", + "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", + "license": "MIT" + }, "node_modules/@types/use-sync-external-store": { "version": "0.0.3", "resolved": "https://registry.npmjs.org/@types/use-sync-external-store/-/use-sync-external-store-0.0.3.tgz", @@ -7711,8 +7743,7 @@ "node_modules/@ungap/structured-clone": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz", - "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==", - "dev": true + "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==" }, "node_modules/@vitejs/plugin-react": { "version": "3.1.0", @@ -8187,6 +8218,16 @@ "babel-plugin-transform-react-remove-prop-types": "^0.4.24" } }, + "node_modules/bail": { + "version": "2.0.2", + 
"resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz", + "integrity": "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", @@ -8414,6 +8455,16 @@ } ] }, + "node_modules/ccount": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", + "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/chalk": { "version": "2.4.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", @@ -8427,6 +8478,46 @@ "node": ">=4" } }, + "node_modules/character-entities": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-2.0.2.tgz", + "integrity": "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-entities-html4": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/character-entities-html4/-/character-entities-html4-2.1.0.tgz", + "integrity": "sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-entities-legacy": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-3.0.0.tgz", + "integrity": 
"sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-reference-invalid": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/character-reference-invalid/-/character-reference-invalid-2.0.1.tgz", + "integrity": "sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/chardet": { "version": "0.7.0", "resolved": "https://registry.npmjs.org/chardet/-/chardet-0.7.0.tgz", @@ -8624,6 +8715,16 @@ "node": ">= 0.8" } }, + "node_modules/comma-separated-tokens": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", + "integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/commander": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/commander/-/commander-7.2.0.tgz", @@ -8954,6 +9055,19 @@ } } }, + "node_modules/decode-named-character-reference": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.3.0.tgz", + "integrity": "sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q==", + "license": "MIT", + "dependencies": { + "character-entities": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -9031,6 +9145,15 @@ "node": ">=0.4.0" } }, + "node_modules/dequal": { + "version": 
"2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/detect-libc": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-1.0.3.tgz", @@ -9047,6 +9170,19 @@ "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz", "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==" }, + "node_modules/devlop": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", + "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==", + "license": "MIT", + "dependencies": { + "dequal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/dir-glob": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", @@ -10120,6 +10256,16 @@ "node": ">=4.0" } }, + "node_modules/estree-util-is-identifier-name": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/estree-util-is-identifier-name/-/estree-util-is-identifier-name-3.0.0.tgz", + "integrity": "sha512-hFtqIDZTIUZ9BXLb8y4pYGyk6+wekIivNVTcmvk8NoOh+VeRn5y6cEHzbURrWbfp1fIqdVipilzj+lfaadNZmg==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/estree-walker": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", @@ -10149,6 +10295,12 @@ "resolved": "https://registry.npmjs.org/exenv/-/exenv-1.2.2.tgz", "integrity": "sha512-Z+ktTxTwv9ILfgKCk32OX3n/doe+OcLTRtqK9pcL+JsP3J1/VW8Uvl4ZjLlKqeW4rzK4oesDOGMEMRIZqtP4Iw==" }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": 
"https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "license": "MIT" + }, "node_modules/external-editor": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/external-editor/-/external-editor-3.1.0.tgz", @@ -10738,6 +10890,46 @@ "node": ">= 0.4" } }, + "node_modules/hast-util-to-jsx-runtime": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.6.tgz", + "integrity": "sha512-zl6s8LwNyo1P9uw+XJGvZtdFF1GdAkOg8ujOw+4Pyb76874fLps4ueHXDhXWdk6YHQ6OgUtinliG7RsYvCbbBg==", + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "devlop": "^1.0.0", + "estree-util-is-identifier-name": "^3.0.0", + "hast-util-whitespace": "^3.0.0", + "mdast-util-mdx-expression": "^2.0.0", + "mdast-util-mdx-jsx": "^3.0.0", + "mdast-util-mdxjs-esm": "^2.0.0", + "property-information": "^7.0.0", + "space-separated-tokens": "^2.0.0", + "style-to-js": "^1.0.0", + "unist-util-position": "^5.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-whitespace": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz", + "integrity": "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/headers-polyfill": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/headers-polyfill/-/headers-polyfill-3.3.0.tgz", @@ -10775,6 +10967,16 @@ "void-elements": "3.1.0" } }, + "node_modules/html-url-attributes": { + 
"version": "3.0.1", + "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz", + "integrity": "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/htmlnano": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/htmlnano/-/htmlnano-2.1.1.tgz", @@ -10982,6 +11184,12 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "dev": true }, + "node_modules/inline-style-parser": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.7.tgz", + "integrity": "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA==", + "license": "MIT" + }, "node_modules/inquirer": { "version": "8.2.6", "resolved": "https://registry.npmjs.org/inquirer/-/inquirer-8.2.6.tgz", @@ -11101,6 +11309,30 @@ "loose-envify": "^1.0.0" } }, + "node_modules/is-alphabetical": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-2.0.1.tgz", + "integrity": "sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-alphanumerical": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-alphanumerical/-/is-alphanumerical-2.0.1.tgz", + "integrity": "sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==", + "license": "MIT", + "dependencies": { + "is-alphabetical": "^2.0.0", + "is-decimal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/is-arguments": { "version": "1.1.1", "resolved": 
"https://registry.npmjs.org/is-arguments/-/is-arguments-1.1.1.tgz", @@ -11267,6 +11499,16 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-decimal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-decimal/-/is-decimal-2.0.1.tgz", + "integrity": "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -11326,6 +11568,16 @@ "node": ">=0.10.0" } }, + "node_modules/is-hexadecimal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-2.0.1.tgz", + "integrity": "sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/is-interactive": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/is-interactive/-/is-interactive-1.0.0.tgz", @@ -11393,6 +11645,18 @@ "node": ">=8" } }, + "node_modules/is-plain-obj": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz", + "integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/is-regex": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.2.1.tgz", @@ -12073,6 +12337,16 @@ "node": ">=8" } }, + "node_modules/longest-streak": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-3.1.0.tgz", + "integrity": 
"sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/loose-envify": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", @@ -12105,6 +12379,16 @@ "node": ">=12" } }, + "node_modules/markdown-table": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.4.tgz", + "integrity": "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/material-colors": { "version": "1.2.6", "resolved": "https://registry.npmjs.org/material-colors/-/material-colors-1.2.6.tgz", @@ -12120,80 +12404,925 @@ "node": ">= 0.4" } }, - "node_modules/mdn-data": { - "version": "2.0.14", - "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.14.tgz", - "integrity": "sha512-dn6wd0uw5GsdswPFfsgMp5NSB0/aDe6fK94YJV/AJDYXL6HVLWBsxeq7js7Ad+mU2K9LAlwpk6kN2D5mwCPVow==" - }, - "node_modules/memoize-one": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/memoize-one/-/memoize-one-6.0.0.tgz", - "integrity": "sha512-rkpe71W0N0c0Xz6QD0eJETuWAJGnJ9afsl1srmwPrI+yBCkge5EycXXbYRyvL29zZVUWQCY7InPRCv3GDXuZNw==" + "node_modules/mdast-util-find-and-replace": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-3.0.2.tgz", + "integrity": "sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "escape-string-regexp": "^5.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } 
}, - "node_modules/merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "dev": true, + "node_modules/mdast-util-find-and-replace/node_modules/escape-string-regexp": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz", + "integrity": "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==", + "license": "MIT", "engines": { - "node": ">= 8" + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/micromatch": { - "version": "4.0.7", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.7.tgz", - "integrity": "sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==", + "node_modules/mdast-util-from-markdown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-2.0.2.tgz", + "integrity": "sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA==", + "license": "MIT", "dependencies": { - "braces": "^3.0.3", - "picomatch": "^2.3.1" + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "decode-named-character-reference": "^1.0.0", + "devlop": "^1.0.0", + "mdast-util-to-string": "^4.0.0", + "micromark": "^4.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-decode-string": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0", + "unist-util-stringify-position": "^4.0.0" }, - "engines": { - "node": ">=8.6" + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/mime-db": { - "version": "1.52.0", - "resolved": 
"https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", - "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", - "engines": { - "node": ">= 0.6" + "node_modules/mdast-util-gfm": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.1.0.tgz", + "integrity": "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ==", + "license": "MIT", + "dependencies": { + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-gfm-autolink-literal": "^2.0.0", + "mdast-util-gfm-footnote": "^2.0.0", + "mdast-util-gfm-strikethrough": "^2.0.0", + "mdast-util-gfm-table": "^2.0.0", + "mdast-util-gfm-task-list-item": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/mime-types": { - "version": "2.1.35", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", - "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "node_modules/mdast-util-gfm-autolink-literal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-autolink-literal/-/mdast-util-gfm-autolink-literal-2.0.1.tgz", + "integrity": "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ==", + "license": "MIT", "dependencies": { - "mime-db": "1.52.0" + "@types/mdast": "^4.0.0", + "ccount": "^2.0.0", + "devlop": "^1.0.0", + "mdast-util-find-and-replace": "^3.0.0", + "micromark-util-character": "^2.0.0" }, - "engines": { - "node": ">= 0.6" + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/mimic-fn": { + "node_modules/mdast-util-gfm-footnote": { "version": "2.1.0", - "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", - "integrity": 
"sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", - "dev": true, - "engines": { - "node": ">=6" + "resolved": "https://registry.npmjs.org/mdast-util-gfm-footnote/-/mdast-util-gfm-footnote-2.1.0.tgz", + "integrity": "sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "dev": true, + "node_modules/mdast-util-gfm-strikethrough": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-strikethrough/-/mdast-util-gfm-strikethrough-2.0.0.tgz", + "integrity": "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg==", + "license": "MIT", "dependencies": { - "brace-expansion": "^1.1.7" + "@types/mdast": "^4.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" }, - "engines": { - "node": "*" + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/minimist": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "node_modules/mdast-util-gfm-table": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-table/-/mdast-util-gfm-table-2.0.0.tgz", + "integrity": "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg==", + "license": "MIT", + "dependencies": { + "@types/mdast": 
"^4.0.0", + "devlop": "^1.0.0", + "markdown-table": "^3.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-task-list-item": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-task-list-item/-/mdast-util-gfm-task-list-item-2.0.0.tgz", + "integrity": "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdx-expression": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.1.tgz", + "integrity": "sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ==", + "license": "MIT", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdx-jsx": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.2.0.tgz", + "integrity": "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==", + "license": "MIT", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "ccount": "^2.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + 
"parse-entities": "^4.0.0", + "stringify-entities": "^4.0.0", + "unist-util-stringify-position": "^4.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdxjs-esm": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-mdxjs-esm/-/mdast-util-mdxjs-esm-2.0.1.tgz", + "integrity": "sha512-EcmOpxsZ96CvlP03NghtH1EsLtr0n9Tm4lPUJUBccV9RwUOneqSycg19n5HGzCf+10LozMRSObtVr3ee1WoHtg==", + "license": "MIT", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-phrasing": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-phrasing/-/mdast-util-phrasing-4.1.0.tgz", + "integrity": "sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-hast": { + "version": "13.2.1", + "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.1.tgz", + "integrity": "sha512-cctsq2wp5vTsLIcaymblUriiTcZd0CwWtCbLvrOzYCDZoWyMNV8sZ7krj09FSnsiJi3WVsHLM4k6Dq/yaPyCXA==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@ungap/structured-clone": "^1.0.0", + "devlop": "^1.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "trim-lines": "^3.0.0", + "unist-util-position": "^5.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, 
+ "node_modules/mdast-util-to-markdown": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/mdast-util-to-markdown/-/mdast-util-to-markdown-2.1.2.tgz", + "integrity": "sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "longest-streak": "^3.0.0", + "mdast-util-phrasing": "^4.0.0", + "mdast-util-to-string": "^4.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-decode-string": "^2.0.0", + "unist-util-visit": "^5.0.0", + "zwitch": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-string": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-4.0.0.tgz", + "integrity": "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdn-data": { + "version": "2.0.14", + "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.14.tgz", + "integrity": "sha512-dn6wd0uw5GsdswPFfsgMp5NSB0/aDe6fK94YJV/AJDYXL6HVLWBsxeq7js7Ad+mU2K9LAlwpk6kN2D5mwCPVow==" + }, + "node_modules/memoize-one": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/memoize-one/-/memoize-one-6.0.0.tgz", + "integrity": "sha512-rkpe71W0N0c0Xz6QD0eJETuWAJGnJ9afsl1srmwPrI+yBCkge5EycXXbYRyvL29zZVUWQCY7InPRCv3GDXuZNw==" + }, + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, + "node_modules/micromark": { + 
"version": "4.0.2", + "resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.2.tgz", + "integrity": "sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "@types/debug": "^4.0.0", + "debug": "^4.0.0", + "decode-named-character-reference": "^1.0.0", + "devlop": "^1.0.0", + "micromark-core-commonmark": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-combine-extensions": "^2.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-encode": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-subtokenize": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-core-commonmark": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-2.0.3.tgz", + "integrity": "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "decode-named-character-reference": "^1.0.0", + "devlop": "^1.0.0", + "micromark-factory-destination": "^2.0.0", + "micromark-factory-label": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-factory-title": "^2.0.0", + "micromark-factory-whitespace": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-chunked": "^2.0.0", 
+ "micromark-util-classify-character": "^2.0.0", + "micromark-util-html-tag-name": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-subtokenize": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-extension-gfm": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-3.0.0.tgz", + "integrity": "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w==", + "license": "MIT", + "dependencies": { + "micromark-extension-gfm-autolink-literal": "^2.0.0", + "micromark-extension-gfm-footnote": "^2.0.0", + "micromark-extension-gfm-strikethrough": "^2.0.0", + "micromark-extension-gfm-table": "^2.0.0", + "micromark-extension-gfm-tagfilter": "^2.0.0", + "micromark-extension-gfm-task-list-item": "^2.0.0", + "micromark-util-combine-extensions": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-autolink-literal": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-autolink-literal/-/micromark-extension-gfm-autolink-literal-2.1.0.tgz", + "integrity": "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw==", + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-footnote": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-footnote/-/micromark-extension-gfm-footnote-2.1.0.tgz", + "integrity": 
"sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-core-commonmark": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-strikethrough": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-strikethrough/-/micromark-extension-gfm-strikethrough-2.1.0.tgz", + "integrity": "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-table": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-table/-/micromark-extension-gfm-table-2.1.1.tgz", + "integrity": "sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-tagfilter": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/micromark-extension-gfm-tagfilter/-/micromark-extension-gfm-tagfilter-2.0.0.tgz", + "integrity": "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg==", + "license": "MIT", + "dependencies": { + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-task-list-item": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-task-list-item/-/micromark-extension-gfm-task-list-item-2.1.0.tgz", + "integrity": "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-factory-destination": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.1.tgz", + "integrity": "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-label": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-2.0.1.tgz", + "integrity": "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg==", + "funding": [ + 
{ + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-space": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-2.0.1.tgz", + "integrity": "sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-title": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-2.0.1.tgz", + "integrity": "sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-whitespace": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-2.0.1.tgz", + "integrity": "sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": 
"https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-character": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-2.1.1.tgz", + "integrity": "sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-chunked": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-2.0.1.tgz", + "integrity": "sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-classify-character": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-2.0.1.tgz", + "integrity": "sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": 
"MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-combine-extensions": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-2.0.1.tgz", + "integrity": "sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-chunked": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-decode-numeric-character-reference": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-2.0.2.tgz", + "integrity": "sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-decode-string": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-2.0.1.tgz", + "integrity": "sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "decode-named-character-reference": "^1.0.0", + 
"micromark-util-character": "^2.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-encode": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-2.0.1.tgz", + "integrity": "sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-html-tag-name": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-2.0.1.tgz", + "integrity": "sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-normalize-identifier": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-2.0.1.tgz", + "integrity": "sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-resolve-all": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-2.0.1.tgz", + "integrity": 
"sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-sanitize-uri": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-2.0.1.tgz", + "integrity": "sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-encode": "^2.0.0", + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-subtokenize": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-2.1.0.tgz", + "integrity": "sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-symbol": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-2.0.1.tgz", + "integrity": "sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q==", + "funding": [ + { + "type": "GitHub 
Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-types": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-2.0.2.tgz", + "integrity": "sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromatch": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.7.tgz", + "integrity": "sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==", + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": 
"https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", "dev": true, "funding": { @@ -12929,6 +14058,31 @@ "node": ">=6" } }, + "node_modules/parse-entities": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-4.0.2.tgz", + "integrity": "sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "character-entities-legacy": "^3.0.0", + "character-reference-invalid": "^2.0.0", + "decode-named-character-reference": "^1.0.0", + "is-alphanumerical": "^2.0.0", + "is-decimal": "^2.0.0", + "is-hexadecimal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/parse-entities/node_modules/@types/unist": { + "version": "2.0.11", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", + "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", + "license": "MIT" + }, "node_modules/parse-json": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", @@ -13139,6 +14293,16 @@ "resolved": "https://registry.npmjs.org/property-expr/-/property-expr-2.0.6.tgz", "integrity": "sha512-SVtmxhRE/CGkn3eZY1T6pC8Nln6Fr/lu1mKSgRud0eC73whjGfoAogbn78LkD8aFL0zz3bAFerKSnOl7NlErBA==" }, + "node_modules/property-information": { + "version": "7.1.0", + "resolved": 
"https://registry.npmjs.org/property-information/-/property-information-7.1.0.tgz", + "integrity": "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", @@ -13353,6 +14517,33 @@ "resolved": "https://registry.npmjs.org/react-lifecycles-compat/-/react-lifecycles-compat-3.0.4.tgz", "integrity": "sha512-fBASbA6LnOU9dOU2eW7aQ8xmYBSXUIWr+UmF9b1efZBazGNO+rcXT/icdKnYm2pTwcRylVUYwW7H1PHfLekVzA==" }, + "node_modules/react-markdown": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-10.1.0.tgz", + "integrity": "sha512-qKxVopLT/TyA6BX3Ue5NwabOsAzm0Q7kAPwq6L+wWDwisYs7R8vZ0nRXqq6rkueboxpkjvLGU9fWifiX/ZZFxQ==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "hast-util-to-jsx-runtime": "^2.0.0", + "html-url-attributes": "^3.0.0", + "mdast-util-to-hast": "^13.0.0", + "remark-parse": "^11.0.0", + "remark-rehype": "^11.0.0", + "unified": "^11.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + }, + "peerDependencies": { + "@types/react": ">=18", + "react": ">=18" + } + }, "node_modules/react-modal": { "version": "3.16.1", "resolved": "https://registry.npmjs.org/react-modal/-/react-modal-3.16.1.tgz", @@ -13788,6 +14979,72 @@ "jsesc": "bin/jsesc" } }, + "node_modules/remark-gfm": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.1.tgz", + "integrity": "sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-gfm": 
"^3.0.0", + "micromark-extension-gfm": "^3.0.0", + "remark-parse": "^11.0.0", + "remark-stringify": "^11.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-parse": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz", + "integrity": "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-from-markdown": "^2.0.0", + "micromark-util-types": "^2.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-rehype": { + "version": "11.1.2", + "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-11.1.2.tgz", + "integrity": "sha512-Dh7l57ianaEoIpzbp0PC9UKAdCSVklD8E5Rpw7ETfbTl3FqcOOgq5q2LVDhgGCkaBv7p24JXikPdvhhmHvKMsw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "mdast-util-to-hast": "^13.0.0", + "unified": "^11.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-stringify": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/remark-stringify/-/remark-stringify-11.0.0.tgz", + "integrity": "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-to-markdown": "^2.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/remove-accents": { "version": "0.5.0", "resolved": "https://registry.npmjs.org/remove-accents/-/remove-accents-0.5.0.tgz", @@ -14226,6 +15483,16 @@ "node": ">=0.10.0" } }, + 
"node_modules/space-separated-tokens": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz", + "integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/srcset": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/srcset/-/srcset-4.0.0.tgz", @@ -14396,6 +15663,20 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/stringify-entities": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", + "integrity": "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==", + "license": "MIT", + "dependencies": { + "character-entities-html4": "^2.0.0", + "character-entities-legacy": "^3.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/strip-ansi": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", @@ -14429,6 +15710,24 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/style-to-js": { + "version": "1.1.21", + "resolved": "https://registry.npmjs.org/style-to-js/-/style-to-js-1.1.21.tgz", + "integrity": "sha512-RjQetxJrrUJLQPHbLku6U/ocGtzyjbJMP9lCNK7Ag0CNh690nSH8woqWH9u16nMjYBAok+i7JO1NP2pOy8IsPQ==", + "license": "MIT", + "dependencies": { + "style-to-object": "1.0.14" + } + }, + "node_modules/style-to-object": { + "version": "1.0.14", + "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.14.tgz", + "integrity": "sha512-LIN7rULI0jBscWQYaSswptyderlarFkjQ+t79nzty8tcIAceVomEVlLzH5VP4Cmsv6MtKhs7qaAiwlcp+Mgaxw==", + "license": "MIT", + "dependencies": { + "inline-style-parser": "0.2.7" + } + }, "node_modules/stylis": { "version": "4.2.0", "resolved": 
"https://registry.npmjs.org/stylis/-/stylis-4.2.0.tgz", @@ -14572,6 +15871,26 @@ "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", "dev": true }, + "node_modules/trim-lines": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", + "integrity": "sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/trough": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz", + "integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/ts-api-utils": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.1.0.tgz", @@ -14834,6 +16153,93 @@ "node": ">=4" } }, + "node_modules/unified": { + "version": "11.0.5", + "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz", + "integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "bail": "^2.0.0", + "devlop": "^1.0.0", + "extend": "^3.0.0", + "is-plain-obj": "^4.0.0", + "trough": "^2.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-is": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.1.tgz", + "integrity": "sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + 
"url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-position": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-5.0.0.tgz", + "integrity": "sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-stringify-position": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz", + "integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.1.0.tgz", + "integrity": "sha512-m+vIdyeCOpdr/QeQCu2EzxX/ohgS8KbnPDgFni4dQsfSCtpz8UqDyY5GjRru8PDKuYn7Fq19j1CQ+nJSsGKOzg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit-parents": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.2.tgz", + "integrity": "sha512-goh1s1TBrqSqukSc8wrjwWhL0hiJxgA8m4kFxGlQ+8FYQ3C/m11FcTs4YYem7V664AhHVvgoQLk890Ssdsr2IQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/universal-cookie": { 
"version": "4.0.4", "resolved": "https://registry.npmjs.org/universal-cookie/-/universal-cookie-4.0.4.tgz", @@ -15036,6 +16442,34 @@ "uuid": "dist/bin/uuid" } }, + "node_modules/vfile": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz", + "integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile-message": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.3.tgz", + "integrity": "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-stringify-position": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/vite": { "version": "4.5.3", "resolved": "https://registry.npmjs.org/vite/-/vite-4.5.3.tgz", @@ -15855,6 +17289,16 @@ "peerDependencies": { "react": "^16.8.0 || ^17.0.0 || ^18.0.0" } + }, + "node_modules/zwitch": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", + "integrity": "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } } } } diff --git a/GUI/package.json b/GUI/package.json index 09ab4a8..ec9c4e7 100644 --- a/GUI/package.json +++ b/GUI/package.json @@ -53,6 +53,7 @@ "react-i18next": "^12.1.1", "react-icons": "^4.10.1", "react-idle-timer": "^5.5.2", + "react-markdown": "^10.1.0", "react-modal": "^3.16.1", "react-redux": "^8.1.1", "react-router-dom": "^6.5.0", @@ -61,6 +62,7 @@ "react-textarea-autosize": "^8.4.0", 
"reactflow": "^11.4.0", "regexify-string": "^1.0.19", + "remark-gfm": "^4.0.1", "rxjs": "^7.8.1", "timeago.js": "^4.0.2", "usehooks-ts": "^2.9.1", diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx index c00c3ae..4829d12 100644 --- a/GUI/src/pages/TestModel/index.tsx +++ b/GUI/src/pages/TestModel/index.tsx @@ -3,6 +3,8 @@ import { Button, FormSelect, FormTextarea, Collapsible } from 'components'; import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner'; import { FC, useState } from 'react'; import { useTranslation } from 'react-i18next'; +import ReactMarkdown from 'react-markdown'; +import remarkGfm from 'remark-gfm'; import './TestLLM.scss'; import { useDialog } from 'hooks/useDialog'; import { fetchLLMConnectionsPaginated, LegacyLLMConnectionFilters } from 'services/llmConnections'; @@ -139,7 +141,9 @@ const TestLLM: FC = () => {
Response:
- {inferenceResult.content} + + {inferenceResult.content} +
@@ -155,7 +159,9 @@ const TestLLM: FC = () => { Rank {contextItem.rank}
- {contextItem.chunkRetrieved} + + {contextItem.chunkRetrieved} +
))} From 078122cc1bd185e93627fd7aacb52571e75dce78 Mon Sep 17 00:00:00 2001 From: erangi-ar <111747955+erangi-ar@users.noreply.github.com> Date: Fri, 20 Feb 2026 10:29:00 +0530 Subject: [PATCH 3/4] Streaming response formatting (#280) * refactor: update SSE connection URL to use environment variable * foramt markdown of the llm response * feat: add markdown support to MessageContent component * title fix * prompt coniguration backend to be testing * custom prompt configuration update and fixed Pyright issues * fixed copilot reviews * pre validation step added when user query is inserted * added more validation cases * fixed review comments * resolved pr comments --------- Co-authored-by: erangi-ar Co-authored-by: nuwangeek Co-authored-by: Charith Nuwan Bimsara <59943919+nuwangeek@users.noreply.github.com> Co-authored-by: Thiru Dinesh Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Thiru Dinesh <56014038+Thirunayan22@users.noreply.github.com> --- GUI/.env.development | 4 +- .../MessageContent/MessageContent.scss | 127 ++++++--- GUI/src/components/MessageContent/index.tsx | 108 ++------ GUI/src/hooks/useStreamingResponse.tsx | 17 +- .../TestProductionLLM/TestProductionLLM.scss | 28 ++ GUI/src/pages/TestProductionLLM/index.tsx | 260 ++++++++++-------- GUI/translations/en/common.json | 15 + GUI/translations/et/common.json | 15 + 8 files changed, 337 insertions(+), 237 deletions(-) diff --git a/GUI/.env.development b/GUI/.env.development index 39f5e47..ae5b135 100644 --- a/GUI/.env.development +++ b/GUI/.env.development @@ -2,6 +2,6 @@ REACT_APP_RUUTER_API_URL=http://localhost:8086 REACT_APP_RUUTER_PRIVATE_API_URL=http://localhost:8088 REACT_APP_CUSTOMER_SERVICE_LOGIN=http://localhost:3004/et/dev-auth REACT_APP_SERVICE_ID=conversations,settings,monitoring -REACT_APP_NOTIFICATION_NODE_URL=http://localhost:3005 -REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 
'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:3005 ws://localhost; +REACT_APP_NOTIFICATION_NODE_URL=http://localhost:4040 +REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:4040 ws://localhost; REACT_APP_ENABLE_HIDDEN_FEATURES=TRUE \ No newline at end of file diff --git a/GUI/src/components/MessageContent/MessageContent.scss b/GUI/src/components/MessageContent/MessageContent.scss index 7b4eea5..aec5640 100644 --- a/GUI/src/components/MessageContent/MessageContent.scss +++ b/GUI/src/components/MessageContent/MessageContent.scss @@ -1,61 +1,112 @@ .message-content-wrapper { width: 100%; + line-height: 1.6; - .message-text { - margin-bottom: 12px; - line-height: 1.6; + // Markdown text styling + p { + margin: 0 0 12px 0; white-space: pre-wrap; word-wrap: break-word; + + &:last-child { + margin-bottom: 0; + } + } + + // Bold text + .markdown-bold, + strong { + font-weight: 600; + } + + // Ordered lists (for references) + .markdown-list, + ol { + margin: 16px 0 0 0; + padding-left: 20px; + list-style-type: decimal; } - .message-references { - margin-top: 16px; - padding-top: 12px; - border-top: 1px solid rgba(0, 0, 0, 0.1); + // List items + .markdown-list-item, + li { + margin-bottom: 6px; + line-height: 1.5; - .references-title { - display: block; - font-weight: 600; - margin-bottom: 8px; - font-size: 14px; + &:last-child { + margin-bottom: 0; } + } - .references-list { - margin: 0; - padding-left: 20px; - list-style-type: decimal; + // Links + a { + color: #0066cc; + text-decoration: none; + word-break: break-all; + transition: color 0.2s ease; - li { - margin-bottom: 6px; - 
line-height: 1.5; + &:hover { + color: #0052a3; + text-decoration: underline; + } - &:last-child { - margin-bottom: 0; - } - } + &:visited { + color: #551a8b; + } + } - .reference-link { - color: #0066cc; - text-decoration: none; - word-break: break-all; - transition: color 0.2s ease; + // Inline code + code { + background-color: rgba(0, 0, 0, 0.05); + padding: 2px 6px; + border-radius: 3px; + font-family: monospace; + font-size: 0.9em; + } - &:hover { - color: #0052a3; - text-decoration: underline; - } + // Code blocks + pre { + background-color: rgba(0, 0, 0, 0.05); + padding: 12px; + border-radius: 6px; + overflow-x: auto; + margin: 12px 0; - &:visited { - color: #551a8b; - } - } + code { + background-color: transparent; + padding: 0; } } + + // Headings + h1, h2, h3, h4, h5, h6 { + margin: 16px 0 8px 0; + font-weight: 600; + } + + // Blockquotes + blockquote { + border-left: 4px solid rgba(0, 0, 0, 0.2); + padding-left: 12px; + margin: 12px 0; + color: rgba(0, 0, 0, 0.7); + } } // Dark mode support .test-production-llm__message--bot { - .message-references { - border-top-color: rgba(255, 255, 255, 0.1); + .message-content-wrapper { + code { + background-color: rgba(255, 255, 255, 0.1); + } + + pre { + background-color: rgba(255, 255, 255, 0.1); + } + + blockquote { + border-left-color: rgba(255, 255, 255, 0.2); + color: rgba(255, 255, 255, 0.7); + } } } diff --git a/GUI/src/components/MessageContent/index.tsx b/GUI/src/components/MessageContent/index.tsx index 63ff7f2..69b7ffe 100644 --- a/GUI/src/components/MessageContent/index.tsx +++ b/GUI/src/components/MessageContent/index.tsx @@ -1,4 +1,6 @@ import { FC } from 'react'; +import ReactMarkdown from 'react-markdown'; +import remarkGfm from 'remark-gfm'; import './MessageContent.scss'; interface MessageContentProps { @@ -6,85 +8,33 @@ interface MessageContentProps { } const MessageContent: FC = ({ content }) => { - // Function to parse and render message content with proper formatting - const renderContent = 
() => { - // Split by **References:** pattern - const referencesMatch = content.match(/\*\*References:\*\*([\s\S]*)/); - - if (!referencesMatch) { - // No references, return plain content with line breaks - return ( -
- {content.split('\n').map((line, index) => ( - - {line} - {index < content.split('\n').length - 1 &&
} -
- ))} -
- ); - } - - // Split content into main text and references - const mainText = content.substring(0, referencesMatch.index); - const referencesText = referencesMatch[1].trim(); - - // Parse numbered references with URLs - const referenceLines = referencesText - .split('\n') - .filter(line => line.trim()) - .map(line => { - // Match pattern: "1. https://url" or "1. url" - const match = line.match(/^(\d+)\.\s+(https?:\/\/[^\s]+)/); - if (match) { - return { - number: match[1], - url: match[2], - }; - } - return null; - }) - .filter(Boolean); - - return ( -
- {/* Main text */} - {mainText && ( -
- {mainText.split('\n').map((line, index) => ( - - {line} - {index < mainText.split('\n').length - 1 &&
} -
- ))} -
- )} - - {/* References section */} - {referenceLines.length > 0 && ( -
- References: -
    - {referenceLines.map((ref, index) => ( -
  1. - - {ref!.url} - -
  2. - ))} -
-
- )} -
- ); - }; - - return <>{renderContent()}; + return ( + + ); }; export default MessageContent; diff --git a/GUI/src/hooks/useStreamingResponse.tsx b/GUI/src/hooks/useStreamingResponse.tsx index 211d44f..8a9d779 100644 --- a/GUI/src/hooks/useStreamingResponse.tsx +++ b/GUI/src/hooks/useStreamingResponse.tsx @@ -1,6 +1,19 @@ import { useState, useRef, useCallback, useEffect } from 'react'; import axios from 'axios'; +const getNotificationNodeUrl = (): string => { + const value = import.meta.env.REACT_APP_NOTIFICATION_NODE_URL; + if (!value) { + throw new Error( + 'Environment variable REACT_APP_NOTIFICATION_NODE_URL is not defined. ' + + 'Please set it to the base URL of the notification service to enable streaming responses.' + ); + } + return value; +}; +const notificationNodeUrl = getNotificationNodeUrl(); +console.log(notificationNodeUrl); + interface StreamingOptions { authorId: string; conversationHistory: Array<{ authorRole: string; message: string; timestamp: string }>; @@ -50,7 +63,7 @@ export const useStreamingResponse = (channelId: string): UseStreamingResponseRet try { // Step 1: Open SSE connection FIRST - const sseUrl = `https://est-rag-rtc.rootcode.software/notifications-server/sse/stream/${channelId}`; + const sseUrl = `${notificationNodeUrl}/sse/stream/${channelId}`; console.log('[SSE] Connecting to:', sseUrl); const eventSource = new EventSource(sseUrl); @@ -102,7 +115,7 @@ export const useStreamingResponse = (channelId: string): UseStreamingResponseRet await new Promise(resolve => setTimeout(resolve, 500)); // Step 3: POST to trigger streaming - const postUrl = `https://est-rag-rtc.rootcode.software/notifications-server/channels/${channelId}/orchestrate/stream`; + const postUrl = `${notificationNodeUrl}/channels/${channelId}/orchestrate/stream`; console.log('[API] Triggering stream:', postUrl); await axios.post(postUrl, { diff --git a/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss b/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss 
index 1bd8e0f..df51e32 100644 --- a/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss +++ b/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss @@ -77,6 +77,34 @@ border-radius: 18px 18px 18px 4px; } } + + &--error { + .test-production-llm__message-content { + border-color: #f44336; + background-color: #ffebee; + } + } + } + + &__message-error { + display: flex; + align-items: flex-start; + gap: 0.5rem; + margin-top: 0.5rem; + padding-top: 0.5rem; + border-top: 1px solid #ffcdd2; + font-size: 0.85rem; + color: #c62828; + } + + &__message-error-icon { + flex-shrink: 0; + font-size: 1rem; + } + + &__message-error-text { + flex: 1; + line-height: 1.3; } &__message-content { diff --git a/GUI/src/pages/TestProductionLLM/index.tsx b/GUI/src/pages/TestProductionLLM/index.tsx index a9c1493..d978ba1 100644 --- a/GUI/src/pages/TestProductionLLM/index.tsx +++ b/GUI/src/pages/TestProductionLLM/index.tsx @@ -1,153 +1,169 @@ -import { FC, useState, useRef, useEffect } from 'react'; +import { FC, useState, useRef, useEffect, useMemo } from 'react'; import { useTranslation } from 'react-i18next'; -import { Button, FormTextarea, Section } from 'components'; -import { productionInference, ProductionInferenceRequest } from 'services/inference'; +import { Button, FormTextarea } from 'components'; import { useToast } from 'hooks/useToast'; +import { useStreamingResponse } from 'hooks/useStreamingResponse'; import './TestProductionLLM.scss'; - +import MessageContent from 'components/MessageContent'; interface Message { id: string; content: string; isUser: boolean; timestamp: string; + hasError?: boolean; + errorMessage?: string; } const TestProductionLLM: FC = () => { const { t } = useTranslation(); const toast = useToast(); - const [message, setMessage] = useState(''); + const [inputMessage, setInputMessage] = useState(''); const [messages, setMessages] = useState([]); const [isLoading, setIsLoading] = useState(false); const messagesEndRef = useRef(null); - const scrollToBottom 
= () => { - messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); - }; + // Generate a unique channel ID for this session + const channelId = useMemo(() => `channel-${Math.random().toString(36).substring(2, 15)}`, []); + const { startStreaming, stopStreaming, isStreaming } = useStreamingResponse(channelId); + // Auto-scroll to bottom useEffect(() => { - scrollToBottom(); + messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); }, [messages]); + // Cleanup incomplete messages on unmount if streaming is active + useEffect(() => { + return () => { + if (isStreaming) { + stopStreaming(); + // Remove incomplete bot messages on unmount + setMessages(prev => prev.filter(msg => msg.isUser || !msg.content.trim() === false)); + } + }; + }, [isStreaming, stopStreaming]); + const handleSendMessage = async () => { - if (!message.trim()) { + if (!inputMessage.trim()) { toast.open({ type: 'warning', - title: t('warningTitle'), - message: t('emptyMessageWarning'), + title: t('testProductionLLM.warningTitle'), + message: t('testProductionLLM.emptyMessageWarning'), }); return; } + const userMessageText = inputMessage.trim(); + + // Add user message const userMessage: Message = { id: `user-${Date.now()}`, - content: message.trim(), + content: userMessageText, isUser: true, timestamp: new Date().toISOString(), }; - // Add user message to chat setMessages(prev => [...prev, userMessage]); - setMessage(''); + setInputMessage(''); setIsLoading(true); - try { - // Hardcoded values as requested - const request: ProductionInferenceRequest = { - chatId: 'test-chat-001', - message: userMessage.content, - authorId: 'test-author-001', - conversationHistory: messages.map(msg => ({ - authorRole: msg.isUser ? 
'user' : 'bot', - message: msg.content, - timestamp: msg.timestamp, - })), - url: 'https://test-url.example.com', - }; - - let response; - let attemptCount = 0; - const maxAttempts = 2; - - // Retry logic - while (attemptCount < maxAttempts) { - try { - attemptCount++; - console.log(`Production Inference Attempt ${attemptCount}/${maxAttempts}`); - response = await productionInference(request); - - // If we get a successful response, break out of retry loop - if (!response.status || response.status < 400) { - break; - } - - // If first attempt failed with error status, retry once more - if (attemptCount < maxAttempts && response.status >= 400) { - console.log('Retrying due to error status...'); - continue; - } - } catch (err) { - // If first attempt threw an error, retry once more - if (attemptCount < maxAttempts) { - console.log('Retrying due to exception...'); - continue; - } - throw err; // Re-throw on final attempt - } - } + // Create bot message ID + const botMessageId = `bot-${Date.now()}`; - console.log('Production Inference Response:', response); + // Prepare conversation history (exclude the current user message) + const conversationHistory = messages.map(msg => ({ + authorRole: msg.isUser ? 
'user' : 'bot', + message: msg.content, + timestamp: msg.timestamp, + })); - // Create bot response message - let botContent = ''; - let botMessageType: 'success' | 'error' = 'success'; + const streamingOptions = { + authorId: 'test-user-456', + conversationHistory, + url: 'opensearch-dashboard-test', + }; - if (response.status && response.status >= 400) { - // Error response - botContent = response.content || 'An error occurred while processing your request.'; - botMessageType = 'error'; - } else { - // Success response - botContent = response?.response?.content || 'Response received successfully.'; + // Callbacks for streaming + const onToken = (token: string) => { + console.log('[Component] Received token:', token); + + setMessages(prev => { + // Find the bot message + const botMsgIndex = prev.findIndex(msg => msg.id === botMessageId); - if (response.questionOutOfLlmScope) { - botContent += ' (Note: This question appears to be outside the LLM scope)'; + if (botMsgIndex === -1) { + // First token - add the bot message + console.log('[Component] Adding bot message with first token'); + return [ + ...prev, + { + id: botMessageId, + content: token, + isUser: false, + timestamp: new Date().toISOString(), + } + ]; + } else { + // Append token to existing message + console.log('[Component] Appending token to existing message'); + const updated = [...prev]; + updated[botMsgIndex] = { + ...updated[botMsgIndex], + content: updated[botMsgIndex].content + token, + }; + return updated; } - } - - const botMessage: Message = { - id: `bot-${Date.now()}`, - content: botContent, - isUser: false, - timestamp: new Date().toISOString(), - }; - - setMessages(prev => [...prev, botMessage]); + }); + }; - // Show toast notification - // toast.open({ - // type: botMessageType, - // title: t('errorOccurred'), - // message: t('errorMessage'), - // }); + const onComplete = () => { + console.log('[Component] Stream completed'); + // Always reset loading state on completion + 
setIsLoading(false); + }; - } catch (error) { - console.error('Error sending message:', error); + const onError = (error: string) => { + console.error('[Component] Stream error:', error); + // Always reset loading state on error + setIsLoading(false); + + // Handle incomplete bot message + setMessages(prev => { + const botMsgIndex = prev.findIndex(msg => msg.id === botMessageId); + + if (botMsgIndex !== -1) { + const botMessage = prev[botMsgIndex]; + + // If the bot message has content, mark it as errored + if (botMessage.content.trim()) { + const updated = [...prev]; + updated[botMsgIndex] = { + ...botMessage, + hasError: true, + errorMessage: error, + }; + return updated; + } else { + // If no content, remove the empty bot message + return prev.filter(msg => msg.id !== botMessageId); + } + } + + return prev; + }); - const errorMessage: Message = { - id: `error-${Date.now()}`, - content: 'Failed to send message. Please check your connection and try again.', - isUser: false, - timestamp: new Date().toISOString(), - }; - - setMessages(prev => [...prev, errorMessage]); - toast.open({ type: 'error', - title: 'Connection Error', - message: 'Unable to connect to the production LLM service.', + title: t('testProductionLLM.streamingErrorTitle'), + message: error, }); - } finally { + }; + + // Start streaming + try { + await startStreaming(userMessageText, streamingOptions, onToken, onComplete, onError); + } catch (error) { + console.error('[Component] Failed to start streaming:', error); + // Reset loading state if streaming fails to start setIsLoading(false); } }; @@ -161,10 +177,11 @@ const TestProductionLLM: FC = () => { const clearChat = () => { setMessages([]); + stopStreaming(); toast.open({ type: 'info', - title: 'Chat Cleared', - message: 'All messages have been cleared.', + title: t('testProductionLLM.chatClearedTitle'), + message: t('testProductionLLM.chatClearedMessage'), }); }; @@ -172,9 +189,9 @@ const TestProductionLLM: FC = () => {
-

{t('Test Production LLM')}

+

{t('testProductionLLM.title')}

@@ -182,8 +199,8 @@ const TestProductionLLM: FC = () => {
{messages.length === 0 && (
-

Welcome to Production LLM Testing

-

Start a conversation by typing a message below.

+

{t('testProductionLLM.welcomeTitle')}

+

{t('testProductionLLM.welcomeSubtitle')}

)} @@ -192,10 +209,21 @@ const TestProductionLLM: FC = () => { key={msg.id} className={`test-production-llm__message ${ msg.isUser ? 'test-production-llm__message--user' : 'test-production-llm__message--bot' + } ${ + msg.hasError ? 'test-production-llm__message--error' : '' }`} >
- {msg.content} + + {msg.hasError && ( +
+ ⚠️ + + {t('testProductionLLM.incompleteMessageError', { defaultValue: 'This message is incomplete due to an error' })} + {msg.errorMessage && `: ${msg.errorMessage}`} + +
+ )}
{new Date(msg.timestamp).toLocaleTimeString()} @@ -220,22 +248,22 @@ const TestProductionLLM: FC = () => {
setMessage(e.target.value)} + value={inputMessage} + onChange={(e) => setInputMessage(e.target.value)} onKeyDown={handleKeyPress} - placeholder="Type your message here... (Press Enter to send, Shift+Enter for new line)" + placeholder={t('testProductionLLM.messagePlaceholder')??""} hideLabel maxRows={4} - disabled={isLoading} + disabled={isLoading || isStreaming} />
diff --git a/GUI/translations/en/common.json b/GUI/translations/en/common.json index 8c2cac8..a71a2f3 100644 --- a/GUI/translations/en/common.json +++ b/GUI/translations/en/common.json @@ -414,6 +414,21 @@ "azure": "Azure OpenAI" } }, + "testProductionLLM": { + "title": "Test Production LLM", + "clearChat": "Clear Chat", + "welcomeTitle": "Welcome to Production LLM Testing", + "welcomeSubtitle": "Start a conversation by typing a message below.", + "messageLabel": "Message", + "messagePlaceholder": "Type your message here... (Press Enter to send, Shift+Enter for new line)", + "sendButton": "Send", + "sendingButton": "Sending...", + "warningTitle": "Warning", + "emptyMessageWarning": "Please enter a message", + "streamingErrorTitle": "Streaming Error", + "chatClearedTitle": "Chat Cleared", + "chatClearedMessage": "All messages have been cleared." + }, "promptConfigurations": { "title": "Prompt Configurations", "subtitle": "Configure and manage your prompt templates", diff --git a/GUI/translations/et/common.json b/GUI/translations/et/common.json index 1c093b6..b1030db 100644 --- a/GUI/translations/et/common.json +++ b/GUI/translations/et/common.json @@ -415,6 +415,21 @@ "azure": "Azure OpenAI" } }, + "testProductionLLM": { + "title": "Testi Tootmise LLM", + "clearChat": "Tühjenda Vestlus", + "welcomeTitle": "Tere tulemast Tootmise LLM Testimisse", + "welcomeSubtitle": "Alusta vestlust, kirjutades allpool sõnumi.", + "messageLabel": "Sõnum", + "messagePlaceholder": "Kirjuta oma sõnum siia... (Vajuta Enter saatmiseks, Shift+Enter uue rea jaoks)", + "sendButton": "Saada", + "sendingButton": "Saatmine...", + "warningTitle": "Hoiatus", + "emptyMessageWarning": "Palun sisesta sõnum", + "streamingErrorTitle": "Voogedastuse Viga", + "chatClearedTitle": "Vestlus Tühjendatud", + "chatClearedMessage": "Kõik sõnumid on tühjendatud." 
+ }, "promptConfigurations": { "title": "Viiba Seaded", "subtitle": "Seadista ja halda oma viiba malle", From 05f0f94ff6de5ce06d0abf3ffc3594e487bcbe83 Mon Sep 17 00:00:00 2001 From: Charith Nuwan Bimsara <59943919+nuwangeek@users.noreply.github.com> Date: Fri, 20 Feb 2026 13:29:32 +0530 Subject: [PATCH 4/4] Implement multi-layer Tool classification agent workflow routing skeleton with BaseWorkflow abstract class (#318) * prompt coniguration backend to be testing * custom prompt configuration update and fixed Pyright issues * fixed copilot reviews * pre validation step added when user query is inserted * added more validation cases * fixed review comments * implement tool classification orchestration agent skeleton * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fixed copilot suggested changes * fixed issue * added skills * fixed issue --------- Co-authored-by: Thiru Dinesh <56014038+Thirunayan22@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Thiru Dinesh --- .github/copilot-instructions.md | 304 ++++++ .github/skills/code-review/SKILL.md | 4 + .gitignore | 1 + docs/TOOL_CLASSIFIER_SKELETON_USAGE.md | 542 ++++++++++ pyproject.toml | 2 +- src/llm_orchestration_service.py | 982 ++++++++++-------- src/llm_orchestration_service_api.py | 10 +- src/llm_orchestrator_config/feature_flags.py | 82 ++ src/tool_classifier/__init__.py | 20 + src/tool_classifier/base_workflow.py | 118 +++ src/tool_classifier/classifier.py | 338 ++++++ src/tool_classifier/enums.py | 39 + src/tool_classifier/models.py | 81 ++ src/tool_classifier/workflows/__init__.py | 13 + .../workflows/context_workflow.py | 86 ++ src/tool_classifier/workflows/ood_workflow.py | 131 +++ src/tool_classifier/workflows/rag_workflow.py | 172 +++ .../workflows/service_workflow.py | 137 +++ 18 files changed, 2625 insertions(+), 437 deletions(-) create mode 100644 .github/copilot-instructions.md create 
mode 100644 .github/skills/code-review/SKILL.md create mode 100644 docs/TOOL_CLASSIFIER_SKELETON_USAGE.md create mode 100644 src/llm_orchestrator_config/feature_flags.py create mode 100644 src/tool_classifier/__init__.py create mode 100644 src/tool_classifier/base_workflow.py create mode 100644 src/tool_classifier/classifier.py create mode 100644 src/tool_classifier/enums.py create mode 100644 src/tool_classifier/models.py create mode 100644 src/tool_classifier/workflows/__init__.py create mode 100644 src/tool_classifier/workflows/context_workflow.py create mode 100644 src/tool_classifier/workflows/ood_workflow.py create mode 100644 src/tool_classifier/workflows/rag_workflow.py create mode 100644 src/tool_classifier/workflows/service_workflow.py diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..f71218b --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,304 @@ +# BYK-RAG Module - Copilot Instructions + +## Project Overview + +BYK-RAG is a Retrieval-Augmented Generation module for Estonian government digital services (Bürokratt ecosystem). It provides secure, multilingual AI-powered responses by integrating multiple LLM providers, contextual retrieval, and guardrails. 
+ +## Build, Test, and Lint Commands + +### Environment Setup +```bash +# Install Python 3.12.10 and create virtual environment +uv python install 3.12.10 +uv sync --frozen + +# Install pre-commit hooks +uv run pre-commit install +``` + +### Running Services +```bash +# Always use uv run for Python scripts (whether venv is activated or not) +uv run python + +# Start all services with Docker Compose +docker compose up + +# Run FastAPI orchestration service locally +uv run uvicorn src.llm_orchestration_service_api:app --reload +``` + +### Testing +```bash +# Run all tests +uv run pytest + +# Run specific test file +uv run pytest tests/test_query_validator.py -v + +# Run integration tests (requires Docker and secrets) +uv run pytest tests/integration_tests/ -v --tb=short --log-cli-level=INFO + +# Run deepeval tests +uv run pytest tests/deepeval_tests/standard_tests.py -v --tb=short +``` + +### Linting and Formatting +```bash +# Check code formatting (does NOT modify files) +uv run ruff format --check + +# Apply code formatting (SAFE - layout only, no logic changes) +uv run ruff format + +# Check linting issues (manual fixes required) +uv run ruff check . + +# Get explanation for specific lint rule +uv run ruff rule # e.g., ANN204 + +# NEVER use ruff check --fix (can alter logic/control flow) +``` + +### Type Checking +```bash +# Run Pyright type checker (runs on src/ only, not tests/) +uv run pyright +``` + +### Pre-commit Hooks +```bash +# Run all pre-commit hooks manually +uv run pre-commit run --all-files +``` + +## Architecture + +### Core Components + +1. **LLM Orchestration Service** (`src/llm_orchestration_service.py`) + - Central business logic for RAG orchestration + - Coordinates prompt refinement, retrieval, generation, and guardrails + - Integrates with Langfuse for observability + +2. 
**FastAPI Application** (`src/llm_orchestration_service_api.py`) + - HTTP API layer exposing `/orchestrate` endpoint + - Handles streaming responses and rate limiting + - Request/response validation via Pydantic models + +3. **Contextual Retrieval** (`src/contextual_retrieval/`) + - Implements Anthropic's Contextual Retrieval methodology + - Hybrid search: Vector (semantic) + BM25 (lexical) with RRF fusion + - Multi-query expansion (6 refined queries per user query) + - Qdrant vector database integration + +4. **Prompt Refinement** (`src/prompt_refine_manager/`) + - DSPy-based query expansion + - Generates 5 refined variations + original query + +5. **Response Generation** (`src/response_generator/`) + - DSPy-based response synthesis + - Supports streaming via SSE (Server-Sent Events) + - Uses top-K retrieved chunks (default: 10) + +6. **Guardrails** (`src/guardrails/`) + - NeMo Guardrails integration with DSPy + - Input guardrails (pre-refinement) and output guardrails (post-generation) + - Blocks out-of-scope queries and harmful content + +7. **LLM Manager** (`src/llm_orchestrator_config/llm_manager.py`) + - Multi-provider support: AWS Bedrock, Azure OpenAI, Google Cloud, OpenAI, Anthropic + - HashiCorp Vault integration for secret management + - RSA-2048 encrypted credentials storage + +8. 
**Vector Indexer** (`src/vector_indexer/`) + - Qdrant collection management + - Embedding generation and indexing + - BM25 index creation + +### Supporting Services (Docker Compose) + +- **Ruuter** (Public/Private): API gateway and routing +- **DataMapper**: Data transformation layer +- **Resql**: PostgreSQL query builder +- **CronManager**: Scheduled jobs (knowledge base sync) +- **Qdrant**: Vector database +- **MinIO**: S3-compatible object storage +- **HashiCorp Vault**: Secret management +- **Grafana Loki**: Log aggregation +- **Langfuse**: LLM observability dashboard + +### Key Data Flow + +``` +User Query + ↓ +Input Guardrails (NeMo Rails) + ↓ +Prompt Refinement (DSPy) → 6 queries + ↓ +Parallel Hybrid Search (each query) + ├─→ Semantic Search (Qdrant, top-40 per query, threshold ≥0.4) + └─→ BM25 Search (top-40 per query) + ↓ +RRF Fusion → Top-K chunks (10 default) + ↓ +Response Generation (DSPy) + ↓ +Output Guardrails (NeMo Rails) + ↓ +Response to User (JSON or SSE stream) +``` + +## Key Conventions + +### Dependency Management + +- **ALWAYS use `uv add `** to add dependencies (never `pip install`) +- **ALWAYS commit both `pyproject.toml` AND `uv.lock`** together +- Use bounded version ranges: `uv add "package>=x.y,` for explanations +- Autofixes can alter control flow/logic unintentionally + +### Formatting (Ruff Formatter) + +- Double quotes for strings +- Spaces for indentation (4 spaces) +- Respects magic trailing commas +- Auto-detects line endings (LF/CRLF) +- Does NOT reformat docstring code blocks +- `uv run ruff format` is SAFE (layout only, no logic changes) + +### DSPy Usage + +- Used for prompt refinement (multi-query expansion) and response generation +- Custom LLM adapters integrate DSPy with NeMo Guardrails +- Optimization modules under `src/optimization/` for tuning prompts/metrics +- Models loaded via `optimized_module_loader.py` for compiled DSPy modules + +### HashiCorp Vault Integration + +- Secrets stored at `secret/users///` +- Each 
connection has `provider`, `environment`, and provider-specific keys +- RSA-2048 encryption layer BEFORE Vault storage +- GUI encrypts with public key; CronManager decrypts with private key +- Vault unavailable = graceful degradation (fail securely) + +### Logging + +- **loguru** for application logging +- Grafana Loki integration for centralized logs +- Use `logger.info()`, `logger.warning()`, `logger.error()` (NOT `print()`) +- Loki logger available at `grafana-configs/loki_logger.py` + +### Streaming Responses + +- Implemented via Server-Sent Events (SSE) in FastAPI +- `StreamConfig` and `stream_manager` coordinate streaming state +- `stream_response_native()` in response_generator yields tokens +- Timeout handling via `stream_timeout` utility +- Environment-gated: check `STREAMING_ALLOWED_ENVS` + +### Configuration Loading + +- `PromptConfigurationLoader` fetches prompt configs from Ruuter endpoint +- Cache TTL: `PROMPT_CONFIG_CACHE_TTL` +- Custom prompts per user/organization (stored in Vault/database) +- Fallback to defaults if Ruuter unavailable + +### Error Handling + +- `generate_error_id()` creates unique error IDs for tracking +- `log_error_with_context()` for structured error logging +- Localized error messages via `get_localized_message()` (multilingual support) +- Predefined message constants in `llm_orchestrator_constants.py` + +### Testing Conventions + +- Test files under `tests/` (unit, integration, deepeval) +- Integration tests use `testcontainers` for Docker orchestration +- Secrets required for integration tests (Azure OpenAI keys, etc.) +- Mock data in `tests/mocks/` and `tests/data/` + +### CI/CD Checks + +1. **uv-env-check**: Lockfile vs. pyproject.toml consistency +2. **pyright-type-check**: Type checking on src/ (strict mode) +3. **ruff-format-check**: Code formatting compliance +4. **ruff-lint-check**: Linting standards +5. **pytest-integration-check**: Full integration tests (requires secrets) +6. 
**deepeval-tests**: LLM evaluation metrics +7. **gitleaks-check**: Secret detection (pre-commit + CI) + +### Pre-commit Hooks + +Configured in `.pre-commit-config.yaml`: +- **gitleaks**: Secret scanning +- **uv-lock**: Ensures lockfile consistency + +### Constants and Thresholds + +Key retrieval constants (`src/vector_indexer/constants.py` and contextual retrieval): +- **Semantic search top-K**: 40 per query +- **Semantic threshold**: 0.4 (cosine similarity ≥0.4 = 50-60% alignment) +- **BM25 top-K**: 40 per query +- **Response generation top-K**: 10 chunks (after RRF fusion) +- **Query refinement count**: 5 variations + original = 6 total +- **Search timeout**: 2 seconds per query + +### Docker and Services + +- Use `docker compose` (not `docker-compose`) +- Services communicate via `bykstack` network +- Shared volumes: `shared-volume`, `cron_data` +- Vault agent containers per service (llm, gui, cron) +- Resource limits: CPU and memory constraints defined in docker-compose.yml + +## Important Notes + +- **Python version pinned to 3.12.10** (see `pyproject.toml` and `.python-version`) +- **Line length: 88** (Black-compatible, enforced by Ruff) +- **No print() statements** in production code (use loguru logger) +- **Pydantic for runtime validation** at API boundaries (FastAPI endpoints) +- **Langfuse tracing** for observability (public/secret keys from Vault) +- **Rate limiting** via `RateLimiter` utility (token and request budgets) +- **Cost tracking** via `calculate_total_costs()` and budget tracker +- **Language detection** for multilingual support (Estonian primary) diff --git a/.github/skills/code-review/SKILL.md b/.github/skills/code-review/SKILL.md new file mode 100644 index 0000000..b4e5479 --- /dev/null +++ b/.github/skills/code-review/SKILL.md @@ -0,0 +1,4 @@ +--- +name: code-review +description: Make sure all Python coding standards in the pyproject.toml file are followed, and that the code is clean, well-structured, maintainable, and efficient. 
Provide constructive feedback and suggestions for improvement. +--- diff --git a/.gitignore b/.gitignore index d0dc8cb..77ec786 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ datasets logs/ data_sets vault/agent-out +.vscode/ # RSA Private Keys - DO NOT COMMIT vault/keys/rsa_private_key.pem diff --git a/docs/TOOL_CLASSIFIER_SKELETON_USAGE.md b/docs/TOOL_CLASSIFIER_SKELETON_USAGE.md new file mode 100644 index 0000000..9dc87c8 --- /dev/null +++ b/docs/TOOL_CLASSIFIER_SKELETON_USAGE.md @@ -0,0 +1,542 @@ +# Tool Classifier Skeleton - Usage Guide + +**Version**: 1.0 +**Date**: February 17, 2026 +**Status**: Skeleton Implementation + +--- + +## Overview + +This skeleton implements the **framework** for a multi-workflow routing system based on the [TOOL_CLASSIFIER_EXTENSION_SPEC.md](./TOOL_CLASSIFIER_EXTENSION_SPEC.md) specification. + +### Current Status + + **Implemented (Skeleton)**: +- Abstract base classes and interfaces +- Workflow executor skeletons (Service, Context, RAG, OOD) +- Tool classifier with classification and routing logic +- Feature flags for safe deployment +- Integration into LLMOrchestrationService + + **Not Implemented (Separate Tasks)**: +- Service discovery logic (Layer 1) +- Context analysis logic (Layer 2) +- Actual LLM calls in workflows +- Output guardrails integration for new workflows +- Database schema changes + +### Current Behavior + +When `TOOL_CLASSIFIER_ENABLED=false` (default): +- System works exactly as before (RAG-only pipeline) +- No changes to existing functionality + +When `TOOL_CLASSIFIER_ENABLED=true`: +- Classifier routes queries (currently always to RAG) +- Service and Context workflows return `None` (fallback to RAG) +- RAG workflow wraps existing pipeline +- All queries ultimately handled by RAG + +--- + +## Architecture + +### Layer-Wise Workflow Routing + +``` +User Query + ↓ +Input Guardrails + ↓ +Tool Classifier + ↓ +┌────────────────┐ +│ Classification │ +└────────┬───────┘ + ↓ + ┌─────┴──────┐ + │ Routing │ 
+ └─────┬──────┘ + ↓ + ╔═══════════════════════════════════╗ + ║ Layer 1: Service Workflow ║ → (returns None - not implemented) + ╚═══════════════════════════════════╝ + ↓ (fallback) + ╔═══════════════════════════════════╗ + ║ Layer 2: Context Workflow ║ → (returns None - not implemented) + ╚═══════════════════════════════════╝ + ↓ (fallback) + ╔═══════════════════════════════════╗ + ║ Layer 3: RAG Workflow ║ → Handles query (existing pipeline) + ╚═══════════════════════════════════╝ + ↓ + Response to User +``` + +### Component Structure + +``` +src/tool_classifier/ +├── __init__.py # Module exports +├── enums.py # WorkflowType enum +├── models.py # ClassificationResult models +├── base_workflow.py # Abstract BaseWorkflow class +├── classifier.py # Main ToolClassifier +└── workflows/ + ├── __init__.py + ├── service_workflow.py # Layer 1 (skeleton) + ├── context_workflow.py # Layer 2 (skeleton) + ├── rag_workflow.py # Layer 3 (complete) + └── ood_workflow.py # Layer 4 (skeleton) +``` + +### Abstract Base Class Pattern + +The system uses **BaseWorkflow** as an abstract base class to ensure all workflows follow the same contract. + +#### How It Works + +1. **BaseWorkflow defines the contract**: + - Every workflow MUST implement two methods: `execute_async()` and `execute_streaming()` + - Both methods return `Optional[...]` to support the fallback pattern (return `None` → next layer) + - Python's `@abstractmethod` decorator enforces this at instantiation time + +2. **All workflows inherit from BaseWorkflow**: + - ServiceWorkflowExecutor extends BaseWorkflow → implements both methods + - ContextWorkflowExecutor extends BaseWorkflow → implements both methods + - RAGWorkflowExecutor extends BaseWorkflow → implements both methods + - OODWorkflowExecutor extends BaseWorkflow → implements both methods + +3. 
**Classifier treats all workflows uniformly**: + - The `ToolClassifier.route_to_workflow()` method doesn't need to know which specific workflow it's calling + - It just calls `workflow.execute_async()` or `workflow.execute_streaming()` + - This is **polymorphism** - same interface, different behavior + +4. **Benefits**: + - **Consistency**: All workflows have the same interface + - **Enforcement**: Can't create a workflow without implementing required methods + - **Flexibility**: Easy to add new workflows - just extend BaseWorkflow + - **Testability**: Each workflow can be tested independently + - **Fallback Pattern**: `Optional` return type enables layer chaining + +#### Example Flow + +``` +ToolClassifier needs to execute a workflow + ↓ +Gets workflow object (could be Service, Context, RAG, or OOD) + ↓ +Calls workflow.execute_async(request, context) + ↓ +BaseWorkflow contract guarantees this method exists + ↓ +Each workflow implements its own logic + ↓ +Returns OrchestrationResponse or None (fallback to next layer) +``` + +The abstract class is like a **blueprint** that says: "Any workflow in this system MUST be able to do these two things: execute normally and execute with streaming. I don't care *how* you do it, but you must provide these capabilities." 
+ +--- + +## Feature Flags + +### Environment Variables + +```bash +# Master switch (default: false for safe deployment) +TOOL_CLASSIFIER_ENABLED=false + +# Individual workflow toggles (only apply when classifier enabled) +SERVICE_WORKFLOW_ENABLED=true +CONTEXT_WORKFLOW_ENABLED=true +``` + +### Configuration Class + +```python +from src.llm_orchestrator_config.feature_flags import FeatureFlags + +# Check if classifier is enabled +if FeatureFlags.TOOL_CLASSIFIER_ENABLED: + # Use tool classifier + pass + +# Check specific workflow +if FeatureFlags.is_workflow_enabled("service"): + # Service workflow logic + pass + +# Log current configuration +FeatureFlags.log_configuration() +``` + +--- + +## How It Works + +### 1. Non-Streaming Endpoint (`/orchestrate`) + +#### Current Flow (TOOL_CLASSIFIER_ENABLED=false) + +```python +POST /orchestrate + ↓ +LLMOrchestrationService.process_orchestration_request() + ↓ +Initialize components (LLM, guardrails, retriever, generator) + ↓ +Execute RAG pipeline + ↓ +Return OrchestrationResponse +``` + +#### With Classifier (TOOL_CLASSIFIER_ENABLED=true) + +```python +POST /orchestrate + ↓ +LLMOrchestrationService.process_orchestration_request() + ↓ +Initialize components + ↓ +Tool Classifier Integration: + 1. Initialize ToolClassifier (if first time) + 2. Classify query → ClassificationResult + - Currently always returns: WorkflowType.RAG + 3. Route to workflow: + - ServiceWorkflow.execute_async() → returns None + - ContextWorkflow.execute_async() → returns None + - RAGWorkflow.execute_async() → returns response + ↓ +Return OrchestrationResponse +``` + +### 2. 
Streaming Endpoint (`/orchestrate/stream`) + +#### Current Flow (TOOL_CLASSIFIER_ENABLED=false) + +```python +POST /orchestrate/stream + ↓ +LLMOrchestrationService.stream_orchestration_response() + ↓ +Initialize components + ↓ +Check input guardrails + ↓ +Refine prompt → Retrieve chunks → Stream through NeMo + ↓ +Yield SSE strings +``` + +#### With Classifier (TOOL_CLASSIFIER_ENABLED=true) + +```python +POST /orchestrate/stream + ↓ +LLMOrchestrationService.stream_orchestration_response() + ↓ +Initialize components + ↓ +Check input guardrails + ↓ +Tool Classifier Integration: + 1. Initialize ToolClassifier (if first time) + 2. Classify query → ClassificationResult + 3. Route to streaming workflow: + - ServiceWorkflow.execute_streaming() → returns None + - ContextWorkflow.execute_streaming() → returns None + - RAGWorkflow.execute_streaming() → yields SSE + ↓ +Yield SSE strings +``` + +### 3. Test Endpoint (`/orchestrate/test`) + +Works identically to `/orchestrate`: +- Converts `TestOrchestrationRequest` → `OrchestrationRequest` +- Routes through classifier (if enabled) +- Converts response back to `TestOrchestrationResponse` + +--- + +## Code Examples + +### Using the Classification System + +```python +from src.tool_classifier import ToolClassifier, WorkflowType, ClassificationResult + +# Initialize classifier +classifier = ToolClassifier( + llm_manager=llm_manager, + orchestration_service=service, +) + +# Classify a query +classification = await classifier.classify( + query="Hello, how are you?", + conversation_history=[], + language="en", +) + +# Check result +print(classification.workflow) # WorkflowType.RAG (in skeleton) +print(classification.confidence) # 1.0 +print(classification.reasoning) # "Default to RAG workflow..." 
+ +# Route to workflow +response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, +) +``` + +### Implementing a Workflow (Example) + +```python +from src.tool_classifier.base_workflow import BaseWorkflow +from models.request_models import OrchestrationRequest, OrchestrationResponse + +class MyCustomWorkflow(BaseWorkflow): + """Custom workflow implementation.""" + + async def execute_async( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + ) -> Optional[OrchestrationResponse]: + """Handle query in non-streaming mode.""" + + # Check if this workflow can handle the query + can_handle = await self._check_if_applicable(request.message) + + if not can_handle: + # Return None to trigger fallback to next layer + return None + + # Execute workflow logic + result = await self._process_query(request.message) + + # Validate with output guardrails (TODO) + # is_safe = await guardrails.check_output_async(result) + # if not is_safe: + # return None or violation_response + + # Return response + return OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=False, + inputGuardFailed=False, + content=result, + ) + + async def execute_streaming( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + ) -> Optional[AsyncIterator[str]]: + """Handle query in streaming mode.""" + + # Check if applicable + can_handle = await self._check_if_applicable(request.message) + + if not can_handle: + return None # Fallback + + # Get complete result + result = await self._process_query(request.message) + + # Validate with guardrails (TODO) + # is_safe = await guardrails.check_output_async(result) + # if not is_safe: + # yield format_sse(chatId, VIOLATION_MESSAGE) + # yield format_sse(chatId, "END") + # return + + # Stream result token-by-token + async def stream_result(): + for chunk in self._split_into_tokens(result): + yield self._format_sse(request.chatId, 
chunk) + await asyncio.sleep(0.01) + yield self._format_sse(request.chatId, "END") + + return stream_result() +``` + +--- + +## Deployment Strategy + +### Phase 1: Testing (Current State) + +```bash +# Keep classifier disabled +TOOL_CLASSIFIER_ENABLED=false +``` + +**Result**: System works exactly as before (RAG-only) + +### Phase 2: Enable Classifier (No Impact) + +```bash +# Enable classifier (but workflows not implemented) +TOOL_CLASSIFIER_ENABLED=true +SERVICE_WORKFLOW_ENABLED=true +CONTEXT_WORKFLOW_ENABLED=true +``` + +**Result**: +- Classifier runs but always routes to RAG +- Service/Context return `None` → fallback to RAG +- Functionally identical to Phase 1 +- Validates integration works + +### Phase 3: Implement Service Workflow + +1. Implement service discovery logic (separate task) +2. Deploy with `SERVICE_WORKFLOW_ENABLED=true` +3. Monitor service routing behavior +4. Rollback flag if issues occur + +### Phase 4: Implement Context Workflow + +1. Implement context analysis logic (separate task) +2. Deploy with `CONTEXT_WORKFLOW_ENABLED=true` +3. Monitor greeting/context detection +4. Rollback flag if issues occur + +### Phase 5: Production + +All workflows operational, full layer-wise routing active. + +--- + +## Extending the System + +### Adding a New Workflow + +1. **Create Workflow Executor**: + +```python +# src/tool_classifier/workflows/custom_workflow.py + +from src.tool_classifier.base_workflow import BaseWorkflow + +class CustomWorkflowExecutor(BaseWorkflow): + """Your custom workflow.""" + + async def execute_async(self, request, context): + # Implement logic + pass + + async def execute_streaming(self, request, context): + # Implement streaming logic + pass +``` + +2. 
**Register in Classifier**: + +```python +# src/tool_classifier/enums.py + +class WorkflowType(Enum): + SERVICE = "service" + CONTEXT = "context" + RAG = "rag" + CUSTOM = "custom" # Add new type + OOD = "ood" + +# Update layer order +WORKFLOW_LAYER_ORDER = [ + WorkflowType.SERVICE, + WorkflowType.CONTEXT, + WorkflowType.CUSTOM, # Add to chain + WorkflowType.RAG, + WorkflowType.OOD, +] +``` + +3. **Initialize in ToolClassifier**: + +```python +# src/tool_classifier/classifier.py + +def __init__(self, ...): + # ... existing workflows ... + self.custom_workflow = CustomWorkflowExecutor(...) +``` + +4. **Add Feature Flag**: + +```python +# src/llm_orchestrator_config/feature_flags.py + +CUSTOM_WORKFLOW_ENABLED = ( + os.getenv("CUSTOM_WORKFLOW_ENABLED", "true").lower() == "true" +) +``` + +--- + +## Key Concepts + +### 1. None Return Pattern + +Workflows return `None` when they cannot handle a query: + +```python +if not can_handle: + return None # Triggers fallback to next layer +``` + +This enables the fallback chain: Service → Context → RAG → OOD + +### 2. Validation-First Streaming + +For Service and Context workflows (complete responses): + +```python +# 1. Get complete response +response = await call_service(...) + +# 2. Validate BEFORE streaming +is_safe = await guardrails.check_output_async(response) + +if not is_safe: + yield format_sse(chatId, VIOLATION_MESSAGE) + yield format_sse(chatId, "END") + return + +# 3. Stream validated response +for chunk in split_into_tokens(response): + yield format_sse(chatId, chunk) +yield format_sse(chatId, "END") +``` + +### 3. 
Two Execution Methods + +Every workflow implements both: +- `execute_async()` → For `/orchestrate` (returns complete response) +- `execute_streaming()` → For `/orchestrate/stream` (yields SSE strings) + +--- + +## Summary + +This skeleton provides: + + **Complete framework** for multi-workflow routing + **Safe deployment** with feature flags + **Extensible architecture** using OOP patterns + **Backward compatibility** (disabled by default) + **Clear contracts** via abstract base classes + **Documentation** for implementation tasks + +The system is ready for workflow implementation in separate, independent tasks. + +--- diff --git a/pyproject.toml b/pyproject.toml index dd8f876..56e1426 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,4 +123,4 @@ exclude = [ ] # --- Global strictness --- -typeCheckingMode = "standard" # Standard typechecking mode \ No newline at end of file +typeCheckingMode = "standard" # Standard typechecking mode diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 92dd7b0..3c059f5 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -60,6 +60,8 @@ ContextualRetrieverInitializationError, ContextualRetrievalFailureError, ) +from src.llm_orchestrator_config.feature_flags import FeatureFlags +from src.tool_classifier import ToolClassifier class LangfuseConfig: @@ -128,8 +130,15 @@ def __init__(self) -> None: f"Service will continue with default behavior." 
) + # Initialize tool classifier (lazy initialization - will be created when first needed) + # This allows components to be initialized per-request with proper context + self.tool_classifier = None + + # Log feature flag configuration + FeatureFlags.log_configuration() + @observe(name="orchestration_request", as_type="agent") - def process_orchestration_request( + async def process_orchestration_request( self, request: OrchestrationRequest ) -> Union[OrchestrationResponse, TestOrchestrationResponse]: """ @@ -204,10 +213,65 @@ def process_orchestration_request( # Initialize all service components (only for valid queries) components = self._initialize_service_components(request) - # Execute the orchestration pipeline - response = self._execute_orchestration_pipeline( - request, components, costs_dict, timing_dict - ) + # TOOL CLASSIFIER INTEGRATION + # Route through tool classifier if enabled, otherwise use existing RAG pipeline + if FeatureFlags.TOOL_CLASSIFIER_ENABLED: + try: + logger.info( + f"[{request.chatId}] Tool classifier enabled - routing query" + ) + + # Initialize tool classifier if not already done + if self.tool_classifier is None: + self.tool_classifier = ToolClassifier( + llm_manager=components["llm_manager"], + orchestration_service=self, + ) + logger.info("Tool classifier initialized") + + # Classify query to determine workflow + classification = await self.tool_classifier.classify( + query=request.message, + conversation_history=request.conversationHistory, + language=detected_language, + ) + + logger.info( + f"[{request.chatId}] Classification: {classification.workflow.value} " + f"(confidence: {classification.confidence:.2f})" + ) + + # Route to appropriate workflow + response = await self.tool_classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + except Exception as classifier_error: + logger.error( + f"[{request.chatId}] Tool classifier error: {classifier_error}", + exc_info=True, + ) + 
+ if FeatureFlags.FALLBACK_TO_RAG_ON_ERROR: + logger.info( + f"[{request.chatId}] Falling back to RAG pipeline due to classifier error" + ) + # Execute existing RAG pipeline as fallback + response = await self._execute_orchestration_pipeline( + request, components, costs_dict, timing_dict + ) + else: + raise + else: + # Tool classifier disabled - use existing RAG pipeline + logger.debug( + f"[{request.chatId}] Tool classifier disabled - using RAG pipeline" + ) + response = await self._execute_orchestration_pipeline( + request, components, costs_dict, timing_dict + ) # Log final costs and return response self._log_costs(costs_dict) @@ -317,7 +381,6 @@ async def stream_orchestration_response( # Track costs after streaming completes costs_dict: Dict[str, Dict[str, Any]] = {} timing_dict: Dict[str, float] = {} - streaming_start_time = datetime.now() # STEP 0: Detect language from user message detected_language = detect_language(request.message) @@ -390,465 +453,518 @@ async def stream_orchestration_response( f"[{request.chatId}] [{stream_ctx.stream_id}] Input guardrails passed " ) - # STEP 2: REFINE USER PROMPT (blocking) - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Step 2: Refining user prompt" - ) + # TOOL CLASSIFIER INTEGRATION (STREAMING) + # Route through tool classifier if enabled, otherwise use existing RAG pipeline + if FeatureFlags.TOOL_CLASSIFIER_ENABLED: + try: + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Tool classifier enabled - routing query (streaming)" + ) - start_time = time.time() - refined_output, refiner_usage = self._refine_user_prompt( - llm_manager=components["llm_manager"], - original_message=request.message, - conversation_history=request.conversationHistory, - ) - timing_dict["prompt_refiner"] = time.time() - start_time - costs_dict["prompt_refiner"] = refiner_usage + # Initialize tool classifier if not already done + if self.tool_classifier is None: + self.tool_classifier = ToolClassifier( + 
llm_manager=components["llm_manager"], + orchestration_service=self, + ) + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Tool classifier initialized" + ) - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Prompt refinement complete " - ) + # Classify query to determine workflow + classification = await self.tool_classifier.classify( + query=request.message, + conversation_history=request.conversationHistory, + language=detected_language, + ) - # STEP 3: RETRIEVE CONTEXT CHUNKS (blocking) - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Step 3: Retrieving context chunks" - ) + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Classification: {classification.workflow.value} " + f"(confidence: {classification.confidence:.2f})" + ) - try: - start_time = time.time() - relevant_chunks = await self._safe_retrieve_contextual_chunks( - components["contextual_retriever"], refined_output, request - ) - timing_dict["contextual_retrieval"] = time.time() - start_time - except ( - ContextualRetrieverInitializationError, - ContextualRetrievalFailureError, - ) as e: - logger.warning( - f"[{request.chatId}] [{stream_ctx.stream_id}] Contextual retrieval failed: {str(e)}" - ) - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Returning out-of-scope due to retrieval failure" - ) - yield self._format_sse(request.chatId, OUT_OF_SCOPE_MESSAGE) - yield self._format_sse(request.chatId, "END") - self._log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) - stream_ctx.mark_completed() - return - - if len(relevant_chunks) == 0: - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] No relevant chunks - out of scope" - ) - detected_lang = getattr(request, "_detected_language", "en") - localized_msg = get_localized_message( - OUT_OF_SCOPE_MESSAGES, detected_lang + # Route to appropriate workflow (streaming) + # route_to_workflow returns AsyncIterator[str] when is_streaming=True + stream_result = await 
self.tool_classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=True, + ) + + async for sse_chunk in stream_result: + yield sse_chunk + + # Successfully completed streaming through classifier + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Tool classifier streaming completed" + ) + + # Log costs and timings + self._log_costs(costs_dict) + log_step_timings(timing_dict, request.chatId) + stream_ctx.mark_completed() + return # Exit after successful classifier routing + + except Exception as classifier_error: + logger.error( + f"[{request.chatId}] [{stream_ctx.stream_id}] Tool classifier error: {classifier_error}", + exc_info=True, + ) + + if not FeatureFlags.FALLBACK_TO_RAG_ON_ERROR: + # Don't fallback - raise error + raise + + # Fallback to RAG pipeline below + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Falling back to RAG streaming due to classifier error" + ) + # Continue to existing RAG streaming pipeline below + else: + logger.debug( + f"[{request.chatId}] [{stream_ctx.stream_id}] Tool classifier disabled - using RAG streaming" ) - yield self._format_sse(request.chatId, localized_msg) - yield self._format_sse(request.chatId, "END") - self._log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) - stream_ctx.mark_completed() - return - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Retrieved {len(relevant_chunks)} chunks " - ) + # Execute core RAG streaming pipeline + # NOTE: This only executes if tool classifier is disabled or fallback occurred + async for sse_chunk in self._stream_rag_pipeline( + request=request, + components=components, + stream_ctx=stream_ctx, + costs_dict=costs_dict, + timing_dict=timing_dict, + ): + yield sse_chunk + + # Pipeline completed successfully + return - # STEP 4: QUICK OUT-OF-SCOPE CHECK (blocking) - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Step 4: Checking if question is in scope" + except Exception as e: + 
error_id = generate_error_id() + stream_ctx.mark_error(error_id) + log_error_with_context( + logger, error_id, "streaming_orchestration", request.chatId, e ) - start_time = time.time() - is_out_of_scope = await components[ - "response_generator" - ].check_scope_quick( - question=refined_output.original_question, - chunks=relevant_chunks, - max_blocks=ResponseGenerationConstants.DEFAULT_MAX_BLOCKS, + yield self._format_sse(request.chatId, TECHNICAL_ISSUE_MESSAGE) + yield self._format_sse(request.chatId, "END") + + self._log_costs(costs_dict) + log_step_timings(timing_dict, request.chatId) + + # Update budget even on outer exception + self._update_connection_budget( + request.connection_id, costs_dict, request.environment ) - timing_dict["scope_check"] = time.time() - start_time - if is_out_of_scope: - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Question out of scope" - ) - detected_lang = getattr(request, "_detected_language", "en") - localized_msg = get_localized_message( - OUT_OF_SCOPE_MESSAGES, detected_lang + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + metadata={ + "error_id": error_id, + "error_type": type(e).__name__, + "streaming": True, + "streaming_failed": True, + "stream_id": stream_ctx.stream_id, + } ) - yield self._format_sse(request.chatId, localized_msg) - yield self._format_sse(request.chatId, "END") - self._log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) - stream_ctx.mark_completed() - return + langfuse.flush() - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Question is in scope " - ) + async def _stream_rag_pipeline( + self, + request: OrchestrationRequest, + components: Dict[str, Any], + stream_ctx: Any, + costs_dict: Dict[str, Dict[str, Any]], + timing_dict: Dict[str, float], + ) -> AsyncIterator[str]: + """ + Core RAG streaming pipeline without classifier routing. 
- # STEP 5: STREAM THROUGH NEMO GUARDRAILS (validation-first) - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Step 5: Starting streaming through NeMo Guardrails " - f"(validation-first, chunk_size=200)" - ) + This method contains the RAG pipeline logic that can be called directly + by workflows to avoid infinite recursion when the tool classifier is enabled. - streaming_step_start = time.time() + Pipeline Steps: + 1. Refine user prompt (blocking) + 2. Retrieve context chunks (blocking) + 3. Out-of-scope check (blocking) + 4. Stream through NeMo Guardrails (validation-first) - # Record history length before streaming - lm = dspy.settings.lm - history_length_before = ( - len(lm.history) if lm and hasattr(lm, "history") else 0 - ) + Args: + request: Orchestration request + components: Initialized service components (LLM, retriever, generator, guardrails) + stream_ctx: Stream context for tracking + costs_dict: Dictionary to accumulate costs + timing_dict: Dictionary to accumulate timings - async def bot_response_generator() -> AsyncIterator[str]: - """Generator that yields tokens from NATIVE DSPy LLM streaming.""" - async for token in stream_response_native( - agent=components["response_generator"], - question=refined_output.original_question, - chunks=relevant_chunks, - max_blocks=ResponseGenerationConstants.DEFAULT_MAX_BLOCKS, - ): - yield token + Yields: + SSE-formatted strings + """ + streaming_start_time = datetime.now() + detected_language = getattr(request, "_detected_language", "en") - # Create and store bot_generator in stream context for guaranteed cleanup - bot_generator = bot_response_generator() - stream_ctx.bot_generator = bot_generator + # STEP 1: REFINE USER PROMPT (blocking) + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] RAG Pipeline Step 1: Refining user prompt" + ) - # Wrap entire streaming logic in try/except for proper error handling - try: - # Track tokens and accumulated response in stream context - 
accumulated_response = [] # Track the full response for production storage - - if components["guardrails_adapter"]: - # Use NeMo's stream_with_guardrails helper method - # This properly integrates the external generator with NeMo's validation - chunk_count = 0 - - try: - async for validated_chunk in components[ - "guardrails_adapter" - ].stream_with_guardrails( - user_message=refined_output.original_question, - bot_message_generator=bot_generator, - ): - chunk_count += 1 - - # Estimate tokens (rough approximation: 4 characters = 1 token) - chunk_tokens = len(validated_chunk) // 4 - stream_ctx.token_count += chunk_tokens - - # Accumulate response for production storage - accumulated_response.append(validated_chunk) - - # Check token limit - if ( - stream_ctx.token_count - > StreamConfig.MAX_TOKENS_PER_STREAM - ): - logger.error( - f"[{request.chatId}] [{stream_ctx.stream_id}] Token limit exceeded: " - f"{stream_ctx.token_count} > {StreamConfig.MAX_TOKENS_PER_STREAM}" - ) - # Send error message and end stream immediately - yield self._format_sse( - request.chatId, STREAM_TOKEN_LIMIT_MESSAGE - ) - yield self._format_sse(request.chatId, "END") - - # Extract usage and log costs - usage_info = get_lm_usage_since( - history_length_before - ) - costs_dict["streaming_generation"] = usage_info - self._log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) - stream_ctx.mark_completed() - return # Stop immediately - cleanup happens in finally - - # Check for guardrail violations using blocked phrases - # Match the actual behavior of NeMo Guardrails adapter - is_guardrail_error = False - if isinstance(validated_chunk, str): - # Use the same blocked phrases as the guardrails adapter - blocked_phrases = GUARDRAILS_BLOCKED_PHRASES - chunk_lower = validated_chunk.strip().lower() - # Check if the chunk is primarily a blocked phrase - for phrase in blocked_phrases: - # More robust check: ensure the phrase is the main content - if ( - phrase.lower() in chunk_lower - and 
len(chunk_lower) - <= len(phrase.lower()) + 20 - ): - is_guardrail_error = True - break - - if is_guardrail_error: - logger.warning( - f"[{request.chatId}] [{stream_ctx.stream_id}] Guardrails violation detected" - ) - # Send the violation message and end stream - yield self._format_sse( - request.chatId, - OUTPUT_GUARDRAIL_VIOLATION_MESSAGE, - ) - yield self._format_sse(request.chatId, "END") - - # Log the violation - logger.warning( - f"[{request.chatId}] [{stream_ctx.stream_id}] Output blocked by guardrails: {validated_chunk}" - ) - - # Extract usage and log costs - usage_info = get_lm_usage_since( - history_length_before - ) - costs_dict["streaming_generation"] = usage_info - self._log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) - stream_ctx.mark_completed() - return # Cleanup happens in finally - - # Log first few chunks for debugging - if ( - chunk_count - <= ResponseGenerationConstants.DEFAULT_MAX_BLOCKS - ): - logger.debug( - f"[{request.chatId}] [{stream_ctx.stream_id}] Validated chunk {chunk_count}: {repr(validated_chunk)}" - ) - - # Yield the validated chunk to client - yield self._format_sse(request.chatId, validated_chunk) - except GeneratorExit: - # Client disconnected - stream_ctx.mark_cancelled() - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Client disconnected during guardrails streaming" - ) - raise + start_time = time.time() + refined_output, refiner_usage = self._refine_user_prompt( + llm_manager=components["llm_manager"], + original_message=request.message, + conversation_history=request.conversationHistory, + ) + timing_dict["prompt_refiner"] = time.time() - start_time + costs_dict["prompt_refiner"] = refiner_usage - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Stream completed successfully " - f"({chunk_count} chunks streamed)" - ) + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Prompt refinement complete" + ) - # Send document references before END token - doc_references = 
self._extract_document_references( - relevant_chunks - ) - if doc_references: - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Sending {len(doc_references)} document references before END" - ) - # Format references as markdown text - refs_text = "\n\n**References:**\n" + "\n".join( - f"{i + 1}. [{ref.document_url}]({ref.document_url})" - for i, ref in enumerate(doc_references) - ) - yield self._format_sse(request.chatId, refs_text) + # STEP 2: RETRIEVE CONTEXT CHUNKS (blocking) + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] RAG Pipeline Step 2: Retrieving context chunks" + ) - yield self._format_sse(request.chatId, "END") + try: + start_time = time.time() + relevant_chunks = await self._safe_retrieve_contextual_chunks( + components["contextual_retriever"], refined_output, request + ) + timing_dict["contextual_retrieval"] = time.time() - start_time + except ( + ContextualRetrieverInitializationError, + ContextualRetrievalFailureError, + ) as e: + logger.warning( + f"[{request.chatId}] [{stream_ctx.stream_id}] Contextual retrieval failed: {str(e)}" + ) + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Returning out-of-scope due to retrieval failure" + ) + localized_msg = get_localized_message( + OUT_OF_SCOPE_MESSAGES, detected_language + ) + yield self._format_sse(request.chatId, localized_msg) + yield self._format_sse(request.chatId, "END") + self._log_costs(costs_dict) + log_step_timings(timing_dict, request.chatId) + stream_ctx.mark_completed() + return - else: - # No guardrails - stream directly - logger.warning( - f"[{request.chatId}] [{stream_ctx.stream_id}] Streaming without guardrails validation" - ) - chunk_count = 0 - async for token in bot_generator: - chunk_count += 1 - - # Estimate tokens and check limit - token_estimate = len(token) // 4 - stream_ctx.token_count += token_estimate - - # Accumulate response for production storage - accumulated_response.append(token) - - if ( - stream_ctx.token_count - > 
StreamConfig.MAX_TOKENS_PER_STREAM - ): - logger.error( - f"[{request.chatId}] [{stream_ctx.stream_id}] Token limit exceeded (no guardrails): " - f"{stream_ctx.token_count} > {StreamConfig.MAX_TOKENS_PER_STREAM}" - ) - yield self._format_sse( - request.chatId, STREAM_TOKEN_LIMIT_MESSAGE - ) - yield self._format_sse(request.chatId, "END") - stream_ctx.mark_completed() - return # Stop immediately - cleanup in finally - - yield self._format_sse(request.chatId, token) - - # Send document references before END token - doc_references = self._extract_document_references( - relevant_chunks - ) - if doc_references: - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Sending {len(doc_references)} document references before END" - ) - # Format references as markdown text - refs_text = "\n\n**References:**\n" + "\n".join( - f"{i + 1}. [{ref.document_url}]({ref.document_url})" - for i, ref in enumerate(doc_references) - ) - yield self._format_sse(request.chatId, refs_text) + if len(relevant_chunks) == 0: + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] No relevant chunks - out of scope" + ) + localized_msg = get_localized_message( + OUT_OF_SCOPE_MESSAGES, detected_language + ) + yield self._format_sse(request.chatId, localized_msg) + yield self._format_sse(request.chatId, "END") + self._log_costs(costs_dict) + log_step_timings(timing_dict, request.chatId) + stream_ctx.mark_completed() + return - yield self._format_sse(request.chatId, "END") + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Retrieved {len(relevant_chunks)} chunks" + ) - # Extract usage information after streaming completes - usage_info = get_lm_usage_since(history_length_before) - costs_dict["streaming_generation"] = usage_info + # STEP 3: QUICK OUT-OF-SCOPE CHECK (blocking) + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] RAG Pipeline Step 3: Checking if question is in scope" + ) - # Record streaming generation time - timing_dict["streaming_generation"] = ( - 
time.time() - streaming_step_start - ) - # Mark output guardrails as inline (not blocking) - timing_dict["output_guardrails"] = 0.0 # Inline during streaming + start_time = time.time() + is_out_of_scope = await components["response_generator"].check_scope_quick( + question=refined_output.original_question, + chunks=relevant_chunks, + max_blocks=ResponseGenerationConstants.DEFAULT_MAX_BLOCKS, + ) + timing_dict["scope_check"] = time.time() - start_time - # Calculate streaming duration - streaming_duration = ( - datetime.now() - streaming_start_time - ).total_seconds() - logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Streaming completed in {streaming_duration:.2f}s" - ) + if is_out_of_scope: + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Question out of scope" + ) + localized_msg = get_localized_message( + OUT_OF_SCOPE_MESSAGES, detected_language + ) + yield self._format_sse(request.chatId, localized_msg) + yield self._format_sse(request.chatId, "END") + self._log_costs(costs_dict) + log_step_timings(timing_dict, request.chatId) + stream_ctx.mark_completed() + return - # Log costs and trace - self._log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + logger.info(f"[{request.chatId}] [{stream_ctx.stream_id}] Question is in scope") - # Update budget for the LLM connection - self._update_connection_budget( - request.connection_id, costs_dict, request.environment - ) + # STEP 4: STREAM THROUGH NEMO GUARDRAILS (validation-first) + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] RAG Pipeline Step 4: Starting streaming through NeMo Guardrails" + ) - if self.langfuse_config.langfuse_client: - langfuse = self.langfuse_config.langfuse_client - total_costs = calculate_total_costs(costs_dict) - - langfuse.update_current_generation( - model=components["llm_manager"] - .get_provider_info() - .get("model", "unknown"), - usage_details={ - "input": usage_info.get("total_prompt_tokens", 0), - "output": 
usage_info.get("total_completion_tokens", 0), - "total": usage_info.get("total_tokens", 0), - }, - cost_details={ - "total": total_costs.get("total_cost", 0.0), - }, - metadata={ - "streaming": True, - "streaming_duration_seconds": streaming_duration, - "chunks_streamed": chunk_count, - "cost_breakdown": costs_dict, - "chat_id": request.chatId, - "environment": request.environment, - "stream_id": stream_ctx.stream_id, - }, - ) - langfuse.flush() - - # Store inference data (for production and testing environments) - if request.environment in [ - PRODUCTION_DEPLOYMENT_ENVIRONMENT, - TEST_DEPLOYMENT_ENVIRONMENT, - ]: - try: - await self._store_production_inference_data_async( - request=request, - refined_output=refined_output, - relevant_chunks=relevant_chunks, - accumulated_response="".join(accumulated_response), - ) - except Exception as storage_error: - # Log storage error but don't fail the request + streaming_step_start = time.time() + + # Record history length before streaming + lm = dspy.settings.lm + history_length_before = len(lm.history) if lm and hasattr(lm, "history") else 0 + + async def bot_response_generator() -> AsyncIterator[str]: + """Generator that yields tokens from NATIVE DSPy LLM streaming.""" + async for token in stream_response_native( + agent=components["response_generator"], + question=refined_output.original_question, + chunks=relevant_chunks, + max_blocks=ResponseGenerationConstants.DEFAULT_MAX_BLOCKS, + ): + yield token + + # Create and store bot_generator in stream context for guaranteed cleanup + bot_generator = bot_response_generator() + stream_ctx.bot_generator = bot_generator + + # Wrap entire streaming logic in try/except for proper error handling + try: + # Track tokens and accumulated response in stream context + accumulated_response = [] # Track the full response for production storage + + if components["guardrails_adapter"]: + # Use NeMo's stream_with_guardrails helper method + chunk_count = 0 + + try: + async for validated_chunk 
in components[ + "guardrails_adapter" + ].stream_with_guardrails( + user_message=refined_output.original_question, + bot_message_generator=bot_generator, + ): + chunk_count += 1 + + # Estimate tokens (rough approximation: 4 characters = 1 token) + chunk_tokens = len(validated_chunk) // 4 + stream_ctx.token_count += chunk_tokens + + # Accumulate response for production storage + accumulated_response.append(validated_chunk) + + # Check token limit + if stream_ctx.token_count > StreamConfig.MAX_TOKENS_PER_STREAM: logger.error( - f"Storage failed for chat_id: {request.chatId}, environment: {request.environment} - {str(storage_error)}" + f"[{request.chatId}] [{stream_ctx.stream_id}] Token limit exceeded: " + f"{stream_ctx.token_count} > {StreamConfig.MAX_TOKENS_PER_STREAM}" + ) + yield self._format_sse( + request.chatId, STREAM_TOKEN_LIMIT_MESSAGE + ) + yield self._format_sse(request.chatId, "END") + + usage_info = get_lm_usage_since(history_length_before) + costs_dict["streaming_generation"] = usage_info + self._log_costs(costs_dict) + log_step_timings(timing_dict, request.chatId) + stream_ctx.mark_completed() + return + + # Check for guardrail violations + is_guardrail_error = False + if isinstance(validated_chunk, str): + blocked_phrases = GUARDRAILS_BLOCKED_PHRASES + chunk_lower = validated_chunk.strip().lower() + for phrase in blocked_phrases: + if ( + phrase.lower() in chunk_lower + and len(chunk_lower) <= len(phrase.lower()) + 20 + ): + is_guardrail_error = True + break + + if is_guardrail_error: + logger.warning( + f"[{request.chatId}] [{stream_ctx.stream_id}] Guardrails violation detected" + ) + yield self._format_sse( + request.chatId, OUTPUT_GUARDRAIL_VIOLATION_MESSAGE ) + yield self._format_sse(request.chatId, "END") - # Mark stream as completed successfully - stream_ctx.mark_completed() + usage_info = get_lm_usage_since(history_length_before) + costs_dict["streaming_generation"] = usage_info + self._log_costs(costs_dict) + log_step_timings(timing_dict, 
request.chatId) + stream_ctx.mark_completed() + return + # Yield the validated chunk to client + yield self._format_sse(request.chatId, validated_chunk) except GeneratorExit: - # Client disconnected - mark as cancelled stream_ctx.mark_cancelled() logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Client disconnected" - ) - usage_info = get_lm_usage_since(history_length_before) - costs_dict["streaming_generation"] = usage_info - self._log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) - - # Update budget even on client disconnect - self._update_connection_budget( - request.connection_id, costs_dict, request.environment + f"[{request.chatId}] [{stream_ctx.stream_id}] Client disconnected during guardrails streaming" ) raise - except Exception as stream_error: - error_id = generate_error_id() - stream_ctx.mark_error(error_id) - log_error_with_context( - logger, - error_id, - "streaming_generation", - request.chatId, - stream_error, - ) - yield self._format_sse(request.chatId, TECHNICAL_ISSUE_MESSAGE) - yield self._format_sse(request.chatId, "END") - usage_info = get_lm_usage_since(history_length_before) - costs_dict["streaming_generation"] = usage_info - self._log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Stream completed successfully ({chunk_count} chunks)" + ) - # Update budget even on streaming error - self._update_connection_budget( - request.connection_id, costs_dict, request.environment + # Send document references before END token + doc_references = self._extract_document_references(relevant_chunks) + if doc_references: + refs_text = "\n\n**References:**\n" + "\n".join( + f"{i + 1}. 
[{ref.document_url}]({ref.document_url})" + for i, ref in enumerate(doc_references) ) + yield self._format_sse(request.chatId, refs_text) - except Exception as e: - error_id = generate_error_id() - stream_ctx.mark_error(error_id) - log_error_with_context( - logger, error_id, "streaming_orchestration", request.chatId, e + yield self._format_sse(request.chatId, "END") + + else: + # No guardrails - stream directly + logger.warning( + f"[{request.chatId}] [{stream_ctx.stream_id}] Streaming without guardrails validation" ) + chunk_count = 0 + async for token in bot_generator: + chunk_count += 1 + + token_estimate = len(token) // 4 + stream_ctx.token_count += token_estimate + accumulated_response.append(token) + + if stream_ctx.token_count > StreamConfig.MAX_TOKENS_PER_STREAM: + logger.error( + f"[{request.chatId}] [{stream_ctx.stream_id}] Token limit exceeded (no guardrails)" + ) + yield self._format_sse( + request.chatId, STREAM_TOKEN_LIMIT_MESSAGE + ) + yield self._format_sse(request.chatId, "END") + stream_ctx.mark_completed() + return + + yield self._format_sse(request.chatId, token) + + # Send document references before END token + doc_references = self._extract_document_references(relevant_chunks) + if doc_references: + refs_text = "\n\n**References:**\n" + "\n".join( + f"{i + 1}. 
[{ref.document_url}]({ref.document_url})" + for i, ref in enumerate(doc_references) + ) + yield self._format_sse(request.chatId, refs_text) - yield self._format_sse(request.chatId, TECHNICAL_ISSUE_MESSAGE) yield self._format_sse(request.chatId, "END") - self._log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + # Extract usage information after streaming completes + usage_info = get_lm_usage_since(history_length_before) + costs_dict["streaming_generation"] = usage_info - # Update budget even on outer exception - self._update_connection_budget( - request.connection_id, costs_dict, request.environment + # Record timings + timing_dict["streaming_generation"] = time.time() - streaming_step_start + timing_dict["output_guardrails"] = 0.0 # Inline during streaming + + # Calculate streaming duration + streaming_duration = (datetime.now() - streaming_start_time).total_seconds() + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Streaming completed in {streaming_duration:.2f}s" + ) + + # Log costs and trace + self._log_costs(costs_dict) + log_step_timings(timing_dict, request.chatId) + + # Update budget + self._update_connection_budget( + request.connection_id, costs_dict, request.environment + ) + + # Langfuse tracking + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + total_costs = calculate_total_costs(costs_dict) + + langfuse.update_current_generation( + model=components["llm_manager"] + .get_provider_info() + .get("model", "unknown"), + usage_details={ + "input": usage_info.get("total_prompt_tokens", 0), + "output": usage_info.get("total_completion_tokens", 0), + "total": usage_info.get("total_tokens", 0), + }, + cost_details={"total": total_costs.get("total_cost", 0.0)}, + metadata={ + "streaming": True, + "streaming_duration_seconds": streaming_duration, + "chunks_streamed": chunk_count, + "cost_breakdown": costs_dict, + "chat_id": request.chatId, + "environment": request.environment, + 
"stream_id": stream_ctx.stream_id, + }, ) + langfuse.flush() - if self.langfuse_config.langfuse_client: - langfuse = self.langfuse_config.langfuse_client - langfuse.update_current_generation( - metadata={ - "error_id": error_id, - "error_type": type(e).__name__, - "streaming": True, - "streaming_failed": True, - "stream_id": stream_ctx.stream_id, - } + # Store inference data (for production and testing environments) + if request.environment in [ + PRODUCTION_DEPLOYMENT_ENVIRONMENT, + TEST_DEPLOYMENT_ENVIRONMENT, + ]: + try: + await self._store_production_inference_data_async( + request=request, + refined_output=refined_output, + relevant_chunks=relevant_chunks, + accumulated_response="".join(accumulated_response), ) - langfuse.flush() + except Exception as storage_error: + logger.error( + f"Storage failed for chat_id: {request.chatId}, environment: {request.environment} - {str(storage_error)}" + ) + + # Mark stream as completed successfully + stream_ctx.mark_completed() + + except GeneratorExit: + # Client disconnected - mark as cancelled + stream_ctx.mark_cancelled() + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Client disconnected" + ) + usage_info = get_lm_usage_since(history_length_before) + costs_dict["streaming_generation"] = usage_info + self._log_costs(costs_dict) + log_step_timings(timing_dict, request.chatId) + + # Update budget even on client disconnect + self._update_connection_budget( + request.connection_id, costs_dict, request.environment + ) + raise + except Exception as stream_error: + error_id = generate_error_id() + stream_ctx.mark_error(error_id) + log_error_with_context( + logger, + error_id, + "streaming_generation", + request.chatId, + stream_error, + ) + yield self._format_sse(request.chatId, TECHNICAL_ISSUE_MESSAGE) + yield self._format_sse(request.chatId, "END") + + usage_info = get_lm_usage_since(history_length_before) + costs_dict["streaming_generation"] = usage_info + self._log_costs(costs_dict) + 
log_step_timings(timing_dict, request.chatId) + + # Update budget even on streaming error + self._update_connection_budget( + request.connection_id, costs_dict, request.environment + ) def _format_sse(self, chat_id: str, content: str) -> str: """ @@ -992,7 +1108,7 @@ def _log_generator_status(self, components: Dict[str, Any]) -> None: logger.warning(f" Generator: Status check failed - {str(e)}") @observe(name="execute_orchestration_pipeline", as_type="span") - def _execute_orchestration_pipeline( + async def _execute_orchestration_pipeline( self, request: OrchestrationRequest, components: Dict[str, Any], @@ -1006,7 +1122,7 @@ def _execute_orchestration_pipeline( # Step 1: Input Guardrails Check if components["guardrails_adapter"]: start_time = time.time() - input_blocked_response = self.handle_input_guardrails( + input_blocked_response = await self.handle_input_guardrails( components["guardrails_adapter"], request, costs_dict ) timing_dict["input_guardrails_check"] = time.time() - start_time @@ -1026,7 +1142,7 @@ def _execute_orchestration_pipeline( # Step 3: Retrieve relevant chunks using contextual retrieval try: start_time = time.time() - relevant_chunks = self._safe_retrieve_contextual_chunks_sync( + relevant_chunks = await self._safe_retrieve_contextual_chunks( components["contextual_retriever"], refined_output, request ) timing_dict["contextual_retrieval"] = time.time() - start_time @@ -1057,7 +1173,7 @@ def _execute_orchestration_pipeline( # Step 5: Output Guardrails Check # Apply guardrails to all response types for consistent safety across all environments start_time = time.time() - output_guardrails_response = self.handle_output_guardrails( + output_guardrails_response = await self.handle_output_guardrails( components["guardrails_adapter"], generated_response, request, @@ -1132,14 +1248,14 @@ def _safe_initialize_response_generator( ) return None - def handle_input_guardrails( + async def handle_input_guardrails( self, guardrails_adapter: 
NeMoRailsAdapter, request: OrchestrationRequest, costs_dict: Dict[str, Dict[str, Any]], ) -> Union[OrchestrationResponse, TestOrchestrationResponse, None]: """Check input guardrails and return blocked response if needed.""" - input_check_result = self._check_input_guardrails( + input_check_result = await self._check_input_guardrails_async( guardrails_adapter=guardrails_adapter, user_message=request.message, costs_dict=costs_dict, @@ -1186,21 +1302,23 @@ def _safe_retrieve_contextual_chunks_sync( """Synchronous wrapper for _safe_retrieve_contextual_chunks for non-streaming pipeline.""" try: - # Safely execute the async method in the sync context + # Check if there's a running event loop try: asyncio.get_running_loop() - # If we get here, there's a running event loop; cannot block synchronously - raise RuntimeError( + # If we get here, there IS a running event loop; cannot use asyncio.run() + raise ContextualRetrievalFailureError( "Cannot call _safe_retrieve_contextual_chunks_sync from an async context with a running event loop. " "Please use the async version _safe_retrieve_contextual_chunks instead." 
) except RuntimeError: - # No running loop, safe to use asyncio.run() - return asyncio.run( - self._safe_retrieve_contextual_chunks( - contextual_retriever, refined_output, request - ) + # No running loop (get_running_loop raised RuntimeError), safe to use asyncio.run() + pass + + return asyncio.run( + self._safe_retrieve_contextual_chunks( + contextual_retriever, refined_output, request ) + ) except ( ContextualRetrieverInitializationError, ContextualRetrievalFailureError, @@ -1255,7 +1373,7 @@ async def _safe_retrieve_contextual_chunks( f"Contextual chunk retrieval failed: {str(retrieval_error)}" ) from retrieval_error - def handle_output_guardrails( + async def handle_output_guardrails( self, guardrails_adapter: Optional[NeMoRailsAdapter], generated_response: Union[OrchestrationResponse, TestOrchestrationResponse], @@ -1273,7 +1391,7 @@ def handle_output_guardrails( if should_check_guardrails: # Type assertion: should_check_guardrails guarantees guardrails_adapter is not None assert guardrails_adapter is not None - output_check_result = self._check_output_guardrails( + output_check_result = await self._check_output_guardrails( guardrails_adapter=guardrails_adapter, assistant_message=generated_response.content, costs_dict=costs_dict, @@ -1694,7 +1812,7 @@ def _check_input_guardrails( ) @observe(name="check_output_guardrails", as_type="span") - def _check_output_guardrails( + async def _check_output_guardrails( self, guardrails_adapter: NeMoRailsAdapter, assistant_message: str, @@ -1714,7 +1832,7 @@ def _check_output_guardrails( logger.info("Starting output guardrails check") try: - result = guardrails_adapter.check_output(assistant_message) + result = await guardrails_adapter.check_output_async(assistant_message) # Store guardrail costs costs_dict["output_guardrails"] = result.usage diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py index 8bdc80c..2a929db 100644 --- a/src/llm_orchestration_service_api.py +++ 
b/src/llm_orchestration_service_api.py @@ -225,7 +225,7 @@ def health_check(request: Request) -> dict[str, str]: summary="Process LLM orchestration request", description="Processes a user message through the LLM orchestration pipeline", ) -def orchestrate_llm_request( +async def orchestrate_llm_request( http_request: Request, request: OrchestrationRequest, ) -> OrchestrationResponse: @@ -262,7 +262,7 @@ def orchestrate_llm_request( ) # Process the request - response = orchestration_service.process_orchestration_request(request) + response = await orchestration_service.process_orchestration_request(request) logger.info(f"Successfully processed request for chatId: {request.chatId}") return response @@ -287,7 +287,7 @@ def orchestrate_llm_request( summary="Process test LLM orchestration request", description="Processes a simplified test message through the LLM orchestration pipeline", ) -def test_orchestrate_llm_request( +async def test_orchestrate_llm_request( http_request: Request, request: TestOrchestrationRequest, ) -> TestOrchestrationResponse: @@ -341,7 +341,9 @@ def test_orchestrate_llm_request( logger.info(f"This is full request constructed for testing: {full_request}") # Process the request using the same logic - response = orchestration_service.process_orchestration_request(full_request) + response = await orchestration_service.process_orchestration_request( + full_request + ) # If response is already TestOrchestrationResponse (when environment is testing), return it directly if isinstance(response, TestOrchestrationResponse): diff --git a/src/llm_orchestrator_config/feature_flags.py b/src/llm_orchestrator_config/feature_flags.py new file mode 100644 index 0000000..d0d3fff --- /dev/null +++ b/src/llm_orchestrator_config/feature_flags.py @@ -0,0 +1,82 @@ +"""Feature flags for tool classifier system.""" + +import os +from loguru import logger + + +class FeatureFlags: + """ + Feature flags for controlling tool classifier and workflow behavior. 
+ + These flags enable safe deployment and gradual rollout of the multi-workflow + system. They can be controlled via environment variables. + + Deployment Strategy: + 1. Start with TOOL_CLASSIFIER_ENABLED=false (use existing RAG only) + 2. Enable classifier with all workflows disabled for testing + 3. Enable workflows one at a time (SERVICE → CONTEXT → etc.) + 4. Monitor and rollback if issues occur + + Environment Variables: + - TOOL_CLASSIFIER_ENABLED: Master switch for classifier (default: false) + - SERVICE_WORKFLOW_ENABLED: Enable Layer 1 service workflow (default: true) + - CONTEXT_WORKFLOW_ENABLED: Enable Layer 2 context workflow (default: true) + """ + + # Master switch for tool classifier + # When False: Uses existing RAG-only pipeline (backward compatibility) + # When True: Routes through tool classifier + TOOL_CLASSIFIER_ENABLED = ( + os.getenv("TOOL_CLASSIFIER_ENABLED", "false").lower() == "true" + ) + + # Individual workflow toggles + # These only take effect when TOOL_CLASSIFIER_ENABLED=true + SERVICE_WORKFLOW_ENABLED = ( + os.getenv("SERVICE_WORKFLOW_ENABLED", "true").lower() == "true" + ) + CONTEXT_WORKFLOW_ENABLED = ( + os.getenv("CONTEXT_WORKFLOW_ENABLED", "true").lower() == "true" + ) + + # RAG and OOD workflows are always enabled (no flags) + # RAG is the core fallback, OOD is the final safety net + + # Safety: Fallback to RAG if tool classifier encounters errors + # This ensures service continues working even if classifier fails + FALLBACK_TO_RAG_ON_ERROR = True + + @classmethod + def log_configuration(cls): + """Log current feature flag configuration (useful for debugging).""" + logger.info("Tool Classifier Feature Flags:") + logger.info(f" TOOL_CLASSIFIER_ENABLED: {cls.TOOL_CLASSIFIER_ENABLED}") + if cls.TOOL_CLASSIFIER_ENABLED: + logger.info(f" SERVICE_WORKFLOW_ENABLED: {cls.SERVICE_WORKFLOW_ENABLED}") + logger.info(f" CONTEXT_WORKFLOW_ENABLED: {cls.CONTEXT_WORKFLOW_ENABLED}") + logger.info(f" FALLBACK_TO_RAG_ON_ERROR: 
{cls.FALLBACK_TO_RAG_ON_ERROR}") + else: + logger.info(" (Classifier disabled - using RAG-only pipeline)") + + @classmethod + def is_workflow_enabled(cls, workflow_name: str) -> bool: + """ + Check if a specific workflow is enabled. + + Args: + workflow_name: Name of workflow ("service", "context", "rag", "ood") + + Returns: + True if workflow is enabled and classifier is enabled + """ + if not cls.TOOL_CLASSIFIER_ENABLED: + return False + + workflow_flags = { + "service": cls.SERVICE_WORKFLOW_ENABLED, + "context": cls.CONTEXT_WORKFLOW_ENABLED, + "rag": True, # Always enabled + "ood": True, # Always enabled + } + + return workflow_flags.get(workflow_name.lower(), False) diff --git a/src/tool_classifier/__init__.py b/src/tool_classifier/__init__.py new file mode 100644 index 0000000..38b861d --- /dev/null +++ b/src/tool_classifier/__init__.py @@ -0,0 +1,20 @@ +""" +Tool Classifier Module - Multi-workflow routing system. + +This module implements a layer-wise workflow routing system that determines +whether a user query should be handled by: +- Layer 1: Service Workflow (external API calls) +- Layer 2: Context Workflow (conversation history/greetings) +- Layer 3: RAG Workflow (knowledge base retrieval) +- Layer 4: OOD Workflow (out-of-domain fallback) +""" + +from .classifier import ToolClassifier +from .enums import WorkflowType +from .models import ClassificationResult + +__all__ = [ + "ToolClassifier", + "WorkflowType", + "ClassificationResult", +] diff --git a/src/tool_classifier/base_workflow.py b/src/tool_classifier/base_workflow.py new file mode 100644 index 0000000..50faf7a --- /dev/null +++ b/src/tool_classifier/base_workflow.py @@ -0,0 +1,118 @@ +"""Abstract base class for workflow executors.""" + +from abc import ABC, abstractmethod +from typing import Any, AsyncIterator, Dict, Optional + +from models.request_models import OrchestrationRequest, OrchestrationResponse + + +class BaseWorkflow(ABC): + """ + Abstract base class for all workflow executors. 
+ + This class defines the contract that all workflow implementations must follow. + Each workflow must implement both streaming and non-streaming execution methods. + + Design Pattern: Strategy Pattern + - Each workflow is a concrete strategy for handling queries + - ToolClassifier acts as the context that selects the appropriate strategy + + Workflows: + - ServiceWorkflowExecutor: Handles external service/API calls + - ContextWorkflowExecutor: Handles conversation history and greetings + - RAGWorkflowExecutor: Handles knowledge base retrieval (existing) + - OODWorkflowExecutor: Handles out-of-domain queries + + Return None Pattern: + Workflows return None when they cannot handle a query, triggering + fallback to the next layer in the classification chain. + """ + + @abstractmethod + async def execute_async( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + ) -> Optional[OrchestrationResponse]: + """ + Execute workflow in non-streaming mode. + + This method is called for the /orchestrate and /orchestrate/test endpoints + which return complete responses in a single HTTP response. + + Args: + request: The orchestration request containing user query and context + context: Workflow-specific metadata from ClassificationResult.metadata + + Returns: + OrchestrationResponse if workflow can handle this query + None if workflow cannot handle (triggers fallback to next layer) + + Example: + # If Service workflow detects no matching service: + return None # Falls back to Context workflow + + # If Service workflow successfully executes: + return OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=False, + inputGuardFailed=False, + content="EUR/USD rate is 1.0850" + ) + """ + pass + + @abstractmethod + async def execute_streaming( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + ) -> Optional[AsyncIterator[str]]: + """ + Execute workflow in streaming mode (Server-Sent Events). 
+ + This method is called for the /orchestrate/stream endpoint which yields + response chunks progressively to the client. + + Args: + request: The orchestration request containing user query and context + context: Workflow-specific metadata from ClassificationResult.metadata + + Returns: + AsyncIterator[str] yielding SSE-formatted strings if workflow can handle + None if workflow cannot handle (triggers fallback to next layer) + + SSE Format: + Each yielded string should be formatted as: + 'data: {"chatId": "...", "payload": {"content": "..."}, ...}\\n\\n' + + Streaming Types: + - Real streaming (RAG): LLM generates tokens progressively + - Simulated streaming (Service/Context): Complete response chunked for UX + + Example: + # If Context workflow cannot answer from history: + return None # Falls back to RAG workflow + + # If Context workflow can answer: + async def stream_response(): + # Validate complete response first + answer = "The rate I mentioned was 1.08" + is_safe = await validate_with_guardrails(answer) + + if not is_safe: + yield format_sse(chatId, VIOLATION_MESSAGE) + yield format_sse(chatId, "END") + return + + # Stream validated response token-by-token + for chunk in split_into_chunks(answer): + yield format_sse(chatId, chunk) + await asyncio.sleep(0.01) + + yield format_sse(chatId, "END") + + return stream_response() + """ + pass diff --git a/src/tool_classifier/classifier.py b/src/tool_classifier/classifier.py new file mode 100644 index 0000000..71a4592 --- /dev/null +++ b/src/tool_classifier/classifier.py @@ -0,0 +1,338 @@ +"""Main tool classifier for workflow routing.""" + +from typing import Any, AsyncIterator, Dict, List, Literal, Union, overload +from loguru import logger + +from models.request_models import ( + ConversationItem, + OrchestrationRequest, + OrchestrationResponse, +) +from tool_classifier.enums import WorkflowType, WORKFLOW_DISPLAY_NAMES +from tool_classifier.models import ClassificationResult +from tool_classifier.workflows 
class ToolClassifier:
    """
    Main classifier that determines which workflow should handle user queries.

    Implements a layer-wise filtering approach:
        Layer 1: Service Workflow -> External API calls
        Layer 2: Context Workflow -> Conversation history/greetings
        Layer 3: RAG Workflow     -> Knowledge base retrieval
        Layer 4: OOD Workflow     -> Out-of-domain fallback

    Each layer is tried in sequence. If a layer cannot handle the query
    (returns None), the classifier falls back to the next layer.

    Architecture:
        - Strategy Pattern: each workflow is a pluggable strategy
        - Chain of Responsibility: layers form a fallback chain
        - Dependency Injection: LLM manager and connections injected from
          the main service
    """

    def __init__(
        self,
        llm_manager: Any,
        orchestration_service: Any,
    ):
        """
        Initialize tool classifier with required dependencies.

        Args:
            llm_manager: LLM manager for making LLM calls (intent detection,
                context check)
            orchestration_service: Reference to main orchestration service
                (for RAG workflow)
        """
        self.llm_manager = llm_manager
        self.orchestration_service = orchestration_service

        # Initialize workflow executors (one strategy per layer)
        self.service_workflow = ServiceWorkflowExecutor(
            llm_manager=llm_manager,
        )
        self.context_workflow = ContextWorkflowExecutor(
            llm_manager=llm_manager,
        )
        self.rag_workflow = RAGWorkflowExecutor(
            orchestration_service=orchestration_service,
        )
        self.ood_workflow = OODWorkflowExecutor()

        logger.info("Tool classifier initialized with all workflow executors")

    async def classify(
        self,
        query: str,
        conversation_history: List[ConversationItem],
        language: str,
    ) -> ClassificationResult:
        """
        Classify a user query to determine which workflow should handle it.

        Implements layer-wise classification logic:
            1. Check if SERVICE workflow can handle (intent detection)
            2. Check if CONTEXT workflow can handle (greeting/history check)
            3. Default to RAG workflow (knowledge retrieval)

        Args:
            query: User's query string
            conversation_history: List of previous conversation messages
            language: Detected language code (e.g., 'en', 'et')

        Returns:
            ClassificationResult indicating which workflow to use

        Note:
            In this skeleton, always defaults to RAG. Full implementation
            will add Layer 1 and Layer 2 logic in separate tasks.
        """
        # NOTE(review): logs a prefix of the raw user query — confirm this is
        # acceptable under the project's PII/logging policy.
        logger.info(f"Classifying query: {query[:100]}...")

        # TODO: LAYER 1 - SERVICE WORKFLOW DETECTION
        # 1. Count active services in database
        # 2. If count > 50: use Qdrant semantic search for top 20 services
        # 3. If count <= 50: use all services
        # 4. Call LLM to detect intent and extract entities
        # 5. If intent detected and service valid: return SERVICE classification

        # TODO: LAYER 2 - CONTEXT WORKFLOW DETECTION
        # 1. Check if query is a greeting using LLM
        # 2. If greeting: return CONTEXT classification
        # 3. If conversation_history exists: check if query references history
        # 4. Call LLM to determine if history contains the answer
        # 5. If answerable from history: return CONTEXT classification

        # LAYER 3 - RAG WORKFLOW (DEFAULT)
        # RAG handles the query or produces an OOD response if no chunks match.
        logger.info("Defaulting to RAG workflow (Layers 1-2 not implemented)")
        return ClassificationResult(
            workflow=WorkflowType.RAG,
            confidence=1.0,
            metadata={},
            reasoning="Default to RAG workflow (service and context layers not implemented)",
        )

    @overload
    async def route_to_workflow(
        self,
        classification: ClassificationResult,
        request: OrchestrationRequest,
        is_streaming: Literal[False] = False,
    ) -> OrchestrationResponse: ...

    @overload
    async def route_to_workflow(
        self,
        classification: ClassificationResult,
        request: OrchestrationRequest,
        is_streaming: Literal[True],
    ) -> AsyncIterator[str]: ...

    async def route_to_workflow(
        self,
        classification: ClassificationResult,
        request: OrchestrationRequest,
        is_streaming: bool = False,
    ) -> Union[OrchestrationResponse, AsyncIterator[str]]:
        """
        Route request to appropriate workflow based on classification.

        Implements a fallback chain: if a workflow cannot handle the query
        (returns None), the next layer is tried, so queries always get
        handled even if the primary workflow fails.

        Args:
            classification: Classification result from classify()
            request: Original orchestration request
            is_streaming: Whether to use streaming mode (for /orchestrate/stream)

        Returns:
            OrchestrationResponse for non-streaming mode
            AsyncIterator[str] for streaming mode

        Fallback Chain:
            SERVICE -> CONTEXT -> RAG -> OOD
        """
        chat_id = request.chatId
        workflow_name = WORKFLOW_DISPLAY_NAMES.get(
            classification.workflow, classification.workflow.value
        )

        logger.info(
            f"[{chat_id}] Routing to {workflow_name} "
            f"(streaming: {is_streaming}, confidence: {classification.confidence:.2f})"
        )

        # Get the workflow executor for the classified layer
        workflow = self._get_workflow_executor(classification.workflow)

        if is_streaming:
            # STREAMING MODE: /orchestrate/stream — return the async iterator.
            return self._execute_with_fallback_streaming(
                workflow=workflow,
                request=request,
                context=classification.metadata,
                start_layer=classification.workflow,
            )
        # NON-STREAMING MODE: /orchestrate and /orchestrate/test endpoints.
        return await self._execute_with_fallback_async(
            workflow=workflow,
            request=request,
            context=classification.metadata,
            start_layer=classification.workflow,
        )

    def _get_workflow_executor(self, workflow_type: WorkflowType) -> Any:
        """Get workflow executor instance for given workflow type."""
        workflow_map = {
            WorkflowType.SERVICE: self.service_workflow,
            WorkflowType.CONTEXT: self.context_workflow,
            WorkflowType.RAG: self.rag_workflow,
            WorkflowType.OOD: self.ood_workflow,
        }
        return workflow_map[workflow_type]

    async def _rag_fallback_async(
        self, request: OrchestrationRequest
    ) -> OrchestrationResponse:
        """Run the RAG workflow as a fallback; it must always yield a result."""
        rag_result = await self.rag_workflow.execute_async(request, {})
        if rag_result is None:
            # RAG is documented to handle OOD internally and never return
            # None, so a None here indicates a programming error.
            raise RuntimeError("RAG workflow returned None unexpectedly")
        return rag_result

    async def _rag_fallback_streaming(
        self, request: OrchestrationRequest
    ) -> AsyncIterator[str]:
        """Stream from the RAG workflow as a fallback."""
        streaming_result = await self.rag_workflow.execute_streaming(request, {})
        if streaming_result is None:
            raise RuntimeError("RAG workflow returned None unexpectedly")
        async for chunk in streaming_result:
            yield chunk

    async def _execute_with_fallback_async(
        self,
        workflow: Any,
        request: OrchestrationRequest,
        context: Dict[str, Any],
        start_layer: WorkflowType,
    ) -> OrchestrationResponse:
        """
        Execute workflow with fallback to subsequent layers (non-streaming).

        TODO: Implement the full fallback chain over WORKFLOW_LAYER_ORDER.
        Currently falls back directly to RAG.

        Raises:
            RuntimeError: If the RAG workflow itself returns None.
        """
        chat_id = request.chatId
        workflow_name = WORKFLOW_DISPLAY_NAMES.get(start_layer, start_layer.value)

        logger.info(f"[{chat_id}] Executing {workflow_name} (non-streaming)")

        try:
            result = await workflow.execute_async(request, context)
        except Exception as e:
            logger.error(f"[{chat_id}] Error executing {workflow_name}: {e}")
            # FIX: if the failing workflow is already the RAG workflow,
            # "falling back to RAG" would just re-run it and duplicate the
            # failure (and any side effects) — re-raise instead.
            if workflow is self.rag_workflow:
                raise
            logger.info(f"[{chat_id}] Falling back to RAG due to error")
            return await self._rag_fallback_async(request)

        if result is not None:
            logger.info(f"[{chat_id}] {workflow_name} handled successfully")
            return result

        # FIX: don't "fall back" to the workflow that just returned None.
        if workflow is self.rag_workflow:
            raise RuntimeError("RAG workflow returned None unexpectedly")

        # TODO: Implement fallback to the next layer in WORKFLOW_LAYER_ORDER.
        # For now, if the workflow returns None, use RAG as the fallback.
        logger.warning(
            f"[{chat_id}] {workflow_name} returned None, "
            f"falling back to RAG workflow"
        )
        return await self._rag_fallback_async(request)

    async def _execute_with_fallback_streaming(
        self,
        workflow: Any,
        request: OrchestrationRequest,
        context: Dict[str, Any],
        start_layer: WorkflowType,
    ) -> AsyncIterator[str]:
        """
        Execute workflow with fallback to subsequent layers (streaming).

        TODO: Implement the full fallback chain over WORKFLOW_LAYER_ORDER.
        Currently falls back directly to RAG.

        Raises:
            RuntimeError: If the RAG workflow itself returns None.
        """
        chat_id = request.chatId
        workflow_name = WORKFLOW_DISPLAY_NAMES.get(start_layer, start_layer.value)

        logger.info(f"[{chat_id}] Executing {workflow_name} (streaming)")

        # Tracks whether any chunk already reached the client; once data has
        # been emitted we must not stream a second (fallback) response.
        yielded_any = False
        try:
            result = await workflow.execute_streaming(request, context)

            if result is not None:
                logger.info(f"[{chat_id}] {workflow_name} streaming started")
                async for chunk in result:
                    yielded_any = True
                    yield chunk
                return
        except Exception as e:
            logger.error(f"[{chat_id}] Error executing {workflow_name} streaming: {e}")
            # FIX: never fall back after chunks were already emitted (the
            # client would receive two interleaved responses), and never
            # retry RAG against itself.
            if yielded_any or workflow is self.rag_workflow:
                raise
            logger.info(f"[{chat_id}] Falling back to RAG streaming due to error")
            async for chunk in self._rag_fallback_streaming(request):
                yield chunk
            return

        # FIX: don't "fall back" to the workflow that just returned None.
        if workflow is self.rag_workflow:
            raise RuntimeError("RAG workflow returned None unexpectedly")

        # TODO: Implement fallback to the next layer in WORKFLOW_LAYER_ORDER.
        logger.warning(
            f"[{chat_id}] {workflow_name} returned None, "
            f"falling back to RAG workflow streaming"
        )
        async for chunk in self._rag_fallback_streaming(request):
            yield chunk
class ClassificationResult(BaseModel):
    """
    Result of query classification by the tool classifier.

    Encapsulates which workflow should handle a user query, together with a
    confidence score, workflow-specific metadata, and a human-readable
    explanation of the decision.

    Attributes:
        workflow: The workflow type that should handle this query.
        confidence: Confidence score (0.0-1.0) for this classification.
        metadata: Workflow-specific data (e.g., service_id, intent, entities).
        reasoning: Human-readable explanation of why this workflow was chosen.
    """

    workflow: WorkflowType = Field(
        ...,
        description="Which workflow should handle this query",
    )
    confidence: float = Field(
        default=1.0,
        ge=0.0,
        le=1.0,
        description="Confidence score for this classification",
    )
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Workflow-specific data passed to executor",
    )
    reasoning: Optional[str] = Field(
        default=None,
        description="Explanation of classification decision",
    )


class ServiceWorkflowMetadata(BaseModel):
    """
    Metadata specific to Service Workflow execution.

    TODO: Will be populated by the service discovery logic with the
    identified service, the detected user intent, and the entities
    extracted as parameters for the service call.
    """

    # Identified service to call (None until discovery runs).
    service_id: Optional[str] = Field(
        default=None,
        description="ID of the service to execute",
    )
    # Detected user intent / target service name.
    intent: Optional[str] = Field(
        default=None,
        description="Detected user intent/service name",
    )
    # Parameters extracted from the query for the service call.
    entities: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Extracted entities/parameters",
    )
class ContextWorkflowExecutor(BaseWorkflow):
    """
    Layer 2 executor: greetings and conversation-history queries.

    Detects (via LLM for multilingual coverage — no regex patterns):
        - Greetings: "Hello", "Thanks", "Goodbye"
        - History references: "What did you say earlier?", "Can you repeat that?"

    Status: SKELETON — both entry points currently return None, so the
    classifier falls back to the RAG layer.
    TODO: Implement greeting/context detection, answer extraction, guardrails.
    """

    def __init__(self, llm_manager: Any):
        """
        Initialize context workflow executor.

        Args:
            llm_manager: LLM manager used for context analysis.
        """
        self.llm_manager = llm_manager
        logger.info("Context workflow executor initialized (skeleton)")

    async def execute_async(
        self,
        request: OrchestrationRequest,
        context: Dict[str, Any],
    ) -> Optional[OrchestrationResponse]:
        """
        Execute context workflow in non-streaming mode.

        TODO: Check greeting (LLM) -> generate response, OR check history
        (last 10 turns) -> extract answer -> validate with guardrails.
        Return None if the query cannot be answered from context.

        Args:
            request: Orchestration request with user query and history.
            context: Metadata with is_greeting, can_answer_from_history flags.

        Returns:
            OrchestrationResponse with a context-based answer, or None to
            fall back to the next layer (RAG).
        """
        logger.debug(
            f"[{request.chatId}] Context workflow execute_async called "
            f"(not implemented - returning None)"
        )
        # Skeleton: defer to the next layer (RAG).
        return None

    async def execute_streaming(
        self,
        request: OrchestrationRequest,
        context: Dict[str, Any],
    ) -> Optional[AsyncIterator[str]]:
        """
        Execute context workflow in streaming mode.

        TODO: Get the answer (greeting/history) -> validate BEFORE streaming
        -> chunk and yield as SSE. Return None if context cannot answer.

        Args:
            request: Orchestration request with user query and history.
            context: Metadata with is_greeting, can_answer_from_history flags.

        Returns:
            AsyncIterator yielding SSE strings, or None to fall back to the
            next layer (RAG).
        """
        logger.debug(
            f"[{request.chatId}] Context workflow execute_streaming called "
            f"(not implemented - returning None)"
        )
        # Skeleton: defer to the next layer (RAG).
        return None
(not in scope) + - "Tell me a joke" (not government service) + - Questions with no relevant knowledge + + Implementation Status: SKELETON + Returns None (will implement to return OOD message) + + TODO - Implementation (Simple): + - Return localized OUT_OF_SCOPE_MESSAGE + - Set questionOutOfLLMScope flag to True + - For streaming: chunk message and stream for UX consistency + """ + + def __init__(self): + """Initialize OOD workflow executor.""" + logger.info("OOD workflow executor initialized (skeleton)") + + async def execute_async( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + ) -> Optional[OrchestrationResponse]: + """ + Execute OOD workflow in non-streaming mode. + + TODO: Implement OOD response: + ```python + from src.llm_orchestrator_config.llm_ochestrator_constants import ( + get_localized_message, + OUT_OF_SCOPE_MESSAGES, + ) + + # Get detected language from request + detected_language = getattr(request, "_detected_language", "en") + + # Get localized message + ood_message = get_localized_message(OUT_OF_SCOPE_MESSAGES, detected_language) + + return OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=True, # Flag as out of scope + inputGuardFailed=False, + content=ood_message, + ) + ``` + + Args: + request: Orchestration request with user query + context: Unused (OOD doesn't need metadata) + + Returns: + OrchestrationResponse with OOD message + Never returns None (this is final fallback) + """ + logger.info( + f"[{request.chatId}] OOD workflow execute_async called " + f"(not implemented - returning None for now)" + ) + + # TODO: Implement OOD response logic here + # For now, return None (will be implemented as simple message return) + return None + + async def execute_streaming( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + ) -> Optional[AsyncIterator[str]]: + """ + Execute OOD workflow in streaming mode. 
+ + TODO: Implement OOD streaming: + ```python + from src.llm_orchestrator_config.llm_ochestrator_constants import ( + get_localized_message, + OUT_OF_SCOPE_MESSAGES, + ) + + # Get localized OOD message + detected_language = getattr(request, "_detected_language", "en") + ood_message = get_localized_message(OUT_OF_SCOPE_MESSAGES, detected_language) + + # Stream message for UX consistency (no guardrails needed - fixed message) + async def stream_ood_message(): + for chunk in split_into_tokens(ood_message, chunk_size=5): + yield self._format_sse(request.chatId, chunk) + await asyncio.sleep(0.01) + yield self._format_sse(request.chatId, "END") + + return stream_ood_message() + ``` + + Note: No output guardrails needed since this is a fixed, safe message. + + Args: + request: Orchestration request with user query + context: Unused (OOD doesn't need metadata) + + Returns: + AsyncIterator yielding SSE strings + Never returns None (this is final fallback) + """ + logger.info( + f"[{request.chatId}] OOD workflow execute_streaming called " + f"(not implemented - returning None for now)" + ) + + # TODO: Implement OOD streaming logic here + # For now, return None (will be implemented as simple message streaming) + return None diff --git a/src/tool_classifier/workflows/rag_workflow.py b/src/tool_classifier/workflows/rag_workflow.py new file mode 100644 index 0000000..d83080a --- /dev/null +++ b/src/tool_classifier/workflows/rag_workflow.py @@ -0,0 +1,172 @@ +"""RAG workflow executor - Layer 3: Knowledge base retrieval.""" + +from typing import Any, AsyncIterator, Dict, Optional +from loguru import logger + +from models.request_models import OrchestrationRequest, OrchestrationResponse +from tool_classifier.base_workflow import BaseWorkflow + + +class RAGWorkflowExecutor(BaseWorkflow): + """ + Wrapper for existing RAG (Retrieval-Augmented Generation) workflow (Layer 3). 
+ + This workflow handles queries that require searching the knowledge base + and generating responses based on retrieved chunks. It uses the existing + RAG pipeline: + 1. Prompt refinement + 2. Contextual retrieval (Qdrant + BM25) + 3. Rank fusion (RRF) + 4. Response generation + 5. Output guardrails (validation-first streaming) + + Examples of RAG queries: + - "What are digital signatures?" + - "How do I register a company?" + - "Explain tax regulations" + + Implementation Status: COMPLETE + This is a thin wrapper that delegates to existing LLMOrchestrationService methods. + + No TODO - Just wraps existing pipeline: + - Non-streaming: Calls _execute_orchestration_pipeline() + - Streaming: Calls existing streaming logic with NeMo guardrails + + Note: If no relevant chunks found, returns OOD response (not None) + """ + + def __init__(self, orchestration_service: Any): + """ + Initialize RAG workflow executor. + + Args: + orchestration_service: Reference to LLMOrchestrationService + for calling existing RAG pipeline + """ + self.orchestration_service = orchestration_service + logger.info("RAG workflow executor initialized (wrapper)") + + async def execute_async( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + ) -> Optional[OrchestrationResponse]: + """ + Execute RAG workflow in non-streaming mode. 
+ + Delegates to existing LLMOrchestrationService._execute_orchestration_pipeline() + which handles: + - Prompt refinement + - Chunk retrieval (Qdrant + BM25) + - Response generation + - Output guardrails + + Args: + request: Orchestration request with user query + context: Unused (RAG doesn't need classification metadata) + + Returns: + OrchestrationResponse with RAG-generated answer + Never returns None (handles OOD internally) + """ + logger.info(f"[{request.chatId}] Executing RAG workflow (non-streaming)") + + # Initialize components needed for RAG pipeline + costs_dict: Dict[str, Any] = {} + timing_dict: Dict[str, float] = {} + + # Initialize service components + components = self.orchestration_service._initialize_service_components(request) + + # Call existing RAG pipeline + response = await self.orchestration_service._execute_orchestration_pipeline( + request=request, + components=components, + costs_dict=costs_dict, + timing_dict=timing_dict, + ) + + # Log costs and timings + self.orchestration_service._log_costs(costs_dict) + from src.utils.time_tracker import log_step_timings + + log_step_timings(timing_dict, request.chatId) + + return response + + async def execute_streaming( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + ) -> Optional[AsyncIterator[str]]: + """ + Execute RAG workflow in streaming mode. 
+ + Delegates to existing streaming pipeline which handles: + - Prompt refinement (blocking) + - Chunk retrieval (blocking) + - Streaming through NeMo guardrails (validation-first) + - Real-time token validation + + The existing implementation uses NeMo's stream_with_guardrails which: + - Buffers tokens (chunk_size=200) + - Validates each buffer before yielding + - Provides true validation-first streaming + + Args: + request: Orchestration request with user query + context: Unused (RAG doesn't need classification metadata) + + Returns: + AsyncIterator yielding SSE-formatted strings + Never returns None (handles OOD internally) + """ + logger.info(f"[{request.chatId}] Executing RAG workflow (streaming)") + + # Initialize tracking dictionaries + costs_dict: Dict[str, Any] = {} + timing_dict: Dict[str, float] = {} + + # Get components from context if provided, otherwise initialize + components = context.get("components") + if components is None: + components = self.orchestration_service._initialize_service_components( + request + ) + + # Get stream context from context if provided, otherwise create minimal tracking + stream_ctx = context.get("stream_ctx") + if stream_ctx is None: + # Create minimal stream context when called via tool classifier + # In production flow, this is provided by stream_orchestration_response + class MinimalStreamContext: + """Minimal stream context for RAG workflow when called directly.""" + + def __init__(self, chat_id: str) -> None: + self.stream_id = f"rag-{chat_id}" + self.token_count = 0 + self.bot_generator = None + + def mark_completed(self) -> None: + """No-op: Tracking handled by orchestration service.""" + pass + + def mark_cancelled(self) -> None: + """No-op: Tracking handled by orchestration service.""" + pass + + def mark_error(self, error_id: str) -> None: + """No-op: Tracking handled by orchestration service.""" + pass + + stream_ctx = MinimalStreamContext(request.chatId) + + # Delegate to core RAG pipeline (bypasses classifier 
class ServiceWorkflowExecutor(BaseWorkflow):
    """
    Layer 1 executor: external service calls via Ruuter endpoints.

    Handles queries that require calling external government services or
    APIs, performing:
        1. Service discovery (semantic search if >50 services)
        2. Intent detection using LLM
        3. Entity extraction from query
        4. Service validation against database
        5. External API call via Ruuter
        6. Output guardrails validation

    Examples of Service queries:
        - "What's the EUR to USD exchange rate?"
        - "Check my document status"
        - "Submit a tax declaration"

    Status: SKELETON — both entry points return None, which triggers
    fallback to the Context workflow.

    TODO - Full Implementation (Separate Task):
        - Service discovery logic (Qdrant semantic search)
        - Intent detection (LLM-based)
        - Entity extraction and transformation
        - Service validation (database lookup)
        - Ruuter API integration
        - Output guardrails for service responses
    """

    def __init__(self, llm_manager: Any):
        """
        Initialize service workflow executor.

        Args:
            llm_manager: LLM manager used for intent detection.
        """
        self.llm_manager = llm_manager
        logger.info("Service workflow executor initialized (skeleton)")

    async def execute_async(
        self,
        request: OrchestrationRequest,
        context: Dict[str, Any],
    ) -> Optional[OrchestrationResponse]:
        """
        Execute service workflow in non-streaming mode.

        TODO: Implement service workflow logic:
            1. Extract service metadata from context (service_id, intent, entities)
            2. Validate the service exists and is active in the database
            3. Transform entities to array format for the service call
            4. Call Ruuter endpoint: POST {RUUTER_BASE_URL}/services/active{ServiceName}
            5. Validate the response with output guardrails
            6. Return OrchestrationResponse with the service result

        Failure scenarios:
            - No service_id in context -> return None (fallback to Context)
            - Service not found/inactive -> return None (fallback to Context)
            - Service call timeout -> return error response
            - Output guardrails blocked -> return violation response or None

        Args:
            request: Orchestration request with user query.
            context: Metadata with service_id, intent, entities.

        Returns:
            OrchestrationResponse with the service result, or None to fall
            back to the next layer.
        """
        logger.debug(
            f"[{request.chatId}] Service workflow execute_async called "
            f"(not implemented - returning None)"
        )
        # Skeleton: defer to the next layer.
        return None

    async def execute_streaming(
        self,
        request: OrchestrationRequest,
        context: Dict[str, Any],
    ) -> Optional[AsyncIterator[str]]:
        """
        Execute service workflow in streaming mode.

        TODO: Validation-first streaming — execute the service call, obtain
        the complete response, validate it with output guardrails, then
        either yield the violation message + END, or chunk the validated
        response and stream it token-by-token (simulated streaming for UX
        consistency with RAG)::

            service_response = await call_service(...)

            is_safe = await guardrails.check_output_async(service_response)
            if not is_safe:
                yield format_sse(chatId, VIOLATION_MESSAGE)
                yield format_sse(chatId, "END")
                return

            for chunk in split_into_tokens(service_response, chunk_size=5):
                yield format_sse(chatId, chunk)
                await asyncio.sleep(0.01)
            yield format_sse(chatId, "END")

        Args:
            request: Orchestration request with user query.
            context: Metadata with service_id, intent, entities.

        Returns:
            AsyncIterator yielding SSE strings, or None to fall back.
        """
        logger.debug(
            f"[{request.chatId}] Service workflow execute_streaming called "
            f"(not implemented - returning None)"
        )
        # Skeleton: defer to the next layer.
        return None