diff --git a/.claude/plans/enhanced_search_implementation_plan.md b/.claude/plans/enhanced_search_implementation_plan.md new file mode 100644 index 0000000..6a69d19 --- /dev/null +++ b/.claude/plans/enhanced_search_implementation_plan.md @@ -0,0 +1,770 @@ +# Enhanced Search Implementation Plan + +## Overview + +This plan implements intelligent web search with context-aware query enhancement for Nova AI Assistant. The enhancement system uses a three-stage pipeline: NLP extraction → LLM optimization → JSON-driven execution, replacing both the `web_search` tool and `/search` command with enhanced versions. + +## Key Features + +- **Three-stage enhancement pipeline**: spaCy + YAKE/KeyBERT → LLM → structured search execution +- **Swappable NLP backends**: YAKE (fast, default) and KeyBERT (semantic, optional) +- **Conversation context integration**: Automatic context injection from chat history +- **Performance flexibility**: Multiple enhancement modes from disabled to hybrid +- **Comprehensive configuration**: User-configurable defaults and preferences +- **Backward compatibility**: All existing usage patterns continue to work + +## Phase 1: Create Modular Search Architecture + +### 1.1 New Directory Structure + +``` +nova/ +├── search/ # NEW: Dedicated search module +│ ├── __init__.py # Search module exports +│ ├── engines/ # Search engine implementations +│ │ ├── __init__.py +│ │ ├── base.py # Abstract search engine +│ │ ├── duckduckgo.py # DuckDuckGo implementation +│ │ ├── google.py # Google Search implementation +│ │ └── bing.py # Bing implementation +│ ├── enhancement/ # Query enhancement components +│ │ ├── __init__.py +│ │ ├── extractors.py # Keyword extraction (YAKE/KeyBERT) +│ │ ├── enhancer.py # Main enhancement logic +│ │ └── classifier.py # Term classification +│ ├── models.py # Search-specific models +│ ├── config.py # Search configuration +│ ├── manager.py # Main search manager (replaces core/search.py) +│ └── utils.py # Search utilities +├── 
tools/built_in/ +│ └── web_search.py # UPDATED: Enhanced web search tool +└── core/ + └── search.py # DEPRECATED: Will be removed +``` + +### 1.2 File Structure Details + +**nova/search/__init__.py** +```python +"""Enhanced search module with intelligent query enhancement""" + +from .manager import EnhancedSearchManager +from .models import SearchResult, SearchResponse, EnhancedSearchPlan +from .config import SearchConfig +from .enhancement import QueryEnhancer + +__all__ = [ + "EnhancedSearchManager", + "SearchResult", + "SearchResponse", + "EnhancedSearchPlan", + "SearchConfig", + "QueryEnhancer" +] +``` + +**nova/search/models.py** +```python +"""Search-specific models and data structures""" +# Move SearchResult, SearchResponse from core/search.py +# Add new models: EnhancedSearchPlan, KeywordExtractionResult, etc. +``` + +**nova/search/config.py** +```python +"""Search configuration management""" +# Consolidate all search-related configuration +# Include extraction backend settings, performance options +``` + +**nova/search/manager.py** +```python +"""Main search manager replacing core/search.py functionality""" +# Enhanced SearchManager with query enhancement integration +# Backward compatibility with existing SearchManager interface +``` + +## Phase 2: Replace web_search Tool and /search Command + +### 2.1 Enhanced web_search Tool Signature + +```python +# nova/tools/built_in/web_search.py - COMPLETE REPLACEMENT + +@tool( + description="Intelligent web search with context-aware query enhancement", + permission_level=PermissionLevel.ELEVATED, + category=ToolCategory.INFORMATION, + tags=["web", "search", "nlp", "enhanced"], + examples=[ + ToolExample( + description="Enhanced search with automatic optimization", + arguments={"query": "Python async programming", "enhancement": "auto"}, + expected_result="Optimized search queries with extracted keywords", + ), + ToolExample( + description="Fast search without enhancement", + arguments={"query": "exact search 
terms", "enhancement": "disabled"}, + expected_result="Direct search results without query modification", + ), + ToolExample( + description="Semantic search for complex topics", + arguments={"query": "machine learning deployment", "enhancement": "semantic"}, + expected_result="Semantically enhanced search with KeyBERT extraction", + ), + ], +) +async def web_search( + query: str, + enhancement: str = None, # Will use config default if None + provider: str = None, # Will use config default if None + max_results: int = None, # Will use config default if None + include_content: bool = True, + timeframe: str = None, # Will use config default if None + technical_level: str = None, # Will use config default if None +) -> dict: + """ + Enhanced web search with intelligent query optimization. + + Enhancement modes: + - auto: Automatically choose best enhancement (YAKE + context) + - disabled: No enhancement, direct search + - fast: YAKE-only enhancement (~50ms) + - semantic: KeyBERT semantic enhancement (~200-500ms) + - hybrid: YAKE + KeyBERT for best accuracy (~300-600ms) + + Args: + query: Search query or question + enhancement: Enhancement mode to use (uses config default if None) + provider: Search provider (duckduckgo, google, bing) + max_results: Maximum results to return (1-20) + include_content: Extract detailed content from pages + timeframe: Preferred time range for results + technical_level: Adjust query complexity + + Returns: + Enhanced search results with optimization details + """ +``` + +### 2.2 Backward Compatibility Strategy + +```python +# Maintain existing function signatures for compatibility +async def web_search( + query: str, + provider: str = None, # Use config default + max_results: int = None, # Use config default + include_content: bool = True, + # NEW parameters with defaults to maintain compatibility + enhancement: str = None, # Use config default + timeframe: str = None, # Use config default + technical_level: str = None, # Use config default 
+) -> dict: + """Backward compatible web search with optional enhancement""" + + # Get configuration defaults + from nova.core.config import get_config + config = get_config() + + # Apply configuration defaults for None values + enhancement = enhancement or config.search.default_enhancement.value + provider = provider or config.search.default_provider + max_results = max_results or config.search.max_results + timeframe = timeframe or config.search.default_timeframe + technical_level = technical_level or config.search.default_technical_level + + # Rest of implementation... +``` + +### 2.3 Enhanced /search Command Design + +**Current /search Command Signature:** +```bash +/search <query> [--provider <provider>] [--max <number>] +/s <query> [--provider <provider>] [--max <number>] +``` + +**Enhanced /search Command Signature:** +```bash +/search <query> [--provider <provider>] [--max <number>] [--enhancement <mode>] [--technical-level <level>] [--timeframe