diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..0c6a1b2
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,307 @@
+name: CI
+
+on:
+  push:
+    branches: [ main, develop ]
+  pull_request:
+    branches: [ main, develop ]
+
+jobs:
+  lint:
+    name: Lint & Code Quality
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install flake8 pylint black isort bandit safety
+          pip install -r requirements.txt
+
+      - name: Lint with flake8
+        run: |
+          # Stop the build if there are Python syntax errors or undefined names
+          flake8 lixsearch --count --select=E9,F63,F7,F82 --show-source --statistics
+          # Exit-zero treats all errors as warnings
+          flake8 lixsearch --count --exit-zero --max-complexity=10 --max-line-length=120 --statistics
+
+      # The checks below are advisory: continue-on-error keeps the job green
+      # while still marking the failing step in the run log.
+      - name: Format check with black
+        continue-on-error: true
+        run: |
+          black --check lixsearch --line-length=120
+
+      - name: Import sorting check with isort
+        continue-on-error: true
+        run: |
+          isort --check-only lixsearch --profile=black
+
+      - name: Security check with bandit
+        continue-on-error: true
+        run: |
+          bandit -r lixsearch -ll
+
+      - name: Dependency vulnerability check
+        continue-on-error: true
+        run: |
+          safety check
+
+  type-check:
+    name: Type Checking
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install mypy types-PyYAML
+          pip install -r requirements.txt
+
+      # Advisory: type errors are reported but do not fail the job.
+      - name: Type checking with mypy
+        continue-on-error: true
+        run: |
+          mypy lixsearch --ignore-missing-imports --no-error-summary
+
+  test:
+    name: Integration Tests
+    runs-on: ubuntu-latest
+
+    services:
+      redis:
+        image: redis:7-alpine
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 6379:6379
+
+      chroma:
+        image: ghcr.io/chroma-core/chroma:latest
+        ports:
+          - 8000:8000
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pytest pytest-asyncio pytest-cov
+
+      # Advisory: both scripts always run; a non-zero exit from either marks
+      # this step as failed without failing the job.
+      - name: Run integration tests
+        continue-on-error: true
+        env:
+          REDIS_HOST: localhost
+          REDIS_PORT: 6379
+          CHROMA_SERVER_HOST: localhost
+          CHROMA_SERVER_PORT: 8000
+        run: |
+          cd tester
+          status=0
+          python test_redis_semantic_cache.py || status=$?
+          python test_session_persistence.py || status=$?
+          exit "$status"
+
+  api-validation:
+    name: API Specification Validation
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install validation tools
+        run: |
+          python -m pip install --upgrade pip
+          pip install openapi-spec-validator pyyaml
+
+      - name: Validate OpenAPI spec
+        run: |
+          python -c "
+          import yaml
+          from openapi_spec_validator import validate_spec
+
+          with open('openapi.yaml', 'r') as f:
+              spec = yaml.safe_load(f)
+
+          try:
+              validate_spec(spec)
+              print('✓ OpenAPI spec is valid')
+          except Exception as e:
+              print(f'✗ OpenAPI spec validation failed: {e}')
+              exit(1)
+          "
+
+  docker:
+    name: Docker Build Check
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: false
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+  security-scanning:
+    name: Security Scanning
+    runs-on: ubuntu-latest
+    # upload-sarif needs write access to code-scanning results.
+    permissions:
+      contents: read
+      security-events: write
+
+    steps:
+      - uses: actions/checkout@v4
+
+      # NOTE(review): @master is a moving ref; pin to a release tag or commit SHA.
+      - name: Run Trivy vulnerability scanner
+        uses: aquasecurity/trivy-action@master
+        with:
+          scan-type: fs
+          scan-ref: '.'
+          format: sarif
+          output: trivy-results.sarif
+
+      - name: Upload Trivy results to GitHub Security tab
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: trivy-results.sarif
+          category: trivy
+
+  requirements-check:
+    name: Requirements Consistency
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Check requirements.txt
+        run: |
+          python -c "
+          import re
+
+          with open('requirements.txt', 'r') as f:
+              lines = f.readlines()
+
+          errors = []
+          for i, line in enumerate(lines, 1):
+              line = line.strip()
+              if not line or line.startswith('#'):
+                  continue
+
+              # Check format: package_name==version or package_name>=version
+              if not re.match(r'^[a-zA-Z0-9._-]+(\[.*\])?(~=|==|>=|<=|>|<)[0-9]', line):
+                  errors.append(f'Line {i}: Invalid format: {line}')
+
+          if errors:
+              for error in errors:
+                  print(f'✗ {error}')
+              exit(1)
+          else:
+              print('✓ requirements.txt format is valid')
+          "
+
+  lint-github-actions:
+    name: GitHub Actions Validation
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      # The validation script imports yaml, so PyYAML must be installed first.
+      - name: Install PyYAML
+        run: |
+          python -m pip install --upgrade pip
+          pip install pyyaml
+
+      - name: Validate workflow files
+        run: |
+          python -c "
+          import yaml
+          import os
+
+          workflow_dir = '.github/workflows'
+          if not os.path.exists(workflow_dir):
+              print('✓ No workflow files to validate')
+              exit(0)
+
+          for file in os.listdir(workflow_dir):
+              if file.endswith('.yml') or file.endswith('.yaml'):
+                  path = os.path.join(workflow_dir, file)
+                  try:
+                      with open(path, 'r') as f:
+                          yaml.safe_load(f)
+                      print(f'✓ {file} is valid')
+                  except Exception as e:
+                      print(f'✗ {file} validation failed: {e}')
+                      exit(1)
+          "
+
+  summary:
+    name: CI Summary
+    runs-on: ubuntu-latest
+    needs: [lint, type-check, api-validation, docker, requirements-check, lint-github-actions]
+    if: always()
+
+    steps:
+      - name: Check CI status
+        run: |
+          # A cancelled dependency must not be reported as a pass.
+          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]] || \
+             [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
+            echo "❌ CI checks failed"
+            exit 1
+          else
+            echo "✅ All CI checks passed"
+          fi
diff --git a/lixsearch/app/main.py b/lixsearch/app/main.py
index 070539f..714db04 100644
--- a/lixsearch/app/main.py
+++ b/lixsearch/app/main.py
@@ -6,7 +6,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
 
-from quart import Quart, request, jsonify
+from quart import Quart, request, jsonify, send_file, render_template_string
 from quart_cors import cors
 from sessions.main import get_session_manager
 from ragService.main import get_retrieval_system
@@ -66,6 +66,39 @@ async def session_chat_wrapper(session_id):
         async def chat_completions_wrapper(session_id):
             return await chat.chat_completions(session_id, self.pipeline_initialized)
 
+        async def scalar_ui():
+            """Serve Scalar API documentation UI"""
+            html = '''
+
+
+
+            lixSearch API Documentation
+
+
+
+
+
+
+
+
+
+
+            '''
+            return html, 200, {"Content-Type": "text/html"}
+
         self.app.route('/api/health', methods=['GET'])(health_check_wrapper)
         self.app.route('/api/search', methods=['POST', 'GET'])(search_wrapper)
         self.app.route('/api/session/create', methods=['POST'])(session.create_session)
@@ -83,6 +116,28 @@ async def chat_completions_wrapper(session_id):
         self.app.route('/api/session/<session_id>/history', methods=['GET'])(chat.get_chat_history)
         self.app.route('/api/stats', methods=['GET'])(stats.get_stats)
         self.app.websocket('/ws/search')(websocket.websocket_search)
+
+        # Scalar API documentation UI
+        self.app.route('/docs', methods=['GET'])(scalar_ui)
+        self.app.route('/api/docs', methods=['GET'])(scalar_ui)
+
+        # OpenAPI spec endpoint
+        async def openapi_spec():
+            """Serve openapi.yaml from the repository root as JSON."""
+            import yaml
+            spec_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'openapi.yaml')
+            try:
+                with open(spec_path, 'r', encoding='utf-8') as f:
+                    spec = yaml.safe_load(f)
+                return jsonify(spec)
+            except FileNotFoundError:
+                logger.error(f"[APP] OpenAPI spec not found: {spec_path}")
+                return jsonify({"error": "OpenAPI spec not found"}), 404
+            except Exception as e:
+                logger.error(f"[APP] Failed to load OpenAPI spec: {e}")
+                return jsonify({"error": "Failed to load OpenAPI spec"}), 500
+
+        self.app.route('/openapi.json', methods=['GET'])(openapi_spec)
 
     def _register_error_handlers(self):
         @self.app.errorhandler(404)
diff --git a/openapi.yaml b/openapi.yaml
new file mode 100644
index 0000000..f97630b
--- /dev/null
+++ b/openapi.yaml
@@ -0,0 +1,563 @@
+openapi: 3.0.0
+info:
+  title: lixSearch API
+  description: Intelligent multi-service search assistant with web search, video/image fetching, and answer synthesis
+  version: 1.0.0
+  contact:
+    name: lixSearch Team
+  license:
+    name: MIT
+
+servers:
+  - url: http://localhost:9002
+    description: Local development server
+  - url: http://localhost
+    description: Production (via nginx)
+
+paths:
+  /api/health:
+    get:
+      summary: Health check endpoint
+      tags:
+        - System
+      description: Returns the health status of the service and initialization state
+      responses:
+        '200':
+          description: Service is healthy
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  status:
+                    type: string
+                    example: healthy
+                  timestamp:
+                    type: string
+                    format: date-time
+                  initialized:
+                    type: boolean
+
+  /api/search:
+    get:
+      summary: Search (GET)
+      tags:
+        - Search
+      description: Perform a search with query and session
+      parameters:
+        - name: session_id
+          in: query
+          schema:
+            type: string
+          description: Session identifier for context persistence
+        - name: query
+          in: query
+          required: true
+          schema:
+            type: string
+          description: Search query
+        - name: image_url
+          in: query
+          schema:
+            type: string
+            format: uri
+          description: URL of image for reverse image search
+        - name: stream
+          in: query
+          schema:
+            type: string
+            enum: ['true', 'false']
+            default: 'true'
+          description: Enable server-sent events streaming
+        - name: deep_search
+          in: query
+          schema:
+            type: string
+            enum: ['true', 'false']
+            default: 'false'
+          description: Enable deep search for more comprehensive results
+      responses:
+        '200':
+          description: Search results (streamed or JSON)
+          content:
+            text/event-stream:
+              schema:
+                type: object
+                properties:
+                  id:
+                    type: string
+                  object:
+                    type: string
+                  model:
+                    type: string
+                  choices:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        index:
+                          type: integer
+                        delta:
+                          type: object
+                          properties:
+                            role:
+                              type: string
+                            content:
+                              type: string
+        '400':
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+        '503':
+          description: Server not initialized
+
+    post:
+      summary: Search (POST)
+      tags:
+        - Search
+      description: Perform a search with query and session (POST variant)
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - query
+              properties:
+                query:
+                  type: string
+                  description: Search query
+                session_id:
+                  type: string
+                  description: Session identifier
+                image_url:
+                  type: string
+                  format: uri
+                  description: URL for image search
+                image:
+                  type: string
+                  format: uri
+                  description: Alternative parameter name for image_url
+                stream:
+                  type: boolean
+                  default: true
+                  description: Enable streaming
+                deep_search:
+                  type: boolean
+                  default: false
+                  description: Enable deep search
+      responses:
+        '200':
+          description: Search results
+        '400':
+          description: Invalid request
+        '503':
+          description: Server not initialized
+
+  /api/session/create:
+    post:
+      summary: Create new session
+      tags:
+        - Sessions
+      description: Create a new conversation session
+      requestBody:
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                initial_message:
+                  type: string
+                  description: Optional initial message
+      responses:
+        '201':
+          description: Session created
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  session_id:
+                    type: string
+                  created_at:
+                    type: string
+                    format: date-time
+
+  /api/session/{session_id}:
+    get:
+      summary: Get session info
+      tags:
+        - Sessions
+      description: Retrieve session metadata and status
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Session information
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  session_id:
+                    type: string
+                  created_at:
+                    type: string
+                    format: date-time
+                  message_count:
+                    type: integer
+                  last_activity:
+                    type: string
+                    format: date-time
+        '404':
+          description: Session not found
+
+    delete:
+      summary: Delete session
+      tags:
+        - Sessions
+      description: Delete a session and its associated data
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Session deleted
+        '404':
+          description: Session not found
+
+  /api/session/{session_id}/kg:
+    get:
+      summary: Get session knowledge graph
+      tags:
+        - Sessions
+      description: Retrieve the knowledge graph for a session
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Knowledge graph
+          content:
+            application/json:
+              schema:
+                type: object
+        '404':
+          description: Session not found
+
+  /api/session/{session_id}/query:
+    post:
+      summary: Query session knowledge graph
+      tags:
+        - Sessions
+      description: Query the knowledge graph with a SPARQL-like query
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            type: string
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                query:
+                  type: string
+                  description: Query string
+      responses:
+        '200':
+          description: Query results
+
+  /api/session/{session_id}/entity/{entity}:
+    get:
+      summary: Get entity evidence
+      tags:
+        - Sessions
+      description: Get evidence and sources for a specific entity
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            type: string
+        - name: entity
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Entity evidence
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  entity:
+                    type: string
+                  evidence:
+                    type: array
+                    items:
+                      type: object
+        '404':
+          description: Entity or session not found
+
+  /api/session/{session_id}/summary:
+    get:
+      summary: Get session summary
+      tags:
+        - Sessions
+      description: Get a summary of the session conversation
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Session summary
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  summary:
+                    type: string
+                  key_topics:
+                    type: array
+                    items:
+                      type: string
+        '404':
+          description: Session not found
+
+  /api/chat:
+    post:
+      summary: Chat endpoint
+      tags:
+        - Chat
+      description: Send a message for chat with optional web search
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - message
+              properties:
+                message:
+                  type: string
+                  description: User message
+                session_id:
+                  type: string
+                  description: Session identifier (creates new if omitted)
+                search:
+                  type: boolean
+                  default: true
+                  description: Enable web search augmentation
+                image_url:
+                  type: string
+                  format: uri
+                  description: Optional image URL for multimodal queries
+      responses:
+        '200':
+          description: Chat response (streamed)
+          content:
+            text/event-stream:
+              schema:
+                type: object
+        '400':
+          description: Invalid request
+        '503':
+          description: Server not initialized
+
+  /api/session/{session_id}/chat:
+    post:
+      summary: Session chat endpoint
+      tags:
+        - Chat
+      description: Send a message in an existing session
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            type: string
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - message
+              properties:
+                message:
+                  type: string
+                search:
+                  type: boolean
+                  default: false
+      responses:
+        '200':
+          description: Chat response (streamed)
+        '404':
+          description: Session not found
+
+  /api/session/{session_id}/chat/completions:
+    post:
+      summary: Chat completions endpoint
+      tags:
+        - Chat
+      description: OpenAI-compatible chat completions endpoint
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            type: string
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                messages:
+                  type: array
+                  items:
+                    type: object
+                    properties:
+                      role:
+                        type: string
+                        enum: [system, user, assistant]
+                      content:
+                        type: string
+                model:
+                  type: string
+                temperature:
+                  type: number
+                  minimum: 0
+                  maximum: 2
+      responses:
+        '200':
+          description: Chat completion response
+
+  /api/session/{session_id}/history:
+    get:
+      summary: Get chat history
+      tags:
+        - Chat
+      description: Retrieve conversation history for a session
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            type: string
+        - name: limit
+          in: query
+          schema:
+            type: integer
+            default: 20
+          description: Maximum number of messages
+        - name: offset
+          in: query
+          schema:
+            type: integer
+            default: 0
+          description: Offset for pagination
+      responses:
+        '200':
+          description: Chat history
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  messages:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        role:
+                          type: string
+                        content:
+                          type: string
+                        timestamp:
+                          type: string
+                          format: date-time
+        '404':
+          description: Session not found
+
+  /api/stats:
+    get:
+      summary: Get performance statistics
+      tags:
+        - System
+      description: Retrieve system-wide statistics and metrics
+      responses:
+        '200':
+          description: Statistics
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  requests_total:
+                    type: integer
+                  sessions_active:
+                    type: integer
+                  avg_response_time:
+                    type: number
+                  uptime_seconds:
+                    type: integer
+
+  /ws/search:
+    get:
+      summary: WebSocket search endpoint
+      tags:
+        - Search
+      description: WebSocket connection for real-time search streaming
+      parameters:
+        - name: session_id
+          in: query
+          schema:
+            type: string
+        - name: query
+          in: query
+          required: true
+          schema:
+            type: string
+      responses:
+        '101':
+          description: WebSocket connection established
+
+tags:
+  - name: System
+    description: System health and statistics
+  - name: Search
+    description: Search operations
+  - name: Sessions
+    description: Session management
+  - name: Chat
+    description: Chat and conversation endpoints