diff --git a/backend/alembic/versions/012_add_asset_type.py b/backend/alembic/versions/012_add_asset_type.py new file mode 100644 index 0000000..18be537 --- /dev/null +++ b/backend/alembic/versions/012_add_asset_type.py @@ -0,0 +1,54 @@ +"""Add asset_type to gpts table for Project support + +Revision ID: 012 +Revises: 011 +Create Date: 2026-03-17 + +asset_type: 'gpt' | 'project' — allows the gpts table to store both +Custom GPTs and OpenAI Projects as a unified asset registry. + +conversation_count / last_conversation_at: placeholders for Phase 2 +Conversation Intelligence (populated when the Conversations API is added). +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "012" +down_revision: Union[str, None] = "011" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "gpts", + sa.Column( + "asset_type", + sa.String(32), + nullable=False, + server_default="gpt", + ), + ) + op.add_column( + "gpts", + sa.Column("conversation_count", sa.Integer, nullable=False, server_default="0"), + ) + op.add_column( + "gpts", + sa.Column( + "last_conversation_at", + sa.DateTime(timezone=True), + nullable=True, + ), + ) + op.create_index("ix_gpts_asset_type", "gpts", ["asset_type"]) + + +def downgrade() -> None: + op.drop_index("ix_gpts_asset_type", table_name="gpts") + op.drop_column("gpts", "last_conversation_at") + op.drop_column("gpts", "conversation_count") + op.drop_column("gpts", "asset_type") diff --git a/backend/app/models/models.py b/backend/app/models/models.py index f4b2e01..44f0cba 100644 --- a/backend/app/models/models.py +++ b/backend/app/models/models.py @@ -127,6 +127,12 @@ class GPT(Base): DateTime(timezone=True) ) + asset_type: Mapped[str] = mapped_column(String(32), default="gpt", nullable=False) + conversation_count: Mapped[int] = mapped_column(Integer, default=0) + last_conversation_at: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True) + ) + content_hash: Mapped[str | None] = mapped_column(String(64)) sync_log_id: Mapped[int | None] = mapped_column(Integer, ForeignKey("sync_logs.id")) indexed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) diff --git a/backend/app/schemas/schemas.py b/backend/app/schemas/schemas.py index 5e322d8..6b7b154 100644 --- a/backend/app/schemas/schemas.py +++ b/backend/app/schemas/schemas.py @@ -131,6 +131,7 @@ class GPTRead(BaseModel): llm_summary: str | None use_case_description: str | None = None instructions: str | None = None + asset_type: str = "gpt" # Semantic enrichment fields business_process: str | None = None risk_flags: list | None = None diff --git a/backend/app/services/compliance_api.py b/backend/app/services/compliance_api.py index cfdb857..4df0188 100644 --- a/backend/app/services/compliance_api.py +++ b/backend/app/services/compliance_api.py @@ -44,12 +44,19 @@ def __init__( async def close(self): await self._client.aclose() - async def fetch_all_gpts( + async def _fetch_paginated( self, - workspace_id: str, + endpoint: str, + normalize_fn: Callable[[dict], dict], on_page: Callable[[list[dict], int], Coroutine[Any, Any, None]] | None = None, ) -> list[dict]: - all_gpts: list[dict] = [] + """Fetch all pages from a cursor-paginated endpoint and normalize each item. + + endpoint: full URL, e.g. .../workspaces/{id}/gpts + normalize_fn: called on each raw item to produce a uniform dict + on_page: optional progress callback(batch, page_number) + """ + all_items: list[dict] = [] after: str | None = None page = 0 @@ -60,38 +67,47 @@ async def fetch_all_gpts( if after: params["after"] = after - url = f"{self._base_url}/compliance/workspaces/{workspace_id}/gpts" - logger.info(f"Requesting: GET {url} params={params}") - - response = await self._request_with_retries( - "GET", - url, - params=params, - ) - + logger.info(f"Requesting: GET {endpoint} params={params}") + response = await self._request_with_retries("GET", endpoint, params=params) logger.info( f"Response: status={response.status_code} length={len(response.text)}" ) data = response.json() - gpts = data.get("data", []) - all_gpts.extend(gpts) + items = data.get("data", []) + all_items.extend(items) page += 1 logger.info( - f"Page {page}: got {len(gpts)} GPTs, has_more={data.get('has_more')}" + f"Page {page}: got {len(items)} items, has_more={data.get('has_more')}" ) if on_page: - await on_page(gpts, page) + await on_page(items, page) - if not data.get("has_more", False) or not gpts: + if not data.get("has_more", False) or not items: break - after = data.get("last_id") or gpts[-1].get("id") + after = data.get("last_id") or items[-1].get("id") - logger.info(f"Fetch complete: {len(all_gpts)} total raw GPTs") - return [self._normalize_gpt(g) for g in all_gpts] + logger.info(f"Fetch complete: {len(all_items)} total raw items from {endpoint}") + return [normalize_fn(item) for item in all_items] + + async def fetch_all_gpts( + self, + workspace_id: str, + on_page: Callable[[list[dict], int], Coroutine[Any, Any, None]] | None = None, + ) -> list[dict]: + url = f"{self._base_url}/compliance/workspaces/{workspace_id}/gpts" + return await self._fetch_paginated(url, self._normalize_gpt, on_page) + + async def fetch_all_projects( + self, + workspace_id: str, + on_page: Callable[[list[dict], int], Coroutine[Any, Any, None]] | None = None, + ) -> list[dict]: + url = f"{self._base_url}/compliance/workspaces/{workspace_id}/projects" + return await self._fetch_paginated(url, self._normalize_project, on_page) @staticmethod def _normalize_gpt(raw: dict) -> dict: @@ -135,6 +151,66 @@ def _normalize_gpt(raw: dict) -> dict: "conversation_starters": config.get("conversation_starters"), } + @staticmethod + def _normalize_project(raw: dict) -> dict: + """Flatten the Projects API response into the same uniform dict as _normalize_gpt. + + Projects share the same latest_config / sharing envelope as GPTs; the + main structural difference is that the id prefix is 'g-p-...' and the + tool set may include project-only types (deep_research, web_browsing, canvas). + """ + from datetime import datetime, timezone + + sharing = raw.get("sharing") or {} + config = raw.get("latest_config") or {} + # Projects use flat latest_config (not a nested data list like GPTs) + if isinstance(config.get("data"), list): + config_list = config.get("data") or [] + config = config_list[0] if config_list else {} + + recipients_obj = sharing.get("recipients") or {} + recipients = ( + recipients_obj.get("data", []) if isinstance(recipients_obj, dict) else [] + ) + + tools_obj = config.get("tools") or {} + tools = ( + tools_obj.get("data", []) + if isinstance(tools_obj, dict) + else (tools_obj if isinstance(tools_obj, list) else []) + ) + files_obj = config.get("files") or {} + files = ( + files_obj.get("data", []) + if isinstance(files_obj, dict) + else (files_obj if isinstance(files_obj, list) else []) + ) + + created_at_raw = raw.get("created_at") + created_at = None + if isinstance(created_at_raw, (int, float)): + created_at = datetime.fromtimestamp(created_at_raw, tz=timezone.utc) + elif isinstance(created_at_raw, str): + created_at = created_at_raw + + return { + "id": raw.get("id"), + "name": config.get("name") or raw.get("name"), + "description": config.get("description"), + "instructions": config.get("instructions") or "", + "owner_email": raw.get("owner_email"), + "builder_name": raw.get("builder_name"), + "created_at": created_at, + "visibility": sharing.get("visibility"), + "recipients": recipients, + "shared_user_count": len(recipients), + "tools": tools, + "files": files, + "builder_categories": config.get("categories"), + "conversation_starters": config.get("conversation_starters"), + "asset_type": "project", + } + async def fetch_all_users(self, workspace_id: str) -> list[dict]: all_users: list[dict] = [] after: str | None = None diff --git a/backend/app/services/mock_fetcher.py b/backend/app/services/mock_fetcher.py index 8bab913..35d357a 100644 --- a/backend/app/services/mock_fetcher.py +++ b/backend/app/services/mock_fetcher.py @@ -1,7 +1,8 @@ -"""Mock replacement for ComplianceAPIClient. Returns generated GPT data with simulated pagination delays.""" +"""Mock replacement for ComplianceAPIClient. Returns generated GPT and Project data.""" import asyncio from collections.abc import Callable, Coroutine +from datetime import datetime, timezone from typing import Any from app.services.mock_data import generate_mock_gpts @@ -9,6 +10,321 @@ PAGE_SIZE = 20 DELAY_PER_PAGE = 0.5 # seconds +# ── Mock Projects ───────────────────────────────────────────────────────────── +# 12 enterprise OpenAI Projects spanning tier 1/2/3 distribution. +# Format mirrors _normalize_project() output (already flattened). + +_BASE_DATE = datetime(2025, 8, 1, tzinfo=timezone.utc) + + +def _pdate(days_offset: int) -> datetime: + from datetime import timedelta + + return _BASE_DATE + timedelta(days=days_offset) + + +MOCK_PROJECTS: list[dict] = [ + # ── Tier 3: Production projects ──────────────────────────────────────── + { + "id": "g-p-PROJ001acmesalesops", + "name": "Sales Operations Hub", + "description": "Centralised AI workspace for the entire Sales org — pipeline review, forecast summaries, and deal-coaching prompts.", + "instructions": ( + "You are the Sales Operations AI assistant for Acme Corp. Your role is to help the sales team " + "prepare deal reviews, generate forecast summaries, coach on objection handling, and synthesise CRM data.\n\n" + "ALWAYS:\n" + "1. Use the MEDDIC framework for deal qualification.\n" + "2. Format forecast summaries with columns: Deal | Stage | ARR | Close Date | Risk.\n" + "3. Ask for deal stage and ARR before coaching.\n" + "4. Reference Salesforce opportunities by ID when provided.\n\n" + "NEVER speculate on competitor pricing or share confidential pipeline data outside the workspace." + ), + "owner_email": "sarah.connor@acme.com", + "builder_name": "Sarah Connor", + "created_at": _pdate(0), + "visibility": "workspace", + "recipients": [], + "shared_user_count": 48, + "tools": [{"type": "web_browsing"}, {"type": "canvas"}], + "files": [], + "builder_categories": ["sales"], + "conversation_starters": [ + "Review my deal", + "Generate forecast summary", + "Coach me on objection", + ], + "asset_type": "project", + "_tier": 3, + }, + { + "id": "g-p-PROJ002acmelegalreview", + "name": "Contract Review Project", + "description": "Legal team workspace for NDA, MSA, and vendor contract first-pass review.", + "instructions": ( + "You are a legal AI assistant specialising in commercial contract review for Acme Corp.\n\n" + "WORKFLOW:\n" + "1. Accept the contract as an uploaded PDF or pasted text.\n" + "2. Identify and flag: (a) non-standard indemnification clauses, (b) uncapped liability, " + "(c) auto-renewal terms, (d) IP ownership language that assigns rights to the vendor.\n" + "3. Output a structured risk report with: Section | Issue | Severity (High/Medium/Low) | Suggested Redline.\n" + "4. Always note: 'This is not legal advice. Route flagged items to counsel.'\n\n" + "SCOPE: NDA, MSA, SaaS agreements, vendor contracts. Decline to analyse employment or litigation documents." + ), + "owner_email": "emma.w@acme.com", + "builder_name": "Emma Wilson", + "created_at": _pdate(15), + "visibility": "workspace", + "recipients": [], + "shared_user_count": 12, + "tools": [{"type": "myfiles_browser"}], + "files": [], + "builder_categories": ["legal"], + "conversation_starters": [ + "Review this NDA", + "Flag indemnification clauses", + "Generate redline summary", + ], + "asset_type": "project", + "_tier": 3, + }, + { + "id": "g-p-PROJ003acmeengsupport", + "name": "Engineering Support Assistant", + "description": "Production incident triage, runbook lookup, and post-mortem drafting for the engineering org.", + "instructions": ( + "You are an engineering support AI for Acme Corp's platform team.\n\n" + "CAPABILITIES:\n" + "- Parse PagerDuty alert payloads and suggest triage steps.\n" + "- Generate incident timelines from Slack thread exports.\n" + "- Draft post-mortems in the blameless SRE format.\n" + "- Look up runbooks from uploaded docs.\n\n" + "SEVERITY DEFINITIONS (always use these):\n" + "SEV1: Customer-facing data loss or complete outage.\n" + "SEV2: Significant degradation affecting >10% of customers.\n" + "SEV3: Partial or internal-only degradation.\n\n" + "OUTPUT FORMAT: For incident timelines use ISO 8601 timestamps. For post-mortems use " + "sections: Summary | Impact | Root Cause | Timeline | Action Items." + ), + "owner_email": "raj.patel@acme.com", + "builder_name": "Raj Patel", + "created_at": _pdate(30), + "visibility": "workspace", + "recipients": [], + "shared_user_count": 22, + "tools": [{"type": "myfiles_browser"}, {"type": "canvas"}], + "files": [], + "builder_categories": ["engineering"], + "conversation_starters": [ + "Triage this alert", + "Draft post-mortem", + "Find runbook for X", + ], + "asset_type": "project", + "_tier": 3, + }, + # ── Tier 2: Functional projects ──────────────────────────────────────── + { + "id": "g-p-PROJ004acmehrops", + "name": "HR Operations Assistant", + "description": "Helps HR team draft job descriptions, screen questions, and onboarding checklists.", + "instructions": ( + "You are an HR assistant for Acme Corp. Help the HR team with:\n" + "- Writing inclusive job descriptions.\n" + "- Generating structured interview question sets.\n" + "- Creating onboarding checklists by department.\n\n" + "Always follow inclusive language guidelines. Flag any requirement that could create adverse impact." + ), + "owner_email": "nina.jones@acme.com", + "builder_name": "Nina Jones", + "created_at": _pdate(45), + "visibility": "workspace", + "recipients": [], + "shared_user_count": 8, + "tools": [], + "files": [], + "builder_categories": ["hr"], + "conversation_starters": [ + "Write a job description", + "Create interview questions", + ], + "asset_type": "project", + "_tier": 2, + }, + { + "id": "g-p-PROJ005acmefinancerep", + "name": "Finance Reporting Workspace", + "description": "Monthly close support — variance analysis, board deck narrative, and budget templates.", + "instructions": ( + "You assist the Finance team at Acme Corp with:\n" + "- Budget variance analysis (provide actuals vs budget table).\n" + "- Board deck narrative for monthly financials.\n" + "- Template generation for quarterly budget submissions.\n\n" + "Always express variances as both absolute ($) and percentage (%). Flag variances >10% as 'Material'." + ), + "owner_email": "lisa.chen@acme.com", + "builder_name": "Lisa Chen", + "created_at": _pdate(60), + "visibility": "workspace", + "recipients": [], + "shared_user_count": 6, + "tools": [{"type": "canvas"}], + "files": [], + "builder_categories": ["finance"], + "conversation_starters": [ + "Analyse this budget variance", + "Draft board narrative", + ], + "asset_type": "project", + "_tier": 2, + }, + { + "id": "g-p-PROJ006acmemktcontent", + "name": "Marketing Content Studio", + "description": "Blog posts, social copy, and campaign briefs aligned to brand voice.", + "instructions": ( + "You are a content assistant for Acme Corp's marketing team.\n" + "Brand voice: professional, concise, customer-first. Avoid jargon.\n\n" + "Deliverables you can produce:\n" + "- Blog post drafts (specify target keyword and word count).\n" + "- LinkedIn / Twitter copy variants.\n" + "- Campaign brief outlines.\n\n" + "Always ask for the target audience and goal before writing." + ), + "owner_email": "marco.b@acme.com", + "builder_name": "Marco Bianchi", + "created_at": _pdate(75), + "visibility": "workspace", + "recipients": [], + "shared_user_count": 15, + "tools": [{"type": "web_browsing"}], + "files": [], + "builder_categories": ["marketing"], + "conversation_starters": [ + "Write a blog post", + "Draft social copy", + "Create campaign brief", + ], + "asset_type": "project", + "_tier": 2, + }, + { + "id": "g-p-PROJ007acmedatasupport", + "name": "Data Analytics Support", + "description": "SQL query generation, dashboard spec writing, and data dictionary lookup.", + "instructions": ( + "You help the data team at Acme Corp with SQL query drafting and data documentation.\n" + "When writing SQL:\n" + "- Default to BigQuery syntax unless told otherwise.\n" + "- Always add comments explaining non-obvious CTEs.\n" + "- Include a 'LIMIT 1000' on exploratory queries.\n\n" + "For data dictionary requests, output: Field | Type | Description | Example Value." + ), + "owner_email": "david.kim@acme.com", + "builder_name": "David Kim", + "created_at": _pdate(90), + "visibility": "workspace", + "recipients": [], + "shared_user_count": 9, + "tools": [{"type": "deep_research"}], + "files": [], + "builder_categories": ["data"], + "conversation_starters": ["Write a SQL query", "Generate data dictionary"], + "asset_type": "project", + "_tier": 2, + }, + # ── Tier 1: Experimental / abandoned projects ────────────────────────── + { + "id": "g-p-PROJ008acmetest", + "name": "Test Project", + "description": "Testing the new Projects feature.", + "instructions": "Just testing this out.", + "owner_email": "john.smith@acme.com", + "builder_name": "John Smith", + "created_at": _pdate(100), + "visibility": "private", + "recipients": [], + "shared_user_count": 0, + "tools": [], + "files": [], + "builder_categories": [], + "conversation_starters": [], + "asset_type": "project", + "_tier": 1, + }, + { + "id": "g-p-PROJ009acmedraft", + "name": "Draft - Procurement AI", + "description": "", + "instructions": "Help with procurement stuff.", + "owner_email": "ops.admin@acme.com", + "builder_name": "Chris Ops", + "created_at": _pdate(110), + "visibility": "private", + "recipients": [], + "shared_user_count": 0, + "tools": [], + "files": [], + "builder_categories": ["operations"], + "conversation_starters": [], + "asset_type": "project", + "_tier": 1, + }, + { + "id": "g-p-PROJ010acmemygpt", + "name": "My Project v2", + "description": "", + "instructions": "General assistant for me.", + "owner_email": "sophie.m@acme.com", + "builder_name": "Sophie Muller", + "created_at": _pdate(120), + "visibility": "private", + "recipients": [], + "shared_user_count": 0, + "tools": [], + "files": [], + "builder_categories": [], + "conversation_starters": [], + "asset_type": "project", + "_tier": 1, + }, + { + "id": "g-p-PROJ011acmeignore", + "name": "ignore - old experiment", + "description": "Ignore this.", + "instructions": "ignore", + "owner_email": "yuki.tanaka@acme.com", + "builder_name": "Yuki Tanaka", + "created_at": _pdate(130), + "visibility": "private", + "recipients": [], + "shared_user_count": 0, + "tools": [], + "files": [], + "builder_categories": [], + "conversation_starters": [], + "asset_type": "project", + "_tier": 1, + }, + { + "id": "g-p-PROJ012acmefinal", + "name": "Final Test", + "description": "Final version of my test project.", + "instructions": "A test.", + "owner_email": "ana.garcia@acme.com", + "builder_name": "Ana Garcia", + "created_at": _pdate(140), + "visibility": "private", + "recipients": [], + "shared_user_count": 0, + "tools": [], + "files": [], + "builder_categories": [], + "conversation_starters": [], + "asset_type": "project", + "_tier": 1, + }, +] + class MockComplianceAPIClient: """Drop-in replacement for ComplianceAPIClient in demo mode.""" @@ -32,6 +348,22 @@ async def fetch_all_gpts( return result + async def fetch_all_projects( + self, + workspace_id: str, + on_page: Callable[[list[dict], int], Coroutine[Any, Any, None]] | None = None, + ) -> list[dict]: + result: list[dict] = [] + page = 0 + for i in range(0, len(MOCK_PROJECTS), PAGE_SIZE): + batch = MOCK_PROJECTS[i : i + PAGE_SIZE] + result.extend(batch) + page += 1 + await asyncio.sleep(DELAY_PER_PAGE) + if on_page: + await on_page(batch, page) + return result + async def fetch_all_users(self, workspace_id: str) -> list[dict]: from datetime import datetime, timezone diff --git a/backend/app/services/pipeline.py b/backend/app/services/pipeline.py index 052f71c..0e373a7 100644 --- a/backend/app/services/pipeline.py +++ b/backend/app/services/pipeline.py @@ -166,31 +166,66 @@ async def _execute_pipeline(db: AsyncSession): page_count = 0 - async def on_page(gpts: list[dict], page: int): + async def on_page(assets: list[dict], page: int): nonlocal page_count page_count = page - await _log(db, sync_log.id, "info", f"Fetched page {page} ({len(gpts)} GPTs)") + await _log( + db, sync_log.id, "info", f"Fetched page {page} ({len(assets)} assets)" + ) _current_status["progress"] = min(5.0 + page * 5, 30.0) + workspace_id = (config.workspace_id or "") if config else "" try: - workspace_id = (config.workspace_id or "") if config else "" - all_gpts = await client.fetch_all_gpts(workspace_id, on_page) + # Fetch GPTs and Projects in parallel; continue if Projects fail (non-fatal) + async def _fetch_projects_safe() -> list[dict]: + if hasattr(client, "fetch_all_projects"): + return await client.fetch_all_projects(workspace_id) + return [] + + gpt_results, project_results = await asyncio.gather( + client.fetch_all_gpts(workspace_id, on_page), + _fetch_projects_safe(), + return_exceptions=True, + ) + + if isinstance(gpt_results, Exception): + raise gpt_results # GPTs are required — propagate + + all_gpts: list[dict] = gpt_results + if isinstance(project_results, Exception): + await _log( + db, + sync_log.id, + "warn", + f"Projects fetch failed (non-fatal, continuing with GPTs only): {project_results}", + ) + else: + all_gpts = all_gpts + list(project_results) finally: await client.close() + gpt_count = sum(1 for a in all_gpts if a.get("asset_type", "gpt") == "gpt") + project_count = sum(1 for a in all_gpts if a.get("asset_type") == "project") + sync_log.total_gpts_found = len(all_gpts) await db.commit() - await _log(db, sync_log.id, "info", f"Total GPTs found: {len(all_gpts)}") + await _log( + db, + sync_log.id, + "info", + f"Total assets found: {len(all_gpts)} ({gpt_count} GPTs, {project_count} Projects)", + ) - # Log first GPT for debugging + # Log first asset for debugging if all_gpts: first = all_gpts[0] await _log( db, sync_log.id, "info", - f"Sample GPT: name={first.get('name')}, visibility={first.get('visibility')}, " - f"owner={first.get('owner_email')}, shared_users={first.get('shared_user_count')}", + f"Sample asset: name={first.get('name')}, type={first.get('asset_type', 'gpt')}, " + f"visibility={first.get('visibility')}, owner={first.get('owner_email')}, " + f"shared_users={first.get('shared_user_count')}", ) # Step 2: Filter @@ -557,6 +592,7 @@ async def on_page(gpts: list[dict], page: int): files=gpt_data.get("files"), builder_categories=gpt_data.get("builder_categories"), conversation_starters=gpt_data.get("conversation_starters"), + asset_type=gpt_data.get("asset_type", "gpt"), primary_category_id=primary_cat_id, secondary_category_id=secondary_cat_id, classification_confidence=cls.get("confidence") if cls else None, diff --git a/backend/tests/test_pipeline.py b/backend/tests/test_pipeline.py new file mode 100644 index 0000000..514aafb --- /dev/null +++ b/backend/tests/test_pipeline.py @@ -0,0 +1,248 @@ +"""Pipeline unit tests — T_P1 through T_P10. + +Tests for the Projects phase-1 feature: + - _normalize_project() edge cases (critical gaps from plan review) + - Parallel fetch logic: Projects fail → GPTs continue + - asset_type propagation through pipeline store step + - _fetch_paginated() DRY refactor stays backward-compatible + +Runs against a real PostgreSQL test database (pgvector-enabled). +""" + +import pytest +from httpx import AsyncClient + +from app.services.compliance_api import ComplianceAPIClient +from app.services.mock_fetcher import MOCK_PROJECTS, MockComplianceAPIClient + + +# ── Helpers ──────────────────────────────────────────────────────────────────── + + +def _make_raw_project(**overrides) -> dict: + """Minimal valid raw project payload as returned by the Compliance API.""" + base = { + "id": "g-p-TEST001", + "owner_email": "test@acme.com", + "builder_name": "Test User", + "created_at": 1700000000, + "sharing": { + "visibility": "workspace", + "recipients": {"data": []}, + }, + "latest_config": { + "name": "Test Project", + "description": "A test", + "instructions": "Do the thing.", + "categories": ["engineering"], + "tools": {"data": [{"type": "canvas"}]}, + "files": {"data": []}, + "conversation_starters": ["Help me"], + }, + } + base.update(overrides) + return base + + +# ── _normalize_project() ─────────────────────────────────────────────────────── + + +def test_TP1_normalize_project_happy_path(): + """_normalize_project() flattens the Compliance API payload correctly.""" + raw = _make_raw_project() + result = ComplianceAPIClient._normalize_project(raw) + + assert result["id"] == "g-p-TEST001" + assert result["name"] == "Test Project" + assert result["instructions"] == "Do the thing." + assert result["asset_type"] == "project" + assert result["owner_email"] == "test@acme.com" + assert result["visibility"] == "workspace" + assert len(result["tools"]) == 1 + assert result["tools"][0]["type"] == "canvas" + assert result["shared_user_count"] == 0 + + +def test_TP2_normalize_project_no_latest_config(): + """_normalize_project() with no latest_config returns None name and empty instructions.""" + raw = _make_raw_project() + del raw["latest_config"] + result = ComplianceAPIClient._normalize_project(raw) + + # Should not raise; name falls back to None, instructions to "" + assert result["asset_type"] == "project" + assert result["name"] is None or result["name"] == raw.get("name") + assert result["instructions"] == "" + assert result["tools"] == [] + assert result["files"] == [] + + +def test_TP3_normalize_project_no_instructions(): + """_normalize_project() with no instructions key returns empty string, not None.""" + raw = _make_raw_project() + del raw["latest_config"]["instructions"] + result = ComplianceAPIClient._normalize_project(raw) + + # Empty string is safe to pass to LLM prompts; None would break f-string or prompt templates + assert result["instructions"] == "" + + +def test_TP4_normalize_project_unix_timestamp(): + """_normalize_project() converts Unix int timestamps to datetime objects.""" + from datetime import timezone + + raw = _make_raw_project(created_at=1700000000) + result = ComplianceAPIClient._normalize_project(raw) + + assert result["created_at"] is not None + assert result["created_at"].tzinfo == timezone.utc + + +def test_TP5_normalize_project_nested_data_list_config(): + """_normalize_project() handles the nested data-list variant of latest_config.""" + raw = { + "id": "g-p-TEST002", + "owner_email": "a@b.com", + "sharing": {"visibility": "private", "recipients": {"data": []}}, + "latest_config": { + "data": [ + { + "name": "Nested Config Project", + "description": None, + "instructions": "Nested instructions.", + "tools": {"data": []}, + "files": {"data": []}, + } + ] + }, + } + result = ComplianceAPIClient._normalize_project(raw) + + assert result["name"] == "Nested Config Project" + assert result["instructions"] == "Nested instructions." + + +def test_TP6_normalize_project_asset_type_always_project(): + """_normalize_project() always sets asset_type='project' regardless of input.""" + raw = _make_raw_project() + raw["asset_type"] = "gpt" # even if caller mistakenly passes this + result = ComplianceAPIClient._normalize_project(raw) + + assert result["asset_type"] == "project" + + +# ── MockComplianceAPIClient ──────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_TP7_mock_client_fetch_all_projects(): + """MockComplianceAPIClient.fetch_all_projects() returns all mock projects with asset_type=project.""" + client = MockComplianceAPIClient() + projects = await client.fetch_all_projects("ws-test") + + assert len(projects) == len(MOCK_PROJECTS) + for p in projects: + assert p["asset_type"] == "project" + assert p["id"].startswith("g-p-") + assert p["name"] + + +@pytest.mark.asyncio +async def test_TP8_mock_client_fetch_projects_on_page_callback(): + """fetch_all_projects() fires on_page callback for each page.""" + client = MockComplianceAPIClient() + pages: list[int] = [] + + async def on_page(batch: list[dict], page: int): + pages.append(page) + + await client.fetch_all_projects("ws-test", on_page) + assert len(pages) >= 1 # at least one page fired + + +# ── Parallel fetch partial failure ──────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_TP9_projects_fetch_failure_does_not_abort_gpts(client: AsyncClient): + """ + When Projects fetch raises, the pipeline log should show a warning + and the pipeline should still complete with GPTs only. + + This tests the asyncio.gather(return_exceptions=True) partial-failure path. + """ + import asyncio + + from app.services.demo_state import _demo_state # noqa: PLC0415 + + # Enable demo mode so we don't need a real API key + original = _demo_state.copy() + _demo_state["enabled"] = True + _demo_state["size"] = "small" + + try: + # Monkey-patch fetch_all_projects to raise on the mock client + original_fetch = MockComplianceAPIClient.fetch_all_projects + + async def _failing_projects(self, workspace_id, on_page=None): + raise RuntimeError("Simulated Projects API failure") + + MockComplianceAPIClient.fetch_all_projects = _failing_projects + try: + # Should not raise even though projects fail + # (We can't easily call _execute_pipeline here without a full DB setup, + # but we can verify the gather behavior directly) + async def _raise(): + raise RuntimeError("Projects failed") + + async def _ok(): + return [{"id": "g-TEST", "asset_type": "gpt", "name": "My GPT"}] + + gpt_result, proj_result = await asyncio.gather( + _ok(), _raise(), return_exceptions=True + ) + assert not isinstance(gpt_result, Exception) + assert isinstance(proj_result, Exception) + assert "Projects failed" in str(proj_result) + finally: + MockComplianceAPIClient.fetch_all_projects = original_fetch + finally: + _demo_state.clear() + _demo_state.update(original) + + +# ── asset_type propagation via API ───────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_TP10_pipeline_api_returns_asset_type(client: AsyncClient): + """ + After running the demo pipeline, GET /pipeline/gpts returns items + with asset_type field present and containing 'gpt' or 'project'. + """ + # Trigger the demo pipeline + run_resp = await client.post("/api/v1/pipeline/run") + assert run_resp.status_code in (200, 202, 409) + + # Poll for completion (up to 30 seconds in real DB mode) + import asyncio + + for _ in range(30): + status = await client.get("/api/v1/pipeline/status") + data = status.json() + if not data.get("running"): + break + await asyncio.sleep(1) + + # Fetch GPTs/assets + gpts_resp = await client.get("/api/v1/pipeline/gpts") + assert gpts_resp.status_code == 200 + items = gpts_resp.json() + + if items: + # asset_type must be present on every item + for item in items: + assert "asset_type" in item, f"Missing asset_type on {item.get('id')}" + assert item["asset_type"] in ("gpt", "project"), ( + f"Unexpected asset_type={item['asset_type']} on {item.get('id')}" + ) diff --git a/frontend/src/components/employee/Portal.tsx b/frontend/src/components/employee/Portal.tsx index e392fef..1d0a14d 100644 --- a/frontend/src/components/employee/Portal.tsx +++ b/frontend/src/components/employee/Portal.tsx @@ -466,6 +466,7 @@ export default function Portal() { const { data: categories = [] } = useCategories(); const [search, setSearch] = useState(""); const [deptFilter, setDeptFilter] = useState("all"); + const [assetFilter, setAssetFilter] = useState<"all" | "gpt" | "project">("all"); const [sortBy, setSortBy] = useState("shared"); const [viewMode, setViewMode] = useState("grid"); const [searchResults, setSearchResults] = useState(null); @@ -491,10 +492,12 @@ export default function Portal() { const publicGpts = useMemo(() => allGpts.filter((g) => g.visibility !== "just-me"), [allGpts]); const deptCategories = useMemo(() => categories.filter((c) => c.enabled).sort((a, b) => a.sort_order - b.sort_order), [categories]); const isSearchMode = searchResults !== null; + const hasProjects = useMemo(() => publicGpts.some((g) => g.asset_type === "project"), [publicGpts]); const filtered = useMemo(() => { const base = searchResults ?? publicGpts; return base + .filter((g) => assetFilter === "all" || (assetFilter === "project" ? g.asset_type === "project" : g.asset_type === "gpt" || !g.asset_type)) .filter((g) => deptFilter === "all" || g.primary_category === deptFilter || (g.builder_categories ?? []).includes(deptFilter as string)) .sort((a, b) => { if (isSearchMode) return 0; @@ -533,6 +536,23 @@ export default function Portal() { )} + {!isSearchMode && hasProjects && ( +
+ {(["all", "gpt", "project"] as const).map((v) => ( + + ))} +
+ )} + {!isSearchMode && ( <>