16 changes: 9 additions & 7 deletions llm-cloud-run/.dockerignore
@@ -1,7 +1,9 @@
-# Vim artifacts
-*.sw?
-.*.sw?
-*~
-4913
-
-.DS_Store
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+.venv
+venv
+.git
+.pytest_cache
11 changes: 6 additions & 5 deletions llm-cloud-run/Dockerfile
@@ -1,13 +1,14 @@
-FROM python:3.13-slim
+FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8080

-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
+ENV PORT=8080
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
Empty file added llm-cloud-run/common/__init__.py
Empty file.
53 changes: 53 additions & 0 deletions llm-cloud-run/common/config.py
@@ -0,0 +1,53 @@
import os
from dataclasses import dataclass


def _env(name: str, default: str | None = None) -> str | None:
v = os.getenv(name)
if v is None or v == "":
return default
return v


@dataclass(frozen=True)
class Settings:
# GCP / Vertex
gcp_project: str | None = _env("GCP_PROJECT") or _env("GOOGLE_CLOUD_PROJECT")
gcp_location: str = _env("GCP_LOCATION", "us-central1") or "us-central1"

# BigQuery
bq_project: str | None = _env("BQ_PROJECT") # optional; defaults to gcp_project
bq_dataset: str = _env("BQ_DATASET", "vertex_rag_demo") or "vertex_rag_demo"
bq_notes_table: str = _env("BQ_NOTES_TABLE", "demo_Notes") or "demo_Notes"
bq_embeddings_table: str = _env("BQ_EMBEDDINGS_TABLE", "demo_NoteEmbeddings") or "demo_NoteEmbeddings"

# Column names (override if your schema differs)
notes_id_col: str = _env("NOTES_ID_COL", "note_id") or "note_id"
notes_content_col: str = _env("NOTES_CONTENT_COL", "content") or "content"
notes_source_col: str = _env("NOTES_SOURCE_COL", "source") or "source"
notes_created_col: str = _env("NOTES_CREATED_COL", "created_at") or "created_at"

emb_id_col: str = _env("EMB_ID_COL", "note_id") or "note_id"
emb_vector_col: str = _env("EMB_VECTOR_COL", "embedding") or "embedding" # ARRAY<FLOAT64>
emb_model_col: str = _env("EMB_MODEL_COL", "model") or "model"
emb_updated_col: str = _env("EMB_UPDATED_COL", "updated_at") or "updated_at"

# RAG settings
rag_top_k: int = int(_env("RAG_TOP_K", "3") or "3")
snippet_chars: int = int(_env("RAG_SNIPPET_CHARS", "800") or "800")

# LLM + Embeddings
gemini_model: str = _env("GEMINI_MODEL", "gemini-2.5-flash-lite") or "gemini-2.5-flash-lite"
embedding_model: str = _env("EMBEDDING_MODEL", "text-embedding-004") or "text-embedding-004"

# Optional: cap how many notes to load into memory cache
bq_max_notes: int = int(_env("BQ_MAX_NOTES", "5000") or "5000")

@property
def effective_bq_project(self) -> str | None:
return self.bq_project or self.gcp_project

def require_project(self) -> str:
if not self.gcp_project:
raise RuntimeError("GCP_PROJECT / GOOGLE_CLOUD_PROJECT not set.")
return self.gcp_project
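
For context, a minimal usage sketch of `Settings` (the project id and override values below are hypothetical). Note that the field defaults are evaluated when `common.config` is first imported, so the environment variables must be set before that import:

```python
import os

# Hypothetical values, for illustration only; set before importing common.config.
os.environ["GCP_PROJECT"] = "my-demo-project"
os.environ["RAG_TOP_K"] = "5"

from common.config import Settings  # defaults are read from the env at import time

settings = Settings()
print(settings.gcp_project)           # my-demo-project
print(settings.rag_top_k)             # 5
print(settings.effective_bq_project)  # my-demo-project (BQ_PROJECT unset, falls back)
print(settings.require_project())     # my-demo-project; raises RuntimeError when unset
```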
10 changes: 10 additions & 0 deletions llm-cloud-run/common/logging_utils.py
@@ -0,0 +1,10 @@
import logging
import os


def setup_logging() -> None:
level = os.getenv("LOG_LEVEL", "INFO").upper()
logging.basicConfig(
level=getattr(logging, level, logging.INFO),
format="%(asctime)s %(levelname)s %(name)s - %(message)s",
)
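
A quick sketch of the intended startup call (the logger name is arbitrary):

```python
import logging

from common.logging_utils import setup_logging

setup_logging()  # honors LOG_LEVEL (e.g. DEBUG); unknown values fall back to INFO
logging.getLogger("startup").info("service booting")
# emits something like: 2025-01-01 12:00:00,000 INFO startup - service booting
```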
Empty file added llm-cloud-run/llm/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions llm-cloud-run/llm/embeddings.py
@@ -0,0 +1,18 @@
from __future__ import annotations

from typing import List
from vertexai.language_models import TextEmbeddingModel

from common.config import Settings
from llm.vertex_init import init_vertex

_MODEL: TextEmbeddingModel | None = None


def embed_text(settings: Settings, text: str) -> List[float]:
"""Return an embedding vector for given text using Vertex Embeddings."""
global _MODEL
init_vertex(settings)
if _MODEL is None:
_MODEL = TextEmbeddingModel.from_pretrained(settings.embedding_model)
return _MODEL.get_embeddings([text])[0].values
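
A usage sketch, assuming GCP_PROJECT (or GOOGLE_CLOUD_PROJECT) is set and Vertex AI credentials are available; the query string is a hypothetical example. The module-level `_MODEL` cache means the model handle is constructed once per process:

```python
from common.config import Settings
from llm.embeddings import embed_text

settings = Settings()
vector = embed_text(settings, "How do I rotate my API keys?")
print(len(vector))  # text-embedding-004 returns 768-dimensional vectors by default
```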
36 changes: 36 additions & 0 deletions llm-cloud-run/llm/gemini_client.py
@@ -0,0 +1,36 @@
from __future__ import annotations

from vertexai.preview.generative_models import GenerativeModel, Part

from common.config import Settings
from llm.vertex_init import init_vertex


def generate(
settings: Settings,
parts: list[Part] | Part,
*,
temperature: float = 0.3,
max_output_tokens: int = 1024,
) -> str:
"""
Generate text using a Gemini model on Vertex AI.

`parts` can be:
- a single Part (text or image)
- a list of Parts (multimodal prompt)
"""
init_vertex(settings)

model = GenerativeModel(settings.gemini_model)

response = model.generate_content(
parts,
generation_config={
"temperature": temperature,
"max_output_tokens": max_output_tokens,
},
)

# Vertex responses expose `.text` for convenience
return getattr(response, "text", "") or ""
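
A sketch of both call shapes `generate` accepts; the GCS URI below is a hypothetical placeholder:

```python
from vertexai.preview.generative_models import Part

from common.config import Settings
from llm.gemini_client import generate

settings = Settings()

# Single text Part
print(generate(settings, Part.from_text("Summarize BigQuery in one sentence.")))

# Multimodal prompt: text plus an image referenced by a (placeholder) GCS URI
parts = [
    Part.from_text("Describe this diagram."),
    Part.from_uri("gs://my-bucket/diagram.png", mime_type="image/png"),
]
print(generate(settings, parts, temperature=0.0, max_output_tokens=256))
```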
21 changes: 21 additions & 0 deletions llm-cloud-run/llm/vertex_init.py
@@ -0,0 +1,21 @@
import logging
import vertexai
from common.config import Settings

_INITIALIZED = False


def init_vertex(settings: Settings) -> None:
"""Initialize Vertex AI once per process."""
global _INITIALIZED
if _INITIALIZED:
return

project = settings.require_project()
vertexai.init(project=project, location=settings.gcp_location)
logging.getLogger(__name__).info(
"Vertex AI initialized (project=%s, location=%s).",
project,
settings.gcp_location,
)
_INITIALIZED = True
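
The `_INITIALIZED` guard makes repeated calls cheap, so every entry point can call `init_vertex` defensively; a small sketch:

```python
from common.config import Settings
from llm.vertex_init import init_vertex

settings = Settings()
init_vertex(settings)  # initializes Vertex AI and logs once
init_vertex(settings)  # no-op: the module-level guard short-circuits
```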