16 changes: 9 additions & 7 deletions llm-cloud-run/.dockerignore
@@ -1,7 +1,9 @@
-# Vim artifacts
-*.sw?
-.*.sw?
-*~
-4913
-
-.DS_Store
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+.venv
+venv
+.git
+.pytest_cache
11 changes: 6 additions & 5 deletions llm-cloud-run/Dockerfile
@@ -1,13 +1,14 @@
-FROM python:3.13-slim
+FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8080

-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
+ENV PORT=8080
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
Empty file added llm-cloud-run/common/__init__.py
Empty file.
53 changes: 53 additions & 0 deletions llm-cloud-run/common/config.py
@@ -0,0 +1,53 @@
import os
from dataclasses import dataclass


def _env(name: str, default: str | None = None) -> str | None:
v = os.getenv(name)
if v is None or v == "":
return default
return v


@dataclass(frozen=True)
class Settings:
# GCP / Vertex
gcp_project: str | None = _env("GCP_PROJECT") or _env("GOOGLE_CLOUD_PROJECT")
gcp_location: str = _env("GCP_LOCATION", "us-central1") or "us-central1"

# BigQuery
bq_project: str | None = _env("BQ_PROJECT") # optional; defaults to gcp_project
bq_dataset: str = _env("BQ_DATASET", "vertex_rag_demo") or "vertex_rag_demo"
bq_notes_table: str = _env("BQ_NOTES_TABLE", "demo_Notes") or "demo_Notes"
bq_embeddings_table: str = _env("BQ_EMBEDDINGS_TABLE", "demo_NoteEmbeddings") or "demo_NoteEmbeddings"

# Column names (override if your schema differs)
notes_id_col: str = _env("NOTES_ID_COL", "note_id") or "note_id"
notes_content_col: str = _env("NOTES_CONTENT_COL", "content") or "content"
notes_source_col: str = _env("NOTES_SOURCE_COL", "source") or "source"
notes_created_col: str = _env("NOTES_CREATED_COL", "created_at") or "created_at"

emb_id_col: str = _env("EMB_ID_COL", "note_id") or "note_id"
emb_vector_col: str = _env("EMB_VECTOR_COL", "embedding") or "embedding" # ARRAY<FLOAT64>
emb_model_col: str = _env("EMB_MODEL_COL", "model") or "model"
emb_updated_col: str = _env("EMB_UPDATED_COL", "updated_at") or "updated_at"

# RAG settings
rag_top_k: int = int(_env("RAG_TOP_K", "3") or "3")
snippet_chars: int = int(_env("RAG_SNIPPET_CHARS", "800") or "800")

# LLM + Embeddings
gemini_model: str = _env("GEMINI_MODEL", "gemini-2.5-flash-lite") or "gemini-2.5-flash-lite"
embedding_model: str = _env("EMBEDDING_MODEL", "text-embedding-004") or "text-embedding-004"

# Optional: cap how many notes to load into memory cache
bq_max_notes: int = int(_env("BQ_MAX_NOTES", "5000") or "5000")

@property
def effective_bq_project(self) -> str | None:
return self.bq_project or self.gcp_project

def require_project(self) -> str:
if not self.gcp_project:
raise RuntimeError("GCP_PROJECT / GOOGLE_CLOUD_PROJECT not set.")
return self.gcp_project
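
For context, a minimal usage sketch of `Settings` (the project id and override values below are hypothetical). Note that the field defaults are evaluated when `common.config` is first imported, so the environment variables must be set before that import:

```python
import os

# Hypothetical values, for illustration only; set before importing common.config.
os.environ["GCP_PROJECT"] = "my-demo-project"
os.environ["RAG_TOP_K"] = "5"

from common.config import Settings  # defaults are read from the env at import time

settings = Settings()
print(settings.gcp_project)           # my-demo-project
print(settings.rag_top_k)             # 5
print(settings.effective_bq_project)  # my-demo-project (BQ_PROJECT unset, falls back)
print(settings.require_project())     # my-demo-project; raises RuntimeError when unset
```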
10 changes: 10 additions & 0 deletions llm-cloud-run/common/logging_utils.py
@@ -0,0 +1,10 @@
import logging
import os


def setup_logging() -> None:
level = os.getenv("LOG_LEVEL", "INFO").upper()
logging.basicConfig(
level=getattr(logging, level, logging.INFO),
format="%(asctime)s %(levelname)s %(name)s - %(message)s",
)
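
A quick sketch of the intended startup call (the logger name is arbitrary):

```python
import logging

from common.logging_utils import setup_logging

setup_logging()  # honors LOG_LEVEL (e.g. DEBUG); unknown values fall back to INFO
logging.getLogger("startup").info("service booting")
# emits something like: 2025-01-01 12:00:00,000 INFO startup - service booting
```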
Empty file added llm-cloud-run/llm/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions llm-cloud-run/llm/embeddings.py
@@ -0,0 +1,18 @@
from __future__ import annotations

from typing import List
from vertexai.language_models import TextEmbeddingModel

from common.config import Settings
from llm.vertex_init import init_vertex

_MODEL: TextEmbeddingModel | None = None


def embed_text(settings: Settings, text: str) -> List[float]:
"""Return an embedding vector for given text using Vertex Embeddings."""
global _MODEL
init_vertex(settings)
if _MODEL is None:
_MODEL = TextEmbeddingModel.from_pretrained(settings.embedding_model)
return _MODEL.get_embeddings([text])[0].values
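
A usage sketch, assuming GCP_PROJECT (or GOOGLE_CLOUD_PROJECT) is set and Vertex AI credentials are available; the query string is a hypothetical example. The module-level `_MODEL` cache means the model handle is constructed once per process:

```python
from common.config import Settings
from llm.embeddings import embed_text

settings = Settings()
vector = embed_text(settings, "How do I rotate my API keys?")
print(len(vector))  # text-embedding-004 returns 768-dimensional vectors by default
```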
36 changes: 36 additions & 0 deletions llm-cloud-run/llm/gemini_client.py
@@ -0,0 +1,36 @@
from __future__ import annotations

from vertexai.preview.generative_models import GenerativeModel, Part

from common.config import Settings
from llm.vertex_init import init_vertex


def generate(
settings: Settings,
parts: list[Part] | Part,
*,
temperature: float = 0.3,
max_output_tokens: int = 1024,
) -> str:
"""
Generate text using a Gemini model on Vertex AI.

`parts` can be:
- a single Part (text or image)
- a list of Parts (multimodal prompt)
"""
init_vertex(settings)

model = GenerativeModel(settings.gemini_model)

response = model.generate_content(
parts,
generation_config={
"temperature": temperature,
"max_output_tokens": max_output_tokens,
},
)

# Vertex responses expose `.text` for convenience
return getattr(response, "text", "") or ""
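
A sketch of both call shapes `generate` accepts; the GCS URI below is a hypothetical placeholder:

```python
from vertexai.preview.generative_models import Part

from common.config import Settings
from llm.gemini_client import generate

settings = Settings()

# Single text Part
print(generate(settings, Part.from_text("Summarize BigQuery in one sentence.")))

# Multimodal prompt: text plus an image referenced by a (placeholder) GCS URI
parts = [
    Part.from_text("Describe this diagram."),
    Part.from_uri("gs://my-bucket/diagram.png", mime_type="image/png"),
]
print(generate(settings, parts, temperature=0.0, max_output_tokens=256))
```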
21 changes: 21 additions & 0 deletions llm-cloud-run/llm/vertex_init.py
@@ -0,0 +1,21 @@
import logging
import vertexai
from common.config import Settings

_INITIALIZED = False


def init_vertex(settings: Settings) -> None:
"""Initialize Vertex AI once per process."""
global _INITIALIZED
if _INITIALIZED:
return

project = settings.require_project()
vertexai.init(project=project, location=settings.gcp_location)
logging.getLogger(__name__).info(
"Vertex AI initialized (project=%s, location=%s).",
project,
settings.gcp_location,
)
_INITIALIZED = True
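
The `_INITIALIZED` guard makes repeated calls cheap, so every entry point can call `init_vertex` defensively; a small sketch:

```python
from common.config import Settings
from llm.vertex_init import init_vertex

settings = Settings()
init_vertex(settings)  # initializes Vertex AI and logs once
init_vertex(settings)  # no-op: the module-level guard short-circuits
```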