From 12425290a3c266afe939652e83f41e9dfe9f1753 Mon Sep 17 00:00:00 2001 From: mtalvi Date: Mon, 15 Dec 2025 13:47:22 +0200 Subject: [PATCH 1/4] rag as a service - cluster --- .../charts/backend/templates/configmap.yaml | 7 +- .../charts/backend/templates/deployment.yaml | 10 - .../charts/backend/templates/init-job.yaml | 9 - .../charts/backend/templates/rag-pvc.yaml | 19 - .../charts/backend/values.yaml | 28 +- .../ansible-log-monitor/charts/rag/Chart.yaml | 9 + .../charts/rag/templates/NOTES.txt | 36 + .../charts/rag/templates/_helpers.tpl | 63 ++ .../charts/rag/templates/deployment.yaml | 101 +++ .../charts/rag/templates/hpa.yaml | 33 + .../charts/rag/templates/role.yaml | 13 + .../charts/rag/templates/rolebinding.yaml | 17 + .../charts/rag/templates/service.yaml | 16 + .../charts/rag/templates/serviceaccount.yaml | 14 + .../charts/rag/values.yaml | 109 +++ .../ansible-log-monitor/global-values.yaml | 3 +- docs/RAG_SERVICE_MIGRATION.md | 740 ++++++++++++++++++ init_pipeline.py | 187 +++-- pyproject.toml | 1 + services/rag/Containerfile | 31 + services/rag/README.md | 151 ++++ services/rag/index_loader.py | 189 +++++ services/rag/main.py | 281 +++++++ services/rag/pyproject.toml | 18 + .../get_more_context_agent/rag_handler.py | 179 +++-- src/alm/database.py | 19 +- src/alm/main_fastapi.py | 9 + src/alm/models.py | 46 ++ src/alm/rag/embed_and_index.py | 131 ++++ uv.lock | 252 +++--- 30 files changed, 2417 insertions(+), 304 deletions(-) delete mode 100644 deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/Chart.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/NOTES.txt create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/_helpers.tpl create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/deployment.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/hpa.yaml create mode 100644 
deploy/helm/ansible-log-monitor/charts/rag/templates/role.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/rolebinding.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/service.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/serviceaccount.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/values.yaml create mode 100644 docs/RAG_SERVICE_MIGRATION.md create mode 100644 services/rag/Containerfile create mode 100644 services/rag/README.md create mode 100644 services/rag/index_loader.py create mode 100644 services/rag/main.py create mode 100644 services/rag/pyproject.toml diff --git a/deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml b/deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml index a83b6bf..6d30a15 100644 --- a/deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml +++ b/deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml @@ -19,11 +19,14 @@ data: {{- if .Values.rag.enabled }} # RAG Configuration RAG_ENABLED: {{ .Values.rag.enabled | quote }} - # Model is hardcoded to nomic-ai/nomic-embed-text-v1.5, no env var needed - # API URL defaults to http://alm-embedding:8080 (local cluster service) + # RAG Service URL (microservice endpoint) + RAG_SERVICE_URL: {{ .Values.rag.serviceUrl | default "http://alm-rag:8002" | quote }} + # Embedding service URL (for init job, not used by backend) EMBEDDINGS_LLM_URL: {{ .Values.rag.embedding.apiUrl | default "http://alm-embedding:8080" | quote }} + # Data paths (for init job only) DATA_DIR: {{ .Values.rag.dataDir | quote }} KNOWLEDGE_BASE_DIR: {{ .Values.rag.knowledgeBaseDir | quote }} + # Query configuration RAG_TOP_K: {{ .Values.rag.query.topK | quote }} RAG_TOP_N: {{ .Values.rag.query.topN | quote }} RAG_SIMILARITY_THRESHOLD: {{ .Values.rag.query.similarityThreshold | quote }} diff --git 
a/deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml b/deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml index 4e061f4..9127ec8 100644 --- a/deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml +++ b/deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml @@ -76,20 +76,10 @@ spec: {{- toYaml . | nindent 12 }} {{- end }} volumeMounts: - {{- if .Values.rag.enabled }} - - name: rag-data - mountPath: {{ .Values.rag.pvcMountPath }} - readOnly: true - {{- end }} {{- with .Values.volumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} volumes: - {{- if .Values.rag.enabled }} - - name: rag-data - persistentVolumeClaim: - claimName: {{ include "backend.fullname" . }}-rag-data - {{- end }} {{- with .Values.volumes }} {{- toYaml . | nindent 8 }} {{- end }} diff --git a/deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml b/deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml index 689ef65..dd46f78 100644 --- a/deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml +++ b/deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml @@ -107,10 +107,6 @@ spec: volumeMounts: - name: init-sync mountPath: /init-sync - {{- if .Values.rag.enabled }} - - name: rag-data - mountPath: {{ .Values.rag.pvcMountPath }} - {{- end }} {{- with .Values.volumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} @@ -133,11 +129,6 @@ spec: volumes: - name: init-sync emptyDir: {} - {{- if .Values.rag.enabled }} - - name: rag-data - persistentVolumeClaim: - claimName: {{ include "backend.fullname" . }}-rag-data - {{- end }} {{- with .Values.volumes }} {{- toYaml . 
| nindent 8 }} {{- end }} diff --git a/deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml b/deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml deleted file mode 100644 index d23961b..0000000 --- a/deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml +++ /dev/null @@ -1,19 +0,0 @@ -{{- if .Values.rag.enabled }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "backend.fullname" . }}-rag-data - labels: - {{- include "backend.labels" . | nindent 4 }} - app.kubernetes.io/component: rag-storage -spec: - accessModes: - - {{ .Values.rag.persistence.accessMode }} - {{- if .Values.rag.persistence.storageClassName }} - storageClassName: {{ .Values.rag.persistence.storageClassName }} - {{- end }} - resources: - requests: - storage: {{ .Values.rag.persistence.size }} -{{- end }} - diff --git a/deploy/helm/ansible-log-monitor/charts/backend/values.yaml b/deploy/helm/ansible-log-monitor/charts/backend/values.yaml index be57e34..cfa9383 100644 --- a/deploy/helm/ansible-log-monitor/charts/backend/values.yaml +++ b/deploy/helm/ansible-log-monitor/charts/backend/values.yaml @@ -235,7 +235,11 @@ rag: # Enable or disable RAG functionality enabled: true - # Embedding model configuration + # RAG Service URL (microservice endpoint) + # Backend pods communicate with RAG service via HTTP + serviceUrl: "http://alm-rag:8002" + + # Embedding model configuration (used by init job for building index) # NOTE: API credentials (apiKey, apiUrl, modelName) are provided during 'make install' # and stored in the 'model-secret' Kubernetes secret embedding: @@ -244,24 +248,12 @@ rag: apiUrl: "http://alm-embedding:8080" # TEI service URL (defaults to local cluster service) port: 8080 # Port for the embedding service (TEI) - # Data paths + # Data paths (used by init job for knowledge base PDFs) + # Note: PDFs should be baked into the container image at /app/data/knowledge_base + # The init job will read PDFs from the image 
and process them + # The RAG index (embeddings) is stored in PostgreSQL dataDir: "/app/data/rag" - knowledgeBaseDir: "/app/data/rag/knowledge_base" - # PVC mount path (mounted directly at /app/data/rag) - pvcMountPath: "/app/data/rag" - - # Persistence configuration for RAG index storage - persistence: - # Storage size for RAG index and metadata - size: "2Gi" - # Access mode: ReadWriteOnce (RWO) is used because: - # 1. Init job writes the index once - # 2. Backend pods only read (never write) - # 3. AWS EBS (gp3-csi) only supports RWO - # Note: For RWO, all backend pods must be scheduled on the same node as the PVC - accessMode: "ReadWriteOnce" - # Storage class (leave empty for default) - storageClassName: "" + knowledgeBaseDir: "/app/data/knowledge_base" # PDFs should be in container image # Query configuration query: diff --git a/deploy/helm/ansible-log-monitor/charts/rag/Chart.yaml b/deploy/helm/ansible-log-monitor/charts/rag/Chart.yaml new file mode 100644 index 0000000..269d305 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v2 +name: rag +description: A Helm chart for RAG service + +type: application + +version: 0.1.0 +appVersion: "0.1.0" + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/NOTES.txt b/deploy/helm/ansible-log-monitor/charts/rag/templates/NOTES.txt new file mode 100644 index 0000000..e23d794 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/NOTES.txt @@ -0,0 +1,36 @@ +1. 
Get the application URL by running these commands: +{{- if .Values.httpRoute.enabled }} +{{- if .Values.httpRoute.hostnames }} + export APP_HOSTNAME={{ .Values.httpRoute.hostnames | first }} +{{- else }} + export APP_HOSTNAME=$(kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o jsonpath="{.spec.listeners[0].hostname}") + {{- end }} +{{- if and .Values.httpRoute.rules (first .Values.httpRoute.rules).matches (first (first .Values.httpRoute.rules).matches).path.value }} + echo "Visit http://$APP_HOSTNAME{{ (first (first .Values.httpRoute.rules).matches).path.value }} to use your application" + + NOTE: Your HTTPRoute depends on the listener configuration of your gateway and your HTTPRoute rules. + The rules can be set for path, method, header and query parameters. + You can check the gateway configuration with 'kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o yaml' +{{- end }} +{{- else if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "rag.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "rag.fullname" . 
}}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "rag.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "rag.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/_helpers.tpl b/deploy/helm/ansible-log-monitor/charts/rag/templates/_helpers.tpl new file mode 100644 index 0000000..b86565f --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/_helpers.tpl @@ -0,0 +1,63 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "rag.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "rag.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "rag.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "rag.labels" -}} +helm.sh/chart: {{ include "rag.chart" . }} +{{ include "rag.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "rag.selectorLabels" -}} +app.kubernetes.io/name: {{ include "rag.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "rag.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "rag.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/deployment.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/deployment.yaml new file mode 100644 index 0000000..f399a55 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/deployment.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "rag.fullname" . }} + labels: + {{- include "rag.labels" . 
| nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "rag.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "rag.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "rag.serviceAccountName" . }} + initContainers: + - name: wait-for-postgres + image: postgres:15-alpine + command: + - sh + - -c + - | + until pg_isready -d "$DATABASE_URL"; do + echo "Waiting for PostgreSQL to be ready..." + sleep 5 + done + echo "PostgreSQL is ready!" + env: + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: pgvector + key: uri + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.env }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/hpa.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/hpa.yaml new file mode 100644 index 0000000..e0e7bc2 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/hpa.yaml @@ -0,0 +1,33 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "rag.fullname" . }} + labels: + {{- include "rag.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "rag.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/role.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/role.yaml new file mode 100644 index 0000000..c914019 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/role.yaml @@ -0,0 +1,13 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "rag.fullname" . }}-job-reader + labels: + {{- include "rag.labels" . 
| nindent 4 }} +rules: + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch"] +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/rolebinding.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/rolebinding.yaml new file mode 100644 index 0000000..c6aeed8 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/rolebinding.yaml @@ -0,0 +1,17 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "rag.fullname" . }}-job-reader + labels: + {{- include "rag.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "rag.fullname" . }}-job-reader +subjects: + - kind: ServiceAccount + name: {{ include "rag.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/service.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/service.yaml new file mode 100644 index 0000000..1b0a09f --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "rag.fullname" . }} + labels: + {{- include "rag.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "rag.selectorLabels" . | nindent 4 }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/serviceaccount.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/serviceaccount.yaml new file mode 100644 index 0000000..4e17c2e --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/serviceaccount.yaml @@ -0,0 +1,14 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "rag.serviceAccountName" . 
}} + labels: + {{- include "rag.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/values.yaml b/deploy/helm/ansible-log-monitor/charts/rag/values.yaml new file mode 100644 index 0000000..e09165f --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/values.yaml @@ -0,0 +1,109 @@ +# Default values for rag service. +replicaCount: 1 + +image: + repository: quay.io/rh-ai-quickstart/alm-rag + pullPolicy: Always + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + create: true + automount: true + annotations: {} + name: "" + +rbac: + create: true + +podAnnotations: {} +podLabels: {} + +podSecurityContext: {} + +securityContext: {} + +service: + type: ClusterIP + port: 8002 + targetPort: 8002 + +ingress: + enabled: false + className: "" + annotations: {} + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + +httpRoute: + enabled: false + annotations: {} + parentRefs: + - name: gateway + sectionName: http + hostnames: + - chart-example.local + rules: + - matches: + - path: + type: PathPrefix + value: /rag + +env: + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: pgvector + key: uri + - name: EMBEDDINGS_LLM_URL + value: "http://alm-embedding:8080" + - name: RAG_MODEL_NAME + value: "nomic-ai/nomic-embed-text-v1.5" + - name: PORT + value: "8002" + +resources: + requests: + memory: "512Mi" + cpu: "200m" + limits: + memory: "2Gi" + cpu: "1000m" + +livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 60 + periodSeconds: 30 + +readinessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 3 + 
targetCPUUtilizationPercentage: 80 + +volumes: [] + +volumeMounts: [] + +nodeSelector: {} + +tolerations: [] + +affinity: {} + diff --git a/deploy/helm/ansible-log-monitor/global-values.yaml b/deploy/helm/ansible-log-monitor/global-values.yaml index 36fbda6..a5c749d 100644 --- a/deploy/helm/ansible-log-monitor/global-values.yaml +++ b/deploy/helm/ansible-log-monitor/global-values.yaml @@ -35,4 +35,5 @@ global: annotationInterface: "alm-annotation-interface" clustering: "alm-clustering" ui: "alm-ui" - embedding: "alm-embedding" \ No newline at end of file + embedding: "alm-embedding" + rag: "alm-rag" \ No newline at end of file diff --git a/docs/RAG_SERVICE_MIGRATION.md b/docs/RAG_SERVICE_MIGRATION.md new file mode 100644 index 0000000..b4401da --- /dev/null +++ b/docs/RAG_SERVICE_MIGRATION.md @@ -0,0 +1,740 @@ +# RAG Service Migration Guide + +## Overview + +This document describes the migration from PVC-based RAG storage to a dedicated RAG microservice with PostgreSQL storage. This change eliminates ReadWriteOnce (RWO) constraints, reduces resource duplication, and simplifies the architecture. + +## What Changed + +### Before (PVC-based) +- RAG index stored on PersistentVolumeClaim (PVC) +- Each backend pod loaded FAISS index from PVC +- All backend pods required to be on same node (RWO constraint) +- N backend pods = N copies of FAISS index in memory +- Index updates required PVC rebuild and pod restarts + +### After (RAG Service + PostgreSQL) +- RAG index stored in PostgreSQL (`ragembedding` table) +- Single RAG service loads FAISS index from PostgreSQL +- Backend pods make HTTP calls to RAG service +- Backend pods can run on any node (no constraints) +- 1 RAG service = 1 copy of FAISS index in memory +- Index updates via PostgreSQL (no pod restarts needed) + +## Architecture + +``` +┌─────────────────────┐ +│ Init Job Pod │ +│ (alm-backend-init) │ +│ │ +│ 1. Parse PDFs │ +│ 2. Generate │ +│ embeddings │ +│ 3. 
Save to │ +│ PostgreSQL │ +└──────────┬──────────┘ + │ + │ Writes embeddings + ▼ +┌─────────────────────┐ +│ PostgreSQL │ +│ │ +│ - ragembedding │ +│ table │ +│ - pgvector │ +│ extension │ +└──────────┬──────────┘ + │ + │ Reads embeddings + │ (polls every 5s) + ▼ +┌─────────────────────┐ +│ RAG Service Pod │ +│ (alm-rag) │ +│ │ +│ ┌───────────────┐ │ +│ │ Background │ │ +│ │ Task: Poll │ │ +│ │ PostgreSQL │ │ +│ └───────────────┘ │ +│ │ +│ ┌───────────────┐ │ +│ │ FAISS Index │ │ (in-memory) +│ │ (loaded from │ │ +│ │ PostgreSQL) │ │ +│ └───────────────┘ │ +└──────────┬──────────┘ + │ + │ HTTP /rag/query + │ + ▼ +┌─────────────────────┐ +│ Backend Pods │ +│ (alm-backend) │ +│ │ +│ - Pod 1 │ +│ - Pod 2 │ +│ - Pod N │ +│ │ +│ All make HTTP │ +│ calls to RAG │ +│ service │ +└─────────────────────┘ +``` + +## Init Job and RAG Service Relationship + +### Overview + +The init job and RAG service have a **producer-consumer relationship** coordinated through PostgreSQL: + +- **Init Job** = **Producer**: Creates and saves embeddings to PostgreSQL +- **RAG Service** = **Consumer**: Reads embeddings from PostgreSQL and serves queries +- **PostgreSQL** = **Coordination Point**: Shared data store, no direct communication needed + +### Key Characteristics + +1. **No Direct Dependency**: Services don't wait for each other to start +2. **Asynchronous Coordination**: RAG service polls PostgreSQL, init job polls RAG service HTTP endpoint +3. **Graceful Degradation**: Both services can start independently and handle missing data gracefully +4. **Data Persistence**: Embeddings persist in PostgreSQL across pod restarts + +### Detailed Flow and Timeline + +``` +Time Init Job PostgreSQL RAG Service +───────────────────────────────────────────────────────────────────────── +T+0s Pod starts ── Pod starts + │ │ │ +T+5s Wait for PostgreSQL ── Wait for PostgreSQL + │ │ │ +T+10s ── Ready ── + │ │ │ +T+15s PostgreSQL ready! ── PostgreSQL ready! 
+ │ │ │ + │ │ Start background task + │ │ Poll for embeddings... + │ │ (no embeddings yet) + │ │ │ +T+30s Building RAG index... ── Still polling... + - Parse PDFs │ (every 5 seconds) + - Generate embeddings │ │ + │ │ │ +T+60s Saving embeddings... Writing embeddings... ── + │ │ │ +T+65s Index complete! Embeddings saved! ── + │ │ │ +T+70s ── ── Found embeddings! + │ │ Loading index... + │ │ │ +T+75s ── ── Index loaded! ✓ + │ │ Service ready! + │ │ │ +T+80s Waiting for RAG service... ── ── + (polls /ready endpoint) │ │ + │ │ │ +T+85s RAG service ready! ── ── + │ │ │ +T+90s Running training pipeline ── ── + (uses RAG service) │ │ + │ │ │ +T+95s Querying RAG service... ── Serving queries ✓ + │ │ │ +``` + +### Phase-by-Phase Breakdown + +#### Phase 1: Parallel Startup (T+0s to T+15s) +- **Init Job**: Starts, waits for PostgreSQL via initContainer +- **RAG Service**: Starts, waits for PostgreSQL via initContainer +- **No Dependency**: Both can start simultaneously, no blocking + +#### Phase 2: Data Preparation - Init Job (T+15s to T+65s) +- **Init Job Actions**: + 1. Checks if embeddings already exist (skips if found, unless `RAG_FORCE_REBUILD=true`) + 2. Reads PDFs from container image (`/app/data/knowledge_base`) + 3. Parses PDFs into chunks using `AnsibleErrorParser` + 4. Generates embeddings using embedding service (TEI) + 5. Saves embeddings to PostgreSQL `ragembedding` table +- **PostgreSQL**: Receives and stores embeddings +- **RAG Service**: Continues polling PostgreSQL (embeddings not found yet) + +#### Phase 3: Index Loading - RAG Service (T+65s to T+75s) +- **RAG Service Actions**: + 1. Background task polls PostgreSQL every 5 seconds + 2. When embeddings found: queries all embeddings from `ragembedding` table + 3. Parses pgvector string format to numpy arrays + 4. Builds FAISS IndexFlatIP in memory + 5. Creates error store and index-to-error-id mapping + 6. 
Marks service as ready (`/ready` endpoint returns 200) +- **PostgreSQL**: Serves embedding queries +- **Init Job**: Continues waiting for RAG service + +#### Phase 4: Coordination - Init Job Waits (T+75s to T+85s) +- **Init Job Actions**: + 1. After saving embeddings, calls `wait_for_rag_service()` + 2. Polls `http://alm-rag:8002/ready` endpoint every 5 seconds + 3. Timeout: 5 minutes (300 seconds) + 4. Once RAG service ready, proceeds to training pipeline +- **RAG Service**: Responds to `/ready` checks (returns 200 when ready) +- **If Timeout**: Init job continues with warning, RAG queries may fail + +#### Phase 5: Runtime - Training Pipeline (T+85s+) +- **Init Job**: Runs `training_pipeline()` which: + - Processes alerts + - Uses RAG service for context retrieval (HTTP calls) + - Saves results to database +- **RAG Service**: Serves queries via `/rag/query` endpoint +- **Backend Pods**: (After init job completes) Can query RAG service for context + +### Communication Patterns + +#### Init Job → PostgreSQL +- **Method**: Direct database writes via SQLModel +- **When**: During `build_rag_index()` function +- **What**: Inserts/updates `ragembedding` table +- **Frequency**: Once per init job run + +#### RAG Service → PostgreSQL +- **Method**: Raw SQL queries via asyncpg +- **When**: Background polling task (every 5 seconds) +- **What**: SELECT queries from `ragembedding` table +- **Frequency**: Every 5 seconds until embeddings found, then once at startup + +#### Init Job → RAG Service +- **Method**: HTTP GET requests +- **When**: After saving embeddings, before training pipeline +- **What**: Polls `/ready` endpoint +- **Frequency**: Every 5 seconds, timeout 5 minutes + +#### Backend → RAG Service +- **Method**: HTTP POST requests +- **When**: During training pipeline and runtime queries +- **What**: `/rag/query` endpoint with query text +- **Frequency**: As needed for context retrieval + +### Error Handling and Resilience + +1. 
**RAG Service Startup Failure**: + - Service starts but stays in "not ready" state + - Background task continues polling + - Service becomes ready when embeddings available + - No crash, graceful degradation + +2. **Init Job Failure**: + - RAG service continues polling (will timeout after 10 minutes) + - Can be restarted independently + - No impact on RAG service pod + +3. **Embeddings Not Found**: + - RAG service logs warning, continues polling + - Init job can be rerun to populate embeddings + - No data loss (embeddings persist in PostgreSQL) + +4. **RAG Service Not Ready**: + - Init job waits up to 5 minutes + - If timeout: continues with warning + - Training pipeline proceeds, RAG queries may fail gracefully + +### Why This Design? + +1. **Eliminates Circular Dependencies**: + - Old design: RAG service waited for init job, init job needed RAG service → deadlock + - New design: Both start independently, coordinate via PostgreSQL + +2. **Faster Startup**: + - Services don't block each other + - Parallel execution possible + - No sequential waiting + +3. **Resilience**: + - Services can restart independently + - Data persists in PostgreSQL + - Graceful degradation if one service fails + +4. **Scalability**: + - RAG service can scale independently + - Multiple backend pods share single RAG service + - No resource duplication + +## Key Design Decisions + +### 1. Non-Blocking Startup +- **Problem**: RAG service was crashing if embeddings weren't available immediately +- **Solution**: Background task loads index asynchronously, service starts immediately +- **Benefit**: No circular dependencies, service can start before init job completes + +### 2. PostgreSQL as Coordination Point +- **Problem**: Need to coordinate between init job and RAG service +- **Solution**: PostgreSQL acts as shared data store, both services read/write independently +- **Benefit**: No direct dependencies, both services can start in parallel + +### 3. 
Embedding Persistence +- **Problem**: Training pipeline was deleting `ragembedding` table +- **Solution**: Modified `init_tables()` to preserve `ragembedding` table when `delete_tables=True` +- **Benefit**: Embeddings persist across training pipeline runs + +### 4. pgvector String Parsing +- **Problem**: pgvector returns embeddings as strings when queried via raw SQL +- **Solution**: Added parsing logic to handle both array and string representations +- **Benefit**: Robust handling of different PostgreSQL response formats + +## Components + +### 1. Database Schema +- **Table**: `ragembedding` (SQLModel) +- **Fields**: + - `error_id` (primary key) - Unique identifier for each error + - `embedding` (Vector(768)) - pgvector type, 768 dimensions for nomic-embed-text-v1.5 + - `error_title` - Title of the error + - `error_metadata` (JSON) - Complete error metadata including sections + - `model_name` - Embedding model used + - `embedding_dim` - Dimension of embedding vector (768) + - `created_at`, `updated_at` - Timestamps + +### 2. RAG Service (`services/rag/`) +- **Technology**: FastAPI +- **Port**: 8002 +- **Endpoints**: + - `POST /rag/query` - Query knowledge base for relevant errors + - `GET /health` - Health check (returns status even if index not loaded) + - `GET /ready` - Readiness check (returns 503 until index loaded) + - `POST /rag/reload` - Reload index from PostgreSQL without restart + +### 3. Backend Changes +- **Removed**: FAISS loading, PVC mounting, local index management +- **Added**: HTTP client (`httpx.AsyncClient`) for RAG service communication +- **Interface**: Same API (no changes to calling code) +- **Cleanup**: Proper HTTP client shutdown on application shutdown + +### 4. 
Init Job +- **Changed**: Saves embeddings to PostgreSQL instead of PVC +- **PDFs**: Read directly from container image (`/app/data/knowledge_base`) +- **Coordination**: Waits for RAG service to be ready before running training pipeline +- **Persistence**: Embeddings persist across training pipeline runs + +## Migration Steps + +### Step 1: Database Migration + +The database schema is automatically created when `init_tables()` is called. The pgvector extension is enabled automatically: + +```sql +CREATE EXTENSION IF NOT EXISTS vector; +``` + +### Step 2: Build and Deploy RAG Service + +1. **Build RAG service image** (from project root): + ```bash + podman build -f services/rag/Containerfile -t quay.io/rh-ai-quickstart/alm-rag:latest . + podman push quay.io/rh-ai-quickstart/alm-rag:latest + ``` + +2. **Deploy RAG service** (via Helm): + ```bash + helm upgrade --install ansible-log-monitor ./deploy/helm/ansible-log-monitor + ``` + +### Step 3: Run Init Job + +The init job will: +1. Read PDFs from container image +2. Generate embeddings +3. Save to PostgreSQL +4. Wait for RAG service to be ready +5. 
Run training pipeline + +```bash +# Check init job status +oc get jobs -n <namespace> -l app.kubernetes.io/component=init + +# View logs +oc logs -n <namespace> -l job-name=alm-backend-init --tail=100 +``` + +### Step 4: Verify RAG Service + +```bash +# Check service is running +oc get pods -n <namespace> -l app.kubernetes.io/name=rag + +# Check service health +RAG_POD=$(oc get pods -n <namespace> -l app.kubernetes.io/name=rag -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $RAG_POD -- curl -s http://localhost:8002/health | jq + +# Check readiness (should return 200 when index is loaded) +oc exec -n <namespace> $RAG_POD -- curl -s http://localhost:8002/ready | jq +``` + +### Step 5: Test RAG Query + +```bash +# Test query from within cluster +oc run -it --rm test-rag-query -n <namespace> --image=curlimages/curl --restart=Never -- \ + curl -X POST http://alm-rag:8002/rag/query \ + -H "Content-Type: application/json" \ + -d '{ + "query": "ansible playbook execution failed", + "top_k": 5, + "top_n": 3, + "similarity_threshold": 0.6 + }' | jq +``` + +## Configuration + +### Environment Variables + +#### RAG Service +- `DATABASE_URL` - PostgreSQL connection URL (required, from secret `pgvector`) +- `EMBEDDINGS_LLM_URL` - Embedding service URL (default: `http://alm-embedding:8080`) +- `RAG_MODEL_NAME` - Model name (default: `nomic-ai/nomic-embed-text-v1.5`) +- `PORT` - Service port (default: `8002`) + +#### Backend +- `RAG_ENABLED` - Enable/disable RAG (default: `true`, accepts: `true`, `1`, `yes`) +- `RAG_SERVICE_URL` - RAG service URL (default: `http://alm-rag:8002`) +- `RAG_TOP_K` - Top K candidates to retrieve (default: `10`) +- `RAG_TOP_N` - Top N final results to return (default: `3`) +- `RAG_SIMILARITY_THRESHOLD` - Minimum similarity threshold (default: `0.6`) + +### Helm Values + +```yaml +rag: + enabled: true + serviceUrl: "http://alm-rag:8002" + query: + topK: 4 + topN: 1 + similarityThreshold: 0.6 +``` + +## Testing + +### 1.
Unit Tests + +Test the RAG service locally: + +```bash +cd services/rag +# Set environment variables +export DATABASE_URL="postgresql+asyncpg://user:pass@localhost:5432/dbname" +export EMBEDDINGS_LLM_URL="http://localhost:8080" + +# Run service +uvicorn main:app --host 0.0.0.0 --port 8002 +``` + +### 2. Integration Tests + +Test backend → RAG service communication: + +```python +# In backend pod or test environment +from alm.agents.get_more_context_agent.rag_handler import RAGHandler + +handler = RAGHandler() +context = await handler.get_cheat_sheet_context("ansible error message") +print(context) +``` + +### 3. End-to-End Test + +1. Deploy all components +2. Run init job +3. Verify RAG service loads index +4. Trigger an alert that requires RAG context +5. Verify RAG service is called and returns results + +## Troubleshooting + +### RAG Service Not Starting + +**Problem**: Service fails to start or index doesn't load + +**Check**: +```bash +# Check logs +oc logs -n <namespace> -l app.kubernetes.io/name=rag --tail=50 + +# Verify PostgreSQL connection +RAG_POD=$(oc get pods -n <namespace> -l app.kubernetes.io/name=rag -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $RAG_POD -- env | grep DATABASE_URL + +# Check if embeddings exist (replace <database> with actual database name) +PG_POD=$(oc get pods -n <namespace> -l app=postgresql -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $PG_POD -- psql -U postgres -d <database> -c "SELECT COUNT(*) FROM ragembedding;" +``` + +### RAG Service Stuck in "Not Ready" State + +**Problem**: Service starts but `/ready` endpoint returns 503 + +**Possible Causes**: +1. Embeddings not yet available (init job still running) +2. Database connection issue +3.
Embedding parsing error + +**Check**: +```bash +# Check RAG service logs for polling messages +oc logs -n <namespace> -l app.kubernetes.io/name=rag | grep -i "embedding" + +# Verify embeddings exist in database +oc exec -n <namespace> $PG_POD -- psql -U postgres -d <database> -c "SELECT COUNT(*) FROM ragembedding;" + +# Check init job status +oc get jobs -n <namespace> -l app.kubernetes.io/component=init +``` + +### Backend Can't Reach RAG Service + +**Problem**: Backend returns empty context + +**Check**: +```bash +# Verify service exists +oc get svc -n <namespace> alm-rag + +# Test connectivity from backend pod +BACKEND_POD=$(oc get pods -n <namespace> -l app.kubernetes.io/name=backend -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $BACKEND_POD -- curl -s http://alm-rag:8002/health + +# Check backend logs +oc logs -n <namespace> $BACKEND_POD | grep -i rag +``` + +### No Embeddings in Database + +**Problem**: Init job didn't populate embeddings + +**Check**: +```bash +# Check init job logs +oc logs -n <namespace> -l job-name=alm-backend-init --tail=100 + +# Verify PDFs in image +INIT_POD=$(oc get pods -n <namespace> -l app.kubernetes.io/component=init -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $INIT_POD -- ls -la /app/data/knowledge_base/ + +# Check database +oc exec -n <namespace> $PG_POD -- psql -U postgres -d <database> -c "SELECT error_id, model_name FROM ragembedding LIMIT 5;" +``` + +### Embeddings Deleted After Training Pipeline + +**Problem**: Embeddings disappear after init job completes + +**Solution**: This was fixed - `init_tables(delete_tables=True)` no longer deletes `ragembedding` table. If you see this issue, ensure you're using the latest backend image.
+ +**Verify**: +```bash +# Confirm the fix: embeddings should persist after the training pipeline +oc exec -n <namespace> $PG_POD -- psql -U postgres -d <database> -c "SELECT COUNT(*) FROM ragembedding;" +# Should return > 0 even after training pipeline runs +``` + +### Performance Issues + +**Problem**: Slow query responses + +**Solutions**: +- Increase RAG service resources (memory/CPU) +- Check PostgreSQL connection pool +- Verify FAISS index is loaded (check `/ready` endpoint) +- Consider adding RAG service replicas with load balancing + +## Rollback Plan + +If issues occur, you can rollback: + +1. **Disable RAG service**: + ```yaml + rag: + enabled: false + ``` + +2. **Revert to PVC** (if needed): + - Restore `rag-pvc.yaml` template + - Update `init_pipeline.py` to save to disk + - Update backend to load from PVC + +3. **Database cleanup** (optional): + ```sql + DROP TABLE IF EXISTS ragembedding; + ``` + +## Benefits Achieved + +✅ **No RWO Constraints**: Backend pods can run on any node +✅ **Reduced Memory**: Single FAISS index instead of N copies +✅ **Simplified Storage**: Single source of truth (PostgreSQL) +✅ **Easier Updates**: Update embeddings via SQL, no pod restarts +✅ **Better Scaling**: Independent scaling of RAG vs backend +✅ **No PVC Management**: Eliminated persistent volume complexity +✅ **Resilient Startup**: No circular dependencies, graceful degradation +✅ **Data Persistence**: Embeddings survive training pipeline runs + +## Files Changed + +### New Files + +#### `services/rag/main.py` +FastAPI application for the RAG service. Implements: +- Background task for loading index (non-blocking startup) +- HTTP endpoints for querying, health checks, and reloading +- Query processing: generates embeddings, searches FAISS, returns results +- Graceful error handling and service state management + +#### `services/rag/index_loader.py` +Loads embeddings from PostgreSQL and builds FAISS index.
Handles: +- PostgreSQL connection and querying +- Parsing pgvector string format to numpy arrays +- Building FAISS IndexFlatIP for similarity search +- Error store and index-to-error-id mapping + +#### `services/rag/pyproject.toml` +Python dependencies for RAG service: +- FastAPI, uvicorn for web framework +- sqlmodel, asyncpg, psycopg2-binary for database access +- faiss-cpu, numpy for similarity search +- httpx for embedding service calls + +#### `services/rag/Containerfile` +Container image definition for RAG service: +- Based on UBI8 Python 3.12 +- Uses `uv` for dependency management +- Copies service code and dependencies +- Exposes port 8002 + +#### `deploy/helm/ansible-log-monitor/charts/rag/` +Complete Helm chart for deploying RAG service: +- Deployment with initContainer for PostgreSQL readiness +- Service for cluster-internal access +- ServiceAccount and RBAC (if needed) +- HPA for autoscaling (optional) +- ConfigMap and environment variable management + +### Modified Files + +#### `src/alm/models.py` +**Change**: Added `RAGEmbedding` SQLModel class +- Defines database schema for storing embeddings +- Uses `pgvector.sqlalchemy.Vector(768)` for embedding column +- Includes error metadata as JSON field +- Tracks model name and embedding dimensions + +#### `src/alm/database.py` +**Changes**: +1. Added `RAGEmbedding` to table creation/dropping +2. Added automatic pgvector extension enablement +3. **Critical Fix**: Modified `init_tables()` to NOT delete `ragembedding` table when `delete_tables=True` + - Prevents training pipeline from deleting embeddings + - Ensures embeddings persist across runs + +#### `src/alm/rag/embed_and_index.py` +**Changes**: +1. Added `_embeddings_array` attribute to store embeddings before FAISS +2. Added `save_to_postgresql()` method to persist embeddings +3. Added `ingest_and_index_to_postgresql()` async entry point +4. 
Modified `build_faiss_index()` to store embeddings array for PostgreSQL saving + +#### `src/alm/agents/get_more_context_agent/rag_handler.py` +**Changes**: +1. Replaced local FAISS loading with HTTP client +2. Added `httpx.AsyncClient` for RAG service communication +3. Implemented lazy initialization of HTTP client +4. Added `cleanup()` method for graceful shutdown +5. Updated `_format_rag_results()` to parse JSON response from service + +#### `src/alm/main_fastapi.py` +**Change**: Added shutdown event handler +- Calls `RAGHandler().cleanup()` on application shutdown +- Ensures HTTP client is properly closed +- Prevents resource leaks + +#### `init_pipeline.py` +**Changes**: +1. Removed PVC-related logic (PDF copying, volume mounting) +2. Updated `build_rag_index()` to always use PostgreSQL +3. Added `wait_for_rag_service()` function to coordinate with RAG service +4. Updated main flow: build index → wait for RAG service → run training pipeline +5. Simplified data directory setup (PDFs now in container image) + +#### `deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml` +**Changes**: +- Removed `volumeMounts` and `volumes` for `rag-data` PVC +- Backend no longer needs direct access to RAG storage + +#### `deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml` +**Changes**: +- Removed `volumeMounts` and `volumes` for `rag-data` PVC +- Removed conditional PVC checks +- Always assumes PostgreSQL storage + +#### `deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml` +**Changes**: +- Added `RAG_SERVICE_URL` environment variable +- Updated comments to reflect new architecture + +#### `deploy/helm/ansible-log-monitor/charts/backend/values.yaml` +**Changes**: +- Removed `rag.persistence` section (no PVC needed) +- Added `rag.serviceUrl` configuration +- Updated `rag.knowledgeBaseDir` to reflect PDFs in image + +#### `deploy/helm/ansible-log-monitor/global-values.yaml` +**Change**: Added `rag: "alm-rag"` to 
`servicesNames` for service discovery + +#### `pyproject.toml` (root) +**Change**: Added `pgvector>=0.2.5` dependency +- Required for `Vector` type in `RAGEmbedding` model +- Needed for backend to create tables with pgvector columns + +### Deleted Files + +#### `deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml` +**Reason**: No longer needed - RAG data stored in PostgreSQL, not PVC + +## Key Fixes Applied + +### 1. Circular Dependency Resolution +- **Problem**: RAG service waited for init job, init job needed RAG service +- **Solution**: + - RAG service starts independently, polls PostgreSQL for embeddings + - Init job waits for RAG service after building index + - Both can start in parallel, coordinate via PostgreSQL + +### 2. Non-Blocking Startup +- **Problem**: RAG service crashed if embeddings not available immediately +- **Solution**: Background task loads index asynchronously, service starts immediately +- **Result**: Service stays in "not ready" state until embeddings available + +### 3. Embedding Persistence +- **Problem**: Training pipeline deleted `ragembedding` table +- **Solution**: Modified `init_tables()` to preserve `ragembedding` when `delete_tables=True` +- **Result**: Embeddings persist across training pipeline runs + +### 4. pgvector String Parsing +- **Problem**: pgvector returns embeddings as strings in raw SQL queries +- **Solution**: Added parsing logic using JSON and `ast.literal_eval()` +- **Result**: Handles both array and string representations + +## Next Steps + +1. **Deploy RAG service** to your cluster +2. **Run init job** to populate embeddings +3. **Monitor** RAG service health and performance +4. **Test** end-to-end RAG queries +5. 
**Optimize** resource allocation based on usage + +## Support + +For issues or questions: +- Check service logs: `oc logs -n -l app.kubernetes.io/name=rag` +- Check backend logs: `oc logs -n -l app.kubernetes.io/name=backend` +- Verify database: Check `ragembedding` table in PostgreSQL +- Check init job: `oc logs -n -l job-name=alm-backend-init` diff --git a/init_pipeline.py b/init_pipeline.py index 1357a17..c22c5a6 100644 --- a/init_pipeline.py +++ b/init_pipeline.py @@ -3,99 +3,103 @@ from alm.utils.phoenix import register_phoenix import os import glob -import shutil from pathlib import Path +import httpx def setup_data_directories(): """ - Setup data directory structure in PVC mount path. - Creates necessary directories and copies PDFs from image to PVC if needed. + Setup data directory structure. + Knowledge base PDFs should be baked into the container image at /app/data/knowledge_base. """ - from src.alm.config import config + from alm.config import config print("\n" + "=" * 70) print("SETTING UP DATA DIRECTORY STRUCTURE") print("=" * 70) - # Get paths from config (uses DATA_DIR and KNOWLEDGE_BASE_DIR env vars) + # Get paths from config (uses DATA_DIR env var) data_dir = Path(config.storage.data_dir) - knowledge_base_dir = Path(config.storage.knowledge_base_dir) logs_dir = data_dir / "logs" / "failed" - # Create necessary directories + # Create necessary directories (for logs, etc.) 
print("Creating directories...") data_dir.mkdir(parents=True, exist_ok=True) - knowledge_base_dir.mkdir(parents=True, exist_ok=True) logs_dir.mkdir(parents=True, exist_ok=True) print(f" ✓ {data_dir}") - print(f" ✓ {knowledge_base_dir}") print(f" ✓ {logs_dir}") - # Copy PDFs from image to PVC if PVC knowledge_base is empty + # Check for knowledge base PDFs in image image_kb_dir = Path("/app/data/knowledge_base") - pvc_kb_dir = knowledge_base_dir - - # Check if PVC knowledge_base has any PDFs - pvc_pdfs = list(pvc_kb_dir.glob("*.pdf")) - - if not pvc_pdfs: - # PVC is empty, copy from image if available - if image_kb_dir.exists(): - image_pdfs = list(image_kb_dir.glob("*.pdf")) - if image_pdfs: - print(f"\nCopying {len(image_pdfs)} PDF file(s) from image to PVC...") - for pdf_path in image_pdfs: - dest_path = pvc_kb_dir / pdf_path.name - try: - shutil.copy2(pdf_path, dest_path) - print(f" ✓ Copied {pdf_path.name}") - except Exception as e: - print(f" ✗ Error copying {pdf_path.name}: {e}") - print("✓ Knowledge base PDFs copied to PVC") - else: - print(f"\n⚠ No PDFs found in image at {image_kb_dir}") + if image_kb_dir.exists(): + image_pdfs = list(image_kb_dir.glob("*.pdf")) + if image_pdfs: + print(f"\n✓ Found {len(image_pdfs)} PDF file(s) in container image:") + for pdf in image_pdfs: + print(f" - {pdf.name}") else: - print(f"\n⚠ Image knowledge base directory not found at {image_kb_dir}") + print(f"\n⚠ No PDF files found in image at {image_kb_dir}") else: + print(f"\n⚠ Knowledge base directory not found in image at {image_kb_dir}") print( - f"\n✓ PVC knowledge base already contains {len(pvc_pdfs)} PDF file(s), skipping copy" + " PDFs should be baked into the container image at /app/data/knowledge_base" ) print("=" * 70) -def build_rag_index(): +async def build_rag_index(): """ - Build RAG index from knowledge base PDFs. - This runs during the init job to create the FAISS index and metadata. + Build RAG index from knowledge base PDFs and save to PostgreSQL. 
+ This runs during the init job to create the FAISS index and save embeddings to database. """ - from src.alm.config import config - from src.alm.rag.ingest_and_chunk import AnsibleErrorParser - from src.alm.rag.embed_and_index import AnsibleErrorEmbedder - - # Check if RAG is enabled - rag_enabled = os.getenv("RAG_ENABLED", "true").lower() == "true" + from alm.config import config + from alm.rag.ingest_and_chunk import AnsibleErrorParser + from alm.rag.embed_and_index import AnsibleErrorEmbedder + from alm.database import init_tables + + # Check if RAG is enabled (consistent with rag_handler.py) + rag_enabled_env = os.getenv("RAG_ENABLED", "true").lower() + rag_enabled = rag_enabled_env in ["true", "1", "yes"] if not rag_enabled: - print("RAG is disabled (RAG_ENABLED=false), skipping RAG index build") + print( + f"RAG is disabled (RAG_ENABLED={rag_enabled_env}), skipping RAG index build" + ) return - # Check if index already exists (skip rebuild for faster upgrades) - index_path = Path(config.storage.index_path) - metadata_path = Path(config.storage.metadata_path) + # Check if embeddings already exist in PostgreSQL (skip rebuild for faster upgrades) + from alm.database import get_session + from alm.models import RAGEmbedding + from sqlmodel import select - if index_path.exists() and metadata_path.exists(): - print("✓ RAG index already exists, skipping rebuild") - print(f" Index: {index_path}") - print(f" Metadata: {metadata_path}") - print(" To force rebuild, delete the PVC or these files") - return + try: + async with get_session() as session: + result = await session.exec(select(RAGEmbedding)) + existing = result.first() + if existing: + count_result = await session.exec(select(RAGEmbedding)) + count = len(list(count_result.all())) + print( + f"✓ Found {count} existing embeddings in PostgreSQL, skipping rebuild" + ) + print( + " To force rebuild, delete embeddings from PostgreSQL or set RAG_FORCE_REBUILD=true" + ) + if os.getenv("RAG_FORCE_REBUILD", 
"false").lower() != "true": + return + except Exception as e: + print(f"⚠ Could not check PostgreSQL: {e}") + print(" Proceeding with index build...") print("\n" + "=" * 70) print("BUILDING RAG INDEX FROM KNOWLEDGE BASE") + print(" Storage: PostgreSQL") print("=" * 70) try: + # Ensure database tables exist + await init_tables(delete_tables=False) + # Validate configuration config.print_config() config.validate() @@ -104,12 +108,13 @@ def build_rag_index(): parser = AnsibleErrorParser() embedder = AnsibleErrorEmbedder() - # Find PDFs in knowledge base - kb_dir = config.storage.knowledge_base_dir - pdf_files = sorted(glob.glob(str(kb_dir / "*.pdf"))) + # Find PDFs in knowledge base (from container image) + # PDFs should be baked into the image at /app/data/knowledge_base + image_kb_dir = Path("/app/data/knowledge_base") + pdf_files = sorted(glob.glob(str(image_kb_dir / "*.pdf"))) if not pdf_files: - print(f"⚠ WARNING: No PDF files found in {kb_dir}") + print(f"⚠ WARNING: No PDF files found in {image_kb_dir}") print(" RAG index will not be created") return @@ -138,14 +143,11 @@ def build_rag_index(): print(f"TOTAL: {len(all_chunks)} chunks from {len(pdf_files)} PDFs") print(f"{'=' * 70}") - # Build and save index - embedder.ingest_and_index(all_chunks) - + # Build and save index to PostgreSQL + await embedder.ingest_and_index_to_postgresql(all_chunks) print("\n" + "=" * 70) - print("✓ RAG INDEX BUILD COMPLETE") + print("✓ RAG INDEX BUILD COMPLETE (PostgreSQL)") print("=" * 70) - print(f" Index: {index_path}") - print(f" Metadata: {metadata_path}") except Exception as e: print(f"\n✗ ERROR building RAG index: {e}") @@ -155,6 +157,61 @@ def build_rag_index(): traceback.print_exc() +async def wait_for_rag_service(rag_service_url: str, max_wait_time: int = 300): + """ + Wait for RAG service to be ready before proceeding. 
+ + Args: + rag_service_url: URL of the RAG service (e.g., http://alm-rag:8002) + max_wait_time: Maximum time to wait in seconds (default: 5 minutes) + """ + # Check if RAG is enabled + rag_enabled_env = os.getenv("RAG_ENABLED", "true").lower() + rag_enabled = rag_enabled_env in ["true", "1", "yes"] + if not rag_enabled: + print("RAG is disabled, skipping RAG service wait") + return + + print("\n" + "=" * 70) + print("WAITING FOR RAG SERVICE TO BE READY") + print("=" * 70) + + ready_url = f"{rag_service_url}/ready" + elapsed = 0 + check_interval = 5 + + async with httpx.AsyncClient(timeout=10.0) as client: + while elapsed < max_wait_time: + try: + response = await client.get(ready_url) + if response.status_code == 200: + data = response.json() + index_size = data.get("index_size", 0) + print(f"✓ RAG service is ready (index size: {index_size})") + return + else: + print( + f"RAG service not ready yet (status: {response.status_code}), waiting..." + ) + except (httpx.RequestError, httpx.HTTPStatusError): + if elapsed == 0: + print( + f"RAG service not yet available at {rag_service_url}, waiting..." + ) + elif elapsed % 30 == 0: # Print every 30 seconds + print(f"Still waiting for RAG service... 
(elapsed: {elapsed}s)") + + await asyncio.sleep(check_interval) + elapsed += check_interval + + # Timeout reached + print( + f"\n⚠ WARNING: RAG service did not become ready within {max_wait_time} seconds" + ) + print(" The training pipeline will proceed, but RAG queries may fail") + print(" This is expected if the RAG service is still starting up") + + async def main(): # Setup and initialization print("\n" + "=" * 70) @@ -165,7 +222,11 @@ async def main(): setup_data_directories() # Step 2: Build RAG index - build_rag_index() + await build_rag_index() + + # Step 2.5: Wait for RAG service to be ready (if RAG is enabled) + rag_service_url = os.getenv("RAG_SERVICE_URL", "http://alm-rag:8002") + await wait_for_rag_service(rag_service_url) # Step 3: Run main pipeline (clustering, summarization, etc.) print("\n" + "=" * 70) diff --git a/pyproject.toml b/pyproject.toml index d6340d3..29fda59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "psycopg2-binary>=2.9.0", "alembic>=1.13.0", "asyncpg>=0.30.0", + "pgvector>=0.2.5", # ML / RAG "scikit-learn>=1.7.1", diff --git a/services/rag/Containerfile b/services/rag/Containerfile new file mode 100644 index 0000000..8d16379 --- /dev/null +++ b/services/rag/Containerfile @@ -0,0 +1,31 @@ +FROM registry.access.redhat.com/ubi8/python-312 + +USER root + +# Install uv pointing to the uv image and coping from there +# /uv and /uvx are the source files copied from the uv image +# /bin is the destination +COPY --from=ghcr.io/astral-sh/uv:0.9.7 /uv /uvx /bin/ + +# Set working directory +WORKDIR /app + +# Copy dependency files (from services/rag/ directory) +COPY services/rag/pyproject.toml ./ + +# Install dependencies +RUN uv sync --no-dev +ENV VIRTUAL_ENV=/app/.venv +ENV PATH="/app/.venv/bin:$PATH" + +RUN chmod -R +r . + +# Copy source code (from services/rag/ directory) +COPY services/rag/index_loader.py services/rag/main.py . 
+ +# Expose port +EXPOSE 8002 + +# Default command +ENTRYPOINT ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8002"] + diff --git a/services/rag/README.md b/services/rag/README.md new file mode 100644 index 0000000..bb4000b --- /dev/null +++ b/services/rag/README.md @@ -0,0 +1,151 @@ +# RAG Service + +FastAPI microservice for RAG (Retrieval-Augmented Generation) queries. + +## Overview + +The RAG service provides similarity search over the knowledge base embeddings stored in PostgreSQL. It: + +1. **Starts immediately** (non-blocking startup) - service becomes available even if embeddings aren't ready +2. **Polls PostgreSQL** in background - checks every 5 seconds for embeddings (up to 10 minutes) +3. **Loads embeddings** when available - parses pgvector format and builds FAISS index in memory +4. **Exposes REST API** - provides query endpoints for knowledge base retrieval + +### Key Features + +- **Non-blocking startup**: Service starts immediately, loads index in background +- **Graceful degradation**: Service stays in "not ready" state until embeddings available +- **Automatic recovery**: Polls PostgreSQL until embeddings found +- **No circular dependencies**: Can start before init job completes + +## API Endpoints + +### `POST /rag/query` + +Query the knowledge base for relevant error solutions. + +**Request:** +```json +{ + "query": "error message or log summary", + "top_k": 10, + "top_n": 3, + "similarity_threshold": 0.6 +} +``` + +**Response:** +```json +{ + "query": "error message", + "results": [ + { + "error_id": "error_123", + "error_title": "Error Title", + "similarity_score": 0.85, + "source_file": "file.pdf", + "page": 5, + "sections": { + "description": "...", + "symptoms": "...", + "resolution": "...", + "code": "...", + "benefits": "..." + } + } + ], + "metadata": { + "num_results": 3, + "search_time_ms": 12.5, + "top_k": 10, + "top_n": 3, + "similarity_threshold": 0.6 + } +} +``` + +### `GET /health` + +Health check endpoint. 
Returns service status even if index is not loaded. + +**Response:** +```json +{ + "status": "healthy", + "index_size": 109 +} +``` + +Or if index not loaded: +```json +{ + "status": "unhealthy", + "reason": "Index not loaded" +} +``` + +### `GET /ready` + +Readiness check - ensures index is loaded. Returns 503 if index not ready, 200 when ready. + +**Response (ready):** +```json +{ + "status": "ready", + "index_size": 109 +} +``` + +**Response (not ready):** +- HTTP 503 with error detail + +### `POST /rag/reload` + +Reload the index from PostgreSQL without restarting the service. + +## Environment Variables + +- `DATABASE_URL` - PostgreSQL connection URL (required) +- `EMBEDDINGS_LLM_URL` - URL of the embedding service (default: `http://alm-embedding:8080`) +- `RAG_MODEL_NAME` - Name of the embedding model (default: `nomic-ai/nomic-embed-text-v1.5`) +- `PORT` - Service port (default: `8002`) + +## Startup Behavior + +The service uses a **background task** to load the index, allowing it to start even if embeddings aren't available yet: + +1. **Service starts** → FastAPI application becomes available +2. **Background task starts** → Begins polling PostgreSQL every 5 seconds +3. **If embeddings found** → Loads index, service becomes ready +4. **If embeddings not found** → Continues polling (up to 10 minutes) +5. **Service state**: + - `/health` always returns 200 (service is running) + - `/ready` returns 503 until index loaded, then 200 + +This design allows the RAG service to start independently of the init job, eliminating circular dependencies. + +## Deployment + +The service is deployed as a Kubernetes deployment via Helm chart. + +**Prerequisites:** +- PostgreSQL with `pgvector` extension enabled +- `ragembedding` table (created automatically by init job) +- Embeddings populated in database (via init job) + +**Startup Sequence:** +1. RAG service pod starts +2. Waits for PostgreSQL (initContainer) +3. Service starts, begins background polling +4. 
When embeddings available, loads index automatically +5. Service becomes ready for queries + +## Dependencies + +- **PostgreSQL** with `ragembedding` table populated (via init job) +- **pgvector extension** - for vector storage and queries +- **Embedding service (TEI)** - for generating query embeddings +- **FAISS** - for in-memory similarity search +- **FastAPI** - web framework +- **asyncpg** - async PostgreSQL driver + diff --git a/services/rag/index_loader.py b/services/rag/index_loader.py new file mode 100644 index 0000000..072c582 --- /dev/null +++ b/services/rag/index_loader.py @@ -0,0 +1,189 @@ +""" +Load RAG embeddings from PostgreSQL and build FAISS index. +""" + +import numpy as np +from typing import Dict, Any, Optional, Tuple +import faiss +from sqlalchemy.ext.asyncio import create_async_engine +from sqlalchemy.orm import sessionmaker +from sqlmodel.ext.asyncio.session import AsyncSession as SQLModelAsyncSession + +# Import models - we'll need to make these available +# For now, we'll define a simple structure or import from the main codebase +# In production, these should be in a shared package + + +class RAGIndexLoader: + """ + Loads embeddings from PostgreSQL and builds FAISS index in memory. + """ + + def __init__( + self, database_url: str, model_name: str = "nomic-ai/nomic-embed-text-v1.5" + ): + """ + Initialize the index loader. 
+ + Args: + database_url: PostgreSQL connection URL + model_name: Name of the embedding model (for validation) + """ + self.database_url = database_url.replace("+asyncpg", "").replace( + "postgresql", "postgresql+asyncpg" + ) + self.model_name = model_name + self.embedding_dim = 768 # nomic-embed-text-v1.5 dimension + + self.engine = create_async_engine(self.database_url) + self.session_factory = sessionmaker( + self.engine, class_=SQLModelAsyncSession, expire_on_commit=False + ) + + self.index: Optional[faiss.Index] = None + self.error_store: Dict[str, Dict[str, Any]] = {} + self.index_to_error_id: Dict[int, str] = {} + self._loaded = False + + async def load_index( + self, + ) -> Tuple[faiss.Index, Dict[str, Dict[str, Any]], Dict[int, str]]: + """ + Load embeddings from PostgreSQL and build FAISS index. + + Returns: + Tuple of (FAISS index, error_store, index_to_error_id mapping) + """ + if self._loaded and self.index is not None: + return self.index, self.error_store, self.index_to_error_id + + print("Loading embeddings from PostgreSQL...") + + # Define RAGEmbedding model inline (or import from shared package) + # For now, we'll use raw SQL to avoid circular dependencies + from sqlalchemy import text + + async with self.engine.begin() as conn: + # Query all embeddings + # Note: pgvector Vector type may be returned as string, we'll parse it in Python + result = await conn.execute( + text(""" + SELECT + error_id, + embedding, + error_title, + error_metadata, + model_name, + embedding_dim + FROM ragembedding + ORDER BY error_id + """) + ) + rows = result.fetchall() + + if not rows: + raise ValueError("No embeddings found in PostgreSQL. 
Run init job first.") + + print(f"Found {len(rows)} embeddings in database") + + # Extract data + embeddings_list = [] + error_ids = [] + error_store = {} + index_to_error_id = {} + + for idx, row in enumerate(rows): + error_id = row[0] + embedding = row[1] # This is a list/array + error_title = row[2] + error_metadata = row[3] if row[3] else {} + model_name_db = row[4] + embedding_dim_db = row[5] + + # Validate model + if model_name_db != self.model_name: + print( + f"Warning: Model mismatch. DB has {model_name_db}, expected {self.model_name}" + ) + + if embedding_dim_db != self.embedding_dim: + raise ValueError( + f"Embedding dimension mismatch: DB has {embedding_dim_db}, " + f"expected {self.embedding_dim}" + ) + + # Convert embedding to numpy array + # Handle both array and string representations from pgvector + if isinstance(embedding, str): + # Parse string representation (e.g., "[0.1, 0.2, ...]") + import json + import ast + + try: + # Try JSON first (safer) + embedding = json.loads(embedding) + except json.JSONDecodeError: + # If JSON parsing fails, use ast.literal_eval (safe for literals) + # pgvector returns vectors as string like '[0.1,0.2,...]' + try: + embedding = ast.literal_eval(embedding) + except (ValueError, SyntaxError): + raise ValueError( + f"Could not parse embedding for {error_id}: invalid format" + ) + + embedding_array = np.array(embedding, dtype=np.float32) + + # Validate embedding shape + if embedding_array.shape[0] != self.embedding_dim: + raise ValueError( + f"Invalid embedding shape for {error_id}: " + f"expected {self.embedding_dim}, got {embedding_array.shape[0]}" + ) + + embeddings_list.append(embedding_array) + error_ids.append(error_id) + + # Build error_store + error_store[error_id] = { + "error_id": error_id, + "error_title": error_title, + "sections": error_metadata.get("sections", {}), + "metadata": error_metadata.get("metadata", {}), + } + + index_to_error_id[idx] = error_id + + # Convert to numpy array + embeddings = 
np.array(embeddings_list, dtype=np.float32) + + print(f"Loaded {len(embeddings)} embeddings, shape: {embeddings.shape}") + + # Verify embeddings are normalized + norms = np.linalg.norm(embeddings, axis=1) + print( + f"Embedding norms: min={norms.min():.4f}, max={norms.max():.4f}, mean={norms.mean():.4f}" + ) + + # Build FAISS index + print(f"Building FAISS IndexFlatIP with dimension {self.embedding_dim}...") + index = faiss.IndexFlatIP(self.embedding_dim) + index.add(embeddings) + + print(f"FAISS index created with {index.ntotal} vectors") + + # Store for reuse + self.index = index + self.error_store = error_store + self.index_to_error_id = index_to_error_id + self._loaded = True + + return index, error_store, index_to_error_id + + async def reload_index(self): + """Force reload of index from database.""" + self._loaded = False + self.index = None + self.error_store = {} + self.index_to_error_id = {} + return await self.load_index() diff --git a/services/rag/main.py b/services/rag/main.py new file mode 100644 index 0000000..7717b50 --- /dev/null +++ b/services/rag/main.py @@ -0,0 +1,281 @@ +""" +RAG Service - FastAPI service for RAG queries. 
+""" + +import os +import asyncio +from typing import Optional, List, Dict, Any +import numpy as np +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel, Field +from index_loader import RAGIndexLoader +import time + +app = FastAPI(title="RAG Service", version="0.1.0") + +# Global index loader (initialized on startup) +index_loader: Optional[RAGIndexLoader] = None + + +class QueryRequest(BaseModel): + """Request model for RAG query.""" + + query: str = Field(description="Query text to search for") + top_k: int = Field( + default=10, ge=1, le=100, description="Number of top candidates to retrieve" + ) + top_n: int = Field( + default=3, ge=1, le=20, description="Number of final results to return" + ) + similarity_threshold: float = Field( + default=0.6, ge=0.0, le=1.0, description="Minimum similarity threshold (0-1)" + ) + + +class ErrorSection(BaseModel): + """Error section data.""" + + description: Optional[str] = None + symptoms: Optional[str] = None + resolution: Optional[str] = None + code: Optional[str] = None + benefits: Optional[str] = None + + +class ErrorResult(BaseModel): + """Single error result.""" + + error_id: str + error_title: str + similarity_score: float + source_file: Optional[str] = None + page: Optional[int] = None + sections: ErrorSection + + +class QueryResponse(BaseModel): + """Response model for RAG query.""" + + query: str + results: List[ErrorResult] + metadata: Dict[str, Any] + + +async def load_index_background(): + """Background task to load index from PostgreSQL (polls until available).""" + global index_loader + + database_url = os.getenv("DATABASE_URL") + if not database_url: + print("ERROR: DATABASE_URL environment variable is required") + return + + model_name = os.getenv("RAG_MODEL_NAME", "nomic-ai/nomic-embed-text-v1.5") + + print("Initializing RAG index loader...") + index_loader = RAGIndexLoader(database_url=database_url, model_name=model_name) + + # Wait for embeddings to be available (poll PostgreSQL) + 
# This allows the service to start before the init job completes + max_wait_time = 600 # 10 minutes + wait_interval = 5 # Check every 5 seconds + elapsed = 0 + + print("Waiting for embeddings to be available in PostgreSQL...") + while elapsed < max_wait_time: + try: + await index_loader.load_index() + print("✓ RAG index loaded successfully") + return + except ValueError as e: + if "No embeddings found" in str(e): + if elapsed == 0 or elapsed % 30 == 0: # Print every 30 seconds + print( + f"Embeddings not yet available (waited {elapsed}s), retrying in {wait_interval}s..." + ) + await asyncio.sleep(wait_interval) + elapsed += wait_interval + else: + print(f"✗ Failed to load RAG index: {e}") + return # Don't raise, just return - service will stay in "not ready" state + except Exception as e: + print(f"✗ Failed to load RAG index: {e}") + return # Don't raise, just return - service will stay in "not ready" state + + # If we get here, we've timed out + print(f"⚠ WARNING: Failed to load RAG index after {max_wait_time} seconds") + print(" Service will remain in 'not ready' state until embeddings are available") + + +@app.on_event("startup") +async def startup_event(): + """Start background task to load index.""" + # Start background task - don't block startup + asyncio.create_task(load_index_background()) + + +@app.get("/health") +def health_check(): + """Health check endpoint for Kubernetes probes.""" + if index_loader is None or index_loader.index is None: + return {"status": "unhealthy", "reason": "Index not loaded"} + return { + "status": "healthy", + "index_size": index_loader.index.ntotal if index_loader.index else 0, + } + + +@app.get("/ready") +def readiness_check(): + """Readiness check - ensures index is loaded.""" + if index_loader is None or index_loader.index is None: + raise HTTPException(status_code=503, detail="Index not loaded") + return {"status": "ready", "index_size": index_loader.index.ntotal} + + +@app.post("/rag/query", response_model=QueryResponse) 
+async def query_rag(request: QueryRequest): + """ + Query the RAG system for relevant error solutions. + + This endpoint: + 1. Generates embedding for the query + 2. Performs similarity search using FAISS + 3. Returns top-N most relevant errors + """ + if index_loader is None or index_loader.index is None: + raise HTTPException( + status_code=503, detail="RAG index not loaded. Service is not ready." + ) + + start_time = time.time() + + try: + # Step 1: Generate query embedding + # For now, we'll need to call the embedding service + # This should be the same TEI service used during indexing + embedding_url = os.getenv("EMBEDDINGS_LLM_URL", "http://alm-embedding:8080") + + import httpx + + async with httpx.AsyncClient(timeout=30.0) as client: + # Prepare query text with task prefix (for nomic models) + query_text = f"search_query: {request.query}" + + # Call embedding service + embedding_response = await client.post( + f"{embedding_url}/embeddings", + json={ + "input": [query_text], + "model": "nomic-embed-text-v1.5", + }, + ) + embedding_response.raise_for_status() + + # Extract embedding + embedding_data = embedding_response.json() + if "data" in embedding_data and len(embedding_data["data"]) > 0: + query_embedding = np.array( + embedding_data["data"][0]["embedding"], dtype=np.float32 + ) + elif ( + "embeddings" in embedding_data and len(embedding_data["embeddings"]) > 0 + ): + query_embedding = np.array( + embedding_data["embeddings"][0], dtype=np.float32 + ) + else: + raise ValueError("Unexpected embedding response format") + + # Normalize embedding + norm = np.linalg.norm(query_embedding) + if norm > 0: + query_embedding = query_embedding / norm + + # Step 2: Similarity search in FAISS + query_vector = query_embedding.reshape(1, -1) + similarities, indices = index_loader.index.search(query_vector, request.top_k) + + # Flatten results + similarities = similarities[0] + indices = indices[0] + + # Step 3: Filter by threshold and format results + results = [] + for 
idx, similarity in zip(indices, similarities): + if idx == -1: # FAISS returns -1 when not enough results + continue + + if similarity < request.similarity_threshold: + continue + + error_id = index_loader.index_to_error_id[idx] + error_data = index_loader.error_store[error_id] + + # Extract sections + sections = error_data.get("sections", {}) + metadata = error_data.get("metadata", {}) + + result = ErrorResult( + error_id=error_id, + error_title=error_data.get("error_title", error_id), + similarity_score=float(similarity), + source_file=metadata.get("source_file"), + page=metadata.get("page"), + sections=ErrorSection( + description=sections.get("description"), + symptoms=sections.get("symptoms"), + resolution=sections.get("resolution"), + code=sections.get("code"), + benefits=sections.get("benefits"), + ), + ) + results.append(result) + + # Step 4: Take top-N results + results = results[: request.top_n] + + search_time_ms = (time.time() - start_time) * 1000 + + return QueryResponse( + query=request.query, + results=results, + metadata={ + "num_results": len(results), + "search_time_ms": search_time_ms, + "top_k": request.top_k, + "top_n": request.top_n, + "similarity_threshold": request.similarity_threshold, + }, + ) + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}") + + +@app.post("/rag/reload") +async def reload_index(): + """ + Reload the index from PostgreSQL. + + Useful for updating the index without restarting the service. 
+ """ + if index_loader is None: + raise HTTPException(status_code=503, detail="Index loader not initialized") + + try: + await index_loader.reload_index() + return { + "status": "success", + "message": "Index reloaded", + "index_size": index_loader.index.ntotal if index_loader.index else 0, + } + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error reloading index: {str(e)}") + + +if __name__ == "__main__": + import uvicorn + + port = int(os.getenv("PORT", "8002")) + uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/services/rag/pyproject.toml b/services/rag/pyproject.toml new file mode 100644 index 0000000..f8c3bc1 --- /dev/null +++ b/services/rag/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "rag-service" +version = "0.1.0" +description = "RAG service for Ansible Log Monitoring" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "fastapi>=0.116.1", + "uvicorn>=0.37.0", + "httpx>=0.27.2", + "sqlmodel>=0.0.8", + "psycopg2-binary>=2.9.0", + "asyncpg>=0.30.0", + "faiss-cpu>=1.7.4", + "numpy>=1.24.0", + "pydantic>=2.0.0", +] + diff --git a/src/alm/agents/get_more_context_agent/rag_handler.py b/src/alm/agents/get_more_context_agent/rag_handler.py index 135e348..ec28bd1 100644 --- a/src/alm/agents/get_more_context_agent/rag_handler.py +++ b/src/alm/agents/get_more_context_agent/rag_handler.py @@ -1,5 +1,6 @@ import os -from typing import Optional +from typing import Optional, Dict, Any +import httpx from alm.utils.logger import get_logger @@ -11,13 +12,13 @@ class RAGHandler: Handles RAG (Retrieval-Augmented Generation) operations for retrieving relevant context from the knowledge base. - Uses lazy loading singleton pattern to ensure the FAISS index is loaded - only once and reused across requests. + Uses HTTP client to communicate with the RAG service. 
""" _instance: Optional["RAGHandler"] = None - _pipeline = None _enabled: Optional[bool] = None + _rag_service_url: Optional[str] = None + _client: Optional[httpx.AsyncClient] = None def __new__(cls): """Singleton pattern implementation.""" @@ -25,106 +26,108 @@ def __new__(cls): cls._instance = super(RAGHandler, cls).__new__(cls) return cls._instance - def _initialize_rag_pipeline(self): + async def cleanup(self): """ - Initialize RAG pipeline with lazy loading (singleton pattern). - This ensures the FAISS index is loaded only once and reused across requests. + Cleanup HTTP client resources. + + Should be called during application shutdown to properly close + the HTTP connection pool and avoid resource leaks. + """ + if self._client is not None: + try: + await self._client.aclose() + logger.info("RAG service HTTP client closed") + except Exception as e: + logger.warning("Error closing RAG service HTTP client: %s", e) + finally: + self._client = None + + def _initialize_rag_service(self): + """ + Initialize RAG service client. 
Returns: - AnsibleErrorQueryPipeline instance or None if RAG is disabled/failed + True if service is available, False otherwise """ # Check if already initialized if self._enabled is not None: - return self._pipeline + return self._enabled # Check if RAG is enabled via environment variable rag_enabled_env = os.getenv("RAG_ENABLED", "true").lower() if rag_enabled_env not in ["true", "1", "yes"]: logger.info("RAG is disabled (RAG_ENABLED=%s)", rag_enabled_env) self._enabled = False - self._pipeline = None - return None - - try: - logger.info("Initializing RAG pipeline (lazy loading)...") + return False - from alm.rag.query_pipeline import AnsibleErrorQueryPipeline + # Get RAG service URL + self._rag_service_url = os.getenv("RAG_SERVICE_URL", "http://alm-rag:8002") - # Get configuration from environment variables - top_k = int(os.getenv("RAG_TOP_K", "10")) - top_n = int(os.getenv("RAG_TOP_N", "3")) - similarity_threshold = float(os.getenv("RAG_SIMILARITY_THRESHOLD", "0.6")) - - # Initialize pipeline (this loads the FAISS index) - self._pipeline = AnsibleErrorQueryPipeline( - top_k=top_k, - top_n=top_n, - similarity_threshold=similarity_threshold, + # Create HTTP client and initialize (wrapped in try-except for error handling) + try: + # Create HTTP client + self._client = httpx.AsyncClient( + base_url=self._rag_service_url, + timeout=30.0, ) + # We'll do a lazy check on first request instead of blocking here self._enabled = True logger.info( - "✓ RAG pipeline initialized successfully with %d errors in index", - len(self._pipeline.embedder.error_store), - ) - - return self._pipeline - - except FileNotFoundError as e: - logger.warning("RAG index not found: %s", e) - logger.warning( - "RAG functionality disabled - proceeding without cheat sheet context" + "RAG service client initialized (URL: %s)", self._rag_service_url ) - self._enabled = False - self._pipeline = None - return None - + return True except Exception as e: - logger.error("Failed to initialize RAG pipeline: 
%s", e, exc_info=True) - logger.warning( - "RAG functionality disabled - proceeding without cheat sheet context" - ) + logger.warning("Failed to initialize RAG service client: %s", e) self._enabled = False - self._pipeline = None - return None + # Clean up client if it was partially created + self._client = None + return False - def _format_rag_results(self, response) -> str: + def _format_rag_results(self, response_data: Dict[str, Any]) -> str: """ Format RAG query results into a structured string for LLM context. Args: - response: QueryResponse from RAG pipeline + response_data: Response dictionary from RAG service Returns: Formatted string with error solutions """ - if not response.results: + results = response_data.get("results", []) + if not results: return "No matching solutions found in knowledge base." output = ["## Relevant Error Solutions from Knowledge Base\n"] - for i, result in enumerate(response.results, 1): - output.append(f"### Error {i}: {result.error_title}") - output.append(f"**Confidence Score:** {result.similarity_score:.2f}\n") + for i, result in enumerate(results, 1): + error_title = result.get( + "error_title", result.get("error_id", f"Error {i}") + ) + similarity_score = result.get("similarity_score", 0.0) + sections = result.get("sections", {}) + + output.append(f"### Error {i}: {error_title}") + output.append(f"**Confidence Score:** {similarity_score:.2f}\n") - if result.sections.description: + if sections.get("description"): output.append("**Description:**") - output.append(result.sections.description) + output.append(sections["description"]) output.append("") - if result.sections.symptoms: + if sections.get("symptoms"): output.append("**Symptoms:**") - output.append(result.sections.symptoms) + output.append(sections["symptoms"]) output.append("") - if result.sections.resolution: + if sections.get("resolution"): output.append("**Resolution:**") - output.append(result.sections.resolution) + output.append(sections["resolution"]) 
output.append("") - if result.sections.code: + if sections.get("code"): output.append("**Code Example:**") - output.append(f"```\n{result.sections.code}\n```") + output.append(f"```\n{sections['code']}\n```") output.append("") output.append("---\n") @@ -136,8 +139,8 @@ async def get_cheat_sheet_context(self, log_summary: str) -> str: Retrieve relevant context from the RAG knowledge base for solving the error. This function: - 1. Lazily initializes the RAG pipeline (loads FAISS index on first call) - 2. Queries the knowledge base with the log summary + 1. Initializes the RAG service client (if not already done) + 2. Queries the RAG service with the log summary 3. Formats the results for LLM consumption 4. Returns empty string if RAG is disabled or fails @@ -149,32 +152,64 @@ async def get_cheat_sheet_context(self, log_summary: str) -> str: """ logger.info("Retrieving cheat sheet context for log summary") - # Initialize RAG pipeline (lazy loading) - pipeline = self._initialize_rag_pipeline() + # Initialize RAG service client (lazy loading) + if not self._initialize_rag_service(): + logger.debug("RAG service not available, returning empty context") + return "" - if pipeline is None: - logger.debug("RAG pipeline not available, returning empty context") + if self._client is None: + logger.debug("RAG service client not initialized, returning empty context") return "" try: - # Query the RAG system + # Get configuration from environment variables + top_k = int(os.getenv("RAG_TOP_K", "10")) + top_n = int(os.getenv("RAG_TOP_N", "3")) + similarity_threshold = float(os.getenv("RAG_SIMILARITY_THRESHOLD", "0.6")) + + # Query the RAG service logger.debug( - "Querying RAG system with log summary: %s...", log_summary[:100] + "Querying RAG service with log summary: %s...", log_summary[:100] ) - response = pipeline.query(log_summary) + + response = await self._client.post( + "/rag/query", + json={ + "query": log_summary, + "top_k": top_k, + "top_n": top_n, + "similarity_threshold": 
similarity_threshold, + }, + ) + + response.raise_for_status() + response_data = response.json() # Format results - formatted_context = self._format_rag_results(response) + formatted_context = self._format_rag_results(response_data) + metadata = response_data.get("metadata", {}) logger.info( "✓ Retrieved %d relevant errors from knowledge base (search time: %.2fms)", - response.metadata["num_results"], - response.metadata["search_time_ms"], + metadata.get("num_results", 0), + metadata.get("search_time_ms", 0.0), ) return formatted_context + except httpx.HTTPStatusError as e: + logger.error( + "RAG service returned error status %d: %s", + e.response.status_code, + e.response.text, + ) + logger.warning("Proceeding without cheat sheet context") + return "" + except httpx.RequestError as e: + logger.error("Error connecting to RAG service: %s", e) + logger.warning("Proceeding without cheat sheet context") + return "" except Exception as e: - logger.error("Error querying RAG system: %s", e, exc_info=True) + logger.error("Error querying RAG service: %s", e, exc_info=True) logger.warning("Proceeding without cheat sheet context") return "" diff --git a/src/alm/database.py b/src/alm/database.py index f817e08..14e9f8c 100644 --- a/src/alm/database.py +++ b/src/alm/database.py @@ -4,9 +4,10 @@ from datetime import datetime from typing import Generator +from sqlalchemy import text from sqlalchemy.ext.asyncio import create_async_engine from sqlmodel.ext.asyncio.session import AsyncSession -from alm.models import GrafanaAlert +from alm.models import GrafanaAlert, RAGEmbedding from alm.agents.state import GrafanaAlertState from alm.models import LogEntry from alm.utils.logger import get_logger @@ -26,8 +27,24 @@ async def init_tables(delete_tables=False): async with engine.begin() as conn: if delete_tables: logger.info("Starting to delete tables") + # Only delete GrafanaAlert table, NOT RAGEmbedding + # RAG embeddings should persist across training pipeline runs await 
conn.run_sync(GrafanaAlert.metadata.drop_all) + # RAGEmbedding table is NOT deleted - it persists across runs + + # Ensure pgvector extension is enabled (must be done before creating tables) + try: + await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) + logger.info("pgvector extension enabled") + except Exception as e: + logger.warning(f"Could not enable pgvector extension: {e}") + logger.warning( + "This is OK if extension is already enabled or not available" + ) + + # Create all tables await conn.run_sync(GrafanaAlert.metadata.create_all) + await conn.run_sync(RAGEmbedding.metadata.create_all) def get_session(): diff --git a/src/alm/main_fastapi.py b/src/alm/main_fastapi.py index bb8129b..b375543 100644 --- a/src/alm/main_fastapi.py +++ b/src/alm/main_fastapi.py @@ -27,6 +27,15 @@ def create_app() -> FastAPI: async def read_root() -> dict[str, str]: return {"service": "alm", "status": "ok"} + @app.on_event("shutdown") + async def shutdown_event(): + """Cleanup resources on application shutdown.""" + from alm.agents.get_more_context_agent.rag_handler import RAGHandler + + # Cleanup RAG handler HTTP client + handler = RAGHandler() + await handler.cleanup() + return app diff --git a/src/alm/models.py b/src/alm/models.py index e6dc2d7..f0ea779 100644 --- a/src/alm/models.py +++ b/src/alm/models.py @@ -5,6 +5,7 @@ from sqlalchemy import JSON from sqlmodel import Column, Field, SQLModel +from pgvector.sqlalchemy import Vector from enum import Enum from pydantic import BaseModel @@ -86,3 +87,48 @@ def convert_datetime_to_str(cls, v): if isinstance(v, datetime): return v.isoformat() return v + + +# RAG Embeddings Model +class RAGEmbedding(SQLModel, table=True): + """ + Stores RAG embeddings and metadata for knowledge base retrieval. + + This table stores the embeddings generated from knowledge base PDFs, + along with the complete error metadata needed for RAG queries. 
+ """ + + error_id: str = Field( + primary_key=True, description="Unique identifier for the error" + ) + + # Embedding vector (stored using pgvector Vector type) + # Note: pgvector extension must be enabled in PostgreSQL + # Dimension is 768 for nomic-embed-text-v1.5 model + embedding: list[float] = Field( + sa_column=Column(Vector(768)), + description="Embedding vector (768 dimensions for nomic-embed-text-v1.5)", + ) + + # Error metadata stored as JSONB for flexibility + error_title: Optional[str] = Field(default=None, description="Title of the error") + error_metadata: dict = Field( + default_factory=dict, + description="Complete error metadata including sections (description, symptoms, resolution, code, benefits) and source information", + sa_column=Column(JSON), + ) + + # Model information + model_name: str = Field(description="Name of the embedding model used") + embedding_dim: int = Field( + default=768, description="Dimension of the embedding vector" + ) + + # Timestamps + created_at: datetime = Field( + default_factory=datetime.now, + description="Timestamp when the embedding was created", + ) + updated_at: Optional[datetime] = Field( + default=None, description="Timestamp when the embedding was last updated" + ) diff --git a/src/alm/rag/embed_and_index.py b/src/alm/rag/embed_and_index.py index 0ee7e1c..70cefa0 100644 --- a/src/alm/rag/embed_and_index.py +++ b/src/alm/rag/embed_and_index.py @@ -22,6 +22,7 @@ from typing import List, Dict, Any, Tuple, Optional from collections import defaultdict from pathlib import Path +from datetime import datetime from langchain_core.documents import Document import faiss @@ -233,6 +234,8 @@ def __init__( self.index = None self.error_store = {} + self.index_to_error_id = {} + self._embeddings_array = None # Store embeddings for PostgreSQL saving logger.info("Embedder initialized") logger.info(" Mode: TEI Service") @@ -423,6 +426,9 @@ def build_faiss_index( logger.info("STEP:CREATING FAISS INDEX") logger.info("=" * 60) + # 
Store embeddings array for PostgreSQL saving + self._embeddings_array = embeddings.copy() + # Verify embeddings are normalized norms = np.linalg.norm(embeddings, axis=1) logger.info( @@ -484,6 +490,112 @@ def save_index(self): logger.info(" Metadata size: %.2f MB", metadata_size_mb) logger.info(" Total storage: %.2f MB", index_size_mb + metadata_size_mb) + async def save_to_postgresql(self): + """ + Save embeddings and metadata to PostgreSQL. + + This method saves the current FAISS index data (embeddings, error_store) + to the PostgreSQL database for use by the RAG service. + """ + if self.index is None: + raise ValueError("FAISS index must be built before saving to PostgreSQL") + + if not self.error_store: + raise ValueError( + "Error store must be populated before saving to PostgreSQL" + ) + + logger.info("=" * 60) + logger.info("SAVING EMBEDDINGS TO POSTGRESQL") + logger.info("=" * 60) + + from alm.database import get_session + from alm.models import RAGEmbedding + + # Get embeddings from FAISS index + # FAISS doesn't have a direct "get all vectors" method, so we need to reconstruct + # We'll use the index_to_error_id mapping and error_store to get the data + # Actually, we need to store embeddings separately or reconstruct from error_store + # For now, let's assume we have the embeddings array from build_faiss_index + + # Since we don't have direct access to the embeddings array after it's added to FAISS, + # we need to either: + # 1. Store embeddings in memory during build_faiss_index + # 2. Re-embed from error_store (inefficient) + # 3. 
Store embeddings before adding to FAISS + + # For now, we'll need to modify build_faiss_index to keep embeddings + # Let's add a property to store them + if not hasattr(self, "_embeddings_array") or self._embeddings_array is None: + logger.warning("Embeddings array not available, cannot save to PostgreSQL") + logger.warning( + "This method should be called immediately after build_faiss_index" + ) + return + + embeddings_array = self._embeddings_array + error_ids = list(self.index_to_error_id.values()) + + logger.info("Saving %d embeddings to PostgreSQL...", len(error_ids)) + + async with get_session() as session: + saved_count = 0 + updated_count = 0 + + for idx, error_id in enumerate(error_ids): + embedding_vector = embeddings_array[ + idx + ].tolist() # Convert numpy to list + error_data = self.error_store[error_id] + + # Prepare error metadata + error_metadata = { + "sections": error_data.get("sections", {}), + "metadata": error_data.get("metadata", {}), + } + + # Check if embedding already exists + from sqlmodel import select + + result = await session.exec( + select(RAGEmbedding).where(RAGEmbedding.error_id == error_id) + ) + existing = result.first() + + if existing: + # Update existing + existing.embedding = embedding_vector + existing.error_title = error_data.get("error_title") + existing.error_metadata = error_metadata + existing.updated_at = datetime.now() + session.add(existing) + updated_count += 1 + else: + # Create new + rag_embedding = RAGEmbedding( + error_id=error_id, + embedding=embedding_vector, + error_title=error_data.get("error_title"), + error_metadata=error_metadata, + model_name=self.model_name, + embedding_dim=self.embedding_dim, + ) + session.add(rag_embedding) + saved_count += 1 + + if (saved_count + updated_count) % 100 == 0: + logger.info( + " Progress: %d embeddings processed", + saved_count + updated_count, + ) + + await session.commit() + + logger.info("✓ Embeddings saved to PostgreSQL") + logger.info(" New embeddings: %d", 
saved_count) + logger.info(" Updated embeddings: %d", updated_count) + logger.info(" Total: %d", saved_count + updated_count) + def load_index(self): """Load FAISS index and metadata from disk.""" logger.info("=" * 60) @@ -528,6 +640,25 @@ def ingest_and_index(self, chunks: List[Document]): logger.info("INGESTION AND INDEXING COMPLETE") logger.info("=" * 70) + async def ingest_and_index_to_postgresql(self, chunks: List[Document]): + """ + Complete ingestion and indexing pipeline, saving to PostgreSQL. + + This is the async version that saves to PostgreSQL instead of disk. + """ + logger.info("=" * 70) + logger.info("ANSIBLE ERROR RAG SYSTEM - INGESTION AND INDEXING (PostgreSQL)") + logger.info("=" * 70) + + error_store = self.group_chunks_by_error(chunks) + embeddings, error_ids = self.create_composite_embeddings(error_store) + self.build_faiss_index(embeddings, error_ids, error_store) + await self.save_to_postgresql() + + logger.info("=" * 70) + logger.info("INGESTION AND INDEXING COMPLETE (PostgreSQL)") + logger.info("=" * 70) + def main(): """Process all PDFs in knowledge_base directory.""" diff --git a/uv.lock b/uv.lock index 60ecf02..a6e86b3 100644 --- a/uv.lock +++ b/uv.lock @@ -155,6 +155,7 @@ dependencies = [ { name = "langgraph" }, { name = "minio" }, { name = "openinference-instrumentation-langchain" }, + { name = "pgvector" }, { name = "psycopg2-binary" }, { name = "pypdf" }, { name = "python-dateutil" }, @@ -196,6 +197,7 @@ requires-dist = [ { name = "langgraph", specifier = ">=0.6.5" }, { name = "minio", specifier = ">=7.2.17" }, { name = "openinference-instrumentation-langchain", specifier = ">=0.1.33" }, + { name = "pgvector", specifier = ">=0.2.5" }, { name = "psycopg2-binary", specifier = ">=2.9.0" }, { name = "pypdf", specifier = ">=5.0.0" }, { name = "python-dateutil", specifier = ">=2.8.0" }, @@ -923,10 +925,10 @@ wheels = [ [[package]] name = "filelock" version = "3.20.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922, upload-time = "2025-10-08T18:03:50.056Z" } +source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054, upload-time = "2025-10-08T18:03:48.35Z" }, + { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2" }, ] [[package]] @@ -1051,8 +1053,9 @@ wheels = [ name = "fsspec" version = "2025.9.0" source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = "https://files.pythonhosted.org/packages/de/e0/bab50af11c2d75c9c4a2a26a5254573c0bd97cea152254401510950486fa/fsspec-2025.9.0.tar.gz", hash = "sha256:19fd429483d25d28b65ec68f9f4adc16c17ea2c7c7bf54ec61360d478fb19c19" } wheels = [ - { url = "https://download.pytorch.org/whl/fsspec-2025.9.0-py3-none-any.whl" }, + { url = "https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7" }, ] [[package]] @@ -2234,8 +2237,9 @@ wheels = [ name = "mpmath" version = "1.3.0" source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f" } wheels = [ - { url = "https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c" }, + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c" }, ] [[package]] @@ -2414,8 +2418,9 @@ wheels = [ name = "networkx" version = "3.5" source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037" } wheels = [ - { url = "https://download.pytorch.org/whl/networkx-3.5-py3-none-any.whl" }, + { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec" }, ] [[package]] @@ -2442,64 +2447,64 @@ wheels = [ [[package]] name = "numpy" version = "2.3.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b5/f4/098d2270d52b41f1bd7db9fc288aaa0400cb48c2a3e2af6fa365d9720947/numpy-2.3.4.tar.gz", hash = "sha256:a7d018bfedb375a8d979ac758b120ba846a7fe764911a64465fd87b8729f4a6a", size = 20582187, upload-time = "2025-10-15T16:18:11.77Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/96/7a/02420400b736f84317e759291b8edaeee9dc921f72b045475a9cbdb26b17/numpy-2.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:ef1b5a3e808bc40827b5fa2c8196151a4c5abe110e1726949d7abddfe5c7ae11", size = 20957727, upload-time = "2025-10-15T16:15:44.9Z" }, - { url = "https://files.pythonhosted.org/packages/18/90/a014805d627aa5750f6f0e878172afb6454552da929144b3c07fcae1bb13/numpy-2.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2f91f496a87235c6aaf6d3f3d89b17dba64996abadccb289f48456cff931ca9", size = 14187262, upload-time = "2025-10-15T16:15:47.761Z" }, - { url = "https://files.pythonhosted.org/packages/c7/e4/0a94b09abe89e500dc748e7515f21a13e30c5c3fe3396e6d4ac108c25fca/numpy-2.3.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f77e5b3d3da652b474cc80a14084927a5e86a5eccf54ca8ca5cbd697bf7f2667", size = 5115992, upload-time = "2025-10-15T16:15:50.144Z" }, - { url = "https://files.pythonhosted.org/packages/88/dd/db77c75b055c6157cbd4f9c92c4458daef0dd9cbe6d8d2fe7f803cb64c37/numpy-2.3.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8ab1c5f5ee40d6e01cbe96de5863e39b215a4d24e7d007cad56c7184fdf4aeef", size = 6648672, upload-time = "2025-10-15T16:15:52.442Z" }, - { url = "https://files.pythonhosted.org/packages/e1/e6/e31b0d713719610e406c0ea3ae0d90760465b086da8783e2fd835ad59027/numpy-2.3.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77b84453f3adcb994ddbd0d1c5d11db2d6bda1a2b7fd5ac5bd4649d6f5dc682e", size = 14284156, upload-time = "2025-10-15T16:15:54.351Z" }, - { url = "https://files.pythonhosted.org/packages/f9/58/30a85127bfee6f108282107caf8e06a1f0cc997cb6b52cdee699276fcce4/numpy-2.3.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4121c5beb58a7f9e6dfdee612cb24f4df5cd4db6e8261d7f4d7450a997a65d6a", size = 16641271, upload-time = "2025-10-15T16:15:56.67Z" }, - { url = "https://files.pythonhosted.org/packages/06/f2/2e06a0f2adf23e3ae29283ad96959267938d0efd20a2e25353b70065bfec/numpy-2.3.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:65611ecbb00ac9846efe04db15cbe6186f562f6bb7e5e05f077e53a599225d16", size = 16059531, 
upload-time = "2025-10-15T16:15:59.412Z" }, - { url = "https://files.pythonhosted.org/packages/b0/e7/b106253c7c0d5dc352b9c8fab91afd76a93950998167fa3e5afe4ef3a18f/numpy-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dabc42f9c6577bcc13001b8810d300fe814b4cfbe8a92c873f269484594f9786", size = 18578983, upload-time = "2025-10-15T16:16:01.804Z" }, - { url = "https://files.pythonhosted.org/packages/73/e3/04ecc41e71462276ee867ccbef26a4448638eadecf1bc56772c9ed6d0255/numpy-2.3.4-cp312-cp312-win32.whl", hash = "sha256:a49d797192a8d950ca59ee2d0337a4d804f713bb5c3c50e8db26d49666e351dc", size = 6291380, upload-time = "2025-10-15T16:16:03.938Z" }, - { url = "https://files.pythonhosted.org/packages/3d/a8/566578b10d8d0e9955b1b6cd5db4e9d4592dd0026a941ff7994cedda030a/numpy-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:985f1e46358f06c2a09921e8921e2c98168ed4ae12ccd6e5e87a4f1857923f32", size = 12787999, upload-time = "2025-10-15T16:16:05.801Z" }, - { url = "https://files.pythonhosted.org/packages/58/22/9c903a957d0a8071b607f5b1bff0761d6e608b9a965945411f867d515db1/numpy-2.3.4-cp312-cp312-win_arm64.whl", hash = "sha256:4635239814149e06e2cb9db3dd584b2fa64316c96f10656983b8026a82e6e4db", size = 10197412, upload-time = "2025-10-15T16:16:07.854Z" }, - { url = "https://files.pythonhosted.org/packages/57/7e/b72610cc91edf138bc588df5150957a4937221ca6058b825b4725c27be62/numpy-2.3.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c090d4860032b857d94144d1a9976b8e36709e40386db289aaf6672de2a81966", size = 20950335, upload-time = "2025-10-15T16:16:10.304Z" }, - { url = "https://files.pythonhosted.org/packages/3e/46/bdd3370dcea2f95ef14af79dbf81e6927102ddf1cc54adc0024d61252fd9/numpy-2.3.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a13fc473b6db0be619e45f11f9e81260f7302f8d180c49a22b6e6120022596b3", size = 14179878, upload-time = "2025-10-15T16:16:12.595Z" }, - { url = 
"https://files.pythonhosted.org/packages/ac/01/5a67cb785bda60f45415d09c2bc245433f1c68dd82eef9c9002c508b5a65/numpy-2.3.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3634093d0b428e6c32c3a69b78e554f0cd20ee420dcad5a9f3b2a63762ce4197", size = 5108673, upload-time = "2025-10-15T16:16:14.877Z" }, - { url = "https://files.pythonhosted.org/packages/c2/cd/8428e23a9fcebd33988f4cb61208fda832800ca03781f471f3727a820704/numpy-2.3.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:043885b4f7e6e232d7df4f51ffdef8c36320ee9d5f227b380ea636722c7ed12e", size = 6641438, upload-time = "2025-10-15T16:16:16.805Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d1/913fe563820f3c6b079f992458f7331278dcd7ba8427e8e745af37ddb44f/numpy-2.3.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ee6a571d1e4f0ea6d5f22d6e5fbd6ed1dc2b18542848e1e7301bd190500c9d7", size = 14281290, upload-time = "2025-10-15T16:16:18.764Z" }, - { url = "https://files.pythonhosted.org/packages/9e/7e/7d306ff7cb143e6d975cfa7eb98a93e73495c4deabb7d1b5ecf09ea0fd69/numpy-2.3.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fc8a63918b04b8571789688b2780ab2b4a33ab44bfe8ccea36d3eba51228c953", size = 16636543, upload-time = "2025-10-15T16:16:21.072Z" }, - { url = "https://files.pythonhosted.org/packages/47/6a/8cfc486237e56ccfb0db234945552a557ca266f022d281a2f577b98e955c/numpy-2.3.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:40cc556d5abbc54aabe2b1ae287042d7bdb80c08edede19f0c0afb36ae586f37", size = 16056117, upload-time = "2025-10-15T16:16:23.369Z" }, - { url = "https://files.pythonhosted.org/packages/b1/0e/42cb5e69ea901e06ce24bfcc4b5664a56f950a70efdcf221f30d9615f3f3/numpy-2.3.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ecb63014bb7f4ce653f8be7f1df8cbc6093a5a2811211770f6606cc92b5a78fd", size = 18577788, upload-time = "2025-10-15T16:16:27.496Z" }, - { url = 
"https://files.pythonhosted.org/packages/86/92/41c3d5157d3177559ef0a35da50f0cda7fa071f4ba2306dd36818591a5bc/numpy-2.3.4-cp313-cp313-win32.whl", hash = "sha256:e8370eb6925bb8c1c4264fec52b0384b44f675f191df91cbe0140ec9f0955646", size = 6282620, upload-time = "2025-10-15T16:16:29.811Z" }, - { url = "https://files.pythonhosted.org/packages/09/97/fd421e8bc50766665ad35536c2bb4ef916533ba1fdd053a62d96cc7c8b95/numpy-2.3.4-cp313-cp313-win_amd64.whl", hash = "sha256:56209416e81a7893036eea03abcb91c130643eb14233b2515c90dcac963fe99d", size = 12784672, upload-time = "2025-10-15T16:16:31.589Z" }, - { url = "https://files.pythonhosted.org/packages/ad/df/5474fb2f74970ca8eb978093969b125a84cc3d30e47f82191f981f13a8a0/numpy-2.3.4-cp313-cp313-win_arm64.whl", hash = "sha256:a700a4031bc0fd6936e78a752eefb79092cecad2599ea9c8039c548bc097f9bc", size = 10196702, upload-time = "2025-10-15T16:16:33.902Z" }, - { url = "https://files.pythonhosted.org/packages/11/83/66ac031464ec1767ea3ed48ce40f615eb441072945e98693bec0bcd056cc/numpy-2.3.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:86966db35c4040fdca64f0816a1c1dd8dbd027d90fca5a57e00e1ca4cd41b879", size = 21049003, upload-time = "2025-10-15T16:16:36.101Z" }, - { url = "https://files.pythonhosted.org/packages/5f/99/5b14e0e686e61371659a1d5bebd04596b1d72227ce36eed121bb0aeab798/numpy-2.3.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:838f045478638b26c375ee96ea89464d38428c69170360b23a1a50fa4baa3562", size = 14302980, upload-time = "2025-10-15T16:16:39.124Z" }, - { url = "https://files.pythonhosted.org/packages/2c/44/e9486649cd087d9fc6920e3fc3ac2aba10838d10804b1e179fb7cbc4e634/numpy-2.3.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d7315ed1dab0286adca467377c8381cd748f3dc92235f22a7dfc42745644a96a", size = 5231472, upload-time = "2025-10-15T16:16:41.168Z" }, - { url = "https://files.pythonhosted.org/packages/3e/51/902b24fa8887e5fe2063fd61b1895a476d0bbf46811ab0c7fdf4bd127345/numpy-2.3.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = 
"sha256:84f01a4d18b2cc4ade1814a08e5f3c907b079c847051d720fad15ce37aa930b6", size = 6739342, upload-time = "2025-10-15T16:16:43.777Z" }, - { url = "https://files.pythonhosted.org/packages/34/f1/4de9586d05b1962acdcdb1dc4af6646361a643f8c864cef7c852bf509740/numpy-2.3.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:817e719a868f0dacde4abdfc5c1910b301877970195db9ab6a5e2c4bd5b121f7", size = 14354338, upload-time = "2025-10-15T16:16:46.081Z" }, - { url = "https://files.pythonhosted.org/packages/1f/06/1c16103b425de7969d5a76bdf5ada0804b476fed05d5f9e17b777f1cbefd/numpy-2.3.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e071da78d92a214212cacea81c6da557cab307f2c34b5f85b628e94803f9c0", size = 16702392, upload-time = "2025-10-15T16:16:48.455Z" }, - { url = "https://files.pythonhosted.org/packages/34/b2/65f4dc1b89b5322093572b6e55161bb42e3e0487067af73627f795cc9d47/numpy-2.3.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2ec646892819370cf3558f518797f16597b4e4669894a2ba712caccc9da53f1f", size = 16134998, upload-time = "2025-10-15T16:16:51.114Z" }, - { url = "https://files.pythonhosted.org/packages/d4/11/94ec578896cdb973aaf56425d6c7f2aff4186a5c00fac15ff2ec46998b46/numpy-2.3.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:035796aaaddfe2f9664b9a9372f089cfc88bd795a67bd1bfe15e6e770934cf64", size = 18651574, upload-time = "2025-10-15T16:16:53.429Z" }, - { url = "https://files.pythonhosted.org/packages/62/b7/7efa763ab33dbccf56dade36938a77345ce8e8192d6b39e470ca25ff3cd0/numpy-2.3.4-cp313-cp313t-win32.whl", hash = "sha256:fea80f4f4cf83b54c3a051f2f727870ee51e22f0248d3114b8e755d160b38cfb", size = 6413135, upload-time = "2025-10-15T16:16:55.992Z" }, - { url = "https://files.pythonhosted.org/packages/43/70/aba4c38e8400abcc2f345e13d972fb36c26409b3e644366db7649015f291/numpy-2.3.4-cp313-cp313t-win_amd64.whl", hash = "sha256:15eea9f306b98e0be91eb344a94c0e630689ef302e10c2ce5f7e11905c704f9c", size = 12928582, 
upload-time = "2025-10-15T16:16:57.943Z" }, - { url = "https://files.pythonhosted.org/packages/67/63/871fad5f0073fc00fbbdd7232962ea1ac40eeaae2bba66c76214f7954236/numpy-2.3.4-cp313-cp313t-win_arm64.whl", hash = "sha256:b6c231c9c2fadbae4011ca5e7e83e12dc4a5072f1a1d85a0a7b3ed754d145a40", size = 10266691, upload-time = "2025-10-15T16:17:00.048Z" }, - { url = "https://files.pythonhosted.org/packages/72/71/ae6170143c115732470ae3a2d01512870dd16e0953f8a6dc89525696069b/numpy-2.3.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:81c3e6d8c97295a7360d367f9f8553973651b76907988bb6066376bc2252f24e", size = 20955580, upload-time = "2025-10-15T16:17:02.509Z" }, - { url = "https://files.pythonhosted.org/packages/af/39/4be9222ffd6ca8a30eda033d5f753276a9c3426c397bb137d8e19dedd200/numpy-2.3.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7c26b0b2bf58009ed1f38a641f3db4be8d960a417ca96d14e5b06df1506d41ff", size = 14188056, upload-time = "2025-10-15T16:17:04.873Z" }, - { url = "https://files.pythonhosted.org/packages/6c/3d/d85f6700d0a4aa4f9491030e1021c2b2b7421b2b38d01acd16734a2bfdc7/numpy-2.3.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:62b2198c438058a20b6704351b35a1d7db881812d8512d67a69c9de1f18ca05f", size = 5116555, upload-time = "2025-10-15T16:17:07.499Z" }, - { url = "https://files.pythonhosted.org/packages/bf/04/82c1467d86f47eee8a19a464c92f90a9bb68ccf14a54c5224d7031241ffb/numpy-2.3.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:9d729d60f8d53a7361707f4b68a9663c968882dd4f09e0d58c044c8bf5faee7b", size = 6643581, upload-time = "2025-10-15T16:17:09.774Z" }, - { url = "https://files.pythonhosted.org/packages/0c/d3/c79841741b837e293f48bd7db89d0ac7a4f2503b382b78a790ef1dc778a5/numpy-2.3.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd0c630cf256b0a7fd9d0a11c9413b42fef5101219ce6ed5a09624f5a65392c7", size = 14299186, upload-time = "2025-10-15T16:17:11.937Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/7e/4a14a769741fbf237eec5a12a2cbc7a4c4e061852b6533bcb9e9a796c908/numpy-2.3.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5e081bc082825f8b139f9e9fe42942cb4054524598aaeb177ff476cc76d09d2", size = 16638601, upload-time = "2025-10-15T16:17:14.391Z" }, - { url = "https://files.pythonhosted.org/packages/93/87/1c1de269f002ff0a41173fe01dcc925f4ecff59264cd8f96cf3b60d12c9b/numpy-2.3.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:15fb27364ed84114438fff8aaf998c9e19adbeba08c0b75409f8c452a8692c52", size = 16074219, upload-time = "2025-10-15T16:17:17.058Z" }, - { url = "https://files.pythonhosted.org/packages/cd/28/18f72ee77408e40a76d691001ae599e712ca2a47ddd2c4f695b16c65f077/numpy-2.3.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:85d9fb2d8cd998c84d13a79a09cc0c1091648e848e4e6249b0ccd7f6b487fa26", size = 18576702, upload-time = "2025-10-15T16:17:19.379Z" }, - { url = "https://files.pythonhosted.org/packages/c3/76/95650169b465ececa8cf4b2e8f6df255d4bf662775e797ade2025cc51ae6/numpy-2.3.4-cp314-cp314-win32.whl", hash = "sha256:e73d63fd04e3a9d6bc187f5455d81abfad05660b212c8804bf3b407e984cd2bc", size = 6337136, upload-time = "2025-10-15T16:17:22.886Z" }, - { url = "https://files.pythonhosted.org/packages/dc/89/a231a5c43ede5d6f77ba4a91e915a87dea4aeea76560ba4d2bf185c683f0/numpy-2.3.4-cp314-cp314-win_amd64.whl", hash = "sha256:3da3491cee49cf16157e70f607c03a217ea6647b1cea4819c4f48e53d49139b9", size = 12920542, upload-time = "2025-10-15T16:17:24.783Z" }, - { url = "https://files.pythonhosted.org/packages/0d/0c/ae9434a888f717c5ed2ff2393b3f344f0ff6f1c793519fa0c540461dc530/numpy-2.3.4-cp314-cp314-win_arm64.whl", hash = "sha256:6d9cd732068e8288dbe2717177320723ccec4fb064123f0caf9bbd90ab5be868", size = 10480213, upload-time = "2025-10-15T16:17:26.935Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/4b/c4a5f0841f92536f6b9592694a5b5f68c9ab37b775ff342649eadf9055d3/numpy-2.3.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:22758999b256b595cf0b1d102b133bb61866ba5ceecf15f759623b64c020c9ec", size = 21052280, upload-time = "2025-10-15T16:17:29.638Z" }, - { url = "https://files.pythonhosted.org/packages/3e/80/90308845fc93b984d2cc96d83e2324ce8ad1fd6efea81b324cba4b673854/numpy-2.3.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9cb177bc55b010b19798dc5497d540dea67fd13a8d9e882b2dae71de0cf09eb3", size = 14302930, upload-time = "2025-10-15T16:17:32.384Z" }, - { url = "https://files.pythonhosted.org/packages/3d/4e/07439f22f2a3b247cec4d63a713faae55e1141a36e77fb212881f7cda3fb/numpy-2.3.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:0f2bcc76f1e05e5ab58893407c63d90b2029908fa41f9f1cc51eecce936c3365", size = 5231504, upload-time = "2025-10-15T16:17:34.515Z" }, - { url = "https://files.pythonhosted.org/packages/ab/de/1e11f2547e2fe3d00482b19721855348b94ada8359aef5d40dd57bfae9df/numpy-2.3.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:8dc20bde86802df2ed8397a08d793da0ad7a5fd4ea3ac85d757bf5dd4ad7c252", size = 6739405, upload-time = "2025-10-15T16:17:36.128Z" }, - { url = "https://files.pythonhosted.org/packages/3b/40/8cd57393a26cebe2e923005db5134a946c62fa56a1087dc7c478f3e30837/numpy-2.3.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e199c087e2aa71c8f9ce1cb7a8e10677dc12457e7cc1be4798632da37c3e86e", size = 14354866, upload-time = "2025-10-15T16:17:38.884Z" }, - { url = "https://files.pythonhosted.org/packages/93/39/5b3510f023f96874ee6fea2e40dfa99313a00bf3ab779f3c92978f34aace/numpy-2.3.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85597b2d25ddf655495e2363fe044b0ae999b75bc4d630dc0d886484b03a5eb0", size = 16703296, upload-time = "2025-10-15T16:17:41.564Z" }, - { url = 
"https://files.pythonhosted.org/packages/41/0d/19bb163617c8045209c1996c4e427bccbc4bbff1e2c711f39203c8ddbb4a/numpy-2.3.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04a69abe45b49c5955923cf2c407843d1c85013b424ae8a560bba16c92fe44a0", size = 16136046, upload-time = "2025-10-15T16:17:43.901Z" }, - { url = "https://files.pythonhosted.org/packages/e2/c1/6dba12fdf68b02a21ac411c9df19afa66bed2540f467150ca64d246b463d/numpy-2.3.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e1708fac43ef8b419c975926ce1eaf793b0c13b7356cfab6ab0dc34c0a02ac0f", size = 18652691, upload-time = "2025-10-15T16:17:46.247Z" }, - { url = "https://files.pythonhosted.org/packages/f8/73/f85056701dbbbb910c51d846c58d29fd46b30eecd2b6ba760fc8b8a1641b/numpy-2.3.4-cp314-cp314t-win32.whl", hash = "sha256:863e3b5f4d9915aaf1b8ec79ae560ad21f0b8d5e3adc31e73126491bb86dee1d", size = 6485782, upload-time = "2025-10-15T16:17:48.872Z" }, - { url = "https://files.pythonhosted.org/packages/17/90/28fa6f9865181cb817c2471ee65678afa8a7e2a1fb16141473d5fa6bacc3/numpy-2.3.4-cp314-cp314t-win_amd64.whl", hash = "sha256:962064de37b9aef801d33bc579690f8bfe6c5e70e29b61783f60bcba838a14d6", size = 13113301, upload-time = "2025-10-15T16:17:50.938Z" }, - { url = "https://files.pythonhosted.org/packages/54/23/08c002201a8e7e1f9afba93b97deceb813252d9cfd0d3351caed123dcf97/numpy-2.3.4-cp314-cp314t-win_arm64.whl", hash = "sha256:8b5a9a39c45d852b62693d9b3f3e0fe052541f804296ff401a72a1b60edafb29", size = 10547532, upload-time = "2025-10-15T16:17:53.48Z" }, +source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/f4/098d2270d52b41f1bd7db9fc288aaa0400cb48c2a3e2af6fa365d9720947/numpy-2.3.4.tar.gz", hash = "sha256:a7d018bfedb375a8d979ac758b120ba846a7fe764911a64465fd87b8729f4a6a" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/7a/02420400b736f84317e759291b8edaeee9dc921f72b045475a9cbdb26b17/numpy-2.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:ef1b5a3e808bc40827b5fa2c8196151a4c5abe110e1726949d7abddfe5c7ae11" }, + { url = "https://files.pythonhosted.org/packages/18/90/a014805d627aa5750f6f0e878172afb6454552da929144b3c07fcae1bb13/numpy-2.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2f91f496a87235c6aaf6d3f3d89b17dba64996abadccb289f48456cff931ca9" }, + { url = "https://files.pythonhosted.org/packages/c7/e4/0a94b09abe89e500dc748e7515f21a13e30c5c3fe3396e6d4ac108c25fca/numpy-2.3.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f77e5b3d3da652b474cc80a14084927a5e86a5eccf54ca8ca5cbd697bf7f2667" }, + { url = "https://files.pythonhosted.org/packages/88/dd/db77c75b055c6157cbd4f9c92c4458daef0dd9cbe6d8d2fe7f803cb64c37/numpy-2.3.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8ab1c5f5ee40d6e01cbe96de5863e39b215a4d24e7d007cad56c7184fdf4aeef" }, + { url = "https://files.pythonhosted.org/packages/e1/e6/e31b0d713719610e406c0ea3ae0d90760465b086da8783e2fd835ad59027/numpy-2.3.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77b84453f3adcb994ddbd0d1c5d11db2d6bda1a2b7fd5ac5bd4649d6f5dc682e" }, + { url = "https://files.pythonhosted.org/packages/f9/58/30a85127bfee6f108282107caf8e06a1f0cc997cb6b52cdee699276fcce4/numpy-2.3.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4121c5beb58a7f9e6dfdee612cb24f4df5cd4db6e8261d7f4d7450a997a65d6a" }, + { url = "https://files.pythonhosted.org/packages/06/f2/2e06a0f2adf23e3ae29283ad96959267938d0efd20a2e25353b70065bfec/numpy-2.3.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:65611ecbb00ac9846efe04db15cbe6186f562f6bb7e5e05f077e53a599225d16" }, + { url = "https://files.pythonhosted.org/packages/b0/e7/b106253c7c0d5dc352b9c8fab91afd76a93950998167fa3e5afe4ef3a18f/numpy-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dabc42f9c6577bcc13001b8810d300fe814b4cfbe8a92c873f269484594f9786" }, + { url = 
"https://files.pythonhosted.org/packages/73/e3/04ecc41e71462276ee867ccbef26a4448638eadecf1bc56772c9ed6d0255/numpy-2.3.4-cp312-cp312-win32.whl", hash = "sha256:a49d797192a8d950ca59ee2d0337a4d804f713bb5c3c50e8db26d49666e351dc" }, + { url = "https://files.pythonhosted.org/packages/3d/a8/566578b10d8d0e9955b1b6cd5db4e9d4592dd0026a941ff7994cedda030a/numpy-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:985f1e46358f06c2a09921e8921e2c98168ed4ae12ccd6e5e87a4f1857923f32" }, + { url = "https://files.pythonhosted.org/packages/58/22/9c903a957d0a8071b607f5b1bff0761d6e608b9a965945411f867d515db1/numpy-2.3.4-cp312-cp312-win_arm64.whl", hash = "sha256:4635239814149e06e2cb9db3dd584b2fa64316c96f10656983b8026a82e6e4db" }, + { url = "https://files.pythonhosted.org/packages/57/7e/b72610cc91edf138bc588df5150957a4937221ca6058b825b4725c27be62/numpy-2.3.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c090d4860032b857d94144d1a9976b8e36709e40386db289aaf6672de2a81966" }, + { url = "https://files.pythonhosted.org/packages/3e/46/bdd3370dcea2f95ef14af79dbf81e6927102ddf1cc54adc0024d61252fd9/numpy-2.3.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a13fc473b6db0be619e45f11f9e81260f7302f8d180c49a22b6e6120022596b3" }, + { url = "https://files.pythonhosted.org/packages/ac/01/5a67cb785bda60f45415d09c2bc245433f1c68dd82eef9c9002c508b5a65/numpy-2.3.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3634093d0b428e6c32c3a69b78e554f0cd20ee420dcad5a9f3b2a63762ce4197" }, + { url = "https://files.pythonhosted.org/packages/c2/cd/8428e23a9fcebd33988f4cb61208fda832800ca03781f471f3727a820704/numpy-2.3.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:043885b4f7e6e232d7df4f51ffdef8c36320ee9d5f227b380ea636722c7ed12e" }, + { url = "https://files.pythonhosted.org/packages/3e/d1/913fe563820f3c6b079f992458f7331278dcd7ba8427e8e745af37ddb44f/numpy-2.3.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ee6a571d1e4f0ea6d5f22d6e5fbd6ed1dc2b18542848e1e7301bd190500c9d7" }, + { 
url = "https://files.pythonhosted.org/packages/9e/7e/7d306ff7cb143e6d975cfa7eb98a93e73495c4deabb7d1b5ecf09ea0fd69/numpy-2.3.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fc8a63918b04b8571789688b2780ab2b4a33ab44bfe8ccea36d3eba51228c953" }, + { url = "https://files.pythonhosted.org/packages/47/6a/8cfc486237e56ccfb0db234945552a557ca266f022d281a2f577b98e955c/numpy-2.3.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:40cc556d5abbc54aabe2b1ae287042d7bdb80c08edede19f0c0afb36ae586f37" }, + { url = "https://files.pythonhosted.org/packages/b1/0e/42cb5e69ea901e06ce24bfcc4b5664a56f950a70efdcf221f30d9615f3f3/numpy-2.3.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ecb63014bb7f4ce653f8be7f1df8cbc6093a5a2811211770f6606cc92b5a78fd" }, + { url = "https://files.pythonhosted.org/packages/86/92/41c3d5157d3177559ef0a35da50f0cda7fa071f4ba2306dd36818591a5bc/numpy-2.3.4-cp313-cp313-win32.whl", hash = "sha256:e8370eb6925bb8c1c4264fec52b0384b44f675f191df91cbe0140ec9f0955646" }, + { url = "https://files.pythonhosted.org/packages/09/97/fd421e8bc50766665ad35536c2bb4ef916533ba1fdd053a62d96cc7c8b95/numpy-2.3.4-cp313-cp313-win_amd64.whl", hash = "sha256:56209416e81a7893036eea03abcb91c130643eb14233b2515c90dcac963fe99d" }, + { url = "https://files.pythonhosted.org/packages/ad/df/5474fb2f74970ca8eb978093969b125a84cc3d30e47f82191f981f13a8a0/numpy-2.3.4-cp313-cp313-win_arm64.whl", hash = "sha256:a700a4031bc0fd6936e78a752eefb79092cecad2599ea9c8039c548bc097f9bc" }, + { url = "https://files.pythonhosted.org/packages/11/83/66ac031464ec1767ea3ed48ce40f615eb441072945e98693bec0bcd056cc/numpy-2.3.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:86966db35c4040fdca64f0816a1c1dd8dbd027d90fca5a57e00e1ca4cd41b879" }, + { url = "https://files.pythonhosted.org/packages/5f/99/5b14e0e686e61371659a1d5bebd04596b1d72227ce36eed121bb0aeab798/numpy-2.3.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:838f045478638b26c375ee96ea89464d38428c69170360b23a1a50fa4baa3562" }, + { url = "https://files.pythonhosted.org/packages/2c/44/e9486649cd087d9fc6920e3fc3ac2aba10838d10804b1e179fb7cbc4e634/numpy-2.3.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d7315ed1dab0286adca467377c8381cd748f3dc92235f22a7dfc42745644a96a" }, + { url = "https://files.pythonhosted.org/packages/3e/51/902b24fa8887e5fe2063fd61b1895a476d0bbf46811ab0c7fdf4bd127345/numpy-2.3.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:84f01a4d18b2cc4ade1814a08e5f3c907b079c847051d720fad15ce37aa930b6" }, + { url = "https://files.pythonhosted.org/packages/34/f1/4de9586d05b1962acdcdb1dc4af6646361a643f8c864cef7c852bf509740/numpy-2.3.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:817e719a868f0dacde4abdfc5c1910b301877970195db9ab6a5e2c4bd5b121f7" }, + { url = "https://files.pythonhosted.org/packages/1f/06/1c16103b425de7969d5a76bdf5ada0804b476fed05d5f9e17b777f1cbefd/numpy-2.3.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e071da78d92a214212cacea81c6da557cab307f2c34b5f85b628e94803f9c0" }, + { url = "https://files.pythonhosted.org/packages/34/b2/65f4dc1b89b5322093572b6e55161bb42e3e0487067af73627f795cc9d47/numpy-2.3.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2ec646892819370cf3558f518797f16597b4e4669894a2ba712caccc9da53f1f" }, + { url = "https://files.pythonhosted.org/packages/d4/11/94ec578896cdb973aaf56425d6c7f2aff4186a5c00fac15ff2ec46998b46/numpy-2.3.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:035796aaaddfe2f9664b9a9372f089cfc88bd795a67bd1bfe15e6e770934cf64" }, + { url = "https://files.pythonhosted.org/packages/62/b7/7efa763ab33dbccf56dade36938a77345ce8e8192d6b39e470ca25ff3cd0/numpy-2.3.4-cp313-cp313t-win32.whl", hash = "sha256:fea80f4f4cf83b54c3a051f2f727870ee51e22f0248d3114b8e755d160b38cfb" }, + { url = 
"https://files.pythonhosted.org/packages/43/70/aba4c38e8400abcc2f345e13d972fb36c26409b3e644366db7649015f291/numpy-2.3.4-cp313-cp313t-win_amd64.whl", hash = "sha256:15eea9f306b98e0be91eb344a94c0e630689ef302e10c2ce5f7e11905c704f9c" }, + { url = "https://files.pythonhosted.org/packages/67/63/871fad5f0073fc00fbbdd7232962ea1ac40eeaae2bba66c76214f7954236/numpy-2.3.4-cp313-cp313t-win_arm64.whl", hash = "sha256:b6c231c9c2fadbae4011ca5e7e83e12dc4a5072f1a1d85a0a7b3ed754d145a40" }, + { url = "https://files.pythonhosted.org/packages/72/71/ae6170143c115732470ae3a2d01512870dd16e0953f8a6dc89525696069b/numpy-2.3.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:81c3e6d8c97295a7360d367f9f8553973651b76907988bb6066376bc2252f24e" }, + { url = "https://files.pythonhosted.org/packages/af/39/4be9222ffd6ca8a30eda033d5f753276a9c3426c397bb137d8e19dedd200/numpy-2.3.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7c26b0b2bf58009ed1f38a641f3db4be8d960a417ca96d14e5b06df1506d41ff" }, + { url = "https://files.pythonhosted.org/packages/6c/3d/d85f6700d0a4aa4f9491030e1021c2b2b7421b2b38d01acd16734a2bfdc7/numpy-2.3.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:62b2198c438058a20b6704351b35a1d7db881812d8512d67a69c9de1f18ca05f" }, + { url = "https://files.pythonhosted.org/packages/bf/04/82c1467d86f47eee8a19a464c92f90a9bb68ccf14a54c5224d7031241ffb/numpy-2.3.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:9d729d60f8d53a7361707f4b68a9663c968882dd4f09e0d58c044c8bf5faee7b" }, + { url = "https://files.pythonhosted.org/packages/0c/d3/c79841741b837e293f48bd7db89d0ac7a4f2503b382b78a790ef1dc778a5/numpy-2.3.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd0c630cf256b0a7fd9d0a11c9413b42fef5101219ce6ed5a09624f5a65392c7" }, + { url = "https://files.pythonhosted.org/packages/e8/7e/4a14a769741fbf237eec5a12a2cbc7a4c4e061852b6533bcb9e9a796c908/numpy-2.3.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:d5e081bc082825f8b139f9e9fe42942cb4054524598aaeb177ff476cc76d09d2" }, + { url = "https://files.pythonhosted.org/packages/93/87/1c1de269f002ff0a41173fe01dcc925f4ecff59264cd8f96cf3b60d12c9b/numpy-2.3.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:15fb27364ed84114438fff8aaf998c9e19adbeba08c0b75409f8c452a8692c52" }, + { url = "https://files.pythonhosted.org/packages/cd/28/18f72ee77408e40a76d691001ae599e712ca2a47ddd2c4f695b16c65f077/numpy-2.3.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:85d9fb2d8cd998c84d13a79a09cc0c1091648e848e4e6249b0ccd7f6b487fa26" }, + { url = "https://files.pythonhosted.org/packages/c3/76/95650169b465ececa8cf4b2e8f6df255d4bf662775e797ade2025cc51ae6/numpy-2.3.4-cp314-cp314-win32.whl", hash = "sha256:e73d63fd04e3a9d6bc187f5455d81abfad05660b212c8804bf3b407e984cd2bc" }, + { url = "https://files.pythonhosted.org/packages/dc/89/a231a5c43ede5d6f77ba4a91e915a87dea4aeea76560ba4d2bf185c683f0/numpy-2.3.4-cp314-cp314-win_amd64.whl", hash = "sha256:3da3491cee49cf16157e70f607c03a217ea6647b1cea4819c4f48e53d49139b9" }, + { url = "https://files.pythonhosted.org/packages/0d/0c/ae9434a888f717c5ed2ff2393b3f344f0ff6f1c793519fa0c540461dc530/numpy-2.3.4-cp314-cp314-win_arm64.whl", hash = "sha256:6d9cd732068e8288dbe2717177320723ccec4fb064123f0caf9bbd90ab5be868" }, + { url = "https://files.pythonhosted.org/packages/83/4b/c4a5f0841f92536f6b9592694a5b5f68c9ab37b775ff342649eadf9055d3/numpy-2.3.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:22758999b256b595cf0b1d102b133bb61866ba5ceecf15f759623b64c020c9ec" }, + { url = "https://files.pythonhosted.org/packages/3e/80/90308845fc93b984d2cc96d83e2324ce8ad1fd6efea81b324cba4b673854/numpy-2.3.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9cb177bc55b010b19798dc5497d540dea67fd13a8d9e882b2dae71de0cf09eb3" }, + { url = "https://files.pythonhosted.org/packages/3d/4e/07439f22f2a3b247cec4d63a713faae55e1141a36e77fb212881f7cda3fb/numpy-2.3.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = 
"sha256:0f2bcc76f1e05e5ab58893407c63d90b2029908fa41f9f1cc51eecce936c3365" }, + { url = "https://files.pythonhosted.org/packages/ab/de/1e11f2547e2fe3d00482b19721855348b94ada8359aef5d40dd57bfae9df/numpy-2.3.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:8dc20bde86802df2ed8397a08d793da0ad7a5fd4ea3ac85d757bf5dd4ad7c252" }, + { url = "https://files.pythonhosted.org/packages/3b/40/8cd57393a26cebe2e923005db5134a946c62fa56a1087dc7c478f3e30837/numpy-2.3.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e199c087e2aa71c8f9ce1cb7a8e10677dc12457e7cc1be4798632da37c3e86e" }, + { url = "https://files.pythonhosted.org/packages/93/39/5b3510f023f96874ee6fea2e40dfa99313a00bf3ab779f3c92978f34aace/numpy-2.3.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85597b2d25ddf655495e2363fe044b0ae999b75bc4d630dc0d886484b03a5eb0" }, + { url = "https://files.pythonhosted.org/packages/41/0d/19bb163617c8045209c1996c4e427bccbc4bbff1e2c711f39203c8ddbb4a/numpy-2.3.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04a69abe45b49c5955923cf2c407843d1c85013b424ae8a560bba16c92fe44a0" }, + { url = "https://files.pythonhosted.org/packages/e2/c1/6dba12fdf68b02a21ac411c9df19afa66bed2540f467150ca64d246b463d/numpy-2.3.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e1708fac43ef8b419c975926ce1eaf793b0c13b7356cfab6ab0dc34c0a02ac0f" }, + { url = "https://files.pythonhosted.org/packages/f8/73/f85056701dbbbb910c51d846c58d29fd46b30eecd2b6ba760fc8b8a1641b/numpy-2.3.4-cp314-cp314t-win32.whl", hash = "sha256:863e3b5f4d9915aaf1b8ec79ae560ad21f0b8d5e3adc31e73126491bb86dee1d" }, + { url = "https://files.pythonhosted.org/packages/17/90/28fa6f9865181cb817c2471ee65678afa8a7e2a1fb16141473d5fa6bacc3/numpy-2.3.4-cp314-cp314t-win_amd64.whl", hash = "sha256:962064de37b9aef801d33bc579690f8bfe6c5e70e29b61783f60bcba838a14d6" }, + { url = 
"https://files.pythonhosted.org/packages/54/23/08c002201a8e7e1f9afba93b97deceb813252d9cfd0d3351caed123dcf97/numpy-2.3.4-cp314-cp314t-win_arm64.whl", hash = "sha256:8b5a9a39c45d852b62693d9b3f3e0fe052541f804296ff401a72a1b60edafb29" }, ] [[package]] @@ -2876,54 +2881,82 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, ] +[[package]] +name = "pgvector" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/6c/6d8b4b03b958c02fa8687ec6063c49d952a189f8c91ebbe51e877dfab8f7/pgvector-0.4.2.tar.gz", hash = "sha256:322cac0c1dc5d41c9ecf782bd9991b7966685dee3a00bc873631391ed949513a", size = 31354, upload-time = "2025-12-05T01:07:17.87Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/26/6cee8a1ce8c43625ec561aff19df07f9776b7525d9002c86bceb3e0ac970/pgvector-0.4.2-py3-none-any.whl", hash = "sha256:549d45f7a18593783d5eec609ea1684a724ba8405c4cb182a0b2b08aeff04e08", size = 27441, upload-time = "2025-12-05T01:07:16.536Z" }, +] + [[package]] name = "pillow" version = "11.3.0" source = { registry = "https://download.pytorch.org/whl/cpu" } -wheels = [ - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" 
}, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = 
"https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = 
"https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-win_arm64.whl" }, +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4" }, + { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69" }, + { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d" }, + { url = "https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6" }, + { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7" }, + { url = "https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024" }, + { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809" }, + { url = "https://files.pythonhosted.org/packages/0b/1a/7cff92e695a2a29ac1958c2a0fe4c0b2393b60aac13b04a4fe2735cad52d/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d" }, + { url = "https://files.pythonhosted.org/packages/26/7d/73699ad77895f69edff76b0f332acc3d497f22f5d75e5360f78cbcaff248/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149" }, + { url = "https://files.pythonhosted.org/packages/8c/ce/e7dfc873bdd9828f3b6e5c2bbb74e47a98ec23cc5c74fc4e54462f0d9204/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d" }, + { url = "https://files.pythonhosted.org/packages/16/8f/b13447d1bf0b1f7467ce7d86f6e6edf66c0ad7cf44cf5c87a37f9bed9936/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542" }, + { url = "https://files.pythonhosted.org/packages/1e/93/0952f2ed8db3a5a4c7a11f91965d6184ebc8cd7cbb7941a260d5f018cd2d/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd" }, + { url = "https://files.pythonhosted.org/packages/4b/e8/100c3d114b1a0bf4042f27e0f87d2f25e857e838034e98ca98fe7b8c0a9c/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8" }, + { url = 
"https://files.pythonhosted.org/packages/aa/86/3f758a28a6e381758545f7cdb4942e1cb79abd271bea932998fc0db93cb6/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f" }, + { url = "https://files.pythonhosted.org/packages/01/f4/91d5b3ffa718df2f53b0dc109877993e511f4fd055d7e9508682e8aba092/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c" }, + { url = "https://files.pythonhosted.org/packages/f9/0e/37d7d3eca6c879fbd9dba21268427dffda1ab00d4eb05b32923d4fbe3b12/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd" }, + { url = "https://files.pythonhosted.org/packages/ff/b0/3426e5c7f6565e752d81221af9d3676fdbb4f352317ceafd42899aaf5d8a/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e" }, + { url = "https://files.pythonhosted.org/packages/fc/c1/c6c423134229f2a221ee53f838d4be9d82bab86f7e2f8e75e47b6bf6cd77/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1" }, + { url = "https://files.pythonhosted.org/packages/ba/c9/09e6746630fe6372c67c648ff9deae52a2bc20897d51fa293571977ceb5d/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805" }, + { url = "https://files.pythonhosted.org/packages/d5/1c/a2a29649c0b1983d3ef57ee87a66487fdeb45132df66ab30dd37f7dbe162/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8" }, + { url = 
"https://files.pythonhosted.org/packages/36/de/d5cc31cc4b055b6c6fd990e3e7f0f8aaf36229a2698501bcb0cdf67c7146/pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2" }, + { url = "https://files.pythonhosted.org/packages/d5/ea/502d938cbaeec836ac28a9b730193716f0114c41325db428e6b280513f09/pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b" }, + { url = "https://files.pythonhosted.org/packages/45/9c/9c5e2a73f125f6cbc59cc7087c8f2d649a7ae453f83bd0362ff7c9e2aee2/pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3" }, + { url = "https://files.pythonhosted.org/packages/23/85/397c73524e0cd212067e0c969aa245b01d50183439550d24d9f55781b776/pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51" }, + { url = "https://files.pythonhosted.org/packages/17/d2/622f4547f69cd173955194b78e4d19ca4935a1b0f03a302d655c9f6aae65/pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580" }, + { url = "https://files.pythonhosted.org/packages/dd/80/a8a2ac21dda2e82480852978416cfacd439a4b490a501a288ecf4fe2532d/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e" }, + { url = "https://files.pythonhosted.org/packages/44/d6/b79754ca790f315918732e18f82a8146d33bcd7f4494380457ea89eb883d/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d" }, + { url = "https://files.pythonhosted.org/packages/49/20/716b8717d331150cb00f7fdd78169c01e8e0c219732a78b0e59b6bdb2fd6/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced" }, + { url = "https://files.pythonhosted.org/packages/74/cf/a9f3a2514a65bb071075063a96f0a5cf949c2f2fce683c15ccc83b1c1cab/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c" }, + { url = "https://files.pythonhosted.org/packages/98/3c/da78805cbdbee9cb43efe8261dd7cc0b4b93f2ac79b676c03159e9db2187/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8" }, + { url = "https://files.pythonhosted.org/packages/6c/fa/ce044b91faecf30e635321351bba32bab5a7e034c60187fe9698191aef4f/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59" }, + { url = "https://files.pythonhosted.org/packages/7b/51/90f9291406d09bf93686434f9183aba27b831c10c87746ff49f127ee80cb/pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe" }, + { url = "https://files.pythonhosted.org/packages/cd/5a/6fec59b1dfb619234f7636d4157d11fb4e196caeee220232a8d2ec48488d/pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c" }, + { url = "https://files.pythonhosted.org/packages/49/6b/00187a044f98255225f172de653941e61da37104a9ea60e4f6887717e2b5/pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788" }, + { url = "https://files.pythonhosted.org/packages/e8/5c/6caaba7e261c0d75bab23be79f1d06b5ad2a2ae49f028ccec801b0e853d6/pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31" }, + { url = 
"https://files.pythonhosted.org/packages/f3/7e/b623008460c09a0cb38263c93b828c666493caee2eb34ff67f778b87e58c/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e" }, + { url = "https://files.pythonhosted.org/packages/73/f4/04905af42837292ed86cb1b1dabe03dce1edc008ef14c473c5c7e1443c5d/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12" }, + { url = "https://files.pythonhosted.org/packages/41/b0/33d79e377a336247df6348a54e6d2a2b85d644ca202555e3faa0cf811ecc/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a" }, + { url = "https://files.pythonhosted.org/packages/49/2d/ed8bc0ab219ae8768f529597d9509d184fe8a6c4741a6864fea334d25f3f/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632" }, + { url = "https://files.pythonhosted.org/packages/b5/3d/b932bb4225c80b58dfadaca9d42d08d0b7064d2d1791b6a237f87f661834/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673" }, + { url = "https://files.pythonhosted.org/packages/09/b5/0487044b7c096f1b48f0d7ad416472c02e0e4bf6919541b111efd3cae690/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027" }, + { url = "https://files.pythonhosted.org/packages/a8/2d/524f9318f6cbfcc79fbc004801ea6b607ec3f843977652fdee4857a7568b/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77" }, + { url = 
"https://files.pythonhosted.org/packages/6f/d2/a9a4f280c6aefedce1e8f615baaa5474e0701d86dd6f1dede66726462bbd/pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874" }, + { url = "https://files.pythonhosted.org/packages/fe/54/86b0cd9dbb683a9d5e960b66c7379e821a19be4ac5810e2e5a715c09a0c0/pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a" }, + { url = "https://files.pythonhosted.org/packages/e7/95/88efcaf384c3588e24259c4203b909cbe3e3c2d887af9e938c2022c9dd48/pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214" }, + { url = "https://files.pythonhosted.org/packages/2e/cc/934e5820850ec5eb107e7b1a72dd278140731c669f396110ebc326f2a503/pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635" }, + { url = "https://files.pythonhosted.org/packages/d6/e9/9c0a616a71da2a5d163aa37405e8aced9a906d574b4a214bede134e731bc/pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6" }, + { url = "https://files.pythonhosted.org/packages/1a/33/c88376898aff369658b225262cd4f2659b13e8178e7534df9e6e1fa289f6/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae" }, + { url = "https://files.pythonhosted.org/packages/1f/70/d376247fb36f1844b42910911c83a02d5544ebd2a8bad9efcc0f707ea774/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653" }, + { url = "https://files.pythonhosted.org/packages/eb/1c/537e930496149fbac69efd2fc4329035bbe2e5475b4165439e3be9cb183b/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6" }, + { url = "https://files.pythonhosted.org/packages/bd/57/80f53264954dcefeebcf9dae6e3eb1daea1b488f0be8b8fef12f79a3eb10/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36" }, + { url = "https://files.pythonhosted.org/packages/70/ff/4727d3b71a8578b4587d9c276e90efad2d6fe0335fd76742a6da08132e8c/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b" }, + { url = "https://files.pythonhosted.org/packages/05/ae/716592277934f85d3be51d7256f3636672d7b1abfafdc42cf3f8cbd4b4c8/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477" }, + { url = "https://files.pythonhosted.org/packages/e7/bb/7fe6cddcc8827b01b1a9766f5fdeb7418680744f9082035bdbabecf1d57f/pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50" }, + { url = "https://files.pythonhosted.org/packages/8b/f5/06bfaa444c8e80f1a8e4bff98da9c83b37b5be3b1deaa43d27a0db37ef84/pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b" }, + { url = "https://files.pythonhosted.org/packages/f0/77/bc6f92a3e8e6e46c0ca78abfffec0037845800ea38c73483760362804c41/pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12" }, + { url = "https://files.pythonhosted.org/packages/4a/82/3a721f7d69dca802befb8af08b7c79ebcab461007ce1c18bd91a5d5896f9/pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db" }, + { url = 
"https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa" }, ] [[package]] @@ -4095,8 +4128,9 @@ source = { registry = "https://download.pytorch.org/whl/cpu" } dependencies = [ { name = "mpmath" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517" } wheels = [ - { url = "https://download.pytorch.org/whl/sympy-1.14.0-py3-none-any.whl" }, + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5" }, ] [[package]] From 979cda3a205cc43f28593de87d039f4488e3705e Mon Sep 17 00:00:00 2001 From: mtalvi Date: Mon, 15 Dec 2025 14:03:46 +0200 Subject: [PATCH 2/4] updating quay reo for tei image --- .../charts/text-embeddings-inference/values.yaml | 2 +- deploy/local/compose.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/helm/ansible-log-monitor/charts/text-embeddings-inference/values.yaml b/deploy/helm/ansible-log-monitor/charts/text-embeddings-inference/values.yaml index a8baccf..945a187 100644 --- a/deploy/helm/ansible-log-monitor/charts/text-embeddings-inference/values.yaml +++ b/deploy/helm/ansible-log-monitor/charts/text-embeddings-inference/values.yaml @@ -4,7 +4,7 @@ replicaCount: 1 strategy: {} image: - repository: quay.io/rh-ai-quickstart/alm-backend + repository: quay.io/rh-ai-quickstart/alm-rag pullPolicy: Always tag: "tei-rag-v1" diff --git a/deploy/local/compose.yaml b/deploy/local/compose.yaml index 9ac7b21..634d5af 100644 --- a/deploy/local/compose.yaml +++ b/deploy/local/compose.yaml @@ -221,7 +221,7 @@ 
services: retries: 3 alm-embedding: - image: quay.io/rh-ai-quickstart/alm-backend:tei-rag-v1 + image: quay.io/rh-ai-quickstart/alm-rag:tei-rag-v1 container_name: alm-embedding # Entrypoint is already set to text-embeddings-router in the image ports: From 2ace1eab35bddbd5ceb0ee25b98a4b608eb47477 Mon Sep 17 00:00:00 2001 From: mtalvi Date: Tue, 16 Dec 2025 13:05:11 +0200 Subject: [PATCH 3/4] local deployment --- .env.example | 4 ++ Makefile | 6 ++- deploy/local/Makefile | 95 ++++++++++++++++++++++++++++++++------- deploy/local/compose.yaml | 34 +++++++++++++- init_pipeline.py | 34 +++++++------- src/alm/database.py | 23 +++++----- 6 files changed, 151 insertions(+), 45 deletions(-) diff --git a/.env.example b/.env.example index 029f18d..a8bc893 100644 --- a/.env.example +++ b/.env.example @@ -20,6 +20,10 @@ PROD_CORS_ORIGIN=http://localhost:3000 # RAG (Retrieval-Augmented Generation) Configuration # ============================================================================ EMBEDDINGS_LLM_URL=http://localhost:8080 +# RAG Service URL (microservice endpoint) +# Backend communicates with RAG service via HTTP +# Default: http://alm-rag:8002 (for Kubernetes) or http://localhost:8002 (for local) +RAG_SERVICE_URL=http://localhost:8002 # Enable/disable RAG functionality (default: true) RAG_ENABLED=true diff --git a/Makefile b/Makefile index 218c79f..4a875db 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # This makefile routes targets to local or helm specific makefiles -.PHONY: all local helm help +.PHONY: all local helm help rag-status test-rag # ifneq (,$(wildcard .env)) # # ifneq (,$(filter local,$(MAKECMDGOALS))) @@ -34,3 +34,7 @@ local/%: ## Route local targets to deploy/local/Makefile cluster/%: ## Route deploy targets to deploy/helm/Makefile @$(MAKE) -C deploy/helm $* + +# Convenience targets for common local commands +rag-status: local/rag-status ## Check RAG service status +test-rag: local/test-rag ## Test RAG service diff --git a/deploy/local/Makefile 
b/deploy/local/Makefile index 1740ba4..ec1c855 100644 --- a/deploy/local/Makefile +++ b/deploy/local/Makefile @@ -1,4 +1,4 @@ -.PHONY: deploy start stop status help postgres phoenix loki-stack aap-mock replay stop-replay backend ui annotation health logs restart clean stop-postgres stop-phoenix stop-loki-stack stop-aap-mock embedding stop-embedding wait-for-embedding test-embedding test-rag +.PHONY: deploy start stop status help postgres phoenix loki-stack aap-mock replay stop-replay backend ui annotation health logs restart clean stop-postgres stop-phoenix stop-loki-stack stop-aap-mock embedding stop-embedding wait-for-embedding rag stop-rag test-embedding test-rag .DEFAULT_GOAL := help include ../../.env @@ -36,8 +36,10 @@ help: ## Show this help message @echo "" @echo "📚 RAG Setup:" @echo " 1. Place PDF files in data/knowledge_base/" - @echo " 2. Run 'make run-whole-training-pipeline' to build RAG index" - @echo " 3. Ensure RAG_ENABLED=true in .env file" + @echo " 2. Start services: 'make start' (includes RAG service)" + @echo " 3. Run 'make run-whole-training-pipeline' to build RAG embeddings in PostgreSQL" + @echo " 4. RAG service will automatically load embeddings when available" + @echo " 5. Ensure RAG_ENABLED=true and RAG_SERVICE_URL=http://alm-rag:8002 in .env file" @echo "" @@ -51,6 +53,7 @@ start: stop ## 🚀 Start all services locally @$(MAKE) -s embedding @echo " ⚠️ Note: Embedding service may take 3-5 minutes to load the model" @echo " You can check status with: make test-embedding" + @$(MAKE) -s rag @$(MAKE) -s aap-mock @$(MAKE) -s backend @$(MAKE) -s ui @@ -79,6 +82,12 @@ embedding: ## 🤖 Start Embedding Service (TEI) @echo " Starting Embedding Service (TEI)..." @$(COMPOSE_CMD) -f compose.yaml up -d alm-embedding +rag: ## 🔍 Start RAG Service + @echo " Starting RAG Service..." 
+ @$(COMPOSE_CMD) -f compose.yaml up -d alm-rag + @echo " ⚠️ Note: RAG service will poll PostgreSQL for embeddings" + @echo " It will become ready once embeddings are available (after init job runs)" + wait-for-embedding: ## ⏳ Wait for embedding service to be ready (optional, for manual use) @echo " Waiting for embedding service to be ready (this may take 3-5 minutes for model loading)..." @timeout=300; \ @@ -167,6 +176,7 @@ stop: ## 🛑 Stop all services @$(MAKE) -s stop-postgres @$(MAKE) -s stop-phoenix @$(MAKE) -s stop-embedding + @$(MAKE) -s stop-rag @$(MAKE) -s kill-ports @echo "👋 All services stopped" @@ -190,6 +200,10 @@ stop-embedding: ## 🛑 Stop Embedding Service @$(COMPOSE_CMD) -f compose.yaml down alm-embedding || true @echo " ✓ Embedding service stopped and removed" +stop-rag: ## 🛑 Stop RAG Service + @$(COMPOSE_CMD) -f compose.yaml down alm-rag || true + @echo " ✓ RAG service stopped and removed" + kill-ports: ## 🔌 Kill processes using required ports @if [ "$$(uname)" = "Darwin" ]; then \ lsof -ti :7860 | xargs kill -9 2>/dev/null || true; \ @@ -239,6 +253,12 @@ kill-ports: ## 🔌 Kill processes using required ports fuser -k 8081/tcp 2>/dev/null || true; \ fi @echo " ✓ Loki MCP Server 8081 killed" + @if [ "$$(uname)" = "Darwin" ]; then \ + lsof -ti :8002 | xargs kill -9 2>/dev/null || true; \ + else \ + fuser -k 8002/tcp 2>/dev/null || true; \ + fi + @echo " ✓ RAG Service 8002 killed" @if [ "$$(uname)" = "Darwin" ]; then \ lsof -ti :3000 | xargs kill -9 2>/dev/null || true; \ else \ @@ -259,6 +279,7 @@ status: ## 📊 Show status of all services @echo " 📊 Loki: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'loki' | grep -v 'loki-mcp' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:3100" @echo " 🔍 Loki MCP Server: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'loki-mcp-server' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:8081" @echo " 🤖 Embedding Service: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'alm-embedding' 
| grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:8080" + @echo " 🔍 RAG Service: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'alm-rag' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:8002" @echo " 📈 Grafana: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'grafana' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:3000" @echo " 📝 Promtail: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'promtail' | grep -q 'Up' && echo 'Running' || echo 'Stopped')" @echo " 🎭 AAP Mock: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'aap-mock' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:8082" @@ -293,14 +314,20 @@ run-whole-training-pipeline: ## 🔍 Run whole training pipeline (builds RAG ind @( cd ../.. && uv run init_pipeline.py ) # Add new target to check RAG status -rag-status: ## 📊 Check RAG index status - @echo "📊 RAG Index Status:" - @if [ -f "../../data/ansible_errors.index" ] && [ -f "../../data/error_metadata.pkl" ]; then \ - echo " ✅ RAG index exists"; \ - ls -lh ../../data/ansible_errors.index ../../data/error_metadata.pkl; \ +rag-status: ## 📊 Check RAG service and index status + @echo "📊 RAG Service Status:" + @if curl -s -f http://localhost:8002/health >/dev/null 2>&1; then \ + echo " ✅ RAG service is running"; \ + if curl -s -f http://localhost:8002/ready >/dev/null 2>&1; then \ + index_size=$$(curl -s http://localhost:8002/ready | grep -o '"index_size":[0-9]*' | grep -o '[0-9]*' || echo "unknown"); \ + echo " ✅ RAG index is loaded ($$index_size embeddings)"; \ + else \ + echo " ⚠️ RAG index not loaded yet (waiting for embeddings)"; \ + echo " Run 'make run-whole-training-pipeline' to build embeddings"; \ + fi; \ else \ - echo " ❌ RAG index not found"; \ - echo " Run 'make run-whole-training-pipeline' to build it"; \ + echo " ❌ RAG service is not running"; \ + echo " Start it with: make rag"; \ fi @if [ -d "../../data/knowledge_base" ]; then \ pdf_count=$$(ls -1 
../../data/knowledge_base/*.pdf 2>/dev/null | wc -l); \ @@ -335,14 +362,40 @@ test-embedding: ## 🧪 Test embedding service @echo "" @echo "✅ Embedding service test passed!" -test-rag: ## 🧪 Test RAG system (requires RAG index) - @echo "🧪 Testing RAG System..." - @if [ ! -f "../../data/ansible_errors.index" ]; then \ - echo "❌ RAG index not found. Run 'make run-whole-training-pipeline' first."; \ +test-rag: ## 🧪 Test RAG service (requires RAG service running and embeddings in PostgreSQL) + @echo "🧪 Testing RAG Service..." + @echo "" + @echo "1. Health Check:" + @if curl -s -f http://localhost:8002/health >/dev/null 2>&1; then \ + echo " ✅ Service is healthy"; \ + else \ + echo " ❌ Service is not responding"; \ + exit 1; \ + fi + @echo "" + @echo "2. Readiness Check:" + @if curl -s -f http://localhost:8002/ready >/dev/null 2>&1; then \ + echo " ✅ Service is ready (index loaded)"; \ + else \ + echo " ⚠️ Service is not ready (index not loaded yet)"; \ + echo " Run 'make run-whole-training-pipeline' to build embeddings"; \ + exit 1; \ + fi + @echo "" + @echo "3. Test Query:" + @response=$$(curl -s -X POST http://localhost:8002/rag/query \ + -H "Content-Type: application/json" \ + -d '{"query": "ansible playbook execution failed", "top_k": 5, "top_n": 3, "similarity_threshold": 0.6}'); \ + if echo "$$response" | grep -q '"results"'; then \ + echo " ✅ Query successful"; \ + echo " Response preview: $$(echo $$response | head -c 200)..."; \ + else \ + echo " ❌ Query failed"; \ + echo " Response: $$response"; \ exit 1; \ fi - @echo "Running RAG test script..." - @(cd ../.. && uv run python tests/rag/test_embeddings.py) + @echo "" + @echo "✅ RAG service test passed!" 
health: ## 🔍 Check health of running services @echo "🔍 Health Checks:" @@ -381,6 +434,16 @@ health: ## 🔍 Check health of running services else \ echo " 🤖 Embedding Service http://localhost:8080: Unhealthy (may still be loading model)"; \ fi + @if curl -s http://localhost:8002/health >/dev/null 2>&1; then \ + echo " 🔍 RAG Service http://localhost:8002: Healthy"; \ + if curl -s http://localhost:8002/ready >/dev/null 2>&1; then \ + echo " 🔍 RAG Service http://localhost:8002: Ready (index loaded)"; \ + else \ + echo " 🔍 RAG Service http://localhost:8002: Not ready (index not loaded yet)"; \ + fi; \ + else \ + echo " 🔍 RAG Service http://localhost:8002: Unhealthy"; \ + fi @if curl -s http://localhost:3000 >/dev/null 2>&1; then \ echo " 📈 Grafana http://localhost:3000: Healthy"; \ else \ diff --git a/deploy/local/compose.yaml b/deploy/local/compose.yaml index 634d5af..22795ac 100644 --- a/deploy/local/compose.yaml +++ b/deploy/local/compose.yaml @@ -125,7 +125,7 @@ services: restart: unless-stopped postgres: - image: postgres:15 + image: pgvector/pgvector:pg15 # PostgreSQL with pgvector extension environment: - POSTGRES_USER=user - POSTGRES_PASSWORD=password @@ -173,6 +173,7 @@ services: - PROD_CORS_ORIGIN=${PROD_CORS_ORIGIN} # RAG Configuration - RAG_ENABLED=${RAG_ENABLED} + - RAG_SERVICE_URL=${RAG_SERVICE_URL:-http://alm-rag:8002} - DATA_DIR=${DATA_DIR} - KNOWLEDGE_BASE_DIR=${KNOWLEDGE_BASE_DIR} - RAG_TOP_K=${RAG_TOP_K} @@ -194,6 +195,8 @@ services: condition: service_healthy alm-embedding: condition: service_healthy + alm-rag: + condition: service_started # RAG service can start before embeddings are ready restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] @@ -244,6 +247,35 @@ services: # Note: This service requires significant memory (8Gi recommended) # Adjust resources based on your system capabilities + alm-rag: + build: + context: ../.. 
+ dockerfile: services/rag/Containerfile + container_name: alm-rag + ports: + - "8002:8002" + environment: + # Use container network name for PostgreSQL (not localhost) + # Override any local DATABASE_URL to use container network + - DATABASE_URL=postgresql+asyncpg://user:password@postgres:5432/logsdb + - EMBEDDINGS_LLM_URL=http://alm-embedding:8080 + - RAG_MODEL_NAME=nomic-ai/nomic-embed-text-v1.5 + - PORT=8002 + networks: + - alm + depends_on: + postgres: + condition: service_healthy + alm-embedding: + condition: service_healthy + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8002/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s # Service starts immediately, but index may take time to load + volumes: postgres_data: aap_mock_data: diff --git a/init_pipeline.py b/init_pipeline.py index c22c5a6..9ef1b15 100644 --- a/init_pipeline.py +++ b/init_pipeline.py @@ -29,21 +29,21 @@ def setup_data_directories(): print(f" ✓ {data_dir}") print(f" ✓ {logs_dir}") - # Check for knowledge base PDFs in image - image_kb_dir = Path("/app/data/knowledge_base") - if image_kb_dir.exists(): - image_pdfs = list(image_kb_dir.glob("*.pdf")) - if image_pdfs: - print(f"\n✓ Found {len(image_pdfs)} PDF file(s) in container image:") - for pdf in image_pdfs: + # Check for knowledge base PDFs + # Use config path (works for both local and container) + kb_dir = Path(config.storage.knowledge_base_dir) + if kb_dir.exists(): + pdfs = list(kb_dir.glob("*.pdf")) + if pdfs: + print(f"\n✓ Found {len(pdfs)} PDF file(s) in knowledge base ({kb_dir}):") + for pdf in pdfs: print(f" - {pdf.name}") else: - print(f"\n⚠ No PDF files found in image at {image_kb_dir}") + print(f"\n⚠ No PDF files found in {kb_dir}") + print(" Add PDF files to the knowledge base directory to enable RAG") else: - print(f"\n⚠ Knowledge base directory not found in image at {image_kb_dir}") - print( - " PDFs should be baked into the container image at 
/app/data/knowledge_base" - ) + print(f"\n⚠ Knowledge base directory not found at {kb_dir}") + print(" Create the directory and add PDF files to enable RAG") print("=" * 70) @@ -108,13 +108,13 @@ async def build_rag_index(): parser = AnsibleErrorParser() embedder = AnsibleErrorEmbedder() - # Find PDFs in knowledge base (from container image) - # PDFs should be baked into the image at /app/data/knowledge_base - image_kb_dir = Path("/app/data/knowledge_base") - pdf_files = sorted(glob.glob(str(image_kb_dir / "*.pdf"))) + # Find PDFs in knowledge base + # Use config path (works for both local and container) + kb_dir = Path(config.storage.knowledge_base_dir) + pdf_files = sorted(glob.glob(str(kb_dir / "*.pdf"))) if not pdf_files: - print(f"⚠ WARNING: No PDF files found in {image_kb_dir}") + print(f"⚠ WARNING: No PDF files found in {kb_dir}") print(" RAG index will not be created") return diff --git a/src/alm/database.py b/src/alm/database.py index 14e9f8c..56f320b 100644 --- a/src/alm/database.py +++ b/src/alm/database.py @@ -24,6 +24,19 @@ # Create tables async def init_tables(delete_tables=False): + # First, try to enable pgvector extension in a separate transaction + # This prevents transaction abort errors if extension creation fails + try: + async with engine.begin() as ext_conn: + await ext_conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) + logger.info("pgvector extension enabled") + except Exception as e: + logger.warning(f"Could not enable pgvector extension: {e}") + logger.warning("This is OK if extension is already enabled or not available") + # For local dev without pgvector, we'll continue but RAG won't work + # In production, this should fail + + # Now create tables in a separate transaction async with engine.begin() as conn: if delete_tables: logger.info("Starting to delete tables") @@ -32,16 +45,6 @@ async def init_tables(delete_tables=False): await conn.run_sync(GrafanaAlert.metadata.drop_all) # RAGEmbedding table is NOT deleted - it persists 
across runs - # Ensure pgvector extension is enabled (must be done before creating tables) - try: - await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) - logger.info("pgvector extension enabled") - except Exception as e: - logger.warning(f"Could not enable pgvector extension: {e}") - logger.warning( - "This is OK if extension is already enabled or not available" - ) - # Create all tables await conn.run_sync(GrafanaAlert.metadata.create_all) await conn.run_sync(RAGEmbedding.metadata.create_all) From adf7331692b97943870cf68073884674f5813dba Mon Sep 17 00:00:00 2001 From: mtalvi Date: Tue, 16 Dec 2025 18:18:14 +0200 Subject: [PATCH 4/4] final fix --- services/rag/main.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/services/rag/main.py b/services/rag/main.py index 7717b50..97b98f1 100644 --- a/services/rag/main.py +++ b/services/rag/main.py @@ -99,8 +99,27 @@ async def load_index_background(): print(f"✗ Failed to load RAG index: {e}") return # Don't raise, just return - service will stay in "not ready" state except Exception as e: - print(f"✗ Failed to load RAG index: {e}") - return # Don't raise, just return - service will stay in "not ready" state + # Check if this is a "table doesn't exist" error - continue polling + error_str = str(e).lower() + if ( + "does not exist" in error_str + or "undefinedtable" in error_str + or "relation" in error_str + ): + # Table doesn't exist yet - init job is still creating it + if elapsed == 0 or elapsed % 30 == 0: # Print every 30 seconds + print( + f"Table not yet created (waited {elapsed}s), retrying in {wait_interval}s..." 
+ ) + await asyncio.sleep(wait_interval) + elapsed += wait_interval + else: + # Some other error - log and continue polling (might be transient) + if elapsed == 0 or elapsed % 30 == 0: # Print every 30 seconds + print(f"Error loading index (waited {elapsed}s): {e}") + print(f" Retrying in {wait_interval}s...") + await asyncio.sleep(wait_interval) + elapsed += wait_interval # If we get here, we've timed out print(f"⚠ WARNING: Failed to load RAG index after {max_wait_time} seconds")