From 12425290a3c266afe939652e83f41e9dfe9f1753 Mon Sep 17 00:00:00 2001 From: mtalvi Date: Mon, 15 Dec 2025 13:47:22 +0200 Subject: [PATCH 1/4] rag as a service - cluster --- .../charts/backend/templates/configmap.yaml | 7 +- .../charts/backend/templates/deployment.yaml | 10 - .../charts/backend/templates/init-job.yaml | 9 - .../charts/backend/templates/rag-pvc.yaml | 19 - .../charts/backend/values.yaml | 28 +- .../ansible-log-monitor/charts/rag/Chart.yaml | 9 + .../charts/rag/templates/NOTES.txt | 36 + .../charts/rag/templates/_helpers.tpl | 63 ++ .../charts/rag/templates/deployment.yaml | 101 +++ .../charts/rag/templates/hpa.yaml | 33 + .../charts/rag/templates/role.yaml | 13 + .../charts/rag/templates/rolebinding.yaml | 17 + .../charts/rag/templates/service.yaml | 16 + .../charts/rag/templates/serviceaccount.yaml | 14 + .../charts/rag/values.yaml | 109 +++ .../ansible-log-monitor/global-values.yaml | 3 +- docs/RAG_SERVICE_MIGRATION.md | 740 ++++++++++++++++++ init_pipeline.py | 187 +++-- pyproject.toml | 1 + services/rag/Containerfile | 31 + services/rag/README.md | 151 ++++ services/rag/index_loader.py | 189 +++++ services/rag/main.py | 281 +++++++ services/rag/pyproject.toml | 18 + .../get_more_context_agent/rag_handler.py | 179 +++-- src/alm/database.py | 19 +- src/alm/main_fastapi.py | 9 + src/alm/models.py | 46 ++ src/alm/rag/embed_and_index.py | 131 ++++ uv.lock | 252 +++--- 30 files changed, 2417 insertions(+), 304 deletions(-) delete mode 100644 deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/Chart.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/NOTES.txt create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/_helpers.tpl create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/deployment.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/hpa.yaml create mode 100644 
deploy/helm/ansible-log-monitor/charts/rag/templates/role.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/rolebinding.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/service.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/templates/serviceaccount.yaml create mode 100644 deploy/helm/ansible-log-monitor/charts/rag/values.yaml create mode 100644 docs/RAG_SERVICE_MIGRATION.md create mode 100644 services/rag/Containerfile create mode 100644 services/rag/README.md create mode 100644 services/rag/index_loader.py create mode 100644 services/rag/main.py create mode 100644 services/rag/pyproject.toml diff --git a/deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml b/deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml index a83b6bf..6d30a15 100644 --- a/deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml +++ b/deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml @@ -19,11 +19,14 @@ data: {{- if .Values.rag.enabled }} # RAG Configuration RAG_ENABLED: {{ .Values.rag.enabled | quote }} - # Model is hardcoded to nomic-ai/nomic-embed-text-v1.5, no env var needed - # API URL defaults to http://alm-embedding:8080 (local cluster service) + # RAG Service URL (microservice endpoint) + RAG_SERVICE_URL: {{ .Values.rag.serviceUrl | default "http://alm-rag:8002" | quote }} + # Embedding service URL (for init job, not used by backend) EMBEDDINGS_LLM_URL: {{ .Values.rag.embedding.apiUrl | default "http://alm-embedding:8080" | quote }} + # Data paths (for init job only) DATA_DIR: {{ .Values.rag.dataDir | quote }} KNOWLEDGE_BASE_DIR: {{ .Values.rag.knowledgeBaseDir | quote }} + # Query configuration RAG_TOP_K: {{ .Values.rag.query.topK | quote }} RAG_TOP_N: {{ .Values.rag.query.topN | quote }} RAG_SIMILARITY_THRESHOLD: {{ .Values.rag.query.similarityThreshold | quote }} diff --git 
a/deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml b/deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml index 4e061f4..9127ec8 100644 --- a/deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml +++ b/deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml @@ -76,20 +76,10 @@ spec: {{- toYaml . | nindent 12 }} {{- end }} volumeMounts: - {{- if .Values.rag.enabled }} - - name: rag-data - mountPath: {{ .Values.rag.pvcMountPath }} - readOnly: true - {{- end }} {{- with .Values.volumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} volumes: - {{- if .Values.rag.enabled }} - - name: rag-data - persistentVolumeClaim: - claimName: {{ include "backend.fullname" . }}-rag-data - {{- end }} {{- with .Values.volumes }} {{- toYaml . | nindent 8 }} {{- end }} diff --git a/deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml b/deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml index 689ef65..dd46f78 100644 --- a/deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml +++ b/deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml @@ -107,10 +107,6 @@ spec: volumeMounts: - name: init-sync mountPath: /init-sync - {{- if .Values.rag.enabled }} - - name: rag-data - mountPath: {{ .Values.rag.pvcMountPath }} - {{- end }} {{- with .Values.volumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} @@ -133,11 +129,6 @@ spec: volumes: - name: init-sync emptyDir: {} - {{- if .Values.rag.enabled }} - - name: rag-data - persistentVolumeClaim: - claimName: {{ include "backend.fullname" . }}-rag-data - {{- end }} {{- with .Values.volumes }} {{- toYaml . 
| nindent 8 }} {{- end }} diff --git a/deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml b/deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml deleted file mode 100644 index d23961b..0000000 --- a/deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml +++ /dev/null @@ -1,19 +0,0 @@ -{{- if .Values.rag.enabled }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "backend.fullname" . }}-rag-data - labels: - {{- include "backend.labels" . | nindent 4 }} - app.kubernetes.io/component: rag-storage -spec: - accessModes: - - {{ .Values.rag.persistence.accessMode }} - {{- if .Values.rag.persistence.storageClassName }} - storageClassName: {{ .Values.rag.persistence.storageClassName }} - {{- end }} - resources: - requests: - storage: {{ .Values.rag.persistence.size }} -{{- end }} - diff --git a/deploy/helm/ansible-log-monitor/charts/backend/values.yaml b/deploy/helm/ansible-log-monitor/charts/backend/values.yaml index be57e34..cfa9383 100644 --- a/deploy/helm/ansible-log-monitor/charts/backend/values.yaml +++ b/deploy/helm/ansible-log-monitor/charts/backend/values.yaml @@ -235,7 +235,11 @@ rag: # Enable or disable RAG functionality enabled: true - # Embedding model configuration + # RAG Service URL (microservice endpoint) + # Backend pods communicate with RAG service via HTTP + serviceUrl: "http://alm-rag:8002" + + # Embedding model configuration (used by init job for building index) # NOTE: API credentials (apiKey, apiUrl, modelName) are provided during 'make install' # and stored in the 'model-secret' Kubernetes secret embedding: @@ -244,24 +248,12 @@ rag: apiUrl: "http://alm-embedding:8080" # TEI service URL (defaults to local cluster service) port: 8080 # Port for the embedding service (TEI) - # Data paths + # Data paths (used by init job for knowledge base PDFs) + # Note: PDFs should be baked into the container image at /app/data/knowledge_base + # The init job will read PDFs from the image 
and process them + # The RAG index (embeddings) is stored in PostgreSQL dataDir: "/app/data/rag" - knowledgeBaseDir: "/app/data/rag/knowledge_base" - # PVC mount path (mounted directly at /app/data/rag) - pvcMountPath: "/app/data/rag" - - # Persistence configuration for RAG index storage - persistence: - # Storage size for RAG index and metadata - size: "2Gi" - # Access mode: ReadWriteOnce (RWO) is used because: - # 1. Init job writes the index once - # 2. Backend pods only read (never write) - # 3. AWS EBS (gp3-csi) only supports RWO - # Note: For RWO, all backend pods must be scheduled on the same node as the PVC - accessMode: "ReadWriteOnce" - # Storage class (leave empty for default) - storageClassName: "" + knowledgeBaseDir: "/app/data/knowledge_base" # PDFs should be in container image # Query configuration query: diff --git a/deploy/helm/ansible-log-monitor/charts/rag/Chart.yaml b/deploy/helm/ansible-log-monitor/charts/rag/Chart.yaml new file mode 100644 index 0000000..269d305 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v2 +name: rag +description: A Helm chart for RAG service + +type: application + +version: 0.1.0 +appVersion: "0.1.0" + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/NOTES.txt b/deploy/helm/ansible-log-monitor/charts/rag/templates/NOTES.txt new file mode 100644 index 0000000..e23d794 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/NOTES.txt @@ -0,0 +1,36 @@ +1. 
Get the application URL by running these commands: +{{- if .Values.httpRoute.enabled }} +{{- if .Values.httpRoute.hostnames }} + export APP_HOSTNAME={{ .Values.httpRoute.hostnames | first }} +{{- else }} + export APP_HOSTNAME=$(kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o jsonpath="{.spec.listeners[0].hostname}") + {{- end }} +{{- if and .Values.httpRoute.rules (first .Values.httpRoute.rules).matches (first (first .Values.httpRoute.rules).matches).path.value }} + echo "Visit http://$APP_HOSTNAME{{ (first (first .Values.httpRoute.rules).matches).path.value }} to use your application" + + NOTE: Your HTTPRoute depends on the listener configuration of your gateway and your HTTPRoute rules. + The rules can be set for path, method, header and query parameters. + You can check the gateway configuration with 'kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o yaml' +{{- end }} +{{- else if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "rag.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "rag.fullname" . 
}}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "rag.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "rag.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/_helpers.tpl b/deploy/helm/ansible-log-monitor/charts/rag/templates/_helpers.tpl new file mode 100644 index 0000000..b86565f --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/_helpers.tpl @@ -0,0 +1,63 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "rag.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "rag.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "rag.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "rag.labels" -}} +helm.sh/chart: {{ include "rag.chart" . }} +{{ include "rag.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "rag.selectorLabels" -}} +app.kubernetes.io/name: {{ include "rag.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "rag.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "rag.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/deployment.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/deployment.yaml new file mode 100644 index 0000000..f399a55 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/deployment.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "rag.fullname" . }} + labels: + {{- include "rag.labels" . 
| nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "rag.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "rag.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "rag.serviceAccountName" . }} + initContainers: + - name: wait-for-postgres + image: postgres:15-alpine + command: + - sh + - -c + - | + until pg_isready -d "$DATABASE_URL"; do + echo "Waiting for PostgreSQL to be ready..." + sleep 5 + done + echo "PostgreSQL is ready!" + env: + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: pgvector + key: uri + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.env }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/hpa.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/hpa.yaml new file mode 100644 index 0000000..e0e7bc2 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/hpa.yaml @@ -0,0 +1,33 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "rag.fullname" . }} + labels: + {{- include "rag.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "rag.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/role.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/role.yaml new file mode 100644 index 0000000..c914019 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/role.yaml @@ -0,0 +1,13 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "rag.fullname" . }}-job-reader + labels: + {{- include "rag.labels" . 
| nindent 4 }} +rules: + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch"] +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/rolebinding.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/rolebinding.yaml new file mode 100644 index 0000000..c6aeed8 --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/rolebinding.yaml @@ -0,0 +1,17 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "rag.fullname" . }}-job-reader + labels: + {{- include "rag.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "rag.fullname" . }}-job-reader +subjects: + - kind: ServiceAccount + name: {{ include "rag.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/service.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/service.yaml new file mode 100644 index 0000000..1b0a09f --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "rag.fullname" . }} + labels: + {{- include "rag.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "rag.selectorLabels" . | nindent 4 }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/templates/serviceaccount.yaml b/deploy/helm/ansible-log-monitor/charts/rag/templates/serviceaccount.yaml new file mode 100644 index 0000000..4e17c2e --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/templates/serviceaccount.yaml @@ -0,0 +1,14 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "rag.serviceAccountName" . 
}} + labels: + {{- include "rag.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} + diff --git a/deploy/helm/ansible-log-monitor/charts/rag/values.yaml b/deploy/helm/ansible-log-monitor/charts/rag/values.yaml new file mode 100644 index 0000000..e09165f --- /dev/null +++ b/deploy/helm/ansible-log-monitor/charts/rag/values.yaml @@ -0,0 +1,109 @@ +# Default values for rag service. +replicaCount: 1 + +image: + repository: quay.io/rh-ai-quickstart/alm-rag + pullPolicy: Always + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + create: true + automount: true + annotations: {} + name: "" + +rbac: + create: true + +podAnnotations: {} +podLabels: {} + +podSecurityContext: {} + +securityContext: {} + +service: + type: ClusterIP + port: 8002 + targetPort: 8002 + +ingress: + enabled: false + className: "" + annotations: {} + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + +httpRoute: + enabled: false + annotations: {} + parentRefs: + - name: gateway + sectionName: http + hostnames: + - chart-example.local + rules: + - matches: + - path: + type: PathPrefix + value: /rag + +env: + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: pgvector + key: uri + - name: EMBEDDINGS_LLM_URL + value: "http://alm-embedding:8080" + - name: RAG_MODEL_NAME + value: "nomic-ai/nomic-embed-text-v1.5" + - name: PORT + value: "8002" + +resources: + requests: + memory: "512Mi" + cpu: "200m" + limits: + memory: "2Gi" + cpu: "1000m" + +livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 60 + periodSeconds: 30 + +readinessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 3 + 
targetCPUUtilizationPercentage: 80 + +volumes: [] + +volumeMounts: [] + +nodeSelector: {} + +tolerations: [] + +affinity: {} + diff --git a/deploy/helm/ansible-log-monitor/global-values.yaml b/deploy/helm/ansible-log-monitor/global-values.yaml index 36fbda6..a5c749d 100644 --- a/deploy/helm/ansible-log-monitor/global-values.yaml +++ b/deploy/helm/ansible-log-monitor/global-values.yaml @@ -35,4 +35,5 @@ global: annotationInterface: "alm-annotation-interface" clustering: "alm-clustering" ui: "alm-ui" - embedding: "alm-embedding" \ No newline at end of file + embedding: "alm-embedding" + rag: "alm-rag" \ No newline at end of file diff --git a/docs/RAG_SERVICE_MIGRATION.md b/docs/RAG_SERVICE_MIGRATION.md new file mode 100644 index 0000000..b4401da --- /dev/null +++ b/docs/RAG_SERVICE_MIGRATION.md @@ -0,0 +1,740 @@ +# RAG Service Migration Guide + +## Overview + +This document describes the migration from PVC-based RAG storage to a dedicated RAG microservice with PostgreSQL storage. This change eliminates ReadWriteOnce (RWO) constraints, reduces resource duplication, and simplifies the architecture. + +## What Changed + +### Before (PVC-based) +- RAG index stored on PersistentVolumeClaim (PVC) +- Each backend pod loaded FAISS index from PVC +- All backend pods required to be on same node (RWO constraint) +- N backend pods = N copies of FAISS index in memory +- Index updates required PVC rebuild and pod restarts + +### After (RAG Service + PostgreSQL) +- RAG index stored in PostgreSQL (`ragembedding` table) +- Single RAG service loads FAISS index from PostgreSQL +- Backend pods make HTTP calls to RAG service +- Backend pods can run on any node (no constraints) +- 1 RAG service = 1 copy of FAISS index in memory +- Index updates via PostgreSQL (no pod restarts needed) + +## Architecture + +``` +┌─────────────────────┐ +│ Init Job Pod │ +│ (alm-backend-init) │ +│ │ +│ 1. Parse PDFs │ +│ 2. Generate │ +│ embeddings │ +│ 3. 
Save to │ +│ PostgreSQL │ +└──────────┬──────────┘ + │ + │ Writes embeddings + ▼ +┌─────────────────────┐ +│ PostgreSQL │ +│ │ +│ - ragembedding │ +│ table │ +│ - pgvector │ +│ extension │ +└──────────┬──────────┘ + │ + │ Reads embeddings + │ (polls every 5s) + ▼ +┌─────────────────────┐ +│ RAG Service Pod │ +│ (alm-rag) │ +│ │ +│ ┌───────────────┐ │ +│ │ Background │ │ +│ │ Task: Poll │ │ +│ │ PostgreSQL │ │ +│ └───────────────┘ │ +│ │ +│ ┌───────────────┐ │ +│ │ FAISS Index │ │ (in-memory) +│ │ (loaded from │ │ +│ │ PostgreSQL) │ │ +│ └───────────────┘ │ +└──────────┬──────────┘ + │ + │ HTTP /rag/query + │ + ▼ +┌─────────────────────┐ +│ Backend Pods │ +│ (alm-backend) │ +│ │ +│ - Pod 1 │ +│ - Pod 2 │ +│ - Pod N │ +│ │ +│ All make HTTP │ +│ calls to RAG │ +│ service │ +└─────────────────────┘ +``` + +## Init Job and RAG Service Relationship + +### Overview + +The init job and RAG service have a **producer-consumer relationship** coordinated through PostgreSQL: + +- **Init Job** = **Producer**: Creates and saves embeddings to PostgreSQL +- **RAG Service** = **Consumer**: Reads embeddings from PostgreSQL and serves queries +- **PostgreSQL** = **Coordination Point**: Shared data store, no direct communication needed + +### Key Characteristics + +1. **No Direct Dependency**: Services don't wait for each other to start +2. **Asynchronous Coordination**: RAG service polls PostgreSQL, init job polls RAG service HTTP endpoint +3. **Graceful Degradation**: Both services can start independently and handle missing data gracefully +4. **Data Persistence**: Embeddings persist in PostgreSQL across pod restarts + +### Detailed Flow and Timeline + +``` +Time Init Job PostgreSQL RAG Service +───────────────────────────────────────────────────────────────────────── +T+0s Pod starts ── Pod starts + │ │ │ +T+5s Wait for PostgreSQL ── Wait for PostgreSQL + │ │ │ +T+10s ── Ready ── + │ │ │ +T+15s PostgreSQL ready! ── PostgreSQL ready! 
+ │ │ │ + │ │ Start background task + │ │ Poll for embeddings... + │ │ (no embeddings yet) + │ │ │ +T+30s Building RAG index... ── Still polling... + - Parse PDFs │ (every 5 seconds) + - Generate embeddings │ │ + │ │ │ +T+60s Saving embeddings... Writing embeddings... ── + │ │ │ +T+65s Index complete! Embeddings saved! ── + │ │ │ +T+70s ── ── Found embeddings! + │ │ Loading index... + │ │ │ +T+75s ── ── Index loaded! ✓ + │ │ Service ready! + │ │ │ +T+80s Waiting for RAG service... ── ── + (polls /ready endpoint) │ │ + │ │ │ +T+85s RAG service ready! ── ── + │ │ │ +T+90s Running training pipeline ── ── + (uses RAG service) │ │ + │ │ │ +T+95s Querying RAG service... ── Serving queries ✓ + │ │ │ +``` + +### Phase-by-Phase Breakdown + +#### Phase 1: Parallel Startup (T+0s to T+15s) +- **Init Job**: Starts, waits for PostgreSQL via initContainer +- **RAG Service**: Starts, waits for PostgreSQL via initContainer +- **No Dependency**: Both can start simultaneously, no blocking + +#### Phase 2: Data Preparation - Init Job (T+15s to T+65s) +- **Init Job Actions**: + 1. Checks if embeddings already exist (skips if found, unless `RAG_FORCE_REBUILD=true`) + 2. Reads PDFs from container image (`/app/data/knowledge_base`) + 3. Parses PDFs into chunks using `AnsibleErrorParser` + 4. Generates embeddings using embedding service (TEI) + 5. Saves embeddings to PostgreSQL `ragembedding` table +- **PostgreSQL**: Receives and stores embeddings +- **RAG Service**: Continues polling PostgreSQL (embeddings not found yet) + +#### Phase 3: Index Loading - RAG Service (T+65s to T+75s) +- **RAG Service Actions**: + 1. Background task polls PostgreSQL every 5 seconds + 2. When embeddings found: queries all embeddings from `ragembedding` table + 3. Parses pgvector string format to numpy arrays + 4. Builds FAISS IndexFlatIP in memory + 5. Creates error store and index-to-error-id mapping + 6. 
Marks service as ready (`/ready` endpoint returns 200) +- **PostgreSQL**: Serves embedding queries +- **Init Job**: Continues waiting for RAG service + +#### Phase 4: Coordination - Init Job Waits (T+75s to T+85s) +- **Init Job Actions**: + 1. After saving embeddings, calls `wait_for_rag_service()` + 2. Polls `http://alm-rag:8002/ready` endpoint every 5 seconds + 3. Timeout: 5 minutes (300 seconds) + 4. Once RAG service ready, proceeds to training pipeline +- **RAG Service**: Responds to `/ready` checks (returns 200 when ready) +- **If Timeout**: Init job continues with warning, RAG queries may fail + +#### Phase 5: Runtime - Training Pipeline (T+85s+) +- **Init Job**: Runs `training_pipeline()` which: + - Processes alerts + - Uses RAG service for context retrieval (HTTP calls) + - Saves results to database +- **RAG Service**: Serves queries via `/rag/query` endpoint +- **Backend Pods**: (After init job completes) Can query RAG service for context + +### Communication Patterns + +#### Init Job → PostgreSQL +- **Method**: Direct database writes via SQLModel +- **When**: During `build_rag_index()` function +- **What**: Inserts/updates `ragembedding` table +- **Frequency**: Once per init job run + +#### RAG Service → PostgreSQL +- **Method**: Raw SQL queries via asyncpg +- **When**: Background polling task (every 5 seconds) +- **What**: SELECT queries from `ragembedding` table +- **Frequency**: Every 5 seconds until embeddings found, then once at startup + +#### Init Job → RAG Service +- **Method**: HTTP GET requests +- **When**: After saving embeddings, before training pipeline +- **What**: Polls `/ready` endpoint +- **Frequency**: Every 5 seconds, timeout 5 minutes + +#### Backend → RAG Service +- **Method**: HTTP POST requests +- **When**: During training pipeline and runtime queries +- **What**: `/rag/query` endpoint with query text +- **Frequency**: As needed for context retrieval + +### Error Handling and Resilience + +1. 
**RAG Service Startup Failure**: + - Service starts but stays in "not ready" state + - Background task continues polling + - Service becomes ready when embeddings available + - No crash, graceful degradation + +2. **Init Job Failure**: + - RAG service continues polling (will timeout after 10 minutes) + - Can be restarted independently + - No impact on RAG service pod + +3. **Embeddings Not Found**: + - RAG service logs warning, continues polling + - Init job can be rerun to populate embeddings + - No data loss (embeddings persist in PostgreSQL) + +4. **RAG Service Not Ready**: + - Init job waits up to 5 minutes + - If timeout: continues with warning + - Training pipeline proceeds, RAG queries may fail gracefully + +### Why This Design? + +1. **Eliminates Circular Dependencies**: + - Old design: RAG service waited for init job, init job needed RAG service → deadlock + - New design: Both start independently, coordinate via PostgreSQL + +2. **Faster Startup**: + - Services don't block each other + - Parallel execution possible + - No sequential waiting + +3. **Resilience**: + - Services can restart independently + - Data persists in PostgreSQL + - Graceful degradation if one service fails + +4. **Scalability**: + - RAG service can scale independently + - Multiple backend pods share single RAG service + - No resource duplication + +## Key Design Decisions + +### 1. Non-Blocking Startup +- **Problem**: RAG service was crashing if embeddings weren't available immediately +- **Solution**: Background task loads index asynchronously, service starts immediately +- **Benefit**: No circular dependencies, service can start before init job completes + +### 2. PostgreSQL as Coordination Point +- **Problem**: Need to coordinate between init job and RAG service +- **Solution**: PostgreSQL acts as shared data store, both services read/write independently +- **Benefit**: No direct dependencies, both services can start in parallel + +### 3. 
Embedding Persistence +- **Problem**: Training pipeline was deleting `ragembedding` table +- **Solution**: Modified `init_tables()` to preserve `ragembedding` table when `delete_tables=True` +- **Benefit**: Embeddings persist across training pipeline runs + +### 4. pgvector String Parsing +- **Problem**: pgvector returns embeddings as strings when queried via raw SQL +- **Solution**: Added parsing logic to handle both array and string representations +- **Benefit**: Robust handling of different PostgreSQL response formats + +## Components + +### 1. Database Schema +- **Table**: `ragembedding` (SQLModel) +- **Fields**: + - `error_id` (primary key) - Unique identifier for each error + - `embedding` (Vector(768)) - pgvector type, 768 dimensions for nomic-embed-text-v1.5 + - `error_title` - Title of the error + - `error_metadata` (JSON) - Complete error metadata including sections + - `model_name` - Embedding model used + - `embedding_dim` - Dimension of embedding vector (768) + - `created_at`, `updated_at` - Timestamps + +### 2. RAG Service (`services/rag/`) +- **Technology**: FastAPI +- **Port**: 8002 +- **Endpoints**: + - `POST /rag/query` - Query knowledge base for relevant errors + - `GET /health` - Health check (returns status even if index not loaded) + - `GET /ready` - Readiness check (returns 503 until index loaded) + - `POST /rag/reload` - Reload index from PostgreSQL without restart + +### 3. Backend Changes +- **Removed**: FAISS loading, PVC mounting, local index management +- **Added**: HTTP client (`httpx.AsyncClient`) for RAG service communication +- **Interface**: Same API (no changes to calling code) +- **Cleanup**: Proper HTTP client shutdown on application shutdown + +### 4. 
Init Job +- **Changed**: Saves embeddings to PostgreSQL instead of PVC +- **PDFs**: Read directly from container image (`/app/data/knowledge_base`) +- **Coordination**: Waits for RAG service to be ready before running training pipeline +- **Persistence**: Embeddings persist across training pipeline runs + +## Migration Steps + +### Step 1: Database Migration + +The database schema is automatically created when `init_tables()` is called. The pgvector extension is enabled automatically: + +```sql +CREATE EXTENSION IF NOT EXISTS vector; +``` + +### Step 2: Build and Deploy RAG Service + +1. **Build RAG service image** (from project root): + ```bash + podman build -f services/rag/Containerfile -t quay.io/rh-ai-quickstart/alm-rag:latest . + podman push quay.io/rh-ai-quickstart/alm-rag:latest + ``` + +2. **Deploy RAG service** (via Helm): + ```bash + helm upgrade --install ansible-log-monitor ./deploy/helm/ansible-log-monitor + ``` + +### Step 3: Run Init Job + +The init job will: +1. Read PDFs from container image +2. Generate embeddings +3. Save to PostgreSQL +4. Wait for RAG service to be ready +5. 
Run training pipeline + +```bash +# Check init job status +oc get jobs -n <namespace> -l app.kubernetes.io/component=init + +# View logs +oc logs -n <namespace> -l job-name=alm-backend-init --tail=100 +``` + +### Step 4: Verify RAG Service + +```bash +# Check service is running +oc get pods -n <namespace> -l app.kubernetes.io/name=rag + +# Check service health +RAG_POD=$(oc get pods -n <namespace> -l app.kubernetes.io/name=rag -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $RAG_POD -- curl -s http://localhost:8002/health | jq + +# Check readiness (should return 200 when index is loaded) +oc exec -n <namespace> $RAG_POD -- curl -s http://localhost:8002/ready | jq +``` + +### Step 5: Test RAG Query + +```bash +# Test query from within cluster +oc run -it --rm test-rag-query -n <namespace> --image=curlimages/curl --restart=Never -- \ + curl -X POST http://alm-rag:8002/rag/query \ + -H "Content-Type: application/json" \ + -d '{ + "query": "ansible playbook execution failed", + "top_k": 5, + "top_n": 3, + "similarity_threshold": 0.6 + }' | jq +``` + +## Configuration + +### Environment Variables + +#### RAG Service +- `DATABASE_URL` - PostgreSQL connection URL (required, from secret `pgvector`) +- `EMBEDDINGS_LLM_URL` - Embedding service URL (default: `http://alm-embedding:8080`) +- `RAG_MODEL_NAME` - Model name (default: `nomic-ai/nomic-embed-text-v1.5`) +- `PORT` - Service port (default: `8002`) + +#### Backend +- `RAG_ENABLED` - Enable/disable RAG (default: `true`, accepts: `true`, `1`, `yes`) +- `RAG_SERVICE_URL` - RAG service URL (default: `http://alm-rag:8002`) +- `RAG_TOP_K` - Top K candidates to retrieve (default: `10`) +- `RAG_TOP_N` - Top N final results to return (default: `3`) +- `RAG_SIMILARITY_THRESHOLD` - Minimum similarity threshold (default: `0.6`) + +### Helm Values + +```yaml +rag: + enabled: true + serviceUrl: "http://alm-rag:8002" + query: + topK: 4 + topN: 1 + similarityThreshold: 0.6 +``` + +## Testing + +### 1.
Unit Tests + +Test the RAG service locally: + +```bash +cd services/rag +# Set environment variables +export DATABASE_URL="postgresql+asyncpg://user:pass@localhost:5432/dbname" +export EMBEDDINGS_LLM_URL="http://localhost:8080" + +# Run service +uvicorn main:app --host 0.0.0.0 --port 8002 +``` + +### 2. Integration Tests + +Test backend → RAG service communication: + +```python +# In backend pod or test environment +from alm.agents.get_more_context_agent.rag_handler import RAGHandler + +handler = RAGHandler() +context = await handler.get_cheat_sheet_context("ansible error message") +print(context) +``` + +### 3. End-to-End Test + +1. Deploy all components +2. Run init job +3. Verify RAG service loads index +4. Trigger an alert that requires RAG context +5. Verify RAG service is called and returns results + +## Troubleshooting + +### RAG Service Not Starting + +**Problem**: Service fails to start or index doesn't load + +**Check**: +```bash +# Check logs +oc logs -n <namespace> -l app.kubernetes.io/name=rag --tail=50 + +# Verify PostgreSQL connection +RAG_POD=$(oc get pods -n <namespace> -l app.kubernetes.io/name=rag -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $RAG_POD -- env | grep DATABASE_URL + +# Check if embeddings exist (replace <database> with actual database name) +PG_POD=$(oc get pods -n <namespace> -l app=postgresql -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $PG_POD -- psql -U postgres -d <database> -c "SELECT COUNT(*) FROM ragembedding;" +``` + +### RAG Service Stuck in "Not Ready" State + +**Problem**: Service starts but `/ready` endpoint returns 503 + +**Possible Causes**: +1. Embeddings not yet available (init job still running) +2. Database connection issue +3.
Embedding parsing error + +**Check**: +```bash +# Check RAG service logs for polling messages +oc logs -n <namespace> -l app.kubernetes.io/name=rag | grep -i "embedding" + +# Verify embeddings exist in database +oc exec -n <namespace> $PG_POD -- psql -U postgres -d <database> -c "SELECT COUNT(*) FROM ragembedding;" + +# Check init job status +oc get jobs -n <namespace> -l app.kubernetes.io/component=init +``` + +### Backend Can't Reach RAG Service + +**Problem**: Backend returns empty context + +**Check**: +```bash +# Verify service exists +oc get svc -n <namespace> alm-rag + +# Test connectivity from backend pod +BACKEND_POD=$(oc get pods -n <namespace> -l app.kubernetes.io/name=backend -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $BACKEND_POD -- curl -s http://alm-rag:8002/health + +# Check backend logs +oc logs -n <namespace> $BACKEND_POD | grep -i rag +``` + +### No Embeddings in Database + +**Problem**: Init job didn't populate embeddings + +**Check**: +```bash +# Check init job logs +oc logs -n <namespace> -l job-name=alm-backend-init --tail=100 + +# Verify PDFs in image +INIT_POD=$(oc get pods -n <namespace> -l app.kubernetes.io/component=init -o jsonpath='{.items[0].metadata.name}') +oc exec -n <namespace> $INIT_POD -- ls -la /app/data/knowledge_base/ + +# Check database +oc exec -n <namespace> $PG_POD -- psql -U postgres -d <database> -c "SELECT error_id, model_name FROM ragembedding LIMIT 5;" +``` + +### Embeddings Deleted After Training Pipeline + +**Problem**: Embeddings disappear after init job completes + +**Solution**: This was fixed - `init_tables(delete_tables=True)` no longer deletes `ragembedding` table. If you see this issue, ensure you're using the latest backend image.
+ +**Verify**: +```bash +# Confirm the fix: embeddings should persist after the training pipeline +oc exec -n <namespace> $PG_POD -- psql -U postgres -d <database> -c "SELECT COUNT(*) FROM ragembedding;" +# Should return > 0 even after training pipeline runs +``` + +### Performance Issues + +**Problem**: Slow query responses + +**Solutions**: +- Increase RAG service resources (memory/CPU) +- Check PostgreSQL connection pool +- Verify FAISS index is loaded (check `/ready` endpoint) +- Consider adding RAG service replicas with load balancing + +## Rollback Plan + +If issues occur, you can rollback: + +1. **Disable RAG service**: + ```yaml + rag: + enabled: false + ``` + +2. **Revert to PVC** (if needed): + - Restore `rag-pvc.yaml` template + - Update `init_pipeline.py` to save to disk + - Update backend to load from PVC + +3. **Database cleanup** (optional): + ```sql + DROP TABLE IF EXISTS ragembedding; + ``` + +## Benefits Achieved + +✅ **No RWO Constraints**: Backend pods can run on any node +✅ **Reduced Memory**: Single FAISS index instead of N copies +✅ **Simplified Storage**: Single source of truth (PostgreSQL) +✅ **Easier Updates**: Update embeddings via SQL, no pod restarts +✅ **Better Scaling**: Independent scaling of RAG vs backend +✅ **No PVC Management**: Eliminated persistent volume complexity +✅ **Resilient Startup**: No circular dependencies, graceful degradation +✅ **Data Persistence**: Embeddings survive training pipeline runs + +## Files Changed + +### New Files + +#### `services/rag/main.py` +FastAPI application for the RAG service. Implements: +- Background task for loading index (non-blocking startup) +- HTTP endpoints for querying, health checks, and reloading +- Query processing: generates embeddings, searches FAISS, returns results +- Graceful error handling and service state management + +#### `services/rag/index_loader.py` +Loads embeddings from PostgreSQL and builds FAISS index.
Handles: +- PostgreSQL connection and querying +- Parsing pgvector string format to numpy arrays +- Building FAISS IndexFlatIP for similarity search +- Error store and index-to-error-id mapping + +#### `services/rag/pyproject.toml` +Python dependencies for RAG service: +- FastAPI, uvicorn for web framework +- sqlmodel, asyncpg, psycopg2-binary for database access +- faiss-cpu, numpy for similarity search +- httpx for embedding service calls + +#### `services/rag/Containerfile` +Container image definition for RAG service: +- Based on UBI8 Python 3.12 +- Uses `uv` for dependency management +- Copies service code and dependencies +- Exposes port 8002 + +#### `deploy/helm/ansible-log-monitor/charts/rag/` +Complete Helm chart for deploying RAG service: +- Deployment with initContainer for PostgreSQL readiness +- Service for cluster-internal access +- ServiceAccount and RBAC (if needed) +- HPA for autoscaling (optional) +- ConfigMap and environment variable management + +### Modified Files + +#### `src/alm/models.py` +**Change**: Added `RAGEmbedding` SQLModel class +- Defines database schema for storing embeddings +- Uses `pgvector.sqlalchemy.Vector(768)` for embedding column +- Includes error metadata as JSON field +- Tracks model name and embedding dimensions + +#### `src/alm/database.py` +**Changes**: +1. Added `RAGEmbedding` to table creation/dropping +2. Added automatic pgvector extension enablement +3. **Critical Fix**: Modified `init_tables()` to NOT delete `ragembedding` table when `delete_tables=True` + - Prevents training pipeline from deleting embeddings + - Ensures embeddings persist across runs + +#### `src/alm/rag/embed_and_index.py` +**Changes**: +1. Added `_embeddings_array` attribute to store embeddings before FAISS +2. Added `save_to_postgresql()` method to persist embeddings +3. Added `ingest_and_index_to_postgresql()` async entry point +4. 
Modified `build_faiss_index()` to store embeddings array for PostgreSQL saving + +#### `src/alm/agents/get_more_context_agent/rag_handler.py` +**Changes**: +1. Replaced local FAISS loading with HTTP client +2. Added `httpx.AsyncClient` for RAG service communication +3. Implemented lazy initialization of HTTP client +4. Added `cleanup()` method for graceful shutdown +5. Updated `_format_rag_results()` to parse JSON response from service + +#### `src/alm/main_fastapi.py` +**Change**: Added shutdown event handler +- Calls `RAGHandler().cleanup()` on application shutdown +- Ensures HTTP client is properly closed +- Prevents resource leaks + +#### `init_pipeline.py` +**Changes**: +1. Removed PVC-related logic (PDF copying, volume mounting) +2. Updated `build_rag_index()` to always use PostgreSQL +3. Added `wait_for_rag_service()` function to coordinate with RAG service +4. Updated main flow: build index → wait for RAG service → run training pipeline +5. Simplified data directory setup (PDFs now in container image) + +#### `deploy/helm/ansible-log-monitor/charts/backend/templates/deployment.yaml` +**Changes**: +- Removed `volumeMounts` and `volumes` for `rag-data` PVC +- Backend no longer needs direct access to RAG storage + +#### `deploy/helm/ansible-log-monitor/charts/backend/templates/init-job.yaml` +**Changes**: +- Removed `volumeMounts` and `volumes` for `rag-data` PVC +- Removed conditional PVC checks +- Always assumes PostgreSQL storage + +#### `deploy/helm/ansible-log-monitor/charts/backend/templates/configmap.yaml` +**Changes**: +- Added `RAG_SERVICE_URL` environment variable +- Updated comments to reflect new architecture + +#### `deploy/helm/ansible-log-monitor/charts/backend/values.yaml` +**Changes**: +- Removed `rag.persistence` section (no PVC needed) +- Added `rag.serviceUrl` configuration +- Updated `rag.knowledgeBaseDir` to reflect PDFs in image + +#### `deploy/helm/ansible-log-monitor/global-values.yaml` +**Change**: Added `rag: "alm-rag"` to 
`servicesNames` for service discovery + +#### `pyproject.toml` (root) +**Change**: Added `pgvector>=0.2.5` dependency +- Required for `Vector` type in `RAGEmbedding` model +- Needed for backend to create tables with pgvector columns + +### Deleted Files + +#### `deploy/helm/ansible-log-monitor/charts/backend/templates/rag-pvc.yaml` +**Reason**: No longer needed - RAG data stored in PostgreSQL, not PVC + +## Key Fixes Applied + +### 1. Circular Dependency Resolution +- **Problem**: RAG service waited for init job, init job needed RAG service +- **Solution**: + - RAG service starts independently, polls PostgreSQL for embeddings + - Init job waits for RAG service after building index + - Both can start in parallel, coordinate via PostgreSQL + +### 2. Non-Blocking Startup +- **Problem**: RAG service crashed if embeddings not available immediately +- **Solution**: Background task loads index asynchronously, service starts immediately +- **Result**: Service stays in "not ready" state until embeddings available + +### 3. Embedding Persistence +- **Problem**: Training pipeline deleted `ragembedding` table +- **Solution**: Modified `init_tables()` to preserve `ragembedding` when `delete_tables=True` +- **Result**: Embeddings persist across training pipeline runs + +### 4. pgvector String Parsing +- **Problem**: pgvector returns embeddings as strings in raw SQL queries +- **Solution**: Added parsing logic using JSON and `ast.literal_eval()` +- **Result**: Handles both array and string representations + +## Next Steps + +1. **Deploy RAG service** to your cluster +2. **Run init job** to populate embeddings +3. **Monitor** RAG service health and performance +4. **Test** end-to-end RAG queries +5. 
**Optimize** resource allocation based on usage + +## Support + +For issues or questions: +- Check service logs: `oc logs -n -l app.kubernetes.io/name=rag` +- Check backend logs: `oc logs -n -l app.kubernetes.io/name=backend` +- Verify database: Check `ragembedding` table in PostgreSQL +- Check init job: `oc logs -n -l job-name=alm-backend-init` diff --git a/init_pipeline.py b/init_pipeline.py index 1357a17..c22c5a6 100644 --- a/init_pipeline.py +++ b/init_pipeline.py @@ -3,99 +3,103 @@ from alm.utils.phoenix import register_phoenix import os import glob -import shutil from pathlib import Path +import httpx def setup_data_directories(): """ - Setup data directory structure in PVC mount path. - Creates necessary directories and copies PDFs from image to PVC if needed. + Setup data directory structure. + Knowledge base PDFs should be baked into the container image at /app/data/knowledge_base. """ - from src.alm.config import config + from alm.config import config print("\n" + "=" * 70) print("SETTING UP DATA DIRECTORY STRUCTURE") print("=" * 70) - # Get paths from config (uses DATA_DIR and KNOWLEDGE_BASE_DIR env vars) + # Get paths from config (uses DATA_DIR env var) data_dir = Path(config.storage.data_dir) - knowledge_base_dir = Path(config.storage.knowledge_base_dir) logs_dir = data_dir / "logs" / "failed" - # Create necessary directories + # Create necessary directories (for logs, etc.) 
print("Creating directories...") data_dir.mkdir(parents=True, exist_ok=True) - knowledge_base_dir.mkdir(parents=True, exist_ok=True) logs_dir.mkdir(parents=True, exist_ok=True) print(f" ✓ {data_dir}") - print(f" ✓ {knowledge_base_dir}") print(f" ✓ {logs_dir}") - # Copy PDFs from image to PVC if PVC knowledge_base is empty + # Check for knowledge base PDFs in image image_kb_dir = Path("/app/data/knowledge_base") - pvc_kb_dir = knowledge_base_dir - - # Check if PVC knowledge_base has any PDFs - pvc_pdfs = list(pvc_kb_dir.glob("*.pdf")) - - if not pvc_pdfs: - # PVC is empty, copy from image if available - if image_kb_dir.exists(): - image_pdfs = list(image_kb_dir.glob("*.pdf")) - if image_pdfs: - print(f"\nCopying {len(image_pdfs)} PDF file(s) from image to PVC...") - for pdf_path in image_pdfs: - dest_path = pvc_kb_dir / pdf_path.name - try: - shutil.copy2(pdf_path, dest_path) - print(f" ✓ Copied {pdf_path.name}") - except Exception as e: - print(f" ✗ Error copying {pdf_path.name}: {e}") - print("✓ Knowledge base PDFs copied to PVC") - else: - print(f"\n⚠ No PDFs found in image at {image_kb_dir}") + if image_kb_dir.exists(): + image_pdfs = list(image_kb_dir.glob("*.pdf")) + if image_pdfs: + print(f"\n✓ Found {len(image_pdfs)} PDF file(s) in container image:") + for pdf in image_pdfs: + print(f" - {pdf.name}") else: - print(f"\n⚠ Image knowledge base directory not found at {image_kb_dir}") + print(f"\n⚠ No PDF files found in image at {image_kb_dir}") else: + print(f"\n⚠ Knowledge base directory not found in image at {image_kb_dir}") print( - f"\n✓ PVC knowledge base already contains {len(pvc_pdfs)} PDF file(s), skipping copy" + " PDFs should be baked into the container image at /app/data/knowledge_base" ) print("=" * 70) -def build_rag_index(): +async def build_rag_index(): """ - Build RAG index from knowledge base PDFs. - This runs during the init job to create the FAISS index and metadata. + Build RAG index from knowledge base PDFs and save to PostgreSQL. 
+ This runs during the init job to create the FAISS index and save embeddings to database. """ - from src.alm.config import config - from src.alm.rag.ingest_and_chunk import AnsibleErrorParser - from src.alm.rag.embed_and_index import AnsibleErrorEmbedder - - # Check if RAG is enabled - rag_enabled = os.getenv("RAG_ENABLED", "true").lower() == "true" + from alm.config import config + from alm.rag.ingest_and_chunk import AnsibleErrorParser + from alm.rag.embed_and_index import AnsibleErrorEmbedder + from alm.database import init_tables + + # Check if RAG is enabled (consistent with rag_handler.py) + rag_enabled_env = os.getenv("RAG_ENABLED", "true").lower() + rag_enabled = rag_enabled_env in ["true", "1", "yes"] if not rag_enabled: - print("RAG is disabled (RAG_ENABLED=false), skipping RAG index build") + print( + f"RAG is disabled (RAG_ENABLED={rag_enabled_env}), skipping RAG index build" + ) return - # Check if index already exists (skip rebuild for faster upgrades) - index_path = Path(config.storage.index_path) - metadata_path = Path(config.storage.metadata_path) + # Check if embeddings already exist in PostgreSQL (skip rebuild for faster upgrades) + from alm.database import get_session + from alm.models import RAGEmbedding + from sqlmodel import select - if index_path.exists() and metadata_path.exists(): - print("✓ RAG index already exists, skipping rebuild") - print(f" Index: {index_path}") - print(f" Metadata: {metadata_path}") - print(" To force rebuild, delete the PVC or these files") - return + try: + async with get_session() as session: + result = await session.exec(select(RAGEmbedding)) + existing = result.first() + if existing: + count_result = await session.exec(select(RAGEmbedding)) + count = len(list(count_result.all())) + print( + f"✓ Found {count} existing embeddings in PostgreSQL, skipping rebuild" + ) + print( + " To force rebuild, delete embeddings from PostgreSQL or set RAG_FORCE_REBUILD=true" + ) + if os.getenv("RAG_FORCE_REBUILD", 
"false").lower() != "true": + return + except Exception as e: + print(f"⚠ Could not check PostgreSQL: {e}") + print(" Proceeding with index build...") print("\n" + "=" * 70) print("BUILDING RAG INDEX FROM KNOWLEDGE BASE") + print(" Storage: PostgreSQL") print("=" * 70) try: + # Ensure database tables exist + await init_tables(delete_tables=False) + # Validate configuration config.print_config() config.validate() @@ -104,12 +108,13 @@ def build_rag_index(): parser = AnsibleErrorParser() embedder = AnsibleErrorEmbedder() - # Find PDFs in knowledge base - kb_dir = config.storage.knowledge_base_dir - pdf_files = sorted(glob.glob(str(kb_dir / "*.pdf"))) + # Find PDFs in knowledge base (from container image) + # PDFs should be baked into the image at /app/data/knowledge_base + image_kb_dir = Path("/app/data/knowledge_base") + pdf_files = sorted(glob.glob(str(image_kb_dir / "*.pdf"))) if not pdf_files: - print(f"⚠ WARNING: No PDF files found in {kb_dir}") + print(f"⚠ WARNING: No PDF files found in {image_kb_dir}") print(" RAG index will not be created") return @@ -138,14 +143,11 @@ def build_rag_index(): print(f"TOTAL: {len(all_chunks)} chunks from {len(pdf_files)} PDFs") print(f"{'=' * 70}") - # Build and save index - embedder.ingest_and_index(all_chunks) - + # Build and save index to PostgreSQL + await embedder.ingest_and_index_to_postgresql(all_chunks) print("\n" + "=" * 70) - print("✓ RAG INDEX BUILD COMPLETE") + print("✓ RAG INDEX BUILD COMPLETE (PostgreSQL)") print("=" * 70) - print(f" Index: {index_path}") - print(f" Metadata: {metadata_path}") except Exception as e: print(f"\n✗ ERROR building RAG index: {e}") @@ -155,6 +157,61 @@ def build_rag_index(): traceback.print_exc() +async def wait_for_rag_service(rag_service_url: str, max_wait_time: int = 300): + """ + Wait for RAG service to be ready before proceeding. 
+ + Args: + rag_service_url: URL of the RAG service (e.g., http://alm-rag:8002) + max_wait_time: Maximum time to wait in seconds (default: 5 minutes) + """ + # Check if RAG is enabled + rag_enabled_env = os.getenv("RAG_ENABLED", "true").lower() + rag_enabled = rag_enabled_env in ["true", "1", "yes"] + if not rag_enabled: + print("RAG is disabled, skipping RAG service wait") + return + + print("\n" + "=" * 70) + print("WAITING FOR RAG SERVICE TO BE READY") + print("=" * 70) + + ready_url = f"{rag_service_url}/ready" + elapsed = 0 + check_interval = 5 + + async with httpx.AsyncClient(timeout=10.0) as client: + while elapsed < max_wait_time: + try: + response = await client.get(ready_url) + if response.status_code == 200: + data = response.json() + index_size = data.get("index_size", 0) + print(f"✓ RAG service is ready (index size: {index_size})") + return + else: + print( + f"RAG service not ready yet (status: {response.status_code}), waiting..." + ) + except (httpx.RequestError, httpx.HTTPStatusError): + if elapsed == 0: + print( + f"RAG service not yet available at {rag_service_url}, waiting..." + ) + elif elapsed % 30 == 0: # Print every 30 seconds + print(f"Still waiting for RAG service... 
(elapsed: {elapsed}s)") + + await asyncio.sleep(check_interval) + elapsed += check_interval + + # Timeout reached + print( + f"\n⚠ WARNING: RAG service did not become ready within {max_wait_time} seconds" + ) + print(" The training pipeline will proceed, but RAG queries may fail") + print(" This is expected if the RAG service is still starting up") + + async def main(): # Setup and initialization print("\n" + "=" * 70) @@ -165,7 +222,11 @@ async def main(): setup_data_directories() # Step 2: Build RAG index - build_rag_index() + await build_rag_index() + + # Step 2.5: Wait for RAG service to be ready (if RAG is enabled) + rag_service_url = os.getenv("RAG_SERVICE_URL", "http://alm-rag:8002") + await wait_for_rag_service(rag_service_url) # Step 3: Run main pipeline (clustering, summarization, etc.) print("\n" + "=" * 70) diff --git a/pyproject.toml b/pyproject.toml index d6340d3..29fda59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "psycopg2-binary>=2.9.0", "alembic>=1.13.0", "asyncpg>=0.30.0", + "pgvector>=0.2.5", # ML / RAG "scikit-learn>=1.7.1", diff --git a/services/rag/Containerfile b/services/rag/Containerfile new file mode 100644 index 0000000..8d16379 --- /dev/null +++ b/services/rag/Containerfile @@ -0,0 +1,31 @@ +FROM registry.access.redhat.com/ubi8/python-312 + +USER root + +# Install uv pointing to the uv image and coping from there +# /uv and /uvx are the source files copied from the uv image +# /bin is the destination +COPY --from=ghcr.io/astral-sh/uv:0.9.7 /uv /uvx /bin/ + +# Set working directory +WORKDIR /app + +# Copy dependency files (from services/rag/ directory) +COPY services/rag/pyproject.toml ./ + +# Install dependencies +RUN uv sync --no-dev +ENV VIRTUAL_ENV=/app/.venv +ENV PATH="/app/.venv/bin:$PATH" + +RUN chmod -R +r . + +# Copy source code (from services/rag/ directory) +COPY services/rag/index_loader.py services/rag/main.py . 
+ +# Expose port +EXPOSE 8002 + +# Default command +ENTRYPOINT ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8002"] + diff --git a/services/rag/README.md b/services/rag/README.md new file mode 100644 index 0000000..bb4000b --- /dev/null +++ b/services/rag/README.md @@ -0,0 +1,151 @@ +# RAG Service + +FastAPI microservice for RAG (Retrieval-Augmented Generation) queries. + +## Overview + +The RAG service provides similarity search over the knowledge base embeddings stored in PostgreSQL. It: + +1. **Starts immediately** (non-blocking startup) - service becomes available even if embeddings aren't ready +2. **Polls PostgreSQL** in background - checks every 5 seconds for embeddings (up to 10 minutes) +3. **Loads embeddings** when available - parses pgvector format and builds FAISS index in memory +4. **Exposes REST API** - provides query endpoints for knowledge base retrieval + +### Key Features + +- **Non-blocking startup**: Service starts immediately, loads index in background +- **Graceful degradation**: Service stays in "not ready" state until embeddings available +- **Automatic recovery**: Polls PostgreSQL until embeddings found +- **No circular dependencies**: Can start before init job completes + +## API Endpoints + +### `POST /rag/query` + +Query the knowledge base for relevant error solutions. + +**Request:** +```json +{ + "query": "error message or log summary", + "top_k": 10, + "top_n": 3, + "similarity_threshold": 0.6 +} +``` + +**Response:** +```json +{ + "query": "error message", + "results": [ + { + "error_id": "error_123", + "error_title": "Error Title", + "similarity_score": 0.85, + "source_file": "file.pdf", + "page": 5, + "sections": { + "description": "...", + "symptoms": "...", + "resolution": "...", + "code": "...", + "benefits": "..." + } + } + ], + "metadata": { + "num_results": 3, + "search_time_ms": 12.5, + "top_k": 10, + "top_n": 3, + "similarity_threshold": 0.6 + } +} +``` + +### `GET /health` + +Health check endpoint. 
Returns service status even if index is not loaded. + +**Response:** +```json +{ + "status": "healthy", + "index_size": 109 +} +``` + +Or if index not loaded: +```json +{ + "status": "unhealthy", + "reason": "Index not loaded" +} +``` + +### `GET /ready` + +Readiness check - ensures index is loaded. Returns 503 if index not ready, 200 when ready. + +**Response (ready):** +```json +{ + "status": "ready", + "index_size": 109 +} +``` + +**Response (not ready):** +- HTTP 503 with error detail + +### `POST /rag/reload` + +Reload the index from PostgreSQL without restarting the service. + +## Environment Variables + +- `DATABASE_URL` - PostgreSQL connection URL (required) +- `EMBEDDINGS_LLM_URL` - URL of the embedding service (default: `http://alm-embedding:8080`) +- `RAG_MODEL_NAME` - Name of the embedding model (default: `nomic-ai/nomic-embed-text-v1.5`) +- `PORT` - Service port (default: `8002`) + +## Startup Behavior + +The service uses a **background task** to load the index, allowing it to start even if embeddings aren't available yet: + +1. **Service starts** → FastAPI application becomes available +2. **Background task starts** → Begins polling PostgreSQL every 5 seconds +3. **If embeddings found** → Loads index, service becomes ready +4. **If embeddings not found** → Continues polling (up to 10 minutes) +5. **Service state**: + - `/health` always returns 200 (service is running) + - `/ready` returns 503 until index loaded, then 200 + +This design allows the RAG service to start independently of the init job, eliminating circular dependencies. + +## Deployment + +The service is deployed as a Kubernetes deployment via Helm chart. + +**Prerequisites:** +- PostgreSQL with `pgvector` extension enabled +- `ragembedding` table (created automatically by init job) +- Embeddings populated in database (via init job) + +**Startup Sequence:** +1. RAG service pod starts +2. Waits for PostgreSQL (initContainer) +3. Service starts, begins background polling +4. 
When embeddings available, loads index automatically +5. Service becomes ready for queries + +## Dependencies + +- **PostgreSQL** with `ragembedding` table populated (via init job) +- **pgvector extension** - for vector storage and queries +- **Embedding service (TEI)** - for generating query embeddings +- **FAISS** - for in-memory similarity search +- **FastAPI** - web framework +- **asyncpg** - async PostgreSQL driver + diff --git a/services/rag/index_loader.py b/services/rag/index_loader.py new file mode 100644 index 0000000..072c582 --- /dev/null +++ b/services/rag/index_loader.py @@ -0,0 +1,189 @@ +""" +Load RAG embeddings from PostgreSQL and build FAISS index. +""" + +import numpy as np +from typing import Dict, Any, Optional, Tuple +import faiss +from sqlalchemy.ext.asyncio import create_async_engine +from sqlalchemy.orm import sessionmaker +from sqlmodel.ext.asyncio.session import AsyncSession as SQLModelAsyncSession + +# Import models - we'll need to make these available +# For now, we'll define a simple structure or import from the main codebase +# In production, these should be in a shared package + + +class RAGIndexLoader: + """ + Loads embeddings from PostgreSQL and builds FAISS index in memory. + """ + + def __init__( + self, database_url: str, model_name: str = "nomic-ai/nomic-embed-text-v1.5" + ): + """ + Initialize the index loader. 
+ + Args: + database_url: PostgreSQL connection URL + model_name: Name of the embedding model (for validation) + """ + self.database_url = database_url.replace("+asyncpg", "").replace( + "postgresql", "postgresql+asyncpg" + ) + self.model_name = model_name + self.embedding_dim = 768 # nomic-embed-text-v1.5 dimension + + self.engine = create_async_engine(self.database_url) + self.session_factory = sessionmaker( + self.engine, class_=SQLModelAsyncSession, expire_on_commit=False + ) + + self.index: Optional[faiss.Index] = None + self.error_store: Dict[str, Dict[str, Any]] = {} + self.index_to_error_id: Dict[int, str] = {} + self._loaded = False + + async def load_index( + self, + ) -> Tuple[faiss.Index, Dict[str, Dict[str, Any]], Dict[int, str]]: + """ + Load embeddings from PostgreSQL and build FAISS index. + + Returns: + Tuple of (FAISS index, error_store, index_to_error_id mapping) + """ + if self._loaded and self.index is not None: + return self.index, self.error_store, self.index_to_error_id + + print("Loading embeddings from PostgreSQL...") + + # Define RAGEmbedding model inline (or import from shared package) + # For now, we'll use raw SQL to avoid circular dependencies + from sqlalchemy import text + + async with self.engine.begin() as conn: + # Query all embeddings + # Note: pgvector Vector type may be returned as string, we'll parse it in Python + result = await conn.execute( + text(""" + SELECT + error_id, + embedding, + error_title, + error_metadata, + model_name, + embedding_dim + FROM ragembedding + ORDER BY error_id + """) + ) + rows = result.fetchall() + + if not rows: + raise ValueError("No embeddings found in PostgreSQL. 
Run init job first.") + + print(f"Found {len(rows)} embeddings in database") + + # Extract data + embeddings_list = [] + error_ids = [] + error_store = {} + index_to_error_id = {} + + for idx, row in enumerate(rows): + error_id = row[0] + embedding = row[1] # This is a list/array + error_title = row[2] + error_metadata = row[3] if row[3] else {} + model_name_db = row[4] + embedding_dim_db = row[5] + + # Validate model + if model_name_db != self.model_name: + print( + f"Warning: Model mismatch. DB has {model_name_db}, expected {self.model_name}" + ) + + if embedding_dim_db != self.embedding_dim: + raise ValueError( + f"Embedding dimension mismatch: DB has {embedding_dim_db}, " + f"expected {self.embedding_dim}" + ) + + # Convert embedding to numpy array + # Handle both array and string representations from pgvector + if isinstance(embedding, str): + # Parse string representation (e.g., "[0.1, 0.2, ...]") + import json + import ast + + try: + # Try JSON first (safer) + embedding = json.loads(embedding) + except json.JSONDecodeError: + # If JSON parsing fails, use ast.literal_eval (safe for literals) + # pgvector returns vectors as string like '[0.1,0.2,...]' + try: + embedding = ast.literal_eval(embedding) + except (ValueError, SyntaxError): + raise ValueError( + f"Could not parse embedding for {error_id}: invalid format" + ) + + embedding_array = np.array(embedding, dtype=np.float32) + + # Validate embedding shape + if embedding_array.shape[0] != self.embedding_dim: + raise ValueError( + f"Invalid embedding shape for {error_id}: " + f"expected {self.embedding_dim}, got {embedding_array.shape[0]}" + ) + + embeddings_list.append(embedding_array) + error_ids.append(error_id) + + # Build error_store + error_store[error_id] = { + "error_id": error_id, + "error_title": error_title, + "sections": error_metadata.get("sections", {}), + "metadata": error_metadata.get("metadata", {}), + } + + index_to_error_id[idx] = error_id + + # Convert to numpy array + embeddings = 
np.array(embeddings_list, dtype=np.float32) + + print(f"Loaded {len(embeddings)} embeddings, shape: {embeddings.shape}") + + # Verify embeddings are normalized + norms = np.linalg.norm(embeddings, axis=1) + print( + f"Embedding norms: min={norms.min():.4f}, max={norms.max():.4f}, mean={norms.mean():.4f}" + ) + + # Build FAISS index + print(f"Building FAISS IndexFlatIP with dimension {self.embedding_dim}...") + index = faiss.IndexFlatIP(self.embedding_dim) + index.add(embeddings) + + print(f"FAISS index created with {index.ntotal} vectors") + + # Store for reuse + self.index = index + self.error_store = error_store + self.index_to_error_id = index_to_error_id + self._loaded = True + + return index, error_store, index_to_error_id + + async def reload_index(self): + """Force reload of index from database.""" + self._loaded = False + self.index = None + self.error_store = {} + self.index_to_error_id = {} + return await self.load_index() diff --git a/services/rag/main.py b/services/rag/main.py new file mode 100644 index 0000000..7717b50 --- /dev/null +++ b/services/rag/main.py @@ -0,0 +1,281 @@ +""" +RAG Service - FastAPI service for RAG queries. 
+""" + +import os +import asyncio +from typing import Optional, List, Dict, Any +import numpy as np +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel, Field +from index_loader import RAGIndexLoader +import time + +app = FastAPI(title="RAG Service", version="0.1.0") + +# Global index loader (initialized on startup) +index_loader: Optional[RAGIndexLoader] = None + + +class QueryRequest(BaseModel): + """Request model for RAG query.""" + + query: str = Field(description="Query text to search for") + top_k: int = Field( + default=10, ge=1, le=100, description="Number of top candidates to retrieve" + ) + top_n: int = Field( + default=3, ge=1, le=20, description="Number of final results to return" + ) + similarity_threshold: float = Field( + default=0.6, ge=0.0, le=1.0, description="Minimum similarity threshold (0-1)" + ) + + +class ErrorSection(BaseModel): + """Error section data.""" + + description: Optional[str] = None + symptoms: Optional[str] = None + resolution: Optional[str] = None + code: Optional[str] = None + benefits: Optional[str] = None + + +class ErrorResult(BaseModel): + """Single error result.""" + + error_id: str + error_title: str + similarity_score: float + source_file: Optional[str] = None + page: Optional[int] = None + sections: ErrorSection + + +class QueryResponse(BaseModel): + """Response model for RAG query.""" + + query: str + results: List[ErrorResult] + metadata: Dict[str, Any] + + +async def load_index_background(): + """Background task to load index from PostgreSQL (polls until available).""" + global index_loader + + database_url = os.getenv("DATABASE_URL") + if not database_url: + print("ERROR: DATABASE_URL environment variable is required") + return + + model_name = os.getenv("RAG_MODEL_NAME", "nomic-ai/nomic-embed-text-v1.5") + + print("Initializing RAG index loader...") + index_loader = RAGIndexLoader(database_url=database_url, model_name=model_name) + + # Wait for embeddings to be available (poll PostgreSQL) + 
# This allows the service to start before the init job completes + max_wait_time = 600 # 10 minutes + wait_interval = 5 # Check every 5 seconds + elapsed = 0 + + print("Waiting for embeddings to be available in PostgreSQL...") + while elapsed < max_wait_time: + try: + await index_loader.load_index() + print("✓ RAG index loaded successfully") + return + except ValueError as e: + if "No embeddings found" in str(e): + if elapsed == 0 or elapsed % 30 == 0: # Print every 30 seconds + print( + f"Embeddings not yet available (waited {elapsed}s), retrying in {wait_interval}s..." + ) + await asyncio.sleep(wait_interval) + elapsed += wait_interval + else: + print(f"✗ Failed to load RAG index: {e}") + return # Don't raise, just return - service will stay in "not ready" state + except Exception as e: + print(f"✗ Failed to load RAG index: {e}") + return # Don't raise, just return - service will stay in "not ready" state + + # If we get here, we've timed out + print(f"⚠ WARNING: Failed to load RAG index after {max_wait_time} seconds") + print(" Service will remain in 'not ready' state until embeddings are available") + + +@app.on_event("startup") +async def startup_event(): + """Start background task to load index.""" + # Start background task - don't block startup + asyncio.create_task(load_index_background()) + + +@app.get("/health") +def health_check(): + """Health check endpoint for Kubernetes probes.""" + if index_loader is None or index_loader.index is None: + return {"status": "unhealthy", "reason": "Index not loaded"} + return { + "status": "healthy", + "index_size": index_loader.index.ntotal if index_loader.index else 0, + } + + +@app.get("/ready") +def readiness_check(): + """Readiness check - ensures index is loaded.""" + if index_loader is None or index_loader.index is None: + raise HTTPException(status_code=503, detail="Index not loaded") + return {"status": "ready", "index_size": index_loader.index.ntotal} + + +@app.post("/rag/query", response_model=QueryResponse) 
+async def query_rag(request: QueryRequest): + """ + Query the RAG system for relevant error solutions. + + This endpoint: + 1. Generates embedding for the query + 2. Performs similarity search using FAISS + 3. Returns top-N most relevant errors + """ + if index_loader is None or index_loader.index is None: + raise HTTPException( + status_code=503, detail="RAG index not loaded. Service is not ready." + ) + + start_time = time.time() + + try: + # Step 1: Generate query embedding + # For now, we'll need to call the embedding service + # This should be the same TEI service used during indexing + embedding_url = os.getenv("EMBEDDINGS_LLM_URL", "http://alm-embedding:8080") + + import httpx + + async with httpx.AsyncClient(timeout=30.0) as client: + # Prepare query text with task prefix (for nomic models) + query_text = f"search_query: {request.query}" + + # Call embedding service + embedding_response = await client.post( + f"{embedding_url}/embeddings", + json={ + "input": [query_text], + "model": "nomic-embed-text-v1.5", + }, + ) + embedding_response.raise_for_status() + + # Extract embedding + embedding_data = embedding_response.json() + if "data" in embedding_data and len(embedding_data["data"]) > 0: + query_embedding = np.array( + embedding_data["data"][0]["embedding"], dtype=np.float32 + ) + elif ( + "embeddings" in embedding_data and len(embedding_data["embeddings"]) > 0 + ): + query_embedding = np.array( + embedding_data["embeddings"][0], dtype=np.float32 + ) + else: + raise ValueError("Unexpected embedding response format") + + # Normalize embedding + norm = np.linalg.norm(query_embedding) + if norm > 0: + query_embedding = query_embedding / norm + + # Step 2: Similarity search in FAISS + query_vector = query_embedding.reshape(1, -1) + similarities, indices = index_loader.index.search(query_vector, request.top_k) + + # Flatten results + similarities = similarities[0] + indices = indices[0] + + # Step 3: Filter by threshold and format results + results = [] + for 
idx, similarity in zip(indices, similarities): + if idx == -1: # FAISS returns -1 when not enough results + continue + + if similarity < request.similarity_threshold: + continue + + error_id = index_loader.index_to_error_id[idx] + error_data = index_loader.error_store[error_id] + + # Extract sections + sections = error_data.get("sections", {}) + metadata = error_data.get("metadata", {}) + + result = ErrorResult( + error_id=error_id, + error_title=error_data.get("error_title", error_id), + similarity_score=float(similarity), + source_file=metadata.get("source_file"), + page=metadata.get("page"), + sections=ErrorSection( + description=sections.get("description"), + symptoms=sections.get("symptoms"), + resolution=sections.get("resolution"), + code=sections.get("code"), + benefits=sections.get("benefits"), + ), + ) + results.append(result) + + # Step 4: Take top-N results + results = results[: request.top_n] + + search_time_ms = (time.time() - start_time) * 1000 + + return QueryResponse( + query=request.query, + results=results, + metadata={ + "num_results": len(results), + "search_time_ms": search_time_ms, + "top_k": request.top_k, + "top_n": request.top_n, + "similarity_threshold": request.similarity_threshold, + }, + ) + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}") + + +@app.post("/rag/reload") +async def reload_index(): + """ + Reload the index from PostgreSQL. + + Useful for updating the index without restarting the service. 
+ """ + if index_loader is None: + raise HTTPException(status_code=503, detail="Index loader not initialized") + + try: + await index_loader.reload_index() + return { + "status": "success", + "message": "Index reloaded", + "index_size": index_loader.index.ntotal if index_loader.index else 0, + } + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error reloading index: {str(e)}") + + +if __name__ == "__main__": + import uvicorn + + port = int(os.getenv("PORT", "8002")) + uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/services/rag/pyproject.toml b/services/rag/pyproject.toml new file mode 100644 index 0000000..f8c3bc1 --- /dev/null +++ b/services/rag/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "rag-service" +version = "0.1.0" +description = "RAG service for Ansible Log Monitoring" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "fastapi>=0.116.1", + "uvicorn>=0.37.0", + "httpx>=0.27.2", + "sqlmodel>=0.0.8", + "psycopg2-binary>=2.9.0", + "asyncpg>=0.30.0", + "faiss-cpu>=1.7.4", + "numpy>=1.24.0", + "pydantic>=2.0.0", +] + diff --git a/src/alm/agents/get_more_context_agent/rag_handler.py b/src/alm/agents/get_more_context_agent/rag_handler.py index 135e348..ec28bd1 100644 --- a/src/alm/agents/get_more_context_agent/rag_handler.py +++ b/src/alm/agents/get_more_context_agent/rag_handler.py @@ -1,5 +1,6 @@ import os -from typing import Optional +from typing import Optional, Dict, Any +import httpx from alm.utils.logger import get_logger @@ -11,13 +12,13 @@ class RAGHandler: Handles RAG (Retrieval-Augmented Generation) operations for retrieving relevant context from the knowledge base. - Uses lazy loading singleton pattern to ensure the FAISS index is loaded - only once and reused across requests. + Uses HTTP client to communicate with the RAG service. 
""" _instance: Optional["RAGHandler"] = None - _pipeline = None _enabled: Optional[bool] = None + _rag_service_url: Optional[str] = None + _client: Optional[httpx.AsyncClient] = None def __new__(cls): """Singleton pattern implementation.""" @@ -25,106 +26,108 @@ def __new__(cls): cls._instance = super(RAGHandler, cls).__new__(cls) return cls._instance - def _initialize_rag_pipeline(self): + async def cleanup(self): """ - Initialize RAG pipeline with lazy loading (singleton pattern). - This ensures the FAISS index is loaded only once and reused across requests. + Cleanup HTTP client resources. + + Should be called during application shutdown to properly close + the HTTP connection pool and avoid resource leaks. + """ + if self._client is not None: + try: + await self._client.aclose() + logger.info("RAG service HTTP client closed") + except Exception as e: + logger.warning("Error closing RAG service HTTP client: %s", e) + finally: + self._client = None + + def _initialize_rag_service(self): + """ + Initialize RAG service client. 
Returns: - AnsibleErrorQueryPipeline instance or None if RAG is disabled/failed + True if service is available, False otherwise """ # Check if already initialized if self._enabled is not None: - return self._pipeline + return self._enabled # Check if RAG is enabled via environment variable rag_enabled_env = os.getenv("RAG_ENABLED", "true").lower() if rag_enabled_env not in ["true", "1", "yes"]: logger.info("RAG is disabled (RAG_ENABLED=%s)", rag_enabled_env) self._enabled = False - self._pipeline = None - return None - - try: - logger.info("Initializing RAG pipeline (lazy loading)...") + return False - from alm.rag.query_pipeline import AnsibleErrorQueryPipeline + # Get RAG service URL + self._rag_service_url = os.getenv("RAG_SERVICE_URL", "http://alm-rag:8002") - # Get configuration from environment variables - top_k = int(os.getenv("RAG_TOP_K", "10")) - top_n = int(os.getenv("RAG_TOP_N", "3")) - similarity_threshold = float(os.getenv("RAG_SIMILARITY_THRESHOLD", "0.6")) - - # Initialize pipeline (this loads the FAISS index) - self._pipeline = AnsibleErrorQueryPipeline( - top_k=top_k, - top_n=top_n, - similarity_threshold=similarity_threshold, + # Create HTTP client and initialize (wrapped in try-except for error handling) + try: + # Create HTTP client + self._client = httpx.AsyncClient( + base_url=self._rag_service_url, + timeout=30.0, ) + # We'll do a lazy check on first request instead of blocking here self._enabled = True logger.info( - "✓ RAG pipeline initialized successfully with %d errors in index", - len(self._pipeline.embedder.error_store), - ) - - return self._pipeline - - except FileNotFoundError as e: - logger.warning("RAG index not found: %s", e) - logger.warning( - "RAG functionality disabled - proceeding without cheat sheet context" + "RAG service client initialized (URL: %s)", self._rag_service_url ) - self._enabled = False - self._pipeline = None - return None - + return True except Exception as e: - logger.error("Failed to initialize RAG pipeline: 
%s", e, exc_info=True) - logger.warning( - "RAG functionality disabled - proceeding without cheat sheet context" - ) + logger.warning("Failed to initialize RAG service client: %s", e) self._enabled = False - self._pipeline = None - return None + # Clean up client if it was partially created + self._client = None + return False - def _format_rag_results(self, response) -> str: + def _format_rag_results(self, response_data: Dict[str, Any]) -> str: """ Format RAG query results into a structured string for LLM context. Args: - response: QueryResponse from RAG pipeline + response_data: Response dictionary from RAG service Returns: Formatted string with error solutions """ - if not response.results: + results = response_data.get("results", []) + if not results: return "No matching solutions found in knowledge base." output = ["## Relevant Error Solutions from Knowledge Base\n"] - for i, result in enumerate(response.results, 1): - output.append(f"### Error {i}: {result.error_title}") - output.append(f"**Confidence Score:** {result.similarity_score:.2f}\n") + for i, result in enumerate(results, 1): + error_title = result.get( + "error_title", result.get("error_id", f"Error {i}") + ) + similarity_score = result.get("similarity_score", 0.0) + sections = result.get("sections", {}) + + output.append(f"### Error {i}: {error_title}") + output.append(f"**Confidence Score:** {similarity_score:.2f}\n") - if result.sections.description: + if sections.get("description"): output.append("**Description:**") - output.append(result.sections.description) + output.append(sections["description"]) output.append("") - if result.sections.symptoms: + if sections.get("symptoms"): output.append("**Symptoms:**") - output.append(result.sections.symptoms) + output.append(sections["symptoms"]) output.append("") - if result.sections.resolution: + if sections.get("resolution"): output.append("**Resolution:**") - output.append(result.sections.resolution) + output.append(sections["resolution"]) 
output.append("") - if result.sections.code: + if sections.get("code"): output.append("**Code Example:**") - output.append(f"```\n{result.sections.code}\n```") + output.append(f"```\n{sections['code']}\n```") output.append("") output.append("---\n") @@ -136,8 +139,8 @@ async def get_cheat_sheet_context(self, log_summary: str) -> str: Retrieve relevant context from the RAG knowledge base for solving the error. This function: - 1. Lazily initializes the RAG pipeline (loads FAISS index on first call) - 2. Queries the knowledge base with the log summary + 1. Initializes the RAG service client (if not already done) + 2. Queries the RAG service with the log summary 3. Formats the results for LLM consumption 4. Returns empty string if RAG is disabled or fails @@ -149,32 +152,64 @@ async def get_cheat_sheet_context(self, log_summary: str) -> str: """ logger.info("Retrieving cheat sheet context for log summary") - # Initialize RAG pipeline (lazy loading) - pipeline = self._initialize_rag_pipeline() + # Initialize RAG service client (lazy loading) + if not self._initialize_rag_service(): + logger.debug("RAG service not available, returning empty context") + return "" - if pipeline is None: - logger.debug("RAG pipeline not available, returning empty context") + if self._client is None: + logger.debug("RAG service client not initialized, returning empty context") return "" try: - # Query the RAG system + # Get configuration from environment variables + top_k = int(os.getenv("RAG_TOP_K", "10")) + top_n = int(os.getenv("RAG_TOP_N", "3")) + similarity_threshold = float(os.getenv("RAG_SIMILARITY_THRESHOLD", "0.6")) + + # Query the RAG service logger.debug( - "Querying RAG system with log summary: %s...", log_summary[:100] + "Querying RAG service with log summary: %s...", log_summary[:100] ) - response = pipeline.query(log_summary) + + response = await self._client.post( + "/rag/query", + json={ + "query": log_summary, + "top_k": top_k, + "top_n": top_n, + "similarity_threshold": 
similarity_threshold, + }, + ) + + response.raise_for_status() + response_data = response.json() # Format results - formatted_context = self._format_rag_results(response) + formatted_context = self._format_rag_results(response_data) + metadata = response_data.get("metadata", {}) logger.info( "✓ Retrieved %d relevant errors from knowledge base (search time: %.2fms)", - response.metadata["num_results"], - response.metadata["search_time_ms"], + metadata.get("num_results", 0), + metadata.get("search_time_ms", 0.0), ) return formatted_context + except httpx.HTTPStatusError as e: + logger.error( + "RAG service returned error status %d: %s", + e.response.status_code, + e.response.text, + ) + logger.warning("Proceeding without cheat sheet context") + return "" + except httpx.RequestError as e: + logger.error("Error connecting to RAG service: %s", e) + logger.warning("Proceeding without cheat sheet context") + return "" except Exception as e: - logger.error("Error querying RAG system: %s", e, exc_info=True) + logger.error("Error querying RAG service: %s", e, exc_info=True) logger.warning("Proceeding without cheat sheet context") return "" diff --git a/src/alm/database.py b/src/alm/database.py index f817e08..14e9f8c 100644 --- a/src/alm/database.py +++ b/src/alm/database.py @@ -4,9 +4,10 @@ from datetime import datetime from typing import Generator +from sqlalchemy import text from sqlalchemy.ext.asyncio import create_async_engine from sqlmodel.ext.asyncio.session import AsyncSession -from alm.models import GrafanaAlert +from alm.models import GrafanaAlert, RAGEmbedding from alm.agents.state import GrafanaAlertState from alm.models import LogEntry from alm.utils.logger import get_logger @@ -26,8 +27,24 @@ async def init_tables(delete_tables=False): async with engine.begin() as conn: if delete_tables: logger.info("Starting to delete tables") + # Only delete GrafanaAlert table, NOT RAGEmbedding + # RAG embeddings should persist across training pipeline runs await 
conn.run_sync(GrafanaAlert.metadata.drop_all) + # RAGEmbedding table is NOT deleted - it persists across runs + + # Ensure pgvector extension is enabled (must be done before creating tables) + try: + await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) + logger.info("pgvector extension enabled") + except Exception as e: + logger.warning(f"Could not enable pgvector extension: {e}") + logger.warning( + "This is OK if extension is already enabled or not available" + ) + + # Create all tables await conn.run_sync(GrafanaAlert.metadata.create_all) + await conn.run_sync(RAGEmbedding.metadata.create_all) def get_session(): diff --git a/src/alm/main_fastapi.py b/src/alm/main_fastapi.py index bb8129b..b375543 100644 --- a/src/alm/main_fastapi.py +++ b/src/alm/main_fastapi.py @@ -27,6 +27,15 @@ def create_app() -> FastAPI: async def read_root() -> dict[str, str]: return {"service": "alm", "status": "ok"} + @app.on_event("shutdown") + async def shutdown_event(): + """Cleanup resources on application shutdown.""" + from alm.agents.get_more_context_agent.rag_handler import RAGHandler + + # Cleanup RAG handler HTTP client + handler = RAGHandler() + await handler.cleanup() + return app diff --git a/src/alm/models.py b/src/alm/models.py index e6dc2d7..f0ea779 100644 --- a/src/alm/models.py +++ b/src/alm/models.py @@ -5,6 +5,7 @@ from sqlalchemy import JSON from sqlmodel import Column, Field, SQLModel +from pgvector.sqlalchemy import Vector from enum import Enum from pydantic import BaseModel @@ -86,3 +87,48 @@ def convert_datetime_to_str(cls, v): if isinstance(v, datetime): return v.isoformat() return v + + +# RAG Embeddings Model +class RAGEmbedding(SQLModel, table=True): + """ + Stores RAG embeddings and metadata for knowledge base retrieval. + + This table stores the embeddings generated from knowledge base PDFs, + along with the complete error metadata needed for RAG queries. 
+ """ + + error_id: str = Field( + primary_key=True, description="Unique identifier for the error" + ) + + # Embedding vector (stored using pgvector Vector type) + # Note: pgvector extension must be enabled in PostgreSQL + # Dimension is 768 for nomic-embed-text-v1.5 model + embedding: list[float] = Field( + sa_column=Column(Vector(768)), + description="Embedding vector (768 dimensions for nomic-embed-text-v1.5)", + ) + + # Error metadata stored as JSONB for flexibility + error_title: Optional[str] = Field(default=None, description="Title of the error") + error_metadata: dict = Field( + default_factory=dict, + description="Complete error metadata including sections (description, symptoms, resolution, code, benefits) and source information", + sa_column=Column(JSON), + ) + + # Model information + model_name: str = Field(description="Name of the embedding model used") + embedding_dim: int = Field( + default=768, description="Dimension of the embedding vector" + ) + + # Timestamps + created_at: datetime = Field( + default_factory=datetime.now, + description="Timestamp when the embedding was created", + ) + updated_at: Optional[datetime] = Field( + default=None, description="Timestamp when the embedding was last updated" + ) diff --git a/src/alm/rag/embed_and_index.py b/src/alm/rag/embed_and_index.py index 0ee7e1c..70cefa0 100644 --- a/src/alm/rag/embed_and_index.py +++ b/src/alm/rag/embed_and_index.py @@ -22,6 +22,7 @@ from typing import List, Dict, Any, Tuple, Optional from collections import defaultdict from pathlib import Path +from datetime import datetime from langchain_core.documents import Document import faiss @@ -233,6 +234,8 @@ def __init__( self.index = None self.error_store = {} + self.index_to_error_id = {} + self._embeddings_array = None # Store embeddings for PostgreSQL saving logger.info("Embedder initialized") logger.info(" Mode: TEI Service") @@ -423,6 +426,9 @@ def build_faiss_index( logger.info("STEP:CREATING FAISS INDEX") logger.info("=" * 60) + # 
Store embeddings array for PostgreSQL saving + self._embeddings_array = embeddings.copy() + # Verify embeddings are normalized norms = np.linalg.norm(embeddings, axis=1) logger.info( @@ -484,6 +490,112 @@ def save_index(self): logger.info(" Metadata size: %.2f MB", metadata_size_mb) logger.info(" Total storage: %.2f MB", index_size_mb + metadata_size_mb) + async def save_to_postgresql(self): + """ + Save embeddings and metadata to PostgreSQL. + + This method saves the current FAISS index data (embeddings, error_store) + to the PostgreSQL database for use by the RAG service. + """ + if self.index is None: + raise ValueError("FAISS index must be built before saving to PostgreSQL") + + if not self.error_store: + raise ValueError( + "Error store must be populated before saving to PostgreSQL" + ) + + logger.info("=" * 60) + logger.info("SAVING EMBEDDINGS TO POSTGRESQL") + logger.info("=" * 60) + + from alm.database import get_session + from alm.models import RAGEmbedding + + # Get embeddings from FAISS index + # FAISS doesn't have a direct "get all vectors" method, so we need to reconstruct + # We'll use the index_to_error_id mapping and error_store to get the data + # Actually, we need to store embeddings separately or reconstruct from error_store + # For now, let's assume we have the embeddings array from build_faiss_index + + # Since we don't have direct access to the embeddings array after it's added to FAISS, + # we need to either: + # 1. Store embeddings in memory during build_faiss_index + # 2. Re-embed from error_store (inefficient) + # 3. 
Store embeddings before adding to FAISS + + # For now, we'll need to modify build_faiss_index to keep embeddings + # Let's add a property to store them + if not hasattr(self, "_embeddings_array") or self._embeddings_array is None: + logger.warning("Embeddings array not available, cannot save to PostgreSQL") + logger.warning( + "This method should be called immediately after build_faiss_index" + ) + return + + embeddings_array = self._embeddings_array + error_ids = list(self.index_to_error_id.values()) + + logger.info("Saving %d embeddings to PostgreSQL...", len(error_ids)) + + async with get_session() as session: + saved_count = 0 + updated_count = 0 + + for idx, error_id in enumerate(error_ids): + embedding_vector = embeddings_array[ + idx + ].tolist() # Convert numpy to list + error_data = self.error_store[error_id] + + # Prepare error metadata + error_metadata = { + "sections": error_data.get("sections", {}), + "metadata": error_data.get("metadata", {}), + } + + # Check if embedding already exists + from sqlmodel import select + + result = await session.exec( + select(RAGEmbedding).where(RAGEmbedding.error_id == error_id) + ) + existing = result.first() + + if existing: + # Update existing + existing.embedding = embedding_vector + existing.error_title = error_data.get("error_title") + existing.error_metadata = error_metadata + existing.updated_at = datetime.now() + session.add(existing) + updated_count += 1 + else: + # Create new + rag_embedding = RAGEmbedding( + error_id=error_id, + embedding=embedding_vector, + error_title=error_data.get("error_title"), + error_metadata=error_metadata, + model_name=self.model_name, + embedding_dim=self.embedding_dim, + ) + session.add(rag_embedding) + saved_count += 1 + + if (saved_count + updated_count) % 100 == 0: + logger.info( + " Progress: %d embeddings processed", + saved_count + updated_count, + ) + + await session.commit() + + logger.info("✓ Embeddings saved to PostgreSQL") + logger.info(" New embeddings: %d", 
saved_count) + logger.info(" Updated embeddings: %d", updated_count) + logger.info(" Total: %d", saved_count + updated_count) + def load_index(self): """Load FAISS index and metadata from disk.""" logger.info("=" * 60) @@ -528,6 +640,25 @@ def ingest_and_index(self, chunks: List[Document]): logger.info("INGESTION AND INDEXING COMPLETE") logger.info("=" * 70) + async def ingest_and_index_to_postgresql(self, chunks: List[Document]): + """ + Complete ingestion and indexing pipeline, saving to PostgreSQL. + + This is the async version that saves to PostgreSQL instead of disk. + """ + logger.info("=" * 70) + logger.info("ANSIBLE ERROR RAG SYSTEM - INGESTION AND INDEXING (PostgreSQL)") + logger.info("=" * 70) + + error_store = self.group_chunks_by_error(chunks) + embeddings, error_ids = self.create_composite_embeddings(error_store) + self.build_faiss_index(embeddings, error_ids, error_store) + await self.save_to_postgresql() + + logger.info("=" * 70) + logger.info("INGESTION AND INDEXING COMPLETE (PostgreSQL)") + logger.info("=" * 70) + def main(): """Process all PDFs in knowledge_base directory.""" diff --git a/uv.lock b/uv.lock index 60ecf02..a6e86b3 100644 --- a/uv.lock +++ b/uv.lock @@ -155,6 +155,7 @@ dependencies = [ { name = "langgraph" }, { name = "minio" }, { name = "openinference-instrumentation-langchain" }, + { name = "pgvector" }, { name = "psycopg2-binary" }, { name = "pypdf" }, { name = "python-dateutil" }, @@ -196,6 +197,7 @@ requires-dist = [ { name = "langgraph", specifier = ">=0.6.5" }, { name = "minio", specifier = ">=7.2.17" }, { name = "openinference-instrumentation-langchain", specifier = ">=0.1.33" }, + { name = "pgvector", specifier = ">=0.2.5" }, { name = "psycopg2-binary", specifier = ">=2.9.0" }, { name = "pypdf", specifier = ">=5.0.0" }, { name = "python-dateutil", specifier = ">=2.8.0" }, @@ -923,10 +925,10 @@ wheels = [ [[package]] name = "filelock" version = "3.20.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922, upload-time = "2025-10-08T18:03:50.056Z" } +source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054, upload-time = "2025-10-08T18:03:48.35Z" }, + { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2" }, ] [[package]] @@ -1051,8 +1053,9 @@ wheels = [ name = "fsspec" version = "2025.9.0" source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = "https://files.pythonhosted.org/packages/de/e0/bab50af11c2d75c9c4a2a26a5254573c0bd97cea152254401510950486fa/fsspec-2025.9.0.tar.gz", hash = "sha256:19fd429483d25d28b65ec68f9f4adc16c17ea2c7c7bf54ec61360d478fb19c19" } wheels = [ - { url = "https://download.pytorch.org/whl/fsspec-2025.9.0-py3-none-any.whl" }, + { url = "https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7" }, ] [[package]] @@ -2234,8 +2237,9 @@ wheels = [ name = "mpmath" version = "1.3.0" source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f" } wheels = [ - { url = "https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c" }, + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c" }, ] [[package]] @@ -2414,8 +2418,9 @@ wheels = [ name = "networkx" version = "3.5" source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037" } wheels = [ - { url = "https://download.pytorch.org/whl/networkx-3.5-py3-none-any.whl" }, + { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec" }, ] [[package]] @@ -2442,64 +2447,64 @@ wheels = [ [[package]] name = "numpy" version = "2.3.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b5/f4/098d2270d52b41f1bd7db9fc288aaa0400cb48c2a3e2af6fa365d9720947/numpy-2.3.4.tar.gz", hash = "sha256:a7d018bfedb375a8d979ac758b120ba846a7fe764911a64465fd87b8729f4a6a", size = 20582187, upload-time = "2025-10-15T16:18:11.77Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/96/7a/02420400b736f84317e759291b8edaeee9dc921f72b045475a9cbdb26b17/numpy-2.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:ef1b5a3e808bc40827b5fa2c8196151a4c5abe110e1726949d7abddfe5c7ae11", size = 20957727, upload-time = "2025-10-15T16:15:44.9Z" }, - { url = "https://files.pythonhosted.org/packages/18/90/a014805d627aa5750f6f0e878172afb6454552da929144b3c07fcae1bb13/numpy-2.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2f91f496a87235c6aaf6d3f3d89b17dba64996abadccb289f48456cff931ca9", size = 14187262, upload-time = "2025-10-15T16:15:47.761Z" }, - { url = "https://files.pythonhosted.org/packages/c7/e4/0a94b09abe89e500dc748e7515f21a13e30c5c3fe3396e6d4ac108c25fca/numpy-2.3.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f77e5b3d3da652b474cc80a14084927a5e86a5eccf54ca8ca5cbd697bf7f2667", size = 5115992, upload-time = "2025-10-15T16:15:50.144Z" }, - { url = "https://files.pythonhosted.org/packages/88/dd/db77c75b055c6157cbd4f9c92c4458daef0dd9cbe6d8d2fe7f803cb64c37/numpy-2.3.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8ab1c5f5ee40d6e01cbe96de5863e39b215a4d24e7d007cad56c7184fdf4aeef", size = 6648672, upload-time = "2025-10-15T16:15:52.442Z" }, - { url = "https://files.pythonhosted.org/packages/e1/e6/e31b0d713719610e406c0ea3ae0d90760465b086da8783e2fd835ad59027/numpy-2.3.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77b84453f3adcb994ddbd0d1c5d11db2d6bda1a2b7fd5ac5bd4649d6f5dc682e", size = 14284156, upload-time = "2025-10-15T16:15:54.351Z" }, - { url = "https://files.pythonhosted.org/packages/f9/58/30a85127bfee6f108282107caf8e06a1f0cc997cb6b52cdee699276fcce4/numpy-2.3.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4121c5beb58a7f9e6dfdee612cb24f4df5cd4db6e8261d7f4d7450a997a65d6a", size = 16641271, upload-time = "2025-10-15T16:15:56.67Z" }, - { url = "https://files.pythonhosted.org/packages/06/f2/2e06a0f2adf23e3ae29283ad96959267938d0efd20a2e25353b70065bfec/numpy-2.3.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:65611ecbb00ac9846efe04db15cbe6186f562f6bb7e5e05f077e53a599225d16", size = 16059531, 
upload-time = "2025-10-15T16:15:59.412Z" }, - { url = "https://files.pythonhosted.org/packages/b0/e7/b106253c7c0d5dc352b9c8fab91afd76a93950998167fa3e5afe4ef3a18f/numpy-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dabc42f9c6577bcc13001b8810d300fe814b4cfbe8a92c873f269484594f9786", size = 18578983, upload-time = "2025-10-15T16:16:01.804Z" }, - { url = "https://files.pythonhosted.org/packages/73/e3/04ecc41e71462276ee867ccbef26a4448638eadecf1bc56772c9ed6d0255/numpy-2.3.4-cp312-cp312-win32.whl", hash = "sha256:a49d797192a8d950ca59ee2d0337a4d804f713bb5c3c50e8db26d49666e351dc", size = 6291380, upload-time = "2025-10-15T16:16:03.938Z" }, - { url = "https://files.pythonhosted.org/packages/3d/a8/566578b10d8d0e9955b1b6cd5db4e9d4592dd0026a941ff7994cedda030a/numpy-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:985f1e46358f06c2a09921e8921e2c98168ed4ae12ccd6e5e87a4f1857923f32", size = 12787999, upload-time = "2025-10-15T16:16:05.801Z" }, - { url = "https://files.pythonhosted.org/packages/58/22/9c903a957d0a8071b607f5b1bff0761d6e608b9a965945411f867d515db1/numpy-2.3.4-cp312-cp312-win_arm64.whl", hash = "sha256:4635239814149e06e2cb9db3dd584b2fa64316c96f10656983b8026a82e6e4db", size = 10197412, upload-time = "2025-10-15T16:16:07.854Z" }, - { url = "https://files.pythonhosted.org/packages/57/7e/b72610cc91edf138bc588df5150957a4937221ca6058b825b4725c27be62/numpy-2.3.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c090d4860032b857d94144d1a9976b8e36709e40386db289aaf6672de2a81966", size = 20950335, upload-time = "2025-10-15T16:16:10.304Z" }, - { url = "https://files.pythonhosted.org/packages/3e/46/bdd3370dcea2f95ef14af79dbf81e6927102ddf1cc54adc0024d61252fd9/numpy-2.3.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a13fc473b6db0be619e45f11f9e81260f7302f8d180c49a22b6e6120022596b3", size = 14179878, upload-time = "2025-10-15T16:16:12.595Z" }, - { url = 
"https://files.pythonhosted.org/packages/ac/01/5a67cb785bda60f45415d09c2bc245433f1c68dd82eef9c9002c508b5a65/numpy-2.3.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3634093d0b428e6c32c3a69b78e554f0cd20ee420dcad5a9f3b2a63762ce4197", size = 5108673, upload-time = "2025-10-15T16:16:14.877Z" }, - { url = "https://files.pythonhosted.org/packages/c2/cd/8428e23a9fcebd33988f4cb61208fda832800ca03781f471f3727a820704/numpy-2.3.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:043885b4f7e6e232d7df4f51ffdef8c36320ee9d5f227b380ea636722c7ed12e", size = 6641438, upload-time = "2025-10-15T16:16:16.805Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d1/913fe563820f3c6b079f992458f7331278dcd7ba8427e8e745af37ddb44f/numpy-2.3.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ee6a571d1e4f0ea6d5f22d6e5fbd6ed1dc2b18542848e1e7301bd190500c9d7", size = 14281290, upload-time = "2025-10-15T16:16:18.764Z" }, - { url = "https://files.pythonhosted.org/packages/9e/7e/7d306ff7cb143e6d975cfa7eb98a93e73495c4deabb7d1b5ecf09ea0fd69/numpy-2.3.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fc8a63918b04b8571789688b2780ab2b4a33ab44bfe8ccea36d3eba51228c953", size = 16636543, upload-time = "2025-10-15T16:16:21.072Z" }, - { url = "https://files.pythonhosted.org/packages/47/6a/8cfc486237e56ccfb0db234945552a557ca266f022d281a2f577b98e955c/numpy-2.3.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:40cc556d5abbc54aabe2b1ae287042d7bdb80c08edede19f0c0afb36ae586f37", size = 16056117, upload-time = "2025-10-15T16:16:23.369Z" }, - { url = "https://files.pythonhosted.org/packages/b1/0e/42cb5e69ea901e06ce24bfcc4b5664a56f950a70efdcf221f30d9615f3f3/numpy-2.3.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ecb63014bb7f4ce653f8be7f1df8cbc6093a5a2811211770f6606cc92b5a78fd", size = 18577788, upload-time = "2025-10-15T16:16:27.496Z" }, - { url = 
"https://files.pythonhosted.org/packages/86/92/41c3d5157d3177559ef0a35da50f0cda7fa071f4ba2306dd36818591a5bc/numpy-2.3.4-cp313-cp313-win32.whl", hash = "sha256:e8370eb6925bb8c1c4264fec52b0384b44f675f191df91cbe0140ec9f0955646", size = 6282620, upload-time = "2025-10-15T16:16:29.811Z" }, - { url = "https://files.pythonhosted.org/packages/09/97/fd421e8bc50766665ad35536c2bb4ef916533ba1fdd053a62d96cc7c8b95/numpy-2.3.4-cp313-cp313-win_amd64.whl", hash = "sha256:56209416e81a7893036eea03abcb91c130643eb14233b2515c90dcac963fe99d", size = 12784672, upload-time = "2025-10-15T16:16:31.589Z" }, - { url = "https://files.pythonhosted.org/packages/ad/df/5474fb2f74970ca8eb978093969b125a84cc3d30e47f82191f981f13a8a0/numpy-2.3.4-cp313-cp313-win_arm64.whl", hash = "sha256:a700a4031bc0fd6936e78a752eefb79092cecad2599ea9c8039c548bc097f9bc", size = 10196702, upload-time = "2025-10-15T16:16:33.902Z" }, - { url = "https://files.pythonhosted.org/packages/11/83/66ac031464ec1767ea3ed48ce40f615eb441072945e98693bec0bcd056cc/numpy-2.3.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:86966db35c4040fdca64f0816a1c1dd8dbd027d90fca5a57e00e1ca4cd41b879", size = 21049003, upload-time = "2025-10-15T16:16:36.101Z" }, - { url = "https://files.pythonhosted.org/packages/5f/99/5b14e0e686e61371659a1d5bebd04596b1d72227ce36eed121bb0aeab798/numpy-2.3.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:838f045478638b26c375ee96ea89464d38428c69170360b23a1a50fa4baa3562", size = 14302980, upload-time = "2025-10-15T16:16:39.124Z" }, - { url = "https://files.pythonhosted.org/packages/2c/44/e9486649cd087d9fc6920e3fc3ac2aba10838d10804b1e179fb7cbc4e634/numpy-2.3.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d7315ed1dab0286adca467377c8381cd748f3dc92235f22a7dfc42745644a96a", size = 5231472, upload-time = "2025-10-15T16:16:41.168Z" }, - { url = "https://files.pythonhosted.org/packages/3e/51/902b24fa8887e5fe2063fd61b1895a476d0bbf46811ab0c7fdf4bd127345/numpy-2.3.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = 
"sha256:84f01a4d18b2cc4ade1814a08e5f3c907b079c847051d720fad15ce37aa930b6", size = 6739342, upload-time = "2025-10-15T16:16:43.777Z" }, - { url = "https://files.pythonhosted.org/packages/34/f1/4de9586d05b1962acdcdb1dc4af6646361a643f8c864cef7c852bf509740/numpy-2.3.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:817e719a868f0dacde4abdfc5c1910b301877970195db9ab6a5e2c4bd5b121f7", size = 14354338, upload-time = "2025-10-15T16:16:46.081Z" }, - { url = "https://files.pythonhosted.org/packages/1f/06/1c16103b425de7969d5a76bdf5ada0804b476fed05d5f9e17b777f1cbefd/numpy-2.3.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e071da78d92a214212cacea81c6da557cab307f2c34b5f85b628e94803f9c0", size = 16702392, upload-time = "2025-10-15T16:16:48.455Z" }, - { url = "https://files.pythonhosted.org/packages/34/b2/65f4dc1b89b5322093572b6e55161bb42e3e0487067af73627f795cc9d47/numpy-2.3.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2ec646892819370cf3558f518797f16597b4e4669894a2ba712caccc9da53f1f", size = 16134998, upload-time = "2025-10-15T16:16:51.114Z" }, - { url = "https://files.pythonhosted.org/packages/d4/11/94ec578896cdb973aaf56425d6c7f2aff4186a5c00fac15ff2ec46998b46/numpy-2.3.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:035796aaaddfe2f9664b9a9372f089cfc88bd795a67bd1bfe15e6e770934cf64", size = 18651574, upload-time = "2025-10-15T16:16:53.429Z" }, - { url = "https://files.pythonhosted.org/packages/62/b7/7efa763ab33dbccf56dade36938a77345ce8e8192d6b39e470ca25ff3cd0/numpy-2.3.4-cp313-cp313t-win32.whl", hash = "sha256:fea80f4f4cf83b54c3a051f2f727870ee51e22f0248d3114b8e755d160b38cfb", size = 6413135, upload-time = "2025-10-15T16:16:55.992Z" }, - { url = "https://files.pythonhosted.org/packages/43/70/aba4c38e8400abcc2f345e13d972fb36c26409b3e644366db7649015f291/numpy-2.3.4-cp313-cp313t-win_amd64.whl", hash = "sha256:15eea9f306b98e0be91eb344a94c0e630689ef302e10c2ce5f7e11905c704f9c", size = 12928582, 
upload-time = "2025-10-15T16:16:57.943Z" }, - { url = "https://files.pythonhosted.org/packages/67/63/871fad5f0073fc00fbbdd7232962ea1ac40eeaae2bba66c76214f7954236/numpy-2.3.4-cp313-cp313t-win_arm64.whl", hash = "sha256:b6c231c9c2fadbae4011ca5e7e83e12dc4a5072f1a1d85a0a7b3ed754d145a40", size = 10266691, upload-time = "2025-10-15T16:17:00.048Z" }, - { url = "https://files.pythonhosted.org/packages/72/71/ae6170143c115732470ae3a2d01512870dd16e0953f8a6dc89525696069b/numpy-2.3.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:81c3e6d8c97295a7360d367f9f8553973651b76907988bb6066376bc2252f24e", size = 20955580, upload-time = "2025-10-15T16:17:02.509Z" }, - { url = "https://files.pythonhosted.org/packages/af/39/4be9222ffd6ca8a30eda033d5f753276a9c3426c397bb137d8e19dedd200/numpy-2.3.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7c26b0b2bf58009ed1f38a641f3db4be8d960a417ca96d14e5b06df1506d41ff", size = 14188056, upload-time = "2025-10-15T16:17:04.873Z" }, - { url = "https://files.pythonhosted.org/packages/6c/3d/d85f6700d0a4aa4f9491030e1021c2b2b7421b2b38d01acd16734a2bfdc7/numpy-2.3.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:62b2198c438058a20b6704351b35a1d7db881812d8512d67a69c9de1f18ca05f", size = 5116555, upload-time = "2025-10-15T16:17:07.499Z" }, - { url = "https://files.pythonhosted.org/packages/bf/04/82c1467d86f47eee8a19a464c92f90a9bb68ccf14a54c5224d7031241ffb/numpy-2.3.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:9d729d60f8d53a7361707f4b68a9663c968882dd4f09e0d58c044c8bf5faee7b", size = 6643581, upload-time = "2025-10-15T16:17:09.774Z" }, - { url = "https://files.pythonhosted.org/packages/0c/d3/c79841741b837e293f48bd7db89d0ac7a4f2503b382b78a790ef1dc778a5/numpy-2.3.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd0c630cf256b0a7fd9d0a11c9413b42fef5101219ce6ed5a09624f5a65392c7", size = 14299186, upload-time = "2025-10-15T16:17:11.937Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/7e/4a14a769741fbf237eec5a12a2cbc7a4c4e061852b6533bcb9e9a796c908/numpy-2.3.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5e081bc082825f8b139f9e9fe42942cb4054524598aaeb177ff476cc76d09d2", size = 16638601, upload-time = "2025-10-15T16:17:14.391Z" }, - { url = "https://files.pythonhosted.org/packages/93/87/1c1de269f002ff0a41173fe01dcc925f4ecff59264cd8f96cf3b60d12c9b/numpy-2.3.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:15fb27364ed84114438fff8aaf998c9e19adbeba08c0b75409f8c452a8692c52", size = 16074219, upload-time = "2025-10-15T16:17:17.058Z" }, - { url = "https://files.pythonhosted.org/packages/cd/28/18f72ee77408e40a76d691001ae599e712ca2a47ddd2c4f695b16c65f077/numpy-2.3.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:85d9fb2d8cd998c84d13a79a09cc0c1091648e848e4e6249b0ccd7f6b487fa26", size = 18576702, upload-time = "2025-10-15T16:17:19.379Z" }, - { url = "https://files.pythonhosted.org/packages/c3/76/95650169b465ececa8cf4b2e8f6df255d4bf662775e797ade2025cc51ae6/numpy-2.3.4-cp314-cp314-win32.whl", hash = "sha256:e73d63fd04e3a9d6bc187f5455d81abfad05660b212c8804bf3b407e984cd2bc", size = 6337136, upload-time = "2025-10-15T16:17:22.886Z" }, - { url = "https://files.pythonhosted.org/packages/dc/89/a231a5c43ede5d6f77ba4a91e915a87dea4aeea76560ba4d2bf185c683f0/numpy-2.3.4-cp314-cp314-win_amd64.whl", hash = "sha256:3da3491cee49cf16157e70f607c03a217ea6647b1cea4819c4f48e53d49139b9", size = 12920542, upload-time = "2025-10-15T16:17:24.783Z" }, - { url = "https://files.pythonhosted.org/packages/0d/0c/ae9434a888f717c5ed2ff2393b3f344f0ff6f1c793519fa0c540461dc530/numpy-2.3.4-cp314-cp314-win_arm64.whl", hash = "sha256:6d9cd732068e8288dbe2717177320723ccec4fb064123f0caf9bbd90ab5be868", size = 10480213, upload-time = "2025-10-15T16:17:26.935Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/4b/c4a5f0841f92536f6b9592694a5b5f68c9ab37b775ff342649eadf9055d3/numpy-2.3.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:22758999b256b595cf0b1d102b133bb61866ba5ceecf15f759623b64c020c9ec", size = 21052280, upload-time = "2025-10-15T16:17:29.638Z" }, - { url = "https://files.pythonhosted.org/packages/3e/80/90308845fc93b984d2cc96d83e2324ce8ad1fd6efea81b324cba4b673854/numpy-2.3.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9cb177bc55b010b19798dc5497d540dea67fd13a8d9e882b2dae71de0cf09eb3", size = 14302930, upload-time = "2025-10-15T16:17:32.384Z" }, - { url = "https://files.pythonhosted.org/packages/3d/4e/07439f22f2a3b247cec4d63a713faae55e1141a36e77fb212881f7cda3fb/numpy-2.3.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:0f2bcc76f1e05e5ab58893407c63d90b2029908fa41f9f1cc51eecce936c3365", size = 5231504, upload-time = "2025-10-15T16:17:34.515Z" }, - { url = "https://files.pythonhosted.org/packages/ab/de/1e11f2547e2fe3d00482b19721855348b94ada8359aef5d40dd57bfae9df/numpy-2.3.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:8dc20bde86802df2ed8397a08d793da0ad7a5fd4ea3ac85d757bf5dd4ad7c252", size = 6739405, upload-time = "2025-10-15T16:17:36.128Z" }, - { url = "https://files.pythonhosted.org/packages/3b/40/8cd57393a26cebe2e923005db5134a946c62fa56a1087dc7c478f3e30837/numpy-2.3.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e199c087e2aa71c8f9ce1cb7a8e10677dc12457e7cc1be4798632da37c3e86e", size = 14354866, upload-time = "2025-10-15T16:17:38.884Z" }, - { url = "https://files.pythonhosted.org/packages/93/39/5b3510f023f96874ee6fea2e40dfa99313a00bf3ab779f3c92978f34aace/numpy-2.3.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85597b2d25ddf655495e2363fe044b0ae999b75bc4d630dc0d886484b03a5eb0", size = 16703296, upload-time = "2025-10-15T16:17:41.564Z" }, - { url = 
"https://files.pythonhosted.org/packages/41/0d/19bb163617c8045209c1996c4e427bccbc4bbff1e2c711f39203c8ddbb4a/numpy-2.3.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04a69abe45b49c5955923cf2c407843d1c85013b424ae8a560bba16c92fe44a0", size = 16136046, upload-time = "2025-10-15T16:17:43.901Z" }, - { url = "https://files.pythonhosted.org/packages/e2/c1/6dba12fdf68b02a21ac411c9df19afa66bed2540f467150ca64d246b463d/numpy-2.3.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e1708fac43ef8b419c975926ce1eaf793b0c13b7356cfab6ab0dc34c0a02ac0f", size = 18652691, upload-time = "2025-10-15T16:17:46.247Z" }, - { url = "https://files.pythonhosted.org/packages/f8/73/f85056701dbbbb910c51d846c58d29fd46b30eecd2b6ba760fc8b8a1641b/numpy-2.3.4-cp314-cp314t-win32.whl", hash = "sha256:863e3b5f4d9915aaf1b8ec79ae560ad21f0b8d5e3adc31e73126491bb86dee1d", size = 6485782, upload-time = "2025-10-15T16:17:48.872Z" }, - { url = "https://files.pythonhosted.org/packages/17/90/28fa6f9865181cb817c2471ee65678afa8a7e2a1fb16141473d5fa6bacc3/numpy-2.3.4-cp314-cp314t-win_amd64.whl", hash = "sha256:962064de37b9aef801d33bc579690f8bfe6c5e70e29b61783f60bcba838a14d6", size = 13113301, upload-time = "2025-10-15T16:17:50.938Z" }, - { url = "https://files.pythonhosted.org/packages/54/23/08c002201a8e7e1f9afba93b97deceb813252d9cfd0d3351caed123dcf97/numpy-2.3.4-cp314-cp314t-win_arm64.whl", hash = "sha256:8b5a9a39c45d852b62693d9b3f3e0fe052541f804296ff401a72a1b60edafb29", size = 10547532, upload-time = "2025-10-15T16:17:53.48Z" }, +source = { registry = "https://download.pytorch.org/whl/cpu" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/f4/098d2270d52b41f1bd7db9fc288aaa0400cb48c2a3e2af6fa365d9720947/numpy-2.3.4.tar.gz", hash = "sha256:a7d018bfedb375a8d979ac758b120ba846a7fe764911a64465fd87b8729f4a6a" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/7a/02420400b736f84317e759291b8edaeee9dc921f72b045475a9cbdb26b17/numpy-2.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:ef1b5a3e808bc40827b5fa2c8196151a4c5abe110e1726949d7abddfe5c7ae11" }, + { url = "https://files.pythonhosted.org/packages/18/90/a014805d627aa5750f6f0e878172afb6454552da929144b3c07fcae1bb13/numpy-2.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2f91f496a87235c6aaf6d3f3d89b17dba64996abadccb289f48456cff931ca9" }, + { url = "https://files.pythonhosted.org/packages/c7/e4/0a94b09abe89e500dc748e7515f21a13e30c5c3fe3396e6d4ac108c25fca/numpy-2.3.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f77e5b3d3da652b474cc80a14084927a5e86a5eccf54ca8ca5cbd697bf7f2667" }, + { url = "https://files.pythonhosted.org/packages/88/dd/db77c75b055c6157cbd4f9c92c4458daef0dd9cbe6d8d2fe7f803cb64c37/numpy-2.3.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8ab1c5f5ee40d6e01cbe96de5863e39b215a4d24e7d007cad56c7184fdf4aeef" }, + { url = "https://files.pythonhosted.org/packages/e1/e6/e31b0d713719610e406c0ea3ae0d90760465b086da8783e2fd835ad59027/numpy-2.3.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77b84453f3adcb994ddbd0d1c5d11db2d6bda1a2b7fd5ac5bd4649d6f5dc682e" }, + { url = "https://files.pythonhosted.org/packages/f9/58/30a85127bfee6f108282107caf8e06a1f0cc997cb6b52cdee699276fcce4/numpy-2.3.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4121c5beb58a7f9e6dfdee612cb24f4df5cd4db6e8261d7f4d7450a997a65d6a" }, + { url = "https://files.pythonhosted.org/packages/06/f2/2e06a0f2adf23e3ae29283ad96959267938d0efd20a2e25353b70065bfec/numpy-2.3.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:65611ecbb00ac9846efe04db15cbe6186f562f6bb7e5e05f077e53a599225d16" }, + { url = "https://files.pythonhosted.org/packages/b0/e7/b106253c7c0d5dc352b9c8fab91afd76a93950998167fa3e5afe4ef3a18f/numpy-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dabc42f9c6577bcc13001b8810d300fe814b4cfbe8a92c873f269484594f9786" }, + { url = 
"https://files.pythonhosted.org/packages/73/e3/04ecc41e71462276ee867ccbef26a4448638eadecf1bc56772c9ed6d0255/numpy-2.3.4-cp312-cp312-win32.whl", hash = "sha256:a49d797192a8d950ca59ee2d0337a4d804f713bb5c3c50e8db26d49666e351dc" }, + { url = "https://files.pythonhosted.org/packages/3d/a8/566578b10d8d0e9955b1b6cd5db4e9d4592dd0026a941ff7994cedda030a/numpy-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:985f1e46358f06c2a09921e8921e2c98168ed4ae12ccd6e5e87a4f1857923f32" }, + { url = "https://files.pythonhosted.org/packages/58/22/9c903a957d0a8071b607f5b1bff0761d6e608b9a965945411f867d515db1/numpy-2.3.4-cp312-cp312-win_arm64.whl", hash = "sha256:4635239814149e06e2cb9db3dd584b2fa64316c96f10656983b8026a82e6e4db" }, + { url = "https://files.pythonhosted.org/packages/57/7e/b72610cc91edf138bc588df5150957a4937221ca6058b825b4725c27be62/numpy-2.3.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c090d4860032b857d94144d1a9976b8e36709e40386db289aaf6672de2a81966" }, + { url = "https://files.pythonhosted.org/packages/3e/46/bdd3370dcea2f95ef14af79dbf81e6927102ddf1cc54adc0024d61252fd9/numpy-2.3.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a13fc473b6db0be619e45f11f9e81260f7302f8d180c49a22b6e6120022596b3" }, + { url = "https://files.pythonhosted.org/packages/ac/01/5a67cb785bda60f45415d09c2bc245433f1c68dd82eef9c9002c508b5a65/numpy-2.3.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3634093d0b428e6c32c3a69b78e554f0cd20ee420dcad5a9f3b2a63762ce4197" }, + { url = "https://files.pythonhosted.org/packages/c2/cd/8428e23a9fcebd33988f4cb61208fda832800ca03781f471f3727a820704/numpy-2.3.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:043885b4f7e6e232d7df4f51ffdef8c36320ee9d5f227b380ea636722c7ed12e" }, + { url = "https://files.pythonhosted.org/packages/3e/d1/913fe563820f3c6b079f992458f7331278dcd7ba8427e8e745af37ddb44f/numpy-2.3.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ee6a571d1e4f0ea6d5f22d6e5fbd6ed1dc2b18542848e1e7301bd190500c9d7" }, + { 
url = "https://files.pythonhosted.org/packages/9e/7e/7d306ff7cb143e6d975cfa7eb98a93e73495c4deabb7d1b5ecf09ea0fd69/numpy-2.3.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fc8a63918b04b8571789688b2780ab2b4a33ab44bfe8ccea36d3eba51228c953" }, + { url = "https://files.pythonhosted.org/packages/47/6a/8cfc486237e56ccfb0db234945552a557ca266f022d281a2f577b98e955c/numpy-2.3.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:40cc556d5abbc54aabe2b1ae287042d7bdb80c08edede19f0c0afb36ae586f37" }, + { url = "https://files.pythonhosted.org/packages/b1/0e/42cb5e69ea901e06ce24bfcc4b5664a56f950a70efdcf221f30d9615f3f3/numpy-2.3.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ecb63014bb7f4ce653f8be7f1df8cbc6093a5a2811211770f6606cc92b5a78fd" }, + { url = "https://files.pythonhosted.org/packages/86/92/41c3d5157d3177559ef0a35da50f0cda7fa071f4ba2306dd36818591a5bc/numpy-2.3.4-cp313-cp313-win32.whl", hash = "sha256:e8370eb6925bb8c1c4264fec52b0384b44f675f191df91cbe0140ec9f0955646" }, + { url = "https://files.pythonhosted.org/packages/09/97/fd421e8bc50766665ad35536c2bb4ef916533ba1fdd053a62d96cc7c8b95/numpy-2.3.4-cp313-cp313-win_amd64.whl", hash = "sha256:56209416e81a7893036eea03abcb91c130643eb14233b2515c90dcac963fe99d" }, + { url = "https://files.pythonhosted.org/packages/ad/df/5474fb2f74970ca8eb978093969b125a84cc3d30e47f82191f981f13a8a0/numpy-2.3.4-cp313-cp313-win_arm64.whl", hash = "sha256:a700a4031bc0fd6936e78a752eefb79092cecad2599ea9c8039c548bc097f9bc" }, + { url = "https://files.pythonhosted.org/packages/11/83/66ac031464ec1767ea3ed48ce40f615eb441072945e98693bec0bcd056cc/numpy-2.3.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:86966db35c4040fdca64f0816a1c1dd8dbd027d90fca5a57e00e1ca4cd41b879" }, + { url = "https://files.pythonhosted.org/packages/5f/99/5b14e0e686e61371659a1d5bebd04596b1d72227ce36eed121bb0aeab798/numpy-2.3.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:838f045478638b26c375ee96ea89464d38428c69170360b23a1a50fa4baa3562" }, + { url = "https://files.pythonhosted.org/packages/2c/44/e9486649cd087d9fc6920e3fc3ac2aba10838d10804b1e179fb7cbc4e634/numpy-2.3.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d7315ed1dab0286adca467377c8381cd748f3dc92235f22a7dfc42745644a96a" }, + { url = "https://files.pythonhosted.org/packages/3e/51/902b24fa8887e5fe2063fd61b1895a476d0bbf46811ab0c7fdf4bd127345/numpy-2.3.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:84f01a4d18b2cc4ade1814a08e5f3c907b079c847051d720fad15ce37aa930b6" }, + { url = "https://files.pythonhosted.org/packages/34/f1/4de9586d05b1962acdcdb1dc4af6646361a643f8c864cef7c852bf509740/numpy-2.3.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:817e719a868f0dacde4abdfc5c1910b301877970195db9ab6a5e2c4bd5b121f7" }, + { url = "https://files.pythonhosted.org/packages/1f/06/1c16103b425de7969d5a76bdf5ada0804b476fed05d5f9e17b777f1cbefd/numpy-2.3.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e071da78d92a214212cacea81c6da557cab307f2c34b5f85b628e94803f9c0" }, + { url = "https://files.pythonhosted.org/packages/34/b2/65f4dc1b89b5322093572b6e55161bb42e3e0487067af73627f795cc9d47/numpy-2.3.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2ec646892819370cf3558f518797f16597b4e4669894a2ba712caccc9da53f1f" }, + { url = "https://files.pythonhosted.org/packages/d4/11/94ec578896cdb973aaf56425d6c7f2aff4186a5c00fac15ff2ec46998b46/numpy-2.3.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:035796aaaddfe2f9664b9a9372f089cfc88bd795a67bd1bfe15e6e770934cf64" }, + { url = "https://files.pythonhosted.org/packages/62/b7/7efa763ab33dbccf56dade36938a77345ce8e8192d6b39e470ca25ff3cd0/numpy-2.3.4-cp313-cp313t-win32.whl", hash = "sha256:fea80f4f4cf83b54c3a051f2f727870ee51e22f0248d3114b8e755d160b38cfb" }, + { url = 
"https://files.pythonhosted.org/packages/43/70/aba4c38e8400abcc2f345e13d972fb36c26409b3e644366db7649015f291/numpy-2.3.4-cp313-cp313t-win_amd64.whl", hash = "sha256:15eea9f306b98e0be91eb344a94c0e630689ef302e10c2ce5f7e11905c704f9c" }, + { url = "https://files.pythonhosted.org/packages/67/63/871fad5f0073fc00fbbdd7232962ea1ac40eeaae2bba66c76214f7954236/numpy-2.3.4-cp313-cp313t-win_arm64.whl", hash = "sha256:b6c231c9c2fadbae4011ca5e7e83e12dc4a5072f1a1d85a0a7b3ed754d145a40" }, + { url = "https://files.pythonhosted.org/packages/72/71/ae6170143c115732470ae3a2d01512870dd16e0953f8a6dc89525696069b/numpy-2.3.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:81c3e6d8c97295a7360d367f9f8553973651b76907988bb6066376bc2252f24e" }, + { url = "https://files.pythonhosted.org/packages/af/39/4be9222ffd6ca8a30eda033d5f753276a9c3426c397bb137d8e19dedd200/numpy-2.3.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7c26b0b2bf58009ed1f38a641f3db4be8d960a417ca96d14e5b06df1506d41ff" }, + { url = "https://files.pythonhosted.org/packages/6c/3d/d85f6700d0a4aa4f9491030e1021c2b2b7421b2b38d01acd16734a2bfdc7/numpy-2.3.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:62b2198c438058a20b6704351b35a1d7db881812d8512d67a69c9de1f18ca05f" }, + { url = "https://files.pythonhosted.org/packages/bf/04/82c1467d86f47eee8a19a464c92f90a9bb68ccf14a54c5224d7031241ffb/numpy-2.3.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:9d729d60f8d53a7361707f4b68a9663c968882dd4f09e0d58c044c8bf5faee7b" }, + { url = "https://files.pythonhosted.org/packages/0c/d3/c79841741b837e293f48bd7db89d0ac7a4f2503b382b78a790ef1dc778a5/numpy-2.3.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd0c630cf256b0a7fd9d0a11c9413b42fef5101219ce6ed5a09624f5a65392c7" }, + { url = "https://files.pythonhosted.org/packages/e8/7e/4a14a769741fbf237eec5a12a2cbc7a4c4e061852b6533bcb9e9a796c908/numpy-2.3.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:d5e081bc082825f8b139f9e9fe42942cb4054524598aaeb177ff476cc76d09d2" }, + { url = "https://files.pythonhosted.org/packages/93/87/1c1de269f002ff0a41173fe01dcc925f4ecff59264cd8f96cf3b60d12c9b/numpy-2.3.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:15fb27364ed84114438fff8aaf998c9e19adbeba08c0b75409f8c452a8692c52" }, + { url = "https://files.pythonhosted.org/packages/cd/28/18f72ee77408e40a76d691001ae599e712ca2a47ddd2c4f695b16c65f077/numpy-2.3.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:85d9fb2d8cd998c84d13a79a09cc0c1091648e848e4e6249b0ccd7f6b487fa26" }, + { url = "https://files.pythonhosted.org/packages/c3/76/95650169b465ececa8cf4b2e8f6df255d4bf662775e797ade2025cc51ae6/numpy-2.3.4-cp314-cp314-win32.whl", hash = "sha256:e73d63fd04e3a9d6bc187f5455d81abfad05660b212c8804bf3b407e984cd2bc" }, + { url = "https://files.pythonhosted.org/packages/dc/89/a231a5c43ede5d6f77ba4a91e915a87dea4aeea76560ba4d2bf185c683f0/numpy-2.3.4-cp314-cp314-win_amd64.whl", hash = "sha256:3da3491cee49cf16157e70f607c03a217ea6647b1cea4819c4f48e53d49139b9" }, + { url = "https://files.pythonhosted.org/packages/0d/0c/ae9434a888f717c5ed2ff2393b3f344f0ff6f1c793519fa0c540461dc530/numpy-2.3.4-cp314-cp314-win_arm64.whl", hash = "sha256:6d9cd732068e8288dbe2717177320723ccec4fb064123f0caf9bbd90ab5be868" }, + { url = "https://files.pythonhosted.org/packages/83/4b/c4a5f0841f92536f6b9592694a5b5f68c9ab37b775ff342649eadf9055d3/numpy-2.3.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:22758999b256b595cf0b1d102b133bb61866ba5ceecf15f759623b64c020c9ec" }, + { url = "https://files.pythonhosted.org/packages/3e/80/90308845fc93b984d2cc96d83e2324ce8ad1fd6efea81b324cba4b673854/numpy-2.3.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9cb177bc55b010b19798dc5497d540dea67fd13a8d9e882b2dae71de0cf09eb3" }, + { url = "https://files.pythonhosted.org/packages/3d/4e/07439f22f2a3b247cec4d63a713faae55e1141a36e77fb212881f7cda3fb/numpy-2.3.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = 
"sha256:0f2bcc76f1e05e5ab58893407c63d90b2029908fa41f9f1cc51eecce936c3365" }, + { url = "https://files.pythonhosted.org/packages/ab/de/1e11f2547e2fe3d00482b19721855348b94ada8359aef5d40dd57bfae9df/numpy-2.3.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:8dc20bde86802df2ed8397a08d793da0ad7a5fd4ea3ac85d757bf5dd4ad7c252" }, + { url = "https://files.pythonhosted.org/packages/3b/40/8cd57393a26cebe2e923005db5134a946c62fa56a1087dc7c478f3e30837/numpy-2.3.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e199c087e2aa71c8f9ce1cb7a8e10677dc12457e7cc1be4798632da37c3e86e" }, + { url = "https://files.pythonhosted.org/packages/93/39/5b3510f023f96874ee6fea2e40dfa99313a00bf3ab779f3c92978f34aace/numpy-2.3.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85597b2d25ddf655495e2363fe044b0ae999b75bc4d630dc0d886484b03a5eb0" }, + { url = "https://files.pythonhosted.org/packages/41/0d/19bb163617c8045209c1996c4e427bccbc4bbff1e2c711f39203c8ddbb4a/numpy-2.3.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04a69abe45b49c5955923cf2c407843d1c85013b424ae8a560bba16c92fe44a0" }, + { url = "https://files.pythonhosted.org/packages/e2/c1/6dba12fdf68b02a21ac411c9df19afa66bed2540f467150ca64d246b463d/numpy-2.3.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e1708fac43ef8b419c975926ce1eaf793b0c13b7356cfab6ab0dc34c0a02ac0f" }, + { url = "https://files.pythonhosted.org/packages/f8/73/f85056701dbbbb910c51d846c58d29fd46b30eecd2b6ba760fc8b8a1641b/numpy-2.3.4-cp314-cp314t-win32.whl", hash = "sha256:863e3b5f4d9915aaf1b8ec79ae560ad21f0b8d5e3adc31e73126491bb86dee1d" }, + { url = "https://files.pythonhosted.org/packages/17/90/28fa6f9865181cb817c2471ee65678afa8a7e2a1fb16141473d5fa6bacc3/numpy-2.3.4-cp314-cp314t-win_amd64.whl", hash = "sha256:962064de37b9aef801d33bc579690f8bfe6c5e70e29b61783f60bcba838a14d6" }, + { url = 
"https://files.pythonhosted.org/packages/54/23/08c002201a8e7e1f9afba93b97deceb813252d9cfd0d3351caed123dcf97/numpy-2.3.4-cp314-cp314t-win_arm64.whl", hash = "sha256:8b5a9a39c45d852b62693d9b3f3e0fe052541f804296ff401a72a1b60edafb29" }, ] [[package]] @@ -2876,54 +2881,82 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, ] +[[package]] +name = "pgvector" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/6c/6d8b4b03b958c02fa8687ec6063c49d952a189f8c91ebbe51e877dfab8f7/pgvector-0.4.2.tar.gz", hash = "sha256:322cac0c1dc5d41c9ecf782bd9991b7966685dee3a00bc873631391ed949513a", size = 31354, upload-time = "2025-12-05T01:07:17.87Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/26/6cee8a1ce8c43625ec561aff19df07f9776b7525d9002c86bceb3e0ac970/pgvector-0.4.2-py3-none-any.whl", hash = "sha256:549d45f7a18593783d5eec609ea1684a724ba8405c4cb182a0b2b08aeff04e08", size = 27441, upload-time = "2025-12-05T01:07:16.536Z" }, +] + [[package]] name = "pillow" version = "11.3.0" source = { registry = "https://download.pytorch.org/whl/cpu" } -wheels = [ - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" 
}, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp312-cp312-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = 
"https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp313-cp313t-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = 
"https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/pillow-11.3.0-cp314-cp314t-win_arm64.whl" }, +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4" }, + { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69" }, + { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d" }, + { url = "https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6" }, + { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7" }, + { url = "https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024" }, + { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809" }, + { url = "https://files.pythonhosted.org/packages/0b/1a/7cff92e695a2a29ac1958c2a0fe4c0b2393b60aac13b04a4fe2735cad52d/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d" }, + { url = "https://files.pythonhosted.org/packages/26/7d/73699ad77895f69edff76b0f332acc3d497f22f5d75e5360f78cbcaff248/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149" }, + { url = "https://files.pythonhosted.org/packages/8c/ce/e7dfc873bdd9828f3b6e5c2bbb74e47a98ec23cc5c74fc4e54462f0d9204/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d" }, + { url = "https://files.pythonhosted.org/packages/16/8f/b13447d1bf0b1f7467ce7d86f6e6edf66c0ad7cf44cf5c87a37f9bed9936/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542" }, + { url = "https://files.pythonhosted.org/packages/1e/93/0952f2ed8db3a5a4c7a11f91965d6184ebc8cd7cbb7941a260d5f018cd2d/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd" }, + { url = "https://files.pythonhosted.org/packages/4b/e8/100c3d114b1a0bf4042f27e0f87d2f25e857e838034e98ca98fe7b8c0a9c/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8" }, + { url = 
"https://files.pythonhosted.org/packages/aa/86/3f758a28a6e381758545f7cdb4942e1cb79abd271bea932998fc0db93cb6/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f" }, + { url = "https://files.pythonhosted.org/packages/01/f4/91d5b3ffa718df2f53b0dc109877993e511f4fd055d7e9508682e8aba092/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c" }, + { url = "https://files.pythonhosted.org/packages/f9/0e/37d7d3eca6c879fbd9dba21268427dffda1ab00d4eb05b32923d4fbe3b12/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd" }, + { url = "https://files.pythonhosted.org/packages/ff/b0/3426e5c7f6565e752d81221af9d3676fdbb4f352317ceafd42899aaf5d8a/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e" }, + { url = "https://files.pythonhosted.org/packages/fc/c1/c6c423134229f2a221ee53f838d4be9d82bab86f7e2f8e75e47b6bf6cd77/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1" }, + { url = "https://files.pythonhosted.org/packages/ba/c9/09e6746630fe6372c67c648ff9deae52a2bc20897d51fa293571977ceb5d/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805" }, + { url = "https://files.pythonhosted.org/packages/d5/1c/a2a29649c0b1983d3ef57ee87a66487fdeb45132df66ab30dd37f7dbe162/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8" }, + { url = 
"https://files.pythonhosted.org/packages/36/de/d5cc31cc4b055b6c6fd990e3e7f0f8aaf36229a2698501bcb0cdf67c7146/pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2" }, + { url = "https://files.pythonhosted.org/packages/d5/ea/502d938cbaeec836ac28a9b730193716f0114c41325db428e6b280513f09/pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b" }, + { url = "https://files.pythonhosted.org/packages/45/9c/9c5e2a73f125f6cbc59cc7087c8f2d649a7ae453f83bd0362ff7c9e2aee2/pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3" }, + { url = "https://files.pythonhosted.org/packages/23/85/397c73524e0cd212067e0c969aa245b01d50183439550d24d9f55781b776/pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51" }, + { url = "https://files.pythonhosted.org/packages/17/d2/622f4547f69cd173955194b78e4d19ca4935a1b0f03a302d655c9f6aae65/pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580" }, + { url = "https://files.pythonhosted.org/packages/dd/80/a8a2ac21dda2e82480852978416cfacd439a4b490a501a288ecf4fe2532d/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e" }, + { url = "https://files.pythonhosted.org/packages/44/d6/b79754ca790f315918732e18f82a8146d33bcd7f4494380457ea89eb883d/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d" }, + { url = "https://files.pythonhosted.org/packages/49/20/716b8717d331150cb00f7fdd78169c01e8e0c219732a78b0e59b6bdb2fd6/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced" }, + { url = "https://files.pythonhosted.org/packages/74/cf/a9f3a2514a65bb071075063a96f0a5cf949c2f2fce683c15ccc83b1c1cab/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c" }, + { url = "https://files.pythonhosted.org/packages/98/3c/da78805cbdbee9cb43efe8261dd7cc0b4b93f2ac79b676c03159e9db2187/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8" }, + { url = "https://files.pythonhosted.org/packages/6c/fa/ce044b91faecf30e635321351bba32bab5a7e034c60187fe9698191aef4f/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59" }, + { url = "https://files.pythonhosted.org/packages/7b/51/90f9291406d09bf93686434f9183aba27b831c10c87746ff49f127ee80cb/pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe" }, + { url = "https://files.pythonhosted.org/packages/cd/5a/6fec59b1dfb619234f7636d4157d11fb4e196caeee220232a8d2ec48488d/pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c" }, + { url = "https://files.pythonhosted.org/packages/49/6b/00187a044f98255225f172de653941e61da37104a9ea60e4f6887717e2b5/pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788" }, + { url = "https://files.pythonhosted.org/packages/e8/5c/6caaba7e261c0d75bab23be79f1d06b5ad2a2ae49f028ccec801b0e853d6/pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31" }, + { url = 
"https://files.pythonhosted.org/packages/f3/7e/b623008460c09a0cb38263c93b828c666493caee2eb34ff67f778b87e58c/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e" }, + { url = "https://files.pythonhosted.org/packages/73/f4/04905af42837292ed86cb1b1dabe03dce1edc008ef14c473c5c7e1443c5d/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12" }, + { url = "https://files.pythonhosted.org/packages/41/b0/33d79e377a336247df6348a54e6d2a2b85d644ca202555e3faa0cf811ecc/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a" }, + { url = "https://files.pythonhosted.org/packages/49/2d/ed8bc0ab219ae8768f529597d9509d184fe8a6c4741a6864fea334d25f3f/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632" }, + { url = "https://files.pythonhosted.org/packages/b5/3d/b932bb4225c80b58dfadaca9d42d08d0b7064d2d1791b6a237f87f661834/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673" }, + { url = "https://files.pythonhosted.org/packages/09/b5/0487044b7c096f1b48f0d7ad416472c02e0e4bf6919541b111efd3cae690/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027" }, + { url = "https://files.pythonhosted.org/packages/a8/2d/524f9318f6cbfcc79fbc004801ea6b607ec3f843977652fdee4857a7568b/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77" }, + { url = 
"https://files.pythonhosted.org/packages/6f/d2/a9a4f280c6aefedce1e8f615baaa5474e0701d86dd6f1dede66726462bbd/pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874" }, + { url = "https://files.pythonhosted.org/packages/fe/54/86b0cd9dbb683a9d5e960b66c7379e821a19be4ac5810e2e5a715c09a0c0/pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a" }, + { url = "https://files.pythonhosted.org/packages/e7/95/88efcaf384c3588e24259c4203b909cbe3e3c2d887af9e938c2022c9dd48/pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214" }, + { url = "https://files.pythonhosted.org/packages/2e/cc/934e5820850ec5eb107e7b1a72dd278140731c669f396110ebc326f2a503/pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635" }, + { url = "https://files.pythonhosted.org/packages/d6/e9/9c0a616a71da2a5d163aa37405e8aced9a906d574b4a214bede134e731bc/pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6" }, + { url = "https://files.pythonhosted.org/packages/1a/33/c88376898aff369658b225262cd4f2659b13e8178e7534df9e6e1fa289f6/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae" }, + { url = "https://files.pythonhosted.org/packages/1f/70/d376247fb36f1844b42910911c83a02d5544ebd2a8bad9efcc0f707ea774/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653" }, + { url = "https://files.pythonhosted.org/packages/eb/1c/537e930496149fbac69efd2fc4329035bbe2e5475b4165439e3be9cb183b/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6" }, + { url = "https://files.pythonhosted.org/packages/bd/57/80f53264954dcefeebcf9dae6e3eb1daea1b488f0be8b8fef12f79a3eb10/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36" }, + { url = "https://files.pythonhosted.org/packages/70/ff/4727d3b71a8578b4587d9c276e90efad2d6fe0335fd76742a6da08132e8c/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b" }, + { url = "https://files.pythonhosted.org/packages/05/ae/716592277934f85d3be51d7256f3636672d7b1abfafdc42cf3f8cbd4b4c8/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477" }, + { url = "https://files.pythonhosted.org/packages/e7/bb/7fe6cddcc8827b01b1a9766f5fdeb7418680744f9082035bdbabecf1d57f/pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50" }, + { url = "https://files.pythonhosted.org/packages/8b/f5/06bfaa444c8e80f1a8e4bff98da9c83b37b5be3b1deaa43d27a0db37ef84/pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b" }, + { url = "https://files.pythonhosted.org/packages/f0/77/bc6f92a3e8e6e46c0ca78abfffec0037845800ea38c73483760362804c41/pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12" }, + { url = "https://files.pythonhosted.org/packages/4a/82/3a721f7d69dca802befb8af08b7c79ebcab461007ce1c18bd91a5d5896f9/pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db" }, + { url = 
"https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa" }, ] [[package]] @@ -4095,8 +4128,9 @@ source = { registry = "https://download.pytorch.org/whl/cpu" } dependencies = [ { name = "mpmath" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517" } wheels = [ - { url = "https://download.pytorch.org/whl/sympy-1.14.0-py3-none-any.whl" }, + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5" }, ] [[package]] From 979cda3a205cc43f28593de87d039f4488e3705e Mon Sep 17 00:00:00 2001 From: mtalvi Date: Mon, 15 Dec 2025 14:03:46 +0200 Subject: [PATCH 2/4] updating quay reo for tei image --- .../charts/text-embeddings-inference/values.yaml | 2 +- deploy/local/compose.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/helm/ansible-log-monitor/charts/text-embeddings-inference/values.yaml b/deploy/helm/ansible-log-monitor/charts/text-embeddings-inference/values.yaml index a8baccf..945a187 100644 --- a/deploy/helm/ansible-log-monitor/charts/text-embeddings-inference/values.yaml +++ b/deploy/helm/ansible-log-monitor/charts/text-embeddings-inference/values.yaml @@ -4,7 +4,7 @@ replicaCount: 1 strategy: {} image: - repository: quay.io/rh-ai-quickstart/alm-backend + repository: quay.io/rh-ai-quickstart/alm-rag pullPolicy: Always tag: "tei-rag-v1" diff --git a/deploy/local/compose.yaml b/deploy/local/compose.yaml index 9ac7b21..634d5af 100644 --- a/deploy/local/compose.yaml +++ b/deploy/local/compose.yaml @@ -221,7 +221,7 @@ 
services: retries: 3 alm-embedding: - image: quay.io/rh-ai-quickstart/alm-backend:tei-rag-v1 + image: quay.io/rh-ai-quickstart/alm-rag:tei-rag-v1 container_name: alm-embedding # Entrypoint is already set to text-embeddings-router in the image ports: From 2ace1eab35bddbd5ceb0ee25b98a4b608eb47477 Mon Sep 17 00:00:00 2001 From: mtalvi Date: Tue, 16 Dec 2025 13:05:11 +0200 Subject: [PATCH 3/4] local deployment --- .env.example | 4 ++ Makefile | 6 ++- deploy/local/Makefile | 95 ++++++++++++++++++++++++++++++++------- deploy/local/compose.yaml | 34 +++++++++++++- init_pipeline.py | 34 +++++++------- src/alm/database.py | 23 +++++----- 6 files changed, 151 insertions(+), 45 deletions(-) diff --git a/.env.example b/.env.example index 029f18d..a8bc893 100644 --- a/.env.example +++ b/.env.example @@ -20,6 +20,10 @@ PROD_CORS_ORIGIN=http://localhost:3000 # RAG (Retrieval-Augmented Generation) Configuration # ============================================================================ EMBEDDINGS_LLM_URL=http://localhost:8080 +# RAG Service URL (microservice endpoint) +# Backend communicates with RAG service via HTTP +# Default: http://alm-rag:8002 (for Kubernetes) or http://localhost:8002 (for local) +RAG_SERVICE_URL=http://localhost:8002 # Enable/disable RAG functionality (default: true) RAG_ENABLED=true diff --git a/Makefile b/Makefile index 218c79f..4a875db 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # This makefile routes targets to local or helm specific makefiles -.PHONY: all local helm help +.PHONY: all local helm help rag-status test-rag # ifneq (,$(wildcard .env)) # # ifneq (,$(filter local,$(MAKECMDGOALS))) @@ -34,3 +34,7 @@ local/%: ## Route local targets to deploy/local/Makefile cluster/%: ## Route deploy targets to deploy/helm/Makefile @$(MAKE) -C deploy/helm $* + +# Convenience targets for common local commands +rag-status: local/rag-status ## Check RAG service status +test-rag: local/test-rag ## Test RAG service diff --git a/deploy/local/Makefile 
b/deploy/local/Makefile index 1740ba4..ec1c855 100644 --- a/deploy/local/Makefile +++ b/deploy/local/Makefile @@ -1,4 +1,4 @@ -.PHONY: deploy start stop status help postgres phoenix loki-stack aap-mock replay stop-replay backend ui annotation health logs restart clean stop-postgres stop-phoenix stop-loki-stack stop-aap-mock embedding stop-embedding wait-for-embedding test-embedding test-rag +.PHONY: deploy start stop status help postgres phoenix loki-stack aap-mock replay stop-replay backend ui annotation health logs restart clean stop-postgres stop-phoenix stop-loki-stack stop-aap-mock embedding stop-embedding wait-for-embedding rag stop-rag test-embedding test-rag .DEFAULT_GOAL := help include ../../.env @@ -36,8 +36,10 @@ help: ## Show this help message @echo "" @echo "📚 RAG Setup:" @echo " 1. Place PDF files in data/knowledge_base/" - @echo " 2. Run 'make run-whole-training-pipeline' to build RAG index" - @echo " 3. Ensure RAG_ENABLED=true in .env file" + @echo " 2. Start services: 'make start' (includes RAG service)" + @echo " 3. Run 'make run-whole-training-pipeline' to build RAG embeddings in PostgreSQL" + @echo " 4. RAG service will automatically load embeddings when available" + @echo " 5. Ensure RAG_ENABLED=true and RAG_SERVICE_URL=http://alm-rag:8002 in .env file" @echo "" @@ -51,6 +53,7 @@ start: stop ## 🚀 Start all services locally @$(MAKE) -s embedding @echo " ⚠️ Note: Embedding service may take 3-5 minutes to load the model" @echo " You can check status with: make test-embedding" + @$(MAKE) -s rag @$(MAKE) -s aap-mock @$(MAKE) -s backend @$(MAKE) -s ui @@ -79,6 +82,12 @@ embedding: ## 🤖 Start Embedding Service (TEI) @echo " Starting Embedding Service (TEI)..." @$(COMPOSE_CMD) -f compose.yaml up -d alm-embedding +rag: ## 🔍 Start RAG Service + @echo " Starting RAG Service..." 
+ @$(COMPOSE_CMD) -f compose.yaml up -d alm-rag + @echo " ⚠️ Note: RAG service will poll PostgreSQL for embeddings" + @echo " It will become ready once embeddings are available (after init job runs)" + wait-for-embedding: ## ⏳ Wait for embedding service to be ready (optional, for manual use) @echo " Waiting for embedding service to be ready (this may take 3-5 minutes for model loading)..." @timeout=300; \ @@ -167,6 +176,7 @@ stop: ## 🛑 Stop all services @$(MAKE) -s stop-postgres @$(MAKE) -s stop-phoenix @$(MAKE) -s stop-embedding + @$(MAKE) -s stop-rag @$(MAKE) -s kill-ports @echo "👋 All services stopped" @@ -190,6 +200,10 @@ stop-embedding: ## 🛑 Stop Embedding Service @$(COMPOSE_CMD) -f compose.yaml down alm-embedding || true @echo " ✓ Embedding service stopped and removed" +stop-rag: ## 🛑 Stop RAG Service + @$(COMPOSE_CMD) -f compose.yaml down alm-rag || true + @echo " ✓ RAG service stopped and removed" + kill-ports: ## 🔌 Kill processes using required ports @if [ "$$(uname)" = "Darwin" ]; then \ lsof -ti :7860 | xargs kill -9 2>/dev/null || true; \ @@ -239,6 +253,12 @@ kill-ports: ## 🔌 Kill processes using required ports fuser -k 8081/tcp 2>/dev/null || true; \ fi @echo " ✓ Loki MCP Server 8081 killed" + @if [ "$$(uname)" = "Darwin" ]; then \ + lsof -ti :8002 | xargs kill -9 2>/dev/null || true; \ + else \ + fuser -k 8002/tcp 2>/dev/null || true; \ + fi + @echo " ✓ RAG Service 8002 killed" @if [ "$$(uname)" = "Darwin" ]; then \ lsof -ti :3000 | xargs kill -9 2>/dev/null || true; \ else \ @@ -259,6 +279,7 @@ status: ## 📊 Show status of all services @echo " 📊 Loki: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'loki' | grep -v 'loki-mcp' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:3100" @echo " 🔍 Loki MCP Server: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'loki-mcp-server' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:8081" @echo " 🤖 Embedding Service: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'alm-embedding' 
| grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:8080" + @echo " 🔍 RAG Service: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'alm-rag' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:8002" @echo " 📈 Grafana: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'grafana' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:3000" @echo " 📝 Promtail: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'promtail' | grep -q 'Up' && echo 'Running' || echo 'Stopped')" @echo " 🎭 AAP Mock: $(shell $(COMPOSE_CMD) ps 2>/dev/null | grep 'aap-mock' | grep -q 'Up' && echo 'Running' || echo 'Stopped') - http://localhost:8082" @@ -293,14 +314,20 @@ run-whole-training-pipeline: ## 🔍 Run whole training pipeline (builds RAG ind @( cd ../.. && uv run init_pipeline.py ) # Add new target to check RAG status -rag-status: ## 📊 Check RAG index status - @echo "📊 RAG Index Status:" - @if [ -f "../../data/ansible_errors.index" ] && [ -f "../../data/error_metadata.pkl" ]; then \ - echo " ✅ RAG index exists"; \ - ls -lh ../../data/ansible_errors.index ../../data/error_metadata.pkl; \ +rag-status: ## 📊 Check RAG service and index status + @echo "📊 RAG Service Status:" + @if curl -s -f http://localhost:8002/health >/dev/null 2>&1; then \ + echo " ✅ RAG service is running"; \ + if curl -s -f http://localhost:8002/ready >/dev/null 2>&1; then \ + index_size=$$(curl -s http://localhost:8002/ready | grep -o '"index_size":[0-9]*' | grep -o '[0-9]*' || echo "unknown"); \ + echo " ✅ RAG index is loaded ($$index_size embeddings)"; \ + else \ + echo " ⚠️ RAG index not loaded yet (waiting for embeddings)"; \ + echo " Run 'make run-whole-training-pipeline' to build embeddings"; \ + fi; \ else \ - echo " ❌ RAG index not found"; \ - echo " Run 'make run-whole-training-pipeline' to build it"; \ + echo " ❌ RAG service is not running"; \ + echo " Start it with: make rag"; \ fi @if [ -d "../../data/knowledge_base" ]; then \ pdf_count=$$(ls -1 
../../data/knowledge_base/*.pdf 2>/dev/null | wc -l); \ @@ -335,14 +362,40 @@ test-embedding: ## 🧪 Test embedding service @echo "" @echo "✅ Embedding service test passed!" -test-rag: ## 🧪 Test RAG system (requires RAG index) - @echo "🧪 Testing RAG System..." - @if [ ! -f "../../data/ansible_errors.index" ]; then \ - echo "❌ RAG index not found. Run 'make run-whole-training-pipeline' first."; \ +test-rag: ## 🧪 Test RAG service (requires RAG service running and embeddings in PostgreSQL) + @echo "🧪 Testing RAG Service..." + @echo "" + @echo "1. Health Check:" + @if curl -s -f http://localhost:8002/health >/dev/null 2>&1; then \ + echo " ✅ Service is healthy"; \ + else \ + echo " ❌ Service is not responding"; \ + exit 1; \ + fi + @echo "" + @echo "2. Readiness Check:" + @if curl -s -f http://localhost:8002/ready >/dev/null 2>&1; then \ + echo " ✅ Service is ready (index loaded)"; \ + else \ + echo " ⚠️ Service is not ready (index not loaded yet)"; \ + echo " Run 'make run-whole-training-pipeline' to build embeddings"; \ + exit 1; \ + fi + @echo "" + @echo "3. Test Query:" + @response=$$(curl -s -X POST http://localhost:8002/rag/query \ + -H "Content-Type: application/json" \ + -d '{"query": "ansible playbook execution failed", "top_k": 5, "top_n": 3, "similarity_threshold": 0.6}'); \ + if echo "$$response" | grep -q '"results"'; then \ + echo " ✅ Query successful"; \ + echo " Response preview: $$(echo $$response | head -c 200)..."; \ + else \ + echo " ❌ Query failed"; \ + echo " Response: $$response"; \ exit 1; \ fi - @echo "Running RAG test script..." - @(cd ../.. && uv run python tests/rag/test_embeddings.py) + @echo "" + @echo "✅ RAG service test passed!" 
health: ## 🔍 Check health of running services @echo "🔍 Health Checks:" @@ -381,6 +434,16 @@ health: ## 🔍 Check health of running services else \ echo " 🤖 Embedding Service http://localhost:8080: Unhealthy (may still be loading model)"; \ fi + @if curl -s http://localhost:8002/health >/dev/null 2>&1; then \ + echo " 🔍 RAG Service http://localhost:8002: Healthy"; \ + if curl -s http://localhost:8002/ready >/dev/null 2>&1; then \ + echo " 🔍 RAG Service http://localhost:8002: Ready (index loaded)"; \ + else \ + echo " 🔍 RAG Service http://localhost:8002: Not ready (index not loaded yet)"; \ + fi; \ + else \ + echo " 🔍 RAG Service http://localhost:8002: Unhealthy"; \ + fi @if curl -s http://localhost:3000 >/dev/null 2>&1; then \ echo " 📈 Grafana http://localhost:3000: Healthy"; \ else \ diff --git a/deploy/local/compose.yaml b/deploy/local/compose.yaml index 634d5af..22795ac 100644 --- a/deploy/local/compose.yaml +++ b/deploy/local/compose.yaml @@ -125,7 +125,7 @@ services: restart: unless-stopped postgres: - image: postgres:15 + image: pgvector/pgvector:pg15 # PostgreSQL with pgvector extension environment: - POSTGRES_USER=user - POSTGRES_PASSWORD=password @@ -173,6 +173,7 @@ services: - PROD_CORS_ORIGIN=${PROD_CORS_ORIGIN} # RAG Configuration - RAG_ENABLED=${RAG_ENABLED} + - RAG_SERVICE_URL=${RAG_SERVICE_URL:-http://alm-rag:8002} - DATA_DIR=${DATA_DIR} - KNOWLEDGE_BASE_DIR=${KNOWLEDGE_BASE_DIR} - RAG_TOP_K=${RAG_TOP_K} @@ -194,6 +195,8 @@ services: condition: service_healthy alm-embedding: condition: service_healthy + alm-rag: + condition: service_started # RAG service can start before embeddings are ready restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] @@ -244,6 +247,35 @@ services: # Note: This service requires significant memory (8Gi recommended) # Adjust resources based on your system capabilities + alm-rag: + build: + context: ../.. 
+ dockerfile: services/rag/Containerfile + container_name: alm-rag + ports: + - "8002:8002" + environment: + # Use container network name for PostgreSQL (not localhost) + # Override any local DATABASE_URL to use container network + - DATABASE_URL=postgresql+asyncpg://user:password@postgres:5432/logsdb + - EMBEDDINGS_LLM_URL=http://alm-embedding:8080 + - RAG_MODEL_NAME=nomic-ai/nomic-embed-text-v1.5 + - PORT=8002 + networks: + - alm + depends_on: + postgres: + condition: service_healthy + alm-embedding: + condition: service_healthy + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8002/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s # Service starts immediately, but index may take time to load + volumes: postgres_data: aap_mock_data: diff --git a/init_pipeline.py b/init_pipeline.py index c22c5a6..9ef1b15 100644 --- a/init_pipeline.py +++ b/init_pipeline.py @@ -29,21 +29,21 @@ def setup_data_directories(): print(f" ✓ {data_dir}") print(f" ✓ {logs_dir}") - # Check for knowledge base PDFs in image - image_kb_dir = Path("/app/data/knowledge_base") - if image_kb_dir.exists(): - image_pdfs = list(image_kb_dir.glob("*.pdf")) - if image_pdfs: - print(f"\n✓ Found {len(image_pdfs)} PDF file(s) in container image:") - for pdf in image_pdfs: + # Check for knowledge base PDFs + # Use config path (works for both local and container) + kb_dir = Path(config.storage.knowledge_base_dir) + if kb_dir.exists(): + pdfs = list(kb_dir.glob("*.pdf")) + if pdfs: + print(f"\n✓ Found {len(pdfs)} PDF file(s) in knowledge base ({kb_dir}):") + for pdf in pdfs: print(f" - {pdf.name}") else: - print(f"\n⚠ No PDF files found in image at {image_kb_dir}") + print(f"\n⚠ No PDF files found in {kb_dir}") + print(" Add PDF files to the knowledge base directory to enable RAG") else: - print(f"\n⚠ Knowledge base directory not found in image at {image_kb_dir}") - print( - " PDFs should be baked into the container image at 
/app/data/knowledge_base" - ) + print(f"\n⚠ Knowledge base directory not found at {kb_dir}") + print(" Create the directory and add PDF files to enable RAG") print("=" * 70) @@ -108,13 +108,13 @@ async def build_rag_index(): parser = AnsibleErrorParser() embedder = AnsibleErrorEmbedder() - # Find PDFs in knowledge base (from container image) - # PDFs should be baked into the image at /app/data/knowledge_base - image_kb_dir = Path("/app/data/knowledge_base") - pdf_files = sorted(glob.glob(str(image_kb_dir / "*.pdf"))) + # Find PDFs in knowledge base + # Use config path (works for both local and container) + kb_dir = Path(config.storage.knowledge_base_dir) + pdf_files = sorted(glob.glob(str(kb_dir / "*.pdf"))) if not pdf_files: - print(f"⚠ WARNING: No PDF files found in {image_kb_dir}") + print(f"⚠ WARNING: No PDF files found in {kb_dir}") print(" RAG index will not be created") return diff --git a/src/alm/database.py b/src/alm/database.py index 14e9f8c..56f320b 100644 --- a/src/alm/database.py +++ b/src/alm/database.py @@ -24,6 +24,19 @@ # Create tables async def init_tables(delete_tables=False): + # First, try to enable pgvector extension in a separate transaction + # This prevents transaction abort errors if extension creation fails + try: + async with engine.begin() as ext_conn: + await ext_conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) + logger.info("pgvector extension enabled") + except Exception as e: + logger.warning(f"Could not enable pgvector extension: {e}") + logger.warning("This is OK if extension is already enabled or not available") + # For local dev without pgvector, we'll continue but RAG won't work + # In production, this should fail + + # Now create tables in a separate transaction async with engine.begin() as conn: if delete_tables: logger.info("Starting to delete tables") @@ -32,16 +45,6 @@ async def init_tables(delete_tables=False): await conn.run_sync(GrafanaAlert.metadata.drop_all) # RAGEmbedding table is NOT deleted - it persists 
across runs - # Ensure pgvector extension is enabled (must be done before creating tables) - try: - await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) - logger.info("pgvector extension enabled") - except Exception as e: - logger.warning(f"Could not enable pgvector extension: {e}") - logger.warning( - "This is OK if extension is already enabled or not available" - ) - # Create all tables await conn.run_sync(GrafanaAlert.metadata.create_all) await conn.run_sync(RAGEmbedding.metadata.create_all) From adf7331692b97943870cf68073884674f5813dba Mon Sep 17 00:00:00 2001 From: mtalvi Date: Tue, 16 Dec 2025 18:18:14 +0200 Subject: [PATCH 4/4] final fix --- services/rag/main.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/services/rag/main.py b/services/rag/main.py index 7717b50..97b98f1 100644 --- a/services/rag/main.py +++ b/services/rag/main.py @@ -99,8 +99,27 @@ async def load_index_background(): print(f"✗ Failed to load RAG index: {e}") return # Don't raise, just return - service will stay in "not ready" state except Exception as e: - print(f"✗ Failed to load RAG index: {e}") - return # Don't raise, just return - service will stay in "not ready" state + # Check if this is a "table doesn't exist" error - continue polling + error_str = str(e).lower() + if ( + "does not exist" in error_str + or "undefinedtable" in error_str + or "relation" in error_str + ): + # Table doesn't exist yet - init job is still creating it + if elapsed == 0 or elapsed % 30 == 0: # Print every 30 seconds + print( + f"Table not yet created (waited {elapsed}s), retrying in {wait_interval}s..." 
+ ) + await asyncio.sleep(wait_interval) + elapsed += wait_interval + else: + # Some other error - log and continue polling (might be transient) + if elapsed == 0 or elapsed % 30 == 0: # Print every 30 seconds + print(f"Error loading index (waited {elapsed}s): {e}") + print(f" Retrying in {wait_interval}s...") + await asyncio.sleep(wait_interval) + elapsed += wait_interval # If we get here, we've timed out print(f"⚠ WARNING: Failed to load RAG index after {max_wait_time} seconds")