rh-ai-quickstart · mtalvi · Dec 15, 2025 · Dec 15, 2025 · Dec 16, 2025 · Dec 16, 2025
@@ -20,6 +20,10 @@ PROD_CORS_ORIGIN=http://localhost:3000
 # RAG (Retrieval-Augmented Generation) Configuration
 # ============================================================================
 EMBEDDINGS_LLM_URL=http://localhost:8080
+# RAG Service URL (microservice endpoint)
+# Backend communicates with RAG service via HTTP
+# Use http://alm-rag:8002 inside Kubernetes or http://localhost:8002 for local development
+RAG_SERVICE_URL=http://localhost:8002
 # Enable/disable RAG functionality (default: true)
 RAG_ENABLED=true
 

@@ -1,5 +1,5 @@
 # This makefile routes targets to local or helm specific makefiles
-.PHONY: all local helm help
+.PHONY: all local helm help rag-status test-rag
 
 # ifneq (,$(wildcard .env))
 # # ifneq (,$(filter local,$(MAKECMDGOALS)))
@@ -34,3 +34,7 @@ local/%: ## Route local targets to deploy/local/Makefile
 
 cluster/%: ## Route deploy targets to deploy/helm/Makefile
 	@$(MAKE) -C deploy/helm $*
+
+# Convenience targets for common local commands
+rag-status: local/rag-status ## Check RAG service status
+test-rag: local/test-rag ## Test RAG service
@@ -19,11 +19,14 @@ data:
   {{- if .Values.rag.enabled }}
   # RAG Configuration
   RAG_ENABLED: {{ .Values.rag.enabled | quote }}
-  # Model is hardcoded to nomic-ai/nomic-embed-text-v1.5, no env var needed
-  # API URL defaults to http://alm-embedding:8080 (local cluster service)
+  # RAG Service URL (microservice endpoint)
+  RAG_SERVICE_URL: {{ .Values.rag.serviceUrl | default "http://alm-rag:8002" | quote }}
+  # Embedding service URL (for init job, not used by backend)
   EMBEDDINGS_LLM_URL: {{ .Values.rag.embedding.apiUrl | default "http://alm-embedding:8080" | quote }}
+  # Data paths (for init job only)
   DATA_DIR: {{ .Values.rag.dataDir | quote }}
   KNOWLEDGE_BASE_DIR: {{ .Values.rag.knowledgeBaseDir | quote }}
+  # Query configuration
   RAG_TOP_K: {{ .Values.rag.query.topK | quote }}
   RAG_TOP_N: {{ .Values.rag.query.topN | quote }}
   RAG_SIMILARITY_THRESHOLD: {{ .Values.rag.query.similarityThreshold | quote }}

@@ -76,20 +76,10 @@ spec:
             {{- toYaml . | nindent 12 }}
           {{- end }}
           volumeMounts:
-          {{- if .Values.rag.enabled }}
-            - name: rag-data
-              mountPath: {{ .Values.rag.pvcMountPath }}
-              readOnly: true
-          {{- end }}
           {{- with .Values.volumeMounts }}
             {{- toYaml . | nindent 12 }}
           {{- end }}
       volumes:
-      {{- if .Values.rag.enabled }}
-        - name: rag-data
-          persistentVolumeClaim:
-            claimName: {{ include "backend.fullname" . }}-rag-data
-      {{- end }}
       {{- with .Values.volumes }}
         {{- toYaml . | nindent 8 }}
       {{- end }}

@@ -107,10 +107,6 @@ spec:
           volumeMounts:
             - name: init-sync
               mountPath: /init-sync
-          {{- if .Values.rag.enabled }}
-            - name: rag-data
-              mountPath: {{ .Values.rag.pvcMountPath }}
-          {{- end }}
           {{- with .Values.volumeMounts }}
             {{- toYaml . | nindent 12 }}
           {{- end }}
@@ -133,11 +129,6 @@ spec:
       volumes:
         - name: init-sync
           emptyDir: {}
-      {{- if .Values.rag.enabled }}
-        - name: rag-data
-          persistentVolumeClaim:
-            claimName: {{ include "backend.fullname" . }}-rag-data
-      {{- end }}
       {{- with .Values.volumes }}
         {{- toYaml . | nindent 8 }}
       {{- end }}

@@ -235,7 +235,11 @@ rag:
   # Enable or disable RAG functionality
   enabled: true
 
-  # Embedding model configuration
+  # RAG Service URL (microservice endpoint)
+  # Backend pods communicate with RAG service via HTTP
+  serviceUrl: "http://alm-rag:8002"
+
+  # Embedding model configuration (used by init job for building index)
   # NOTE: API credentials (apiKey, apiUrl, modelName) are provided during 'make install'
   # and stored in the 'model-secret' Kubernetes secret
   embedding:
@@ -244,24 +248,12 @@ rag:
     apiUrl: "http://alm-embedding:8080"  # TEI service URL (defaults to local cluster service)
     port: 8080  # Port for the embedding service (TEI)
 
-  # Data paths
+  # Data paths (used by the init job to locate the knowledge base PDFs)
+  # Note: PDFs should be baked into the container image at /app/data/knowledge_base.
+  # The init job reads the PDFs from the image and processes them;
+  # the resulting RAG index (embeddings) is stored in PostgreSQL.
   dataDir: "/app/data/rag"
-  knowledgeBaseDir: "/app/data/rag/knowledge_base"
-  # PVC mount path (mounted directly at /app/data/rag)
-  pvcMountPath: "/app/data/rag"
-
-  # Persistence configuration for RAG index storage
-  persistence:
-    # Storage size for RAG index and metadata
-    size: "2Gi"
-    # Access mode: ReadWriteOnce (RWO) is used because:
-    # 1. Init job writes the index once
-    # 2. Backend pods only read (never write)
-    # 3. AWS EBS (gp3-csi) only supports RWO
-    # Note: For RWO, all backend pods must be scheduled on the same node as the PVC
-    accessMode: "ReadWriteOnce"
-    # Storage class (leave empty for default)
-    storageClassName: ""
+  knowledgeBaseDir: "/app/data/knowledge_base"  # PDFs should be in container image
 
   # Query configuration
   query:

@@ -0,0 +1,9 @@
+# Chart metadata for the RAG service Helm chart.
+apiVersion: v2
+name: rag
+description: A Helm chart for RAG service
+
+# "application" charts contain deployable templates (as opposed to "library" charts).
+type: application
+
+# Chart version; it is part of the helm.sh/chart label built by the rag.chart helper.
+version: 0.1.0
+# Application version; the Deployment uses it as the image tag when image.tag is unset.
+appVersion: "0.1.0"
+
@@ -0,0 +1,36 @@
+{{- /*
+Post-install notes. Prints how to reach the service, using the first match of:
+HTTPRoute (httpRoute.enabled), Ingress (ingress.enabled), then service.type
+NodePort, LoadBalancer or ClusterIP.
+*/ -}}
+1. Get the application URL by running these commands:
+{{- if .Values.httpRoute.enabled }}
+{{- if .Values.httpRoute.hostnames }}
+    export APP_HOSTNAME={{ .Values.httpRoute.hostnames | first }}
+{{- else }}
+    export APP_HOSTNAME=$(kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o jsonpath="{.spec.listeners[0].hostname}")
+  {{- end }}
+{{- if and .Values.httpRoute.rules (first .Values.httpRoute.rules).matches (first (first .Values.httpRoute.rules).matches).path.value }}
+    echo "Visit http://$APP_HOSTNAME{{ (first (first .Values.httpRoute.rules).matches).path.value }} to use your application"
+
+    NOTE: Your HTTPRoute depends on the listener configuration of your gateway and your HTTPRoute rules.
+    The rules can be set for path, method, header and query parameters.
+    You can check the gateway configuration with 'kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o yaml'
+{{- end }}
+{{- else if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+  {{- range .paths }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+  {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "rag.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "rag.fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "rag.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "rag.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+{{- end }}
+
@@ -0,0 +1,63 @@
+{{/*
+Short chart name: nameOverride when set, otherwise the chart name,
+limited to 63 characters with any trailing "-" removed.
+*/}}
+{{- define "rag.name" -}}
+{{- $base := .Values.nameOverride | default .Chart.Name }}
+{{- $base | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+Resolution order:
+  1. fullnameOverride, when set, is used.
+  2. Otherwise the base name is nameOverride (or the chart name); if the release
+     name already contains that base name, the release name alone is used.
+  3. Otherwise the result is "<release name>-<base name>".
+Every result is truncated at 63 chars because some Kubernetes name fields are
+limited to this (by the DNS naming spec), and a trailing "-" is trimmed.
+*/}}
+{{- define "rag.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Value for the helm.sh/chart label: "<chart name>-<chart version>", with "+"
+replaced by "_", cut to 63 characters and stripped of a trailing "-".
+*/}}
+{{- define "rag.chart" -}}
+{{- $chartId := printf "%s-%s" .Chart.Name .Chart.Version }}
+{{- $chartId | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: the helm.sh/chart label, the selector labels, the app version
+(only when the chart declares an appVersion), and app.kubernetes.io/managed-by
+set to the releasing service.
+*/}}
+{{- define "rag.labels" -}}
+helm.sh/chart: {{ include "rag.chart" . }}
+{{ include "rag.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: the chart name and the release name (instance).
+This pair is also embedded in the common labels.
+*/}}
+{{- define "rag.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "rag.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Name of the service account to use.
+serviceAccount.name wins whenever it is set; otherwise the fallback is the
+chart fullname when serviceAccount.create is set, or "default" when it is not.
+*/}}
+{{- define "rag.serviceAccountName" -}}
+{{- $fallback := "default" }}
+{{- if .Values.serviceAccount.create }}
+{{- $fallback = include "rag.fullname" . }}
+{{- end }}
+{{- default $fallback .Values.serviceAccount.name }}
+{{- end }}
+
@@ -0,0 +1,101 @@
+# Deployment for the RAG service. The main container starts only after the
+# wait-for-postgres init container has seen PostgreSQL accept connections.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "rag.fullname" . }}
+  labels:
+    {{- include "rag.labels" . | nindent 4 }}
+spec:
+  # A fixed replica count is set only when autoscaling is disabled.
+  {{- if not .Values.autoscaling.enabled }}
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "rag.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "rag.labels" . | nindent 8 }}
+        {{- with .Values.podLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "rag.serviceAccountName" . }}
+      initContainers:
+        # Blocks pod startup until PostgreSQL accepts connections.
+        # pg_isready exit codes: 0 = accepting connections, 1 = rejecting
+        # connections (e.g. still starting up), 2 = no response, 3 = no attempt
+        # made (e.g. invalid connection parameters). Codes 1 and 2 are retried
+        # every 5 seconds; code 3 cannot recover by waiting, so the container
+        # fails with an error instead of looping forever.
+        - name: wait-for-postgres
+          image: postgres:15-alpine
+          command:
+            - sh
+            - -c
+            - |
+              while :; do
+                pg_isready -d "$DATABASE_URL"
+                status=$?
+                if [ "$status" -eq 0 ]; then
+                  break
+                fi
+                if [ "$status" -eq 3 ]; then
+                  echo "pg_isready could not attempt a connection (exit code 3); check the 'uri' key of the 'pgvector' secret" >&2
+                  exit 1
+                fi
+                echo "Waiting for PostgreSQL to be ready..."
+                sleep 5
+              done
+              echo "PostgreSQL is ready!"
+          env:
+            # Connection URI read from the 'uri' key of the 'pgvector' secret.
+            - name: DATABASE_URL
+              valueFrom:
+                secretKeyRef:
+                  name: pgvector
+                  key: uri
+      {{- with .Values.podSecurityContext }}
+      securityContext:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: {{ .Chart.Name }}
+          {{- with .Values.securityContext }}
+          securityContext:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          ports:
+            # The container port reuses the service.port value.
+            - name: http
+              containerPort: {{ .Values.service.port }}
+              protocol: TCP
+          # The optional sections below are rendered only when the matching value is set.
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.env }}
+          env:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.volumeMounts }}
+          volumeMounts:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+      {{- with .Values.volumes }}
+      volumes:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+
@@ -0,0 +1,33 @@
+{{/*
+HorizontalPodAutoscaler targeting the RAG Deployment by its fullname.
+Rendered only when autoscaling.enabled is set.
+*/}}
+{{- if .Values.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "rag.fullname" . }}
+  labels:
+    {{- include "rag.labels" . | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "rag.fullname" . }}
+  minReplicas: {{ .Values.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  # Each resource metric below is included only when its target percentage is set.
+  metrics:
+    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+    {{- end }}
+    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- end }}
+{{- end }}
+