Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ PROD_CORS_ORIGIN=http://localhost:3000
# RAG (Retrieval-Augmented Generation) Configuration
# ============================================================================
EMBEDDINGS_LLM_URL=http://localhost:8080
# RAG Service URL (microservice endpoint)
# Backend communicates with RAG service via HTTP
# Default: http://alm-rag:8002 (for Kubernetes) or http://localhost:8002 (for local)
RAG_SERVICE_URL=http://localhost:8002
# Enable/disable RAG functionality (default: true)
RAG_ENABLED=true

Expand Down
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This makefile routes targets to local or helm specific makefiles
.PHONY: all local helm help
.PHONY: all local helm help rag-status test-rag

# ifneq (,$(wildcard .env))
# # ifneq (,$(filter local,$(MAKECMDGOALS)))
Expand Down Expand Up @@ -34,3 +34,7 @@ local/%: ## Route local targets to deploy/local/Makefile

cluster/%: ## Route deploy targets to deploy/helm/Makefile
@$(MAKE) -C deploy/helm $*

# Convenience targets for common local commands
# Each alias has no recipe of its own: it only depends on the matching
# local/<name> target, so the work is done by whatever rule builds that
# prerequisite (presumably the local/% routing rule, which per its help text
# forwards to deploy/local/Makefile).
rag-status: local/rag-status ## Check RAG service status
test-rag: local/test-rag ## Test RAG service
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ data:
{{- if .Values.rag.enabled }}
# RAG Configuration
RAG_ENABLED: {{ .Values.rag.enabled | quote }}
# Model is hardcoded to nomic-ai/nomic-embed-text-v1.5, no env var needed
# API URL defaults to http://alm-embedding:8080 (local cluster service)
# RAG Service URL (microservice endpoint)
RAG_SERVICE_URL: {{ .Values.rag.serviceUrl | default "http://alm-rag:8002" | quote }}
# Embedding service URL (for init job, not used by backend)
EMBEDDINGS_LLM_URL: {{ .Values.rag.embedding.apiUrl | default "http://alm-embedding:8080" | quote }}
# Data paths (for init job only)
DATA_DIR: {{ .Values.rag.dataDir | quote }}
KNOWLEDGE_BASE_DIR: {{ .Values.rag.knowledgeBaseDir | quote }}
# Query configuration
RAG_TOP_K: {{ .Values.rag.query.topK | quote }}
RAG_TOP_N: {{ .Values.rag.query.topN | quote }}
RAG_SIMILARITY_THRESHOLD: {{ .Values.rag.query.similarityThreshold | quote }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,10 @@ spec:
{{- toYaml . | nindent 12 }}
{{- end }}
volumeMounts:
{{- if .Values.rag.enabled }}
- name: rag-data
mountPath: {{ .Values.rag.pvcMountPath }}
readOnly: true
{{- end }}
{{- with .Values.volumeMounts }}
{{- toYaml . | nindent 12 }}
{{- end }}
volumes:
{{- if .Values.rag.enabled }}
- name: rag-data
persistentVolumeClaim:
claimName: {{ include "backend.fullname" . }}-rag-data
{{- end }}
{{- with .Values.volumes }}
{{- toYaml . | nindent 8 }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,6 @@ spec:
volumeMounts:
- name: init-sync
mountPath: /init-sync
{{- if .Values.rag.enabled }}
- name: rag-data
mountPath: {{ .Values.rag.pvcMountPath }}
{{- end }}
{{- with .Values.volumeMounts }}
{{- toYaml . | nindent 12 }}
{{- end }}
Expand All @@ -133,11 +129,6 @@ spec:
volumes:
- name: init-sync
emptyDir: {}
{{- if .Values.rag.enabled }}
- name: rag-data
persistentVolumeClaim:
claimName: {{ include "backend.fullname" . }}-rag-data
{{- end }}
{{- with .Values.volumes }}
{{- toYaml . | nindent 8 }}
{{- end }}
Expand Down

This file was deleted.

28 changes: 10 additions & 18 deletions deploy/helm/ansible-log-monitor/charts/backend/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,11 @@ rag:
# Enable or disable RAG functionality
enabled: true

# Embedding model configuration
# RAG Service URL (microservice endpoint)
# Backend pods communicate with RAG service via HTTP
serviceUrl: "http://alm-rag:8002"

# Embedding model configuration (used by init job for building index)
# NOTE: API credentials (apiKey, apiUrl, modelName) are provided during 'make install'
# and stored in the 'model-secret' Kubernetes secret
embedding:
Expand All @@ -244,24 +248,12 @@ rag:
apiUrl: "http://alm-embedding:8080" # TEI service URL (defaults to local cluster service)
port: 8080 # Port for the embedding service (TEI)

# Data paths
# Data paths (used by init job for knowledge base PDFs)
# Note: PDFs should be baked into the container image at /app/data/knowledge_base
# The init job will read PDFs from the image and process them
# The RAG index (embeddings) is stored in PostgreSQL
dataDir: "/app/data/rag"
knowledgeBaseDir: "/app/data/rag/knowledge_base"
# PVC mount path (mounted directly at /app/data/rag)
pvcMountPath: "/app/data/rag"

# Persistence configuration for RAG index storage
persistence:
# Storage size for RAG index and metadata
size: "2Gi"
# Access mode: ReadWriteOnce (RWO) is used because:
# 1. Init job writes the index once
# 2. Backend pods only read (never write)
# 3. AWS EBS (gp3-csi) only supports RWO
# Note: For RWO, all backend pods must be scheduled on the same node as the PVC
accessMode: "ReadWriteOnce"
# Storage class (leave empty for default)
storageClassName: ""
knowledgeBaseDir: "/app/data/knowledge_base" # PDFs should be in container image

# Query configuration
query:
Expand Down
9 changes: 9 additions & 0 deletions deploy/helm/ansible-log-monitor/charts/rag/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Chart metadata for the RAG service chart.
# apiVersion v2 is the chart format introduced with Helm 3.
apiVersion: v2
name: rag
description: A Helm chart for RAG service

# An "application" chart renders deployable resources (as opposed to a "library" chart).
type: application

# Chart version; combined with the chart name by the "rag.chart" label helper.
version: 0.1.0
# Application version; the Deployment falls back to it when image.tag is not set.
appVersion: "0.1.0"

36 changes: 36 additions & 0 deletions deploy/helm/ansible-log-monitor/charts/rag/templates/NOTES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{{- /*
Post-install notes shown to the user. Exactly one access recipe is rendered,
chosen in this order: HTTPRoute enabled, Ingress enabled, then service type
NodePort, LoadBalancer or ClusterIP. Everything outside template actions is
printed verbatim.
*/ -}}
1. Get the application URL by running these commands:
{{- if .Values.httpRoute.enabled }}
{{- if .Values.httpRoute.hostnames }}
export APP_HOSTNAME={{ .Values.httpRoute.hostnames | first }}
{{- else }}
export APP_HOSTNAME=$(kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o jsonpath="{.spec.listeners[0].hostname}")
{{- end }}
{{- if and .Values.httpRoute.rules (first .Values.httpRoute.rules).matches (first (first .Values.httpRoute.rules).matches).path.value }}
echo "Visit http://$APP_HOSTNAME{{ (first (first .Values.httpRoute.rules).matches).path.value }} to use your application"

NOTE: Your HTTPRoute depends on the listener configuration of your gateway and your HTTPRoute rules.
The rules can be set for path, method, header and query parameters.
You can check the gateway configuration with 'kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o yaml'
{{- end }}
{{- else if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "rag.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "rag.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "rag.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "rag.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{{/*
Short name of the chart: nameOverride when it is set, otherwise the chart's
own name. The result is cut to 63 characters and a trailing "-" is dropped.
*/}}
{{- define "rag.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Fully qualified application name.
Resolution order:
  1. fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name
     (or nameOverride);
  3. "<release name>-<chart name>" otherwise.
The result is cut to 63 characters, since some Kubernetes name fields are
limited to that length by the DNS naming spec, and a trailing "-" is dropped.
*/}}
{{- define "rag.fullname" -}}
{{- $candidate := .Values.fullnameOverride }}
{{- if not $candidate }}
{{- $base := .Values.nameOverride | default .Chart.Name }}
{{- $candidate = .Release.Name }}
{{- if not (contains $base .Release.Name) }}
{{- $candidate = printf "%s-%s" .Release.Name $base }}
{{- end }}
{{- end }}
{{- $candidate | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Value for the chart label: "<chart name>-<chart version>" with every "+"
replaced by "_", cut to 63 characters, and a trailing "-" dropped.
*/}}
{{- define "rag.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels for this chart's resources: the chart name/version label,
the selector labels, the application version (emitted only when the chart
declares an appVersion) and the name of the managing release service.
*/}}
{{- define "rag.labels" -}}
helm.sh/chart: {{ include "rag.chart" . }}
{{ include "rag.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels: the chart-name / release-name pair. Used for the
Deployment's spec.selector.matchLabels and also embedded in the common
labels.
*/}}
{{- define "rag.selectorLabels" -}}
app.kubernetes.io/name: {{ include "rag.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Name of the service account the pods run as.
serviceAccount.name always wins when it is set. Otherwise the fallback is
the chart's full name when serviceAccount.create is true, and "default"
when it is not.
*/}}
{{- define "rag.serviceAccountName" -}}
{{- $fallback := "default" }}
{{- if .Values.serviceAccount.create }}
{{- $fallback = include "rag.fullname" . }}
{{- end }}
{{- .Values.serviceAccount.name | default $fallback }}
{{- end }}

101 changes: 101 additions & 0 deletions deploy/helm/ansible-log-monitor/charts/rag/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
{{- /*
Deployment for the RAG service.
An init container holds back the main container until the PostgreSQL
instance referenced by the "uri" key of the "pgvector" secret accepts
connections.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "rag.fullname" . }}
  labels:
    {{- include "rag.labels" . | nindent 4 }}
spec:
  # replicas is left unset when autoscaling is enabled (hpa.yaml is rendered instead).
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "rag.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "rag.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "rag.serviceAccountName" . }}
      initContainers:
        - name: wait-for-postgres
          image: postgres:15-alpine
          command:
            - sh
            - -c
            - |
              # Poll until PostgreSQL accepts connections.
              # pg_isready exit codes: 0 = accepting, 1 = rejecting (e.g. starting up),
              # 2 = no response, 3 = no attempt made (e.g. invalid parameters).
              while :; do
                rc=0
                pg_isready -d "$DATABASE_URL" || rc=$?
                [ "$rc" -eq 0 ] && break
                if [ "$rc" -eq 3 ]; then
                  # Retrying cannot fix bad connection parameters; fail loudly
                  # instead of leaving the pod stuck in Init forever.
                  echo "pg_isready made no connection attempt (exit 3); check the 'uri' key of the 'pgvector' secret" >&2
                  exit 1
                fi
                echo "Waiting for PostgreSQL to be ready..."
                sleep 5
              done
              echo "PostgreSQL is ready!"
          env:
            # Connection URI read from the "pgvector" secret.
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: pgvector
                  key: uri
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The image tag falls back to the chart's appVersion when image.tag is unset.
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            # The container listens on the same port number the Service exposes.
            - name: http
              containerPort: {{ .Values.service.port }}
              protocol: TCP
          {{- with .Values.livenessProbe }}
          livenessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.readinessProbe }}
          readinessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.env }}
          env:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

33 changes: 33 additions & 0 deletions deploy/helm/ansible-log-monitor/charts/rag/templates/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{{- /*
HorizontalPodAutoscaler targeting the RAG Deployment. Rendered only when
autoscaling.enabled is set; the Deployment template leaves its replicas
field out in that case.
*/ -}}
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "rag.fullname" . }}
labels:
{{- include "rag.labels" . | nindent 4 }}
spec:
# Scale the Deployment that carries the same full name as this HPA.
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "rag.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
# Each metric entry is emitted only when its target percentage is set.
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
{{- end }}
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
{{- end }}
{{- end }}

Loading