@@ -3,52 +3,227 @@ name: DeepEval RAG System Tests
33on :
44 pull_request :
55 types : [opened, synchronize, reopened]
6+ branches : ["rag-33-debug", "RAG-33-31okt", "wip_3_12"]
67 paths :
78 - ' src/**'
89 - ' tests/**'
10+ - ' data/**'
11+ - ' docker-compose-eval.yml'
12+ - ' Dockerfile.llm_orchestration_service'
913 - ' .github/workflows/deepeval-tests.yml'
1014
1115jobs :
1216 deepeval-tests :
1317 runs-on : ubuntu-latest
14- timeout-minutes : 40
18+ timeout-minutes : 80
1519
1620 steps :
1721 - name : Checkout code
1822 uses : actions/checkout@v4
19-
23+
24+ - name : Validate required secrets
25+ id : validate_secrets
26+ run : |
27+ echo "Validating required environment variables..."
28+ MISSING_SECRETS=()
29+
30+ # Check Azure OpenAI secrets
31+ if [ -z "${{ secrets.AZURE_OPENAI_ENDPOINT }}" ]; then
32+ MISSING_SECRETS+=("AZURE_OPENAI_ENDPOINT")
33+ fi
34+
35+ if [ -z "${{ secrets.AZURE_OPENAI_API_KEY }}" ]; then
36+ MISSING_SECRETS+=("AZURE_OPENAI_API_KEY")
37+ fi
38+
39+ if [ -z "${{ secrets.AZURE_OPENAI_DEPLOYMENT }}" ]; then
40+ MISSING_SECRETS+=("AZURE_OPENAI_DEPLOYMENT")
41+ fi
42+
43+ if [ -z "${{ secrets.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }}" ]; then
44+ MISSING_SECRETS+=("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
45+ fi
46+
47+ if [ -z "${{ secrets.AZURE_OPENAI_DEEPEVAL_DEPLOYMENT }}" ]; then
48+ MISSING_SECRETS+=("AZURE_OPENAI_DEEPEVAL_DEPLOYMENT")
49+ fi
50+
51+
52+
53+ if [ -z "${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}" ]; then
54+ MISSING_SECRETS+=("AZURE_STORAGE_CONNECTION_STRING")
55+ fi
56+
57+ if [ -z "${{ secrets.AZURE_STORAGE_CONTAINER_NAME }}" ]; then
58+ MISSING_SECRETS+=("AZURE_STORAGE_CONTAINER_NAME")
59+ fi
60+
61+ if [ -z "${{ secrets.AZURE_STORAGE_BLOB_NAME }}" ]; then
62+ MISSING_SECRETS+=("AZURE_STORAGE_BLOB_NAME")
63+ fi
64+
65+
66+ # If any secrets are missing, fail
67+ if [ ${#MISSING_SECRETS[@]} -gt 0 ]; then
68+ echo "missing=true" >> $GITHUB_OUTPUT
69+ echo "secrets_list=${MISSING_SECRETS[*]}" >> $GITHUB_OUTPUT
70+ echo " Missing required secrets: ${MISSING_SECRETS[*]}"
71+ exit 1
72+ else
73+ echo "missing=false" >> $GITHUB_OUTPUT
74+ echo " All required secrets are configured"
75+ fi
76+
77+ - name : Comment PR with missing secrets error
78+ if : failure() && steps.validate_secrets.outputs.missing == 'true'
79+ uses : actions/github-script@v7
80+ with :
81+ script : |
82+ const missingSecrets = '${{ steps.validate_secrets.outputs.secrets_list }}'.split(' ');
83+ const secretsList = missingSecrets.map(s => `- \`${s}\``).join('\n');
84+
85+ const comment = `## DeepEval Tests: Missing Required Secrets
86+
87+ The DeepEval RAG system tests cannot run because the following GitHub secrets are not configured:
88+
89+ ${secretsList}
90+
91+ ### How to Fix
92+
93+ 1. Go to **Settings** → **Secrets and variables** → **Actions**
94+ 2. Add the missing secrets with the appropriate values:
95+
96+ **Azure OpenAI Configuration:**
97+ - \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI resource endpoint (e.g., \`https://your-resource.openai.azure.com/\`)
98+ - \`AZURE_OPENAI_API_KEY\` - Your Azure OpenAI API key
99+ - \`AZURE_OPENAI_DEPLOYMENT\` - Chat model deployment name (e.g., \`gpt-4o-mini\`)
100+ - \`AZURE_OPENAI_EMBEDDING_DEPLOYMENT\` - Embedding model deployment name (e.g., \`text-embedding-3-large\`)
101+ - \`AZURE_STORAGE_CONNECTION_STRING\` - Connection string for Azure Blob Storage
102+ - \`AZURE_STORAGE_CONTAINER_NAME\` - Container name in Azure Blob Storage
103+ - \`AZURE_STORAGE_BLOB_NAME\` - Blob name for dataset in Azure
104+ - \`AZURE_OPENAI_DEEPEVAL_DEPLOYMENT\` - DeepEval model deployment name (e.g., \`gpt-4.1\`)
105+
106+ 3. Re-run the workflow after adding the secrets
107+
108+ ### Note
109+ Tests will not run until all required secrets are configured.
110+
111+ ---
112+ *Workflow: ${context.workflow} | Run: [#${context.runNumber}](${context.payload.repository.html_url}/actions/runs/${context.runId})*`;
113+
114+ // Find existing comment
115+ const comments = await github.rest.issues.listComments({
116+ owner: context.repo.owner,
117+ repo: context.repo.repo,
118+ issue_number: context.issue.number
119+ });
120+
121+ const existingComment = comments.data.find(
122+ comment => comment.user.login === 'github-actions[bot]' &&
123+ comment.body.includes('DeepEval Tests: Missing Required Secrets')
124+ );
125+
126+ if (existingComment) {
127+ await github.rest.issues.updateComment({
128+ owner: context.repo.owner,
129+ repo: context.repo.repo,
130+ comment_id: existingComment.id,
131+ body: comment
132+ });
133+ } else {
134+ await github.rest.issues.createComment({
135+ owner: context.repo.owner,
136+ repo: context.repo.repo,
137+ issue_number: context.issue.number,
138+ body: comment
139+ });
140+ }
141+
20142 - name : Set up Python
143+ if : success()
21144 uses : actions/setup-python@v5
22145 with :
23146 python-version-file : ' .python-version'
24-
147+
25148 - name : Set up uv
149+ if : success()
26150 uses : astral-sh/setup-uv@v6
27-
151+
28152 - name : Install dependencies (locked)
153+ if : success()
29154 run : uv sync --frozen
30-
31- - name : Run DeepEval tests
155+
156+ - name : Create test directories with proper permissions
157+ if : success()
158+ run : |
159+ mkdir -p test-vault/agents/llm
160+ mkdir -p test-vault/agent-out
161+ # Set ownership to current user and make writable
162+ sudo chown -R $(id -u):$(id -g) test-vault
163+ chmod -R 777 test-vault
164+ # Ensure the agent-out directory is world-readable after writes
165+ sudo chmod -R a+rwX test-vault/agent-out
166+
167+ - name : Set up Deepeval with azure
168+ if : success()
169+ run : |
170+ uv run deepeval set-azure-openai \
171+ --openai-endpoint "${{ secrets.AZURE_OPENAI_ENDPOINT }}" \
172+ --openai-api-key "${{ secrets.AZURE_OPENAI_API_KEY }}" \
173+ --deployment-name "${{ secrets.AZURE_OPENAI_DEPLOYMENT }}" \
174+ --openai-model-name "${{ secrets.AZURE_OPENAI_DEEPEVAL_DEPLOYMENT }}" \
175+ --openai-api-version="2024-12-01-preview"
176+
177+ - name : Run DeepEval tests with testcontainers
178+ if : success()
32179 id : run_tests
180+ continue-on-error : true
33181 env :
34- ANTHROPIC_API_KEY : ${{ secrets.ANTHROPIC_API_KEY }}
35- OPENAI_API_KEY : ${{ secrets.OPENAI_API_KEY }}
36- run : uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short
37-
182+ # LLM API Keys
183+ AZURE_OPENAI_DEEPEVAL_DEPLOYMENT : ${{ secrets.AZURE_OPENAI_DEEPEVAL_DEPLOYMENT }}
184+ # Azure OpenAI - Chat Model
185+ AZURE_OPENAI_API_KEY : ${{ secrets.AZURE_OPENAI_API_KEY }}
186+ AZURE_OPENAI_ENDPOINT : ${{ secrets.AZURE_OPENAI_ENDPOINT }}
187+ AZURE_OPENAI_DEPLOYMENT : ${{ secrets.AZURE_OPENAI_DEPLOYMENT }}
188+ # Azure OpenAI - Embedding Model
189+ AZURE_OPENAI_EMBEDDING_DEPLOYMENT : ${{ secrets.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }}
190+ # Evaluation mode
191+ AZURE_STORAGE_CONNECTION_STRING : ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
192+ AZURE_STORAGE_CONTAINER_NAME : ${{ secrets.AZURE_STORAGE_CONTAINER_NAME }}
193+ AZURE_STORAGE_BLOB_NAME : ${{ secrets.AZURE_STORAGE_BLOB_NAME }}
194+ EVAL_MODE : " true"
195+ run : |
196+ # Run tests with testcontainers managing Docker Compose
197+ uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short --log-cli-level=INFO
198+
199+ - name : Fix permissions on test artifacts
200+ if : always()
201+ run : |
202+ sudo chown -R $(id -u):$(id -g) test-vault || true
203+ sudo chmod -R a+rX test-vault || true
204+
38205 - name : Generate evaluation report
39206 if : always()
40- run : python tests/deepeval_tests/report_generator.py
41-
207+ run : uv run python tests/deepeval_tests/report_generator.py
208+
209+ - name : Save test artifacts
210+ if : always()
211+ uses : actions/upload-artifact@v4
212+ with :
213+ name : test-results
214+ path : |
215+ pytest_captured_results.json
216+ test_report.md
217+ retention-days : 30
218+
42219 - name : Comment PR with test results
43220 if : always() && github.event_name == 'pull_request'
44221 uses : actions/github-script@v7
45222 with :
46223 script : |
47224 const fs = require('fs');
48-
49225 try {
50226 const reportContent = fs.readFileSync('test_report.md', 'utf8');
51-
52227 const comments = await github.rest.issues.listComments({
53228 owner: context.repo.owner,
54229 repo: context.repo.repo,
57232
58233 const existingComment = comments.data.find(
59234 comment => comment.user.login === 'github-actions[bot]' &&
60- comment.body.includes('RAG System Evaluation Report')
235+ comment.body.includes('RAG System Evaluation Report')
61236 );
62237
63238 if (existingComment) {
@@ -75,36 +250,35 @@ jobs:
75250 body: reportContent
76251 });
77252 }
78-
79253 } catch (error) {
80254 console.error('Failed to post test results:', error);
81-
82255 await github.rest.issues.createComment({
83256 issue_number: context.issue.number,
84257 owner: context.repo.owner,
85258 repo: context.repo.repo,
86259 body: `## RAG System Evaluation Report\n\n**Error generating test report**\n\nFailed to read or post test results. Check workflow logs for details.\n\nError: ${error.message}`
87260 });
88261 }
89-
262+
90263 - name : Check test results and fail if needed
91264 if : always()
92265 run : |
93- # Check if pytest ran (look at step output)
94- if [ "${{ steps.run_tests.outcome }}" == "failure" ]; then
266+ # Check if pytest ran (look at step output)
267+ if [ "${{ steps.run_tests.outcome }}" == "failure" ]; then
95268 echo "Tests ran but failed - this is expected if RAG performance is below threshold"
96- fi
97- if [ -f "pytest_captured_results.json" ]; then
269+ fi
270+
271+ if [ -f "pytest_captured_results.json" ]; then
98272 total_tests=$(jq '.total_tests // 0' pytest_captured_results.json)
99273 passed_tests=$(jq '.passed_tests // 0' pytest_captured_results.json)
100-
274+
101275 if [ "$total_tests" -eq 0 ]; then
102276 echo "ERROR: No tests were executed"
103277 exit 1
104278 fi
105-
279+
106280 pass_rate=$(awk "BEGIN {print ($passed_tests / $total_tests) * 100}")
107-
281+
108282 echo "DeepEval Test Results:"
109283 echo "Total Tests: $total_tests"
110284 echo "Passed Tests: $passed_tests"
@@ -117,7 +291,13 @@ jobs:
117291 else
118292 echo "TEST SUCCESS: Pass rate $pass_rate% meets threshold 70%"
119293 fi
120- else
294+ else
121295 echo "ERROR: No test results file found"
122296 exit 1
123- fi
297+ fi
298+
299+ - name : Cleanup Docker resources
300+ if : always()
301+ run : |
302+ docker compose -f docker-compose-eval.yml down -v --remove-orphans || true
303+ docker system prune -f || true