Skip to content

Commit 20993d2

Browse files
authored
Merge pull request #14 from ckittask/deepeval_readteam_tests
deepeval_redteam_tests
2 parents 97f0583 + 965dab6 commit 20993d2

File tree

73 files changed

+9767
-808
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

73 files changed

+9767
-808
lines changed

.github/workflows/deepeval-tests.yml

Lines changed: 207 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,52 +3,227 @@ name: DeepEval RAG System Tests
33
on:
44
pull_request:
55
types: [opened, synchronize, reopened]
6+
branches: ["rag-33-debug", "RAG-33-31okt", "wip_3_12"]
67
paths:
78
- 'src/**'
89
- 'tests/**'
10+
- 'data/**'
11+
- 'docker-compose-eval.yml'
12+
- 'Dockerfile.llm_orchestration_service'
913
- '.github/workflows/deepeval-tests.yml'
1014

1115
jobs:
1216
deepeval-tests:
1317
runs-on: ubuntu-latest
14-
timeout-minutes: 40
18+
timeout-minutes: 80
1519

1620
steps:
1721
- name: Checkout code
1822
uses: actions/checkout@v4
19-
23+
24+
- name: Validate required secrets
25+
id: validate_secrets
26+
run: |
27+
echo "Validating required environment variables..."
28+
MISSING_SECRETS=()
29+
30+
# Check Azure OpenAI secrets
31+
if [ -z "${{ secrets.AZURE_OPENAI_ENDPOINT }}" ]; then
32+
MISSING_SECRETS+=("AZURE_OPENAI_ENDPOINT")
33+
fi
34+
35+
if [ -z "${{ secrets.AZURE_OPENAI_API_KEY }}" ]; then
36+
MISSING_SECRETS+=("AZURE_OPENAI_API_KEY")
37+
fi
38+
39+
if [ -z "${{ secrets.AZURE_OPENAI_DEPLOYMENT }}" ]; then
40+
MISSING_SECRETS+=("AZURE_OPENAI_DEPLOYMENT")
41+
fi
42+
43+
if [ -z "${{ secrets.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }}" ]; then
44+
MISSING_SECRETS+=("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
45+
fi
46+
47+
if [ -z "${{ secrets.AZURE_OPENAI_DEEPEVAL_DEPLOYMENT }}" ]; then
48+
MISSING_SECRETS+=("AZURE_OPENAI_DEEPEVAL_DEPLOYMENT")
49+
fi
50+
51+
52+
53+
if [ -z "${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}" ]; then
54+
MISSING_SECRETS+=("AZURE_STORAGE_CONNECTION_STRING")
55+
fi
56+
57+
if [ -z "${{ secrets.AZURE_STORAGE_CONTAINER_NAME }}" ]; then
58+
MISSING_SECRETS+=("AZURE_STORAGE_CONTAINER_NAME")
59+
fi
60+
61+
if [ -z "${{ secrets.AZURE_STORAGE_BLOB_NAME }}" ]; then
62+
MISSING_SECRETS+=("AZURE_STORAGE_BLOB_NAME")
63+
fi
64+
65+
66+
# If any secrets are missing, fail
67+
if [ ${#MISSING_SECRETS[@]} -gt 0 ]; then
68+
echo "missing=true" >> $GITHUB_OUTPUT
69+
echo "secrets_list=${MISSING_SECRETS[*]}" >> $GITHUB_OUTPUT
70+
echo " Missing required secrets: ${MISSING_SECRETS[*]}"
71+
exit 1
72+
else
73+
echo "missing=false" >> $GITHUB_OUTPUT
74+
echo " All required secrets are configured"
75+
fi
76+
77+
- name: Comment PR with missing secrets error
78+
if: failure() && steps.validate_secrets.outputs.missing == 'true'
79+
uses: actions/github-script@v7
80+
with:
81+
script: |
82+
const missingSecrets = '${{ steps.validate_secrets.outputs.secrets_list }}'.split(' ');
83+
const secretsList = missingSecrets.map(s => `- \`${s}\``).join('\n');
84+
85+
const comment = `## DeepEval Tests: Missing Required Secrets
86+
87+
The DeepEval RAG system tests cannot run because the following GitHub secrets are not configured:
88+
89+
${secretsList}
90+
91+
### How to Fix
92+
93+
1. Go to **Settings** → **Secrets and variables** → **Actions**
94+
2. Add the missing secrets with the appropriate values:
95+
96+
**Azure OpenAI Configuration:**
97+
- \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI resource endpoint (e.g., \`https://your-resource.openai.azure.com/\`)
98+
- \`AZURE_OPENAI_API_KEY\` - Your Azure OpenAI API key
99+
- \`AZURE_OPENAI_DEPLOYMENT\` - Chat model deployment name (e.g., \`gpt-4o-mini\`)
100+
- \`AZURE_OPENAI_EMBEDDING_DEPLOYMENT\` - Embedding model deployment name (e.g., \`text-embedding-3-large\`)
101+
- \`AZURE_STORAGE_CONNECTION_STRING\` - Connection string for Azure Blob Storage
102+
- \`AZURE_STORAGE_CONTAINER_NAME\` - Container name in Azure Blob Storage
103+
- \`AZURE_STORAGE_BLOB_NAME\` - Blob name for dataset in Azure
104+
- \`AZURE_OPENAI_DEEPEVAL_DEPLOYMENT\` - DeepEval model deployment name (e.g., \`gpt-4.1\`)
105+
106+
3. Re-run the workflow after adding the secrets
107+
108+
### Note
109+
Tests will not run until all required secrets are configured.
110+
111+
---
112+
*Workflow: ${context.workflow} | Run: [#${context.runNumber}](${context.payload.repository.html_url}/actions/runs/${context.runId})*`;
113+
114+
// Find existing comment
115+
const comments = await github.rest.issues.listComments({
116+
owner: context.repo.owner,
117+
repo: context.repo.repo,
118+
issue_number: context.issue.number
119+
});
120+
121+
const existingComment = comments.data.find(
122+
comment => comment.user.login === 'github-actions[bot]' &&
123+
comment.body.includes('DeepEval Tests: Missing Required Secrets')
124+
);
125+
126+
if (existingComment) {
127+
await github.rest.issues.updateComment({
128+
owner: context.repo.owner,
129+
repo: context.repo.repo,
130+
comment_id: existingComment.id,
131+
body: comment
132+
});
133+
} else {
134+
await github.rest.issues.createComment({
135+
owner: context.repo.owner,
136+
repo: context.repo.repo,
137+
issue_number: context.issue.number,
138+
body: comment
139+
});
140+
}
141+
20142
- name: Set up Python
143+
if: success()
21144
uses: actions/setup-python@v5
22145
with:
23146
python-version-file: '.python-version'
24-
147+
25148
- name: Set up uv
149+
if: success()
26150
uses: astral-sh/setup-uv@v6
27-
151+
28152
- name: Install dependencies (locked)
153+
if: success()
29154
run: uv sync --frozen
30-
31-
- name: Run DeepEval tests
155+
156+
- name: Create test directories with proper permissions
157+
if: success()
158+
run: |
159+
mkdir -p test-vault/agents/llm
160+
mkdir -p test-vault/agent-out
161+
# Set ownership to current user and make writable
162+
sudo chown -R $(id -u):$(id -g) test-vault
163+
chmod -R 777 test-vault
164+
# Ensure the agent-out directory is world-readable after writes
165+
sudo chmod -R a+rwX test-vault/agent-out
166+
167+
- name: Set up Deepeval with azure
168+
if: success()
169+
run: |
170+
uv run deepeval set-azure-openai \
171+
--openai-endpoint "${{ secrets.AZURE_OPENAI_ENDPOINT }}" \
172+
--openai-api-key "${{ secrets.AZURE_OPENAI_API_KEY }}" \
173+
--deployment-name "${{ secrets.AZURE_OPENAI_DEPLOYMENT }}" \
174+
--openai-model-name "${{ secrets.AZURE_OPENAI_DEEPEVAL_DEPLOYMENT }}" \
175+
--openai-api-version="2024-12-01-preview"
176+
177+
- name: Run DeepEval tests with testcontainers
178+
if: success()
32179
id: run_tests
180+
continue-on-error: true
33181
env:
34-
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
35-
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
36-
run: uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short
37-
182+
# LLM API Keys
183+
AZURE_OPENAI_DEEPEVAL_DEPLOYMENT: ${{ secrets.AZURE_OPENAI_DEEPEVAL_DEPLOYMENT }}
184+
# Azure OpenAI - Chat Model
185+
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
186+
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
187+
AZURE_OPENAI_DEPLOYMENT: ${{ secrets.AZURE_OPENAI_DEPLOYMENT }}
188+
# Azure OpenAI - Embedding Model
189+
AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ secrets.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }}
190+
# Evaluation mode
191+
AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
192+
AZURE_STORAGE_CONTAINER_NAME: ${{ secrets.AZURE_STORAGE_CONTAINER_NAME }}
193+
AZURE_STORAGE_BLOB_NAME: ${{ secrets.AZURE_STORAGE_BLOB_NAME }}
194+
EVAL_MODE: "true"
195+
run: |
196+
# Run tests with testcontainers managing Docker Compose
197+
uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short --log-cli-level=INFO
198+
199+
- name: Fix permissions on test artifacts
200+
if: always()
201+
run: |
202+
sudo chown -R $(id -u):$(id -g) test-vault || true
203+
sudo chmod -R a+rX test-vault || true
204+
38205
- name: Generate evaluation report
39206
if: always()
40-
run: python tests/deepeval_tests/report_generator.py
41-
207+
run: uv run python tests/deepeval_tests/report_generator.py
208+
209+
- name: Save test artifacts
210+
if: always()
211+
uses: actions/upload-artifact@v4
212+
with:
213+
name: test-results
214+
path: |
215+
pytest_captured_results.json
216+
test_report.md
217+
retention-days: 30
218+
42219
- name: Comment PR with test results
43220
if: always() && github.event_name == 'pull_request'
44221
uses: actions/github-script@v7
45222
with:
46223
script: |
47224
const fs = require('fs');
48-
49225
try {
50226
const reportContent = fs.readFileSync('test_report.md', 'utf8');
51-
52227
const comments = await github.rest.issues.listComments({
53228
owner: context.repo.owner,
54229
repo: context.repo.repo,
@@ -57,7 +232,7 @@ jobs:
57232
58233
const existingComment = comments.data.find(
59234
comment => comment.user.login === 'github-actions[bot]' &&
60-
comment.body.includes('RAG System Evaluation Report')
235+
comment.body.includes('RAG System Evaluation Report')
61236
);
62237
63238
if (existingComment) {
@@ -75,36 +250,35 @@ jobs:
75250
body: reportContent
76251
});
77252
}
78-
79253
} catch (error) {
80254
console.error('Failed to post test results:', error);
81-
82255
await github.rest.issues.createComment({
83256
issue_number: context.issue.number,
84257
owner: context.repo.owner,
85258
repo: context.repo.repo,
86259
body: `## RAG System Evaluation Report\n\n**Error generating test report**\n\nFailed to read or post test results. Check workflow logs for details.\n\nError: ${error.message}`
87260
});
88261
}
89-
262+
90263
- name: Check test results and fail if needed
91264
if: always()
92265
run: |
93-
# Check if pytest ran (look at step output)
94-
if [ "${{ steps.run_tests.outcome }}" == "failure" ]; then
266+
# Check if pytest ran (look at step output)
267+
if [ "${{ steps.run_tests.outcome }}" == "failure" ]; then
95268
echo "Tests ran but failed - this is expected if RAG performance is below threshold"
96-
fi
97-
if [ -f "pytest_captured_results.json" ]; then
269+
fi
270+
271+
if [ -f "pytest_captured_results.json" ]; then
98272
total_tests=$(jq '.total_tests // 0' pytest_captured_results.json)
99273
passed_tests=$(jq '.passed_tests // 0' pytest_captured_results.json)
100-
274+
101275
if [ "$total_tests" -eq 0 ]; then
102276
echo "ERROR: No tests were executed"
103277
exit 1
104278
fi
105-
279+
106280
pass_rate=$(awk "BEGIN {print ($passed_tests / $total_tests) * 100}")
107-
281+
108282
echo "DeepEval Test Results:"
109283
echo "Total Tests: $total_tests"
110284
echo "Passed Tests: $passed_tests"
@@ -117,7 +291,13 @@ jobs:
117291
else
118292
echo "TEST SUCCESS: Pass rate $pass_rate% meets threshold 70%"
119293
fi
120-
else
294+
else
121295
echo "ERROR: No test results file found"
122296
exit 1
123-
fi
297+
fi
298+
299+
- name: Cleanup Docker resources
300+
if: always()
301+
run: |
302+
docker compose -f docker-compose-eval.yml down -v --remove-orphans || true
303+
docker system prune -f || true

0 commit comments

Comments (0)