Skip to content

Commit 301a03a

Browse files
authored
Merge pull request #92 from buerokratt/wip
Update inference results and budget (buerokratt#181)
2 parents 8ce90c3 + 58b23a1 commit 301a03a

File tree

11 files changed

+1294
-115
lines changed

11 files changed

+1294
-115
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
-- Returns the most recently created 'testing' LLM connection together with a
-- derived budget_status ('within_budget' | 'close_to_exceed' | 'over_budget').
--
-- Usage is measured as used_budget / monthly_budget. The warn/stop thresholds
-- are compared as percentages (divided by 100). A stop_budget_threshold of 0
-- falls back to a 100% limit.
--
-- NULLIF guards every division: a monthly_budget of 0 would otherwise raise
-- "division by zero" and fail the whole query. With the guard the ratio is
-- NULL, every comparison is unknown, and the row falls through to
-- ELSE 'within_budget', which is the outcome a NULL monthly_budget already has.
-- NOTE(review): confirm that a zero monthly_budget should read as 'within_budget'.
SELECT
    id,
    connection_name,
    used_budget,
    monthly_budget,
    warn_budget_threshold,
    stop_budget_threshold,
    environment,
    connection_status,
    created_at,
    llm_platform,
    llm_model,
    embedding_platform,
    embedding_model,
    CASE
        -- Nothing spent yet, or usage still below the warning percentage.
        WHEN used_budget IS NULL
            OR used_budget = 0
            OR (used_budget::DECIMAL / NULLIF(monthly_budget::DECIMAL, 0)) < (warn_budget_threshold::DECIMAL / 100.0)
            THEN 'within_budget'
        -- An explicit stop percentage is configured and has been reached.
        WHEN stop_budget_threshold != 0
            AND (used_budget::DECIMAL / NULLIF(monthly_budget::DECIMAL, 0)) >= (stop_budget_threshold::DECIMAL / 100.0)
            THEN 'over_budget'
        -- Stop percentage is 0: treat the full monthly budget as the limit.
        WHEN stop_budget_threshold = 0
            AND (used_budget::DECIMAL / NULLIF(monthly_budget::DECIMAL, 0)) >= 1
            THEN 'over_budget'
        -- At or past the warning percentage but not yet over the stop limit.
        WHEN (used_budget::DECIMAL / NULLIF(monthly_budget::DECIMAL, 0)) >= (warn_budget_threshold::DECIMAL / 100.0)
            THEN 'close_to_exceed'
        -- Reached when the ratio or a threshold is NULL (comparisons unknown).
        ELSE 'within_budget'
    END AS budget_status
FROM llm_connections
WHERE environment = 'testing'
ORDER BY created_at DESC
LIMIT 1;

DSL/Resql/rag-search/POST/store-production-inference-result.sql renamed to DSL/Resql/rag-search/POST/store-inference-result.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ INSERT INTO inference_results (
77
embedding_scores,
88
final_answer,
99
environment,
10+
llm_connection_id,
1011
created_at
1112
) VALUES (
1213
:chat_id,
@@ -17,6 +18,7 @@ INSERT INTO inference_results (
1718
:embedding_scores::JSONB,
1819
:final_answer,
1920
:environment,
21+
:llm_connection_id,
2022
:created_at::timestamp with time zone
2123
) RETURNING
2224
id,
@@ -28,4 +30,5 @@ INSERT INTO inference_results (
2830
embedding_scores,
2931
final_answer,
3032
environment,
33+
llm_connection_id,
3134
created_at;

DSL/Ruuter.private/rag-search/POST/inference/results/test/store.yml

Lines changed: 0 additions & 94 deletions
This file was deleted.

DSL/Ruuter.private/rag-search/POST/inference/results/production/store.yml renamed to DSL/Ruuter.public/rag-search/POST/inference/results/store.yml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ declaration:
2929
- field: final_answer
3030
type: string
3131
description: "LLM's final generated answer"
32+
- field: environment
33+
type: string
34+
description: "Environment identifier (e.g., production, testing)"
35+
- field: llm_connection_id
36+
type: string
37+
description: "Connection identifier"
3238

3339
extract_request_data:
3440
assign:
@@ -39,6 +45,8 @@ extract_request_data:
3945
ranked_chunks: ${JSON.stringify(incoming.body.ranked_chunks) || null}
4046
embedding_scores: ${JSON.stringify(incoming.body.embedding_scores) || null}
4147
final_answer: ${incoming.body.final_answer}
48+
environment: ${incoming.body.environment}
49+
llm_connection_id: ${incoming.body.llm_connection_id}
4250
created_at: ${new Date().toISOString()}
4351
next: validate_required_fields
4452

@@ -51,7 +59,7 @@ validate_required_fields:
5159
store_production_inference_result:
5260
call: http.post
5361
args:
54-
url: "[#RAG_SEARCH_RESQL]/store-production-inference-result"
62+
url: "[#RAG_SEARCH_RESQL]/store-inference-result"
5563
body:
5664
chat_id: ${chat_id}
5765
user_question: ${user_question}
@@ -60,7 +68,8 @@ store_production_inference_result:
6068
ranked_chunks: ${ranked_chunks}
6169
embedding_scores: ${embedding_scores}
6270
final_answer: ${final_answer}
63-
environment: "production"
71+
environment: ${environment}
72+
llm_connection_id: ${llm_connection_id}
6473
created_at: ${created_at}
6574
result: store_result
6675
next: check_status

Dockerfile.llm_orchestration_service

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@ RUN uv sync --locked
2121
EXPOSE 8100
2222

2323
# Run the FastAPI app via uvicorn
24-
CMD ["uv","run","uvicorn", "src.llm_orchestration_service_api:app", "--host", "0.0.0.0", "--port", "8100"]
24+
CMD ["uv","run","uvicorn", "src.llm_orchestration_service_api:app", "--host", "0.0.0.0", "--port", "8100"]

0 commit comments

Comments
 (0)