From 1cfd7f90fd4741540d398f5126dbf16828a320fc Mon Sep 17 00:00:00 2001 From: erangi-ar Date: Mon, 18 Aug 2025 15:49:03 +0530 Subject: [PATCH 01/11] nginx configs --- GUI/nginx-config/global-classifier-dev | 686 +++++++++++++++++++++++++ 1 file changed, 686 insertions(+) create mode 100644 GUI/nginx-config/global-classifier-dev diff --git a/GUI/nginx-config/global-classifier-dev b/GUI/nginx-config/global-classifier-dev new file mode 100644 index 00000000..713d58ba --- /dev/null +++ b/GUI/nginx-config/global-classifier-dev @@ -0,0 +1,686 @@ +# Global Classifier Development Environment +# /etc/nginx/sites-available/global-classifier-dev +# Domain: global-classifier-dev.rootcode.software +# Public IP: 193.40.152.204 +# +# To enable this site: +# sudo ln -s /etc/nginx/sites-available/global-classifier-dev /etc/nginx/sites-enabled/ +# sudo nginx -t +# sudo systemctl reload nginx + +map $http_upgrade $connection_upgrade { + default upgrade; + '' close; +} + +# Upstream definitions for all services +upstream ruuter_public { + server 127.0.0.1:8086; + keepalive 32; +} + +upstream ruuter_private { + server 127.0.0.1:8088; + keepalive 32; +} + +upstream data_mapper { + server 127.0.0.1:3000; + keepalive 32; +} + +upstream tim { + server 127.0.0.1:8085; + keepalive 32; +} + +upstream authentication_layer { + server 127.0.0.1:3004; + keepalive 32; +} + +upstream resql { + server 127.0.0.1:8082; + keepalive 32; +} + +upstream cron_manager { + server 127.0.0.1:9010; + keepalive 32; +} + +upstream mlflow { + server 127.0.0.1:5001; + keepalive 32; +} + +upstream minio_api { + server 127.0.0.1:9000; + keepalive 32; +} + +upstream minio_console { + server 127.0.0.1:9001; + keepalive 32; +} + +upstream gc_s3_ferry { + server 127.0.0.1:3006; + keepalive 32; +} + +upstream opensearch_node { + server 127.0.0.1:9200; + keepalive 32; +} + +upstream opensearch_dashboards { + server 127.0.0.1:5601; + keepalive 32; +} + +upstream notifications_node { + server 127.0.0.1:4040; + keepalive 32; +} + +upstream gui { + server 127.0.0.1:3003; + keepalive 32; +} + +upstream dataset_gen_service { + server 127.0.0.1:8000; + keepalive 32; +} + +upstream dataset_gen_ollama { + server 127.0.0.1:11434; + keepalive 32; +} + +upstream triton_production_server_http { + server 127.0.0.1:6000; + keepalive 32; +} + +upstream triton_production_server_grpc { + server 127.0.0.1:6001; + keepalive 32; +} + +upstream triton_production_server_metrics { + server 127.0.0.1:6002; + keepalive 32; +} + +upstream triton_test_server_http { + server 127.0.0.1:4000; + keepalive 32; +} + +upstream triton_test_server_grpc { + server 127.0.0.1:4001; + keepalive 32; +} + +upstream triton_test_server_metrics { + server 127.0.0.1:4002; + keepalive 32; +} + +upstream grafana { + server 127.0.0.1:4005; + keepalive 32; +} + +upstream loki { + server 127.0.0.1:3100; + keepalive 32; +} + +# Rate limiting zones +limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s; +limit_req_zone $binary_remote_addr zone=inference:10m rate=5r/s; +limit_req_zone $binary_remote_addr zone=general:10m rate=50r/s; + +# Main server block +server { + listen 80; + server_name global-classifier-dev.rootcode.software 193.40.152.204; + + # Basic security headers + add_header X-Frame-Options SAMEORIGIN always; + add_header X-Content-Type-Options nosniff always; + add_header X-XSS-Protection "1; mode=block" always; + add_header Referrer-Policy "strict-origin-when-cross-origin" always; + + # Client settings + client_max_body_size 100M; + client_body_timeout 60s; + client_header_timeout 60s; + + # Logging + access_log /var/log/nginx/global-classifier-dev-access.log; + error_log /var/log/nginx/global-classifier-dev-error.log; + + # Main GUI application (default route) + # React Dev Server with Hot Reload + location / { + limit_req zone=general burst=20 nodelay; + + # This ensures all requests under / go to the dev server + proxy_pass http://127.0.0.1:3003; + + # WebSocket and HTTP headers + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Don't cache WebSocket upgrades + proxy_cache_bypass $http_upgrade; + + # Timeout settings for dev + proxy_read_timeout 300s; + proxy_connect_timeout 300s; + proxy_send_timeout 300s; + } + + # Ruuter Public + location /ruuter-public/ { + limit_req zone=api burst=50 nodelay; + rewrite ^/ruuter-public/(.*) /$1 break; + proxy_pass http://ruuter_public; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # CORS headers + add_header Access-Control-Allow-Origin "*" always; + add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS, PATCH" always; + add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization,X-API-Key" always; + add_header Access-Control-Expose-Headers "Content-Length,Content-Range" always; + + if ($request_method = 'OPTIONS') { + add_header Access-Control-Max-Age 1728000; + add_header Content-Type 'text/plain; charset=utf-8'; + add_header Content-Length 0; + return 204; + } + } + + # Ruuter Private + location /ruuter-private/ { + limit_req zone=api burst=30 nodelay; + rewrite ^/ruuter-private/(.*) /$1 break; + proxy_pass http://ruuter_private; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Internal API - more restrictive CORS + add_header Access-Control-Allow-Origin "$host" always; + add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always; + add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" always; + } + + # Data Mapper + location /data-mapper/ { + limit_req zone=api burst=20 nodelay; + rewrite ^/data-mapper/(.*) /$1 break; + proxy_pass http://data_mapper; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # TIM (Token Identity Management) + location /tim/ { + limit_req zone=api burst=10 nodelay; + rewrite ^/tim/(.*) /$1 break; + proxy_pass http://tim; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Authentication Layer + location /authentication-layer/ { + limit_req zone=api burst=20 nodelay; + rewrite ^/authentication-layer/(.*) /$1 break; + proxy_pass http://authentication_layer; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Authentication specific headers + proxy_set_header X-Original-URI $request_uri; + proxy_set_header X-Original-Method $request_method; + } + + # ResQL + location /resql/ { + limit_req zone=api burst=30 nodelay; + rewrite ^/resql/(.*) /$1 break; + proxy_pass http://resql; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Cron Manager + location /cron-manager/ { + limit_req zone=api burst=10 nodelay; + rewrite ^/cron-manager/(.*) /$1 break; + proxy_pass http://cron_manager; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # MLflow + location /mlflow/ { + limit_req zone=general burst=30 nodelay; + proxy_pass http://mlflow/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Prefix /mlflow; + + # MLflow specific settings + proxy_buffering off; + proxy_request_buffering off; + } + + # MinIO (S3 API) + location /minio/ { + limit_req zone=api burst=100 nodelay; + rewrite ^/minio/(.*) /$1 break; + proxy_pass http://minio_api; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # MinIO specific headers + proxy_set_header X-Forwarded-Host $host; + proxy_connect_timeout 300; + proxy_http_version 1.1; + proxy_set_header Connection ""; + chunked_transfer_encoding off; + + # Large file upload support + proxy_request_buffering off; + proxy_max_temp_file_size 0; + } + + # MinIO Console (separate location for web UI) + location /minio-console/ { + limit_req zone=general burst=20 nodelay; + proxy_pass http://minio_console/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Prefix /minio-console; + + # WebSocket support for MinIO console + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + + # GC S3 Ferry + location /gc-s3-ferry/ { + limit_req zone=api burst=50 nodelay; + rewrite ^/gc-s3-ferry/(.*) /$1 break; + proxy_pass http://gc_s3_ferry; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # OpenSearch Node + location /opensearch-node/ { + limit_req zone=api burst=50 nodelay; + rewrite ^/opensearch-node/(.*) /$1 break; + proxy_pass http://opensearch_node; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # OpenSearch specific settings + proxy_send_timeout 300s; + proxy_read_timeout 300s; + } + + # OpenSearch Dashboards + location /opensearch-dashboards/ { + limit_req zone=general burst=20 nodelay; + proxy_pass http://opensearch_dashboards/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Prefix /opensearch-dashboards; + + # WebSocket support + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + + # Notifications Node + location /notifications-node/ { + limit_req zone=api burst=100 nodelay; + rewrite ^/notifications-node/(.*) /$1 break; + proxy_pass http://notifications_node; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # WebSocket support for real-time notifications + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_cache_bypass $http_upgrade; + } + + # GUI (also accessible via container name) + location /gui/ { + limit_req zone=general burst=20 nodelay; + rewrite ^/gui/(.*) /$1 break; + proxy_pass http://gui; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # WebSocket support for development hot reload + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_cache_bypass $http_upgrade; + } + + # Dataset Generator Service + location /dataset-gen-service/ { + limit_req zone=api burst=10 nodelay; + rewrite ^/dataset-gen-service/(.*) /$1 break; + proxy_pass http://dataset_gen_service; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Long timeout for dataset generation + proxy_read_timeout 1800s; + proxy_connect_timeout 300s; + proxy_send_timeout 300s; + } + + # Dataset Generator Ollama + location /dataset-gen-ollama/ { + limit_req zone=inference burst=10 nodelay; + rewrite ^/dataset-gen-ollama/(.*) /$1 break; + proxy_pass http://dataset_gen_ollama; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Long timeouts for AI model requests + proxy_read_timeout 600s; + proxy_connect_timeout 300s; + proxy_send_timeout 300s; + + # Streaming support + proxy_buffering off; + proxy_request_buffering off; + } + + # Triton Production Server - HTTP + location /triton-production-server/ { + limit_req zone=inference burst=20 nodelay; + rewrite ^/triton-production-server/(.*) /$1 break; + proxy_pass http://triton_production_server_http; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Model inference timeouts + proxy_read_timeout 300s; + proxy_connect_timeout 300s; + proxy_send_timeout 300s; + } + + # Triton Production Server - gRPC (separate endpoint) + location /triton-production-server-grpc/ { + limit_req zone=inference burst=20 nodelay; + grpc_pass grpc://triton_production_server_grpc; + grpc_set_header Host $host; + grpc_set_header X-Real-IP $remote_addr; + grpc_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + grpc_set_header X-Forwarded-Proto $scheme; + + # Model inference timeouts + grpc_read_timeout 300s; + grpc_connect_timeout 300s; + grpc_send_timeout 300s; + } + + # Triton Production Server - Metrics + location /triton-production-server-metrics/ { + rewrite ^/triton-production-server-metrics/(.*) /$1 break; + proxy_pass http://triton_production_server_metrics; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Triton Test Server - HTTP + location /triton-test-server/ { + limit_req zone=inference burst=20 nodelay; + rewrite ^/triton-test-server/(.*) /$1 break; + proxy_pass http://triton_test_server_http; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Model inference timeouts + proxy_read_timeout 300s; + proxy_connect_timeout 300s; + proxy_send_timeout 300s; + } + + # Triton Test Server - gRPC (separate endpoint) + location /triton-test-server-grpc/ { + limit_req zone=inference burst=20 nodelay; + grpc_pass grpc://triton_test_server_grpc; + grpc_set_header Host $host; + grpc_set_header X-Real-IP $remote_addr; + grpc_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + grpc_set_header X-Forwarded-Proto $scheme; + + # Model inference timeouts + grpc_read_timeout 300s; + grpc_connect_timeout 300s; + grpc_send_timeout 300s; + } + + # Triton Test Server - Metrics + location /triton-test-server-metrics/ { + rewrite ^/triton-test-server-metrics/(.*) /$1 break; + proxy_pass http://triton_test_server_metrics; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Grafana + location /grafana/ { + limit_req zone=general burst=30 nodelay; + proxy_pass http://grafana/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Prefix /grafana; + + # WebSocket support for Grafana live features + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + + # Loki + location /loki/ { + limit_req zone=api burst=100 nodelay; + rewrite ^/loki/(.*) /$1 break; + proxy_pass http://loki; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Loki log ingestion settings + proxy_read_timeout 300s; + proxy_send_timeout 300s; + } + + # Legacy API endpoints (for backwards compatibility) + # These can be removed once all clients are updated to use container names + location /api/public/ { + return 301 /ruuter-public/$1; + } + + location /api/private/ { + return 301 /ruuter-private/$1; + } + + location /auth/ { + return 301 /authentication-layer/$1; + } + + location /cron/ { + return 301 /cron-manager/$1; + } + + location /s3-ferry/ { + return 301 /gc-s3-ferry/$1; + } + + location /opensearch/ { + return 301 /opensearch-node/$1; + } + + location /notifications/ { + return 301 /notifications-node/$1; + } + + location /dataset-generator/ { + return 301 /dataset-gen-service/$1; + } + + location /ollama/ { + return 301 /dataset-gen-ollama/$1; + } + + location /triton/production/ { + return 301 /triton-production-server/$1; + } + + location /triton/testing/ { + return 301 /triton-test-server/$1; + } + + # Health check endpoint + location /health { + access_log off; + return 200 "healthy\n"; + add_header Content-Type text/plain; + } + + # Nginx status endpoint (restricted access) + location /nginx-status { + stub_status on; + access_log off; + allow 127.0.0.1; + allow 10.0.0.0/8; + allow 172.16.0.0/12; + allow 192.168.0.0/16; + deny all; + } + + # Security - Block common attack vectors + location ~ /\. { + deny all; + access_log off; + log_not_found off; + } + + location ~ ~$ { + deny all; + access_log off; + log_not_found off; + } + + # Error pages + error_page 404 /404.html; + error_page 500 502 503 504 /50x.html; + + location = /404.html { + internal; + return 404 "Page Not Found"; + add_header Content-Type text/plain; + } + + location = /50x.html { + internal; + return 500 "Internal Server Error"; + add_header Content-Type text/plain; + } +} + +# HTTPS server block (ready for SSL certificate) +# Uncomment and configure when SSL certificate is obtained +# server { +# listen 443 ssl http2; +# server_name global-classifier-dev.rootcode.software; +# +# # SSL Configuration +# ssl_certificate /etc/ssl/certs/global-classifier-dev.rootcode.software.pem; +# ssl_certificate_key /etc/ssl/private/global-classifier-dev.rootcode.software.key; +# +# # Modern SSL configuration +# ssl_protocols TLSv1.2 TLSv1.3; +# ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384; +# ssl_prefer_server_ciphers off; +# ssl_session_cache shared:SSL:10m; +# ssl_session_timeout 1d; +# ssl_session_tickets off; +# +# # HSTS +# add_header Strict-Transport-Security "max-age=63072000" always; +# +# # Include all the same location blocks as the HTTP server above +# } \ No newline at end of file From 928ce0006ea4800f4e02f8320aa4aadc4b59f82f Mon Sep 17 00:00:00 2001 From: erangi-ar Date: Mon, 18 Aug 2025 15:57:37 +0530 Subject: [PATCH 02/11] fix gui container --- GUI/nginx-config/global-classifier-dev | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GUI/nginx-config/global-classifier-dev b/GUI/nginx-config/global-classifier-dev index 713d58ba..d48f56c4 100644 --- a/GUI/nginx-config/global-classifier-dev +++ b/GUI/nginx-config/global-classifier-dev @@ -405,7 +405,7 @@ server { location /gui/ { limit_req zone=general burst=20 nodelay; rewrite ^/gui/(.*) /$1 break; - proxy_pass http://gui; + proxy_pass http://localhost:3003; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; From e5afe8215d05bc3bc4b169b728813f3cbee50422 Mon Sep 17 00:00:00 2001 From: erangi-ar Date: Mon, 18 Aug 2025 16:20:19 +0530 Subject: [PATCH 03/11] Add allowedHosts configuration to Vite server settings --- docker-compose-dev.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 23ce707d..8bfd9fae 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -330,6 +330,8 @@ services: - PORT=3001 - REACT_APP_SERVICE_ID=conversations,settings,monitoring - REACT_APP_ENABLE_HIDDEN_FEATURES=TRUE + - VITE_HOST=0.0.0.0 + - VITE_ALLOWED_HOSTS=global-classifier-dev.rootcode.software,localhost,127.0.0.1 build: context: ./GUI From aae502bdffca81a14e9fb2f27667d89c68b19bbf Mon Sep 17 00:00:00 2001 From: erangi-ar Date: Tue, 2 Sep 2025 17:50:32 +0530 Subject: [PATCH 04/11] bug fixes --- .../POST/get-all-datamodel-versions.sql | 1 + .../POST/update-datasets-connected-models.sql | 2 +- GUI/src/components/DataTable/index.tsx | 16 +- .../FormTextarea/FormTextarea.scss | 4 +- .../FormElements/FormTextarea/index.tsx | 16 +- .../molecules/DataModelCard/index.tsx | 4 +- .../molecules/DataModelForm/index.tsx | 6 +- GUI/src/pages/DataModels/DataModels.scss | 65 +++++++ GUI/src/pages/DataModels/index.tsx | 169 +++++++++--------- GUI/src/pages/TestModel/index.tsx | 41 ++--- GUI/src/pages/ViewDataset/index.tsx | 42 ++--- GUI/src/utils/commonUtilts.ts | 2 +- GUI/translations/en/common.json | 2 +- 13 files changed, 228 insertions(+), 142 deletions(-) diff --git a/DSL/Resql/global-classifier/POST/get-all-datamodel-versions.sql b/DSL/Resql/global-classifier/POST/get-all-datamodel-versions.sql index 7b5b6b7e..306b4f55 100644 --- a/DSL/Resql/global-classifier/POST/get-all-datamodel-versions.sql +++ b/DSL/Resql/global-classifier/POST/get-all-datamodel-versions.sql @@ -4,4 +4,5 @@ SELECT major, minor FROM public.data_models +WHERE training_status = 'trained' ORDER BY model_id; \ No newline at end of file diff --git a/DSL/Resql/global-classifier/POST/update-datasets-connected-models.sql b/DSL/Resql/global-classifier/POST/update-datasets-connected-models.sql index 80939209..0dbc3dbf 100644 --- a/DSL/Resql/global-classifier/POST/update-datasets-connected-models.sql +++ b/DSL/Resql/global-classifier/POST/update-datasets-connected-models.sql @@ -8,7 +8,7 @@ SET ELSE connected_models END, - updated_at = CURRENT_TIMESTAMP + last_trained = CURRENT_TIMESTAMP WHERE id = :datasetId RETURNING diff --git a/GUI/src/components/DataTable/index.tsx b/GUI/src/components/DataTable/index.tsx index 5673a6ad..8b9bbbf2 100644 --- a/GUI/src/components/DataTable/index.tsx +++ b/GUI/src/components/DataTable/index.tsx @@ -32,6 +32,7 @@ import { Icon, Track } from 'components'; import Filter from './Filter'; import './DataTable.scss'; import DropdownFilter from './DropdownFilter'; +import NoDataView from 'components/molecules/NoDataView'; type DataTableProps = { data: any; @@ -209,7 +210,15 @@ const DataTable: FC = ( )} - {tableBodyPrefix} + {!data || data.length === 0 ? ( + + + + + + ):( + <> + {tableBodyPrefix} {table.getRowModel().rows.map((row) => ( {row.getVisibleCells().map((cell) => ( @@ -217,6 +226,11 @@ const DataTable: FC = ( ))} ))} + + ) + + } + {pagination && ( diff --git a/GUI/src/components/FormElements/FormTextarea/FormTextarea.scss b/GUI/src/components/FormElements/FormTextarea/FormTextarea.scss index ff1971ac..51750b6d 100644 --- a/GUI/src/components/FormElements/FormTextarea/FormTextarea.scss +++ b/GUI/src/components/FormElements/FormTextarea/FormTextarea.scss @@ -50,7 +50,7 @@ &__max-length-top { position: absolute; top: 10px; - right: 8px; + right: 20px; font-size: $veera-font-size-80; color: get-color(black-coral-12); pointer-events: none; @@ -103,7 +103,7 @@ &--maxlength-shown { textarea { - padding-right: 70px; + padding-right: 90px; } } } diff --git a/GUI/src/components/FormElements/FormTextarea/index.tsx b/GUI/src/components/FormElements/FormTextarea/index.tsx index 4190c782..eb63dc23 100644 --- a/GUI/src/components/FormElements/FormTextarea/index.tsx +++ b/GUI/src/components/FormElements/FormTextarea/index.tsx @@ -25,18 +25,25 @@ const FormTextarea = forwardRef(( maxLengthBottom, defaultValue, onChange, + className, ...rest }, ref, ) => { const id = useId(); const [currentLength, setCurrentLength] = useState((typeof defaultValue === 'string' && defaultValue.length) || 0); + const textareaClasses = clsx( 'textarea', disabled && 'textarea--disabled', showMaxLength && 'textarea--maxlength-shown', ); + const textareaAutosizeClasses = clsx( + className, + showMaxLength && 'textarea--maxlength-shown' + ); + const handleOnChange = (e: ChangeEvent) => { if (showMaxLength) { setCurrentLength(e.target.value.length); @@ -47,6 +54,9 @@ const FormTextarea = forwardRef((
{label && !hideLabel && }
+ {showMaxLength && ( +
{currentLength}/{maxLength}
+ )} (( maxRows={maxRows} ref={ref} defaultValue={defaultValue} + className={textareaAutosizeClasses} aria-label={hideLabel ? label : undefined} onChange={(e) => { if (onChange) onChange(e); @@ -61,12 +72,9 @@ const FormTextarea = forwardRef(( }} {...rest} /> - {showMaxLength && ( -
{currentLength}/{maxLength}
- )}
); }); -export default FormTextarea; +export default FormTextarea; \ No newline at end of file diff --git a/GUI/src/components/molecules/DataModelCard/index.tsx b/GUI/src/components/molecules/DataModelCard/index.tsx index c14b3064..6723b782 100644 --- a/GUI/src/components/molecules/DataModelCard/index.tsx +++ b/GUI/src/components/molecules/DataModelCard/index.tsx @@ -120,7 +120,7 @@ const DataModelCard: FC> = ({

{t('dataModels.dataModelCard.lastTrained') ?? ''}:{' '} - {lastTrained && formatDate(new Date(lastTrained), 'D.M.yy-H:m')} + {lastTrained ? formatDate(new Date(lastTrained), 'D.M.yy-H:m'):"N/A"}

@@ -144,7 +144,7 @@ const DataModelCard: FC> = ({ ), size: 'large', content: ( -
+
{results ? ( ) : ( diff --git a/GUI/src/components/molecules/DataModelForm/index.tsx b/GUI/src/components/molecules/DataModelForm/index.tsx index fcacb5b7..c7e6b324 100644 --- a/GUI/src/components/molecules/DataModelForm/index.tsx +++ b/GUI/src/components/molecules/DataModelForm/index.tsx @@ -91,8 +91,8 @@ const DataModelForm: FC = ({ onSelectionChange={(selection) => { handleChange('datasetId', selection?.value); }} - value={dataModel?.datasetId === null && t('dataModels.dataModelForm.errors.datasetVersionNotExist')} - defaultValue={dataModel?.datasetId ? dataModel?.datasetId : t('dataModels.dataModelForm.errors.datasetVersionNotExist')} + value={dataModel?.datasetId === null && ""} + defaultValue={dataModel?.datasetId ? dataModel?.datasetId : ""} error={errors?.datasetId} />
@@ -154,3 +154,5 @@ const DataModelForm: FC = ({ }; export default DataModelForm; + + diff --git a/GUI/src/pages/DataModels/DataModels.scss b/GUI/src/pages/DataModels/DataModels.scss index 49197f97..2df62fa4 100644 --- a/GUI/src/pages/DataModels/DataModels.scss +++ b/GUI/src/pages/DataModels/DataModels.scss @@ -89,4 +89,69 @@ body { padding: 2rem; border: 1px solid #D73E3E; margin-bottom: 2rem; +} + +.search-panel { + width: 100%; + margin-bottom: 20px; +} + +.models-filter-div { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); + gap: 16px; + width: 100%; + + @media (max-width: 768px) { + grid-template-columns: 1fr; + gap: 12px; + } + + @media (min-width: 769px) and (max-width: 1024px) { + grid-template-columns: repeat(2, 1fr); + } + + @media (min-width: 1025px) { + grid-template-columns: repeat(4, 1fr) auto; + } +} + +.filter-reset-button { + display: flex; + justify-content: center; + + @media (max-width: 1024px) { + justify-content: stretch; + + button { + width: fit-content; + } + } +} + +// Additional responsive utilities +@media (max-width: 768px) { + .container { + padding: 16px 12px; + } + + .title_container { + flex-direction: column; + gap: 16px; + align-items: stretch; + + .title { + text-align: center; + } + } + + .grid-container { + grid-template-columns: 1fr; + } +} + +@media (min-width: 769px) and (max-width: 1024px) { + .grid-container { + grid-template-columns: repeat(2, 1fr); + } } \ No newline at end of file diff --git a/GUI/src/pages/DataModels/index.tsx b/GUI/src/pages/DataModels/index.tsx index 3ea98ac7..5ee8ac33 100644 --- a/GUI/src/pages/DataModels/index.tsx +++ b/GUI/src/pages/DataModels/index.tsx @@ -87,94 +87,87 @@ const DataModels: FC = () => { {t('dataModels.createModel')}
-
-
- - - handleFilterChange('modelStatus', selection?.value ?? '') - } - defaultValue={filters?.modelStatus} - style={{ width: '15%' }} - /> - - - handleFilterChange('trainingStatus', selection?.value) - } - defaultValue={filters?.trainingStatus} - style={{ width: '15%' }} - /> - - handleFilterChange('deploymentEnvironment', selection?.value) - } - defaultValue={filters?.deploymentEnvironment} - style={{ width: '25%' }} - /> - - handleFilterChange('sort', selection?.value) - } - defaultValue={filters?.sort} - style={{ width: '25%' }} - /> - -
- -
+
+
+ + handleFilterChange('modelStatus', selection?.value ?? '') + } + defaultValue={filters?.modelStatus} + /> + + + handleFilterChange('trainingStatus', selection?.value) + } + defaultValue={filters?.trainingStatus} + /> + + + handleFilterChange('deploymentEnvironment', selection?.value) + } + defaultValue={filters?.deploymentEnvironment} + /> + + + handleFilterChange('sort', selection?.value) + } + defaultValue={filters?.sort} + /> + +
+ +
+
+
{prodDataModel != null &&

Deployed Model

diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx index 2f980dfd..77aa1847 100644 --- a/GUI/src/pages/TestModel/index.tsx +++ b/GUI/src/pages/TestModel/index.tsx @@ -70,24 +70,24 @@ const TestModel: FC = () => { }, }); -const processClassificationResult = (result: any) => { - if (!result || !Array.isArray(result) || result.length === 0) return []; - - // Get the first array (which contains the classification results) - const resultData = result[0]; - - // Check if resultData is an array of classification objects - if (!Array.isArray(resultData)) return []; - - return resultData.map((item: any, index: number) => { - return { - rank: index + 1, - agencyId: item.agency_id, - agencyName: item.agency_name?.replace(/_/g, ' ') || `Agency ${item.agency_id}`, - confidence: item.confidence || 0 - }; - }).sort((a, b) => b.confidence - a.confidence); // Sort by confidence descending -}; + const processClassificationResult = (result: any) => { + if (!result || !Array.isArray(result) || result.length === 0) return []; + + // Get the first array (which contains the classification results) + const resultData = result[0]; + + // Check if resultData is an array of classification objects + if (!Array.isArray(resultData)) return []; + + return resultData.map((item: any, index: number) => { + return { + rank: index + 1, + agencyId: item.agency_id, + agencyName: item.agency_name?.replace(/_/g, ' ') || `Agency ${item.agency_id}`, + confidence: item.confidence || 0 + }; + }).sort((a, b) => b.confidence - a.confidence); // Sort by confidence descending + }; const processedResults = classificationResult ? processClassificationResult(classificationResult) : []; @@ -112,6 +112,7 @@ const processClassificationResult = (result: any) => { placeholder={t('testModels.placeholder') ?? ''} onSelectionChange={(selection) => { handleChange('modelId', selection?.value as string); + setIsClassifyEnabled(false); }} value={testModel?.modelId === null ? t('testModels.errors.modelNotExist') : undefined} defaultValue={testModel?.modelId ?? undefined} /> @@ -135,8 +136,8 @@ const processClassificationResult = (result: any) => {
diff --git a/GUI/src/pages/ViewDataset/index.tsx b/GUI/src/pages/ViewDataset/index.tsx index 9f1c647a..2b8e7869 100644 --- a/GUI/src/pages/ViewDataset/index.tsx +++ b/GUI/src/pages/ViewDataset/index.tsx @@ -36,14 +36,14 @@ const ViewDataset = () => { const [editedRows, setEditedRows] = useState([]); const [selectedAgencyId, setSelectedAgencyId] = useState("all"); const [originalDataset, setOriginalDataset] = useState([]); - const [updatePayload, setUpdatePayload] = useState<{ + const [updatePayload, setUpdatePayload] = useState<{ updatedDataItems: SelectedRowPayload[]; deletedRows: (string | number)[]; updatedRowsLength: number; deletedRowsLength: number; } | null>(null); const navigate = useNavigate(); - const { data: metadata, isLoading: isMetadataLoading,refetch: refetchMetadata } = useQuery({ + const { data: metadata, isLoading: isMetadataLoading, refetch: refetchMetadata } = useQuery({ queryKey: datasetQueryKeys.GET_META_DATA(datasetVersionId ?? 0), queryFn: () => getDatasetMetadata(datasetVersionId ?? 0), }); @@ -242,9 +242,9 @@ const ViewDataset = () => { setDeletedRowIds([]); }, 3000); await Promise.all([ - refetchMetadata(), - refetchDataset() - ]); + refetchMetadata(), + refetchDataset() + ]); }, onError: () => { setIsUpdating(false); @@ -258,8 +258,8 @@ const ViewDataset = () => { }, }); - - const minorUpdate = () => { + + const minorUpdate = () => { setIsProgressModalOpen(true); // Create payload inside the function @@ -310,7 +310,7 @@ const ViewDataset = () => { {metadata && !isMetadataLoading && (
-
+
@@ -321,17 +321,21 @@ const ViewDataset = () => { isHeaderLight={false} >
-
+

{t('datasets.detailedView.version') ?? ''} : {`V${metadata?.major}.${metadata?.minor}`}

-
-
-

{t('datasets.detailedView.connectedModels') ?? ''} :

{metadata?.connectedModels?.join(', ') ?? ''}

+

{t('datasets.detailedView.connectedModels') ?? ''} : + {metadata?.connectedModels.length>0 ? metadata?.connectedModels?.join(', ') : 'N/A'}

{t('datasets.detailedView.noOfItems') ?? ''} : {metadata?.totalDataCount ?? "-"}

+
@@ -352,7 +356,7 @@ const ViewDataset = () => {
{datasetIsLoading && } - {!datasetIsLoading && updatedDataset && updatedDataset?.length > 0 && ( + {!datasetIsLoading && ( []} @@ -387,11 +391,7 @@ const ViewDataset = () => { isClientSide={false} /> )} - { - updatedDataset?.length === 0 && ( - - ) - } +
@@ -457,7 +459,7 @@ const ViewDataset = () => {
} - > + > {isUpdating ? (

{t('datasets.detailedView.dataBeingUpdated')}

diff --git a/GUI/src/utils/commonUtilts.ts b/GUI/src/utils/commonUtilts.ts index d3b3fc2b..f720386f 100644 --- a/GUI/src/utils/commonUtilts.ts +++ b/GUI/src/utils/commonUtilts.ts @@ -10,7 +10,7 @@ type FormattedOption = { // convert flat array to label, value pairs export const formattedArray = (data: string[]|undefined): FormattedOption[]|undefined => { return data?.map((name) => ({ - label: name, + label: name?.charAt(0).toUpperCase() + name?.slice(1), value: name, })); }; diff --git a/GUI/translations/en/common.json b/GUI/translations/en/common.json index 8bfc8b8b..45e25599 100644 --- a/GUI/translations/en/common.json +++ b/GUI/translations/en/common.json @@ -408,7 +408,7 @@ }, "configureDataModel": { "saveChangesTitile": "Changes Saved Successfully", - "saveChangesDesc": "You have successfully saved the changes. You can view the data model in the \"All Data Models\" view.", + "saveChangesDesc": "You have successfully saved the changes. You can view the data model in the \"Data Models\" view.", "updateErrorTitile": "Error Updating Data Model", "updateErrorDesc": "There was an issue updating the data model. Please try again. If the problem persists, contact support for assistance.", "deleteErrorTitle": "Cannot Delete Model", From 4f68eddb7f17e18be5f7b7f5de228a34e10d0e2c Mon Sep 17 00:00:00 2001 From: erangi-ar Date: Tue, 2 Sep 2025 18:21:44 +0530 Subject: [PATCH 05/11] resolve pr comments --- GUI/src/components/DataTable/index.tsx | 50 +++++++++---------- .../FormElements/FormTextarea/index.tsx | 1 + .../molecules/DataModelCard/DataModel.scss | 5 ++ .../molecules/DataModelCard/index.tsx | 2 +- GUI/src/pages/ViewDataset/index.tsx | 1 - 5 files changed, 32 insertions(+), 27 deletions(-) diff --git a/GUI/src/components/DataTable/index.tsx b/GUI/src/components/DataTable/index.tsx index 4f40e00e..1693489d 100644 --- a/GUI/src/components/DataTable/index.tsx +++ b/GUI/src/components/DataTable/index.tsx @@ -55,9 +55,9 @@ type DataTableProps = { meta?: TableMeta; dropdownFilters?: DropdownFilterConfig[]; onSelect?: (value: string | number) => void | undefined - showPageSizeSelector?: boolean; + showPageSizeSelector?: boolean; pageSizeOptions?: number[]; - rowSelection?: RowSelectionState; + rowSelection?: RowSelectionState; setRowSelection?: (state: RowSelectionState) => void; }; @@ -131,7 +131,7 @@ const DataTable: FC = ( const id = useId(); const { t } = useTranslation(); const [columnFilters, setColumnFilters] = React.useState([]); -const table = useReactTable({ + const table = useReactTable({ data, columns, filterFns: { @@ -153,12 +153,12 @@ const table = useReactTable({ enableRowSelection: !!setRowSelection, onRowSelectionChange: setRowSelection ? (updaterOrValue) => { - if (typeof updaterOrValue === 'function') { - setRowSelection(updaterOrValue(table.getState().rowSelection)); - } else { - setRowSelection(updaterOrValue); - } + if (typeof updaterOrValue === 'function') { + setRowSelection(updaterOrValue(table.getState().rowSelection)); + } else { + setRowSelection(updaterOrValue); } + } : undefined, onSortingChange: (updater) => { if (typeof updater !== 'function') return; @@ -177,14 +177,14 @@ const table = useReactTable({ pageCount: isClientSide ? undefined : pagesCount, }); - const handlePageSizeChange = (newPageSize: number) => { + const handlePageSizeChange = (newPageSize: number) => { if (setPagination && pagination) { setPagination({ - pageIndex: 0, + pageIndex: 0, pageSize: newPageSize, }); } - }; + }; return (
@@ -213,7 +213,7 @@ const table = useReactTable({ const dropdownConfig = dropdownFilters?.find( (df) => df.columnId === header.column.id ); - + if (dropdownConfig) { return ( ); } - + })() )} {filterable && header.column.getCanFilter() && ( @@ -244,26 +244,26 @@ const table = useReactTable({ - ):( + ) : ( <> - {tableBodyPrefix} - {table.getRowModel().rows.map((row) => ( - - {row.getVisibleCells().map((cell) => ( - {flexRender(cell.column.columnDef.cell, cell.getContext())} + {tableBodyPrefix} + {table.getRowModel().rows.map((row) => ( + + {row.getVisibleCells().map((cell) => ( + {flexRender(cell.column.columnDef.cell, cell.getContext())} + ))} + ))} - - ))} ) - - } - + + } + {pagination && (
- {showPageSizeSelector && ( + {showPageSizeSelector && (
{t('global.showEntries') || 'Show'} diff --git a/GUI/src/components/FormElements/FormTextarea/index.tsx b/GUI/src/components/FormElements/FormTextarea/index.tsx index eb63dc23..b1f23fe1 100644 --- a/GUI/src/components/FormElements/FormTextarea/index.tsx +++ b/GUI/src/components/FormElements/FormTextarea/index.tsx @@ -10,6 +10,7 @@ type TextareaProps = TextareaAutosizeProps & { hideLabel?: boolean; showMaxLength?: boolean; maxLengthBottom?: boolean; + className?: string; }; const FormTextarea = forwardRef(( diff --git a/GUI/src/components/molecules/DataModelCard/DataModel.scss b/GUI/src/components/molecules/DataModelCard/DataModel.scss index 32d938a3..0943b5bb 100644 --- a/GUI/src/components/molecules/DataModelCard/DataModel.scss +++ b/GUI/src/components/molecules/DataModelCard/DataModel.scss @@ -11,4 +11,9 @@ .mt-3{ margin-top: 3rem; +} + +.training-results-container { + overflow-y: scroll; + max-height: 70vh; } \ No newline at end of file diff --git a/GUI/src/components/molecules/DataModelCard/index.tsx b/GUI/src/components/molecules/DataModelCard/index.tsx index 6723b782..cc52450e 100644 --- a/GUI/src/components/molecules/DataModelCard/index.tsx +++ b/GUI/src/components/molecules/DataModelCard/index.tsx @@ -144,7 +144,7 @@ const DataModelCard: FC> = ({ ), size: 'large', content: ( -
+
{results ? ( ) : ( diff --git a/GUI/src/pages/ViewDataset/index.tsx b/GUI/src/pages/ViewDataset/index.tsx index 8472c216..e8400132 100644 --- a/GUI/src/pages/ViewDataset/index.tsx +++ b/GUI/src/pages/ViewDataset/index.tsx @@ -542,7 +542,6 @@ const ViewDataset = () => { appearance={ButtonAppearanceTypes.SECONDARY} onClick={() => setIsProgressModalOpen(false)} disabled={isUpdating} - > {t('global.cancel')} From 4bbdcf8cb4a47013d647117b9fbd52acc6e24caf Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Thu, 4 Sep 2025 10:41:50 +0530 Subject: [PATCH 06/11] test and fixed ruff linting issues in integration of dataset generator with eval pipeline --- .env | 7 + DSL/DatasetGenerator/config/config.yaml | 199 ++++++++++--- DSL/DatasetGenerator/config/model_config.yaml | 39 --- DSL/DatasetGenerator/config/redis.conf | 72 +++++ config.env | 1 - docker-compose-dev.yml | 32 ++- .../deployment_orchestrator.py | 241 +++++++++------- src/model-training/constants.py | 28 +- src/model-training/create_triton_configs.py | 14 +- src/model-training/datapipeline.py | 6 +- src/model-training/loki_logger.py | 0 .../post-processing/1/model.py | 1 - .../pre-processing/1/model.py | 2 +- src/model-training/model_trainer.py | 266 +++++++++++------- src/model-training/s3_ferry.py | 6 +- src/model-training/trainingpipeline.py | 8 +- 16 files changed, 593 insertions(+), 329 deletions(-) create mode 100644 .env delete mode 100644 DSL/DatasetGenerator/config/model_config.yaml create mode 100644 DSL/DatasetGenerator/config/redis.conf create mode 100644 src/model-training/loki_logger.py diff --git a/.env b/.env new file mode 100644 index 00000000..a30f1b57 --- /dev/null +++ b/.env @@ -0,0 +1,7 @@ +AWS_ACCESS_KEY_ID=your_aws_access_key_id +AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key +BEDROCK_AWS_REGION=eu-west-1 +AZURE_OPENAI_API_KEY=your_azure_openai_api_key +AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o +PROVIDER_NAME=azure-openai \ No newline at end of file diff --git a/DSL/DatasetGenerator/config/config.yaml b/DSL/DatasetGenerator/config/config.yaml index d3174a6f..dc7a7305 100644 --- a/DSL/DatasetGenerator/config/config.yaml +++ b/DSL/DatasetGenerator/config/config.yaml @@ -1,11 +1,71 @@ -# LLM Provider configuration -provider: - name: "ollama" +# Bedrock Anthropic provider configuration (when provider.name = "bedrock-anthropic") +bedrock_anthropic: + model_name: "eu.anthropic.claude-3-7-sonnet-20250219-v1:0" + aws_region: "eu-west-1" + temperature: 0.7 + max_tokens: 4096 + top_p: 1.0 + tpm_limit: 200000 + stop_sequences: [] + batch_generation: + enabled: true + max_batch_size: 10 + max_tokens_per_batch: 20000 + +# Azure OpenAI provider configuration (when provider.name = "azure-openai") +azure_openai: + # Required: Set via environment variables AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY, AZURE_OPENAI_DEPLOYMENT_NAME + api_version: "2024-12-01-preview" + model_name: "gpt-4o" + temperature: 0.7 + max_tokens: 4096 + top_p: 1.0 + tpm_limit: 200000 + rpm_limit: 6000 + batch_generation: + enabled: true + max_batch_size: 10 + max_tokens_per_batch: 20000 + +# Ollama provider configuration (when provider.name = "ollama") +ollama: model_name: "gemma3:1b-it-qat" - api_url: "http://ollama:11434" + host: "http://ollama:11434" timeout: 60 max_retries: 3 retry_delay: 5 + batch_generation: + enabled: false + +# MAIN PROVIDER SELECTION - THIS IS THE KEY SETTING +provider: + name: "azure-openai" # THIS DETERMINES WHICH PROVIDER TO USE + timeout: 60 + max_retries: 3 + retry_delay: 5 + +# Processing settings +processing: + wait_between_requests: 1 + timeout_seconds: 1300 # 1 hour timeout for dataset processing + max_consecutive_failures: 3 + success_rate_threshold: 0.8 # 80% success rate required + retry_on_failure: true + +# Callback configuration for external project notifications +callback: + url: "http://ruuter-public:8086/global-classifier/data/callback" + timeout: 60.0 # Callback timeout in seconds + retries: 3 # Number of retry attempts + retry_backoff: true # Use exponential backoff + include_error_details: true + include_summary: true + +# Batch generation settings (global) +batch_generation: + enabled: true + max_batch_size: 10 + fallback_on_failure: true # API connection settings api: @@ -18,8 +78,8 @@ directories: output: "output_datasets" templates: "templates" user_configs: "user_configs" - -# Default generation settings (can be overridden per generation request) + +# Default generation settings generation: default_num_examples: 10 default_language: "et" @@ -28,22 +88,42 @@ generation: temperature: 0.7 max_tokens: 4096 +# Language and Prompt Settings +language_settings: + default_system_prompt: "You are a helpful assistant providing accurate information based on topic content." + default_language: "et" + supported_languages: + en: "English" + et: "Estonian" + fi: "Finnish" + +# Storage configuration +storage: + datasets_dir: "datasets" + templates_dir: "templates" + user_configs_dir: "user_configs" + +# Default Output Settings +output_defaults: + save_format: "json" + supported_formats: + - "json" + - "text" + # Dataset generation configuration dataset_generation: structure_name: "single_question" prompt_template_name: "institute_topic_question" - traversal_strategy: "pattern" # Options: "flat", "recursive", "institutional", "pattern" + traversal_strategy: "pattern" output_format: "json" num_samples: 10 - post_processing: "aggregation" # Options: "zip", "aggregation" - # Aggregation-specific configuration (only used when post_processing = "aggregation") + post_processing: "aggregation" aggregation: output_filename: "12" - output_format: "csv" + output_format: "csv" merge_strategy: "combine_arrays" include_metadata: true enable_shuffling: false - field_mapping: enabled: true payload_to_output: @@ -54,15 +134,12 @@ dataset_generation: dataset_version_id: version_id content_fields: question: data_item - - csv_field_order: - - item_id - - agency_name - - agency_id - - data_item - - dataset_version_id - + - item_id + - agency_name + - agency_id + - data_item + - dataset_version_id parameters: language: "et" temperature: 0.7 @@ -72,10 +149,6 @@ dataset_generation: style: "clear and concise" system_prompt: "You are a helpful assistant for generating synthetic questions for given contexts." filter: {} - -# Processing settings -processing: - wait_between_requests: 1 # MLflow tracking mlflow: @@ -89,31 +162,65 @@ data_sources: patterns: ["**/cleaned.txt"] recursive: true -callback: - url: "http://ruuter-public:8086/global-classifier/data/callback" - max_retries: 3 - timeout: 30 -# Relevance Score Analysis -relevance_score: - enabled: true - embedding_model: "paraphrase-multilingual-mpnet-base-v2" - segment_weight: 0.6 - query_weight: 0.3 - term_weight: 0.1 - threshold_good: 0.7 - threshold_acceptable: 0.5 - min_df: 1 - max_df: 0.9 - ngram_range: (1, 2) - -# Information Coverage Analysis -information_coverage: - enabled: true - similarity_threshold: 0.5 # Model settings models: embedding_model: "paraphrase-multilingual-mpnet-base-v2" - qualitative_model: "google/gemma-2-2b-it" - use_4bit_quantization: true \ No newline at end of file + + +# redis configuration +redis: + # Redis connection URL - modify according to your setup + # url: "redis://localhost:6379" # For local Redis + url: "redis://redis:6379" # For Docker Compose setup + # url: "redis://:password@redis-host:6379" # With password + + # Redis database number to use (0-15 typically available) + db: 0 + + # Connection pool settings + max_connections: 10 + retry_on_timeout: true + socket_timeout: 5 + socket_connect_timeout: 5 + +# Embedding-specific settings +embedding: + # Path to topic documents + topic_documents_path: "/app/data" + + # TTL settings for different embedding types + ttl: + # Persistent embeddings (topic documents) - no expiration by default + persistent: null # null means no expiration + # Temporary embeddings (questions) - expire after successful evaluation + temporary: 3600 # 1 hour in seconds + # Failed evaluation sessions - keep longer for potential retry + failed_session: 7200 # 2 hours in seconds + +# Evaluation configuration +evaluation: + # Enable automatic embedding cleanup after successful evaluation + auto_cleanup_on_success: true + # Keep embeddings for failed evaluations (for potential retry) + keep_failed_embeddings: true + # Batch size for embedding operations + embedding_batch_size: 32 + max_regeneration_attempts: 3 + # Evaluation thresholds + thresholds: + topic_coverage_min: 0.7 + information_coverage_min: 0.6 + similarity_coverage_min: 0.6 + overall_min: 0.65 + context_coverage_min: 0.5 + +# Monitoring and logging +monitoring: + # Log cache hit/miss statistics + log_cache_stats: true + # Log embedding operations + log_embedding_operations: true + # Cache statistics reporting interval (seconds) + stats_interval: 300 # 5 minutes \ No newline at end of file diff --git a/DSL/DatasetGenerator/config/model_config.yaml b/DSL/DatasetGenerator/config/model_config.yaml deleted file mode 100644 index 4eee2282..00000000 --- a/DSL/DatasetGenerator/config/model_config.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# Ollama Client and Model Settings -model_name: "gemma3:1b-it-qat" -ollama_host: "http://ollama:11434" -ollama_timeout: 60 -ollama_max_retries: 3 -ollama_retry_delay: 5 - -# Generation defaults -generation_defaults: - temperature: 0.95 - max_tokens_per_response: 5000 - num_predict: 5000 - -# Content Processing -content_processing: - max_content_length: 15000 - content_overlap: 500 - -# Language and Prompt Settings -language_settings: - default_system_prompt: "You are a helpful assistant providing accurate information based on topic content." - default_language: "et" - supported_languages: - en: "English" - et: "Estonian" - fi: "Finnish" - -# Storage configuration -storage: - datasets_dir: "datasets" - templates_dir: "templates" - user_configs_dir: "user_configs" - -# Default Output Settings -output_defaults: - save_format: "json" - supported_formats: - - "json" - - "text" \ No newline at end of file diff --git a/DSL/DatasetGenerator/config/redis.conf b/DSL/DatasetGenerator/config/redis.conf new file mode 100644 index 00000000..8cbc38e8 --- /dev/null +++ b/DSL/DatasetGenerator/config/redis.conf @@ -0,0 +1,72 @@ + +# Network +bind 0.0.0.0 +port 6379 +timeout 0 +tcp-keepalive 300 + +# General +daemonize no +supervised no +pidfile /var/run/redis_6379.pid +loglevel notice +logfile "" +databases 16 + +# Snapshotting +save 900 1 +save 300 10 +save 60 10000 +stop-writes-on-bgsave-error yes +rdbcompression yes +rdbchecksum yes +dbfilename dump.rdb +dir /data + +# Replication +replica-serve-stale-data yes +replica-read-only yes + +# Security +# requirepass yourpassword # Uncomment and set password if needed + +# Memory Management +maxmemory 1gb +maxmemory-policy allkeys-lru + +# Append Only File +appendonly yes +appendfilename "appendonly.aof" +appendfsync everysec +no-appendfsync-on-rewrite no +auto-aof-rewrite-percentage 100 +auto-aof-rewrite-min-size 64mb + +# Lua scripting +lua-time-limit 5000 + +# Slow log +slowlog-log-slower-than 10000 +slowlog-max-len 128 + +# Client output buffer limits +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit replica 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + +# Advanced config +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 +list-max-ziplist-size -2 +list-compress-depth 0 +set-max-intset-entries 512 +zset-max-ziplist-entries 128 +zset-max-ziplist-value 64 +hll-sparse-max-bytes 3000 +stream-node-max-bytes 4096 +stream-node-max-entries 100 +activerehashing yes +hz 10 +dynamic-hz yes +aof-rewrite-incremental-fsync yes +rdb-save-incremental-fsync yes \ No newline at end of file diff --git a/config.env b/config.env index e5eb24e6..0351a028 100644 --- a/config.env +++ b/config.env @@ -14,4 +14,3 @@ GF_SECURITY_ADMIN_USER=admin GF_SECURITY_ADMIN_PASSWORD=admin123 GF_USERS_ALLOW_SIGN_UP=false PORT=3000 - diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 8bfd9fae..a1739783 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -351,10 +351,8 @@ services: container_name: dataset-gen-service ports: - "8000:8000" - environment: - - PROVIDER_API_URL=http://dataset-gen-ollama:11434 - - SERVICE_DEBUG=false - - MLFLOW_TRACKING_URI=http://dataset-gen-mlflow:5000 + env_file: + - .env volumes: - ./DSL/DatasetGenerator/config:/app/config - ./DSL/DatasetGenerator/templates:/app/templates @@ -362,11 +360,29 @@ services: - cron_data:/app/data - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets - ./DSL/DatasetGenerator/logs:/app/logs - depends_on: - - dataset-gen-ollama networks: - bykstack - + + redis: + image: redis:7-alpine + container_name: redis-embeddings + ports: + - "6378:6379" + volumes: + - redis_data:/data + - ./DSL/DatasetGenerator/config/redis.conf:/usr/local/etc/redis/redis.conf + command: redis-server /usr/local/etc/redis/redis.conf + environment: + - REDIS_REPLICATION_MODE=master + restart: unless-stopped + networks: + - bykstack + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 3s + retries: 3 + dataset-gen-ollama: image: synthesisai/dataset-generator-ollama:latest container_name: dataset-gen-ollama @@ -575,6 +591,8 @@ volumes: name: loki-data grafana-data: name: grafana-data + redis_data: + name: redis_data networks: diff --git a/src/inference/inference_scripts/deployment_orchestrator.py b/src/inference/inference_scripts/deployment_orchestrator.py index 3f059bcb..18918e48 100644 --- a/src/inference/inference_scripts/deployment_orchestrator.py +++ b/src/inference/inference_scripts/deployment_orchestrator.py @@ -326,8 +326,8 @@ def get_ensemble_model_names(self) -> list: return [ f"{self.model_id}-classifier-ensemble", f"{self.model_id}-pre-processing", - f"{self.model_id}-post-processing", - f"{self.model_id}-text-classifier" + f"{self.model_id}-post-processing", + f"{self.model_id}-text-classifier", ] def check_server_health(self, server_url: str) -> bool: @@ -356,44 +356,50 @@ def load_model_to_triton(self, server_url: str, model_name: str) -> bool: """Load a model to Triton server""" try: self.log_message(f"Loading model {model_name} to {server_url}") - + response = requests.post( f"{server_url}/v2/repository/models/{model_name}/load" ) - + if response.status_code in [200, 201]: self.log_message(f"Successfully loaded model {model_name}") return True else: - self.log_error(f"Failed to load model {model_name}: HTTP {response.status_code} - {response.text}") + self.log_error( + f"Failed to load model {model_name}: HTTP {response.status_code} - {response.text}" + ) return False - + except requests.exceptions.RequestException as e: self.log_error(f"Error loading model {model_name}: {e}") return False - def unload_model_from_triton(self, server_url: str, model_name: str, unload_dependents: bool = False) -> bool: + def unload_model_from_triton( + self, server_url: str, model_name: str, unload_dependents: bool = False + ) -> bool: """Unload a model from Triton server""" try: self.log_message(f"Unloading model {model_name} from {server_url}") - + payload = {} if unload_dependents: payload["unload_dependents"] = True - + response = requests.post( f"{server_url}/v2/repository/models/{model_name}/unload", json=payload if payload else None, - timeout=60 + timeout=60, ) - + if response.status_code in [200, 201]: self.log_message(f"Successfully unloaded model {model_name}") return True else: - self.log_error(f"Failed to unload model {model_name}: HTTP {response.status_code} - {response.text}") + self.log_error( + f"Failed to unload model {model_name}: HTTP {response.status_code} - {response.text}" + ) return False - + except requests.exceptions.RequestException as e: self.log_error(f"Error unloading model {model_name}: {e}") return False @@ -403,66 +409,79 @@ def get_loaded_models(self, server_url: str) -> list: try: # Use repository index endpoint with ready=true to get only loaded models payload = {"ready": True} - response = requests.post(f"{server_url}/v2/repository/index", json=payload, timeout=30) - + response = requests.post( + f"{server_url}/v2/repository/index", json=payload, timeout=30 + ) + if response.status_code == 200: models_data = response.json() loaded_models = [] - + # The response is an array of model objects for model in models_data: if model.get("state") == "READY": loaded_models.append(model.get("name")) - + return loaded_models else: - self.log_error(f"Failed to get models list: HTTP {response.status_code}") + self.log_error( + f"Failed to get models list: HTTP {response.status_code}" + ) return [] - + except requests.exceptions.RequestException as e: self.log_error(f"Error getting models list: {e}") return [] - def test_model_inference(self, server_url: str, max_retries: int = 10, retry_delay: int = 5) -> bool: + def test_model_inference( + self, server_url: str, max_retries: int = 10, retry_delay: int = 5 + ) -> bool: """Test if the ensemble model can perform inference""" ensemble_model_name = f"{self.model_id}-classifier-ensemble" - + # Simple test input for text classification test_payload = { "inputs": [ { "name": "TEXT", "datatype": "BYTES", - "shape": [1,1], - "data": ["This is a test message for classification."] + "shape": [1, 1], + "data": ["This is a test message for classification."], } ] } - + for attempt in range(max_retries): try: - self.log_message(f"Testing inference for {ensemble_model_name} (attempt {attempt + 1}/{max_retries})") - + self.log_message( + f"Testing inference for {ensemble_model_name} (attempt {attempt + 1}/{max_retries})" + ) + response = requests.post( f"{server_url}/v2/models/{ensemble_model_name}/infer", json=test_payload, - timeout=30 + timeout=30, ) - + if response.status_code == 200: - self.log_message(f"Inference test successful for {ensemble_model_name}") + self.log_message( + f"Inference test successful for {ensemble_model_name}" + ) return True else: - self.log_error(f"Inference test failed: HTTP {response.status_code} - {response.text}") - + self.log_error( + f"Inference test failed: HTTP {response.status_code} - {response.text}" + ) + except requests.exceptions.RequestException as e: self.log_error(f"Inference test error (attempt {attempt + 1}): {e}") - + if attempt < max_retries - 1: self.log_message(f"Waiting {retry_delay} seconds before retry...") import time + time.sleep(retry_delay) - + self.log_error(f"Inference test failed after {max_retries} attempts") return False @@ -470,203 +489,215 @@ def unload_existing_models(self, server_url: str) -> bool: """Unload any existing models with the same model ID""" model_names = self.get_ensemble_model_names() loaded_models = self.get_loaded_models(server_url) - - models_to_unload = [model for model in loaded_models if any(model.startswith(f"{self.model_id}-") for model in model_names)] - + + models_to_unload = [ + model + for model in loaded_models + if any(model.startswith(f"{self.model_id}-") for model in model_names) + ] + if not models_to_unload: self.log_message("No existing models to unload") return True - + self.log_message(f"Found existing models to unload: {models_to_unload}") - + # Unload ensemble first (with dependents) to avoid conflicts ensemble_name = f"{self.model_id}-classifier-ensemble" if ensemble_name in models_to_unload: - if not self.unload_model_from_triton(server_url, ensemble_name, unload_dependents=True): + if not self.unload_model_from_triton( + server_url, ensemble_name, unload_dependents=True + ): return False models_to_unload.remove(ensemble_name) - + # Unload remaining individual models for model_name in models_to_unload: if not self.unload_model_from_triton(server_url, model_name): return False - + return True def load_ensemble_models(self, server_url: str) -> bool: """Load all models for the ensemble in the correct order""" model_names = self.get_ensemble_model_names() - + # Load individual models first (dependencies) - individual_models = [name for name in model_names if not name.endswith("-classifier-ensemble")] + individual_models = [ + name for name in model_names if not name.endswith("-classifier-ensemble") + ] ensemble_model = f"{self.model_id}-classifier-ensemble" - + # Load individual models first for model_name in individual_models: if not self.load_model_to_triton(server_url, model_name): self.log_error(f"Failed to load dependency model {model_name}") return False - + # Load ensemble model last if not self.load_model_to_triton(server_url, ensemble_model): self.log_error(f"Failed to load ensemble model {ensemble_model}") return False - + return True def deploy_model(self) -> bool: """ Deploy model based on current and target environments. - + Returns: bool: True if deployment was successful, False otherwise """ - self.log_message(f"Starting model deployment from {self.current_env} to {self.target_env}") - + self.log_message( + f"Starting model deployment from {self.current_env} to {self.target_env}" + ) + # Case 1: undeployed -> undeployed (no-op) if self.current_env == "undeployed" and self.target_env == "undeployed": - self.log_message("Both current and target environments are 'undeployed'. No deployment needed.") + self.log_message( + "Both current and target environments are 'undeployed'. No deployment needed." + ) return True - + # Case 2: testing -> production elif self.current_env == "testing" and self.target_env == "production": self.log_message("Deploying from testing to production") - + # Check production server health prod_url = self.get_triton_server_url("production") if not self.check_server_health(prod_url): self.log_error("Production server health check failed") return False - + # Unload existing models in production if not self.unload_existing_models(prod_url): self.log_error("Failed to unload existing models from production") return False - + # Load models to production if not self.load_ensemble_models(prod_url): self.log_error("Failed to load models to production") return False - + # Test inference if not self.test_model_inference(prod_url): self.log_error("Production inference test failed") return False - + self.log_message("Successfully deployed model from testing to production") return True - + # Case 3: testing -> undeployed elif self.current_env == "testing" and self.target_env == "undeployed": self.log_message("Undeploying model from testing") - + test_url = self.get_triton_server_url("testing") if not self.check_server_health(test_url): self.log_error("Testing server health check failed") return False - + # Unload models from testing if not self.unload_existing_models(test_url): self.log_error("Failed to unload models from testing") return False - + self.log_message("Successfully undeployed model from testing") return True - + # Case 4: production -> testing elif self.current_env == "production" and self.target_env == "testing": self.log_message("Moving model from production to testing") - + # Unload from production prod_url = self.get_triton_server_url("production") if not self.check_server_health(prod_url): self.log_error("Production server health check failed") return False - + if not self.unload_existing_models(prod_url): self.log_error("Failed to unload models from production") return False - + # Load to testing test_url = self.get_triton_server_url("testing") if not self.check_server_health(test_url): self.log_error("Testing server health check failed") return False - + if not self.unload_existing_models(test_url): self.log_error("Failed to unload existing models from testing") return False - + if not self.load_ensemble_models(test_url): self.log_error("Failed to load models to testing") return False - + if not self.test_model_inference(test_url): self.log_error("Testing inference test failed") return False - + self.log_message("Successfully moved model from production to testing") return True - + # Case 5: undeployed -> testing elif self.current_env == "undeployed" and self.target_env == "testing": self.log_message("Deploying model to testing") - + test_url = self.get_triton_server_url("testing") if not self.check_server_health(test_url): self.log_error("Testing server health check failed") return False - + if not self.unload_existing_models(test_url): self.log_error("Failed to unload existing models from testing") return False - + if not self.load_ensemble_models(test_url): self.log_error("Failed to load models to testing") return False - + if not self.test_model_inference(test_url): self.log_error("Testing inference test failed") return False - + self.log_message("Successfully deployed model to testing") return True - + # Case 6: undeployed -> production elif self.current_env == "undeployed" and self.target_env == "production": self.log_message("Deploying model directly to production") - + prod_url = self.get_triton_server_url("production") if not self.check_server_health(prod_url): self.log_error("Production server health check failed") return False - + if not self.unload_existing_models(prod_url): self.log_error("Failed to unload existing models from production") return False - + if not self.load_ensemble_models(prod_url): self.log_error("Failed to load models to production") return False - + if not self.test_model_inference(prod_url): self.log_error("Production inference test failed") return False - + self.log_message("Successfully deployed model directly to production") return True - + # Invalid case else: - self.log_error(f"Invalid deployment path: {self.current_env} -> {self.target_env}") + self.log_error( + f"Invalid deployment path: {self.current_env} -> {self.target_env}" + ) return False def load_model_to_repository(self): """Main model repository upload process""" try: - self.log_message( - f"Starting model upload for model ID: {self.model_id}" - ) + self.log_message(f"Starting model upload for model ID: {self.model_id}") # Step 1: Create working directory self.create_working_directory() @@ -680,9 +711,7 @@ def load_model_to_repository(self): # Step 4: Upload to model repository self.upload_to_model_repository() - self.log_message( - "Model files uploaded successfully to both environments" - ) + self.log_message("Model files uploaded successfully to both environments") except KeyboardInterrupt: self.log_error("Model upload interrupted by user") @@ -746,14 +775,20 @@ def main(): try: args = parser.parse_args() except SystemExit as e: - logger.error(f"Argument parsing failed with error arparse error code: {e} - Check the INFO logs for more details.") + logger.error( + f"Argument parsing failed with error arparse error code: {e} - Check the INFO logs for more details." + ) raise except Exception as e: - logger.error(f"Unexpected error during argument parsing error code: {e} - Check the INFO logs for more details.") + logger.error( + f"Unexpected error during argument parsing error code: {e} - Check the INFO logs for more details." + ) raise # Log all passed arguments for debugging - logger.info(f"Starting deployment with arguments: {vars(args)}", model_id=args.model_id) + logger.info( + f"Starting deployment with arguments: {vars(args)}", model_id=args.model_id + ) try: deployer = ModelDeploymentOrchestrator( @@ -766,23 +801,31 @@ def main(): target_env=args.target_env, first_deployment=args.first_deployment.lower() == "true", ) - + # Step 1: Upload model files to repository (only on first deployment) if args.first_deployment.lower() == "true": - logger.info("First deployment detected - uploading model files to repository", model_id=args.model_id) + logger.info( + "First deployment detected - uploading model files to repository", + model_id=args.model_id, + ) deployer.load_model_to_repository() else: - logger.info("Not a first deployment - skipping model repository upload", model_id=args.model_id) - + logger.info( + "Not a first deployment - skipping model repository upload", + model_id=args.model_id, + ) + # Step 2: Deploy model to Triton inference servers if not deployer.deploy_model(): - logger.error("Model deployment to Triton servers failed", model_id=args.model_id) + logger.error( + "Model deployment to Triton servers failed", model_id=args.model_id + ) sys.exit(1) - + logger.info("Model deployment completed successfully", model_id=args.model_id) - + except Exception as e: - logger.error(f"Fatal error: {e}", model_id=getattr(args, 'model_id', None)) + logger.error(f"Fatal error: {e}", model_id=getattr(args, "model_id", None)) sys.exit(1) diff --git a/src/model-training/constants.py b/src/model-training/constants.py index c0ce3a16..71c3a776 100644 --- a/src/model-training/constants.py +++ b/src/model-training/constants.py @@ -1,12 +1,18 @@ -UPDATE_MODEL_TRAINING_STATUS_ENDPOINT = "http://ruuter-public:8086/global-classifier/datamodels/training/status/update" +UPDATE_MODEL_TRAINING_STATUS_ENDPOINT = ( + "http://ruuter-public:8086/global-classifier/datamodels/training/status/update" +) -CREATE_TRAINING_PROGRESS_SESSION_ENDPOINT = "http://ruuter-public:8086/global-classifier/datamodels/progress/create" +CREATE_TRAINING_PROGRESS_SESSION_ENDPOINT = ( + "http://ruuter-public:8086/global-classifier/datamodels/progress/create" +) -UPDATE_TRAINING_PROGRESS_SESSION_ENDPOINT = "http://ruuter-public:8086/global-classifier/datamodels/progress/update" +UPDATE_TRAINING_PROGRESS_SESSION_ENDPOINT = ( + "http://ruuter-public:8086/global-classifier/datamodels/progress/update" +) DEPLOYMENT_ENDPOINT = "http://ruuter-public:8086/global-classifier/inference/deploy" -MODEL_TRAINING_SOURCE_PATH = "/app/src/training" #path in container +MODEL_TRAINING_SOURCE_PATH = "/app/src/training" # path in container TRAINING_LOGS_PATH = "/app/src/training/training_logs.log" @@ -37,19 +43,25 @@ MODEL_TRAINED_AND_DEPLOYED_PROGRESS_STATUS = "Model Trained And Deployed" -TRAINING_FAILED_STATUS= "Training Failed" +TRAINING_FAILED_STATUS = "Training Failed" DEPLOYMENT_FAILED_STATUS = "Deployment Failed" INITIATING_TRAINING_PROGRESS_MESSAGE = "Download and preparing dataset" -TRAINING_IN_PROGRESS_PROGRESS_MESSAGE = "The dataset is being trained on all selected models" +TRAINING_IN_PROGRESS_PROGRESS_MESSAGE = ( + "The dataset is being trained on all selected models" +) -DEPLOYING_MODEL_PROGRESS_MESSAGE = "Model training complete. The trained model is now being deployed" +DEPLOYING_MODEL_PROGRESS_MESSAGE = ( + "Model training complete. The trained model is now being deployed" +) -MODEL_TRAINED_AND_DEPLOYED_PROGRESS_MESSAGE = "The model was trained and deployed successfully to the environment" +MODEL_TRAINED_AND_DEPLOYED_PROGRESS_MESSAGE = ( + "The model was trained and deployed successfully to the environment" +) TRAINING_FAILED_STATUS_MESSAGE = "Model training has failed" diff --git a/src/model-training/create_triton_configs.py b/src/model-training/create_triton_configs.py index 476f92dd..6e301f96 100644 --- a/src/model-training/create_triton_configs.py +++ b/src/model-training/create_triton_configs.py @@ -182,7 +182,7 @@ def generate_preprocessing_config( }}, """ if supports_token_type_ids: - config += f"""{{ + config += """{{ name: "token_type_ids" data_type: TYPE_INT64 dims: [ -1 ] @@ -211,7 +211,7 @@ def generate_preprocessing_config( {{ key: "ood_method" value: {{ - string_value: "{ood_method if ood_method else 'none'}" + string_value: "{ood_method if ood_method else "none"}" }} }}, {{ @@ -270,7 +270,7 @@ def generate_text_classifier_config( # Add token_type_ids input if supported if supports_token_type_ids: - config += f""", + config += """, {{ name: "token_type_ids" data_type: TYPE_INT64 @@ -289,7 +289,7 @@ def generate_text_classifier_config( dims: [ {num_labels} ] }}""" - config += f""" + config += """ ] @@ -368,7 +368,7 @@ def generate_postprocessing_config( {{ key: "ood_method" value: {{ - string_value: "{ood_method if ood_method else 'none'}" + string_value: "{ood_method if ood_method else "none"}" }} }}, {{ @@ -392,7 +392,7 @@ def generate_postprocessing_config( {{ key: "uncertainty_strategy" value: {{ - string_value: "{uncertainty_strategy if uncertainty_strategy else 'none'}" + string_value: "{uncertainty_strategy if uncertainty_strategy else "none"}" }} }}, {{ @@ -499,5 +499,3 @@ def generate_all_triton_configs( } return configs - - diff --git a/src/model-training/datapipeline.py b/src/model-training/datapipeline.py index fdb1ae3e..f84020bf 100644 --- a/src/model-training/datapipeline.py +++ b/src/model-training/datapipeline.py @@ -1,12 +1,10 @@ import pandas as pd -from loguru import logger -import sys from s3_ferry import S3Ferry import os from loki_logger import LokiLogger -logger = LokiLogger(service_name="model-trainer") +logger = LokiLogger(service_name="model-trainer") class DataPipeline: @@ -56,7 +54,7 @@ def __init__(self, dataset_id): def extract_input_columns(self): """Extract input columns from validation rules""" validation_rules = self.hierarchy["validationCriteria"]["validationRules"] - input_columns: list[str | Unknown] = [ + input_columns: list[str] = [ key for key, value in validation_rules.items() if not value["isDataClass"] ] logger.info(f"Input columns identified: {input_columns}") diff --git a/src/model-training/loki_logger.py b/src/model-training/loki_logger.py new file mode 100644 index 00000000..e69de29b diff --git a/src/model-training/model-repository/post-processing/1/model.py b/src/model-training/model-repository/post-processing/1/model.py index 7bbc8a8c..56e9f022 100644 --- a/src/model-training/model-repository/post-processing/1/model.py +++ b/src/model-training/model-repository/post-processing/1/model.py @@ -24,7 +24,6 @@ def initialize(self, args): model_path = str(args) except Exception as e: - print(f"Warning: Failed to parse args in initialize: {e}") label_file = os.path.join(model_path, "1", "label_mappings.json") diff --git a/src/model-training/model-repository/pre-processing/1/model.py b/src/model-training/model-repository/pre-processing/1/model.py index 74dcb37e..000a6426 100644 --- a/src/model-training/model-repository/pre-processing/1/model.py +++ b/src/model-training/model-repository/pre-processing/1/model.py @@ -190,4 +190,4 @@ def execute(self, requests): def finalize(self): """Clean up resources""" if hasattr(self, "tokenizer"): - del self.tokenizer \ No newline at end of file + del self.tokenizer diff --git a/src/model-training/model_trainer.py b/src/model-training/model_trainer.py index bed7bc40..2a590a82 100644 --- a/src/model-training/model_trainer.py +++ b/src/model-training/model_trainer.py @@ -1,7 +1,6 @@ from datapipeline import DataPipeline from trainingpipeline import TrainingPipeline, create_training_pipeline import os -import sys import shutil import json from datetime import datetime, timezone @@ -14,9 +13,9 @@ UNCERTAINTY_CONFIGS, F1_WEIGHT, SEQUENCE_LENGTH, - MODEL_TRAINING_SOURCE_PATH, - DEPLOYMENT_ENDPOINT, - CREATE_TRAINING_PROGRESS_SESSION_ENDPOINT, + MODEL_TRAINING_SOURCE_PATH, + DEPLOYMENT_ENDPOINT, + CREATE_TRAINING_PROGRESS_SESSION_ENDPOINT, UPDATE_TRAINING_PROGRESS_SESSION_ENDPOINT, UPDATE_MODEL_TRAINING_STATUS_ENDPOINT, INITIATING_TRAINING_PROGRESS_STATUS, @@ -24,26 +23,23 @@ DEPLOYING_MODEL_PROGRESS_STATUS, MODEL_TRAINED_AND_DEPLOYED_PROGRESS_STATUS, TRAINING_FAILED_STATUS, - DEPLOYMENT_FAILED_STATUS, INITIATING_TRAINING_PROGRESS_PERCENTAGE, TRAINING_IN_PROGRESS_PROGRESS_PERCENTAGE, - DEPLOYING_MODEL_PROGRESS_PERCENTAGE, MODEL_TRAINED_AND_DEPLOYED_PROGRESS_PERCENTAGE, INITIATING_TRAINING_PROGRESS_MESSAGE, TRAINING_IN_PROGRESS_PROGRESS_MESSAGE, DEPLOYING_MODEL_PROGRESS_MESSAGE, MODEL_TRAINED_AND_DEPLOYED_PROGRESS_MESSAGE, TRAINING_FAILED_STATUS_MESSAGE, - TRAINING_FAILED_PROGRESS_PERCENTAGE - + TRAINING_FAILED_PROGRESS_PERCENTAGE, ) -from loguru import logger import requests import argparse from loki_logger import LokiLogger + logger = LokiLogger(service_name="model-trainer") @@ -100,7 +96,7 @@ def create_training_progress_session(self): This function should be implemented to create a training progress session in the database. """ logger.info("Creating training progress session") - + payload = { "modelId": int(self.model_id), "modelName": self.model_name, @@ -110,95 +106,120 @@ def create_training_progress_session(self): } logger.info(f"Prepared training progress session payload {payload}") - + try: # Make request to create training progress session endpoint response = requests.post( url=CREATE_TRAINING_PROGRESS_SESSION_ENDPOINT, json=payload, headers={"Content-Type": "application/json"}, - timeout=300 # 5 minute timeout for creating progress session + timeout=300, # 5 minute timeout for creating progress session + ) + + logger.info( + f"Create training progress session response - {response.status_code} - {response.text}" ) - - logger.info(f"Create training progress session response - {response.status_code} - {response.text}") - + # Check if request was successful logger.info("Training progress session created successfully") session_data = response.json() session_id = session_data["response"]["sessionId"] - + self.progress_session_id = session_id - + return response.json() - + except requests.HTTPError as e: error_msg = f"HTTP error during creating training progress session: {e.response.status_code} - {e.response.text}" - logger.error(error_msg, model_id=self.model_id, status_code=e.response.status_code) + logger.error( + error_msg, model_id=self.model_id, status_code=e.response.status_code + ) raise - + except requests.RequestException as e: - error_msg = f"Network error during creating training progress session: {str(e)}" + error_msg = ( + f"Network error during creating training progress session: {str(e)}" + ) logger.error(error_msg, model_id=self.model_id) raise - + except Exception as e: - error_msg = f"Unexpected error during creating training progress session: {str(e)}" + error_msg = ( + f"Unexpected error during creating training progress session: {str(e)}" + ) logger.error(error_msg, model_id=self.model_id) - raise + raise - def update_training_progression_session(self,training_status:str, training_message:str, progress_percentage:int, process_complete:bool): + def update_training_progression_session( + self, + training_status: str, + training_message: str, + progress_percentage: int, + process_complete: bool, + ): """ Update the training progress session in the database. This function should be implemented to update the training progress session in the database. """ logger.info("Updating training progress session") - + if not self.progress_session_id: - logger.error("Progress session ID is not set. Cannot update training progress session.") - raise ValueError("Progress session ID is required to update the training progress session.") - - else: + logger.error( + "Progress session ID is not set. Cannot update training progress session." + ) + raise ValueError( + "Progress session ID is required to update the training progress session." + ) + else: payload = { "sessionId": self.progress_session_id, "trainingStatus": training_status, "trainingMessage": training_message, "progressPercentage": progress_percentage, - "processComplete": process_complete + "processComplete": process_complete, } logger.info(f"Prepared training progress session update payload {payload}") - + try: # Make request to update training progress session endpoint response = requests.post( url=UPDATE_TRAINING_PROGRESS_SESSION_ENDPOINT, json=payload, headers={"Content-Type": "application/json"}, - timeout=300 # 5 minute timeout for updating progress session + timeout=300, # 5 minute timeout for updating progress session + ) + + logger.info( + f"Update training progress session response - {response.status_code} - {response.text}" ) - - logger.info(f"Update training progress session response - {response.status_code} - {response.text}") - + # Check if request was successful response.raise_for_status() - + logger.info("Training progress session updated successfully") - + return response.json() - + except requests.HTTPError as e: error_msg = f"HTTP error during updating training progress session: {e.response.status_code} - {e.response.text}" - logger.error(error_msg, model_id=self.model_id, status_code=e.response.status_code) + logger.error( + error_msg, + model_id=self.model_id, + status_code=e.response.status_code, + ) raise - + except requests.RequestException as e: - error_msg = f"Network error during updating training progress session: {str(e)}" + error_msg = ( + f"Network error during updating training progress session: {str(e)}" + ) logger.error(error_msg, model_id=self.model_id) raise - + except Exception as e: error_msg = f"Unexpected error during updating training progress session: {str(e)}" logger.error(error_msg, model_id=self.model_id) @@ -210,51 +231,65 @@ def update_training_results(self, training_results, model_s3_location): This function should be implemented to update the training results in the database. """ logger.info("Updating training results in the database") - + payload = { "modelId": self.model_id, "trainingResults": training_results, - "modelS3Location": model_s3_location + "modelS3Location": model_s3_location, } logger.info(f"Prepared deployment payload {payload}") - + try: # Make request to deployment endpoint response = requests.post( url=UPDATE_MODEL_TRAINING_STATUS_ENDPOINT, json=payload, - headers={"Content-Type": "application/json"}) - - logger.info(f"Update model endpoint response - {response.status_code} - {response.text}") - + headers={"Content-Type": "application/json"}, + ) + + logger.info( + f"Update model endpoint response - {response.status_code} - {response.text}" + ) + # Check if request was successful response.raise_for_status() - + logger.info("Model training data pushed to database successfully") - + return response.json() - + except requests.HTTPError as e: error_msg = f"HTTP error during model deployment: {e.response.status_code} - {e.response.text}" - logger.error(error_msg, model_id=self.model_id, - current_env=self.current_deployment_platform, target_env=self.target_deployment_platform, - status_code=e.response.status_code) + logger.error( + error_msg, + model_id=self.model_id, + current_env=self.current_deployment_platform, + target_env=self.target_deployment_platform, + status_code=e.response.status_code, + ) raise - + except requests.RequestException as e: error_msg = f"Network error during model deployment: {str(e)}" - logger.error(error_msg, model_id=self.model_id, - current_env=self.current_deployment_platform, target_env=self.target_deployment_platform) + logger.error( + error_msg, + model_id=self.model_id, + current_env=self.current_deployment_platform, + target_env=self.target_deployment_platform, + ) raise - + except Exception as e: error_msg = f"Unexpected error during model deployment: {str(e)}" - logger.error(error_msg, model_id=self.model_id, - current_env=self.current_deployment_platform, target_env=self.target_deployment_platform) + logger.error( + error_msg, + model_id=self.model_id, + current_env=self.current_deployment_platform, + target_env=self.target_deployment_platform, + ) raise - def calculate_combined_score(self, accuracies, f1_scores): """Calculate combined score using weighted average""" if not accuracies or not f1_scores: @@ -266,14 +301,16 @@ def calculate_combined_score(self, accuracies, f1_scores): combined_score = (ACCURACY_WEIGHT * avg_accuracy) + (F1_WEIGHT * avg_f1) return combined_score - def deploy_model(self, deployment_environment) : + def deploy_model(self, deployment_environment): """Deploy the model to the specified environment""" logger.info(f"DEPLOYING MODEL TO {deployment_environment}") # Placeholder for deployment logic # This could involve calling a deployment service, updating configs, etc. # For now, just log the action - logger.info(f"MODEL {self.model_name} (ID: {self.model_id}) deployed to {deployment_environment}") + logger.info( + f"MODEL {self.model_name} (ID: {self.model_id}) deployed to {deployment_environment}" + ) def train(self): """UNIFIED TRAINING METHOD - TRAINS ALL VARIANTS""" @@ -285,8 +322,8 @@ def train(self): training_status=INITIATING_TRAINING_PROGRESS_STATUS, training_message=INITIATING_TRAINING_PROGRESS_MESSAGE, progress_percentage=INITIATING_TRAINING_PROGRESS_PERCENTAGE, - process_complete=False) - + process_complete=False, + ) # Initialize services s3_ferry = S3Ferry() @@ -307,11 +344,11 @@ def train(self): training_status=TRAINING_IN_PROGRESS_PROGRESS_STATUS, training_message=TRAINING_IN_PROGRESS_PROGRESS_MESSAGE, progress_percentage=TRAINING_IN_PROGRESS_PROGRESS_PERCENTAGE, - process_complete=False) + process_complete=False, + ) # Add standard models for base_model in self.model_types: - model_variants.append( { "name": base_model + "-sngp", @@ -352,7 +389,11 @@ def train(self): ood_method=variant["ood_method"], ) else: - training_pipeline = TrainingPipeline(dfs, variant["base_model"],full_name=variant["full_model_name"],) + training_pipeline = TrainingPipeline( + dfs, + variant["base_model"], + full_name=variant["full_model_name"], + ) # Train the variant model_dir, metrics = training_pipeline.train() @@ -506,8 +547,9 @@ def train(self): s3_save_location = f"{S3_FERRY_MODEL_STORAGE_PATH}/{str(self.model_id)}.zip" # Removing /app from path since S3 Ferry will already add /app to the path as defined in the config.env - local_source_location = f"{MODEL_RESULTS_PATH.replace('/app/', '')}/{str(self.model_id)}.zip" - + local_source_location = ( + f"{MODEL_RESULTS_PATH.replace('/app/', '')}/{str(self.model_id)}.zip" + ) logger.info("INITIATING MODEL UPLOAD TO S3") _ = s3_ferry.transfer_file( @@ -546,7 +588,6 @@ def train(self): f"INITIATING DEPLOYMENT OF {best_variant['name']} TO {self.current_deployment_platform}" ) - logger.info("=" * 60) logger.info("UNIFIED TRAINING COMPLETED SUCCESSFULLY") logger.info(f"BEST MODEL: {best_variant['name']}") @@ -556,15 +597,15 @@ def train(self): logger.info("Updating training results to database") self.update_training_results( - training_results=all_results, - model_s3_location=s3_save_location) + training_results=all_results, model_s3_location=s3_save_location + ) trainer.update_training_progression_session( training_status=DEPLOYING_MODEL_PROGRESS_STATUS, training_message=DEPLOYING_MODEL_PROGRESS_MESSAGE, progress_percentage=100, - process_complete=True) - + process_complete=True, + ) except Exception as e: import traceback @@ -576,83 +617,94 @@ def train(self): training_status=TRAINING_FAILED_STATUS, training_message=TRAINING_FAILED_STATUS_MESSAGE, progress_percentage=TRAINING_FAILED_PROGRESS_PERCENTAGE, - process_complete=False) + process_complete=False, + ) raise - - def deploy(self): + def deploy(self): """ Deploy a model from current environment to target environment using Ruuter endpoint. - + Args: model_id: The ID of the model to deploy current_env: Current deployment environment (e.g., 'testing', 'production') target_env: Target deployment environment to deploy to first_deployment: Whether this is the first deployment (default: False) - + """ - - #TODO - Add sessionId here to pass session ID to the deployment endpoint + # TODO - Add sessionId here to pass session ID to the deployment endpoint logger.info("Starting model deployment") - + # Prepare request payload payload = { "modelId": self.model_id, "currentEnv": self.current_deployment_platform, "targetEnv": self.target_deployment_platform, - "firstDeployment": True + "firstDeployment": True, } logger.info(f"Prepared deployment payload {payload}") - + try: # Make request to deployment endpoint response = requests.post( DEPLOYMENT_ENDPOINT, json=payload, headers={"Content-Type": "application/json"}, - timeout=300 # 5 minute timeout for deployment operations + timeout=300, # 5 minute timeout for deployment operations + ) + + logger.info( + f"Deployment endpoint response - {response.status_code} - {response.text}" ) - - logger.info(f"Deployment endpoint response - {response.status_code} - {response.text}") - + # Check if request was successful response.raise_for_status() - + logger.info("Model deployment completed successfully") trainer.update_training_progression_session( training_status=MODEL_TRAINED_AND_DEPLOYED_PROGRESS_STATUS, training_message=MODEL_TRAINED_AND_DEPLOYED_PROGRESS_MESSAGE, progress_percentage=MODEL_TRAINED_AND_DEPLOYED_PROGRESS_PERCENTAGE, - process_complete=True) + process_complete=True, + ) - return response.json() - + except requests.HTTPError as e: error_msg = f"HTTP error during model deployment: {e.response.status_code} - {e.response.text}" - logger.error(error_msg, model_id=self.model_id, - current_env=self.current_deployment_platform, target_env=self.target_deployment_platform, - status_code=e.response.status_code) + logger.error( + error_msg, + model_id=self.model_id, + current_env=self.current_deployment_platform, + target_env=self.target_deployment_platform, + status_code=e.response.status_code, + ) raise - + except requests.RequestException as e: error_msg = f"Network error during model deployment: {str(e)}" - logger.error(error_msg, model_id=self.model_id, - current_env=self.current_deployment_platform, target_env=self.target_deployment_platform) + logger.error( + error_msg, + model_id=self.model_id, + current_env=self.current_deployment_platform, + target_env=self.target_deployment_platform, + ) raise - + except Exception as e: error_msg = f"Unexpected error during model deployment: {str(e)}" - logger.error(error_msg, model_id=self.model_id, - current_env=self.current_deployment_platform, target_env=self.target_deployment_platform) + logger.error( + error_msg, + model_id=self.model_id, + current_env=self.current_deployment_platform, + target_env=self.target_deployment_platform, + ) raise - - # ----------------------TODO: Uncomment the CLI section when needed---------------------- def parse_args(): @@ -717,4 +769,4 @@ def parse_args(): trainer.create_training_progress_session() trainer.train() - trainer.deploy() \ No newline at end of file + trainer.deploy() diff --git a/src/model-training/s3_ferry.py b/src/model-training/s3_ferry.py index eb5c00b3..66d4dcd4 100644 --- a/src/model-training/s3_ferry.py +++ b/src/model-training/s3_ferry.py @@ -1,12 +1,11 @@ import requests -from loguru import logger from constants import S3_FERRY_ENDPOINT -import sys - from loki_logger import LokiLogger + logger = LokiLogger(service_name="model-trainer-s3-ferry") + class S3Ferry: def __init__(self): # Updated to use correct Docker service name @@ -26,7 +25,6 @@ def transfer_file( source_storage_type, ) - logger.info(f"Transferring file with payload: {payload}") response = requests.post(self.url, json=payload) diff --git a/src/model-training/trainingpipeline.py b/src/model-training/trainingpipeline.py index 5e918630..6d0aefe5 100644 --- a/src/model-training/trainingpipeline.py +++ b/src/model-training/trainingpipeline.py @@ -21,8 +21,8 @@ from pathlib import Path import pandas as pd import numpy as np -import sys -from typing import Counter, Union +from typing import Union +from loki_logger import LokiLogger from constants import ( MODEL_CONFIGS, SUPPORTED_BASE_MODELS, @@ -34,7 +34,6 @@ TEST_SIZE_RATIO, SEQUENCE_LENGTH, ) -from loguru import logger import os from transformers import logging as transformers_logging import warnings @@ -46,7 +45,8 @@ transformers_logging.set_verbosity_error() -from loki_logger import LokiLogger + + logger = LokiLogger(service_name="model-trainer") From b24e3ace6dccf85b81173249915608e717c05d4e Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Thu, 4 Sep 2025 10:46:59 +0530 Subject: [PATCH 07/11] fix ruff linting issues --- grafana-configs/loki_logger.py | 46 ++++++++++++++------------ src/model-training/trainingpipeline.py | 2 -- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/grafana-configs/loki_logger.py b/grafana-configs/loki_logger.py index 3cac164b..444db473 100644 --- a/grafana-configs/loki_logger.py +++ b/grafana-configs/loki_logger.py @@ -14,11 +14,13 @@ class LokiLogger: """Simple logger that sends logs directly to Loki API""" - - def __init__(self, loki_url: str = "http://loki:3100", service_name: str = "default"): + + def __init__( + self, loki_url: str = "http://loki:3100", service_name: str = "default" + ): """ Initialize LokiLogger - + Args: loki_url: URL for Loki service (default: container URL in bykstack network) service_name: Name of the service for labeling logs @@ -29,20 +31,20 @@ def __init__(self, loki_url: str = "http://loki:3100", service_name: str = "defa self.session = requests.Session() # Set default timeout for all requests self.timeout = 5 - + def _send_to_loki(self, level: str, message: str, **extra_fields): """Send log entry directly to Loki API""" try: # Create timestamp in nanoseconds (Loki requirement) timestamp_ns = str(int(time.time() * 1_000_000_000)) - + # Prepare labels for Loki labels = { "service": self.service_name, "level": level, "hostname": self.hostname, } - + # Add extra fields as labels, filtering out None values except for model_id for key, value in extra_fields.items(): if key == "model_id": @@ -50,7 +52,7 @@ def _send_to_loki(self, level: str, message: str, **extra_fields): labels[key] = str(value) if value is not None else "None" elif value is not None: labels[key] = str(value) - + # Create log entry log_entry = { "timestamp": datetime.now().isoformat(), @@ -58,53 +60,55 @@ def _send_to_loki(self, level: str, message: str, **extra_fields): "message": message, "hostname": self.hostname, "service": self.service_name, - **extra_fields + **extra_fields, } - + # Prepare Loki payload payload = { "streams": [ { "stream": labels, - "values": [ - [timestamp_ns, json.dumps(log_entry)] - ] + "values": [[timestamp_ns, json.dumps(log_entry)]], } ] } - + # Send to Loki (non-blocking, fire-and-forget) self.session.post( f"{self.loki_url}/loki/api/v1/push", json=payload, headers={"Content-Type": "application/json"}, - timeout=self.timeout + timeout=self.timeout, ) - + except Exception: # Silently ignore logging errors to not affect main application pass - + # Also print to console for immediate feedback timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - model_info = f" [Model: {extra_fields.get('model_id', 'N/A')}]" if extra_fields.get('model_id') else "" + model_info = ( + f" [Model: {extra_fields.get('model_id', 'N/A')}]" + if extra_fields.get("model_id") + else "" + ) print(f"[{timestamp}] {level: <8}{model_info} | {message}") - + def info(self, message: str, model_id: str | None = None, **extra_fields): if model_id: extra_fields["model_id"] = model_id self._send_to_loki("INFO", message, **extra_fields) - + def error(self, message: str, model_id: str | None = None, **extra_fields): if model_id: extra_fields["model_id"] = model_id self._send_to_loki("ERROR", message, **extra_fields) - + def warning(self, message: str, model_id: str | None = None, **extra_fields): if model_id: extra_fields["model_id"] = model_id self._send_to_loki("WARNING", message, **extra_fields) - + def debug(self, message: str, model_id: str | None = None, **extra_fields): if model_id: extra_fields["model_id"] = model_id diff --git a/src/model-training/trainingpipeline.py b/src/model-training/trainingpipeline.py index 6d0aefe5..527a5626 100644 --- a/src/model-training/trainingpipeline.py +++ b/src/model-training/trainingpipeline.py @@ -45,8 +45,6 @@ transformers_logging.set_verbosity_error() - - logger = LokiLogger(service_name="model-trainer") From 2031e22ef89e72ea3c0819988a33847a7672a6ec Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Sat, 6 Sep 2025 10:27:57 +0530 Subject: [PATCH 08/11] update .env --- .env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env b/.env index a30f1b57..219fbae3 100644 --- a/.env +++ b/.env @@ -2,6 +2,6 @@ AWS_ACCESS_KEY_ID=your_aws_access_key_id AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key BEDROCK_AWS_REGION=eu-west-1 AZURE_OPENAI_API_KEY=your_azure_openai_api_key -AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint +AZURE_OPENAI_ENDPOINT=your_azure_apenai_endpoint AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o PROVIDER_NAME=azure-openai \ No newline at end of file From 0e49c115f5a1cee1b570a299e272cda2e243f73e Mon Sep 17 00:00:00 2001 From: erangi-ar Date: Mon, 8 Sep 2025 11:20:31 +0530 Subject: [PATCH 09/11] eslint issues fixed --- GUI/src/pages/DataModels/ConfigureDataModel.tsx | 4 ++-- GUI/src/pages/DataModels/CreateDataModel.tsx | 2 +- GUI/src/pages/TestModel/index.tsx | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GUI/src/pages/DataModels/ConfigureDataModel.tsx b/GUI/src/pages/DataModels/ConfigureDataModel.tsx index c6d0e34a..af524f97 100644 --- a/GUI/src/pages/DataModels/ConfigureDataModel.tsx +++ b/GUI/src/pages/DataModels/ConfigureDataModel.tsx @@ -97,7 +97,7 @@ const ConfigureDataModel: FC = () => { open({ title: t('dataModels.configureDataModel.saveChangesTitile'), content: t('dataModels.configureDataModel.saveChangesDesc'), - footer: (
) + footer: (
) }); }, @@ -115,7 +115,7 @@ const ConfigureDataModel: FC = () => { open({ title: t('dataModels.configureDataModel.deployDataModalSuccessTitle'), content: t('dataModels.configureDataModel.deployDataModalSuccessDesc'), - footer: (
) + footer: (
) }); }, diff --git a/GUI/src/pages/DataModels/CreateDataModel.tsx b/GUI/src/pages/DataModels/CreateDataModel.tsx index 4db498cf..02e88a55 100644 --- a/GUI/src/pages/DataModels/CreateDataModel.tsx +++ b/GUI/src/pages/DataModels/CreateDataModel.tsx @@ -73,7 +73,7 @@ const CreateDataModel: FC = () => { open({ title: t('dataModels.createDataModel.successTitle'), content: t('dataModels.createDataModel.successDesc'), - footer: (
) + footer: (
) }); }, diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx index 6409b34c..77fd2766 100644 --- a/GUI/src/pages/TestModel/index.tsx +++ b/GUI/src/pages/TestModel/index.tsx @@ -116,7 +116,7 @@ const TestModel: FC = () => { }} value={testModel?.modelId === null ? t('testModels.errors.modelNotExist') : undefined} defaultValue={testModel?.modelId ?? undefined} /> -
{modelLoadingStatus}
From 77a13e197a19f845ac4795c1becbf92acf329fdd Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Mon, 8 Sep 2025 15:36:41 +0530 Subject: [PATCH 10/11] fixed issues issues that are sugested by PR --- src/model-training/create_triton_configs.py | 6 +++--- src/model-training/datapipeline.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/model-training/create_triton_configs.py b/src/model-training/create_triton_configs.py index 6e301f96..5f99ca56 100644 --- a/src/model-training/create_triton_configs.py +++ b/src/model-training/create_triton_configs.py @@ -211,7 +211,7 @@ def generate_preprocessing_config( {{ key: "ood_method" value: {{ - string_value: "{ood_method if ood_method else "none"}" + string_value: "{ood_method if ood_method else 'none'}" }} }}, {{ @@ -368,7 +368,7 @@ def generate_postprocessing_config( {{ key: "ood_method" value: {{ - string_value: "{ood_method if ood_method else "none"}" + string_value: "{ood_method if ood_method else 'none'}" }} }}, {{ @@ -392,7 +392,7 @@ def generate_postprocessing_config( {{ key: "uncertainty_strategy" value: {{ - string_value: "{uncertainty_strategy if uncertainty_strategy else "none"}" + string_value: "{uncertainty_strategy if uncertainty_strategy else 'none'}" }} }}, {{ diff --git a/src/model-training/datapipeline.py b/src/model-training/datapipeline.py index f84020bf..45b05507 100644 --- a/src/model-training/datapipeline.py +++ b/src/model-training/datapipeline.py @@ -55,7 +55,8 @@ def extract_input_columns(self): """Extract input columns from validation rules""" validation_rules = self.hierarchy["validationCriteria"]["validationRules"] input_columns: list[str] = [ - key for key, value in validation_rules.items() if not value["isDataClass"] + key for key, value in validation_rules.items() + if isinstance(key, str) and not value["isDataClass"] ] logger.info(f"Input columns identified: {input_columns}") return input_columns From 3251fef80da700ef607a9a3c230bff9f8d0d863f Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Mon, 8 Sep 2025 15:39:41 +0530 Subject: [PATCH 11/11] fixed ruff linting issues --- src/model-training/create_triton_configs.py | 6 +++--- src/model-training/datapipeline.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/model-training/create_triton_configs.py b/src/model-training/create_triton_configs.py index 5f99ca56..6e301f96 100644 --- a/src/model-training/create_triton_configs.py +++ b/src/model-training/create_triton_configs.py @@ -211,7 +211,7 @@ def generate_preprocessing_config( {{ key: "ood_method" value: {{ - string_value: "{ood_method if ood_method else 'none'}" + string_value: "{ood_method if ood_method else "none"}" }} }}, {{ @@ -368,7 +368,7 @@ def generate_postprocessing_config( {{ key: "ood_method" value: {{ - string_value: "{ood_method if ood_method else 'none'}" + string_value: "{ood_method if ood_method else "none"}" }} }}, {{ @@ -392,7 +392,7 @@ def generate_postprocessing_config( {{ key: "uncertainty_strategy" value: {{ - string_value: "{uncertainty_strategy if uncertainty_strategy else 'none'}" + string_value: "{uncertainty_strategy if uncertainty_strategy else "none"}" }} }}, {{ diff --git a/src/model-training/datapipeline.py b/src/model-training/datapipeline.py index 45b05507..f8b48cc8 100644 --- a/src/model-training/datapipeline.py +++ b/src/model-training/datapipeline.py @@ -55,7 +55,8 @@ def extract_input_columns(self): """Extract input columns from validation rules""" validation_rules = self.hierarchy["validationCriteria"]["validationRules"] input_columns: list[str] = [ - key for key, value in validation_rules.items() + key + for key, value in validation_rules.items() if isinstance(key, str) and not value["isDataClass"] ] logger.info(f"Input columns identified: {input_columns}")