From 85c0b82eb8fe4bcdc9bf5b7d9428830b6f0205e1 Mon Sep 17 00:00:00 2001
From: Charith Nuwan Bimsara <59943919+nuwangeek@users.noreply.github.com>
Date: Wed, 12 Nov 2025 21:03:57 +0530
Subject: [PATCH] Merge pull request #383 from
 rootcodelabs/deployment-bug-fixes-Bimsara

Fix bugs and add a separate endpoint for generating presigned URLs
---
 .env                                          |   6 +-
 .../DSL/fetch_chunk_without_filter.yml        |   5 -
 DSL/CronManager/DSL/fetch_multi_chunk.yml     |   5 -
 .../DSL/mock_signed_url_generation.yml        |   5 +
 DSL/CronManager/script/callback_format.sh     |  46 ++---
 DSL/CronManager/script/dataset_pipeline_s3.sh | 171 +++++++++++++---
 DSL/CronManager/script/fetch_multi_chunk.sh   | 120 ------------
 DSL/CronManager/script/fetch_single_chunk.sh  |  95 ---------
 .../script/presigned_url_generate.sh          |  78 ++++++++
 .../script/train_script_starter.sh            | 166 +++++++++-------
 .../global-classifier/POST/get-agencies.sql   |   2 +-
 .../global-classifier/POST/get-datasets.sql   |   5 +
 .../POST/insert-agency-presigned-url.sql      |   3 +
 .../POST/update-agency-presigned-url.sql      |   6 +
 .../global-classifier/GET/datasets/list.yml   |   5 +
 .../POST/ckb/agency-data-url.yml              |  59 ++++++
 .../global-classifier/POST/data/callback.yml  |   4 +-
 .../FormElements/FormSelect/FormSelect.scss   |   8 +
 .../FormElements/FormSelect/index.tsx         |   4 +-
 .../FormTextarea/FormTextarea.scss            |   6 +-
 .../FormElements/FormTextarea/index.tsx       |   1 +
 .../molecules/DataModelForm/index.tsx         |   5 +
 GUI/src/pages/DataModels/CreateDataModel.tsx  |   3 +-
 GUI/src/pages/Datasets/index.tsx              |   4 +-
 GUI/src/pages/TestModel/index.tsx             |  16 +-
 GUI/src/pages/ViewDataset/index.tsx           |   2 +-
 GUI/src/services/datasets.ts                  |   4 +-
 GUI/src/utils/commonUtilts.ts                 |  15 +-
 GUI/src/utils/queryKeys.ts                    |   4 +-
 GUI/translations/en/common.json               |   8 +-
 GUI/translations/et/common.json               |   6 +-
 docker-compose-dev.yml                        |  24 +--
 src/s3_dataset_processor/constants.py         |   1 +
 .../dataset_generation_callback_processor.py  |  53 +++++
 src/scripts/constants.py                      |   8 +
 src/scripts/generate_signed_urls.py           | 185 ++++++++++++++++++
 36 files changed, 737 insertions(+), 401 deletions(-)
 delete mode 100644 DSL/CronManager/DSL/fetch_chunk_without_filter.yml
 delete mode 100644 DSL/CronManager/DSL/fetch_multi_chunk.yml
 create mode 100644 DSL/CronManager/DSL/mock_signed_url_generation.yml
 delete mode 100755 DSL/CronManager/script/fetch_multi_chunk.sh
 delete mode 100755 DSL/CronManager/script/fetch_single_chunk.sh
 create mode 100644 DSL/CronManager/script/presigned_url_generate.sh
 create mode 100644 DSL/Resql/global-classifier/POST/insert-agency-presigned-url.sql
 create mode 100644 DSL/Resql/global-classifier/POST/update-agency-presigned-url.sql
 create mode 100644 DSL/Ruuter.public/global-classifier/POST/ckb/agency-data-url.yml
 create mode 100644 src/scripts/constants.py
 create mode 100644 src/scripts/generate_signed_urls.py

diff --git a/.env b/.env
index a30f1b57..90abe763 100644
--- a/.env
+++ b/.env
@@ -1,7 +1,7 @@
 AWS_ACCESS_KEY_ID=your_aws_access_key_id
 AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
 BEDROCK_AWS_REGION=eu-west-1
-AZURE_OPENAI_API_KEY=your_azure_openai_api_key
-AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint
-AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o
+AZURE_OPENAI_API_KEY=your_openai_api_key
+AZURE_OPENAI_ENDPOINT=your_openai_endpoint
+AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o-mini
 PROVIDER_NAME=azure-openai
\ No newline at end of file
diff --git a/DSL/CronManager/DSL/fetch_chunk_without_filter.yml b/DSL/CronManager/DSL/fetch_chunk_without_filter.yml
deleted file mode 100644
index 6f12fb8e..00000000
--- a/DSL/CronManager/DSL/fetch_chunk_without_filter.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-fetch_single_chunk:
-  trigger: off
-  type: exec
-  command: "../app/scripts/fetch_single_chunk.sh"
-  allowedEnvs: ['datasetId', 'pageNum']
\ No newline at end of file
diff --git a/DSL/CronManager/DSL/fetch_multi_chunk.yml b/DSL/CronManager/DSL/fetch_multi_chunk.yml
deleted file mode 100644
index f52a735e..00000000
--- a/DSL/CronManager/DSL/fetch_multi_chunk.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-multi_chunk:
-  trigger: off
-  type: exec
-  command: "../app/scripts/fetch_multi_chunk.sh"
-  allowedEnvs: ['datasetId', 'chunkIds']
\ No newline at end of file
diff --git a/DSL/CronManager/DSL/mock_signed_url_generation.yml b/DSL/CronManager/DSL/mock_signed_url_generation.yml
new file mode 100644
index 00000000..88c665a1
--- /dev/null
+++ b/DSL/CronManager/DSL/mock_signed_url_generation.yml
@@ -0,0 +1,5 @@
+mock_signed_url_generate:
+  trigger: off
+  type: exec
+  command: "../app/scripts/presigned_url_generate.sh"
+  allowedEnvs: ['centopsAgencies']
\ No newline at end of file
diff --git a/DSL/CronManager/script/callback_format.sh b/DSL/CronManager/script/callback_format.sh
index 1eee8dda..62517c7a 100755
--- a/DSL/CronManager/script/callback_format.sh
+++ b/DSL/CronManager/script/callback_format.sh
@@ -14,16 +14,16 @@ log() {
 }
 PROGRESS_UPDATE_URL="http://ruuter-public:8086/global-classifier/datasets/progress/update"
 # Debug: Check Python environment
-log "🔍 Python version: $(python3 --version)"
-log "🔍 Python path: $(which python3)"
+log "Python version: $(python3 --version)"
+log "Python path: $(which python3)"
 
 # Install required packages
-log "🔍 Installing required Python packages..."
+log "Installing required Python packages..."
 python3 -m pip install --quiet --no-cache-dir requests pydantic pandas || {
-    log "❌ Failed to install packages"
+    log "Failed to install packages"
     exit 1
 }
-log "✅ Required packages installed"
+log "Required packages installed"
 
 log "Dataset generation callback processing started"
 log "File path: $filePath"
@@ -35,7 +35,7 @@ log "Extracted dataset ID: $dataset_id"
 # Direct Python script path for processing generation callback (inside container)
 CALLBACK_SCRIPT="/app/src/s3_dataset_processor/dataset_generation_callback_processor.py"
 
-log "🔍 Calling direct Python script to process generation callback..."
+log "Calling direct Python script to process generation callback..."
 
 # Create temporary file for response
 temp_response="/tmp/callback_response.json"
@@ -65,46 +65,46 @@ python3 "$CALLBACK_SCRIPT" \
   > /tmp/callback_stdout.log 2> /tmp/callback_stderr.log
 exit_code=$?
 
-log "🪵 Python STDOUT:"
+log "Python STDOUT:"
 cat /tmp/callback_stdout.log
 
-log "🪵 Python STDERR:"
+log "Python STDERR:"
 cat /tmp/callback_stderr.log
 
-log "🔍 Python script exit code: $exit_code"
+log "Python script exit code: $exit_code"
 
 if [ -f "$temp_response" ]; then
-    log "📄 Contents of output JSON:"
+    log "Contents of output JSON:"
     cat "$temp_response"
 else
-    log "⚠️ No output JSON file was generated."
+    log "No output JSON file was generated."
 fi
 
 # Check if script execution was successful
 if [ "$exit_code" -eq 0 ] && [ -f "$temp_response" ]; then
-    log "✅ Python script execution successful"
+    log "Python script execution successful"
     
     response_body=$(cat "$temp_response")
-    log "🔍 Response: $response_body"
+    log "Response: $response_body"
     
     # Parse the response to get status information
     if command -v jq >/dev/null 2>&1; then
         status=$(echo "$response_body" | jq -r '.status // "unknown"')
         message=$(echo "$response_body" | jq -r '.message // "unknown"')
         
-        log "📊 Callback Processing Status:"
+        log "Callback Processing Status:"
         log "  - Status: $status"
         log "  - Message: $message"
         log "  - Dataset ID: $dataset_id"
         
     else
         # Fallback parsing without jq
-        log "⚠️ jq not available, using grep/sed for parsing"
+        log "jq not available, using grep/sed for parsing"
         
         status=$(echo "$response_body" | grep -o '"status":"[^"]*"' | sed 's/.*"status":"\([^"]*\)".*/\1/' || echo "unknown")
         message=$(echo "$response_body" | grep -o '"message":"[^"]*"' | sed 's/.*"message":"\([^"]*\)".*/\1/' || echo "unknown")
         
-        log "📊 Callback Processing Status:"
+        log "Callback Processing Status:"
         log "  - Status: $status"
         log "  - Message: $message"
         log "  - Dataset ID: $dataset_id"
@@ -112,22 +112,22 @@ if [ "$exit_code" -eq 0 ] && [ -f "$temp_response" ]; then
     
     # Check if callback processing was completed
     if [ "$status" = "completed" ]; then
-        log "✅ Dataset generation callback processed successfully"
-        log "🔄 Callback payload has been sent to status update endpoint"
+        log "Dataset generation callback processed successfully"
+        log "Callback payload has been sent to status update endpoint"
         log "   - agencies: [{agencyId: X, syncStatus: Synced_with_CKB/Sync_with_CKB_Failed}, ...]"
         log "   - datasetId: $dataset_id"
         log "   - generationStatus: Generation_Success/Generation_Failed"
         
     else
-        log "⚠️ Unexpected status received: $status"
-        log "⚠️ Message: $message"
+        log "Unexpected status received: $status"
+        log "Message: $message"
     fi
     
     # Cleanup temp file
     rm -f "$temp_response"
     
 else
-    log "❌ Python script execution failed with exit code: $exit_code"
+    log "Python script execution failed with exit code: $exit_code"
     if [ -f "$temp_response" ]; then
         log "Error response: $(cat $temp_response)"
         rm -f "$temp_response"
@@ -135,7 +135,7 @@ else
     exit 1
 fi
 
-log "✅ Dataset generation callback processing completed successfully"
-log "📋 Summary: Dataset ID: $dataset_id, Request Status: $status"
+log "Dataset generation callback processing completed successfully"
+log "Summary: Dataset ID: $dataset_id, Request Status: $status"
 
 exit 0
\ No newline at end of file
diff --git a/DSL/CronManager/script/dataset_pipeline_s3.sh b/DSL/CronManager/script/dataset_pipeline_s3.sh
index 898b7619..01795e79 100755
--- a/DSL/CronManager/script/dataset_pipeline_s3.sh
+++ b/DSL/CronManager/script/dataset_pipeline_s3.sh
@@ -11,6 +11,77 @@ log() {
   echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
 }
 
+# Report a failed dataset generation to the agencies/data/generation endpoint.
+# Arguments:
+#   $1 - human-readable failure message (logged only, not sent)
+#   $2 - dataset id, inserted unquoted into the JSON payload
+#   $3 - JSON body produced by the download step (may be empty or "null")
+#   $4 - "extraction_failure" reports only agencies whose extraction_success
+#        is false; any other value reports every agency in the body
+# Sets globals: STATUS_UPDATE_URL, agencies_array, failure_payload, failure_response.
+send_failure_status_update() {
+    local failure_message="$1"
+    local dataset_id="$2"
+    local response_body="$3"
+    local failure_type="$4"  # "extraction_failure" or "generation_failure"
+    local agency_pattern agency_entries
+
+    STATUS_UPDATE_URL="http://ruuter-public:8086/global-classifier/agencies/data/generation"
+
+    agencies_array="[]"
+
+    if [ -n "$response_body" ] && [ "$response_body" != "null" ]; then
+        if command -v jq >/dev/null 2>&1; then
+            if [ "$failure_type" = "extraction_failure" ]; then
+                # Only agencies with extraction_success = false
+                agencies_array=$(echo "$response_body" | jq -r '[.downloaded_files[]? | select(.extraction_success == false) | {"agencyId": .agency_id, "syncStatus": "Sync_with_CKB_Failed"}]' 2>/dev/null || echo "[]")
+            else
+                # All agencies failed
+                agencies_array=$(echo "$response_body" | jq -r '[.downloaded_files[]? | {"agencyId": .agency_id, "syncStatus": "Sync_with_CKB_Failed"}]' 2>/dev/null || echo "[]")
+            fi
+        else
+            # Fallback parsing without jq. The array is built from the output of one
+            # pipeline captured with $(...): appending to a variable inside
+            # `... | while read` runs in a subshell, so the additions would be lost
+            # and the payload would always carry an empty agencies list.
+            if [ "$failure_type" = "extraction_failure" ]; then
+                # Only include agencies where extraction_success is false
+                agency_pattern='"agency_id"[[:space:]]*:[[:space:]]*"[^"]*"[^}]*"extraction_success"[[:space:]]*:[[:space:]]*false'
+            else
+                # All agencies failed
+                agency_pattern='"agency_id"[[:space:]]*:[[:space:]]*"[^"]*"'
+            fi
+            # Flatten first so an object spread over several lines still matches.
+            agency_entries=$(echo "$response_body" \
+                | tr -d '\n\r\t' \
+                | grep -o "$agency_pattern" \
+                | sed 's/.*"agency_id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/{"agencyId": "\1", "syncStatus": "Sync_with_CKB_Failed"}/' \
+                | tr '\n' ',' \
+                | sed 's/,$//')
+            agencies_array="[$agency_entries]"
+        fi
+    fi
+
+    # agencies_array is already a JSON array literal, so it is embedded as-is.
+    failure_payload=$(cat <<EOF
+{
+  "agencies": $agencies_array,
+  "datasetId": $dataset_id,
+  "generationStatus": "Generation_Failed"
+}
+EOF
+)
+
+    log "Sending failure status update: $failure_message"
+    log "Failure payload: $failure_payload"
+
+    # Best-effort: the curl exit status is not checked, only the response is logged.
+    failure_response=$(curl -s -X POST "$STATUS_UPDATE_URL" \
+        -H "Content-Type: application/json" \
+        -d "$failure_payload")
+    log "Failure status update response: $failure_response"
+}
+
 echo "Started Shell Script for S3 DataSet Processing"
 PROGRESS_CREATE_URL="http://ruuter-public:8086/global-classifier/datasets/progress/create"
 PROGRESS_UPDATE_URL="http://ruuter-public:8086/global-classifier/datasets/progress/update"
@@ -43,12 +114,12 @@ data_generation_request="$signedUrls"
 chmod 777 /app/data
 
 # Install required Python packages if not present
-echo "🔍 Installing required Python packages..."
+echo "Installing required Python packages..."
 python3 -m pip install --quiet --no-cache-dir requests pydantic || {
-    echo "❌ Failed to install packages"
+    echo "Failed to install packages"
     exit 1
 }
-echo "✅ Required packages installed"
+echo "Required packages installed"
 
 log "S3 data processing request received"
 log "Encoded data length: ${#data_generation_request} characters"
@@ -58,7 +129,7 @@ DOWNLOAD_SCRIPT="/app/src/s3_dataset_processor/download_source_dataset.py"
 CURRENT_DATASET_ID="$datasetId"
 CURRENT_DATASET_ID=$(echo "$CURRENT_DATASET_ID" | tr -d '"')
 
-log "🔍 Calling direct Python script to download files..."
+log "Calling direct Python script to download files..."
 
 # Update progress session with initial status
 progress_update_payload=$(cat <<EOF
@@ -87,14 +158,14 @@ python3 "$DOWNLOAD_SCRIPT" \
   --output-json "$temp_response"
 
 exit_code=$?
-log "🔍 Python script exit code: $exit_code"
+log "Python script exit code: $exit_code"
 
 if [ -f "$temp_response" ]; then
-    log "📄 Contents of output JSON:"
+    log "Contents of output JSON:"
     cat "$temp_response"
     ls -l "$temp_response"
 else
-    log "⚠️ No output JSON file was generated."
+    log "No output JSON file was generated."
 fi
 
 echo "DEBUG: exit_code='$exit_code'"
@@ -102,10 +173,10 @@ echo "DEBUG: temp_response='$temp_response'"
 ls -l "$temp_response"
 # Check if script execution was successful
 if [ "$exit_code" -eq 0 ] && [ -f "$temp_response" ]; then
-    log "✅ Python script execution successful"
+    log "Python script execution successful"
     
     response_body=$(cat "$temp_response")
-    log "🔍 Response: $response_body"
+    log "Response: $response_body"
     
     # Improved JSON parsing - remove whitespace and check for success
     # Use multiple methods to ensure we catch the success field
@@ -113,7 +184,7 @@ if [ "$exit_code" -eq 0 ] && [ -f "$temp_response" ]; then
     success_check2=$(echo "$response_body" | grep -o '"success":true' | wc -l)
     success_check3=$(echo "$response_body" | tr -d ' \n\r\t' | grep -o '"success":true' | wc -l)
     
-    log "🔍 Success check results: method1=$success_check1, method2=$success_check2, method3=$success_check3"
+    log "Success check results: method1=$success_check1, method2=$success_check2, method3=$success_check3"
     
     if [ "$success_check1" -gt 0 ] || [ "$success_check2" -gt 0 ] || [ "$success_check3" -gt 0 ]; then
         success_status="true"
@@ -121,16 +192,42 @@ if [ "$exit_code" -eq 0 ] && [ -f "$temp_response" ]; then
         success_status="false"
     fi
     
-    log "🔍 Success status: $success_status"
+    log "Success status: $success_status"
     
     if [ "$success_status" = "true" ]; then
-        log "✅ S3 download and extraction successful"
+        log "S3 download and extraction successful"
         
         # Get successful downloads count using improved parsing
         successful_downloads=$(echo "$response_body" | grep -o '"successful_downloads"[[:space:]]*:[[:space:]]*[0-9]*' | grep -o '[0-9]*' | tail -1)
         [ -z "$successful_downloads" ] && successful_downloads=0
         log "Successfully downloaded and extracted $successful_downloads files"
 
+        # Guard: with zero extracted files there is nothing to generate from, so report the failure and stop here
+        if [ "$successful_downloads" -eq 0 ]; then
+            log "No files were successfully downloaded and extracted. Aborting dataset generation."
+            
+            # Mark the progress session as finished with a "Fail" status (100%, processComplete=true)
+            progress_update_payload=$(cat <<EOF
+{
+  "sessionId": "$sessionId",
+  "generationStatus": "Fail",
+  "generationMessage": "Generation Failed",
+  "progressPercentage": 100,
+  "processComplete": true
+}
+EOF
+)
+            # POST the payload to the progress update endpoint; the response is only logged
+            progress_update_response=$(curl -s -X POST "$PROGRESS_UPDATE_URL" \
+            -H "Content-Type: application/json" \
+            -d "$progress_update_payload")
+            log "Progress status updated to failed: $progress_update_response"
+            # "extraction_failure" limits the report to agencies whose extraction_success is false
+            send_failure_status_update "All agency downloads failed - no data available for generation" "$CURRENT_DATASET_ID" "$response_body" "extraction_failure"
+            rm -f /tmp/download_response.json
+            exit 1
+        fi
+
         # Update progress session with successful downloads
         progress_update_payload=$(cat <<EOF
 {
@@ -149,7 +246,7 @@ EOF
         echo "Progress session update response: $progress_update_response"
 
         # Prepare dataset generation payload as a list
-        log "🔄 Preparing dataset generation payload..."
+        log "Preparing dataset generation payload..."
         
         # Create temporary file for building the JSON payload
         temp_payload="/tmp/dataset_payload.json"
@@ -179,7 +276,7 @@ EOF
             done
         else
             # Fallback parsing without jq - improved regex
-            log "⚠️ jq not available, using grep/sed for parsing"
+            log "jq not available, using grep/sed for parsing"
             
             # Clean the response body and extract agency_id and folder_path pairs
             cleaned_response=$(echo "$response_body" | tr -d '\n\r\t' | tr -s ' ')
@@ -211,10 +308,10 @@ EOF
         
         # Read the complete payload
         payload_content=$(cat "$temp_payload")
-        log "🔍 Dataset generation payload: $payload_content"
+        log "Dataset generation payload: $payload_content"
         
         # Call the dataset generation service with the list payload
-        log "🔄 Calling dataset generation service for bulk processing..."
+        log "Calling dataset generation service for bulk processing..."
         
         dataset_response=$(curl -s -o /tmp/dataset_response_body.txt -w "%{http_code}" -X POST "http://dataset-gen-service:8000/generate-bulk" \
             -H "Content-Type: application/json" \
@@ -223,8 +320,8 @@ EOF
         dataset_http_code="$dataset_response"
         dataset_response_body=$(cat /tmp/dataset_response_body.txt)
 
-        log "🔍 Dataset Generation HTTP Status Code: $dataset_http_code"
-        log "🔍 Dataset Generation Response: $dataset_response_body"
+        log "Dataset Generation HTTP Status Code: $dataset_http_code"
+        log "Dataset Generation Response: $dataset_response_body"
         
         if [ "$dataset_http_code" = "200" ]; then
             progress_update_payload=$(cat <<EOF
@@ -242,13 +339,32 @@ EOF
             -H "Content-Type: application/json" \
             -d "$progress_update_payload")
             echo "Progress session update response: $progress_update_response"
-            log "✅ Dataset generation request submitted successfully"
-            log "✅ Background task initiated for dataset processing"
+            log "Dataset generation request submitted successfully"
+            log "Background task initiated for dataset processing"
             log "Response: $dataset_response_body"
         else
-            log "❌ Failed to submit dataset generation request"
+            log "Failed to submit dataset generation request"
             log "HTTP Status: $dataset_http_code"
             log "Error response: $dataset_response_body"
+            
+            # Mark the progress session as finished with a "Fail" status (100%, processComplete=true)
+            progress_update_payload=$(cat <<EOF
+{
+  "sessionId": "$sessionId",
+  "generationStatus": "Fail",
+  "generationMessage": "Generation Failed",
+  "progressPercentage": 100,
+  "processComplete": true
+}
+EOF
+)
+            # POST the payload to the progress update endpoint; the response is only logged
+            progress_update_response=$(curl -s -X POST "$PROGRESS_UPDATE_URL" \
+            -H "Content-Type: application/json" \
+            -d "$progress_update_payload")
+            log "Progress status updated to failed: $progress_update_response"
+            # "generation_failure" reports every agency found in the download response as failed
+            send_failure_status_update "Dataset generation service call failed" "$CURRENT_DATASET_ID" "$response_body" "generation_failure"
             # Cleanup temp files
             rm -f /tmp/dataset_payload.json /tmp/dataset_response_body.txt /tmp/download_response.json
             exit 1
@@ -257,18 +373,19 @@ EOF
         # Cleanup temp files
         rm -f /tmp/dataset_payload.json /tmp/dataset_response_body.txt
         
-        log "✅ S3 Dataset Processing completed successfully"
-        log "✅ Dataset generation is running in background"
+        log "S3 Dataset Processing completed successfully"
+        log "Dataset generation is running in background"
         
     else
-        log "❌ S3 download failed - success status: $success_status"
+        log "S3 download failed - success status: $success_status"
         log "Response: $response_body"
+        send_failure_status_update "S3 download and extraction failed" "$CURRENT_DATASET_ID" "$response_body" "extraction_failure"
         rm -f /tmp/download_response.json
         exit 1
     fi
     
 else
-    log "❌ Python script execution failed with exit code: $exit_code"
+    log "Python script execution failed with exit code: $exit_code"
     if [ -f "$temp_response" ]; then
         log "Error response: $(cat $temp_response)"
         rm -f /tmp/download_response.json
@@ -279,7 +396,7 @@ fi
 # Cleanup temp file
 rm -f /tmp/download_response.json
 
-log "🎉 S3 Dataset Processing script completed successfully"
-log "📋 Note: Dataset generation is running as a background task"
+log "S3 Dataset Processing script completed successfully"
+log "Note: Dataset generation is running as a background task"
 
 exit 0
\ No newline at end of file
diff --git a/DSL/CronManager/script/fetch_multi_chunk.sh b/DSL/CronManager/script/fetch_multi_chunk.sh
deleted file mode 100755
index 3f719e6c..00000000
--- a/DSL/CronManager/script/fetch_multi_chunk.sh
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/bash
-
-echo "Started Shell Script for Multi-Chunk Download and Aggregation"
-
-# Check if environment variables are set
-if [ -z "$datasetId" ] || [ -z "$chunkIds" ]; then
-  echo "Please set the datasetId and chunkIds environment variables."
-  exit 1
-fi
-
-# Logging function
-log() {
-  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" >&2
-}
-
-log "Multi-chunk download request started"
-log "Dataset ID: $datasetId"
-log "Chunk IDs: $chunkIds"
-
-# Clean the parameters
-DATASET_ID=$(echo "$datasetId" | tr -d '"')
-CHUNK_IDS=$(echo "$chunkIds" | tr -d '"')
-
-log "Cleaned Dataset ID: $DATASET_ID"
-log "Cleaned Chunk IDs: $CHUNK_IDS"
-
-# Validate chunk IDs format
-if [[ ! "$CHUNK_IDS" =~ ^[0-9]+([[:space:]]+[0-9]+)*$ ]]; then
-    log "❌ Invalid chunk IDs format. Expected space-separated numbers."
-    error_response="{\"success\": false, \"dataset_id\": \"$DATASET_ID\", \"chunk_ids\": \"$CHUNK_IDS\", \"error\": \"Invalid chunk IDs format\", \"message\": \"Expected space-separated numbers like '1 2 3'\"}"
-    echo "$error_response"
-    exit 1
-fi
-
-# Create temp_chunks directory if it doesn't exist
-mkdir -p /app/temp_chunks
-log "Created/verified temp_chunks directory"
-
-# Install required Python packages if not present
-log "🔍 Installing required Python packages..."
-python3 -m pip install --quiet --no-cache-dir requests pydantic || {
-    log "❌ Failed to install packages"
-    exit 1
-}
-log "✅ Required packages installed"
-
-# Direct Python script path for downloading multiple chunks (inside container)
-DOWNLOAD_SCRIPT="/app/src/s3_dataset_processor/fetch_multi_chunk.py"
-
-log "🔍 Calling Python script to download and aggregate chunks..."
-
-# Create temporary file for response
-temp_response="/tmp/multi_chunk_response.json"
-
-# Call the Python script
-python3 "$DOWNLOAD_SCRIPT" \
-  --dataset-id "$DATASET_ID" \
-  --chunk-ids "$CHUNK_IDS" \
-  --output-json "$temp_response"
-
-exit_code=$?
-log "🔍 Python script exit code: $exit_code"
-
-if [ "$exit_code" -eq 0 ] && [ -f "$temp_response" ]; then
-    log "✅ Multi-chunk processing successful"
-    
-    response_body=$(cat "$temp_response")
-    
-    # Check if aggregation was successful
-    success_check=$(echo "$response_body" | grep -o '"success"[[:space:]]*:[[:space:]]*true' | wc -l)
-    
-    if [ "$success_check" -gt 0 ]; then
-        log "✅ Chunks aggregated successfully"
-        
-        # Extract summary information for logging
-        if command -v jq >/dev/null 2>&1; then
-            total_items=$(echo "$response_body" | jq -r '.download_summary.total_items_aggregated // 0' 2>/dev/null || echo "0")
-            successful_chunks=$(echo "$response_body" | jq -r '.download_summary.successful_downloads // 0' 2>/dev/null || echo "0")
-            failed_chunks=$(echo "$response_body" | jq -r '.download_summary.failed_downloads // 0' 2>/dev/null || echo "0")
-            
-            log "📊 Aggregation Summary:"
-            log "  - Total items aggregated: $total_items"
-            log "  - Successful chunk downloads: $successful_chunks"
-            log "  - Failed chunk downloads: $failed_chunks"
-        else
-            log "📊 Multi-chunk aggregation completed (install jq for detailed summary)"
-        fi
-        
-        # Output the JSON response to stdout (this goes to CronManager caller)
-        cat "$temp_response"
-        
-        # Cleanup
-        rm -f "$temp_response"
-        
-        log "✅ Multi-chunk aggregation completed successfully"
-        exit 0
-    else
-        log "❌ Multi-chunk aggregation failed - check response for details"
-        
-        # Still output the response so caller can see the error
-        cat "$temp_response"
-        
-        # Cleanup
-        rm -f "$temp_response"
-        exit 1
-    fi
-    
-else
-    log "❌ Python script execution failed with exit code: $exit_code"
-    
-    # Create error response
-    error_response="{\"success\": false, \"dataset_id\": \"$DATASET_ID\", \"chunk_ids\": \"$CHUNK_IDS\", \"error\": \"Script execution failed\", \"message\": \"Python script failed with exit code $exit_code\"}"
-    echo "$error_response"
-    
-    if [ -f "$temp_response" ]; then
-        log "Error response: $(cat $temp_response)"
-        rm -f "$temp_response"
-    fi
-    exit 1
-fi
\ No newline at end of file
diff --git a/DSL/CronManager/script/fetch_single_chunk.sh b/DSL/CronManager/script/fetch_single_chunk.sh
deleted file mode 100755
index 77df4869..00000000
--- a/DSL/CronManager/script/fetch_single_chunk.sh
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/bin/bash
-
-echo "Started Shell Script for Chunk Download"
-
-# Check if environment variables are set
-if [ -z "$datasetId" ] || [ -z "$pageNum" ]; then
-  echo "Please set the datasetId and pageNum environment variables."
-  exit 1
-fi
-
-# Logging function
-log() {
-  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" >&2
-}
-
-log "Chunk download request started"
-log "Dataset ID: $datasetId"
-log "Page Number: $pageNum"
-
-# Clean the parameters
-DATASET_ID=$(echo "$datasetId" | tr -d '"')
-PAGE_NUM=$(echo "$pageNum" | tr -d '"')
-
-log "Cleaned Dataset ID: $DATASET_ID"
-log "Cleaned Page Number: $PAGE_NUM"
-
-# Install required Python packages if not present
-log "🔍 Installing required Python packages..."
-python3 -m pip install --quiet --no-cache-dir requests pydantic || {
-    log "❌ Failed to install packages"
-    exit 1
-}
-log "✅ Required packages installed"
-
-# Direct Python script path for downloading chunk (inside container)
-DOWNLOAD_SCRIPT="/app/src/s3_dataset_processor/fetch_chunk_without_filter.py"
-
-log "🔍 Calling Python script to download chunk..."
-
-# Create temporary file for response
-temp_response="/tmp/chunk_response.json"
-
-# Call the Python script
-python3 "$DOWNLOAD_SCRIPT" \
-  --dataset-id "$DATASET_ID" \
-  --page-num "$PAGE_NUM" \
-  --output-json "$temp_response"
-
-exit_code=$?
-log "🔍 Python script exit code: $exit_code"
-
-if [ "$exit_code" -eq 0 ] && [ -f "$temp_response" ]; then
-    log "✅ Chunk download successful"
-    
-    response_body=$(cat "$temp_response")
-    log "🔍 Response: $response_body"
-    
-    # Check if download was successful
-    success_check=$(echo "$response_body" | grep -o '"success"[[:space:]]*:[[:space:]]*true' | wc -l)
-    
-    if [ "$success_check" -gt 0 ]; then
-        log "✅ Chunk downloaded successfully"
-        
-        # Output the JSON response to stdout (this goes to CronManager caller)
-        cat "$temp_response"
-        
-        # Cleanup
-        rm -f "$temp_response"
-        
-        log "✅ Chunk download completed successfully"
-        exit 0
-    else
-        log "❌ Chunk download failed - check response for details"
-        
-        # Still output the response so caller can see the error
-        cat "$temp_response"
-        
-        # Cleanup
-        rm -f "$temp_response"
-        exit 1
-    fi
-    
-else
-    log "❌ Python script execution failed with exit code: $exit_code"
-    
-    # Create error response
-    error_response="{\"success\": false, \"dataset_id\": \"$DATASET_ID\", \"page_num\": $PAGE_NUM, \"error\": \"Script execution failed\", \"message\": \"Python script failed with exit code $exit_code\"}"
-    echo "$error_response"
-    
-    if [ -f "$temp_response" ]; then
-        log "Error response: $(cat $temp_response)"
-        rm -f "$temp_response"
-    fi
-    exit 1
-fi
\ No newline at end of file
diff --git a/DSL/CronManager/script/presigned_url_generate.sh b/DSL/CronManager/script/presigned_url_generate.sh
new file mode 100644
index 00000000..49fe4a31
--- /dev/null
+++ b/DSL/CronManager/script/presigned_url_generate.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+echo "Starting presigned URL generation..."
+
+# Check if environment variable is set
+if [ -z "$centopsAgencies" ]; then
+  echo "Error: centopsAgencies environment variable is not set"
+  exit 1
+fi
+
+echo "Received centopsAgencies: $centopsAgencies"
+
+# Decode the URL-encoded string for debugging
+decoded_agencies=$(python3 -c "import urllib.parse, sys; print(urllib.parse.unquote(sys.argv[1]))" "$centopsAgencies" 2>/dev/null)
+echo "Decoded agencies: $decoded_agencies"
+
+# Install uv if not found (using unmanaged installation for security)
+UV_INSTALL_DIR="/app/tools/uv"
+UV_BIN="$UV_INSTALL_DIR/uv"
+
+if [ ! -f "$UV_BIN" ]; then
+    echo "[UV] Installing uv to isolated directory..."
+    
+    # Create installation directory
+    mkdir -p "$UV_INSTALL_DIR" || {
+        echo "[ERROR] Failed to create UV installation directory"
+        exit 1
+    }
+    
+    # Use unmanaged installation to avoid root directory modifications
+    curl -LsSf https://astral.sh/uv/install.sh | env UV_UNMANAGED_INSTALL="$UV_INSTALL_DIR" sh || {
+        echo "[ERROR] Failed to install uv"
+        exit 1
+    }
+    
+    # Verify installation
+    if [ ! -x "$UV_BIN" ]; then
+        echo "[ERROR] UV installation failed or not executable"
+        exit 1
+    fi
+    
+    # Verify functionality
+    "$UV_BIN" --version || {
+        echo "[ERROR] UV installation corrupted"
+        exit 1
+    }
+    
+    echo "[UV] Successfully installed uv (unmanaged) to $UV_INSTALL_DIR"
+fi
+
+# Activate Python virtual environment
+VENV_PATH="/app/python_virtual_env"
+echo "[VENV] Activating virtual environment at: $VENV_PATH"
+source "$VENV_PATH/bin/activate" || {
+    echo "[ERROR] Failed to activate virtual environment"
+    exit 1
+}
+
+# Install required packages
+echo "[PACKAGES] Installing required packages..."
+"$UV_BIN" pip install --python "$VENV_PATH/bin/python3" "boto3>=1.35.0" || exit 1
+"$UV_BIN" pip install --python "$VENV_PATH/bin/python3" "botocore>=1.35.0" || exit 1
+"$UV_BIN" pip install --python "$VENV_PATH/bin/python3" "requests>=2.32.0" || exit 1
+echo "[PACKAGES] All packages installed successfully"
+
+export PYTHONPATH="/app:/app/src:$PYTHONPATH"
+
+# Call Python script with the agencies data
+echo "Calling Python script..."
+python3 "/app/src/scripts/generate_signed_urls.py" "$centopsAgencies" 2>&1
+
+# Check if Python script execution was successful
+if [ $? -eq 0 ]; then
+    echo "Presigned URL generation completed successfully"
+else
+    echo "Error: Presigned URL generation failed"
+    exit 1
+fi
diff --git a/DSL/CronManager/script/train_script_starter.sh b/DSL/CronManager/script/train_script_starter.sh
index 790429e3..dc1e199b 100755
--- a/DSL/CronManager/script/train_script_starter.sh
+++ b/DSL/CronManager/script/train_script_starter.sh
@@ -7,34 +7,32 @@ GET_FIRST_COME_TRAINING_JOB_SQL="http://resql:8082/global-classifier/get-queued-
 GET_DATA_MODEL_BY_MODEL_ID_SQL="http://resql:8082/global-classifier/get-data-model-info-by-given-model-id"
 UPDATE_JOB_STATUS="http://resql:8082/global-classifier/update-training-job-status"
 
-echo "🔄 [START] Training script starter"
+echo "[START] Training script starter"
 
 # Check if training is in progress
-echo "🔍 [CHECK] Checking if training is in progress..."
+echo "[CHECK] Checking if training is in progress..."
 response_job_status_in_progres=$(curl -s -X POST "$CHECK_JOB_STATUS_IN_PROGRESS_SQL")
-echo "🔍 [DEBUG] Training status response: '$response_job_status_in_progres'"
+echo "[DEBUG] Training status response: '$response_job_status_in_progres'"
 
 if [ $? -ne 0 ] || [ -z "$response_job_status_in_progres" ]; then
-    echo "❌ [ERROR] Failed to check training status"
+    echo "[ERROR] Failed to check training status"
     exit 1
 fi
 
 if echo "$response_job_status_in_progres" | grep -q '"hasTrainingInProgress":true'; then
-    echo "⚠️ [INFO] Training is already in progress. Exiting..."
+    echo "[INFO] Training is already in progress. Exiting..."
     exit 0
 fi
 
-echo "✅ [AVAILABLE] No training in progress."
-
+echo "[AVAILABLE] No training in progress."
 # Get first queued training job
-echo "🎯 [QUEUE] Getting first queued training job..."
+echo "[QUEUE] Getting first queued training job..."
 response_first_come_training_job=$(curl -s -X POST "$GET_FIRST_COME_TRAINING_JOB_SQL")
-echo "🔍 [DEBUG] First queued job response: '$response_first_come_training_job'"
-
+echo "[DEBUG] First queued job response: '$response_first_come_training_job'"
 # Handle empty response (no queued jobs) - this is normal, not an error
 if [ -z "$response_first_come_training_job" ]; then
-    echo "ℹ️ [INFO] No queued training jobs found. Nothing to process."
-    echo "✅ [DONE] Training script starter completed - no work to do"
+    echo "[INFO] No queued training jobs found. Nothing to process."
+    echo "[DONE] Training script starter completed - no work to do"
     exit 0
 fi
 
@@ -45,8 +43,8 @@ if echo "$response_first_come_training_job" | grep -q '"hasQueuedJobs":false' ||
    [ "$response_first_come_training_job" = "{}" ] || \
    [ "$response_first_come_training_job" = "null" ] || \
    [ "$response_first_come_training_job" = "[]" ]; then
-    echo "ℹ️ [INFO] No queued training jobs available. Queue is empty."
-    echo "✅ [DONE] Training script starter completed - no work to do"
+    echo "[INFO] No queued training jobs available. Queue is empty."
+    echo "[DONE] Training script starter completed - no work to do"
     exit 0
 fi
 
@@ -59,74 +57,74 @@ minor_version=$(echo "$response_first_come_training_job" | sed -E 's/.*"minorVer
 latest=$(echo "$response_first_come_training_job" | sed -E 's/.*"latest":[[:space:]]*(true|false).*/\1/')
 deployment_environment=$(echo "$response_first_come_training_job" | sed -E 's/.*"deploymentEnvironment":"?([^",}]+)"?.*/\1/')
 
-echo "🔍 [----DEBUG----] Raw response: '$response_first_come_training_job'"
+echo "[DEBUG] Raw response: '$response_first_come_training_job'"
 
 if [ -z "$model_id" ]; then
-    echo "❌ [ERROR] Model ID not found in response"
-    echo "🔍 [DEBUG] Raw response: '$response_first_come_training_job'"
+    echo "[ERROR] Model ID not found in response"
+    echo "[DEBUG] Raw response: '$response_first_come_training_job'"
     exit 1
 fi
 
 if [ -z "$job_id" ] || [ "$job_id" = "$response_first_come_training_job" ]; then
-    echo "❌ [ERROR] Job ID not found or invalid in response"
-    echo "🔍 [DEBUG] Raw response: '$response_first_come_training_job'"
+    echo "[ERROR] Job ID not found or invalid in response"
+    echo "[DEBUG] Raw response: '$response_first_come_training_job'"
     exit 1
 fi
 
-echo "📦 [MODEL] Model ID: $model_id"
-echo "📦 [JOB] Job ID: $job_id"
-echo "📦 [MODEL] Model Name: $model_name"
-echo "📦 [VERSION] Major Version: $major_version"
-echo "📦 [VERSION] Minor Version: $minor_version"
-echo "📦 [VERSION] Latest: $latest"
-echo "📦 [ENVIRONMENT] Deployment Environment: $deployment_environment"
+echo "[MODEL] Model ID: $model_id"
+echo "[JOB] Job ID: $job_id"
+echo "[MODEL] Model Name: $model_name"
+echo "[VERSION] Major Version: $major_version"
+echo "[VERSION] Minor Version: $minor_version"
+echo "[VERSION] Latest: $latest"
+echo "[ENVIRONMENT] Deployment Environment: $deployment_environment"
 
 response_update_job_status=$(curl -s -X POST "$UPDATE_JOB_STATUS" \
     -H "Content-Type: application/json" \
     -d "{\"jobId\": $job_id, \"jobStatus\": \"training-in-progress\"}")
-echo "🔍 [DEBUG] Update job status response: '$response_update_job_status'"
+echo "[DEBUG] Update job status response: '$response_update_job_status'"
 
 # Get dataset ID
 response_get_dataset_id=$(curl -s -X POST "$GET_DATA_MODEL_BY_MODEL_ID_SQL" \
     -H "Content-Type: application/json" \
     -d "{\"model_id\": $model_id}")
-echo "🔍 [DEBUG] Dataset ID response: '$response_get_dataset_id'"
+echo "[DEBUG] Dataset ID response: '$response_get_dataset_id'"
 
 # Handle empty response
 if [ -z "$response_get_dataset_id" ] || [ "$response_get_dataset_id" = "[]" ]; then
-    echo "❌ [ERROR] No dataset information found for model ID: $model_id"
+    echo "[ERROR] No dataset information found for model ID: $model_id"
     exit 1
 fi
 
 dataset_id=$(echo "$response_get_dataset_id" | sed -E 's/.*"connectedDsId":([0-9]+).*/\1/')
 
 if [ -z "$dataset_id" ] || [ "$dataset_id" = "$response_get_dataset_id" ]; then
-    echo "❌ [ERROR] Connected Dataset ID not found in response"
-    echo "🔍 [DEBUG] Raw response: '$response_get_dataset_id'"
+    echo "[ERROR] Connected Dataset ID not found in response"
+    echo "[DEBUG] Raw response: '$response_get_dataset_id'"
     exit 1
 fi
 
-echo "📦 [DATASET] Dataset ID: $dataset_id"
+echo "[DATASET] Dataset ID: $dataset_id"
 
 base_models_json=$(echo "$response_get_dataset_id" | sed -nE 's/.*"value":"(\[[^]]+\])".*/\1/p' | sed 's/\\"/"/g')
 
 if [[ "$base_models_json" == "["* ]] && [[ "$base_models_json" == *"]" ]]; then
     model_types="$base_models_json"
-    echo "📦 [MODELS] Model types extracted from DB: $model_types"
+    echo "[MODELS] Model types extracted from DB: $model_types"
 else
-    echo "❌ [ERROR] Failed to extract base models from response"
-    echo "❌ [ERROR] Raw response: $response_get_dataset_id"
-    echo "❌ [ERROR] Extracted base_models: $base_models_json"
+    echo "[ERROR] Failed to extract base models from response"
+    echo "[ERROR] Raw response: $response_get_dataset_id"
+    echo "[ERROR] Extracted base_models: $base_models_json"
     exit 1
 fi
 
 # Activate existing virtualenv
-echo "✅ Activating existing virtualenv at /app/python_virtual_env"
-source /app/python_virtual_env/bin/activate || { echo "❌ Failed to activate virtualenv"; exit 1; }
+echo "[INFO] Activating existing virtualenv at /app/python_virtual_env"
+source /app/python_virtual_env/bin/activate || { echo "[ERROR] Failed to activate virtualenv"; exit 1; }
 export PYTHONPATH="/app:/app/src:/app/src/training:/app/src/s3_dataset_processor:$PYTHONPATH"
-echo "🔍 [DEBUG] PYTHONPATH set to: $PYTHONPATH"
+echo "[DEBUG] PYTHONPATH set to: $PYTHONPATH"
 # Add these debug commands
-echo "🔍 [DEBUG] Virtual environment debugging:"
+echo "[DEBUG] Virtual environment debugging:"
 echo "  - VIRTUAL_ENV: $VIRTUAL_ENV"
 echo "  - Python path: $(which python)"
 echo "  - Python version: $(python --version)"
@@ -134,53 +132,80 @@ echo "  - Pip path: $(which pip)"
 echo "  - Site packages: $(python -c "import site; print(site.getsitepackages())")"
 
 # List installed packages
-echo "📦 [DEBUG] Installed packages in current environment:"
+echo "[DEBUG] Installed packages in current environment:"
 pip list | head -20  # Show first 20 packages
 
 # Check required packages
-echo "🔍 [DEBUG] Testing individual package imports inside virtualenv..."
+echo "[DEBUG] Testing individual package imports inside virtualenv..."
 missing_pkgs=()
 for pkg in torch transformers sklearn mlflow pandas numpy loguru; do
-    echo "🔍 [DEBUG] Testing import for $pkg"
+    echo "[DEBUG] Testing import for $pkg"
     if ! python -c "import $pkg" &>/dev/null; then
-        echo "❌ [MISSING or failed import] Package '$pkg'"
+        echo "[ERROR] Package '$pkg' is missing or failed to import"
         missing_pkgs+=("$pkg")
     else
-        echo "✅ [FOUND] Package '$pkg'"
+        echo "[INFO] Package '$pkg' found"
     fi
 done
 
 # Install if missing
 if [ ${#missing_pkgs[@]} -ne 0 ]; then
-    echo "⚡ [ACTION] Missing packages detected: ${missing_pkgs[*]}"
+    echo "[ACTION] Missing packages detected: ${missing_pkgs[*]}"
 
-    if ! command -v uv &>/dev/null; then
-        echo "⚡ Installing uv inside virtualenv..."
-        pip install uv || { echo "❌ Failed to install uv"; exit 1; }
-    else
-        echo "✅ uv already installed."
+    # Install uv using secure unmanaged installation (same as presigned_url_generate.sh)
+    UV_INSTALL_DIR="/app/tools/uv"
+    UV_BIN="$UV_INSTALL_DIR/uv"
+
+    if [ ! -f "$UV_BIN" ]; then
+        echo "[UV] Installing uv to isolated directory..."
+        
+        # Create installation directory
+        mkdir -p "$UV_INSTALL_DIR" || {
+            echo "[ERROR] Failed to create UV installation directory"
+            exit 1
+        }
+        
+        # Use unmanaged installation to avoid root directory modifications
+        curl -LsSf https://astral.sh/uv/install.sh | env UV_UNMANAGED_INSTALL="$UV_INSTALL_DIR" sh || {
+            echo "[ERROR] Failed to install uv"
+            exit 1
+        }
+        
+        # Verify installation
+        if [ ! -x "$UV_BIN" ]; then
+            echo "[ERROR] UV installation failed or not executable"
+            exit 1
+        fi
+        
+        # Verify functionality
+        "$UV_BIN" --version || {
+            echo "[ERROR] UV installation corrupted"
+            exit 1
+        }
+        
+        echo "[UV] Successfully installed uv (unmanaged) to $UV_INSTALL_DIR"
     fi
 
     if [ ! -f /app/src/training/requirements-gpu.txt ]; then
-        echo "❌ /app/src/training/requirements-gpu.txt not found!"
+        echo "[ERROR] /app/src/training/requirements-gpu.txt not found!"
         exit 1
     fi
 
-    echo "📦 [INSTALL] Installing from /app/src/training/requirements-gpu.txt using uv..."
-    uv pip install -r /app/src/training/requirements-gpu.txt || {
-        echo "⚠️ uv install failed — trying pip as fallback..."
+    echo "[INSTALL] Installing from /app/src/training/requirements-gpu.txt using secure uv..."
+    "$UV_BIN" pip install --python "$VIRTUAL_ENV/bin/python3" -r /app/src/training/requirements-gpu.txt || {
+        echo "[WARNING] uv install failed — trying pip as fallback..."
         pip install -r /app/src/training/requirements-gpu.txt || {
-            echo "❌ Both uv and pip install failed inside virtualenv"
+            echo "[ERROR] Both uv and pip install failed inside virtualenv"
             exit 1
         }
     }
 
-    echo "🎉 [SUCCESS] Required packages installed successfully inside virtualenv."
+    echo "[SUCCESS] Required packages installed successfully inside virtualenv."
 else
-    echo "🎉 [SUCCESS] All required Python packages are already installed inside virtualenv."
+    echo "[SUCCESS] All required Python packages are already installed inside virtualenv."
 fi
-echo "✅ [VIRTUALENV] All checks passed, proceeding with training script..."
-echo "🚀 [TRAINING] Starting training for Model ID: $model_id, Dataset ID: $dataset_id, Model Major Version: $major_version, Model Minor Version: $minor_version, Model Name: $model_name"
+echo "[SUCCESS] All checks passed, proceeding with training script..."
+echo "[INFO] Starting training for Model ID: $model_id, Dataset ID: $dataset_id, Model Major Version: $major_version, Model Minor Version: $minor_version, Model Name: $model_name"
 
 # Set up training parameters
 TRAINING_SCRIPT="/app/src/training/model_trainer.py"
@@ -192,13 +217,7 @@ PROCESSED_DATA_DIR="/app/data/processed"
 training_output_dir="${TRAINING_OUTPUT_DIR}/model_${model_id}"
 mkdir -p "$training_output_dir"
 
-# # Set default training parameters (can be made configurable)
-# max_seq_length=128
-# num_epochs=3
-# batch_size=8
-# learning_rate=2e-5
-
-echo "📋 [PARAMS] Training parameters:"
+echo "[INFO] Training parameters:"
 echo "  - Dataset ID: $dataset_id"
 echo "  - Model ID: $model_id"
 echo "  - Model Type: $model_types"
@@ -211,7 +230,7 @@ echo "  - Is Latest: $latest"
 echo "  - Deployment Environment: $deployment_environment"
 
 # Call the training script
-echo "🎓 [EXECUTE] Calling training script..."
+echo "[EXECUTE] Calling training script..."
 
 python3 "$TRAINING_SCRIPT" \
     --model_types "$model_types" \
@@ -223,16 +242,13 @@ python3 "$TRAINING_SCRIPT" \
     --minor_version "$minor_version" \
     --latest "$latest" \
     --deployment_environment "$deployment_environment" \
-    # --data_dir "$PROCESSED_DATA_DIR" \
-    # --output_dir "$training_output_dir" \
-    # --mlflow_tracking_uri "$MLFLOW_TRACKING_URI" \
 
 training_exit_code=$?
 
 # Check training result
 if [ $training_exit_code -eq 0 ]; then
-    echo "🎉 [SUCCESS] Training completed successfully"
-    echo "📁 [OUTPUT] Training outputs saved to: $training_output_dir"
+    echo "[SUCCESS] Training completed successfully"
+    echo "[OUTPUT] Training outputs saved to: $training_output_dir"
 
     # Update job status to trained
     echo "[UPDATE] Updating job status to trained..."
@@ -240,7 +256,7 @@ if [ $training_exit_code -eq 0 ]; then
     -H "Content-Type: application/json" \
     -d "{\"jobId\": $job_id, \"jobStatus\": \"trained\"}")
         
-    echo "🔍 [DEBUG] Update job status to trained response: '$response_update_job_status_trained'"
+    echo "[DEBUG] Update job status to trained response: '$response_update_job_status_trained'"
 else
     echo "[FAILED] Training failed with exit code: $training_exit_code"
 
@@ -252,4 +268,4 @@ else
     exit 1
 fi
 
-echo "✅ [DONE] Training script starter completed"
\ No newline at end of file
+echo "[DONE] Training script starter completed"
\ No newline at end of file
diff --git a/DSL/Resql/global-classifier/POST/get-agencies.sql b/DSL/Resql/global-classifier/POST/get-agencies.sql
index 514aed0b..b2bc9db0 100644
--- a/DSL/Resql/global-classifier/POST/get-agencies.sql
+++ b/DSL/Resql/global-classifier/POST/get-agencies.sql
@@ -16,7 +16,7 @@ SELECT
 FROM 
     integrated_agencies
 WHERE
-    (:agency_name = 'all' OR agency_name ILIKE '%' || :agency_name || '%')
+    (:agency_name = 'all' OR agency_name ILIKE '%' || REPLACE(REPLACE(REPLACE(:agency_name, '\', '\\'), '%', '\%'), '_', '\_') || '%' ESCAPE '\')
 ORDER BY
     CASE WHEN :sort_by = 'agency_name' AND :sort_type = 'asc' THEN agency_name END ASC,
     CASE WHEN :sort_by = 'agency_name' AND :sort_type = 'desc' THEN agency_name END DESC,
diff --git a/DSL/Resql/global-classifier/POST/get-datasets.sql b/DSL/Resql/global-classifier/POST/get-datasets.sql
index bd2febd8..1245a0e5 100644
--- a/DSL/Resql/global-classifier/POST/get-datasets.sql
+++ b/DSL/Resql/global-classifier/POST/get-datasets.sql
@@ -11,6 +11,11 @@ FROM
     dataset_versions
 WHERE
     (:generation_status = 'all' OR generation_status ILIKE '%' || :generation_status || '%')
+    AND (:dataset_name = 'all' 
+         OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT('v', major, '.', minor))) > 0
+         OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT(major, '.', minor))) > 0
+         OR POSITION(LOWER(:dataset_name) IN LOWER(major::text)) > 0
+         OR POSITION(LOWER(:dataset_name) IN LOWER(minor::text)) > 0)
 ORDER BY
     CASE WHEN :sort_by = 'created_at' AND :sort_type = 'asc' THEN created_at END ASC,
     CASE WHEN :sort_by = 'created_at' AND :sort_type = 'desc' THEN created_at END DESC,
diff --git a/DSL/Resql/global-classifier/POST/insert-agency-presigned-url.sql b/DSL/Resql/global-classifier/POST/insert-agency-presigned-url.sql
new file mode 100644
index 00000000..34f96ad3
--- /dev/null
+++ b/DSL/Resql/global-classifier/POST/insert-agency-presigned-url.sql
@@ -0,0 +1,3 @@
+-- Insert agency presigned URL
+INSERT INTO public.mock_ckb (agency_id, agency_data_hash, data_url)
+VALUES (:agencyId, :agencyDataHash, :dataUrl);
\ No newline at end of file
diff --git a/DSL/Resql/global-classifier/POST/update-agency-presigned-url.sql b/DSL/Resql/global-classifier/POST/update-agency-presigned-url.sql
new file mode 100644
index 00000000..69a640bf
--- /dev/null
+++ b/DSL/Resql/global-classifier/POST/update-agency-presigned-url.sql
@@ -0,0 +1,6 @@
+-- Update agency presigned URL
+UPDATE public.mock_ckb
+SET 
+    data_url = :dataUrl,
+    created_at = NOW()
+WHERE agency_id = :agencyId;
\ No newline at end of file
diff --git a/DSL/Ruuter.private/global-classifier/GET/datasets/list.yml b/DSL/Ruuter.private/global-classifier/GET/datasets/list.yml
index 3790fd8d..3696b583 100644
--- a/DSL/Ruuter.private/global-classifier/GET/datasets/list.yml
+++ b/DSL/Ruuter.private/global-classifier/GET/datasets/list.yml
@@ -23,6 +23,9 @@ declaration:
       - field: sortType
         type: string
         description: "Query parameter 'sortType' for sort direction (asc, desc)"
+      - field: datasetName
+        type: string
+        description: "Query parameter 'datasetName' for filtering datasets by name"
 
 extractRequestData:
   assign:
@@ -31,6 +34,7 @@ extractRequestData:
     generationStatus: ${incoming.params.generationStatus || 'all'}
     sortBy: ${incoming.params.sortBy || ''}
     sortType: ${incoming.params.sortType || 'desc'}
+    datasetName: ${incoming.params.datasetName || 'all'}
 
 getAllDatasets:
   call: http.post
@@ -42,6 +46,7 @@ getAllDatasets:
       generation_status: ${generationStatus}
       sort_by: ${sortBy}
       sort_type: ${sortType}
+      dataset_name: ${datasetName}
   result: datasets_res
   next: return_result
 
diff --git a/DSL/Ruuter.public/global-classifier/POST/ckb/agency-data-url.yml b/DSL/Ruuter.public/global-classifier/POST/ckb/agency-data-url.yml
new file mode 100644
index 00000000..2d8b03c0
--- /dev/null
+++ b/DSL/Ruuter.public/global-classifier/POST/ckb/agency-data-url.yml
@@ -0,0 +1,66 @@
+declaration:
+  call: declare
+  version: 0.1
+  description: "Mock endpoint for generating agency data URLs"
+  method: post
+  accepts: json
+  returns: json
+  namespace: global-classifier
+
+get_centops_agencies:
+  call: http.post
+  args:
+    url: "[#GLOBAL_CLASSIFIER_RESQL]/mock-get-agencies-from-centops"
+  result: fetch_agencies_from_centops
+  next: log_result
+
+log_result:
+  log: ${fetch_agencies_from_centops.response.body}
+  next: assign_request_data
+
+assign_request_data:
+  assign:
+    centops_agencies: ${encodeURIComponent(JSON.stringify(fetch_agencies_from_centops.response.body))}
+  next: execute_cron_manager
+
+execute_cron_manager:
+  call: http.post
+  args:
+    url: "[#GLOBAL_CLASSIFIER_CRON_MANAGER]/execute/mock_signed_url_generation/mock_signed_url_generate"
+    query:
+      centopsAgencies: ${centops_agencies}
+  result: res
+  next: check_cron_manager_status
+
+# Report success only when CronManager answered with a 2xx status; otherwise take the failure branch.
+check_cron_manager_status:
+  switch:
+    - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300}
+      next: assign_success_response
+  next: assign_fail_response
+
+assign_success_response:
+  assign:
+    format_res: {
+      message: "Centops agency data URLs synchronized successfully",
+      operationSuccessful: true,
+    }
+  next: return_ok
+
+assign_fail_response:
+  assign:
+    format_res: {
+      message: "Centops agency data URLs synchronization failed",
+      operationSuccessful: false,
+    }
+  next: return_bad_request
+
+return_ok:
+  status: 200
+  return: ${format_res}
+  next: end
+
+return_bad_request:
+  status: 400
+  return: ${format_res}
+  next: end
\ No newline at end of file
diff --git a/DSL/Ruuter.public/global-classifier/POST/data/callback.yml b/DSL/Ruuter.public/global-classifier/POST/data/callback.yml
index e069deab..9aec279c 100644
--- a/DSL/Ruuter.public/global-classifier/POST/data/callback.yml
+++ b/DSL/Ruuter.public/global-classifier/POST/data/callback.yml
@@ -25,7 +25,7 @@ declare:
         description: "List of agency IDs for which the dataset generation was completed"
 
 log_callback_received:
-  log: "📞 Dataset generation callback received - Task ID: ${incoming.body.task_id}, Status: ${incoming.body.status}, File Path: ${incoming.body.filePath}"
+  log: "Dataset generation callback received - Task ID: ${incoming.body.task_id}, Status: ${incoming.body.status}, File Path: ${incoming.body.filePath}"
   next: extract_callback_data
 
 extract_callback_data:
@@ -38,7 +38,7 @@ extract_callback_data:
   next: log_detailed_info
 
 log_detailed_info:
-  log: "📋 Callback Details - Task: ${task_id}, Status: ${status}, Message: ${message}, filePath: ${file_path}, results: ${results}"
+  log: "Callback Details - Task: ${task_id}, Status: ${status}, Message: ${message}, filePath: ${file_path}, results: ${results}"
   next: check_for_request_data
 
 check_for_request_data:
diff --git a/GUI/src/components/FormElements/FormSelect/FormSelect.scss b/GUI/src/components/FormElements/FormSelect/FormSelect.scss
index b6b4f434..6db2b3b7 100644
--- a/GUI/src/components/FormElements/FormSelect/FormSelect.scss
+++ b/GUI/src/components/FormElements/FormSelect/FormSelect.scss
@@ -124,5 +124,13 @@
     &:focus {
       background-color: get-color(black-coral-0);
     }
+    &--disabled {
+     color: get-color(black-coral-6);
+     cursor: not-allowed;
+     pointer-events: none; 
+     background-color: get-color(white);
+   }
+
   }
+
 }
diff --git a/GUI/src/components/FormElements/FormSelect/index.tsx b/GUI/src/components/FormElements/FormSelect/index.tsx
index e1187a49..e1f4bd42 100644
--- a/GUI/src/components/FormElements/FormSelect/index.tsx
+++ b/GUI/src/components/FormElements/FormSelect/index.tsx
@@ -18,6 +18,7 @@ import { ControllerRenderProps } from 'react-hook-form';
 type FormSelectOption = {
   label: string;
   value: string | { name: string; id: string };
+  disabled?: boolean;
 };
 
 type FormSelectProps = Partial<ControllerRenderProps> &
@@ -130,9 +131,10 @@ const FormSelect = forwardRef<HTMLSelectElement, FormSelectProps>(
                 <li
                   className={clsx('select__option', {
                     'select__option--selected': highlightedIndex === index,
+                    'select__option--disabled': item.disabled,
                   })}
                   key={`${item.value}${index}`}
-                  {...getItemProps({ item, index })}
+                  {...getItemProps({ item, index, disabled: item.disabled })}
                 >
                   {item.label}
                 </li>
diff --git a/GUI/src/components/FormElements/FormTextarea/FormTextarea.scss b/GUI/src/components/FormElements/FormTextarea/FormTextarea.scss
index 51750b6d..17e45330 100644
--- a/GUI/src/components/FormElements/FormTextarea/FormTextarea.scss
+++ b/GUI/src/components/FormElements/FormTextarea/FormTextarea.scss
@@ -95,7 +95,11 @@
     }
   }
 
-  &--disabled & {
+  &--disabled {
+    textarea {
+      cursor: not-allowed;
+      resize: none;
+    }
     input {
       background-color: get-color(black-coral-0);
     }
diff --git a/GUI/src/components/FormElements/FormTextarea/index.tsx b/GUI/src/components/FormElements/FormTextarea/index.tsx
index b1f23fe1..55ea5cd1 100644
--- a/GUI/src/components/FormElements/FormTextarea/index.tsx
+++ b/GUI/src/components/FormElements/FormTextarea/index.tsx
@@ -67,6 +67,7 @@ const FormTextarea = forwardRef<HTMLTextAreaElement, TextareaProps>((
           defaultValue={defaultValue}
           className={textareaAutosizeClasses}
           aria-label={hideLabel ? label : undefined}
+          disabled={disabled}
           onChange={(e) => {
             if (onChange) onChange(e);
             handleOnChange(e);
diff --git a/GUI/src/components/molecules/DataModelForm/index.tsx b/GUI/src/components/molecules/DataModelForm/index.tsx
index c7e6b324..abf7263b 100644
--- a/GUI/src/components/molecules/DataModelForm/index.tsx
+++ b/GUI/src/components/molecules/DataModelForm/index.tsx
@@ -62,6 +62,11 @@ const DataModelForm: FC<DataModelFormType> = ({
               error={errors?.modelName}
             />
           </div>
+          {dataModel.modelName && dataModel.modelName.length > 256 && (
+            <div style={{ color: 'red', fontSize: '13px', marginTop: '8px', marginBottom: '16px' }}>
+              {t('dataModels.dataModelForm.errors.modelNameLength')}
+            </div>
+          )}
           <div className="grey-card">
             {t('dataModels.dataModelForm.modelVersion')}{' '}
             <Label type="success">{dataModel?.version}</Label>
diff --git a/GUI/src/pages/DataModels/CreateDataModel.tsx b/GUI/src/pages/DataModels/CreateDataModel.tsx
index 02e88a55..5e3a8d30 100644
--- a/GUI/src/pages/DataModels/CreateDataModel.tsx
+++ b/GUI/src/pages/DataModels/CreateDataModel.tsx
@@ -73,7 +73,7 @@ const CreateDataModel: FC = () => {
       open({
         title: t('dataModels.createDataModel.successTitle'),
         content: t('dataModels.createDataModel.successDesc'),
-        footer: (<div className='flex-grid'><Button appearance={ButtonAppearanceTypes.SECONDARY} onClick={() => { close() }}>Close</Button><Button onClick={() => { navigate('/data-models'); close(); }}>View all Data Models</Button></div>)
+        footer: (<div className='flex-grid'><Button appearance={ButtonAppearanceTypes.SECONDARY} onClick={() => { close() }}>Close</Button><Button onClick={() => { navigate('/data-models'), close() }}>View all Data Models</Button></div>)
       });
 
     },
@@ -110,6 +110,7 @@ const CreateDataModel: FC = () => {
   const isCreateDisabled = () => {
     return (
       !dataModel.modelName ||
+      dataModel.modelName.length > 256 ||
       !dataModel.datasetId ||
       !dataModel.baseModels ||
       (Array.isArray(dataModel.baseModels) && dataModel.baseModels.length === 0) ||
diff --git a/GUI/src/pages/Datasets/index.tsx b/GUI/src/pages/Datasets/index.tsx
index e7ed28a8..213c93a6 100644
--- a/GUI/src/pages/Datasets/index.tsx
+++ b/GUI/src/pages/Datasets/index.tsx
@@ -22,8 +22,8 @@ const Datasets: FC = () => {
   const [searchTerm, setSearchTerm] = useState<string>('all');
 
   const { data: datasets, isLoading } = useQuery({
-    queryKey: datasetQueryKeys.DATASET_OVERVIEW(pageIndex, sortOption),
-    queryFn: () => getDatasetsOverview(pageIndex, sortOption),
+    queryKey: datasetQueryKeys.DATASET_OVERVIEW(pageIndex, sortOption, searchTerm),
+    queryFn: () => getDatasetsOverview(pageIndex, sortOption, searchTerm),
   });
 
   const pageCount = datasets?.[0]?.totalPages ?? 1;
diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx
index 77fd2766..fb08882c 100644
--- a/GUI/src/pages/TestModel/index.tsx
+++ b/GUI/src/pages/TestModel/index.tsx
@@ -1,3 +1,4 @@
+/* eslint-disable @typescript-eslint/no-unused-expressions */
 import { useMutation, useQuery } from '@tanstack/react-query';
 import { Button, FormSelect, FormTextarea } from 'components';
 import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner';
@@ -91,6 +92,10 @@ const TestModel: FC = () => {
 
   const processedResults = classificationResult ? processClassificationResult(classificationResult) : [];
 
+  const selectOptions = modelVersions?.length > 0
+    ? toLabelValueArray(modelVersions, 'id', 'version') ?? []
+    : [{ label: t('testModels.noModels') ?? 'No models available', value: '', disabled: true }];
+
 
   return (
     <div>
@@ -108,15 +113,17 @@ const TestModel: FC = () => {
               <FormSelect
                 label=""
                 name="modelId"
-                options={modelVersions ? toLabelValueArray(modelVersions, 'id', 'version') ?? [] : []}
+                options={selectOptions}
                 placeholder={t('testModels.placeholder') ?? ''}
                 onSelectionChange={(selection) => {
-                  handleChange('modelId', selection?.value as string);
-                  setIsClassifyEnabled(false);
+                  if (selection && !selection.disabled) {
+                    handleChange('modelId', selection?.value as string);
+                    setIsClassifyEnabled(false);
+                  }
                 }}
                 value={testModel?.modelId === null ? t('testModels.errors.modelNotExist') : undefined} defaultValue={testModel?.modelId ?? undefined}
               />
-              <Button showLoadingIcon={mutation.isLoading} disabled={!testModel.modelId || mutation.isLoading} onClick={() => { setModelLoadingStatus(t('dataModels.loadDataModel.loading') ?? ""); mutation.mutate(testModel.modelId); setColor("#005aa3"); }}>
+              <Button showLoadingIcon={mutation.isLoading} disabled={!testModel.modelId ||  mutation.isLoading} onClick={() => { setModelLoadingStatus(t('dataModels.loadDataModel.loading') ?? ""), mutation.mutate(testModel.modelId), setColor("#005aa3") }}>
                 Load Model
               </Button>
               <div style={{ width: "100%", color: color }} >{modelLoadingStatus}</div>
@@ -131,6 +138,7 @@ const TestModel: FC = () => {
               maxLength={1000}
               onChange={(e) => handleChange('text', e.target.value)}
               showMaxLength={true}
+              disabled={!isClassifyEnabled}
             />
           </div>
           <div className="testModalClassifyButton">
diff --git a/GUI/src/pages/ViewDataset/index.tsx b/GUI/src/pages/ViewDataset/index.tsx
index e8400132..131e9651 100644
--- a/GUI/src/pages/ViewDataset/index.tsx
+++ b/GUI/src/pages/ViewDataset/index.tsx
@@ -456,7 +456,7 @@ const ViewDataset = () => {
         {datasetIsLoading && <SkeletonTable rowCount={10} />}
         {!datasetIsLoading && (
           <DataTable
-            data={updatedDataset}
+            data={updatedDataset ?? []}
             columns={dataColumns as ColumnDef<string, string>[]}
             pagination={pagination}
             rowSelection={rowSelection}
diff --git a/GUI/src/services/datasets.ts b/GUI/src/services/datasets.ts
index 1e4d668b..d2f519b9 100644
--- a/GUI/src/services/datasets.ts
+++ b/GUI/src/services/datasets.ts
@@ -4,7 +4,8 @@ import { DATASET_PAGE_SIZE, OVERVIEW_PAGE_SIZE } from 'utils/constants';
 
 export async function getDatasetsOverview(
   pageNum: number,
-  sort: string
+  sort: string,
+  searchTerm: string = 'all'
 ) {
   const { data } = await apiDev.get(datasetsEndpoints.GET_OVERVIEW(), {
     params: {
@@ -13,6 +14,7 @@ export async function getDatasetsOverview(
       sortBy: sort?.split(" ")?.[0],
       sortType: sort?.split(" ")?.[1],
       pageSize: OVERVIEW_PAGE_SIZE,
+      datasetName: searchTerm,
     },
   });
   return data?.response ?? [];
diff --git a/GUI/src/utils/commonUtilts.ts b/GUI/src/utils/commonUtilts.ts
index f720386f..b436c1b9 100644
--- a/GUI/src/utils/commonUtilts.ts
+++ b/GUI/src/utils/commonUtilts.ts
@@ -16,17 +16,20 @@ export const formattedArray = (data: string[]|undefined): FormattedOption[]|unde
 };
 
 export const toLabelValueArray = <T>(
-  data: T[] | undefined,
+  data: T[] | undefined | null,
   valueField: keyof T,
   labelField: keyof T
-): { label: string; value: string }[] | undefined => {
-  return data?.map((item) => ({
-    label: String(item[labelField]),
-    value: String(item[valueField]),
+): { label: string; value: string }[] => {
+  if (!Array.isArray(data)) {
+    console.warn('toLabelValueArray: Expected array, got', typeof data, data);
+    return [];
+  }
+  return data.map((item) => ({
+    label: String(item[labelField] ?? ''),
+    value: String(item[valueField] ?? ''),
   }));
 };
 
-
 export const convertTimestampToDateTime = (timestamp: number) => {
   return moment.unix(timestamp).format('YYYY-MM-DD HH:mm:ss');
 };
diff --git a/GUI/src/utils/queryKeys.ts b/GUI/src/utils/queryKeys.ts
index 27affe50..b2a6feb6 100644
--- a/GUI/src/utils/queryKeys.ts
+++ b/GUI/src/utils/queryKeys.ts
@@ -29,13 +29,15 @@ export const datasetQueryKeys = {
   DATASET_OVERVIEW: function (
     pageIndex?: number,
     generationStatus?: string,
-    sort?: string
+    sort?: string,
+    searchTerm?: string
   ) {
     return [
       'datasets/overview',
       pageIndex,
       generationStatus,
       sort,
+      searchTerm,
     ].filter((val) => val !== undefined);
   },
   GET_META_DATA: function (datasetId?: number|string) {
diff --git a/GUI/translations/en/common.json b/GUI/translations/en/common.json
index c9c92a31..470beb1c 100644
--- a/GUI/translations/en/common.json
+++ b/GUI/translations/en/common.json
@@ -327,7 +327,7 @@
     "title": "Data Generation Sessions",
     "inprogress": "Data Generation in-Progress",
     "fail": "Data Generation failed because {{class}} class found in the {{column}} column does not exist in hierarchy",
-    "noSessions": "No ongoing Data Generation sessions available"
+    "noSessions": "No ongoing data generation sessions available"
   },
   "correctedTexts": {
     "title": "Corrected Texts",
@@ -447,7 +447,8 @@
       "baseModels": "Select Base Models",
       "deploymentPlatform": "Select Deployment Environment",
       "errors": {
-        "datasetVersionNotExist": "Dataset version does not exist"
+        "datasetVersionNotExist": "Dataset version does not exist",
+        "modelNameLength": "Model name must be less than 256 characters"
       }
     }
   },
@@ -455,13 +456,14 @@
     "title": "Training Sessions",
     "inprogress": "Validation in-Progress",
     "fail": "Validation failed because {{class}} class found in the {{column}} column does not exist in hierarchy",
-    "noSessions": "No Active Training Sessions",
+    "noSessions": "No active training sessions",
     "noSessionsDesc": "There are currently no active training sessions. Once you start a training session, it will appear here. In the meantime, you can initiate a new training session to begin improving your models."
   },
   "testModels": {
     "title": "Test Model",
     "selectionLabel": "Model",
     "placeholder": "Choose model",
+    "noModels": "No models available",
     "classifyTextLabel": "Enter Text",
     "classify": "Classify",
     "predictedHierarchy": "Predicted Class Hierarchy : ",
diff --git a/GUI/translations/et/common.json b/GUI/translations/et/common.json
index 924845fb..ee25fbb3 100644
--- a/GUI/translations/et/common.json
+++ b/GUI/translations/et/common.json
@@ -447,7 +447,10 @@
       "datasetGroup": "Vali andmestiku grupp",
       "baseModels": "Vali baasmudelid",
       "deploymentPlatform": "Vali rakenduse platvorm",
-      "maturityLabel": "Vali valmiduse silt"
+      "maturityLabel": "Vali valmiduse silt",
+      "errors": {
+        "modelNameLength": "Mudeli nimi peab olema vähem kui 256 tähemärki"
+      }
     }
   },
   "trainingSessions": {
@@ -461,6 +464,7 @@
     "title": "Testige mudelit",
     "selectionLabel": "Mudel",
     "placeholder": "Valige mudel",
+    "noModels": "Mudeleid pole saadaval",
     "classifyTextLabel": "Sisestage tekst",
     "classify": "Klassifitseeri",
     "predictedHierarchy": "Prognoositud klassihierarhia: ",
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index a1739783..4cf20111 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -173,6 +173,7 @@ services:
       - ./grafana-configs/loki_logger.py:/app/src/training/loki_logger.py
       - ./constants.ini:/app/inference_scripts/constants.ini
       - cron_data:/app/data
+      - ./src/scripts:/app/src/scripts
     runtime: nvidia 
     environment:
       - NVIDIA_VISIBLE_DEVICES=all
@@ -383,29 +384,6 @@ services:
       timeout: 3s
       retries: 3
 
-  dataset-gen-ollama:
-    image: synthesisai/dataset-generator-ollama:latest
-    container_name: dataset-gen-ollama
-    ports:
-      - "11434:11434"
-    environment:
-      - NVIDIA_VISIBLE_DEVICES=all
-      - OLLAMA_USE_GPU=1
-      - OLLAMA_HOST=0.0.0.0
-    volumes:
-      - dataset_gen_ollama_models:/root/.ollama
-      - ./DSL/DatasetGenerator/ollama-entrypoint.sh:/ollama-entrypoint.sh
-    entrypoint: ["bash", "/ollama-entrypoint.sh"]
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    networks:
-      - bykstack
-
   #temporarary container to initialize S3 storage with necessary buckets and models  
   init-storage:
 
diff --git a/src/s3_dataset_processor/constants.py b/src/s3_dataset_processor/constants.py
index cfa2b978..09062281 100644
--- a/src/s3_dataset_processor/constants.py
+++ b/src/s3_dataset_processor/constants.py
@@ -13,3 +13,4 @@
 SYNCED_WITH_CKB = "Synced_with_CKB"
 SYNC_WITH_CKB_FAILED = "Sync_with_CKB_Failed"
 OUTPUT_DATA_DIR = "/app/output_datasets"
+DATA_DIRECTORY = "/app/data"
diff --git a/src/s3_dataset_processor/dataset_generation_callback_processor.py b/src/s3_dataset_processor/dataset_generation_callback_processor.py
index f8624e4c..2c9441a4 100644
--- a/src/s3_dataset_processor/dataset_generation_callback_processor.py
+++ b/src/s3_dataset_processor/dataset_generation_callback_processor.py
@@ -12,6 +12,7 @@
 import requests
 import traceback
 import os
+import shutil
 import pandas as pd
 from constants import (
     DATASET_UPDATE_URL,
@@ -22,6 +23,7 @@
     SYNC_WITH_CKB_FAILED,
     SCRIPT_DIR,
     PROGRESS_UPDATE_URL,
+    DATA_DIRECTORY,
 )
 
 # --- Logging Setup ---
@@ -189,6 +191,53 @@ def send_status_update(dataset_id: int, encoded_results: str) -> None:
         traceback.print_exc()
 
 
+def cleanup_temporary_files() -> None:
+    """Empty the temporary working directories after a successful S3 upload.
+
+    Clears the contents of ``DATA_DIRECTORY`` and ``OUTPUT_DATA_DIR`` (the
+    directories themselves are kept) and logs what was removed. A failure
+    while cleaning one directory is logged and does not prevent the other
+    from being cleaned or the summary from being logged.
+    """
+    cleanup_summary: list = []
+
+    for target_dir in (DATA_DIRECTORY, OUTPUT_DATA_DIR):
+        # isdir rather than exists: a missing path or a stray file has nothing to list.
+        if not os.path.isdir(target_dir):
+            continue
+        try:
+            _cleanup_directory_contents(target_dir, cleanup_summary)
+        except Exception as e:
+            logger.error(f"Error during cleanup: {e}")
+
+    _log_cleanup_results(cleanup_summary)
+
+
+def _cleanup_directory_contents(directory: str, cleanup_summary: list) -> None:
+    """Delete the contents of ``directory`` but keep it; successes go to ``cleanup_summary``, failures are logged."""
+    for item in os.listdir(directory):
+        item_path = os.path.join(directory, item)
+        try:
+            if os.path.islink(item_path) or os.path.isfile(item_path):  # symlinks are unlinked, never followed (rmtree rejects them)
+                os.remove(item_path)
+                cleanup_summary.append(f"Removed file: {item_path}")
+            elif os.path.isdir(item_path):
+                shutil.rmtree(item_path)
+                cleanup_summary.append(f"Removed directory: {item_path}")
+        except Exception as e:
+            logger.warning(f"Failed to remove {item_path}: {e}")
+
+
+def _log_cleanup_results(cleanup_summary: list) -> None:
+    """Write the cleanup summary to the log, one entry per line, or a note that nothing was removed."""
+    if not cleanup_summary:
+        logger.info("No temporary files found to clean up")
+        return
+    logger.info("Cleanup completed successfully:")
+    for removed_entry in cleanup_summary:
+        logger.info(f"  - {removed_entry}")
+
+
 def process_callback_background(
     file_path: str, encoded_results: str, session_id: int
 ) -> None:
@@ -256,6 +305,10 @@ def process_callback_background(
         send_status_update(dataset_id, encoded_results)
 
         logger.info("Processing completed successfully")
+
+        # Clean up temporary files before final notification
+        cleanup_temporary_files()
+
         notify_progress_uploading_to_s3(session_id)
 
     except Exception as e:
diff --git a/src/scripts/constants.py b/src/scripts/constants.py
new file mode 100644
index 00000000..a9f31b1b
--- /dev/null
+++ b/src/scripts/constants.py
@@ -0,0 +1,8 @@
+DATA_URL_INSERT_URL = "http://resql:8082/global-classifier/insert-agency-presigned-url"  # Resql endpoint tried first for each agency
+DATA_URL_UPDATE_URL = "http://resql:8082/global-classifier/update-agency-presigned-url"  # fallback endpoint when the insert reports a duplicate key
+MINIO_ENDPOINT = "http://minio:9000"  # endpoint_url handed to the S3 client
+MINIO_USER_ID = "minioadmin"  # S3 access key id; NOTE(review): hard-coded credential, confirm it is dev-only
+MINIO_USER_KEY = "minioadmin"  # S3 secret access key; NOTE(review): hard-coded credential, confirm it is dev-only
+REGION_NAME = "us-east-1"  # region_name handed to the S3 client
+SIGNATURE_VERSION = "s3v4"  # signature_version of the botocore Config
+BUCKET_NAME = "ckb"  # bucket the presigned URLs point into (<agencyName>/<agencyName>.zip)
diff --git a/src/scripts/generate_signed_urls.py b/src/scripts/generate_signed_urls.py
new file mode 100644
index 00000000..a8878b79
--- /dev/null
+++ b/src/scripts/generate_signed_urls.py
@@ -0,0 +1,185 @@
+import boto3
+from botocore.client import Config
+import sys
+import json
+import urllib.parse
+import requests
+from typing import List, Dict
+from src.scripts.constants import (
+    DATA_URL_INSERT_URL,
+    DATA_URL_UPDATE_URL,
+    MINIO_ENDPOINT,
+    MINIO_USER_ID,
+    MINIO_USER_KEY,
+    REGION_NAME,
+    SIGNATURE_VERSION,
+    BUCKET_NAME,
+)
+
+
+def upsert_agency_to_database(agency_id: str, agency_name: str, data_url: str) -> bool:
+    """
+    Store an agency's presigned data URL in the mock_ckb table via the Resql endpoints.
+
+    An INSERT is attempted first. If it is rejected with HTTP 400 and a
+    "duplicate key" message, the same payload is sent to the UPDATE endpoint.
+
+    Returns True when the INSERT or the fallback UPDATE answers HTTP 200, and
+    False for any other response or when an exception is raised (it is printed).
+    """
+    try:
+        payload = {
+            "agencyId": agency_id,
+            "agencyDataHash": f"{agency_name}_hash",
+            "dataUrl": data_url,
+        }
+
+        # Try the INSERT endpoint first
+        insert_response = requests.post(DATA_URL_INSERT_URL, json=payload, timeout=30)
+        if insert_response.status_code == 200:
+            print(f"Successfully inserted new agency {agency_id} to database")
+            return True
+
+        duplicate_key = (
+            insert_response.status_code == 400
+            and "duplicate key" in insert_response.text.lower()
+        )
+        if not duplicate_key:
+            print(f"Failed to insert agency {agency_id}: HTTP {insert_response.status_code}")
+            print(f"Response: {insert_response.text}")
+            return False
+
+        # The agency row already exists, so overwrite it with the new payload.
+        print(f"Agency {agency_id} exists, updating...")
+        update_response = requests.post(DATA_URL_UPDATE_URL, json=payload, timeout=30)
+        if update_response.status_code == 200:
+            print(f"Successfully updated agency {agency_id} in database")
+            return True
+
+        print(f"Failed to update agency {agency_id}: HTTP {update_response.status_code}")
+        print(f"Response: {update_response.text}")
+        return False
+    except Exception as e:
+        print(f"Error upserting agency {agency_id} to database: {e}")
+        return False
+
+
+def main():
+    """Generate presigned download URLs for agencies and store them via Resql.
+
+    ``sys.argv[1]`` must be a URL-encoded JSON array of objects with ``agencyId``
+    and ``agencyName``. Each named agency gets a 24-hour presigned GET URL for
+    ``<agencyName>/<agencyName>.zip`` in ``BUCKET_NAME``, which is then upserted.
+    Exits with status 1 on bad input, S3 client failure, or when no URL could be
+    generated; database failures are only printed.
+    """
+    print("Python script started...")
+
+    # Check if agencies data is provided as command line argument
+    if len(sys.argv) < 2:
+        print('Usage: python generate_signed_urls.py "<url_encoded_agencies_json>"')
+        print(
+            "Expected format: URL-encoded JSON array with agencyId and agencyName fields"
+        )
+        sys.exit(1)
+
+    try:
+        # Decode the URL-encoded argument, then parse it as JSON
+        agencies = json.loads(urllib.parse.unquote(sys.argv[1]))
+    except json.JSONDecodeError as e:
+        print(f"Error: Failed to parse agencies JSON: {e}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error during parsing: {e}")
+        import traceback
+
+        traceback.print_exc()
+        sys.exit(1)
+
+    # Valid JSON that is not an array cannot be iterated as agencies: fail cleanly.
+    if not isinstance(agencies, list):
+        print("Error: Agencies JSON must be an array of agency objects")
+        sys.exit(1)
+    print(f"Processing {len(agencies)} agencies")
+
+    try:
+        s3_client = boto3.client(
+            "s3",
+            endpoint_url=MINIO_ENDPOINT,
+            aws_access_key_id=MINIO_USER_ID,
+            aws_secret_access_key=MINIO_USER_KEY,
+            config=Config(signature_version=SIGNATURE_VERSION),
+            region_name=REGION_NAME,
+        )
+    except Exception as e:
+        print(f"Error creating S3 client: {e}")
+        sys.exit(1)
+
+    # Build list of files to process from agencies
+    files_to_process: List[Dict[str, str]] = []
+    for agency in agencies:
+        # Entries that are not JSON objects are reported like nameless agencies.
+        agency_name = agency.get("agencyName") if isinstance(agency, dict) else None
+        if not agency_name:
+            print(f"Warning: Agency missing agencyName: {agency}")
+            continue
+        files_to_process.append(
+            {
+                "bucket": BUCKET_NAME,
+                "key": f"{agency_name}/{agency_name}.zip",
+                "agencyId": agency.get("agencyId"),
+                # Carried explicitly: re-deriving the name from the key would
+                # truncate any name that itself contains "/".
+                "agencyName": agency_name,
+            }
+        )
+
+    if not files_to_process:
+        print("Error: No valid agencies found to process")
+        sys.exit(1)
+
+    # Generate presigned URLs
+    successful_agencies: List[Dict] = []
+
+    print("Generating presigned URLs...")
+    for file_info in files_to_process:
+        try:
+            url = s3_client.generate_presigned_url(
+                ClientMethod="get_object",
+                Params={"Bucket": file_info["bucket"], "Key": file_info["key"]},
+                ExpiresIn=24 * 3600,  # 24 hours in seconds
+            )
+        except Exception as e:
+            print(f"Failed to generate URL for: {file_info['key']}")
+            print(f"   Error: {str(e)}")
+            continue
+        successful_agencies.append(
+            {
+                "agency_id": file_info["agencyId"],
+                "agency_name": file_info["agencyName"],
+                "data_url": url,
+            }
+        )
+
+    print(f"Generated {len(successful_agencies)} URLs successfully")
+
+    # Nothing to store if every URL generation failed
+    if not successful_agencies:
+        print("No URLs were generated successfully")
+        sys.exit(1)
+
+    # Upsert agencies to database; the dict keys match the function's parameters
+    print("Storing agencies in database...")
+    db_success_count = 0
+    for agency_data in successful_agencies:
+        if upsert_agency_to_database(**agency_data):
+            db_success_count += 1
+    print(
+        f"Successfully stored {db_success_count}/{len(successful_agencies)} agencies in database"
+    )
+
+    print("Presigned URL generation completed successfully")
+
+
+if __name__ == "__main__":
+    main()