SANDAG · JYSD1 · Oct 3, 2025 · Sep 30, 2025 · Oct 1, 2025 · Oct 1, 2025
diff --git a/.github/workflows/validation_sp_update_kng_validation-tool(dev).yml b/.github/workflows/validation_sp_update_kng_validation-tool(dev).yml
@@ -0,0 +1,71 @@
+# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
+# More GitHub Actions for Azure: https://github.com/Azure/actions
+# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions
+
+name: Build and deploy Python app to Azure Web App - validation-tool
+
+on:
+  push:
+    branches:
+      - validation_sp_update_kng # the workflow runs on every push to this branch
+  workflow_dispatch: # also allows the workflow to be started manually
+
+jobs:
+  build: # installs dependencies as a sanity check, then uploads the repo (minus venv/) as an artifact
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read # required for actions/checkout
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python version
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Create virtual environment
+        run: python -m venv venv
+
+      # Every `run` step starts a new shell, so activating the venv in an
+      # earlier step has no effect here; call the venv's own pip instead.
+      - name: Install dependencies
+        run: venv/bin/pip install -r requirements.txt
+
+      # Optional: Add step to run tests here (PyTest, Django test suites, etc.)
+
+      - name: Upload artifact for deployment jobs
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-app
+          path: |
+            .
+            !venv/
+
+  deploy: # downloads the build artifact, logs in to Azure, and deploys to the 'dev' slot
+    runs-on: ubuntu-latest
+    needs: build
+    permissions:
+      id-token: write # required for requesting the JWT
+      contents: read # NOTE(review): this job has no actions/checkout step - confirm this is still needed
+
+    steps:
+      - name: Download artifact from build job
+        uses: actions/download-artifact@v4
+        with:
+          name: python-app
+
+      - name: Login to Azure
+        uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.AZUREAPPSERVICE_CLIENTID_728F9D7925DE4182BF6B18B9C5F323F5 }}
+          tenant-id: ${{ secrets.AZUREAPPSERVICE_TENANTID_1DB700BC4B8B4361AC2EBDCC1686194C }}
+          subscription-id: ${{ secrets.AZUREAPPSERVICE_SUBSCRIPTIONID_DE30FBD7016B4320BD60B3E657D06741 }}
+
+      - name: 'Deploy to Azure Web App'
+        uses: azure/webapps-deploy@v3
+        id: deploy-to-webapp
+        with:
+          app-name: 'validation-tool'
+          slot-name: 'dev'
+
diff --git a/.gitignore b/.gitignore
@@ -3,4 +3,6 @@
 __pycache__/
 .env
 venv_1/
-venv_2/
+venv_2/
+.vscode/
+.venv/
diff --git a/load_data.py b/load_data.py
@@ -6,6 +6,7 @@
 import yaml
 from pathlib import Path
 from databricks import sql
+from databricks.sdk.core import Config, oauth_service_principal
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -89,30 +90,58 @@ def read_metadata(scenario_path):
         df_route = pd.concat(dfs["df_route"],ignore_index=True)
         df_scenario = pd.concat(dfs["df_scenario"], ignore_index=True)
 
+    # elif ENV == 'Azure':
+    #     raw_ids = os.getenv("AZURE_SCENARIO_LIST", "")
+    #     scenario_id_list = [int(s.strip()) for s in raw_ids.split(',') if s.strip().isdigit()]
+    #     scenario_str = ','.join(map(str, scenario_id_list))
+    #     catalog = os.getenv("DBRICKS_CATALOG", "tam")
+
+    #     server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME")
+
+    #     def credential_provider():
+    #         config = Config(
+    #             host          = f"https://{server_hostname}",
+    #             client_id     = os.getenv("DATABRICKS_CLIENT_ID"),
+    #             client_secret = os.getenv("DATABRICKS_CLIENT_SECRET"))
+    #         return oauth_service_principal(config)
+
+    #     def query_to_df(cursor, query):
+    #         cursor.execute(query)
+    #         return cursor.fetchall_arrow().to_pandas()
+
     elif ENV == 'Azure':
         raw_ids = os.getenv("AZURE_SCENARIO_LIST", "")
         scenario_id_list = [int(s.strip()) for s in raw_ids.split(',') if s.strip().isdigit()]
         scenario_str = ','.join(map(str, scenario_id_list))
         catalog = os.getenv("DBRICKS_CATALOG", "tam")
-
-        def query_to_df(cursor, query):
-            cursor.execute(query)
-            return cursor.fetchall_arrow().to_pandas()
-
-        with sql.connect(
-            server_hostname=os.getenv("DATABRICKS_SERVER_HOSTNAME"),
-            http_path=os.getenv("DATABRICKS_HTTP_PATH"),
-            access_token=os.getenv("DATABRICKS_TOKEN"),
-        ) as connection:
-            with connection.cursor() as cursor:
-                df1 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.fwy WHERE scenario_id IN ({scenario_str})")
-                df2 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.all_class WHERE scenario_id IN ({scenario_str})")
-                df3 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.truck WHERE scenario_id IN ({scenario_str})")
-                df4 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.board WHERE scenario_id IN ({scenario_str})")
-                df5 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.regional_vmt WHERE scenario_id IN ({scenario_str})")
-                df_link = query_to_df(cursor, f"SELECT scenario_id, ID, Length, geometry FROM {catalog}.abm3.network__emme_hwy_tcad WHERE  scenario_id IN ({scenario_str})")
-                df_route = query_to_df(cursor, f"SELECT scenario_id, route_name, earlyam_hours, evening_hours, transit_route_shape as geometry FROM {catalog}.abm3.network__transit_route WHERE  scenario_id IN ({scenario_str})")
-                df_scenario = query_to_df(cursor, f"SELECT scenario_id, scenario_name, scenario_yr FROM {catalog}.abm3.main__scenario WHERE scenario_id IN ({scenario_str})")
+        server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME")
+
+        def credential_provider():  # passed to sql.connect() as credentials_provider
+            config = Config(
+                host          = f"https://{server_hostname}",  # server_hostname comes from DATABRICKS_SERVER_HOSTNAME
+                client_id     = os.getenv("DATABRICKS_CLIENT_ID"),
+                client_secret = os.getenv("DATABRICKS_CLIENT_SECRET"))
+            return oauth_service_principal(config)  # OAuth service-principal credentials built from the env vars above
+
+        def query_to_df(query):
+            """Run `query` on a newly opened Databricks connection and return the result as a pandas DataFrame."""
+            with sql.connect(  # NOTE(review): opens a new connection on every call (eight calls below) - confirm this is intended over sharing one connection
+                server_hostname=server_hostname,
+                http_path=os.getenv("DATABRICKS_HTTP_PATH"),
+                credentials_provider=credential_provider
+            ) as connection:
+                with connection.cursor() as cursor:
+                    cursor.execute(query)
+                    return cursor.fetchall_arrow().to_pandas()  # Arrow result converted to pandas
+
+        df1 = query_to_df(f"SELECT * FROM {catalog}.validation.fwy WHERE scenario_id IN ({scenario_str})")
+        df2 = query_to_df(f"SELECT * FROM {catalog}.validation.all_class WHERE scenario_id IN ({scenario_str})")
+        df3 = query_to_df(f"SELECT * FROM {catalog}.validation.truck WHERE scenario_id IN ({scenario_str})")
+        df4 = query_to_df(f"SELECT * FROM {catalog}.validation.board WHERE scenario_id IN ({scenario_str})")
+        df5 = query_to_df(f"SELECT * FROM {catalog}.validation.regional_vmt WHERE scenario_id IN ({scenario_str})")
+        df_link = query_to_df(f"SELECT scenario_id, ID, Length, geometry FROM {catalog}.abm3.network__emme_hwy_tcad WHERE  scenario_id IN ({scenario_str})")
+        df_route = query_to_df(f"SELECT scenario_id, route_name, earlyam_hours, evening_hours, transit_route_shape as geometry FROM {catalog}.abm3.network__transit_route WHERE  scenario_id IN ({scenario_str})")
+        df_scenario = query_to_df(f"SELECT scenario_id, scenario_name, scenario_yr FROM {catalog}.abm3.main__scenario WHERE scenario_id IN ({scenario_str})")
 
         # Clean up data
         df1 = df1.dropna(subset=['count_day', 'day_flow']).drop(columns=['loader__delta_hash_key','loader__updated_date'], errors='ignore').drop_duplicates()

diff --git a/requirements.txt b/requirements.txt
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,4 +3,6 @@ @@
     __pycache__/
     .env
     venv_1/
-    venv_2/
+    venv_2/
+    .vscode/
+    .venv/