Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
# More GitHub Actions for Azure: https://github.com/Azure/actions
# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions

name: Build and deploy Python app to Azure Web App - validation-tool

# Runs on pushes to the validation_sp_update_kng branch and on manual dispatch.
on:
  push:
    branches:
      - validation_sp_update_kng
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read #This is required for actions/checkout

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python version
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # Each `run` step starts a fresh shell, so the virtual environment has to
      # be activated in the same step that installs into it; otherwise pip
      # installs into the runner's global interpreter instead of ./venv.
      - name: Create virtual environment and install dependencies
        run: |
          python -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      # Optional: Add step to run tests here (PyTest, Django test suites, etc.)

      # The venv directory is excluded from the uploaded artifact.
      - name: Upload artifact for deployment jobs
        uses: actions/upload-artifact@v4
        with:
          name: python-app
          path: |
            .
            !venv/

  deploy:
    runs-on: ubuntu-latest
    needs: build
    permissions:
      id-token: write #This is required for requesting the JWT
      contents: read #Read-only repository access; this job does not run actions/checkout

    steps:
      - name: Download artifact from build job
        uses: actions/download-artifact@v4
        with:
          name: python-app

      - name: Login to Azure
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZUREAPPSERVICE_CLIENTID_728F9D7925DE4182BF6B18B9C5F323F5 }}
          tenant-id: ${{ secrets.AZUREAPPSERVICE_TENANTID_1DB700BC4B8B4361AC2EBDCC1686194C }}
          subscription-id: ${{ secrets.AZUREAPPSERVICE_SUBSCRIPTIONID_DE30FBD7016B4320BD60B3E657D06741 }}

      - name: 'Deploy to Azure Web App'
        uses: azure/webapps-deploy@v3
        id: deploy-to-webapp
        with:
          app-name: 'validation-tool'
          slot-name: 'dev'

4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@
__pycache__/
.env
venv_1/
venv_2/
venv_2/
.vscode/
.venv/
67 changes: 48 additions & 19 deletions load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import yaml
from pathlib import Path
from databricks import sql
from databricks.sdk.core import Config, oauth_service_principal
from dotenv import load_dotenv

load_dotenv()
Expand Down Expand Up @@ -89,30 +90,58 @@ def read_metadata(scenario_path):
df_route = pd.concat(dfs["df_route"],ignore_index=True)
df_scenario = pd.concat(dfs["df_scenario"], ignore_index=True)

# elif ENV == 'Azure':
# raw_ids = os.getenv("AZURE_SCENARIO_LIST", "")
# scenario_id_list = [int(s.strip()) for s in raw_ids.split(',') if s.strip().isdigit()]
# scenario_str = ','.join(map(str, scenario_id_list))
# catalog = os.getenv("DBRICKS_CATALOG", "tam")

# server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME")

# def credential_provider():
# config = Config(
# host = f"https://{server_hostname}",
# client_id = os.getenv("DATABRICKS_CLIENT_ID"),
# client_secret = os.getenv("DATABRICKS_CLIENT_SECRET"))
# return oauth_service_principal(config)

# def query_to_df(cursor, query):
# cursor.execute(query)
# return cursor.fetchall_arrow().to_pandas()

elif ENV == 'Azure':
raw_ids = os.getenv("AZURE_SCENARIO_LIST", "")
scenario_id_list = [int(s.strip()) for s in raw_ids.split(',') if s.strip().isdigit()]
scenario_str = ','.join(map(str, scenario_id_list))
catalog = os.getenv("DBRICKS_CATALOG", "tam")

def query_to_df(cursor, query):
cursor.execute(query)
return cursor.fetchall_arrow().to_pandas()

with sql.connect(
server_hostname=os.getenv("DATABRICKS_SERVER_HOSTNAME"),
http_path=os.getenv("DATABRICKS_HTTP_PATH"),
access_token=os.getenv("DATABRICKS_TOKEN"),
) as connection:
with connection.cursor() as cursor:
df1 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.fwy WHERE scenario_id IN ({scenario_str})")
df2 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.all_class WHERE scenario_id IN ({scenario_str})")
df3 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.truck WHERE scenario_id IN ({scenario_str})")
df4 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.board WHERE scenario_id IN ({scenario_str})")
df5 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.regional_vmt WHERE scenario_id IN ({scenario_str})")
df_link = query_to_df(cursor, f"SELECT scenario_id, ID, Length, geometry FROM {catalog}.abm3.network__emme_hwy_tcad WHERE scenario_id IN ({scenario_str})")
df_route = query_to_df(cursor, f"SELECT scenario_id, route_name, earlyam_hours, evening_hours, transit_route_shape as geometry FROM {catalog}.abm3.network__transit_route WHERE scenario_id IN ({scenario_str})")
df_scenario = query_to_df(cursor, f"SELECT scenario_id, scenario_name, scenario_yr FROM {catalog}.abm3.main__scenario WHERE scenario_id IN ({scenario_str})")
server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME")

def credential_provider():
    """Build a Databricks SDK ``Config`` and hand it to ``oauth_service_principal``.

    The workspace host is ``https://{server_hostname}``; the client id and
    secret are read from the ``DATABRICKS_CLIENT_ID`` and
    ``DATABRICKS_CLIENT_SECRET`` environment variables at call time.
    """
    return oauth_service_principal(
        Config(
            host=f"https://{server_hostname}",
            client_id=os.getenv("DATABRICKS_CLIENT_ID"),
            client_secret=os.getenv("DATABRICKS_CLIENT_SECRET"),
        )
    )

def query_to_df(query):
    """Execute ``query`` on a newly opened connection and return the rows as pandas.

    Nothing is connected until this function is called: every call opens its
    own ``sql.connect`` session (authenticated through ``credential_provider``)
    and a cursor, and both context managers are exited when the call returns.
    The result is fetched as Arrow and converted with ``to_pandas()``.
    """
    connect_kwargs = {
        "server_hostname": server_hostname,
        "http_path": os.getenv("DATABRICKS_HTTP_PATH"),
        "credentials_provider": credential_provider,
    }
    with sql.connect(**connect_kwargs) as conn, conn.cursor() as cur:
        cur.execute(query)
        arrow_table = cur.fetchall_arrow()
        return arrow_table.to_pandas()

df1 = query_to_df(f"SELECT * FROM {catalog}.validation.fwy WHERE scenario_id IN ({scenario_str})")
df2 = query_to_df(f"SELECT * FROM {catalog}.validation.all_class WHERE scenario_id IN ({scenario_str})")
df3 = query_to_df(f"SELECT * FROM {catalog}.validation.truck WHERE scenario_id IN ({scenario_str})")
df4 = query_to_df(f"SELECT * FROM {catalog}.validation.board WHERE scenario_id IN ({scenario_str})")
df5 = query_to_df(f"SELECT * FROM {catalog}.validation.regional_vmt WHERE scenario_id IN ({scenario_str})")
df_link = query_to_df(f"SELECT scenario_id, ID, Length, geometry FROM {catalog}.abm3.network__emme_hwy_tcad WHERE scenario_id IN ({scenario_str})")
df_route = query_to_df(f"SELECT scenario_id, route_name, earlyam_hours, evening_hours, transit_route_shape as geometry FROM {catalog}.abm3.network__transit_route WHERE scenario_id IN ({scenario_str})")
df_scenario = query_to_df(f"SELECT scenario_id, scenario_name, scenario_yr FROM {catalog}.abm3.main__scenario WHERE scenario_id IN ({scenario_str})")

# Clean up data
df1 = df1.dropna(subset=['count_day', 'day_flow']).drop(columns=['loader__delta_hash_key','loader__updated_date'], errors='ignore').drop_duplicates()
Expand Down
Binary file modified requirements.txt
Binary file not shown.