diff --git a/.github/workflows/validation_sp_update_kng_validation-tool(dev).yml b/.github/workflows/validation_sp_update_kng_validation-tool(dev).yml
new file mode 100644
index 0000000..7289b55
--- /dev/null
+++ b/.github/workflows/validation_sp_update_kng_validation-tool(dev).yml
@@ -0,0 +1,71 @@
+# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
+# More GitHub Actions for Azure: https://github.com/Azure/actions
+# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions
+
+name: Build and deploy Python app to Azure Web App - validation-tool
+
+on:
+  push:
+    branches:
+      - validation_sp_update_kng
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read #This is required for actions/checkout
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python version
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Create and start virtual environment
+        run: |
+          python -m venv venv
+          source venv/bin/activate
+
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+
+      # Optional: Add step to run tests here (PyTest, Django test suites, etc.)
+
+      - name: Upload artifact for deployment jobs
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-app
+          path: |
+            .
+            !venv/
+
+  deploy:
+    runs-on: ubuntu-latest
+    needs: build
+    permissions:
+      id-token: write #This is required for requesting the JWT
+      contents: read #This is required for actions/checkout
+
+    steps:
+      - name: Download artifact from build job
+        uses: actions/download-artifact@v4
+        with:
+          name: python-app
+
+      - name: Login to Azure
+        uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.AZUREAPPSERVICE_CLIENTID_728F9D7925DE4182BF6B18B9C5F323F5 }}
+          tenant-id: ${{ secrets.AZUREAPPSERVICE_TENANTID_1DB700BC4B8B4361AC2EBDCC1686194C }}
+          subscription-id: ${{ secrets.AZUREAPPSERVICE_SUBSCRIPTIONID_DE30FBD7016B4320BD60B3E657D06741 }}
+
+      - name: 'Deploy to Azure Web App'
+        uses: azure/webapps-deploy@v3
+        id: deploy-to-webapp
+        with:
+          app-name: 'validation-tool'
+          slot-name: 'dev'
+
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 019a2a4..009139d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,6 @@
 __pycache__/
 .env
 venv_1/
-venv_2/
\ No newline at end of file
+venv_2/
+.vscode/
+.venv/
\ No newline at end of file
diff --git a/load_data.py b/load_data.py
index 2dfd959..399ad48 100644
--- a/load_data.py
+++ b/load_data.py
@@ -6,6 +6,7 @@ import yaml
 from pathlib import Path
 
 from databricks import sql
+from databricks.sdk.core import Config, oauth_service_principal
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -89,30 +90,58 @@ def read_metadata(scenario_path):
         df_route = pd.concat(dfs["df_route"],ignore_index=True)
         df_scenario = pd.concat(dfs["df_scenario"], ignore_index=True)
 
+    # elif ENV == 'Azure':
+    #     raw_ids = os.getenv("AZURE_SCENARIO_LIST", "")
+    #     scenario_id_list = [int(s.strip()) for s in raw_ids.split(',') if s.strip().isdigit()]
+    #     scenario_str = ','.join(map(str, scenario_id_list))
+    #     catalog = os.getenv("DBRICKS_CATALOG", "tam")
+
+    #     server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME")
+
+    #     def credential_provider():
+    #         config = Config(
+    #             host = f"https://{server_hostname}",
+    #             client_id = os.getenv("DATABRICKS_CLIENT_ID"),
+    #             client_secret = os.getenv("DATABRICKS_CLIENT_SECRET"))
+    #         return oauth_service_principal(config)
+
+    #     def query_to_df(cursor, query):
+    #         cursor.execute(query)
+    #         return cursor.fetchall_arrow().to_pandas()
+
     elif ENV == 'Azure':
         raw_ids = os.getenv("AZURE_SCENARIO_LIST", "")
         scenario_id_list = [int(s.strip()) for s in raw_ids.split(',') if s.strip().isdigit()]
         scenario_str = ','.join(map(str, scenario_id_list))
         catalog = os.getenv("DBRICKS_CATALOG", "tam")
-
-        def query_to_df(cursor, query):
-            cursor.execute(query)
-            return cursor.fetchall_arrow().to_pandas()
-
-        with sql.connect(
-            server_hostname=os.getenv("DATABRICKS_SERVER_HOSTNAME"),
-            http_path=os.getenv("DATABRICKS_HTTP_PATH"),
-            access_token=os.getenv("DATABRICKS_TOKEN"),
-        ) as connection:
-            with connection.cursor() as cursor:
-                df1 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.fwy WHERE scenario_id IN ({scenario_str})")
-                df2 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.all_class WHERE scenario_id IN ({scenario_str})")
-                df3 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.truck WHERE scenario_id IN ({scenario_str})")
-                df4 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.board WHERE scenario_id IN ({scenario_str})")
-                df5 = query_to_df(cursor, f"SELECT * FROM {catalog}.validation.regional_vmt WHERE scenario_id IN ({scenario_str})")
-                df_link = query_to_df(cursor, f"SELECT scenario_id, ID, Length, geometry FROM {catalog}.abm3.network__emme_hwy_tcad WHERE scenario_id IN ({scenario_str})")
-                df_route = query_to_df(cursor, f"SELECT scenario_id, route_name, earlyam_hours, evening_hours, transit_route_shape as geometry FROM {catalog}.abm3.network__transit_route WHERE scenario_id IN ({scenario_str})")
-                df_scenario = query_to_df(cursor, f"SELECT scenario_id, scenario_name, scenario_yr FROM {catalog}.abm3.main__scenario WHERE scenario_id IN ({scenario_str})")
+        server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME")
+
+        def credential_provider():
+            config = Config(
+                host = f"https://{server_hostname}",
+                client_id = os.getenv("DATABRICKS_CLIENT_ID"),
+                client_secret = os.getenv("DATABRICKS_CLIENT_SECRET"))
+            return oauth_service_principal(config)
+
+        def query_to_df(query):
+            """Execute query lazily - connects only when called"""
+            with sql.connect(
+                server_hostname=server_hostname,
+                http_path=os.getenv("DATABRICKS_HTTP_PATH"),
+                credentials_provider=credential_provider
+            ) as connection:
+                with connection.cursor() as cursor:
+                    cursor.execute(query)
+                    return cursor.fetchall_arrow().to_pandas()
+
+        df1 = query_to_df(f"SELECT * FROM {catalog}.validation.fwy WHERE scenario_id IN ({scenario_str})")
+        df2 = query_to_df(f"SELECT * FROM {catalog}.validation.all_class WHERE scenario_id IN ({scenario_str})")
+        df3 = query_to_df(f"SELECT * FROM {catalog}.validation.truck WHERE scenario_id IN ({scenario_str})")
+        df4 = query_to_df(f"SELECT * FROM {catalog}.validation.board WHERE scenario_id IN ({scenario_str})")
+        df5 = query_to_df(f"SELECT * FROM {catalog}.validation.regional_vmt WHERE scenario_id IN ({scenario_str})")
+        df_link = query_to_df(f"SELECT scenario_id, ID, Length, geometry FROM {catalog}.abm3.network__emme_hwy_tcad WHERE scenario_id IN ({scenario_str})")
+        df_route = query_to_df(f"SELECT scenario_id, route_name, earlyam_hours, evening_hours, transit_route_shape as geometry FROM {catalog}.abm3.network__transit_route WHERE scenario_id IN ({scenario_str})")
+        df_scenario = query_to_df(f"SELECT scenario_id, scenario_name, scenario_yr FROM {catalog}.abm3.main__scenario WHERE scenario_id IN ({scenario_str})")
 
     # Clean up data
     df1 = df1.dropna(subset=['count_day', 'day_flow']).drop(columns=['loader__delta_hash_key','loader__updated_date'], errors='ignore').drop_duplicates()
diff --git a/requirements.txt b/requirements.txt
index 3303feb..c4cea9e 100644
Binary files a/requirements.txt and b/requirements.txt differ
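
For reference, a minimal standalone sketch (not part of the change) of the service-principal OAuth flow that load_data.py now uses, assuming the same environment variables the diff already reads (DATABRICKS_SERVER_HOSTNAME, DATABRICKS_HTTP_PATH, DATABRICKS_CLIENT_ID, DATABRICKS_CLIENT_SECRET); the SELECT 1 probe is illustrative only.

```python
# Hypothetical connectivity check for the OAuth service-principal path used in load_data.py.
# Assumes databricks-sql-connector, databricks-sdk, and python-dotenv are installed and the
# environment variables below are defined (e.g. in a local .env file).
import os

from databricks import sql
from databricks.sdk.core import Config, oauth_service_principal
from dotenv import load_dotenv

load_dotenv()

server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME")


def credential_provider():
    # Machine-to-machine OAuth: the service principal's client ID/secret, as in load_data.py.
    config = Config(
        host=f"https://{server_hostname}",
        client_id=os.getenv("DATABRICKS_CLIENT_ID"),
        client_secret=os.getenv("DATABRICKS_CLIENT_SECRET"),
    )
    return oauth_service_principal(config)


# Each call opens a short-lived connection, mirroring the lazy query_to_df helper in the diff.
with sql.connect(
    server_hostname=server_hostname,
    http_path=os.getenv("DATABRICKS_HTTP_PATH"),
    credentials_provider=credential_provider,
) as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT 1")  # illustrative probe only
        print(cursor.fetchall())
```

Compared with the removed access_token path, the credentials_provider callable lets the connector obtain OAuth tokens from the service principal instead of relying on a long-lived personal access token.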