From 83d49ed040866af5bd8c43fda6910e2f95c4e8a1 Mon Sep 17 00:00:00 2001 From: jaredgraff_microsoft Date: Tue, 7 Jan 2025 19:25:28 -0500 Subject: [PATCH 1/3] Added Anomalous Process notebook - Jared Graff --- ...nomalous Process Network Connections.ipynb | 851 ++++++++++++++++++ 1 file changed, 851 insertions(+) create mode 100644 machine-learning-notebooks/Guided Hunting - Anomalous Process Network Connections.ipynb diff --git a/machine-learning-notebooks/Guided Hunting - Anomalous Process Network Connections.ipynb b/machine-learning-notebooks/Guided Hunting - Anomalous Process Network Connections.ipynb new file mode 100644 index 00000000..7776f727 --- /dev/null +++ b/machine-learning-notebooks/Guided Hunting - Anomalous Process Network Connections.ipynb @@ -0,0 +1,851 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Guided Hunting - Anomalous Process Network Connections\n", + "
\n", + " Details...\n", + "**Notebook Version:** 1.0
\n", + "**Python Version:** Python 3.8 (including Python 3.8 - AzureML)
\n", + "**Required Packages**: msticpy, pandas, numpy, matplotlib, plotly, ipywidgets, ipython, sklearn
\n", + "\n", + "**Data Sources Required**:\n", + "- Log Analytics - DeviceNetworkEvents\n", + "\n", + "
\n", + "\n", + "Brings together a series of queries and visualizations to help you investigate anomalous processes in your network. There are then guided hunting steps to investigate these occurrences in further depth. This notebook authenticates with environment variables and requires the following:\n", + "- msticpyconfig.yaml has been properly configured\n", + "- Registered application has been created with API permissions given to Log Analytics API\n", + "- Key vault set up with a secret to the Registered Application" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Setup Environment Variables\n", + "Please set the following environment variables in the code block below:\n", + "- AZURE_TENANT_ID\n", + "- AZURE_CLIENT_ID\n", + "- key_vault_name\n", + "- key_vault_url\n", + "- secret_name" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import msticpy as mp\n", + "from azure.identity import DefaultAzureCredential\n", + "from azure.keyvault.secrets import SecretClient\n", + "from azure.mgmt.resource import ResourceManagementClient\n", + "\n", + "# Set environment variables for tenant ID and client ID\n", + "os.environ['AZURE_TENANT_ID'] = ''\n", + "os.environ['AZURE_CLIENT_ID'] = ''\n", + "\n", + "# Initialize DefaultAzureCredential\n", + "credential = DefaultAzureCredential()\n", + "\n", + "# Create a SecretClient to interact with the Key Vault\n", + "key_vault_name = \"\"\n", + "key_vault_url = f\"\"\n", + "secret_client = SecretClient(vault_url=key_vault_url, credential=credential)\n", + "\n", + "# Retrieve the secret from Key Vault\n", + "secret_name = \"\"\n", + "retrieved_secret = secret_client.get_secret(secret_name)\n", + "os.environ['AZURE_CLIENT_SECRET'] = retrieved_secret.value\n", + "\n", + "# Now you can use DefaultAzureCredential or other credential 
classes\n", + "print(credential)\n", + "\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "gather": { + "logged": 1736241000791 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Verify Environment Variables are Set\n", + "You should see the values of the following:\n", + "- AZURE_TENANT_ID\n", + "- AZURE_CLIENT_ID\n", + "- AZURE_CLIENT_SECRET" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "# Verify that the environment variables have been set\n", + "def verify_env_vars():\n", + " tenant_id = os.getenv('AZURE_TENANT_ID')\n", + " client_id = os.getenv('AZURE_CLIENT_ID')\n", + " client_secret = os.getenv('AZURE_CLIENT_SECRET')\n", + " \n", + " if tenant_id and client_id and client_secret:\n", + " print(\"Environment variables have been set successfully:\")\n", + " print(f\"AZURE_TENANT_ID: {tenant_id}\")\n", + " print(f\"AZURE_CLIENT_ID: {client_id}\")\n", + " print(f\"AZURE_CLIENT_SECRET: {client_secret[:4]}... (hidden for security)\")\n", + " else:\n", + " print(\"Failed to set environment variables.\")\n", + "\n", + "# Call the verification function\n", + "verify_env_vars()\n", + "\n", + "\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + }, + "gather": { + "logged": 1736241041191 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Setup msticpyconfig.yaml\n", + "Ensure your msticpyconfig.yaml has been set up and saved in the current directory you are running this notebook." 
+ ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "import msticpy\n", + "from msticpy.config import MpConfigFile, MpConfigEdit\n", + "import os\n", + "import json\n", + "from pathlib import Path\n", + "\n", + "mp_conf = \"msticpyconfig.yaml\"\n", + "\n", + "# check if MSTICPYCONFIG is already an env variable\n", + "mp_env = os.environ.get(\"MSTICPYCONFIG\")\n", + "mp_conf = mp_env if mp_env and Path(mp_env).is_file() else mp_conf\n", + "\n", + "if not Path(mp_conf).is_file():\n", + " print(\n", + " \"No msticpyconfig.yaml was found!\",\n", + " \"Please check that there is a config.json file in your workspace folder.\",\n", + " \"If this is not there, go back to the Microsoft Sentinel portal and launch\",\n", + " \"this notebook from there.\",\n", + " sep=\"\\n\"\n", + " )\n", + "else:\n", + " mpedit = MpConfigEdit(mp_conf)\n", + " mpconfig = MpConfigFile(mp_conf)\n", + " \n", + " # Convert SettingsDict to a regular dictionary\n", + " settings_dict = {k: v for k, v in mpconfig.settings.items()}\n", + " print(f\"Configured Sentinel workspaces: {json.dumps(settings_dict, indent=4)}\")\n", + "\n", + "msticpy.settings.refresh_config()\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + }, + "gather": { + "logged": 1736241065955 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Setup QueryProvider" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "# Refresh any config items that might have been saved\n", + "# to the msticpyconfig in the previous steps.\n", + "msticpy.settings.refresh_config()\n", + "\n", + "# Initialize a QueryProvider for Microsoft Sentinel\n", + "qry_prov = mp.QueryProvider(\"AzureSentinel\")" + ], + "outputs": [], + 
"execution_count": 5, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + }, + "gather": { + "logged": 1736241100105 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Connect to Sentinel\n", + "You should see \"connected\" output after running this code block. Once you are connected, you can continue on with the notebook." + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "# Get the default Microsoft Sentinel workspace details from msticpyconfig.yaml\n", + "\n", + "ws_config = mp.WorkspaceConfig()\n", + "\n", + "# Connect to Microsoft Sentinel with our QueryProvider and config details\n", + "qry_prov.connect(ws_config)" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + }, + "gather": { + "logged": 1736241109656 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Run Anomaly Detection Script - Anomalous Processes\n", + "Change your KQL to reduce your data. Enter the field name you want to run the IsolationForest algorithm on to identify anomalies. This script is set to search for anomalous processes on the network. It is recommended to change the contamination rate to fit your environment. The bigger the environment, the smaller the contamination rate will likely need to be. After you select the \"Analyze\" button, you can search the data frame with the \"Column\" and \"Value\" text widgets. There is an option to graph the top ten most significant anomalies based on \"Anomaly Score\" with the \"Graph Results\" button." 
+ ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "from azure.monitor.query import LogsQueryClient\n", + "import msticpy as mp\n", + "from msticpy.config import MpConfigFile, MpConfigEdit\n", + "from azure.identity import ClientSecretCredential\n", + "from azure.identity import DefaultAzureCredential\n", + "from datetime import timedelta\n", + "import pandas as pd\n", + "from sklearn.ensemble import IsolationForest\n", + "from sklearn.preprocessing import LabelEncoder\n", + "import ipywidgets as widgets\n", + "from IPython.display import display\n", + "import re\n", + "import plotly.express as px\n", + "\n", + "# Ensure inline plotting\n", + "%matplotlib inline\n", + "\n", + "\n", + "\n", + "query_text = widgets.Textarea(\n", + " value=\"\"\"\n", + " DeviceNetworkEvents\n", + "| where TimeGenerated >= ago(1d)\n", + "| where isnotempty(InitiatingProcessFileName)\n", + "| where ActionType == \"ConnectionSuccess\"\n", + "| where RemoteIPType == \"Public\"\n", + "| where RemoteIP matches regex @\"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$\"\n", + "| project TimeGenerated, DeviceName, InitiatingProcessAccountName, InitiatingProcessFileName, LocalIP, RemoteIP, RemotePort\n", + " \"\"\",\n", + " placeholder='Enter your KQL query here',\n", + " description='Query:',\n", + " disabled=False\n", + ")\n", + "\n", + "# Create a text widget for the field name\n", + "field_name_text = widgets.Text(\n", + " value='InitiatingProcessFileName',\n", + " placeholder='Enter the field name for Isolation Forest',\n", + " description='Field:',\n", + " disabled=False\n", + ")\n", + "\n", + "# Create a text widget for column search\n", + "column_name_text = widgets.Text(\n", + " value='',\n", + " placeholder='Enter column name to search',\n", + " description='Column:',\n", + " disabled=False\n", + ")\n", + "\n", + "# Create a text widget for value search\n", + "value_text = widgets.Text(\n", + " value='',\n", 
+ " placeholder='Enter value to search in the column',\n", + " description='Value:',\n", + " disabled=False\n", + ")\n", + "\n", + "# Create an \"Analyze\" button\n", + "analyze_button = widgets.Button(\n", + " description='Analyze',\n", + " disabled=False,\n", + " button_style='',\n", + " tooltip='Click to run the query',\n", + " icon='search'\n", + ")\n", + "\n", + "# Create a \"Graph Results\" button\n", + "graph_button = widgets.Button(\n", + " description='Graph Results',\n", + " disabled=True, # Initially disabled until data is analyzed\n", + " button_style='',\n", + " tooltip='Click to display the scatterplot',\n", + " icon='bar-chart'\n", + ")\n", + "\n", + "# Create a \"Search\" button\n", + "search_button = widgets.Button(\n", + " description='Search',\n", + " disabled=True, # Initially disabled until data is analyzed\n", + " button_style='',\n", + " tooltip='Click to search the DataFrame',\n", + " icon='search'\n", + ")\n", + "\n", + "# Display the text boxes and buttons\n", + "display(query_text, field_name_text, column_name_text, value_text, analyze_button, graph_button, search_button)\n", + "\n", + "# Function to extract timespan from KQL query\n", + "def extract_timespan(query):\n", + " match = re.search(r'ago\\((\\d+)([dhms])\\)', query)\n", + " if match:\n", + " value, unit = int(match.group(1)), match.group(2)\n", + " if unit == 'd':\n", + " return timedelta(days=value)\n", + " elif unit == 'h':\n", + " return timedelta(hours=value)\n", + " elif unit == 'm':\n", + " return timedelta(minutes=value)\n", + " elif unit == 's':\n", + " return timedelta(seconds=value)\n", + " return None\n", + "\n", + "# Function to run the query\n", + "def run_query(query):\n", + " timespan = extract_timespan(query)\n", + " response = qry_prov.exec_query(query=query)\n", + " \n", + " # Convert the response to a Pandas DataFrame\n", + " data = response.to_dict(orient='records')\n", + " df = pd.DataFrame(data)\n", + " \n", + " # Set Pandas option to display all 
columns\n", + " pd.set_option('display.max_columns', None)\n", + "\n", + " # Set the maximum column width to None (no truncation)\n", + " pd.set_option('display.max_colwidth', None)\n", + " \n", + " # Get the field name from the text widget\n", + " field_name = field_name_text.value\n", + " \n", + " # Encode the selected field\n", + " le = LabelEncoder()\n", + " df['Outlier'] = le.fit_transform(df[field_name])\n", + " \n", + " \n", + " # Apply Isolation Forest for anomaly detection\n", + " iso_forest = IsolationForest(n_estimators=100, contamination=0.01, random_state=42) # Adjust contamination as needed\n", + " df['Anomaly'] = iso_forest.fit_predict(df[['Outlier']])\n", + "\n", + " # Get anomaly scores\n", + " df['Anomaly_Score'] = iso_forest.decision_function(df[['Outlier']])\n", + " \n", + " # Store the DataFrame for later use\n", + " global analyzed_df\n", + " analyzed_df = df\n", + " \n", + " # Display the DataFrame with anomalies\n", + " display(df.head(len(df)))\n", + " \n", + " # Enable the \"Graph Results\" and \"Search\" buttons\n", + " graph_button.disabled = False\n", + " search_button.disabled = False\n", + "\n", + "# Bind the run_query function to the analyze button\n", + "analyze_button.on_click(lambda x: run_query(query_text.value))\n", + "\n", + "import plotly.express as px\n", + "\n", + "# Function to plot results\n", + "import plotly.express as px\n", + "\n", + "# Function to plot results\n", + "import plotly.express as px\n", + "\n", + "# Function to plot results\n", + "import plotly.express as px\n", + "\n", + "# Function to plot results\n", + "def plot_results():\n", + " # Filter anomalies\n", + " anomalies = analyzed_df[analyzed_df['Anomaly'] == -1]\n", + " \n", + " # Sort by Anomaly_Score and select the top 10 most negative scores\n", + " top_anomalies = anomalies.sort_values(by='Anomaly_Score').head(10)\n", + " \n", + " # Create scatter plot\n", + " fig = px.scatter(\n", + " top_anomalies,\n", + " x='TimeGenerated',\n", + " 
y=field_name_text.value,\n", + " title='Top 10 Most Significant Anomalies Detected',\n", + " hover_data={'LocalIP': True, 'RemoteIP': True, 'RemotePort': True, 'Anomaly_Score': True}\n", + " )\n", + " \n", + " # Update hover template\n", + " fig.update_traces(\n", + " hovertemplate=''.join([\n", + " 'TimeGenerated: %{x}
',\n", + " 'Process: %{y}
', # Correctly reference the y-axis value\n", + " 'More Information: %{customdata}
',\n", + " \n", + " ])\n", + " )\n", + " \n", + " # Show plot\n", + " fig.show()\n", + "\n", + "\n", + "# Bind the plot_results function to the graph button\n", + "graph_button.on_click(lambda x: plot_results())\n", + "\n", + "# Function to search the DataFrame\n", + "def search_dataframe():\n", + " column_name = column_name_text.value\n", + " search_value = value_text.value\n", + " if column_name and search_value:\n", + " search_results = analyzed_df[analyzed_df[column_name].astype(str).str.contains(search_value, na=False)]\n", + " display(search_results)\n", + " else:\n", + " print(\"Please enter both column name and value to search.\")\n", + "\n", + "# Bind the search_dataframe function to the search button\n", + "search_button.on_click(lambda x: search_dataframe())\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## What to do with this Information\n", + "Take note of any of the anomalies that were generated. You can focus on the Top Anomalies from the graph or all of the anomalies from the data frame. A reminder that anything with a field value of \"Anomaly = -1\" was deemed to be an anomalous process generating a successful network connection. You can follow some of the techniques below to investigate these anomalous processes further. Each of the following code cells ends with \"df.head(10)\". This displays 10 results. If you want to change that number, just change the number 10 to the desired number of results you would like to see." + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Verify Parent Process\n", + "It is common to see a malicious process spawn from a normal process. 
You can check the anomalous processes that were identified to see if there is anything unusual with the parent process of the original anomalous process. **Replace process1.exe, process2.exe, and process3.exe with the names of the anomalous processes.**\n", + "\n", + "``` \n", + "DeviceNetworkEvents\n", + "| where InitiatingProcessFileName in (\"process1.exe\", \"process2.exe\", \"process3.exe\")\n", + "| where TimeGenerated >= ago(7d)\n", + "| where isnotempty(InitiatingProcessFileName)\n", + "| where ActionType == \"ConnectionSuccess\"\n", + "| where RemoteIPType == \"Public\"\n", + "| where RemoteIP matches regex @\"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$\"\n", + "| where InitiatingProcessParentFileName != InitiatingProcessFileName" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "query=\"\"\"\n", + "DeviceNetworkEvents\n", + "| where InitiatingProcessFileName in (\"process1.exe\", \"process2.exe\", \"process3.exe\")\n", + "| where TimeGenerated >= ago(7d)\n", + "| where isnotempty(InitiatingProcessFileName)\n", + "| where ActionType == \"ConnectionSuccess\"\n", + "| where RemoteIPType == \"Public\"\n", + "| where RemoteIP matches regex @\"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$\"\n", + "| where InitiatingProcessParentFileName != InitiatingProcessFileName\n", + "| project TimeGenerated, DeviceName, InitiatingProcessAccountName, InitiatingProcessParentFileName, InitiatingProcessFileName, InitiatingProcessSHA1\n", + " \"\"\"\n", + "# Set the maximum column width to None (no truncation)\n", + "pd.set_option('display.max_colwidth', None)\n", + "df = qry_prov.exec_query(query)\n", + "df.head(10)\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Check if cmd.exe or 
Powershell was Used\n", + "Actors will sometimes use remote code execution with cmd.exe or powershell in coordination with other processes. The following KQL will verify this. **Replace process1.exe, process2.exe, and process3.exe with the names of the anomalous processes.**\n", + "```\n", + "DeviceNetworkEvents\n", + "| where InitiatingProcessFileName in (\"process1.exe\", \"process2.exe\", \"process3.exe\")\n", + "| where TimeGenerated >= ago(7d)\n", + "| where isnotempty(InitiatingProcessFileName)\n", + "| where ActionType == \"ConnectionSuccess\"\n", + "| where RemoteIPType == \"Public\"\n", + "| where RemoteIP matches regex @\"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$\"\n", + "| where InitiatingProcessCommandLine has_any (\"cmd\", \"powershell\", \"ps.exe\", \"cmd.exe\")" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "query=\"\"\"\n", + "DeviceNetworkEvents\n", + "| where InitiatingProcessFileName in (\"process1.exe\", \"process2.exe\", \"process3.exe\")\n", + "| where TimeGenerated >= ago(7d)\n", + "| where isnotempty(InitiatingProcessFileName)\n", + "| where ActionType == \"ConnectionSuccess\"\n", + "| where RemoteIPType == \"Public\"\n", + "| where RemoteIP matches regex @\"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$\"\n", + "| where InitiatingProcessFolderPath contains_cs \"temp\"\n", + "| project TimeGenerated, DeviceName, InitiatingProcessAccountName, InitiatingProcessFolderPath, InitiatingProcessFileName, LocalIP, RemoteIP, RemotePort\n", + " \"\"\"\n", + "# Set the maximum column width to None (no truncation)\n", + "pd.set_option('display.max_colwidth', None)\n", + "df = qry_prov.exec_query(query)\n", + "df.head(10)" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "markdown", + "source": [ + 
"### Check if cmd.exe or Powershell was Used\n", + "Actors will sometimes use remote code execution with cmd.exe or powershell in coordination with other processes. The following KQL will verify this. **Replace process1.exe, process2.exe, and process3.exe with the names of the anomalous processes.**\n", + "```\n", + "DeviceNetworkEvents\n", + "| where InitiatingProcessFileName in (\"process1.exe\", \"process2.exe\", \"process3.exe\")\n", + "| where TimeGenerated >= ago(7d)\n", + "| where isnotempty(InitiatingProcessFileName)\n", + "| where ActionType == \"ConnectionSuccess\"\n", + "| where RemoteIPType == \"Public\"\n", + "| where RemoteIP matches regex @\"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$\"\n", + "| where InitiatingProcessCommandLine has_any (\"cmd\", \"powershell\", \"ps.exe\", \"cmd.exe\")" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "query=\"\"\"\n", + "DeviceNetworkEvents\n", + "| where InitiatingProcessFileName in (\"process1.exe\", \"process2.exe\", \"process3.exe\")\n", + "| where TimeGenerated >= ago(7d)\n", + "| where isnotempty(InitiatingProcessFileName)\n", + "| where ActionType == \"ConnectionSuccess\"\n", + "| where RemoteIPType == \"Public\"\n", + "| where RemoteIP matches regex @\"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$\"\n", + "| where InitiatingProcessCommandLine has_any (\"cmd\", \"powershell\", \"ps.exe\", \"cmd.exe\")\n", + "| project TimeGenerated, DeviceName, InitiatingProcessAccountName, InitiatingProcessFileName, LocalIP, RemoteIP, RemotePort\n", + " \"\"\"\n", + "# Set the maximum column width to None (no truncation)\n", + "pd.set_option('display.max_colwidth', None)\n", + "df = qry_prov.exec_query(query)\n", + "df.head(10)" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + } + } + ], + 
"metadata": { + "kernelspec": { + "name": "python38-azureml", + "language": "python", + "display_name": "Python 3.8 - AzureML" + }, + "language_info": { + "name": "python", + "version": "3.10.11", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "microsoft": { + "ms_spell_check": { + "ms_spell_check_language": "en" + }, + "host": { + "AzureML": { + "notebookHasBeenCompleted": true + } + } + }, + "kernel_info": { + "name": "python38-azureml" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "version_major": 2, + "version_minor": 0, + "state": { + "000b4a62a9a04e9cb1900c8f24655c55": { + "model_name": "LabelModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_name": "LabelModel", + "_model_module": "@jupyter-widgets/controls", + "tooltip": null, + "description_allow_html": false, + "_view_name": "LabelView", + "tabbable": null, + "_view_module": "@jupyter-widgets/controls", + "_dom_classes": [], + "layout": "IPY_MODEL_89cc4a0bcfe841f6a3e7bf51ceeead37", + "value": "Loading. 
Please wait....", + "style": "IPY_MODEL_5d72c361647d401c9f551cabaddb125d", + "placeholder": "​", + "_view_count": null, + "_model_module_version": "1.5.0", + "disabled": false, + "description": "" + } + }, + "89cc4a0bcfe841f6a3e7bf51ceeead37": { + "model_name": "LayoutModel", + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "LayoutModel", + "grid_row": null, + "_model_module": "@jupyter-widgets/base", + "overflow": null, + "max_height": null, + "display": null, + "border_top": null, + "grid_auto_flow": null, + "grid_template_rows": null, + "align_self": null, + "grid_auto_columns": null, + "width": null, + "grid_area": null, + "align_items": null, + "_view_name": "LayoutView", + "left": null, + "height": null, + "_view_module": "@jupyter-widgets/base", + "border_right": null, + "object_position": null, + "justify_content": null, + "bottom": null, + "max_width": null, + "border": null, + "margin": null, + "order": null, + "grid_column": null, + "grid_auto_rows": null, + "padding": null, + "grid_template_columns": null, + "justify_items": null, + "object_fit": null, + "visibility": "hidden", + "_view_count": null, + "flex_flow": null, + "min_height": null, + "top": null, + "min_width": null, + "flex": null, + "border_left": null, + "_model_module_version": "1.2.0", + "grid_template_areas": null, + "overflow_x": null, + "right": null, + "overflow_y": null, + "grid_gap": null, + "border_bottom": null, + "align_content": null + } + }, + "5d72c361647d401c9f551cabaddb125d": { + "model_name": "DescriptionStyleModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_name": "DescriptionStyleModel", + "_model_module_version": "1.5.0", + "_view_module": "@jupyter-widgets/base", + "_view_name": "StyleView", + "_view_module_version": "1.2.0", + "_view_count": null, + 
"description_width": "" + } + } + } + } + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file From ff50468e528f06a589be85ca175fc28b25d57dd6 Mon Sep 17 00:00:00 2001 From: jaredgraff_microsoft Date: Wed, 8 Jan 2025 11:16:57 -0500 Subject: [PATCH 2/3] Changed markdown header in notebook --- ...nomalous Process Network Connections.ipynb | 738 +++++++++++++++++- 1 file changed, 716 insertions(+), 22 deletions(-) diff --git a/machine-learning-notebooks/Guided Hunting - Anomalous Process Network Connections.ipynb b/machine-learning-notebooks/Guided Hunting - Anomalous Process Network Connections.ipynb index 7776f727..323f5776 100644 --- a/machine-learning-notebooks/Guided Hunting - Anomalous Process Network Connections.ipynb +++ b/machine-learning-notebooks/Guided Hunting - Anomalous Process Network Connections.ipynb @@ -6,7 +6,6 @@ "# Guided Hunting - Anomalous Process Network Connections\n", "
\n", " Details...\n", - "**Notebook Version:** 1.0
\n", "**Python Version:** Python 3.8 (including Python 3.8 - AzureML)
\n", "**Required Packages**: msticpy, pandas, numpy, matplotlib, plotly, ipywidgets, ipython, sklearn
\n", "\n", @@ -81,7 +80,7 @@ "execution_count": null, "metadata": { "gather": { - "logged": 1736241000791 + "logged": 1736349295307 } } }, @@ -138,7 +137,7 @@ } }, "gather": { - "logged": 1736241041191 + "logged": 1736349298403 } } }, @@ -202,7 +201,7 @@ } }, "gather": { - "logged": 1736241065955 + "logged": 1736349588971 } } }, @@ -230,7 +229,7 @@ "qry_prov = mp.QueryProvider(\"AzureSentinel\")" ], "outputs": [], - "execution_count": 5, + "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, @@ -242,7 +241,7 @@ } }, "gather": { - "logged": 1736241100105 + "logged": 1736349860945 } } }, @@ -283,7 +282,7 @@ } }, "gather": { - "logged": 1736241109656 + "logged": 1736349997387 } } }, @@ -520,6 +519,9 @@ "transient": { "deleting": false } + }, + "gather": { + "logged": 1736350170355 } } }, @@ -597,8 +599,8 @@ { "cell_type": "markdown", "source": [ - "### Check if cmd.exe or Powershell was Used\n", - "Actors will sometimes use remote code execution with cmd.exe or powershell in coordination with other processes. The following KQL will verify this. **Replace process1.exe, process2.exe, and process3.exe with the names of the anomalous processes.**\n", + "### Check if Process Spawned out of Temp File Path\n", + "Attackers commonly use a TEMP folder to spawn malicious processes. Ensure the anomalous process did not spawn out of this directory. 
**Replace process1.exe, process2.exe, and process3.exe with the names of the anomalous processes.**\n", "```\n", "DeviceNetworkEvents\n", "| where InitiatingProcessFileName in (\"process1.exe\", \"process2.exe\", \"process3.exe\")\n", @@ -607,7 +609,7 @@ "| where ActionType == \"ConnectionSuccess\"\n", "| where RemoteIPType == \"Public\"\n", "| where RemoteIP matches regex @\"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$\"\n", - "| where InitiatingProcessCommandLine has_any (\"cmd\", \"powershell\", \"ps.exe\", \"cmd.exe\")" + "| where InitiatingProcessFolderPath contains_cs \"temp\"" ], "metadata": { "nteract": { @@ -743,32 +745,150 @@ "version_major": 2, "version_minor": 0, "state": { - "000b4a62a9a04e9cb1900c8f24655c55": { - "model_name": "LabelModel", + "5674592e7ea64aa98cfb27e7addefdad": { + "model_name": "LayoutModel", + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "LayoutModel", + "grid_row": null, + "_model_module": "@jupyter-widgets/base", + "overflow": null, + "max_height": null, + "display": null, + "border_top": null, + "grid_auto_flow": null, + "grid_template_rows": null, + "align_self": null, + "grid_auto_columns": null, + "width": null, + "grid_area": null, + "align_items": null, + "_view_name": "LayoutView", + "left": null, + "height": null, + "_view_module": "@jupyter-widgets/base", + "border_right": null, + "object_position": null, + "justify_content": null, + "bottom": null, + "max_width": null, + "border": null, + "margin": null, + "order": null, + "grid_column": null, + "grid_auto_rows": null, + "padding": null, + "grid_template_columns": null, + "justify_items": null, + "object_fit": null, + "visibility": null, + "_view_count": null, + "flex_flow": null, + "min_height": null, + "top": null, + "min_width": null, + "flex": null, + "border_left": null, + "_model_module_version": "1.2.0", + "grid_template_areas": null, + "overflow_x": null, + "right": 
null, + "overflow_y": null, + "grid_gap": null, + "border_bottom": null, + "align_content": null + } + }, + "28faf7e1a0af4e76b03ce38fe31d57ef": { + "model_name": "TextareaModel", "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "state": { "_view_module_version": "1.5.0", "description_tooltip": null, - "_model_name": "LabelModel", + "_model_name": "TextareaModel", "_model_module": "@jupyter-widgets/controls", "tooltip": null, "description_allow_html": false, - "_view_name": "LabelView", + "_view_name": "TextareaView", "tabbable": null, "_view_module": "@jupyter-widgets/controls", "_dom_classes": [], - "layout": "IPY_MODEL_89cc4a0bcfe841f6a3e7bf51ceeead37", - "value": "Loading. Please wait....", - "style": "IPY_MODEL_5d72c361647d401c9f551cabaddb125d", - "placeholder": "​", + "layout": "IPY_MODEL_521a56613e984c0c948d884380a38231", + "value": "\n DeviceNetworkEvents\n| where TimeGenerated >= ago(1d)\n| where isnotempty(InitiatingProcessFileName)\n| where ActionType == \"ConnectionSuccess\"\n| where RemoteIPType == \"Public\"\n| where RemoteIP matches regex @\"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$\"\n| project TimeGenerated, DeviceName, InitiatingProcessAccountName, InitiatingProcessFileName, LocalIP, RemoteIP, RemotePort\n ", + "style": "IPY_MODEL_3aa86f73174e449d8277bff2be4a3c19", + "placeholder": "Enter your KQL query here", "_view_count": null, + "rows": null, + "continuous_update": true, "_model_module_version": "1.5.0", "disabled": false, - "description": "" + "description": "Query:" + } + }, + "e3db41c6b92041f0a46bec208633105e": { + "model_name": "ButtonModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.5.0", + "_model_name": "ButtonModel", + "_model_module": "@jupyter-widgets/controls", + "tooltip": "Click to display the scatterplot", + "button_style": "", + "_view_name": "ButtonView", + "tabbable": null, + "_view_module": 
"@jupyter-widgets/controls", + "_dom_classes": [], + "layout": "IPY_MODEL_5674592e7ea64aa98cfb27e7addefdad", + "style": "IPY_MODEL_ed008ba5132a47539cd770d1cdebb8dc", + "_view_count": null, + "icon": "bar-chart", + "_model_module_version": "1.5.0", + "disabled": false, + "description": "Graph Results" + } + }, + "76fbb0eb67b64b81b6489216ebf64a6e": { + "model_name": "ButtonStyleModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "ButtonStyleModel", + "font_style": "", + "_model_module": "@jupyter-widgets/controls", + "text_decoration": "", + "_view_name": "StyleView", + "_view_module": "@jupyter-widgets/base", + "font_weight": "", + "text_color": "", + "font_size": "", + "_view_count": null, + "font_family": "", + "font_variant": "", + "button_color": null, + "_model_module_version": "1.5.0" + } + }, + "cd72a7c0653943409b2ed45253667596": { + "model_name": "DescriptionStyleModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_name": "DescriptionStyleModel", + "_model_module_version": "1.5.0", + "_view_module": "@jupyter-widgets/base", + "_view_name": "StyleView", + "_view_module_version": "1.2.0", + "_view_count": null, + "description_width": "" } }, - "89cc4a0bcfe841f6a3e7bf51ceeead37": { + "d76b1e7ae1554c43bb48daeaf3e4df2b": { "model_name": "LayoutModel", "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", @@ -806,7 +926,89 @@ "grid_template_columns": null, "justify_items": null, "object_fit": null, - "visibility": "hidden", + "visibility": null, + "_view_count": null, + "flex_flow": null, + "min_height": null, + "top": null, + "min_width": null, + "flex": null, + "border_left": null, + "_model_module_version": "1.2.0", + "grid_template_areas": null, + "overflow_x": null, + "right": null, + "overflow_y": null, + "grid_gap": null, + 
"border_bottom": null, + "align_content": null + } + }, + "c4e9295b72a647e89df9239af48a5726": { + "model_name": "TextModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_name": "TextModel", + "_model_module": "@jupyter-widgets/controls", + "tooltip": null, + "description_allow_html": false, + "_view_name": "TextView", + "tabbable": null, + "_view_module": "@jupyter-widgets/controls", + "_dom_classes": [], + "layout": "IPY_MODEL_8eb3e0f20ec64047bf595a66e9a95a6f", + "value": "-1", + "style": "IPY_MODEL_cdc828bc800445fcb7b0c0a493aaa20b", + "placeholder": "Enter value to search in the column", + "_view_count": null, + "continuous_update": true, + "_model_module_version": "1.5.0", + "disabled": false, + "description": "Value:" + } + }, + "521a56613e984c0c948d884380a38231": { + "model_name": "LayoutModel", + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "LayoutModel", + "grid_row": null, + "_model_module": "@jupyter-widgets/base", + "overflow": null, + "max_height": null, + "display": null, + "border_top": null, + "grid_auto_flow": null, + "grid_template_rows": null, + "align_self": null, + "grid_auto_columns": null, + "width": null, + "grid_area": null, + "align_items": null, + "_view_name": "LayoutView", + "left": null, + "height": null, + "_view_module": "@jupyter-widgets/base", + "border_right": null, + "object_position": null, + "justify_content": null, + "bottom": null, + "max_width": null, + "border": null, + "margin": null, + "order": null, + "grid_column": null, + "grid_auto_rows": null, + "padding": null, + "grid_template_columns": null, + "justify_items": null, + "object_fit": null, + "visibility": null, "_view_count": null, "flex_flow": null, "min_height": null, @@ -824,7 +1026,7 @@ "align_content": null } }, - 
"5d72c361647d401c9f551cabaddb125d": { + "3aa86f73174e449d8277bff2be4a3c19": { "model_name": "DescriptionStyleModel", "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", @@ -838,6 +1040,498 @@ "_view_count": null, "description_width": "" } + }, + "ed008ba5132a47539cd770d1cdebb8dc": { + "model_name": "ButtonStyleModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "ButtonStyleModel", + "font_style": "", + "_model_module": "@jupyter-widgets/controls", + "text_decoration": "", + "_view_name": "StyleView", + "_view_module": "@jupyter-widgets/base", + "font_weight": "", + "text_color": "", + "font_size": "", + "_view_count": null, + "font_family": "", + "font_variant": "", + "button_color": null, + "_model_module_version": "1.5.0" + } + }, + "96eb86e151fb4aeeb84a31afea74acdc": { + "model_name": "TextModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_name": "TextModel", + "_model_module": "@jupyter-widgets/controls", + "tooltip": null, + "description_allow_html": false, + "_view_name": "TextView", + "tabbable": null, + "_view_module": "@jupyter-widgets/controls", + "_dom_classes": [], + "layout": "IPY_MODEL_59599e7349264cfabe4ee7094a1df26f", + "value": "Anomaly", + "style": "IPY_MODEL_d5e276015481459c9cb3c214c96f1232", + "placeholder": "Enter column name to search", + "_view_count": null, + "continuous_update": true, + "_model_module_version": "1.5.0", + "disabled": false, + "description": "Column:" + } + }, + "59599e7349264cfabe4ee7094a1df26f": { + "model_name": "LayoutModel", + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "LayoutModel", + "grid_row": null, + "_model_module": "@jupyter-widgets/base", + "overflow": null, + 
"max_height": null, + "display": null, + "border_top": null, + "grid_auto_flow": null, + "grid_template_rows": null, + "align_self": null, + "grid_auto_columns": null, + "width": null, + "grid_area": null, + "align_items": null, + "_view_name": "LayoutView", + "left": null, + "height": null, + "_view_module": "@jupyter-widgets/base", + "border_right": null, + "object_position": null, + "justify_content": null, + "bottom": null, + "max_width": null, + "border": null, + "margin": null, + "order": null, + "grid_column": null, + "grid_auto_rows": null, + "padding": null, + "grid_template_columns": null, + "justify_items": null, + "object_fit": null, + "visibility": null, + "_view_count": null, + "flex_flow": null, + "min_height": null, + "top": null, + "min_width": null, + "flex": null, + "border_left": null, + "_model_module_version": "1.2.0", + "grid_template_areas": null, + "overflow_x": null, + "right": null, + "overflow_y": null, + "grid_gap": null, + "border_bottom": null, + "align_content": null + } + }, + "96abccab51ee44199f316740693a49d1": { + "model_name": "ButtonModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.5.0", + "_model_name": "ButtonModel", + "_model_module": "@jupyter-widgets/controls", + "tooltip": "Click to search the DataFrame", + "button_style": "", + "_view_name": "ButtonView", + "tabbable": null, + "_view_module": "@jupyter-widgets/controls", + "_dom_classes": [], + "layout": "IPY_MODEL_34cf5144bfa849978b01e2e37b62c7a6", + "style": "IPY_MODEL_70d25f478b3e4f46b3a1871b39e23c30", + "_view_count": null, + "icon": "search", + "_model_module_version": "1.5.0", + "disabled": false, + "description": "Search" + } + }, + "2eb3b21ac7904f7395ec328353751721": { + "model_name": "LayoutModel", + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "LayoutModel", + "grid_row": null, + 
"_model_module": "@jupyter-widgets/base", + "overflow": null, + "max_height": null, + "display": null, + "border_top": null, + "grid_auto_flow": null, + "grid_template_rows": null, + "align_self": null, + "grid_auto_columns": null, + "width": null, + "grid_area": null, + "align_items": null, + "_view_name": "LayoutView", + "left": null, + "height": null, + "_view_module": "@jupyter-widgets/base", + "border_right": null, + "object_position": null, + "justify_content": null, + "bottom": null, + "max_width": null, + "border": null, + "margin": null, + "order": null, + "grid_column": null, + "grid_auto_rows": null, + "padding": null, + "grid_template_columns": null, + "justify_items": null, + "object_fit": null, + "visibility": null, + "_view_count": null, + "flex_flow": null, + "min_height": null, + "top": null, + "min_width": null, + "flex": null, + "border_left": null, + "_model_module_version": "1.2.0", + "grid_template_areas": null, + "overflow_x": null, + "right": null, + "overflow_y": null, + "grid_gap": null, + "border_bottom": null, + "align_content": null + } + }, + "1815dedfcf7e478b846764076db8f43e": { + "model_name": "DescriptionStyleModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_name": "DescriptionStyleModel", + "_model_module_version": "1.5.0", + "_view_module": "@jupyter-widgets/base", + "_view_name": "StyleView", + "_view_module_version": "1.2.0", + "_view_count": null, + "description_width": "" + } + }, + "34cf5144bfa849978b01e2e37b62c7a6": { + "model_name": "LayoutModel", + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "LayoutModel", + "grid_row": null, + "_model_module": "@jupyter-widgets/base", + "overflow": null, + "max_height": null, + "display": null, + "border_top": null, + "grid_auto_flow": null, + "grid_template_rows": null, + 
"align_self": null, + "grid_auto_columns": null, + "width": null, + "grid_area": null, + "align_items": null, + "_view_name": "LayoutView", + "left": null, + "height": null, + "_view_module": "@jupyter-widgets/base", + "border_right": null, + "object_position": null, + "justify_content": null, + "bottom": null, + "max_width": null, + "border": null, + "margin": null, + "order": null, + "grid_column": null, + "grid_auto_rows": null, + "padding": null, + "grid_template_columns": null, + "justify_items": null, + "object_fit": null, + "visibility": null, + "_view_count": null, + "flex_flow": null, + "min_height": null, + "top": null, + "min_width": null, + "flex": null, + "border_left": null, + "_model_module_version": "1.2.0", + "grid_template_areas": null, + "overflow_x": null, + "right": null, + "overflow_y": null, + "grid_gap": null, + "border_bottom": null, + "align_content": null + } + }, + "d5e276015481459c9cb3c214c96f1232": { + "model_name": "DescriptionStyleModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_name": "DescriptionStyleModel", + "_model_module_version": "1.5.0", + "_view_module": "@jupyter-widgets/base", + "_view_name": "StyleView", + "_view_module_version": "1.2.0", + "_view_count": null, + "description_width": "" + } + }, + "2a9781b403e349d1bc06ddddbf5ab41c": { + "model_name": "LayoutModel", + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "LayoutModel", + "grid_row": null, + "_model_module": "@jupyter-widgets/base", + "overflow": null, + "max_height": null, + "display": null, + "border_top": null, + "grid_auto_flow": null, + "grid_template_rows": null, + "align_self": null, + "grid_auto_columns": null, + "width": null, + "grid_area": null, + "align_items": null, + "_view_name": "LayoutView", + "left": null, + "height": null, + "_view_module": 
"@jupyter-widgets/base", + "border_right": null, + "object_position": null, + "justify_content": null, + "bottom": null, + "max_width": null, + "border": null, + "margin": null, + "order": null, + "grid_column": null, + "grid_auto_rows": null, + "padding": null, + "grid_template_columns": null, + "justify_items": null, + "object_fit": null, + "visibility": "hidden", + "_view_count": null, + "flex_flow": null, + "min_height": null, + "top": null, + "min_width": null, + "flex": null, + "border_left": null, + "_model_module_version": "1.2.0", + "grid_template_areas": null, + "overflow_x": null, + "right": null, + "overflow_y": null, + "grid_gap": null, + "border_bottom": null, + "align_content": null + } + }, + "e6baab7da9814a22aa60c0657071f9f4": { + "model_name": "ButtonModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.5.0", + "_model_name": "ButtonModel", + "_model_module": "@jupyter-widgets/controls", + "tooltip": "Click to run the query", + "button_style": "", + "_view_name": "ButtonView", + "tabbable": null, + "_view_module": "@jupyter-widgets/controls", + "_dom_classes": [], + "layout": "IPY_MODEL_d76b1e7ae1554c43bb48daeaf3e4df2b", + "style": "IPY_MODEL_76fbb0eb67b64b81b6489216ebf64a6e", + "_view_count": null, + "icon": "search", + "_model_module_version": "1.5.0", + "disabled": false, + "description": "Analyze" + } + }, + "4fb55052b88b479d90ad721226b9f37d": { + "model_name": "LabelModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_name": "LabelModel", + "_model_module": "@jupyter-widgets/controls", + "tooltip": null, + "description_allow_html": false, + "_view_name": "LabelView", + "tabbable": null, + "_view_module": "@jupyter-widgets/controls", + "_dom_classes": [], + "layout": "IPY_MODEL_2a9781b403e349d1bc06ddddbf5ab41c", + "value": "Loading. 
Please wait....", + "style": "IPY_MODEL_1815dedfcf7e478b846764076db8f43e", + "placeholder": "​", + "_view_count": null, + "_model_module_version": "1.5.0", + "disabled": false, + "description": "" + } + }, + "cdc828bc800445fcb7b0c0a493aaa20b": { + "model_name": "DescriptionStyleModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_name": "DescriptionStyleModel", + "_model_module_version": "1.5.0", + "_view_module": "@jupyter-widgets/base", + "_view_name": "StyleView", + "_view_module_version": "1.2.0", + "_view_count": null, + "description_width": "" + } + }, + "8eb3e0f20ec64047bf595a66e9a95a6f": { + "model_name": "LayoutModel", + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "LayoutModel", + "grid_row": null, + "_model_module": "@jupyter-widgets/base", + "overflow": null, + "max_height": null, + "display": null, + "border_top": null, + "grid_auto_flow": null, + "grid_template_rows": null, + "align_self": null, + "grid_auto_columns": null, + "width": null, + "grid_area": null, + "align_items": null, + "_view_name": "LayoutView", + "left": null, + "height": null, + "_view_module": "@jupyter-widgets/base", + "border_right": null, + "object_position": null, + "justify_content": null, + "bottom": null, + "max_width": null, + "border": null, + "margin": null, + "order": null, + "grid_column": null, + "grid_auto_rows": null, + "padding": null, + "grid_template_columns": null, + "justify_items": null, + "object_fit": null, + "visibility": null, + "_view_count": null, + "flex_flow": null, + "min_height": null, + "top": null, + "min_width": null, + "flex": null, + "border_left": null, + "_model_module_version": "1.2.0", + "grid_template_areas": null, + "overflow_x": null, + "right": null, + "overflow_y": null, + "grid_gap": null, + "border_bottom": null, + "align_content": 
null + } + }, + "70d25f478b3e4f46b3a1871b39e23c30": { + "model_name": "ButtonStyleModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "ButtonStyleModel", + "font_style": "", + "_model_module": "@jupyter-widgets/controls", + "text_decoration": "", + "_view_name": "StyleView", + "_view_module": "@jupyter-widgets/base", + "font_weight": "", + "text_color": "", + "font_size": "", + "_view_count": null, + "font_family": "", + "font_variant": "", + "button_color": null, + "_model_module_version": "1.5.0" + } + }, + "519cf121b72d44fa8067b50a98a24b2b": { + "model_name": "TextModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_name": "TextModel", + "_model_module": "@jupyter-widgets/controls", + "tooltip": null, + "description_allow_html": false, + "_view_name": "TextView", + "tabbable": null, + "_view_module": "@jupyter-widgets/controls", + "_dom_classes": [], + "layout": "IPY_MODEL_2eb3b21ac7904f7395ec328353751721", + "value": "InitiatingProcessFileName", + "style": "IPY_MODEL_cd72a7c0653943409b2ed45253667596", + "placeholder": "Enter the field name for Isolation Forest", + "_view_count": null, + "continuous_update": true, + "_model_module_version": "1.5.0", + "disabled": false, + "description": "Field:" + } } } } From 26ffa65653cbcc798e412c3ce18fed1ccf0e1135 Mon Sep 17 00:00:00 2001 From: Jgraff1995 <96274100+Jgraff1995@users.noreply.github.com> Date: Wed, 2 Apr 2025 19:36:07 -0400 Subject: [PATCH 3/3] Add DGA Notebook --- .../DGA_Detection_ManagedIdentity.ipynb | 577 ++++++++++++++++++ 1 file changed, 577 insertions(+) create mode 100644 machine-learning-notebooks/DGA_Detection_ManagedIdentity.ipynb diff --git a/machine-learning-notebooks/DGA_Detection_ManagedIdentity.ipynb b/machine-learning-notebooks/DGA_Detection_ManagedIdentity.ipynb new 
file mode 100644 index 00000000..52b24bf7 --- /dev/null +++ b/machine-learning-notebooks/DGA_Detection_ManagedIdentity.ipynb @@ -0,0 +1,577 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Guided Hunting - Domain Generation Algorithm (DGA) Detection\n", + "
\n", + " Details...\n", + "**Python Version:** Python 3.8 (including Python 3.8 - AzureML)
\n", + "**Required Packages**: msticpy, pandas, numpy, matplotlib, plotly, ipywidgets, ipython, sklearn
\n", + "\n", + "**Data Sources Required**:\n", + "- Log Analytics - DeviceNetworkEvents\n", + "\n", + "
\n", + "\n", + "Brings together a series of queries and visualizations to help you investigate anomalous processes in your network. There are then guided hunting steps to investigate these occurrences in further depth. This notebook authenticates with a managed identity and requires the following:\n", + "- msticpyconfig.yaml has been properly configured\n", + "- managed identity with appropriate RBAC" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Log in with Managed Identity\n", + "Replace the [CLIENT_ID] with the client id of your Managed Identity. This can be found on the Azure Portal at Managed Identities -> Overview" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "!az login --identity --username [CLIENT_ID]" + ], + "outputs": [], + "execution_count": null, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Import Libraries" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import msticpy\n", + "import msticpy as mp\n", + "from azure.identity import DefaultAzureCredential, ManagedIdentityCredential\n", + "from azure.keyvault.secrets import SecretClient\n", + "from azure.mgmt.resource import ResourceManagementClient\n", + "\n", + "\n", + "# Initialize ManagedIdentity\n", + "credential = ManagedIdentityCredential()\n", + "\n", + "\n", + "# Now you can use ManagedIdentity or other credential classes\n", + "print(credential)\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "gather": { + "logged": 1743622407389 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Setup msticpyconfig.yaml\n", + "Ensure your msticpyconfig.yaml has been set up and saved in the directory you are running this notebook from."
+ ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "import msticpy\n", + "from msticpy.config import MpConfigFile, MpConfigEdit\n", + "import os\n", + "import json\n", + "from pathlib import Path\n", + "\n", + "mp_conf = \"msticpyconfig.yaml\"\n", + "\n", + "# check if MSTICPYCONFIG is already an env variable\n", + "mp_env = os.environ.get(\"MSTICPYCONFIG\")\n", + "mp_conf = mp_env if mp_env and Path(mp_env).is_file() else mp_conf\n", + "\n", + "if not Path(mp_conf).is_file():\n", + " print(\n", + " \"No msticpyconfig.yaml was found!\",\n", + " \"Please check that there is a config.json file in your workspace folder.\",\n", + " \"If this is not there, go back to the Microsoft Sentinel portal and launch\",\n", + " \"this notebook from there.\",\n", + " sep=\"\\n\"\n", + " )\n", + "else:\n", + " mpedit = MpConfigEdit(mp_conf)\n", + " mpconfig = MpConfigFile(mp_conf)\n", + " \n", + " # Convert SettingsDict to a regular dictionary\n", + " settings_dict = {k: v for k, v in mpconfig.settings.items()}\n", + " print(f\"Configured Sentinel workspaces: {json.dumps(settings_dict, indent=4)}\")\n", + "\n", + "msticpy.settings.refresh_config()\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + }, + "gather": { + "logged": 1743622411835 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Setup QueryProvider" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "# Refresh any config items that might have been saved\n", + "# to the msticpyconfig in the previous steps.\n", + "msticpy.settings.refresh_config()\n", + "\n", + "# Initialize a QueryProvider for Microsoft Sentinel\n", + "qry_prov = mp.QueryProvider(\"AzureSentinel\")" + ], + "outputs": [], + 
"execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + }, + "gather": { + "logged": 1743622415440 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Connect to Sentinel\n", + "You should see \"connected\" output after running this code block. Once you are connected, you can continue on with the notebook." + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "# Get the default Microsoft Sentinel workspace details from msticpyconfig.yaml\n", + "\n", + "ws_config = mp.WorkspaceConfig()\n", + "\n", + "# Connect to Microsoft Sentinel with our QueryProvider and config details\n", + "qry_prov.connect(ws_config, mp_az_auth=[\"msi\"])" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + }, + "gather": { + "logged": 1743622418323 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## DGA Model Creation\n", + "Make sure \"domain.csv\" is saved in your current working directory. Change the \"model_filename\" to the appropriate path in your environment." 
+ ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.naive_bayes import MultinomialNB\n", + "from sklearn.pipeline import make_pipeline\n", + "import joblib\n", + "import os\n", + "\n", + "# Load the CSV file containing the labeled domains\n", + "labeled_domains_df = pd.read_csv('/home/azureuser/cloudfiles/code/Users/jgraff1/domain.csv')\n", + "\n", + "# Preprocess the data\n", + "X = labeled_domains_df['Domain']\n", + "y = labeled_domains_df['Label'].apply(lambda x: 1 if x == 'DGA' else 0)\n", + "\n", + "# Split the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)\n", + "\n", + "# Create a pipeline that combines the CountVectorizer and the MultinomialNB classifier\n", + "model = make_pipeline(CountVectorizer(), MultinomialNB())\n", + "\n", + "# Train the model\n", + "model.fit(X_train, y_train)\n", + "\n", + "# Save the trained model to a file\n", + "model_filename = '/home/azureuser/cloudfiles/code/Users/jgraff1/Models/dga_model.joblib'\n", + "joblib.dump(model, model_filename)\n", + "print(f'Model saved to {model_filename}')\n", + "\n", + "# Evaluate the model (optional)\n", + "accuracy = model.score(X_test, y_test)\n", + "print(f'Model accuracy: {accuracy:.2f}')" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "gather": { + "logged": 1743622443562 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Apply dga_model.joblib to Sentinel Data" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "query = \"\"\"\n", + "DeviceNetworkEvents\n", + "| where TimeGenerated >= ago(30d)\n", + "| where ActionType ==
\"DnsConnectionInspected\"\n", + "| extend QueryField = tostring(parse_json(AdditionalFields).query)\n", + "| where isnotempty(QueryField)\n", + "| where QueryField matches regex @\"[a-zA-Z0-9]{8,}\"\n", + "| summarize Count = count() by QueryField\n", + "| where Count > 10\n", + "\"\"\"\n", + "\n", + "# Set the maximum column width to None (no truncation)\n", + "pd.set_option('display.max_colwidth', None)\n", + "df = qry_prov.exec_query(query)\n", + "\n", + "# Load the trained model from the file\n", + "model = joblib.load(model_filename)\n", + "print(f'Model loaded from {model_filename}')\n", + "\n", + "# Define a function to check if a domain is associated with a DGA using the trained model\n", + "def is_dga(domain):\n", + " return model.predict([domain])[0] == 1\n", + "\n", + "# Apply the function to the \"QueryField\" column\n", + "df['IsDGA'] = df['QueryField'].apply(is_dga)\n", + "\n", + "# Display the updated dataframe\n", + "df.head(20)" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "gather": { + "logged": 1743622473380 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Output All Results to CSV\n", + "Change the \"output_path\" variable to match your environment." 
+ ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "# Ensure the directory exists\n", + "output_path = '/home/azureuser/cloudfiles/code/Users/jgraff1/dgaresults.csv'\n", + "os.makedirs(os.path.dirname(output_path), exist_ok=True)\n", + "\n", + "# Export the DataFrame to a CSV file in the specified file path\n", + "df.to_csv(output_path, index=False)\n", + "\n", + "print(f\"DataFrame has been exported to {output_path}\")" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "gather": { + "logged": 1743450692896 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Filter DGA Results to CSV\n", + "Any results that match the DGA detection algorithm will be saved to a CSV file. Change the \"output_path\" variable to match your environment.\n" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import pandas as pd\n", + "\n", + "# Assuming df is your DataFrame\n", + "# Filter the DataFrame to only include rows where IsDGA is True\n", + "filtered_df = df[df['IsDGA'] == True]\n", + "\n", + "# Ensure the directory exists\n", + "output_path = '/home/azureuser/cloudfiles/code/Users/jgraff1/dgaresults2.csv'\n", + "os.makedirs(os.path.dirname(output_path), exist_ok=True)\n", + "\n", + "# Export the filtered DataFrame to a CSV file in the specified file path\n", + "filtered_df.to_csv(output_path, index=False)\n", + "\n", + "print(f\"Filtered DataFrame has been exported to {output_path}\")" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "gather": { + "logged": 1743450695377 + } + } + }, + { + "cell_type": "markdown", + "source": [ + "###" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + } + } + ], + "metadata": { + "kernelspec": { + "name": "python38-azureml", + "language": "python", + "display_name": "Python 3.8 - AzureML" + }, +
"language_info": { + "name": "python", + "version": "3.10.11", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "microsoft": { + "ms_spell_check": { + "ms_spell_check_language": "en" + }, + "host": { + "AzureML": { + "notebookHasBeenCompleted": true + } + } + }, + "kernel_info": { + "name": "python38-azureml" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "version_major": 2, + "version_minor": 0, + "state": { + "bd9a68719d5d4769a0172dafce29c3ed": { + "model_name": "LabelModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_name": "LabelModel", + "_model_module": "@jupyter-widgets/controls", + "tooltip": null, + "description_allow_html": false, + "_view_name": "LabelView", + "tabbable": null, + "_view_module": "@jupyter-widgets/controls", + "_dom_classes": [], + "layout": "IPY_MODEL_bbe69074cc034c4cbe0159f7aa02e651", + "value": "Loading. 
Please wait....", + "style": "IPY_MODEL_ea046babc5d14729acc2994b9ef15916", + "placeholder": "​", + "_view_count": null, + "_model_module_version": "1.5.0", + "disabled": false, + "description": "" + } + }, + "bbe69074cc034c4cbe0159f7aa02e651": { + "model_name": "LayoutModel", + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "state": { + "_view_module_version": "1.2.0", + "_model_name": "LayoutModel", + "grid_row": null, + "_model_module": "@jupyter-widgets/base", + "overflow": null, + "max_height": null, + "display": null, + "border_top": null, + "grid_auto_flow": null, + "grid_template_rows": null, + "align_self": null, + "grid_auto_columns": null, + "width": null, + "grid_area": null, + "align_items": null, + "_view_name": "LayoutView", + "left": null, + "height": null, + "_view_module": "@jupyter-widgets/base", + "border_right": null, + "object_position": null, + "justify_content": null, + "bottom": null, + "max_width": null, + "border": null, + "margin": null, + "order": null, + "grid_column": null, + "grid_auto_rows": null, + "padding": null, + "grid_template_columns": null, + "justify_items": null, + "object_fit": null, + "visibility": "hidden", + "_view_count": null, + "flex_flow": null, + "min_height": null, + "top": null, + "min_width": null, + "flex": null, + "border_left": null, + "_model_module_version": "1.2.0", + "grid_template_areas": null, + "overflow_x": null, + "right": null, + "overflow_y": null, + "grid_gap": null, + "border_bottom": null, + "align_content": null + } + }, + "ea046babc5d14729acc2994b9ef15916": { + "model_name": "DescriptionStyleModel", + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_name": "DescriptionStyleModel", + "_model_module_version": "1.5.0", + "_view_module": "@jupyter-widgets/base", + "_view_name": "StyleView", + "_view_module_version": "1.2.0", + "_view_count": null, + 
"description_width": "" + } + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file