microsoft · kyoro1 · Oct 2, 2021 · Oct 2, 2021 · Oct 2, 2021 · Oct 3, 2021
diff --git a/docs/how-to/FAQ.md b/docs/how-to/FAQ.md
@@ -1,7 +1,5 @@
 # Frequently Asked Question
 
-[[_TOC_]]
-
 ## When running the ADO training pipeline, the pipeline fails at the _invoke_ step. What's the error ?
 
 If you see the error below. You have to ensure that the service connection is created at the Azure Machine Learning Workspace level and not Subscription level

diff --git a/docs/how-to/GeneralDocumentation.md b/docs/how-to/GeneralDocumentation.md
@@ -1,7 +1,5 @@
 # General Documentation
 
-[[_TOC_]]
-
 ## Data Science Lifecycle Base Repo
 
 The base project structure was inspired by the following [dslp repo](https://github.com/dslp/dslp-repo-template). We readapted it to support minimal MLOps principles.

diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb
@@ -0,0 +1,103 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "source": [
+    "# Get data & train ML model\n",
+    "\n",
+    "This notebook shows how to run our scripts on the sample data. Please adjust the settings to match your environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1633182532724
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "## The script `train_1_classifier.py` in ../src retrieves the data, splits it, \n",
+    "## and generates a model with the RandomForest algorithm.\n",
+    "!python ../src/train_1_classifier.py \\\n",
+    "    --dataset-name \"../docs/data/Two_class.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1633183686133
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "## The script `train_n_classifier.py` in ../src retrieves the data, splits it, \n",
+    "## generates models with various algorithms, and picks the best one.\n",
+    "#!python ../src/train_n_classifier.py \\\n",
+    "#    --dataset-name \"../docs/data/Two_class.csv\""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernel_info": {
+   "name": "python3-azureml"
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.10"
+  },
+  "microsoft": {
+   "host": {
+    "AzureML": {
+     "notebookHasBeenCompleted": true
+    }
+   }
+  },
+  "nteract": {
+   "version": "nteract-front-end@1.0.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/train_1_classifier.py b/src/train_1_classifier.py
@@ -134,7 +134,7 @@ def parse_args(args_list=None):
     parser.add_argument('--model-name', type=str, default='two_class.pkl')
     parser.add_argument('--model-metric-name', type=str, default='mse',
                         help='The name of the evaluation metric used in Train step')
-    parser.add_argument('--keep-columns', type=str, default='Helpfulness Score|Score|Text|Target')
+    parser.add_argument('--keep-columns', type=str, default='Helpfulness_Score|Score|Text|Target')
     parser.add_argument('--target-column', type=str, default='Target')
     parser.add_argument('--target-values', type=str, default='toys games|not a toy/game')
     parser.add_argument('--text-columns', type=str, default='Text')

diff --git a/src/train_n_classifier.py b/src/train_n_classifier.py
@@ -176,7 +176,7 @@ def parse_args(args_list=None):
     parser.add_argument('--model-name', type=str, default='two_class.pkl')
     parser.add_argument('--model-metric-name', type=str, default='Recall',
                         help='The name of the evaluation metric used in Train step')
-    parser.add_argument('--keep-columns', type=str, default='Helpfulness Score|Score|Text|Target')
+    parser.add_argument('--keep-columns', type=str, default='Helpfulness_Score|Score|Text|Target')
     parser.add_argument('--target-column', type=str, default='Target')
     parser.add_argument('--target-values', type=str, default='toys games|not a toy/game')
     parser.add_argument('--text-columns', type=str, default='Text')