68 changes: 51 additions & 17 deletions 02_activities/assignments/assignment_1.ipynb
@@ -96,7 +96,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Your answer here"
"# Your answer here\n",
"wine_df.shape[0]"
]
},
{
@@ -114,7 +115,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Your answer here"
"# Your answer here\n",
"wine_df.shape[1]"
]
},
{
@@ -132,7 +134,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Your answer here"
"# Your answer here\n",
"print(wine_df[\"class\"].unique())\n",
"print(wine_df[\"class\"].dtype)"
]
},
{
@@ -151,7 +155,11 @@
"metadata": {},
"outputs": [],
"source": [
"# Your answer here"
"# Your answer here\n",
"target_var = [\"class\"]\n",
"standardized_wine = wine_df.copy()\n",
"predictors = standardized_wine.columns.difference(target_var)\n",
"len(predictors)"
]
},
{
@@ -204,7 +212,7 @@
"id": "403ef0bb",
"metadata": {},
"source": [
"> Your answer here..."
"> The KNN algorithm depends heavily on distance calculations. If certain predictors span a much larger range of values (e.g., proline, magnesium), they dominate the Euclidean distance, so a one-unit change in a small-scale feature contributes far less than it should. Standardizing puts every predictor on the same scale so that no single feature unfairly influences the model."
]
},
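The scaling argument in the answer above can be sketched with a toy example. This is an illustrative aside, not part of the notebook diff; it assumes NumPy and scikit-learn are available, and the two-column array is a made-up stand-in for a large-scale feature (like proline) next to a small-scale one:

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

# Two toy predictors on very different scales.
X = np.array([[1000.0, 0.1],
              [1010.0, 0.9],
              [1500.0, 0.5]])

# Raw Euclidean distance between rows 0 and 1 is dominated by column 0,
# even though the rows differ far more (relatively) in column 1.
raw = np.linalg.norm(X[0] - X[1])

# After standardization each column has mean 0 and unit variance,
# so both features contribute comparably to the distance.
Z = StandardScaler().fit_transform(X)
scaled = np.linalg.norm(Z[0] - Z[1])

print(raw, scaled)
```

On this data the raw distance is roughly 10 (driven almost entirely by the first column), while the standardized distance is driven mostly by the second.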
{
@@ -220,7 +228,7 @@
"id": "fdee5a15",
"metadata": {},
"source": [
"> Your answer here..."
"> We don't standardize the response variable because it is the variable whose class membership we are trying to predict. Standardizing it, especially when it holds discrete class labels (0, 1, 2), would destroy our ability to interpret the predicted values."
]
},
{
@@ -236,7 +244,7 @@
"id": "f0676c21",
"metadata": {},
"source": [
"> Your answer here..."
"> We set a seed so that the computer uses the same randomization when splitting the dataset, making results consistent across re-runs of the code. This facilitates reproducibility: users can share code and get the same results (assuming the code and data have not been treated differently among users). The value of the seed does not matter so long as the same seed is used each time."
]
},
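The reproducibility claim above is easy to demonstrate: the same `random_state` produces the identical split on every run. A minimal sketch (illustrative only, using a small made-up array rather than the wine data):

```python
import numpy as np
from sklearn.model_selection import train_test_split

data = np.arange(20).reshape(10, 2)

# Same seed -> identical split, no matter how many times this runs.
a_train, a_test = train_test_split(data, train_size=0.75, random_state=42)
b_train, b_test = train_test_split(data, train_size=0.75, random_state=42)

print(np.array_equal(a_train, b_train))  # True
```

Omitting `random_state` would instead draw a fresh shuffle each run, and the two splits would (almost always) differ.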
{
@@ -261,7 +269,12 @@
"\n",
"# split the data into a training and testing set. hint: use train_test_split !\n",
"\n",
"# Your code here ..."
"# Your code here ...\n",
"standardized_wine = pd.concat([predictors_standardized, wine_df[\"class\"]], axis=1)\n",
"\n",
"\n",
"# set random_state so the split is reproducible, per the seed discussion above\n",
"wine_train, wine_test = train_test_split(\n",
"    standardized_wine, train_size=0.75, stratify=standardized_wine[\"class\"],\n",
"    random_state=123)"
]
},
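The `stratify=` argument used above keeps the class proportions of the full dataset intact in both the train and test sets. A small sketch under assumed class counts (a hypothetical 20/12/8 split standing in for the wine `class` column):

```python
import numpy as np
from sklearn.model_selection import train_test_split

# 40 samples, 3 classes with unequal frequencies (hypothetical data).
y = np.array([0] * 20 + [1] * 12 + [2] * 8)
X = np.random.default_rng(0).normal(size=(40, 2))

X_tr, X_te, y_tr, y_te = train_test_split(
    X, y, train_size=0.75, stratify=y, random_state=1)

# Stratification preserves per-class proportions:
# 20:12:8 overall -> 15:9:6 in train, 5:3:2 in test.
print(np.bincount(y_tr), np.bincount(y_te))
```

Without `stratify`, a random split could over- or under-represent a class, which matters for a classifier evaluated on test accuracy.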
{
@@ -289,7 +302,25 @@
"metadata": {},
"outputs": [],
"source": [
"# Your code here..."
"# Your code here...\n",
"knn = KNeighborsClassifier()  # n_neighbors is set by the grid search below\n",
"\n",
"parameter_grid = {\n",
" \"n_neighbors\": range(1, 51),\n",
"}\n",
"\n",
"wine_tune_grid = GridSearchCV(\n",
" estimator=knn,\n",
" param_grid=parameter_grid,\n",
" cv=10\n",
")\n",
"\n",
"wine_tune_grid.fit(\n",
" wine_train.iloc[:, :-1],\n",
" wine_train[\"class\"]\n",
")\n",
"\n",
"wine_tune_grid.best_params_"
]
},
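The `GridSearchCV` pattern in the diff above can be sanity-checked end to end on synthetic data. A sketch (illustrative only; `make_classification` stands in for the wine training set, and the grid/CV sizes are arbitrary):

```python
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Synthetic 3-class stand-in for the wine training data.
X, y = make_classification(n_samples=150, n_features=6, n_informative=4,
                           n_classes=3, random_state=0)

# Search k = 1..10 with 5-fold cross-validation; scoring defaults to accuracy.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid={"n_neighbors": range(1, 11)},
    cv=5,
)
grid.fit(X, y)

print(grid.best_params_, round(grid.best_score_, 3))
```

`best_params_` holds the winning k and `best_score_` its mean cross-validated accuracy; after `fit`, the grid object is itself a usable estimator refit on all the data with that k.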
{
@@ -310,7 +341,15 @@
"metadata": {},
"outputs": [],
"source": [
"# Your code here..."
"# Your code here...\n",
"knn = KNeighborsClassifier(n_neighbors=7)  # k chosen by the grid search above\n",
"knn.fit(wine_train.iloc[:, :-1], wine_train[\"class\"])\n",
"\n",
"wine_test[\"predicted\"] = knn.predict(wine_test.iloc[:, :-1])\n",
"wine_test[[\"class\", \"predicted\"]]\n",
"\n",
"accuracy = accuracy_score(wine_test[\"class\"], wine_test[\"predicted\"])\n",
"print(\"Accuracy:\", accuracy)"
]
},
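For reference, `accuracy_score` used in the final cell is simply the fraction of predictions that match the true labels. A tiny sketch with made-up labels:

```python
from sklearn.metrics import accuracy_score

y_true = [0, 1, 2, 2, 1]
y_pred = [0, 1, 1, 2, 1]

# 4 of 5 predictions match -> 0.8
print(accuracy_score(y_true, y_pred))
```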
{
@@ -365,7 +404,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@@ -379,12 +418,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
},
"vscode": {
"interpreter": {
"hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e"
}
"version": "3.14.0"
}
},
"nbformat": 4,