diff --git a/Exercise.ipynb b/Exercise.ipynb new file mode 100644 index 0000000..580f7b3 --- /dev/null +++ b/Exercise.ipynb @@ -0,0 +1,2053 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Exercise.ipynb", + "version": "0.3.2", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "metadata": { + "id": "2LTtpUJEibjg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Pandas Exercise :\n", + "\n", + "\n", + "#### import necessary modules" + ] + }, + { + "metadata": { + "id": "c3_UBbMRhiKx", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import pandas as pd" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "tp-cTCyWi8mR", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Load url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\" to a dataframe named wine_df\n", + "\n", + "This is a wine dataset\n", + "\n" + ] + }, + { + "metadata": { + "id": "DMojQY3thrRi", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 359 + }, + "outputId": "75935be6-1816-4db5-c0c7-4016595f14a0" + }, + "cell_type": "code", + "source": [ + "url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'\n", + "wine_df = pd.read_csv(url)\n", + "wine_df.head(10)" + ], + "execution_count": 108, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
114.231.712.4315.61272.83.06.282.295.641.043.921065
0113.201.782.1411.21002.652.760.261.284.381.053.401050
1113.162.362.6718.61012.803.240.302.815.681.033.171185
2114.371.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
4114.201.762.4515.21123.273.390.341.976.751.052.851450
5114.391.872.4514.6962.502.520.301.985.251.023.581290
6114.062.152.6117.61212.602.510.311.255.051.063.581295
7114.831.642.1714.0972.802.980.291.985.201.082.851045
8113.861.352.2716.0982.983.150.221.857.221.013.551045
9114.102.162.3018.01052.953.320.222.385.751.253.171510
\n", + "
" + ], + "text/plain": [ + " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n", + "1 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n", + "3 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.75 1.05 2.85 \n", + "5 1 14.39 1.87 2.45 14.6 96 2.50 2.52 0.30 1.98 5.25 1.02 3.58 \n", + "6 1 14.06 2.15 2.61 17.6 121 2.60 2.51 0.31 1.25 5.05 1.06 3.58 \n", + "7 1 14.83 1.64 2.17 14.0 97 2.80 2.98 0.29 1.98 5.20 1.08 2.85 \n", + "8 1 13.86 1.35 2.27 16.0 98 2.98 3.15 0.22 1.85 7.22 1.01 3.55 \n", + "9 1 14.10 2.16 2.30 18.0 105 2.95 3.32 0.22 2.38 5.75 1.25 3.17 \n", + "\n", + " 1065 \n", + "0 1050 \n", + "1 1185 \n", + "2 1480 \n", + "3 735 \n", + "4 1450 \n", + "5 1290 \n", + "6 1295 \n", + "7 1045 \n", + "8 1045 \n", + "9 1510 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 108 + } + ] + }, + { + "metadata": { + "id": "BF9MMjoZjSlg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### print first five rows" + ] + }, + { + "metadata": { + "id": "1vSMQdnHjYNU", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "c13d7565-8120-42d0-bda7-b60b1ca628b6" + }, + "cell_type": "code", + "source": [ + "wine_df.head(5)" + ], + "execution_count": 109, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
114.231.712.4315.61272.83.06.282.295.641.043.921065
0113.201.782.1411.21002.652.760.261.284.381.053.401050
1113.162.362.6718.61012.803.240.302.815.681.033.171185
2114.371.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
4114.201.762.4515.21123.273.390.341.976.751.052.851450
\n", + "
" + ], + "text/plain": [ + " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n", + "1 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n", + "3 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.75 1.05 2.85 \n", + "\n", + " 1065 \n", + "0 1050 \n", + "1 1185 \n", + "2 1480 \n", + "3 735 \n", + "4 1450 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 109 + } + ] + }, + { + "metadata": { + "id": "Tet6P2DvjY3T", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### assign wine_df to a different variable wine_df_copy and then delete all odd rows of wine_df_copy\n", + "\n", + "[Hint](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.drop.html)" + ] + }, + { + "metadata": { + "id": "CMj3qSdJjx0u", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 359 + }, + "outputId": "a6676447-dd0c-4866-9df4-34f02706caf0" + }, + "cell_type": "code", + "source": [ + "wine_df_copy = wine_df\n", + "wine_df_copy = wine_df_copy[wine_df_copy.index%2 == 0]\n", + "wine_df_copy.head(10)" + ], + "execution_count": 110, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
114.231.712.4315.61272.83.06.282.295.641.043.921065
0113.201.782.1411.21002.652.760.261.284.381.053.401050
2114.371.952.5016.81133.853.490.242.187.800.863.451480
4114.201.762.4515.21123.273.390.341.976.751.052.851450
6114.062.152.6117.61212.602.510.311.255.051.063.581295
8113.861.352.2716.0982.983.150.221.857.221.013.551045
10114.121.482.3216.8952.202.430.261.575.001.172.821280
12114.751.732.3911.4913.103.690.432.815.401.252.731150
14113.631.812.7017.21122.852.910.301.467.301.282.881310
16113.831.572.6220.01152.953.400.401.726.601.132.571130
18113.643.102.5615.21162.703.030.171.665.100.963.36845
\n", + "
" + ], + "text/plain": [ + " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.75 1.05 2.85 \n", + "6 1 14.06 2.15 2.61 17.6 121 2.60 2.51 0.31 1.25 5.05 1.06 3.58 \n", + "8 1 13.86 1.35 2.27 16.0 98 2.98 3.15 0.22 1.85 7.22 1.01 3.55 \n", + "10 1 14.12 1.48 2.32 16.8 95 2.20 2.43 0.26 1.57 5.00 1.17 2.82 \n", + "12 1 14.75 1.73 2.39 11.4 91 3.10 3.69 0.43 2.81 5.40 1.25 2.73 \n", + "14 1 13.63 1.81 2.70 17.2 112 2.85 2.91 0.30 1.46 7.30 1.28 2.88 \n", + "16 1 13.83 1.57 2.62 20.0 115 2.95 3.40 0.40 1.72 6.60 1.13 2.57 \n", + "18 1 13.64 3.10 2.56 15.2 116 2.70 3.03 0.17 1.66 5.10 0.96 3.36 \n", + "\n", + " 1065 \n", + "0 1050 \n", + "2 1480 \n", + "4 1450 \n", + "6 1295 \n", + "8 1045 \n", + "10 1280 \n", + "12 1150 \n", + "14 1310 \n", + "16 1130 \n", + "18 845 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 110 + } + ] + }, + { + "metadata": { + "id": "o6Cs6T1Rjz71", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Assign the columns as below:\n", + "\n", + "The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it): \n", + "1) Alcohol \n", + "2) Malic acid \n", + "3) Ash \n", + "4) Alcalinity of ash \n", + "5) Magnesium \n", + "6) Total phenols \n", + "7) Flavanoids \n", + "8) Nonflavanoid phenols \n", + "9) Proanthocyanins \n", + "10) Color intensity \n", + "11) Hue \n", + "12) OD280/OD315 of diluted wines \n", + "13) Proline " + ] + }, + { + "metadata": { + "id": "my8HB4V4j779", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 359 + }, + "outputId": "cf250a08-a1a8-43b2-8955-752f3c512750" + }, + "cell_type": "code", + "source": [ + "wine_df.columns = ['Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium','Total 
phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','OD280/OD315 of diluted wines','Proline','Col-14']\n", + "wine_df.head(10)" + ], + "execution_count": 111, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProlineCol-14
0113.201.782.1411.21002.652.760.261.284.381.053.401050
1113.162.362.6718.61012.803.240.302.815.681.033.171185
2114.371.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
4114.201.762.4515.21123.273.390.341.976.751.052.851450
5114.391.872.4514.6962.502.520.301.985.251.023.581290
6114.062.152.6117.61212.602.510.311.255.051.063.581295
7114.831.642.1714.0972.802.980.291.985.201.082.851045
8113.861.352.2716.0982.983.150.221.857.221.013.551045
9114.102.162.3018.01052.953.320.222.385.751.253.171510
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", + "0 1 13.20 1.78 2.14 11.2 100 \n", + "1 1 13.16 2.36 2.67 18.6 101 \n", + "2 1 14.37 1.95 2.50 16.8 113 \n", + "3 1 13.24 2.59 2.87 21.0 118 \n", + "4 1 14.20 1.76 2.45 15.2 112 \n", + "5 1 14.39 1.87 2.45 14.6 96 \n", + "6 1 14.06 2.15 2.61 17.6 121 \n", + "7 1 14.83 1.64 2.17 14.0 97 \n", + "8 1 13.86 1.35 2.27 16.0 98 \n", + "9 1 14.10 2.16 2.30 18.0 105 \n", + "\n", + " Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", + "0 2.65 2.76 0.26 1.28 4.38 \n", + "1 2.80 3.24 0.30 2.81 5.68 \n", + "2 3.85 3.49 0.24 2.18 7.80 \n", + "3 2.80 2.69 0.39 1.82 4.32 \n", + "4 3.27 3.39 0.34 1.97 6.75 \n", + "5 2.50 2.52 0.30 1.98 5.25 \n", + "6 2.60 2.51 0.31 1.25 5.05 \n", + "7 2.80 2.98 0.29 1.98 5.20 \n", + "8 2.98 3.15 0.22 1.85 7.22 \n", + "9 2.95 3.32 0.22 2.38 5.75 \n", + "\n", + " OD280/OD315 of diluted wines Proline Col-14 \n", + "0 1.05 3.40 1050 \n", + "1 1.03 3.17 1185 \n", + "2 0.86 3.45 1480 \n", + "3 1.04 2.93 735 \n", + "4 1.05 2.85 1450 \n", + "5 1.02 3.58 1290 \n", + "6 1.06 3.58 1295 \n", + "7 1.08 2.85 1045 \n", + "8 1.01 3.55 1045 \n", + "9 1.25 3.17 1510 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 111 + } + ] + }, + { + "metadata": { + "id": "Zqi7hwWpkNbH", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Set the values of the first 3 rows from alcohol as NaN\n", + "\n", + "Hint- Use iloc to select 3 rows of wine_df" + ] + }, + { + "metadata": { + "id": "buyT4vX4kPMl", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 444 + }, + "outputId": "75d93567-4646-4ab4-bea1-fd067d881dcf" + }, + "cell_type": "code", + "source": [ + "wine_df.Alcohol.iloc[:3] = np.nan\n", + "wine_df.head(10)" + ], + "execution_count": 112, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py:194: 
SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " self._setitem_with_indexer(indexer, value)\n" + ], + "name": "stderr" + }, + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProlineCol-14
0NaN13.201.782.1411.21002.652.760.261.284.381.053.401050
1NaN13.162.362.6718.61012.803.240.302.815.681.033.171185
2NaN14.371.952.5016.81133.853.490.242.187.800.863.451480
31.013.242.592.8721.01182.802.690.391.824.321.042.93735
41.014.201.762.4515.21123.273.390.341.976.751.052.851450
51.014.391.872.4514.6962.502.520.301.985.251.023.581290
61.014.062.152.6117.61212.602.510.311.255.051.063.581295
71.014.831.642.1714.0972.802.980.291.985.201.082.851045
81.013.861.352.2716.0982.983.150.221.857.221.013.551045
91.014.102.162.3018.01052.953.320.222.385.751.253.171510
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", + "0 NaN 13.20 1.78 2.14 11.2 100 \n", + "1 NaN 13.16 2.36 2.67 18.6 101 \n", + "2 NaN 14.37 1.95 2.50 16.8 113 \n", + "3 1.0 13.24 2.59 2.87 21.0 118 \n", + "4 1.0 14.20 1.76 2.45 15.2 112 \n", + "5 1.0 14.39 1.87 2.45 14.6 96 \n", + "6 1.0 14.06 2.15 2.61 17.6 121 \n", + "7 1.0 14.83 1.64 2.17 14.0 97 \n", + "8 1.0 13.86 1.35 2.27 16.0 98 \n", + "9 1.0 14.10 2.16 2.30 18.0 105 \n", + "\n", + " Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", + "0 2.65 2.76 0.26 1.28 4.38 \n", + "1 2.80 3.24 0.30 2.81 5.68 \n", + "2 3.85 3.49 0.24 2.18 7.80 \n", + "3 2.80 2.69 0.39 1.82 4.32 \n", + "4 3.27 3.39 0.34 1.97 6.75 \n", + "5 2.50 2.52 0.30 1.98 5.25 \n", + "6 2.60 2.51 0.31 1.25 5.05 \n", + "7 2.80 2.98 0.29 1.98 5.20 \n", + "8 2.98 3.15 0.22 1.85 7.22 \n", + "9 2.95 3.32 0.22 2.38 5.75 \n", + "\n", + " OD280/OD315 of diluted wines Proline Col-14 \n", + "0 1.05 3.40 1050 \n", + "1 1.03 3.17 1185 \n", + "2 0.86 3.45 1480 \n", + "3 1.04 2.93 735 \n", + "4 1.05 2.85 1450 \n", + "5 1.02 3.58 1290 \n", + "6 1.06 3.58 1295 \n", + "7 1.08 2.85 1045 \n", + "8 1.01 3.55 1045 \n", + "9 1.25 3.17 1510 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 112 + } + ] + }, + { + "metadata": { + "id": "RQMNI2UHkP3o", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Create an array of 10 random numbers up to 10 and assign it to a variable named `random`" + ] + }, + { + "metadata": { + "id": "xunmCjaEmDwZ", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "eff253d6-a5ef-425f-fc8e-abb78c499c9f" + }, + "cell_type": "code", + "source": [ + "random = np.random.randint(1,10,10)\n", + "print(random)" + ], + "execution_count": 113, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[8 2 1 9 6 6 9 2 9 2]\n" + ], + "name": "stdout" + } + ] + }, + { + 
"metadata": { + "id": "hELUakyXmFSu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Use the random numbers you generated as an index and assign NaN to each corresponding cell of the column alcohol" + ] + }, + { + "metadata": { + "id": "zMgaNnNHmP01", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 359 + }, + "outputId": "a9f82ab3-b77d-47e2-fec3-d12bf130fc86" + }, + "cell_type": "code", + "source": [ + "wine_df.Alcohol.iloc[random] = np.nan\n", + "wine_df.head(10)" + ], + "execution_count": 114, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProlineCol-14
0NaN13.201.782.1411.21002.652.760.261.284.381.053.401050
1NaN13.162.362.6718.61012.803.240.302.815.681.033.171185
2NaN14.371.952.5016.81133.853.490.242.187.800.863.451480
31.013.242.592.8721.01182.802.690.391.824.321.042.93735
41.014.201.762.4515.21123.273.390.341.976.751.052.851450
51.014.391.872.4514.6962.502.520.301.985.251.023.581290
6NaN14.062.152.6117.61212.602.510.311.255.051.063.581295
71.014.831.642.1714.0972.802.980.291.985.201.082.851045
8NaN13.861.352.2716.0982.983.150.221.857.221.013.551045
9NaN14.102.162.3018.01052.953.320.222.385.751.253.171510
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", + "0 NaN 13.20 1.78 2.14 11.2 100 \n", + "1 NaN 13.16 2.36 2.67 18.6 101 \n", + "2 NaN 14.37 1.95 2.50 16.8 113 \n", + "3 1.0 13.24 2.59 2.87 21.0 118 \n", + "4 1.0 14.20 1.76 2.45 15.2 112 \n", + "5 1.0 14.39 1.87 2.45 14.6 96 \n", + "6 NaN 14.06 2.15 2.61 17.6 121 \n", + "7 1.0 14.83 1.64 2.17 14.0 97 \n", + "8 NaN 13.86 1.35 2.27 16.0 98 \n", + "9 NaN 14.10 2.16 2.30 18.0 105 \n", + "\n", + " Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", + "0 2.65 2.76 0.26 1.28 4.38 \n", + "1 2.80 3.24 0.30 2.81 5.68 \n", + "2 3.85 3.49 0.24 2.18 7.80 \n", + "3 2.80 2.69 0.39 1.82 4.32 \n", + "4 3.27 3.39 0.34 1.97 6.75 \n", + "5 2.50 2.52 0.30 1.98 5.25 \n", + "6 2.60 2.51 0.31 1.25 5.05 \n", + "7 2.80 2.98 0.29 1.98 5.20 \n", + "8 2.98 3.15 0.22 1.85 7.22 \n", + "9 2.95 3.32 0.22 2.38 5.75 \n", + "\n", + " OD280/OD315 of diluted wines Proline Col-14 \n", + "0 1.05 3.40 1050 \n", + "1 1.03 3.17 1185 \n", + "2 0.86 3.45 1480 \n", + "3 1.04 2.93 735 \n", + "4 1.05 2.85 1450 \n", + "5 1.02 3.58 1290 \n", + "6 1.06 3.58 1295 \n", + "7 1.08 2.85 1045 \n", + "8 1.01 3.55 1045 \n", + "9 1.25 3.17 1510 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 114 + } + ] + }, + { + "metadata": { + "id": "PHyK_vRsmRwV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### How many missing values do we have? 
\n", + "\n", + "Hint: you can use isnull() and sum()" + ] + }, + { + "metadata": { + "id": "EnOYhmEqmfKp", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 272 + }, + "outputId": "409c61b1-0d47-416d-adcf-07e3c452ba94" + }, + "cell_type": "code", + "source": [ + "print((wine_df.isnull()).sum())" + ], + "execution_count": 115, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Alcohol 6\n", + "Malic acid 0\n", + "Ash 0\n", + "Alcalinity of ash 0\n", + "Magnesium 0\n", + "Total phenols 0\n", + "Flavanoids 0\n", + "Nonflavanoid phenols 0\n", + "Proanthocyanins 0\n", + "Color intensity 0\n", + "Hue 0\n", + "OD280/OD315 of diluted wines 0\n", + "Proline 0\n", + "Col-14 0\n", + "dtype: int64\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "-Fd4WBklmf1_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Delete the rows that contain missing values " + ] + }, + { + "metadata": { + "id": "As7IC6Ktms8-", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 359 + }, + "outputId": "a79d4627-4676-45e1-8ebf-24b64f5b9a99" + }, + "cell_type": "code", + "source": [ + "wine_df = wine_df.dropna(how='any',axis=0)\n", + "wine_df.head(10)" + ], + "execution_count": 116, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProlineCol-14
31.013.242.592.8721.01182.802.690.391.824.321.042.93735
41.014.201.762.4515.21123.273.390.341.976.751.052.851450
51.014.391.872.4514.6962.502.520.301.985.251.023.581290
71.014.831.642.1714.0972.802.980.291.985.201.082.851045
101.014.121.482.3216.8952.202.430.261.575.001.172.821280
111.013.751.732.4116.0892.602.760.291.815.601.152.901320
121.014.751.732.3911.4913.103.690.432.815.401.252.731150
131.014.381.872.3812.01023.303.640.292.967.501.203.001547
141.013.631.812.7017.21122.852.910.301.467.301.282.881310
151.014.301.922.7220.01202.803.140.331.976.201.072.651280
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", + "3 1.0 13.24 2.59 2.87 21.0 118 \n", + "4 1.0 14.20 1.76 2.45 15.2 112 \n", + "5 1.0 14.39 1.87 2.45 14.6 96 \n", + "7 1.0 14.83 1.64 2.17 14.0 97 \n", + "10 1.0 14.12 1.48 2.32 16.8 95 \n", + "11 1.0 13.75 1.73 2.41 16.0 89 \n", + "12 1.0 14.75 1.73 2.39 11.4 91 \n", + "13 1.0 14.38 1.87 2.38 12.0 102 \n", + "14 1.0 13.63 1.81 2.70 17.2 112 \n", + "15 1.0 14.30 1.92 2.72 20.0 120 \n", + "\n", + " Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", + "3 2.80 2.69 0.39 1.82 4.32 \n", + "4 3.27 3.39 0.34 1.97 6.75 \n", + "5 2.50 2.52 0.30 1.98 5.25 \n", + "7 2.80 2.98 0.29 1.98 5.20 \n", + "10 2.20 2.43 0.26 1.57 5.00 \n", + "11 2.60 2.76 0.29 1.81 5.60 \n", + "12 3.10 3.69 0.43 2.81 5.40 \n", + "13 3.30 3.64 0.29 2.96 7.50 \n", + "14 2.85 2.91 0.30 1.46 7.30 \n", + "15 2.80 3.14 0.33 1.97 6.20 \n", + "\n", + " OD280/OD315 of diluted wines Proline Col-14 \n", + "3 1.04 2.93 735 \n", + "4 1.05 2.85 1450 \n", + "5 1.02 3.58 1290 \n", + "7 1.08 2.85 1045 \n", + "10 1.17 2.82 1280 \n", + "11 1.15 2.90 1320 \n", + "12 1.25 2.73 1150 \n", + "13 1.20 3.00 1547 \n", + "14 1.28 2.88 1310 \n", + "15 1.07 2.65 1280 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 116 + } + ] + }, + { + "metadata": { + "id": "DlpG8drhmz7W", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### BONUS: Play with the data set below" + ] + }, + { + "metadata": { + "id": "mD40T0Cnm5SA", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "#exam next week, will play later :-P" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file