diff --git a/Pandas.ipynb b/Pandas.ipynb new file mode 100644 index 0000000..302add0 --- /dev/null +++ b/Pandas.ipynb @@ -0,0 +1,5302 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Untitled3.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "[View in Colaboratory](https://colab.research.google.com/github/NehaAgarwal2598/Assignment-3/blob/master/Pandas.ipynb)" + ] + }, + { + "metadata": { + "id": "cGbE814_Xaf9", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Pandas\n", + "\n", + "Pandas is an open-source, BSD-licensed Python library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language. Python with Pandas is used in a wide range of academic and commercial fields, including finance, economics, statistics, and analytics. In this tutorial, we will learn the various features of Python Pandas and how to use them in practice.\n", + "\n", + "\n", + "## Import pandas and numpy" + ] + }, + { + "metadata": { + "id": "zWetN2Mxxc7G", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "TrMQOdmixc7T", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "outputId": "1f3e8c1b-da3f-473a-8f93-af084f7dc3ed" + }, + "cell_type": "code", + "source": [ + "a_ascii = ord('A')\n", + "z_ascii = ord('Z')\n", + "alphabets = [chr(i) for i in range(a_ascii, z_ascii+1)]\n", + "\n", + "print(alphabets)\n", + "\n", + "numbers = np.arange(26)\n", + "\n", + "print(numbers)\n", + "\n", + "print(type(alphabets), 
type(numbers))\n", + "\n", + "alpha_numbers = dict(zip(alphabets, numbers))\n", + "\n", + "print(alpha_numbers)\n", + "\n", + "print(type(alpha_numbers))" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']\n", + "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23\n", + " 24 25]\n", + " \n", + "{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "6ouDfjWab_Mc", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 476 + }, + "outputId": "0db749c8-9343-4355-edfa-f03dcc412274" + }, + "cell_type": "code", + "source": [ + "series1 = pd.Series(alphabets)\n", + "print(series1)" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "0 A\n", + "1 B\n", + "2 C\n", + "3 D\n", + "4 E\n", + "5 F\n", + "6 G\n", + "7 H\n", + "8 I\n", + "9 J\n", + "10 K\n", + "11 L\n", + "12 M\n", + "13 N\n", + "14 O\n", + "15 P\n", + "16 Q\n", + "17 R\n", + "18 S\n", + "19 T\n", + "20 U\n", + "21 V\n", + "22 W\n", + "23 X\n", + "24 Y\n", + "25 Z\n", + "dtype: object\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "At7nY7vVcBZ3", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 476 + }, + "outputId": "ad6bb27b-b631-48fe-8d0c-56b34b06a848" + }, + "cell_type": "code", + "source": [ + "series2 = pd.Series(numbers)\n", + "print(series2)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "0 0\n", + "1 1\n", + "2 2\n", + "3 3\n", + "4 4\n", + "5 5\n", + "6 6\n", + "7 7\n", + "8 8\n", + "9 9\n", + "10 10\n", + 
"11 11\n", + "12 12\n", + "13 13\n", + "14 14\n", + "15 15\n", + "16 16\n", + "17 17\n", + "18 18\n", + "19 19\n", + "20 20\n", + "21 21\n", + "22 22\n", + "23 23\n", + "24 24\n", + "25 25\n", + "dtype: int64\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "J5z-2CWAdH6N", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 476 + }, + "outputId": "30a9f9ad-1502-4436-daf4-0c9de06359ce" + }, + "cell_type": "code", + "source": [ + "series3 = pd.Series(alpha_numbers)\n", + "print(series3)" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "A 0\n", + "B 1\n", + "C 2\n", + "D 3\n", + "E 4\n", + "F 5\n", + "G 6\n", + "H 7\n", + "I 8\n", + "J 9\n", + "K 10\n", + "L 11\n", + "M 12\n", + "N 13\n", + "O 14\n", + "P 15\n", + "Q 16\n", + "R 17\n", + "S 18\n", + "T 19\n", + "U 20\n", + "V 21\n", + "W 22\n", + "X 23\n", + "Y 24\n", + "Z 25\n", + "dtype: int64\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "fYzblGGudKjO", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": "0748460a-db72-4e26-cc7a-5b915871f819" + }, + "cell_type": "code", + "source": [ + "#replace head() with head(n) where n can be any number between [0-25] and observe the output in each case\n", + "series3.head()" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "A 0\n", + "B 1\n", + "C 2\n", + "D 3\n", + "E 4\n", + "dtype: int64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 6 + } + ] + }, + { + "metadata": { + "id": "73UTZ07EdWki", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 855 + }, + "outputId": "234de44b-4693-4cec-c8c3-c66d9297e907" + }, + "cell_type": "code", + "source": [ + "data = {'alphabets': alphabets, 'values': numbers}\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "#Lets Change the column 
`values` to `alpha_numbers`\n", + "\n", + "#df.columns = ['alphabets', 'alpha_numbers']\n", + "\n", + "df" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alphabetsvalues
0A0
1B1
2C2
3D3
4E4
5F5
6G6
7H7
8I8
9J9
10K10
11L11
12M12
13N13
14O14
15P15
16Q16
17R17
18S18
19T19
20U20
21V21
22W22
23X23
24Y24
25Z25
\n", + "
" + ], + "text/plain": [ + " alphabets values\n", + "0 A 0\n", + "1 B 1\n", + "2 C 2\n", + "3 D 3\n", + "4 E 4\n", + "5 F 5\n", + "6 G 6\n", + "7 H 7\n", + "8 I 8\n", + "9 J 9\n", + "10 K 10\n", + "11 L 11\n", + "12 M 12\n", + "13 N 13\n", + "14 O 14\n", + "15 P 15\n", + "16 Q 16\n", + "17 R 17\n", + "18 S 18\n", + "19 T 19\n", + "20 U 20\n", + "21 V 21\n", + "22 W 22\n", + "23 X 23\n", + "24 Y 24\n", + "25 Z 25" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 7 + } + ] + }, + { + "metadata": { + "id": "uaK_1EO9etGS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 140 + }, + "outputId": "2c4a9a29-94ce-4bb2-eba0-e67c93538aa9" + }, + "cell_type": "code", + "source": [ + "# transpose\n", + "\n", + "df.T\n", + "\n", + "# there are many more operations we can perform; see the documentation. We will learn more in the subsequent exercises" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...16171819202122232425
alphabetsABCDEFGHIJ...QRSTUVWXYZ
values0123456789...16171819202122232425
\n", + "

2 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 23 \\\n", + "alphabets A B C D E F G H I J ... Q R S T U V W X \n", + "values 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 23 \n", + "\n", + " 24 25 \n", + "alphabets Y Z \n", + "values 24 25 \n", + "\n", + "[2 rows x 26 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 8 + } + ] + }, + { + "metadata": { + "id": "tc1-KX_Bfe7U", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "b03ed129-c55c-4994-b855-6dfe4034f2d8" + }, + "cell_type": "code", + "source": [ + "ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))\n", + "pos = [0, 4, 8, 14, 20]\n", + "\n", + "vowels = ser.take(pos)\n", + "\n", + "df = pd.DataFrame(vowels)#, columns=['vowels'])\n", + "\n", + "df.columns = ['vowels']\n", + "\n", + "#df.index = [0, 1, 2, 3, 4]\n", + "\n", + "df" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
vowels
0a
4e
8i
14o
20u
\n", + "
" + ], + "text/plain": [ + " vowels\n", + "0 a\n", + "4 e\n", + "8 i\n", + "14 o\n", + "20 u" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] + }, + { + "metadata": { + "id": "5KagP9PpgV2F", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "aa231d5e-40a8-4d3a-c65a-36fc6fe4366f" + }, + "cell_type": "code", + "source": [ + "ser = pd.Series(['we', 'are', 'learning', 'pandas'])\n", + "\n", + "ser.map(lambda x : x.title())\n", + "\n", + "titles = [i.title() for i in ser]\n", + "\n", + "titles" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['We', 'Are', 'Learning', 'Pandas']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] + }, + { + "metadata": { + "id": "h5R0JL2NjuFS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "5f4f8a36-4549-4f8c-acc1-91c1738bb1a1" + }, + "cell_type": "code", + "source": [ + "my_index = [1, 2, 3, 4, 5]\n", + "\n", + "df1 = pd.DataFrame({'upper values': ['A', 'B', 'C', 'D', 'E'],\n", + " 'lower values': ['a', 'b', 'c', 'd', 'e']},\n", + " index = my_index)\n", + "\n", + "df1" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lower valuesupper values
1aA
2bB
3cC
4dD
5eE
\n", + "
" + ], + "text/plain": [ + " lower values upper values\n", + "1 a A\n", + "2 b B\n", + "3 c C\n", + "4 d D\n", + "5 e E" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 11 + } + ] + }, + { + "metadata": { + "id": "G_Frvc3mk93k", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "aaf8c8ce-0765-4c02-c00d-b63e55088744" + }, + "cell_type": "code", + "source": [ + "new_index = [2, 5, 4, 3, 1]\n", + "\n", + "df1.reindex(index = new_index)" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lower valuesupper values
2bB
5eE
4dD
3cC
1aA
\n", + "
" + ], + "text/plain": [ + " lower values upper values\n", + "2 b B\n", + "5 e E\n", + "4 d D\n", + "3 c C\n", + "1 a A" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 12 + } + ] + }, + { + "metadata": { + "id": "J82LU53m_OU0", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Get to know your Data\n", + "\n", + "\n", + "#### Import necessary modules\n" + ] + }, + { + "metadata": { + "id": "ZyO1UXL8mtSj", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "yXTzTowtnwGI", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Loading CSV Data to a DataFrame" + ] + }, + { + "metadata": { + "id": "H1Bjlb5wm9f-", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df = pd.read_csv('https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv')\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "KE-k7b_Mn5iN", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### See the top 5 rows\n" + ] + }, + { + "metadata": { + "id": "HY2Ps7xMn4ao", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "d67b4210-bb4b-45c6-d509-32f3cfeaf526" + }, + "cell_type": "code", + "source": [ + "iris_df.head()" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 15 + } + ] + }, + { + "metadata": { + "id": "ZQXekIodqOZu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Find number of rows and columns\n" + ] + }, + { + "metadata": { + "id": "6Y-A-lbFqR82", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "59bc7283-b6e4-4103-e36a-8580c37911a7" + }, + "cell_type": "code", + "source": [ + "print(iris_df.shape)\n", + "\n", + "#first is row and second is column\n", + "#select row by simple indexing\n", + "\n", + "#print(iris_df.shape[0])\n", + "#print(iris_df.shape[1])" + ], + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(150, 5)\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "4ckCiGPhrC_t", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Print all columns" + ] + }, + { + "metadata": { + "id": "S6jgMyRDrF2a", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "44673988-91d6-467d-8b8c-2a429624ad3f" + }, + "cell_type": "code", + "source": [ + "print(iris_df.columns)" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n", + " 'species'],\n", + " dtype='object')\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "kVav5-ACtIqS", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Check Index\n" + ] + }, + { + "metadata": { + "id": "iu3I9zIGtLDX", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, 
+ "outputId": "0d772f6c-d7e8-4ec9-cb45-d9ac61397420" + }, + "cell_type": "code", + "source": [ + "print(iris_df.index)" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "text": [ + "RangeIndex(start=0, stop=150, step=1)\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "psCc7PborOCQ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Right now the iris dataset has all the species grouped together; let's shuffle it" + ] + }, + { + "metadata": { + "id": "Bxc8i6avrZPw", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + }, + "outputId": "b51db56f-9b4a-4b2f-8c4e-be225bafa609" + }, + "cell_type": "code", + "source": [ + "#generate a random permutation of the index\n", + "\n", + "print(iris_df.head())\n", + "\n", + "new_index = np.random.permutation(iris_df.index)\n", + "iris_df = iris_df.reindex(index = new_index)\n", + "\n", + "print(iris_df.head())" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "text": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + " sepal_length sepal_width petal_length petal_width species\n", + "78 6.0 2.9 4.5 1.5 versicolor\n", + "121 5.6 2.8 4.9 2.0 virginica\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "108 6.7 2.5 5.8 1.8 virginica\n", + "58 6.6 2.9 4.6 1.3 versicolor\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "j32h8022sRT8", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### We can also apply an operation on a whole column of iris_df" + ] + }, + { + "metadata": { + "id": "seYXHXsYsYJI", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 323 + }, + "outputId": "eb379004-47bf-4f60-b4ba-175e2697c365" + }, + "cell_type": "code", + "source": [ + 
"#original\n", + "\n", + "print(iris_df.head())\n", + "\n", + "iris_df['sepal_width'] *= 10\n", + "\n", + "#changed\n", + "\n", + "print(iris_df.head())\n", + "\n", + "#lets undo the operation\n", + "\n", + "iris_df['sepal_width'] /= 10\n", + "\n", + "print(iris_df.head())" + ], + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "text": [ + " sepal_length sepal_width petal_length petal_width species\n", + "78 6.0 2.9 4.5 1.5 versicolor\n", + "121 5.6 2.8 4.9 2.0 virginica\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "108 6.7 2.5 5.8 1.8 virginica\n", + "58 6.6 2.9 4.6 1.3 versicolor\n", + " sepal_length sepal_width petal_length petal_width species\n", + "78 6.0 29.0 4.5 1.5 versicolor\n", + "121 5.6 28.0 4.9 2.0 virginica\n", + "43 5.0 35.0 1.6 0.6 setosa\n", + "108 6.7 25.0 5.8 1.8 virginica\n", + "58 6.6 29.0 4.6 1.3 versicolor\n", + " sepal_length sepal_width petal_length petal_width species\n", + "78 6.0 2.9 4.5 1.5 versicolor\n", + "121 5.6 2.8 4.9 2.0 virginica\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "108 6.7 2.5 5.8 1.8 virginica\n", + "58 6.6 2.9 4.6 1.3 versicolor\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "R-Ca-LBLzjiF", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Show all the rows where sepal_width > 3.3" + ] + }, + { + "metadata": { + "id": "WJ7W-F-d0AoZ", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1165 + }, + "outputId": "09fb4b87-0377-4594-fda3-67eeaedf1754" + }, + "cell_type": "code", + "source": [ + "iris_df[iris_df['sepal_width']>3.3]" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
435.03.51.60.6setosa
1366.33.45.62.4virginica
205.43.41.70.2setosa
485.33.71.50.2setosa
395.13.41.50.2setosa
1317.93.86.42.0virginica
105.43.71.50.2setosa
185.73.81.70.3setosa
114.83.41.60.2setosa
215.13.71.50.4setosa
465.13.81.60.2setosa
05.13.51.40.2setosa
285.23.41.40.2setosa
165.43.91.30.4setosa
1486.23.45.42.3virginica
405.03.51.30.3setosa
275.23.51.50.2setosa
145.84.01.20.2setosa
195.13.81.50.3setosa
1177.73.86.72.2virginica
244.83.41.90.2setosa
325.24.11.50.1setosa
155.74.41.50.4setosa
315.43.41.50.4setosa
1097.23.66.12.5virginica
445.13.81.90.4setosa
856.03.44.51.6versicolor
365.53.51.30.2setosa
45.03.61.40.2setosa
64.63.41.40.3setosa
224.63.61.00.2setosa
55.43.91.70.4setosa
75.03.41.50.2setosa
175.13.51.40.3setosa
265.03.41.60.4setosa
335.54.21.40.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "136 6.3 3.4 5.6 2.4 virginica\n", + "20 5.4 3.4 1.7 0.2 setosa\n", + "48 5.3 3.7 1.5 0.2 setosa\n", + "39 5.1 3.4 1.5 0.2 setosa\n", + "131 7.9 3.8 6.4 2.0 virginica\n", + "10 5.4 3.7 1.5 0.2 setosa\n", + "18 5.7 3.8 1.7 0.3 setosa\n", + "11 4.8 3.4 1.6 0.2 setosa\n", + "21 5.1 3.7 1.5 0.4 setosa\n", + "46 5.1 3.8 1.6 0.2 setosa\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "28 5.2 3.4 1.4 0.2 setosa\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "148 6.2 3.4 5.4 2.3 virginica\n", + "40 5.0 3.5 1.3 0.3 setosa\n", + "27 5.2 3.5 1.5 0.2 setosa\n", + "14 5.8 4.0 1.2 0.2 setosa\n", + "19 5.1 3.8 1.5 0.3 setosa\n", + "117 7.7 3.8 6.7 2.2 virginica\n", + "24 4.8 3.4 1.9 0.2 setosa\n", + "32 5.2 4.1 1.5 0.1 setosa\n", + "15 5.7 4.4 1.5 0.4 setosa\n", + "31 5.4 3.4 1.5 0.4 setosa\n", + "109 7.2 3.6 6.1 2.5 virginica\n", + "44 5.1 3.8 1.9 0.4 setosa\n", + "85 6.0 3.4 4.5 1.6 versicolor\n", + "36 5.5 3.5 1.3 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "6 4.6 3.4 1.4 0.3 setosa\n", + "22 4.6 3.6 1.0 0.2 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "7 5.0 3.4 1.5 0.2 setosa\n", + "17 5.1 3.5 1.4 0.3 setosa\n", + "26 5.0 3.4 1.6 0.4 setosa\n", + "33 5.5 4.2 1.4 0.2 setosa" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 21 + } + ] + }, + { + "metadata": { + "id": "gH3DnhCq2Cbl", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Club two filters together - Find all samples where sepal_width > 3.3 and species is versicolor" + ] + }, + { + "metadata": { + "id": "4U7ksr_R2H7M", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "outputId": "732f5979-ca02-46a4-b5be-da8180bcd6e1" + }, + "cell_type": "code", + "source": [ + "iris_df[(iris_df['sepal_width']>3.3) & (iris_df['species'] == 'versicolor')] " + ], + "execution_count": 22, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
856.03.44.51.6versicolor
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "85 6.0 3.4 4.5 1.6 versicolor" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 22 + } + ] + }, + { + "metadata": { + "id": "1lmnB3ot2u7I", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Sorting a column by value" + ] + }, + { + "metadata": { + "id": "K7KIj6fv2zWP", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1969 + }, + "outputId": "83a21fb6-0211-4ccd-ccdc-f10e08f981a9" + }, + "cell_type": "code", + "source": [ + "iris_df.sort_values(by='sepal_width')#, ascending = False)\n", + "#pass ascending = False for descending order" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
605.02.03.51.0versicolor
1196.02.25.01.5virginica
686.22.24.51.5versicolor
626.02.24.01.0versicolor
535.52.34.01.3versicolor
935.02.33.31.0versicolor
876.32.34.41.3versicolor
414.52.31.30.3setosa
574.92.43.31.0versicolor
805.52.43.81.1versicolor
815.52.43.71.0versicolor
695.62.53.91.1versicolor
985.12.53.01.1versicolor
1064.92.54.51.7virginica
726.32.54.91.5versicolor
1086.72.55.81.8virginica
1466.32.55.01.9virginica
1135.72.55.02.0virginica
895.52.54.01.3versicolor
795.72.63.51.0versicolor
1187.72.66.92.3virginica
905.52.64.41.2versicolor
925.82.64.01.2versicolor
1346.12.65.61.4virginica
1015.82.75.11.9virginica
595.22.73.91.4versicolor
1116.42.75.31.9virginica
825.82.73.91.2versicolor
836.02.75.11.6versicolor
945.62.74.21.3versicolor
..................
285.23.41.40.2setosa
315.43.41.50.4setosa
114.83.41.60.2setosa
395.13.41.50.2setosa
856.03.44.51.6versicolor
244.83.41.90.2setosa
435.03.51.60.6setosa
05.13.51.40.2setosa
175.13.51.40.3setosa
365.53.51.30.2setosa
275.23.51.50.2setosa
405.03.51.30.3setosa
224.63.61.00.2setosa
1097.23.66.12.5virginica
45.03.61.40.2setosa
215.13.71.50.4setosa
105.43.71.50.2setosa
485.33.71.50.2setosa
185.73.81.70.3setosa
195.13.81.50.3setosa
1177.73.86.72.2virginica
1317.93.86.42.0virginica
465.13.81.60.2setosa
445.13.81.90.4setosa
165.43.91.30.4setosa
55.43.91.70.4setosa
145.84.01.20.2setosa
325.24.11.50.1setosa
335.54.21.40.2setosa
155.74.41.50.4setosa
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "60 5.0 2.0 3.5 1.0 versicolor\n", + "119 6.0 2.2 5.0 1.5 virginica\n", + "68 6.2 2.2 4.5 1.5 versicolor\n", + "62 6.0 2.2 4.0 1.0 versicolor\n", + "53 5.5 2.3 4.0 1.3 versicolor\n", + "93 5.0 2.3 3.3 1.0 versicolor\n", + "87 6.3 2.3 4.4 1.3 versicolor\n", + "41 4.5 2.3 1.3 0.3 setosa\n", + "57 4.9 2.4 3.3 1.0 versicolor\n", + "80 5.5 2.4 3.8 1.1 versicolor\n", + "81 5.5 2.4 3.7 1.0 versicolor\n", + "69 5.6 2.5 3.9 1.1 versicolor\n", + "98 5.1 2.5 3.0 1.1 versicolor\n", + "106 4.9 2.5 4.5 1.7 virginica\n", + "72 6.3 2.5 4.9 1.5 versicolor\n", + "108 6.7 2.5 5.8 1.8 virginica\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "113 5.7 2.5 5.0 2.0 virginica\n", + "89 5.5 2.5 4.0 1.3 versicolor\n", + "79 5.7 2.6 3.5 1.0 versicolor\n", + "118 7.7 2.6 6.9 2.3 virginica\n", + "90 5.5 2.6 4.4 1.2 versicolor\n", + "92 5.8 2.6 4.0 1.2 versicolor\n", + "134 6.1 2.6 5.6 1.4 virginica\n", + "101 5.8 2.7 5.1 1.9 virginica\n", + "59 5.2 2.7 3.9 1.4 versicolor\n", + "111 6.4 2.7 5.3 1.9 virginica\n", + "82 5.8 2.7 3.9 1.2 versicolor\n", + "83 6.0 2.7 5.1 1.6 versicolor\n", + "94 5.6 2.7 4.2 1.3 versicolor\n", + ".. ... ... ... ... 
...\n", + "28 5.2 3.4 1.4 0.2 setosa\n", + "31 5.4 3.4 1.5 0.4 setosa\n", + "11 4.8 3.4 1.6 0.2 setosa\n", + "39 5.1 3.4 1.5 0.2 setosa\n", + "85 6.0 3.4 4.5 1.6 versicolor\n", + "24 4.8 3.4 1.9 0.2 setosa\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "17 5.1 3.5 1.4 0.3 setosa\n", + "36 5.5 3.5 1.3 0.2 setosa\n", + "27 5.2 3.5 1.5 0.2 setosa\n", + "40 5.0 3.5 1.3 0.3 setosa\n", + "22 4.6 3.6 1.0 0.2 setosa\n", + "109 7.2 3.6 6.1 2.5 virginica\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "21 5.1 3.7 1.5 0.4 setosa\n", + "10 5.4 3.7 1.5 0.2 setosa\n", + "48 5.3 3.7 1.5 0.2 setosa\n", + "18 5.7 3.8 1.7 0.3 setosa\n", + "19 5.1 3.8 1.5 0.3 setosa\n", + "117 7.7 3.8 6.7 2.2 virginica\n", + "131 7.9 3.8 6.4 2.0 virginica\n", + "46 5.1 3.8 1.6 0.2 setosa\n", + "44 5.1 3.8 1.9 0.4 setosa\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "14 5.8 4.0 1.2 0.2 setosa\n", + "32 5.2 4.1 1.5 0.1 setosa\n", + "33 5.5 4.2 1.4 0.2 setosa\n", + "15 5.7 4.4 1.5 0.4 setosa\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 23 + } + ] + }, + { + "metadata": { + "id": "9jg_Z4YCoMSV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### List all the unique species" + ] + }, + { + "metadata": { + "id": "M6EN78ufoJY7", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "cae20cb6-faec-49d8-8810-c6fc54c942db" + }, + "cell_type": "code", + "source": [ + "species = iris_df['species'].unique()\n", + "\n", + "print(species)" + ], + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['versicolor' 'virginica' 'setosa']\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "wG1i5nxBodmB", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Selecting a particular species using boolean mask (learnt in previous exercise)" + ] + }, + { + "metadata": { + "id": 
"gZvpbKBwoVUe", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "df4ff2d0-9d60-4dc7-b7c1-1055c0099fe8" + }, + "cell_type": "code", + "source": [ + "setosa = iris_df[iris_df['species'] == species[0]]\n", + "\n", + "setosa.head()" + ], + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
786.02.94.51.5versicolor
586.62.94.61.3versicolor
686.22.24.51.5versicolor
825.82.73.91.2versicolor
895.52.54.01.3versicolor
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "78 6.0 2.9 4.5 1.5 versicolor\n", + "58 6.6 2.9 4.6 1.3 versicolor\n", + "68 6.2 2.2 4.5 1.5 versicolor\n", + "82 5.8 2.7 3.9 1.2 versicolor\n", + "89 5.5 2.5 4.0 1.3 versicolor" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 25 + } + ] + }, + { + "metadata": { + "id": "7tumfZ3DotPG", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "662d6678-ac35-4b15-8a11-540ef538064b" + }, + "cell_type": "code", + "source": [ + "# Boolean-mask selection of the versicolor rows. The label is spelled out\n", + "# because the order of `species` depends on row order in iris_df.\n", + "versicolor = iris_df[iris_df['species'] == 'versicolor']\n", + "\n", + "versicolor.head()" + ], + "execution_count": 26, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
1215.62.84.92.0virginica
1086.72.55.81.8virginica
1006.33.36.02.5virginica
1366.33.45.62.4virginica
1135.72.55.02.0virginica
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "121 5.6 2.8 4.9 2.0 virginica\n", + "108 6.7 2.5 5.8 1.8 virginica\n", + "100 6.3 3.3 6.0 2.5 virginica\n", + "136 6.3 3.4 5.6 2.4 virginica\n", + "113 5.7 2.5 5.0 2.0 virginica" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 26 + } + ] + }, + { + "metadata": { + "id": "cUYm5UqVpDPy", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "340897a3-525a-4f72-fdec-a541d5433c7d" + }, + "cell_type": "code", + "source": [ + "# Boolean-mask selection of the virginica rows. The label is spelled out\n", + "# because the order of `species` depends on row order in iris_df.\n", + "virginica = iris_df[iris_df['species'] == 'virginica']\n", + "\n", + "virginica.head()" + ], + "execution_count": 27, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
435.03.51.60.6setosa
205.43.41.70.2setosa
94.93.11.50.1setosa
384.43.01.30.2setosa
485.33.71.50.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "20 5.4 3.4 1.7 0.2 setosa\n", + "9 4.9 3.1 1.5 0.1 setosa\n", + "38 4.4 3.0 1.3 0.2 setosa\n", + "48 5.3 3.7 1.5 0.2 setosa" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 27 + } + ] + }, + { + "metadata": { + "id": "-y1wDc8SpdQs", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Describe each created species to see the difference\n", + "\n" + ] + }, + { + "metadata": { + "id": "eHrn3ZVRpOk5", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 297 + }, + "outputId": "0deee989-05dc-4dd5-8e02-c709c99a9577" + }, + "cell_type": "code", + "source": [ + "setosa.describe()" + ], + "execution_count": 28, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count50.00000050.00000050.00000050.000000
mean5.9360002.7700004.2600001.326000
std0.5161710.3137980.4699110.197753
min4.9000002.0000003.0000001.000000
25%5.6000002.5250004.0000001.200000
50%5.9000002.8000004.3500001.300000
75%6.3000003.0000004.6000001.500000
max7.0000003.4000005.1000001.800000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 50.000000 50.000000 50.000000 50.000000\n", + "mean 5.936000 2.770000 4.260000 1.326000\n", + "std 0.516171 0.313798 0.469911 0.197753\n", + "min 4.900000 2.000000 3.000000 1.000000\n", + "25% 5.600000 2.525000 4.000000 1.200000\n", + "50% 5.900000 2.800000 4.350000 1.300000\n", + "75% 6.300000 3.000000 4.600000 1.500000\n", + "max 7.000000 3.400000 5.100000 1.800000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 28 + } + ] + }, + { + "metadata": { + "id": "GwJFT2GlpwUv", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 297 + }, + "outputId": "f769b827-0624-4ba5-c3cf-031cb7557d40" + }, + "cell_type": "code", + "source": [ + "versicolor.describe()" + ], + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count50.0000050.00000050.00000050.00000
mean6.588002.9740005.5520002.02600
std0.635880.3224970.5518950.27465
min4.900002.2000004.5000001.40000
25%6.225002.8000005.1000001.80000
50%6.500003.0000005.5500002.00000
75%6.900003.1750005.8750002.30000
max7.900003.8000006.9000002.50000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 50.00000 50.000000 50.000000 50.00000\n", + "mean 6.58800 2.974000 5.552000 2.02600\n", + "std 0.63588 0.322497 0.551895 0.27465\n", + "min 4.90000 2.200000 4.500000 1.40000\n", + "25% 6.22500 2.800000 5.100000 1.80000\n", + "50% 6.50000 3.000000 5.550000 2.00000\n", + "75% 6.90000 3.175000 5.875000 2.30000\n", + "max 7.90000 3.800000 6.900000 2.50000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 29 + } + ] + }, + { + "metadata": { + "id": "Ad4qhSZLpztf", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 297 + }, + "outputId": "dc25951b-48b4-47df-8c5e-835085ad5cd5" + }, + "cell_type": "code", + "source": [ + "virginica.describe()" + ], + "execution_count": 30, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count50.0000050.00000050.00000050.00000
mean5.006003.4180001.4640000.24400
std0.352490.3810240.1735110.10721
min4.300002.3000001.0000000.10000
25%4.800003.1250001.4000000.20000
50%5.000003.4000001.5000000.20000
75%5.200003.6750001.5750000.30000
max5.800004.4000001.9000000.60000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 50.00000 50.000000 50.000000 50.00000\n", + "mean 5.00600 3.418000 1.464000 0.24400\n", + "std 0.35249 0.381024 0.173511 0.10721\n", + "min 4.30000 2.300000 1.000000 0.10000\n", + "25% 4.80000 3.125000 1.400000 0.20000\n", + "50% 5.00000 3.400000 1.500000 0.20000\n", + "75% 5.20000 3.675000 1.575000 0.30000\n", + "max 5.80000 4.400000 1.900000 0.60000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 30 + } + ] + }, + { + "metadata": { + "id": "Vdu0ulZWtr09", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Let's plot and see the difference" + ] + }, + { + "metadata": { + "id": "PEVMzRvpttmD", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "##### import matplotlib.pyplot " + ] + }, + { + "metadata": { + "id": "rqDXuuAtt7C3", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 398 + }, + "outputId": "00e7b46e-378e-41cb-e39f-275ff332113a" + }, + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# hist creates a histogram; there are many more plot types (see the matplotlib documentation) to play with.\n", + "# Draw one semi-transparent histogram per species, labelled from the data itself,\n", + "# so overlapping bars stay visible and the legend always matches the rows plotted.\n", + "for species_name, species_rows in iris_df.groupby('species'):\n", + "    plt.hist(species_rows['sepal_length'], alpha=0.6, label=species_name)\n", + "\n", + "plt.xlabel('sepal_length')\n", + "plt.ylabel('count')\n", + "plt.legend()\n", + "plt.show()" + ], + "execution_count": 31, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([ 4., 1., 6., 5., 12., 8., 4., 5., 2., 3.]),\n", + " array([4.3 , 4.45, 4.6 , 4.75, 4.9 , 5.05, 5.2 , 5.35, 5.5 , 5.65, 5.8 ]),\n", + " )" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 31 + }, + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAd8AAAFKCAYAAABcq1WoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFGpJREFUeJzt3X9s1Hf9wPFX6cmwpWKHLQzdcFk0\ni9twkG0ZP92QH264uaErP8KQZCYiDGaCGdhgICExYeG7MN3cdEPmF0LCBgh8jZFFhMRsgEYMisnC\nwMTwY4Myys9CgO6+fxgacdDS6/V93PXx+Iv73PVzrzdvcs/eHb2WZbPZbAAAyXQr9AAA0NWILwAk\nJr4AkJj4AkBi4gsAiYkvACSWSXEnDQ2nUtxNXlRXV0RjY1Ohx+hUpb5G6yt+pb5G6yt+17LGmpqq\nq17nme9/yWTKCz1Cpyv1NVpf8Sv1NVpf8evoGsUXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEg\nMfEFgMSuKb579uyJUaNGxcqVKyMi4v33349p06bFlClTYtq0adHQ0NCpQwJAKWkzvk1NTbFo0aIY\nPHhwy7GlS5dGXV1drFy5MkaPHh3Lly/v1CEBoJS0Gd/u3bvHq6++GrW1tS3HFixYEGPHjo2IiOrq\n6jh+/HjnTQgAJabN+GYymejRo8dlxyoqKqK8vDyam5tj1apV8cgjj3TagABQanL+rUbNzc3x7LPP\nxv3333/ZS9JXUl1dUVQftN3ab6K4Hr39jW+26/Z7rnBs6Ia1+RnmOlFse9hepb6+iNJfo/UVv46s\nMef4/vCHP4z+/fvH008/3eZti+lXS9XUVBXVr0DMl1Jac6nvYamvL6L012h9xe9a1pj3Xym4cePG\n+MQnPhGzZ8/O5csBoEtr85nv7t27Y/HixXHw4MHIZDKxadOm+PDDD+OGG26IJ598MiIibrvttli4\ncGFnzwoAJaHN+N55552xYsWKFLMAQJfgE64AIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18A\nSEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx\n8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQX\nABITXwBITHwBIDHxBYDExBcAErum+O7ZsydGjRoVK1eujIiI999/P5588smYPHlyPPPMM3H+/PlO\nHRIASkmb8W1qaopFixbF4MGDW4795Cc/icmTJ8eqVauif//+sWbNmk4dEgBKSZvx7d69e7z66qtR\nW1vbcmzHjh3x1a9+NSIiHnzwwdi2bVvnTQgAJSbT5g0ymchkLr/Z2bNno3v37hER0bt372hoaOic\n6QCgBLUZ37Zks9k2b1NdXRGZTHlH7yqZmpqqQo/QLnvycI5iW3NbLq3nkTkbOvV+/u9/vtGp57+a\nUtuvK+mMNdat/l7ez5mLNya8XPJ7WOrri+jYGnOKb0VFRZw7dy569OgRhw8fvuwl6StpbGzKabhC\nqKmpioaGU4UeI7lSWnPKPSzE31tX+DfaFdZYyuvrCvt3LWtsLc45/ajRkCFDYtOmTRER8dZbb8Xw\n4cNzOQ0AdEltPvPdvXt3LF68OA4ePBiZTCY2bdoUS5YsiXnz5sXq1aujX79+8dhjj6WYFQBKQpvx\nvfPOO2PFihUfO758+fJOGQgASp1PuAKAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEF\ngMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEssUegCg9M38w7OFHgGuK575\nAkBi4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8AJCa+AJCY+AJAY
uILAImJLwAkJr4AkJj4AkBi4gsA\niYkvACQmvgCQmPgCQGLiCwCJiS8AJJbJ5YvOnDkTc+fOjRMnTsSFCxdi5syZMXz48HzPBgAlKaf4\n/vrXv45bb7015syZE4cPH45vf/vb8bvf/S7fswFAScrpZefq6uo4fvx4REScPHkyqqur8zoUAJSy\nnJ75jhs3LtatWxejR4+OkydPxs9//vN8zwUAJSun+G7YsCH69esXy5Yti3fffTfq6+tj3bp1V719\ndXVFZDLlOQ+ZWk1NVaFHaJc9+TjHd6Z1+BxDN6zt+CB5kmoPO/t+HpmzocPn+OR93hK6HhXb40x7\nlfr6Ijq2xpziu3Pnzhg2bFhERNx+++1x5MiRaG5ujvLyKwe2sbEp5wFTq6mpioaGU4UeoyhdL39v\nKffwelkzxaeU/+10hcfRa1lja3HO6T3f/v37x65duyIi4uDBg1FZWXnV8AIAl8vpme+ECROivr4+\npkyZEhcvXoyFCxfmeSwAKF05xbeysjJeeOGFfM8CAF2CT7gCgMTEFwASE18ASEx8ASAx8QWAxMQX\nABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBI\nTHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHx\nBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASCzn+G7cuDEeffTRGD9+fGzdujWPIwFAacspvo2N\njfHSSy/FqlWr4pVXXonNmzfney4AKFmZXL5o27ZtMXjw4OjZs2f07NkzFi1alO+5AKBk5RTfAwcO\nxLlz52L69Olx8uTJmDVrVgwePPiqt6+urohMpjznIVOrqam65tu+/Y1vdui+hm5Y26Gvj4jY0+Ez\n5Ed7/t7+0yNzNuR5knRyXTOU+r+d9qyvbvX3OnGSa/fGhJfbdfuO7GFO8Y2IOH78eLz44otx6NCh\nmDp1amzZsiXKysqueNvGxqacB0ytpqYqGhpOJbu/lPfV2UppLdeqK66Z/CjlfzupH0fzpT0zX8sa\nW4tzTu/59u7dOwYOHBiZTCZuueWWqKysjGPHjuVyKgDocnKK77Bhw2L79u3x0UcfRWNjYzQ1NUV1\ndXW+ZwOAkpTTy859+vSJsWPHRl1dXUREzJ8/P7p18yPDAHAtcn7Pd+LEiTFx4sR8zgIAXYKnqwCQ\nmPgCQGLiCwCJiS8AJCa+AJCY+AJAYuILAImJLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGLi\nCwCJiS8AJCa+AJCY+AJAYplCD9DV7fnOtEKPcF2Zt/d/O3yOFybX5mGSazPzD7/r1PN/8r5OPT0F\nUrf6e4UeISIiXhr5XKFH6LI88wWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTE\nFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEisQ/E9d+5cjBo1KtatW5ev\neQCg5HUovi+//HL06tUrX7MAQJeQc3z37dsXe/fujQceeCCP4wBA6cs5vosXL4558+blcxYA6BIy\nuXzR+vXr4+67746bb775mm5fXV0RmUx5Lnd1VW9/45sdPsfQDWuveLympuqaz7Gnw1OUjj3fmZbT\n1/kWDgqjPY9119O5O0t7Z+7IGnOK79atW2P//v2xdevW+OCDD6J79+7Rt2/fGDJkyBVv39jYlPOA\nnamh4dTHjtXUVF3xOECp6azHumJ9HG3PzNeyxtbinFN8ly5d2vLnn/70p/HZz372quEFAC7n53wB\nILGcnvn+p1mzZuVjDgDoMjzzBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQX\nABITXwBITHwBIDHxBYDEx
BcAEhNfAEhMfAEgMfEFgMTEFwASE18ASCxT6AHgevTMqiMdPscLk2vz\nMAlQijzzBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHx\nBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASCyT6xc+99xz8Ze//CUuXrwY3/3ud2PMmDH5nAsA\nSlZO8d2+fXu89957sXr16mhsbIzHH39cfAHgGuUU33vvvTcGDBgQERGf+tSn4uzZs9Hc3Bzl5eV5\nHQ4ASlFO8S0vL4+KioqIiFizZk2MGDGi1fBWV1dEJpPfMO/Jwzlqaqradbyz5iC/nll1pNAjQFFo\nz2Pd9XTuztLemTuyxpzf842I+P3vfx9r1qyJX/7yl63errGxqSN302kaGk597FhNTdUVjwOUms56\nrCvWx9H2zHwta2wtzjnH949//GO88sor8dprr0VVVfF9hwMAhZJTfE+dOhXPPfdcvP766/HpT386\n3zMBQEnLKb6//e1vo7GxMb7//e+3HFu8eHH069cvb4MBQKnKKb4TJkyICRMm5HsWAOgSfMIVACQm\nvgCQmPgCQGLiCwCJiS8AJCa+AJCY+AJAYuILAImJLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgC\nQGLiCwCJiS8AJCa+AJBYptADFNKe70z7+LH0YwAUxMw/PFvoEbosz3wBIDHxBYDExBcAEhNfAEhM\nfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEF\ngMTEFwASy+T6hT/+8Y9j165dUVZWFvX19TFgwIB8zgUAJSun+P7pT3+Kf/3rX7F69erYt29f1NfX\nx+rVq/M9GwCUpJxedt62bVuMGjUqIiJuu+22OHHiRJw+fTqvgwFAqcopvkePHo3q6uqWyzfeeGM0\nNDTkbSgAKGU5v+f7n7LZbKvX19RU5eNuLj/nhrV5Pyfk09BCDwB0qo60LadnvrW1tXH06NGWy0eO\nHImampqchwCAriSn+A4dOjQ2bdoUERH/+Mc/ora2Nnr27JnXwQCgVOX0svOgQYPijjvuiIkTJ0ZZ\nWVksWLAg33MBQMkqy7b1hi0AkFc+4QoAEhNfAEgsLz9qVMzOnTsXX//612PGjBkxfvz4luMjR46M\nvn37Rnl5eURELFmyJPr06VOoMdttx44d8cwzz8QXvvCFiIj44he/GD/60Y9arn/nnXfi+eefj/Ly\n8hgxYkTMnDmzUKPmpK31Ffv+XbJx48Z47bXXIpPJxOzZs+OBBx5oua7Y9zCi9fWVwh6++eabsXHj\nxpbLu3fvjr/+9a8tlzdu3Bi/+tWvolu3blFXVxdPPPFEIcbMWVvru+OOO2LQoEEtl19//fWW/SwG\nZ86ciblz58aJEyfiwoULMXPmzBg+fHjL9R3av2wX9/zzz2fHjx+fXbt27WXHH3zwwezp06cLNFXH\nbd++PTtr1qyrXv/QQw9lDx06lG1ubs5OmjQp+9577yWcruPaWl+x7182m80eO3YsO2bMmOypU6ey\nhw8fzs6fP/+y64t9D9taXyns4X/asWNHduHChS2Xz5w5kx0zZkz25MmT2bNnz2bHjRuXbWxsLOCE\nHfPf68tms9n77ruvQNPkx4oVK7JLlizJZrPZ7AcffJAdO3Zsy3Ud3b8u/bLzvn37Yu/evZd9t90V\n7N+/P3r16hU33XRTdOvWLb7yla/Etm3bCj0W/2Xbtm0xePDg6NmzZ9TW1saiRYtariuFPWxtfaXo\npZdeihkzZrRc3rVrV9x1111RVVUVPXr0iEGDBsXOnTsLOGHH/Pf6SkF1dXUcP348IiJOnjx52Sc7\ndnT/unR8Fy9eHPPmzbvq9QsWLIhJkybFkiVL2vwUr+vR3r17Y/r06TFp0qR4++23W443NDT
EjTfe\n2HK5WD8e9Grru6TY9+/AgQNx7ty5mD59ekyePPmyuJbCHra2vkuKfQ8v+dvf/hY33XTTZR9GdPTo\n0aLfw0uutL6IiPPnz8ecOXNi4sSJsXz58gJNl7tx48bFoUOHYvTo0TFlypSYO3duy3Ud3b8u+57v\n+vXr4+67746bb775itfPnj07hg8fHr169YqZM2fGpk2b4mtf+1riKXP3+c9/Pp5++ul46KGHYv/+\n/TF16tR46623onv37oUeLS/aWl+x798lx48fjxdffDEOHToUU6dOjS1btkRZWVmhx8qb1tZXKnsY\nEbFmzZp4/PHHW71NMX9zcbX1Pfvss/Hoo49GWVlZTJkyJe6555646667CjBhbjZs2BD9+vWLZcuW\nxbvvvhv19fWxbt26K962vfvXZZ/5bt26NTZv3hx1dXXx5ptvxs9+9rN45513Wq5/7LHHonfv3pHJ\nZGLEiBGxZ8+eAk7bfn369ImHH344ysrK4pZbbonPfOYzcfjw4Yj4+MeDHj58OGpraws1ak5aW19E\n8e9fRETv3r1j4MCBkclk4pZbbonKyso4duxYRJTGHra2vojS2MNLduzYEQMHDrzs2JU+prfY9vCS\nK60vImLSpElRWVkZFRUVcf/99xfdHu7cuTOGDRsWERG33357HDlyJJqbmyOi4/vXZeO7dOnSWLt2\nbbzxxhvxxBNPxIwZM2LIkCEREXHq1Kl46qmn4vz58xER8ec//7nlf9UWi40bN8ayZcsi4t8vUX74\n4Yct/1P0c5/7XJw+fToOHDgQFy9ejC1btsTQocX1awBaW18p7F9ExLBhw2L79u3x0UcfRWNjYzQ1\nNbW851QKe9ja+kplDyP+/Y1RZWXlx151+vKXvxx///vf4+TJk3HmzJnYuXNn3HPPPQWaMndXW98/\n//nPmDNnTmSz2bh48WLs3Lmz6Pawf//+sWvXroiIOHjwYFRWVrb8b+2O7l+Xfdn5StatWxdVVVUx\nevToGDFiREyYMCFuuOGG+NKXvlR0L3eNHDkyfvCDH8TmzZvjwoULsXDhwvjNb37Tsr6FCxfGnDlz\nIiLi4YcfjltvvbXAE7dPW+sr9v2L+Pez+7Fjx0ZdXV1ERMyfPz/Wr19fMnvY1vpKYQ8jPv7+/C9+\n8Yu49957Y+DAgTFnzpx46qmnoqysLGbOnBlVVfn/DXCdrbX19e3bN771rW9Ft27dYuTIkTFgwIAC\nTtp+EyZMiPr6+pgyZUpcvHgxFi5cmLf98/GSAJBYl33ZGQAKRXwBIDHxBYDExBcAEhNfAEhMfAEg\nMfEFgMTEFwAS+3/wQITBxOH0DQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "metadata": { + "id": "2LTtpUJEibjg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Pandas Exercise :\n", + "\n", + "\n", + "#### import necessary modules" + ] + }, + { + "metadata": { + "id": "c3_UBbMRhiKx", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import pandas as pd" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "tp-cTCyWi8mR", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + 
"#### Load url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\" to a dataframe named wine_df\n", + "\n", + "This is a wine dataset\n", + "\n" + ] + }, + { + "metadata": { + "id": "DMojQY3thrRi", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "wine_df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data')\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "BF9MMjoZjSlg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### print first five rows" + ] + }, + { + "metadata": { + "id": "1vSMQdnHjYNU", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "outputId": "bcff5bdc-957f-4477-8fb6-42ae96485fe5" + }, + "cell_type": "code", + "source": [ + "print(wine_df.head())" + ], + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "text": [ + " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n", + "1 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n", + "3 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.75 1.05 2.85 \n", + "\n", + " 1065 \n", + "0 1050 \n", + "1 1185 \n", + "2 1480 \n", + "3 735 \n", + "4 1450 \n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "Tet6P2DvjY3T", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### assign wine_df to a different variable wine_df_copy and then delete all odd rows of wine_df_copy\n", + "\n", + "[Hint](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.drop.html)" + ] + }, + { + "metadata": { + "id": "CMj3qSdJjx0u", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + 
}, + "outputId": "f1d7229b-92dc-44da-f2c1-6107aa684373" + }, + "cell_type": "code", + "source": [ + "# Keep every second row by position (drops the odd-positioned rows).\n", + "# .copy() makes the result independent of wine_df, so later in-place edits\n", + "# to wine_df cannot leak into wine_df_copy.\n", + "wine_df_copy = wine_df.iloc[::2].copy()\n", + "\n", + "wine_df_copy.head()\n" + ], + "execution_count": 39, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
0113.201.782.1411.21002.652.760.261.284.381.053.401050
2114.371.952.5016.81133.853.490.242.187.800.863.451480
4114.201.762.4515.21123.273.390.341.976.751.052.851450
6114.062.152.6117.61212.602.510.311.255.051.063.581295
8113.861.352.2716.0982.983.150.221.857.221.013.551045
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 \n", + "6 1 14.06 2.15 2.61 17.6 121 2.60 \n", + "8 1 13.86 1.35 2.27 16.0 98 2.98 \n", + "\n", + " Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", + "0 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.49 0.24 2.18 7.80 0.86 \n", + "4 3.39 0.34 1.97 6.75 1.05 \n", + "6 2.51 0.31 1.25 5.05 1.06 \n", + "8 3.15 0.22 1.85 7.22 1.01 \n", + "\n", + " OD280/OD315 of diluted wines Proline \n", + "0 3.40 1050 \n", + "2 3.45 1480 \n", + "4 2.85 1450 \n", + "6 3.58 1295 \n", + "8 3.55 1045 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 39 + } + ] + }, + { + "metadata": { + "id": "o6Cs6T1Rjz71", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Assign the columns as below:\n", + "\n", + "The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it): \n", + "1) Alcohol \n", + "2) Malic acid \n", + "3) Ash \n", + "4) Alcalinity of ash \n", + "5) Magnesium \n", + "6) Total phenols \n", + "7) Flavanoids \n", + "8) Nonflavanoid phenols \n", + "9) Proanthocyanins \n", + "10) Color intensity \n", + "11) Hue \n", + "12) OD280/OD315 of diluted wines \n", + "13) Proline " + ] + }, + { + "metadata": { + "id": "my8HB4V4j779", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 376 + }, + "outputId": "3716de85-4ea3-4aaa-de05-829869427e88" + }, + "cell_type": "code", + "source": [ + "wine_df.columns = [' ', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline']\n", + "\n", + "wine_df.head(10)" + ], + "execution_count": 40, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
0113.201.782.1411.21002.652.760.261.284.381.053.401050
1113.162.362.6718.61012.803.240.302.815.681.033.171185
2114.371.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
4114.201.762.4515.21123.273.390.341.976.751.052.851450
5114.391.872.4514.6962.502.520.301.985.251.023.581290
6114.062.152.6117.61212.602.510.311.255.051.063.581295
7114.831.642.1714.0972.802.980.291.985.201.082.851045
8113.861.352.2716.0982.983.150.221.857.221.013.551045
9114.102.162.3018.01052.953.320.222.385.751.253.171510
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 \n", + "1 1 13.16 2.36 2.67 18.6 101 2.80 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 \n", + "3 1 13.24 2.59 2.87 21.0 118 2.80 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 \n", + "5 1 14.39 1.87 2.45 14.6 96 2.50 \n", + "6 1 14.06 2.15 2.61 17.6 121 2.60 \n", + "7 1 14.83 1.64 2.17 14.0 97 2.80 \n", + "8 1 13.86 1.35 2.27 16.0 98 2.98 \n", + "9 1 14.10 2.16 2.30 18.0 105 2.95 \n", + "\n", + " Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", + "0 2.76 0.26 1.28 4.38 1.05 \n", + "1 3.24 0.30 2.81 5.68 1.03 \n", + "2 3.49 0.24 2.18 7.80 0.86 \n", + "3 2.69 0.39 1.82 4.32 1.04 \n", + "4 3.39 0.34 1.97 6.75 1.05 \n", + "5 2.52 0.30 1.98 5.25 1.02 \n", + "6 2.51 0.31 1.25 5.05 1.06 \n", + "7 2.98 0.29 1.98 5.20 1.08 \n", + "8 3.15 0.22 1.85 7.22 1.01 \n", + "9 3.32 0.22 2.38 5.75 1.25 \n", + "\n", + " OD280/OD315 of diluted wines Proline \n", + "0 3.40 1050 \n", + "1 3.17 1185 \n", + "2 3.45 1480 \n", + "3 2.93 735 \n", + "4 2.85 1450 \n", + "5 3.58 1290 \n", + "6 3.58 1295 \n", + "7 2.85 1045 \n", + "8 3.55 1045 \n", + "9 3.17 1510 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 40 + } + ] + }, + { + "metadata": { + "id": "Zqi7hwWpkNbH", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Set the Alcohol values of the first 3 rows to NaN\n", + "\n", + "Hint: use iloc to select the first 3 rows of wine_df" + ] + }, + { + "metadata": { + "id": "buyT4vX4kPMl", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + }, + "outputId": "ec7814fd-5805-462d-bde5-700a2284b3bf" + }, + "cell_type": "code", + "source": [ + "wine_df.loc[:2, 'Alcohol'] = np.NaN\n", + "wine_df.head()\n" + ], + "execution_count": 41, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
01NaN1.782.1411.21002.652.760.261.284.381.053.401050
11NaN2.362.6718.61012.803.240.302.815.681.033.171185
21NaN1.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
4114.201.762.4515.21123.273.390.341.976.751.052.851450
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", + "0 1 NaN 1.78 2.14 11.2 100 2.65 \n", + "1 1 NaN 2.36 2.67 18.6 101 2.80 \n", + "2 1 NaN 1.95 2.50 16.8 113 3.85 \n", + "3 1 13.24 2.59 2.87 21.0 118 2.80 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 \n", + "\n", + " Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", + "0 2.76 0.26 1.28 4.38 1.05 \n", + "1 3.24 0.30 2.81 5.68 1.03 \n", + "2 3.49 0.24 2.18 7.80 0.86 \n", + "3 2.69 0.39 1.82 4.32 1.04 \n", + "4 3.39 0.34 1.97 6.75 1.05 \n", + "\n", + " OD280/OD315 of diluted wines Proline \n", + "0 3.40 1050 \n", + "1 3.17 1185 \n", + "2 3.45 1480 \n", + "3 2.93 735 \n", + "4 2.85 1450 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 41 + } + ] + }, + { + "metadata": { + "id": "RQMNI2UHkP3o", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Create an array of 10 random numbers up to 10 and assign it to a variable named `random`" + ] + }, + { + "metadata": { + "id": "xunmCjaEmDwZ", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import random\n", + "arr = random.sample(range(0, 10), 10)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "hELUakyXmFSu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Use the random numbers you generated as an index and assign a NaN value to the corresponding cells of the Alcohol column" + ] + }, + { + "metadata": { + "id": "zMgaNnNHmP01", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 531 + }, + "outputId": "04045d1b-d45a-4655-c41b-f1648efba5ed" + }, + "cell_type": "code", + "source": [ + "wine_df.loc[arr, 'Alcohol'] = np.NaN\n", + "wine_df.head(15)" + ], + "execution_count": 43, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
01NaN1.782.1411.21002.652.760.261.284.381.053.401050
11NaN2.362.6718.61012.803.240.302.815.681.033.171185
21NaN1.952.5016.81133.853.490.242.187.800.863.451480
31NaN2.592.8721.01182.802.690.391.824.321.042.93735
41NaN1.762.4515.21123.273.390.341.976.751.052.851450
51NaN1.872.4514.6962.502.520.301.985.251.023.581290
61NaN2.152.6117.61212.602.510.311.255.051.063.581295
71NaN1.642.1714.0972.802.980.291.985.201.082.851045
81NaN1.352.2716.0982.983.150.221.857.221.013.551045
91NaN2.162.3018.01052.953.320.222.385.751.253.171510
10114.121.482.3216.8952.202.430.261.575.001.172.821280
11113.751.732.4116.0892.602.760.291.815.601.152.901320
12114.751.732.3911.4913.103.690.432.815.401.252.731150
13114.381.872.3812.01023.303.640.292.967.501.203.001547
14113.631.812.7017.21122.852.910.301.467.301.282.881310
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", + "0 1 NaN 1.78 2.14 11.2 100 2.65 \n", + "1 1 NaN 2.36 2.67 18.6 101 2.80 \n", + "2 1 NaN 1.95 2.50 16.8 113 3.85 \n", + "3 1 NaN 2.59 2.87 21.0 118 2.80 \n", + "4 1 NaN 1.76 2.45 15.2 112 3.27 \n", + "5 1 NaN 1.87 2.45 14.6 96 2.50 \n", + "6 1 NaN 2.15 2.61 17.6 121 2.60 \n", + "7 1 NaN 1.64 2.17 14.0 97 2.80 \n", + "8 1 NaN 1.35 2.27 16.0 98 2.98 \n", + "9 1 NaN 2.16 2.30 18.0 105 2.95 \n", + "10 1 14.12 1.48 2.32 16.8 95 2.20 \n", + "11 1 13.75 1.73 2.41 16.0 89 2.60 \n", + "12 1 14.75 1.73 2.39 11.4 91 3.10 \n", + "13 1 14.38 1.87 2.38 12.0 102 3.30 \n", + "14 1 13.63 1.81 2.70 17.2 112 2.85 \n", + "\n", + " Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", + "0 2.76 0.26 1.28 4.38 1.05 \n", + "1 3.24 0.30 2.81 5.68 1.03 \n", + "2 3.49 0.24 2.18 7.80 0.86 \n", + "3 2.69 0.39 1.82 4.32 1.04 \n", + "4 3.39 0.34 1.97 6.75 1.05 \n", + "5 2.52 0.30 1.98 5.25 1.02 \n", + "6 2.51 0.31 1.25 5.05 1.06 \n", + "7 2.98 0.29 1.98 5.20 1.08 \n", + "8 3.15 0.22 1.85 7.22 1.01 \n", + "9 3.32 0.22 2.38 5.75 1.25 \n", + "10 2.43 0.26 1.57 5.00 1.17 \n", + "11 2.76 0.29 1.81 5.60 1.15 \n", + "12 3.69 0.43 2.81 5.40 1.25 \n", + "13 3.64 0.29 2.96 7.50 1.20 \n", + "14 2.91 0.30 1.46 7.30 1.28 \n", + "\n", + " OD280/OD315 of diluted wines Proline \n", + "0 3.40 1050 \n", + "1 3.17 1185 \n", + "2 3.45 1480 \n", + "3 2.93 735 \n", + "4 2.85 1450 \n", + "5 3.58 1290 \n", + "6 3.58 1295 \n", + "7 2.85 1045 \n", + "8 3.55 1045 \n", + "9 3.17 1510 \n", + "10 2.82 1280 \n", + "11 2.90 1320 \n", + "12 2.73 1150 \n", + "13 3.00 1547 \n", + "14 2.88 1310 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 43 + } + ] + }, + { + "metadata": { + "id": "PHyK_vRsmRwV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### How many missing values do we have? 
\n", + "\n", + "Hint: you can use isnull() and sum()" + ] + }, + { + "metadata": { + "id": "EnOYhmEqmfKp", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 272 + }, + "outputId": "767ffaf7-7758-4394-9b18-8b240297c6d8" + }, + "cell_type": "code", + "source": [ + "wine_df.isnull().sum()" + ], + "execution_count": 44, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0\n", + "Alcohol 10\n", + "Malic acid 0\n", + "Ash 0\n", + "Alcalinity of ash 0\n", + "Magnesium 0\n", + "Total phenols 0\n", + "Flavanoids 0\n", + "Nonflavanoid phenols 0\n", + "Proanthocyanins 0\n", + "Color intensity 0\n", + "Hue 0\n", + "OD280/OD315 of diluted wines 0\n", + "Proline 0\n", + "dtype: int64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 44 + } + ] + }, + { + "metadata": { + "id": "-Fd4WBklmf1_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Delete the rows that contain missing values " + ] + }, + { + "metadata": { + "id": "As7IC6Ktms8-", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 531 + }, + "outputId": "42fd938a-aaf9-421a-c574-b1ae84862997" + }, + "cell_type": "code", + "source": [ + "wine_df.dropna(inplace=True)\n", + "wine_df.head(15)" + ], + "execution_count": 45, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
10114.121.482.3216.8952.202.430.261.575.001.172.821280
11113.751.732.4116.0892.602.760.291.815.601.152.901320
12114.751.732.3911.4913.103.690.432.815.401.252.731150
13114.381.872.3812.01023.303.640.292.967.501.203.001547
14113.631.812.7017.21122.852.910.301.467.301.282.881310
15114.301.922.7220.01202.803.140.331.976.201.072.651280
16113.831.572.6220.01152.953.400.401.726.601.132.571130
17114.191.592.4816.51083.303.930.321.868.701.232.821680
18113.643.102.5615.21162.703.030.171.665.100.963.36845
19114.061.632.2816.01263.003.170.242.105.651.093.71780
20112.933.802.6518.61022.412.410.251.984.501.033.52770
21113.711.862.3616.61012.612.880.271.693.801.114.001035
22112.851.602.5217.8952.482.370.261.463.931.093.631015
23113.501.812.6120.0962.532.610.281.663.521.123.82845
24113.052.053.2225.01242.632.680.471.923.581.133.20830
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", + "10 1 14.12 1.48 2.32 16.8 95 2.20 \n", + "11 1 13.75 1.73 2.41 16.0 89 2.60 \n", + "12 1 14.75 1.73 2.39 11.4 91 3.10 \n", + "13 1 14.38 1.87 2.38 12.0 102 3.30 \n", + "14 1 13.63 1.81 2.70 17.2 112 2.85 \n", + "15 1 14.30 1.92 2.72 20.0 120 2.80 \n", + "16 1 13.83 1.57 2.62 20.0 115 2.95 \n", + "17 1 14.19 1.59 2.48 16.5 108 3.30 \n", + "18 1 13.64 3.10 2.56 15.2 116 2.70 \n", + "19 1 14.06 1.63 2.28 16.0 126 3.00 \n", + "20 1 12.93 3.80 2.65 18.6 102 2.41 \n", + "21 1 13.71 1.86 2.36 16.6 101 2.61 \n", + "22 1 12.85 1.60 2.52 17.8 95 2.48 \n", + "23 1 13.50 1.81 2.61 20.0 96 2.53 \n", + "24 1 13.05 2.05 3.22 25.0 124 2.63 \n", + "\n", + " Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", + "10 2.43 0.26 1.57 5.00 1.17 \n", + "11 2.76 0.29 1.81 5.60 1.15 \n", + "12 3.69 0.43 2.81 5.40 1.25 \n", + "13 3.64 0.29 2.96 7.50 1.20 \n", + "14 2.91 0.30 1.46 7.30 1.28 \n", + "15 3.14 0.33 1.97 6.20 1.07 \n", + "16 3.40 0.40 1.72 6.60 1.13 \n", + "17 3.93 0.32 1.86 8.70 1.23 \n", + "18 3.03 0.17 1.66 5.10 0.96 \n", + "19 3.17 0.24 2.10 5.65 1.09 \n", + "20 2.41 0.25 1.98 4.50 1.03 \n", + "21 2.88 0.27 1.69 3.80 1.11 \n", + "22 2.37 0.26 1.46 3.93 1.09 \n", + "23 2.61 0.28 1.66 3.52 1.12 \n", + "24 2.68 0.47 1.92 3.58 1.13 \n", + "\n", + " OD280/OD315 of diluted wines Proline \n", + "10 2.82 1280 \n", + "11 2.90 1320 \n", + "12 2.73 1150 \n", + "13 3.00 1547 \n", + "14 2.88 1310 \n", + "15 2.65 1280 \n", + "16 2.57 1130 \n", + "17 2.82 1680 \n", + "18 3.36 845 \n", + "19 3.71 780 \n", + "20 3.52 770 \n", + "21 4.00 1035 \n", + "22 3.63 1015 \n", + "23 3.82 845 \n", + "24 3.20 830 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 45 + } + ] + }, + { + "metadata": { + "id": "DlpG8drhmz7W", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### BONUS: Play with the data set below" + ] + }, + { + "metadata": { + 
"id": "mD40T0Cnm5SA", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 351 + }, + "outputId": "bb940ea4-50e4-474d-edb2-7d62f72d6920" + }, + "cell_type": "code", + "source": [ + "wine_df.describe()" + ], + "execution_count": 47, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
count167.000000167.000000167.000000167.000000167.000000167.000000167.000000167.000000167.000000167.000000167.000000167.000000167.000000167.000000
mean2.00000012.9369462.3625752.36155719.70958199.1976052.2546711.9646710.3668261.5652105.0137720.9517132.565449717.347305
std0.7604690.7864931.1454330.2773953.27585614.3427240.6185360.9943130.1263100.5711452.3715550.2338590.703846296.915081
min1.00000011.0300000.7400001.36000010.60000070.0000000.9800000.3400000.1300000.4100001.2800000.4800001.270000278.000000
25%1.00000012.3300001.5800002.21000017.50000088.0000001.7000001.0900000.2700001.1850003.0650000.7650001.845000495.000000
50%2.00000012.9300001.8600002.36000019.50000097.0000002.2000002.0300000.3400001.5300004.6000000.9600002.750000650.000000
75%3.00000013.5800003.1700002.55500021.500000107.0000002.7400002.7700000.4500001.8850006.2000001.1200003.135000882.500000
max3.00000014.7500005.8000003.23000030.000000162.0000003.8800005.0800000.6600003.58000013.0000001.7100004.0000001680.000000
\n", + "
" + ], + "text/plain": [ + " Alcohol Malic acid Ash Alcalinity of ash \\\n", + "count 167.000000 167.000000 167.000000 167.000000 167.000000 \n", + "mean 2.000000 12.936946 2.362575 2.361557 19.709581 \n", + "std 0.760469 0.786493 1.145433 0.277395 3.275856 \n", + "min 1.000000 11.030000 0.740000 1.360000 10.600000 \n", + "25% 1.000000 12.330000 1.580000 2.210000 17.500000 \n", + "50% 2.000000 12.930000 1.860000 2.360000 19.500000 \n", + "75% 3.000000 13.580000 3.170000 2.555000 21.500000 \n", + "max 3.000000 14.750000 5.800000 3.230000 30.000000 \n", + "\n", + " Magnesium Total phenols Flavanoids Nonflavanoid phenols \\\n", + "count 167.000000 167.000000 167.000000 167.000000 \n", + "mean 99.197605 2.254671 1.964671 0.366826 \n", + "std 14.342724 0.618536 0.994313 0.126310 \n", + "min 70.000000 0.980000 0.340000 0.130000 \n", + "25% 88.000000 1.700000 1.090000 0.270000 \n", + "50% 97.000000 2.200000 2.030000 0.340000 \n", + "75% 107.000000 2.740000 2.770000 0.450000 \n", + "max 162.000000 3.880000 5.080000 0.660000 \n", + "\n", + " Proanthocyanins Color intensity Hue \\\n", + "count 167.000000 167.000000 167.000000 \n", + "mean 1.565210 5.013772 0.951713 \n", + "std 0.571145 2.371555 0.233859 \n", + "min 0.410000 1.280000 0.480000 \n", + "25% 1.185000 3.065000 0.765000 \n", + "50% 1.530000 4.600000 0.960000 \n", + "75% 1.885000 6.200000 1.120000 \n", + "max 3.580000 13.000000 1.710000 \n", + "\n", + " OD280/OD315 of diluted wines Proline \n", + "count 167.000000 167.000000 \n", + "mean 2.565449 717.347305 \n", + "std 0.703846 296.915081 \n", + "min 1.270000 278.000000 \n", + "25% 1.845000 495.000000 \n", + "50% 2.750000 650.000000 \n", + "75% 3.135000 882.500000 \n", + "max 4.000000 1680.000000 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 47 + } + ] + }, + { + "metadata": { + "id": "0kskZlnc3AOf", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "**Bonus :\n", + " SORTING THE DATA SET ACCORDING TO THEIR Alcohol value and 
Alcalinity of ash**" + ] + }, + { + "metadata": { + "id": "ggiVqoBp27sz", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 359 + }, + "outputId": "206427fd-2616-4c08-9d75-900c1b2970aa" + }, + "cell_type": "code", + "source": [ + "data_sorted = wine_df.sort_values(['Alcohol','Alcalinity of ash'], ascending=False)\n", + "data_sorted[['Alcohol','Alcalinity of ash']].head(10)" + ], + "execution_count": 50, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholAlcalinity of ash
1214.7511.4
4514.3816.0
1314.3812.0
15714.3425.0
1514.3020.0
5514.2216.3
3814.2213.2
4414.2118.9
1714.1916.5
17114.1620.0
\n", + "
" + ], + "text/plain": [ + " Alcohol Alcalinity of ash\n", + "12 14.75 11.4\n", + "45 14.38 16.0\n", + "13 14.38 12.0\n", + "157 14.34 25.0\n", + "15 14.30 20.0\n", + "55 14.22 16.3\n", + "38 14.22 13.2\n", + "44 14.21 18.9\n", + "17 14.19 16.5\n", + "171 14.16 20.0" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 50 + } + ] + } + ] +} \ No newline at end of file