diff --git a/Basic_Pandas.ipynb b/Basic_Pandas.ipynb
new file mode 100644
index 0000000..8bd7490
--- /dev/null
+++ b/Basic_Pandas.ipynb
@@ -0,0 +1,964 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Basic Pandas.ipynb",
+ "version": "0.3.2",
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "cGbE814_Xaf9",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Pandas\n",
+ "\n",
+ "Pandas is an open-source, BSD-licensed Python library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language. Python with Pandas is used in a wide range of fields including academic and commercial domains including finance, economics, Statistics, analytics, etc.In this tutorial, we will learn the various features of Python Pandas and how to use them in practice.\n",
+ "\n",
+ "\n",
+ "## Import pandas and numpy"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "irlVYeeAXPDL",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "BI2J-zdMbGwE",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "### This is your playground feel free to explore other functions on pandas\n",
+ "\n",
+ "#### Create Series from numpy array, list and dict\n",
+ "\n",
+ "Don't know what a series is?\n",
+ "\n",
+ "[Series Doc](https://pandas.pydata.org/pandas-docs/version/0.22/generated/pandas.Series.html)"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "GeEct691YGE3",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 138
+ },
+ "outputId": "fcd30705-266a-4f99-a326-9719d81b2795"
+ },
+ "cell_type": "code",
+ "source": [
+ "a_ascii = ord('A')\n",
+ "z_ascii = ord('Z')\n",
+ "alphabets = [chr(i) for i in range(a_ascii, z_ascii+1)]\n",
+ "\n",
+ "print(alphabets)\n",
+ "\n",
+ "numbers = np.arange(26)\n",
+ "\n",
+ "print(numbers)\n",
+ "\n",
+ "print(type(alphabets), type(numbers))\n",
+ "\n",
+ "alpha_numbers = dict(zip(alphabets, numbers))\n",
+ "\n",
+ "print(alpha_numbers)\n",
+ "\n",
+ "print(type(alpha_numbers))"
+ ],
+ "execution_count": 29,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']\n",
+ "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23\n",
+ " 24 25]\n",
+ " \n",
+ "{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "6ouDfjWab_Mc",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 470
+ },
+ "outputId": "7d9365bf-4ac9-46de-b228-796205b6bbb5"
+ },
+ "cell_type": "code",
+ "source": [
+ "series1 = pd.Series(alphabets)\n",
+ "print(series1)"
+ ],
+ "execution_count": 28,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "0 A\n",
+ "1 B\n",
+ "2 C\n",
+ "3 D\n",
+ "4 E\n",
+ "5 F\n",
+ "6 G\n",
+ "7 H\n",
+ "8 I\n",
+ "9 J\n",
+ "10 K\n",
+ "11 L\n",
+ "12 M\n",
+ "13 N\n",
+ "14 O\n",
+ "15 P\n",
+ "16 Q\n",
+ "17 R\n",
+ "18 S\n",
+ "19 T\n",
+ "20 U\n",
+ "21 V\n",
+ "22 W\n",
+ "23 X\n",
+ "24 Y\n",
+ "25 Z\n",
+ "dtype: object\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "At7nY7vVcBZ3",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 470
+ },
+ "outputId": "63723be2-fd64-447e-ac21-b1e750dcd2da"
+ },
+ "cell_type": "code",
+ "source": [
+ "series2 = pd.Series(numbers)\n",
+ "print(series2)"
+ ],
+ "execution_count": 27,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "0 0\n",
+ "1 1\n",
+ "2 2\n",
+ "3 3\n",
+ "4 4\n",
+ "5 5\n",
+ "6 6\n",
+ "7 7\n",
+ "8 8\n",
+ "9 9\n",
+ "10 10\n",
+ "11 11\n",
+ "12 12\n",
+ "13 13\n",
+ "14 14\n",
+ "15 15\n",
+ "16 16\n",
+ "17 17\n",
+ "18 18\n",
+ "19 19\n",
+ "20 20\n",
+ "21 21\n",
+ "22 22\n",
+ "23 23\n",
+ "24 24\n",
+ "25 25\n",
+ "dtype: int64\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "J5z-2CWAdH6N",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 470
+ },
+ "outputId": "ca1cbb9b-d14b-49f1-8bc9-626613206e6e"
+ },
+ "cell_type": "code",
+ "source": [
+ "series3 = pd.Series(alpha_numbers)\n",
+ "print(series3)"
+ ],
+ "execution_count": 26,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "A 0\n",
+ "B 1\n",
+ "C 2\n",
+ "D 3\n",
+ "E 4\n",
+ "F 5\n",
+ "G 6\n",
+ "H 7\n",
+ "I 8\n",
+ "J 9\n",
+ "K 10\n",
+ "L 11\n",
+ "M 12\n",
+ "N 13\n",
+ "O 14\n",
+ "P 15\n",
+ "Q 16\n",
+ "R 17\n",
+ "S 18\n",
+ "T 19\n",
+ "U 20\n",
+ "V 21\n",
+ "W 22\n",
+ "X 23\n",
+ "Y 24\n",
+ "Z 25\n",
+ "dtype: int64\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "fYzblGGudKjO",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 151
+ },
+ "outputId": "4deecd54-4154-4659-865a-71a14d108cc8"
+ },
+ "cell_type": "code",
+ "source": [
+ "#replace head() with head(n) where n can be any number between [0-25] and observe the output in deach case \n",
+ "series3.head(7)"
+ ],
+ "execution_count": 25,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "A 0\n",
+ "B 1\n",
+ "C 2\n",
+ "D 3\n",
+ "E 4\n",
+ "F 5\n",
+ "G 6\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 25
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "OwsJIf5feTtg",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Create DataFrame from lists\n",
+ "\n",
+ "[DataFrame Doc](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "73UTZ07EdWki",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 817
+ },
+ "outputId": "35b4c77d-4428-4274-ad27-ec9dbf334f9f"
+ },
+ "cell_type": "code",
+ "source": [
+ "data = {'alphabets': alphabets, 'values': numbers}\n",
+ "\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "#Lets Change the column `values` to `alpha_numbers`\n",
+ "\n",
+ "df.columns = ['alphabets', 'alpha_numbers']\n",
+ "\n",
+ "df"
+ ],
+ "execution_count": 24,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " alphabets | \n",
+ " alpha_numbers | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " A | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " B | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " C | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " D | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " E | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " F | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " G | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " H | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " I | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " J | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " K | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " L | \n",
+ " 11 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " M | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " N | \n",
+ " 13 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " O | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " P | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " Q | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " R | \n",
+ " 17 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " S | \n",
+ " 18 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " T | \n",
+ " 19 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " U | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " V | \n",
+ " 21 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " W | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " X | \n",
+ " 23 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " Y | \n",
+ " 24 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " Z | \n",
+ " 25 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " alphabets alpha_numbers\n",
+ "0 A 0\n",
+ "1 B 1\n",
+ "2 C 2\n",
+ "3 D 3\n",
+ "4 E 4\n",
+ "5 F 5\n",
+ "6 G 6\n",
+ "7 H 7\n",
+ "8 I 8\n",
+ "9 J 9\n",
+ "10 K 10\n",
+ "11 L 11\n",
+ "12 M 12\n",
+ "13 N 13\n",
+ "14 O 14\n",
+ "15 P 15\n",
+ "16 Q 16\n",
+ "17 R 17\n",
+ "18 S 18\n",
+ "19 T 19\n",
+ "20 U 20\n",
+ "21 V 21\n",
+ "22 W 22\n",
+ "23 X 23\n",
+ "24 Y 24\n",
+ "25 Z 25"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 24
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "uaK_1EO9etGS",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 77
+ },
+ "outputId": "0305d8f6-dd4a-4716-a00b-b2bbe27ba9c1"
+ },
+ "cell_type": "code",
+ "source": [
+ "# transpose\n",
+ "\n",
+ "df.T\n",
+ "\n",
+ "# there are many more operations which we can perform look at the documentation with the subsequent exercises we will learn more"
+ ],
+ "execution_count": 23,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | vowels | \n",
+ " a | \n",
+ " e | \n",
+ " i | \n",
+ " o | \n",
+ " u | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4\n",
+ "vowels a e i o u"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 23
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "ZYonoaW8gEAJ",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Extract Items from a series"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "tc1-KX_Bfe7U",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
+ },
+ "outputId": "8bd83635-e2ba-4b40-ab64-dcb5e1e07e36"
+ },
+ "cell_type": "code",
+ "source": [
+ "ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))\n",
+ "pos = [0, 4, 8, 14, 20]\n",
+ "\n",
+ "vowels = ser.take(pos)\n",
+ "\n",
+ "df = pd.DataFrame(vowels)#, columns=['vowels'])\n",
+ "\n",
+ "df.columns = ['vowels']\n",
+ "\n",
+ "df.index = [0, 1, 2, 3, 4]\n",
+ "\n",
+ "df"
+ ],
+ "execution_count": 22,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " vowels | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " e | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " i | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " o | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " u | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " vowels\n",
+ "0 a\n",
+ "1 e\n",
+ "2 i\n",
+ "3 o\n",
+ "4 u"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 22
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "cmDxwtDNjWpO",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Change the first character of each word to upper case in each word of ser"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "5KagP9PpgV2F",
+ "colab_type": "code",
+ "outputId": "e1d65e7d-e497-4ae6-b810-eba5e7551d78",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "ser = pd.Series(['we', 'are', 'learning', 'pandas'])\n",
+ "\n",
+ "ser.map(lambda x : x.title())\n",
+ "\n",
+ "titles = [i.title() for i in ser]\n",
+ "\n",
+ "titles"
+ ],
+ "execution_count": 21,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "['We', 'Are', 'Learning', 'Pandas']"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 21
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "qn47ee-MkZN8",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Reindexing"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "h5R0JL2NjuFS",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
+ },
+ "outputId": "36acbb0f-f10e-410f-e53c-7fae09464b69"
+ },
+ "cell_type": "code",
+ "source": [
+ "my_index = [1, 2, 3, 4, 5]\n",
+ "\n",
+ "df1 = pd.DataFrame({'upper values': ['A', 'B', 'C', 'D', 'E'],\n",
+ " 'lower values': ['a', 'b', 'c', 'd', 'e']},\n",
+ " index = my_index)\n",
+ "\n",
+ "df1"
+ ],
+ "execution_count": 20,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " lower values | \n",
+ " upper values | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " a | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " b | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " c | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " d | \n",
+ " D | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " e | \n",
+ " E | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " lower values upper values\n",
+ "1 a A\n",
+ "2 b B\n",
+ "3 c C\n",
+ "4 d D\n",
+ "5 e E"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 20
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "G_Frvc3mk93k",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
+ },
+ "outputId": "8919eda1-4e08-4c10-fe08-52824eef2fd8"
+ },
+ "cell_type": "code",
+ "source": [
+ "new_index = [2, 5, 4, 3, 1]\n",
+ "\n",
+ "df1.reindex(index = new_index)"
+ ],
+ "execution_count": 19,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " lower values | \n",
+ " upper values | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2 | \n",
+ " b | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " e | \n",
+ " E | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " d | \n",
+ " D | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " c | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " a | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " lower values upper values\n",
+ "2 b B\n",
+ "5 e E\n",
+ "4 d D\n",
+ "3 c C\n",
+ "1 a A"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 19
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/Exercise.ipynb b/Exercise.ipynb
new file mode 100644
index 0000000..588c590
--- /dev/null
+++ b/Exercise.ipynb
@@ -0,0 +1,596 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Exercise.ipynb",
+ "version": "0.3.2",
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "2LTtpUJEibjg",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Pandas Exercise :\n",
+ "\n",
+ "\n",
+ "#### import necessary modules"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "c3_UBbMRhiKx",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "tp-cTCyWi8mR",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Load url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\" to a dataframe named wine_df\n",
+ "\n",
+ "This is a wine dataset\n",
+ "\n"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "DMojQY3thrRi",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "wine_df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data')"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "BF9MMjoZjSlg",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### print first five rows"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "1vSMQdnHjYNU",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 198
+ },
+ "outputId": "c6637b1b-d56e-45f4-e507-9a9e2b660ef9"
+ },
+ "cell_type": "code",
+ "source": [
+ "wine_df.head(5)"
+ ],
+ "execution_count": 51,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 1 | \n",
+ " 14.23 | \n",
+ " 1.71 | \n",
+ " 2.43 | \n",
+ " 15.6 | \n",
+ " 127 | \n",
+ " 2.8 | \n",
+ " 3.06 | \n",
+ " .28 | \n",
+ " 2.29 | \n",
+ " 5.64 | \n",
+ " 1.04 | \n",
+ " 3.92 | \n",
+ " 1065 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 13.20 | \n",
+ " 1.78 | \n",
+ " 2.14 | \n",
+ " 11.2 | \n",
+ " 100 | \n",
+ " 2.65 | \n",
+ " 2.76 | \n",
+ " 0.26 | \n",
+ " 1.28 | \n",
+ " 4.38 | \n",
+ " 1.05 | \n",
+ " 3.40 | \n",
+ " 1050 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 13.16 | \n",
+ " 2.36 | \n",
+ " 2.67 | \n",
+ " 18.6 | \n",
+ " 101 | \n",
+ " 2.80 | \n",
+ " 3.24 | \n",
+ " 0.30 | \n",
+ " 2.81 | \n",
+ " 5.68 | \n",
+ " 1.03 | \n",
+ " 3.17 | \n",
+ " 1185 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 14.37 | \n",
+ " 1.95 | \n",
+ " 2.50 | \n",
+ " 16.8 | \n",
+ " 113 | \n",
+ " 3.85 | \n",
+ " 3.49 | \n",
+ " 0.24 | \n",
+ " 2.18 | \n",
+ " 7.80 | \n",
+ " 0.86 | \n",
+ " 3.45 | \n",
+ " 1480 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1 | \n",
+ " 13.24 | \n",
+ " 2.59 | \n",
+ " 2.87 | \n",
+ " 21.0 | \n",
+ " 118 | \n",
+ " 2.80 | \n",
+ " 2.69 | \n",
+ " 0.39 | \n",
+ " 1.82 | \n",
+ " 4.32 | \n",
+ " 1.04 | \n",
+ " 2.93 | \n",
+ " 735 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1 | \n",
+ " 14.20 | \n",
+ " 1.76 | \n",
+ " 2.45 | \n",
+ " 15.2 | \n",
+ " 112 | \n",
+ " 3.27 | \n",
+ " 3.39 | \n",
+ " 0.34 | \n",
+ " 1.97 | \n",
+ " 6.75 | \n",
+ " 1.05 | \n",
+ " 2.85 | \n",
+ " 1450 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 \\\n",
+ "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n",
+ "1 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 \n",
+ "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n",
+ "3 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 \n",
+ "4 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.75 1.05 2.85 \n",
+ "\n",
+ " 1065 \n",
+ "0 1050 \n",
+ "1 1185 \n",
+ "2 1480 \n",
+ "3 735 \n",
+ "4 1450 "
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 51
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "Tet6P2DvjY3T",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### assign wine_df to a different variable wine_df_copy and then delete all odd rows of wine_df_copy\n",
+ "\n",
+ "[Hint](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.drop.html)"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "CMj3qSdJjx0u",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 145
+ },
+ "outputId": "d783ecf3-be0d-410c-fbb7-40cbeeb028a2"
+ },
+ "cell_type": "code",
+ "source": [
+ "wine_df_copy = wine_df.copy()\n",
+ "wine_df_copy = wine_df_copy[wine_df_copy.index % 2 == 0]\n",
+ "print(wine_df_copy.head(2))"
+ ],
+ "execution_count": 52,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 \\\n",
+ "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n",
+ "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n",
+ "\n",
+ " 1065 \n",
+ "0 1050 \n",
+ "2 1480 \n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "o6Cs6T1Rjz71",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Assign the columns as below:\n",
+ "\n",
+ "The attributes are (dontated by Riccardo Leardi, riclea '@' anchem.unige.it): \n",
+ "1) Alcohol \n",
+ "2) Malic acid \n",
+ "3) Ash \n",
+ "4) Alcalinity of ash \n",
+ "5) Magnesium \n",
+ "6) Total phenols \n",
+ "7) Flavanoids \n",
+ "8) Nonflavanoid phenols \n",
+ "9) Proanthocyanins \n",
+ "10)Color intensity \n",
+ "11)Hue \n",
+ "12)OD280/OD315 of diluted wines \n",
+ "13)Proline "
+ ]
+ },
+ {
+ "metadata": {
+ "id": "my8HB4V4j779",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "wine_df.columns = ['Alcohol','Malic Acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','0D280/0D315 of diluted wines','Proline','Acidity']"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "Zqi7hwWpkNbH",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Set the values of the first 3 rows from alcohol as NaN\n",
+ "\n",
+ "Hint- Use iloc to select 3 rows of wine_df"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "buyT4vX4kPMl",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "wine_df.iloc[[0,1,2]] = np.nan"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "RQMNI2UHkP3o",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Create an array of 10 random numbers uptill 10 and assign it to a variable named `random`"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "xunmCjaEmDwZ",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "random = np.random.randint(0,10,10)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "hELUakyXmFSu",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Use random numbers you generated as an index and assign NaN value to each of cell of the column alcohol"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "zMgaNnNHmP01",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "wine_df = wine_df.reindex(index=random)\n",
+ "wine_df['Alcohol'] = np.nan"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "PHyK_vRsmRwV",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### How many missing values do we have? \n",
+ "\n",
+ "Hint: you can use isnull() and sum()"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "EnOYhmEqmfKp",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 290
+ },
+ "outputId": "6b955dd1-f1b3-42ec-851c-a355a4a882cf"
+ },
+ "cell_type": "code",
+ "source": [
+ "wine_df.isnull().sum()"
+ ],
+ "execution_count": 58,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Alcohol 10\n",
+ "Malic Acid 2\n",
+ "Ash 2\n",
+ "Alcalinity of ash 2\n",
+ "Magnesium 2\n",
+ "Total phenols 2\n",
+ "Flavanoids 2\n",
+ "Nonflavanoid phenols 2\n",
+ "Proanthocyanins 2\n",
+ "Color intensity 2\n",
+ "Hue 2\n",
+ "0D280/0D315 of diluted wines 2\n",
+ "Proline 2\n",
+ "Acidity 2\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 58
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "-Fd4WBklmf1_",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Delete the rows that contain missing values "
+ ]
+ },
+ {
+ "metadata": {
+ "id": "As7IC6Ktms8-",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 103
+ },
+ "outputId": "ab2fc901-801c-4aaa-f5b8-03ea95c242dc"
+ },
+ "cell_type": "code",
+ "source": [
+ "wine_df.dropna()"
+ ],
+ "execution_count": 59,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Alcohol | \n",
+ " Malic Acid | \n",
+ " Ash | \n",
+ " Alcalinity of ash | \n",
+ " Magnesium | \n",
+ " Total phenols | \n",
+ " Flavanoids | \n",
+ " Nonflavanoid phenols | \n",
+ " Proanthocyanins | \n",
+ " Color intensity | \n",
+ " Hue | \n",
+ " 0D280/0D315 of diluted wines | \n",
+ " Proline | \n",
+ " Acidity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [Alcohol, Malic Acid, Ash, Alcalinity of ash, Magnesium, Total phenols, Flavanoids, Nonflavanoid phenols, Proanthocyanins, Color intensity, Hue, 0D280/0D315 of diluted wines, Proline, Acidity]\n",
+ "Index: []"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 59
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "DlpG8drhmz7W",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "### BONUS: Play with the data set below"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "mD40T0Cnm5SA",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 108
+ },
+ "outputId": "25442923-9c27-4ff3-fc7d-f961605ffac5"
+ },
+ "cell_type": "code",
+ "source": [
+ "wine_df.fillna(value='Fill Value',inplace=True)\n",
+ "wine_df.head(4)['Alcohol']"
+ ],
+ "execution_count": 62,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 Fill Value\n",
+ "5 Fill Value\n",
+ "3 Fill Value\n",
+ "8 Fill Value\n",
+ "Name: Alcohol, dtype: object"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 62
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/Get_to_know_your_Data.ipynb b/Get_to_know_your_Data.ipynb
new file mode 100644
index 0000000..d2ee9de
--- /dev/null
+++ b/Get_to_know_your_Data.ipynb
@@ -0,0 +1,2397 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Get to know your Data.ipynb",
+ "version": "0.3.2",
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "J82LU53m_OU0",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# Get to know your Data\n",
+ "\n",
+ "\n",
+ "#### Import necessary modules\n"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "ZyO1UXL8mtSj",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "yXTzTowtnwGI",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Loading CSV Data to a DataFrame"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "H1Bjlb5wm9f-",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "cell_type": "code",
+ "source": [
+ "iris_df = pd.read_csv('https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv')\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "metadata": {
+ "id": "KE-k7b_Mn5iN",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### See the top 10 rows\n"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "HY2Ps7xMn4ao",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 343
+ },
+ "outputId": "f0213ede-3239-48b7-adae-99d184fd8373"
+ },
+ "cell_type": "code",
+ "source": [
+ "iris_df.head(10)"
+ ],
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " species | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 5.1 | \n",
+ " 3.5 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 4.9 | \n",
+ " 3.0 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 4.7 | \n",
+ " 3.2 | \n",
+ " 1.3 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4.6 | \n",
+ " 3.1 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5.0 | \n",
+ " 3.6 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 5.4 | \n",
+ " 3.9 | \n",
+ " 1.7 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 4.6 | \n",
+ " 3.4 | \n",
+ " 1.4 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 5.0 | \n",
+ " 3.4 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 4.4 | \n",
+ " 2.9 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 4.9 | \n",
+ " 3.1 | \n",
+ " 1.5 | \n",
+ " 0.1 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "0 5.1 3.5 1.4 0.2 setosa\n",
+ "1 4.9 3.0 1.4 0.2 setosa\n",
+ "2 4.7 3.2 1.3 0.2 setosa\n",
+ "3 4.6 3.1 1.5 0.2 setosa\n",
+ "4 5.0 3.6 1.4 0.2 setosa\n",
+ "5 5.4 3.9 1.7 0.4 setosa\n",
+ "6 4.6 3.4 1.4 0.3 setosa\n",
+ "7 5.0 3.4 1.5 0.2 setosa\n",
+ "8 4.4 2.9 1.4 0.2 setosa\n",
+ "9 4.9 3.1 1.5 0.1 setosa"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 4
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "ZQXekIodqOZu",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Find number of rows and columns\n"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "6Y-A-lbFqR82",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 67
+ },
+ "outputId": "312cb316-059a-462a-898d-63387aa2178c"
+ },
+ "cell_type": "code",
+ "source": [
+ "print(iris_df.shape)\n",
+ "\n",
+ "#first is row and second is column\n",
+ "#select row by simple indexing\n",
+ "\n",
+ "print(iris_df.shape[0])\n",
+ "print(iris_df.shape[1])"
+ ],
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "(150, 5)\n",
+ "150\n",
+ "5\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "4ckCiGPhrC_t",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Print all columns"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "S6jgMyRDrF2a",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 67
+ },
+ "outputId": "a3e00efb-0ffe-4dcb-f4a7-6cea68671ca2"
+ },
+ "cell_type": "code",
+ "source": [
+ "print(iris_df.columns)"
+ ],
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n",
+ " 'species'],\n",
+ " dtype='object')\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "kVav5-ACtIqS",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Check Index\n"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "iu3I9zIGtLDX",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "outputId": "a8b5d13c-18df-4b37-adc4-118e1dfcf5db"
+ },
+ "cell_type": "code",
+ "source": [
+ "print(iris_df.index)"
+ ],
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "RangeIndex(start=0, stop=150, step=1)\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "psCc7PborOCQ",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Right now the iris_data set has all the species grouped together let's shuffle it"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "Bxc8i6avrZPw",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 218
+ },
+ "outputId": "29403c79-2312-4782-c4c5-d8fd3353ed0c"
+ },
+ "cell_type": "code",
+ "source": [
+ "#generate a random permutaion on index\n",
+ "\n",
+ "print(iris_df.head())\n",
+ "\n",
+ "new_index = np.random.permutation(iris_df.index)\n",
+ "iris_df = iris_df.reindex(index = new_index)\n",
+ "\n",
+ "print(iris_df.head())"
+ ],
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "0 5.1 3.5 1.4 0.2 setosa\n",
+ "1 4.9 3.0 1.4 0.2 setosa\n",
+ "2 4.7 3.2 1.3 0.2 setosa\n",
+ "3 4.6 3.1 1.5 0.2 setosa\n",
+ "4 5.0 3.6 1.4 0.2 setosa\n",
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "2 4.7 3.2 1.3 0.2 setosa\n",
+ "53 5.5 2.3 4.0 1.3 versicolor\n",
+ "16 5.4 3.9 1.3 0.4 setosa\n",
+ "114 5.8 2.8 5.1 2.4 virginica\n",
+ "128 6.4 2.8 5.6 2.1 virginica\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "j32h8022sRT8",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### We can also apply an operation on whole column of iris_df"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "seYXHXsYsYJI",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 319
+ },
+ "outputId": "b94e8cf9-ee3c-4c76-e9e8-117271d11f82"
+ },
+ "cell_type": "code",
+ "source": [
+ "#original\n",
+ "\n",
+ "print(iris_df.head())\n",
+ "\n",
+ "iris_df['sepal_width'] *= 10\n",
+ "\n",
+ "#changed\n",
+ "\n",
+ "print(iris_df.head())\n",
+ "\n",
+ "#lets undo the operation\n",
+ "\n",
+ "iris_df['sepal_width'] /= 10\n",
+ "\n",
+ "print(iris_df.head())"
+ ],
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "2 4.7 3.2 1.3 0.2 setosa\n",
+ "53 5.5 2.3 4.0 1.3 versicolor\n",
+ "16 5.4 3.9 1.3 0.4 setosa\n",
+ "114 5.8 2.8 5.1 2.4 virginica\n",
+ "128 6.4 2.8 5.6 2.1 virginica\n",
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "2 4.7 32.0 1.3 0.2 setosa\n",
+ "53 5.5 23.0 4.0 1.3 versicolor\n",
+ "16 5.4 39.0 1.3 0.4 setosa\n",
+ "114 5.8 28.0 5.1 2.4 virginica\n",
+ "128 6.4 28.0 5.6 2.1 virginica\n",
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "2 4.7 3.2 1.3 0.2 setosa\n",
+ "53 5.5 2.3 4.0 1.3 versicolor\n",
+ "16 5.4 3.9 1.3 0.4 setosa\n",
+ "114 5.8 2.8 5.1 2.4 virginica\n",
+ "128 6.4 2.8 5.6 2.1 virginica\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "R-Ca-LBLzjiF",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Show all the rows where sepal_width > 3.3"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "WJ7W-F-d0AoZ",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1113
+ },
+ "outputId": "baef2d61-5538-480f-c3bd-a4294ebf12ba"
+ },
+ "cell_type": "code",
+ "source": [
+ "iris_df[iris_df['sepal_width']>3.3]"
+ ],
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " species | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 16 | \n",
+ " 5.4 | \n",
+ " 3.9 | \n",
+ " 1.3 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 5.7 | \n",
+ " 3.8 | \n",
+ " 1.7 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 46 | \n",
+ " 5.1 | \n",
+ " 3.8 | \n",
+ " 1.6 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 109 | \n",
+ " 7.2 | \n",
+ " 3.6 | \n",
+ " 6.1 | \n",
+ " 2.5 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 5.1 | \n",
+ " 3.7 | \n",
+ " 1.5 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 39 | \n",
+ " 5.1 | \n",
+ " 3.4 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 5.0 | \n",
+ " 3.4 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " 5.1 | \n",
+ " 3.5 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 85 | \n",
+ " 6.0 | \n",
+ " 3.4 | \n",
+ " 4.5 | \n",
+ " 1.6 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 4.6 | \n",
+ " 3.4 | \n",
+ " 1.4 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " 5.4 | \n",
+ " 3.4 | \n",
+ " 1.5 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 44 | \n",
+ " 5.1 | \n",
+ " 3.8 | \n",
+ " 1.9 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 5.2 | \n",
+ " 3.4 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 117 | \n",
+ " 7.7 | \n",
+ " 3.8 | \n",
+ " 6.7 | \n",
+ " 2.2 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 4.8 | \n",
+ " 3.4 | \n",
+ " 1.9 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 136 | \n",
+ " 6.3 | \n",
+ " 3.4 | \n",
+ " 5.6 | \n",
+ " 2.4 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 5.2 | \n",
+ " 3.5 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 40 | \n",
+ " 5.0 | \n",
+ " 3.5 | \n",
+ " 1.3 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 5.7 | \n",
+ " 4.4 | \n",
+ " 1.5 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 5.8 | \n",
+ " 4.0 | \n",
+ " 1.2 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 5.4 | \n",
+ " 3.4 | \n",
+ " 1.7 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5.0 | \n",
+ " 3.6 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 4.8 | \n",
+ " 3.4 | \n",
+ " 1.6 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 5.4 | \n",
+ " 3.9 | \n",
+ " 1.7 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 5.0 | \n",
+ " 3.4 | \n",
+ " 1.6 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 5.1 | \n",
+ " 3.5 | \n",
+ " 1.4 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 48 | \n",
+ " 5.3 | \n",
+ " 3.7 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 148 | \n",
+ " 6.2 | \n",
+ " 3.4 | \n",
+ " 5.4 | \n",
+ " 2.3 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 5.4 | \n",
+ " 3.7 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 4.6 | \n",
+ " 3.6 | \n",
+ " 1.0 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 5.2 | \n",
+ " 4.1 | \n",
+ " 1.5 | \n",
+ " 0.1 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 5.1 | \n",
+ " 3.8 | \n",
+ " 1.5 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 5.5 | \n",
+ " 4.2 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 36 | \n",
+ " 5.5 | \n",
+ " 3.5 | \n",
+ " 1.3 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " 5.0 | \n",
+ " 3.5 | \n",
+ " 1.6 | \n",
+ " 0.6 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 131 | \n",
+ " 7.9 | \n",
+ " 3.8 | \n",
+ " 6.4 | \n",
+ " 2.0 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "16 5.4 3.9 1.3 0.4 setosa\n",
+ "18 5.7 3.8 1.7 0.3 setosa\n",
+ "46 5.1 3.8 1.6 0.2 setosa\n",
+ "109 7.2 3.6 6.1 2.5 virginica\n",
+ "21 5.1 3.7 1.5 0.4 setosa\n",
+ "39 5.1 3.4 1.5 0.2 setosa\n",
+ "7 5.0 3.4 1.5 0.2 setosa\n",
+ "0 5.1 3.5 1.4 0.2 setosa\n",
+ "85 6.0 3.4 4.5 1.6 versicolor\n",
+ "6 4.6 3.4 1.4 0.3 setosa\n",
+ "31 5.4 3.4 1.5 0.4 setosa\n",
+ "44 5.1 3.8 1.9 0.4 setosa\n",
+ "28 5.2 3.4 1.4 0.2 setosa\n",
+ "117 7.7 3.8 6.7 2.2 virginica\n",
+ "24 4.8 3.4 1.9 0.2 setosa\n",
+ "136 6.3 3.4 5.6 2.4 virginica\n",
+ "27 5.2 3.5 1.5 0.2 setosa\n",
+ "40 5.0 3.5 1.3 0.3 setosa\n",
+ "15 5.7 4.4 1.5 0.4 setosa\n",
+ "14 5.8 4.0 1.2 0.2 setosa\n",
+ "20 5.4 3.4 1.7 0.2 setosa\n",
+ "4 5.0 3.6 1.4 0.2 setosa\n",
+ "11 4.8 3.4 1.6 0.2 setosa\n",
+ "5 5.4 3.9 1.7 0.4 setosa\n",
+ "26 5.0 3.4 1.6 0.4 setosa\n",
+ "17 5.1 3.5 1.4 0.3 setosa\n",
+ "48 5.3 3.7 1.5 0.2 setosa\n",
+ "148 6.2 3.4 5.4 2.3 virginica\n",
+ "10 5.4 3.7 1.5 0.2 setosa\n",
+ "22 4.6 3.6 1.0 0.2 setosa\n",
+ "32 5.2 4.1 1.5 0.1 setosa\n",
+ "19 5.1 3.8 1.5 0.3 setosa\n",
+ "33 5.5 4.2 1.4 0.2 setosa\n",
+ "36 5.5 3.5 1.3 0.2 setosa\n",
+ "43 5.0 3.5 1.6 0.6 setosa\n",
+ "131 7.9 3.8 6.4 2.0 virginica"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 10
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "gH3DnhCq2Cbl",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Club two filters together - Find all samples where sepal_width > 3.3 and species is versicolor"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "4U7ksr_R2H7M",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 77
+ },
+ "outputId": "9f9cb554-eafe-4ecf-9f6b-c68b94af22b3"
+ },
+ "cell_type": "code",
+ "source": [
+ "iris_df[(iris_df['sepal_width']>3.3) & (iris_df['species'] == 'versicolor')] "
+ ],
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " species | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 85 | \n",
+ " 6.0 | \n",
+ " 3.4 | \n",
+ " 4.5 | \n",
+ " 1.6 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "85 6.0 3.4 4.5 1.6 versicolor"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "1lmnB3ot2u7I",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Sorting a column by value"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "K7KIj6fv2zWP",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1882
+ },
+ "outputId": "0dae1ddf-b2e3-444b-d45a-fb349c32ef9c"
+ },
+ "cell_type": "code",
+ "source": [
+ "iris_df.sort_values(by='sepal_width')#, ascending = False)\n",
+ "#pass ascending = False for descending order"
+ ],
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " species | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 60 | \n",
+ " 5.0 | \n",
+ " 2.0 | \n",
+ " 3.5 | \n",
+ " 1.0 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 68 | \n",
+ " 6.2 | \n",
+ " 2.2 | \n",
+ " 4.5 | \n",
+ " 1.5 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 62 | \n",
+ " 6.0 | \n",
+ " 2.2 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 119 | \n",
+ " 6.0 | \n",
+ " 2.2 | \n",
+ " 5.0 | \n",
+ " 1.5 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 41 | \n",
+ " 4.5 | \n",
+ " 2.3 | \n",
+ " 1.3 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 53 | \n",
+ " 5.5 | \n",
+ " 2.3 | \n",
+ " 4.0 | \n",
+ " 1.3 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 87 | \n",
+ " 6.3 | \n",
+ " 2.3 | \n",
+ " 4.4 | \n",
+ " 1.3 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 93 | \n",
+ " 5.0 | \n",
+ " 2.3 | \n",
+ " 3.3 | \n",
+ " 1.0 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 80 | \n",
+ " 5.5 | \n",
+ " 2.4 | \n",
+ " 3.8 | \n",
+ " 1.1 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 81 | \n",
+ " 5.5 | \n",
+ " 2.4 | \n",
+ " 3.7 | \n",
+ " 1.0 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 57 | \n",
+ " 4.9 | \n",
+ " 2.4 | \n",
+ " 3.3 | \n",
+ " 1.0 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 113 | \n",
+ " 5.7 | \n",
+ " 2.5 | \n",
+ " 5.0 | \n",
+ " 2.0 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 146 | \n",
+ " 6.3 | \n",
+ " 2.5 | \n",
+ " 5.0 | \n",
+ " 1.9 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 106 | \n",
+ " 4.9 | \n",
+ " 2.5 | \n",
+ " 4.5 | \n",
+ " 1.7 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 69 | \n",
+ " 5.6 | \n",
+ " 2.5 | \n",
+ " 3.9 | \n",
+ " 1.1 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 108 | \n",
+ " 6.7 | \n",
+ " 2.5 | \n",
+ " 5.8 | \n",
+ " 1.8 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 72 | \n",
+ " 6.3 | \n",
+ " 2.5 | \n",
+ " 4.9 | \n",
+ " 1.5 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 98 | \n",
+ " 5.1 | \n",
+ " 2.5 | \n",
+ " 3.0 | \n",
+ " 1.1 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 89 | \n",
+ " 5.5 | \n",
+ " 2.5 | \n",
+ " 4.0 | \n",
+ " 1.3 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 79 | \n",
+ " 5.7 | \n",
+ " 2.6 | \n",
+ " 3.5 | \n",
+ " 1.0 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 134 | \n",
+ " 6.1 | \n",
+ " 2.6 | \n",
+ " 5.6 | \n",
+ " 1.4 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 92 | \n",
+ " 5.8 | \n",
+ " 2.6 | \n",
+ " 4.0 | \n",
+ " 1.2 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 118 | \n",
+ " 7.7 | \n",
+ " 2.6 | \n",
+ " 6.9 | \n",
+ " 2.3 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 90 | \n",
+ " 5.5 | \n",
+ " 2.6 | \n",
+ " 4.4 | \n",
+ " 1.2 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 82 | \n",
+ " 5.8 | \n",
+ " 2.7 | \n",
+ " 3.9 | \n",
+ " 1.2 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 111 | \n",
+ " 6.4 | \n",
+ " 2.7 | \n",
+ " 5.3 | \n",
+ " 1.9 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 59 | \n",
+ " 5.2 | \n",
+ " 2.7 | \n",
+ " 3.9 | \n",
+ " 1.4 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 142 | \n",
+ " 5.8 | \n",
+ " 2.7 | \n",
+ " 5.1 | \n",
+ " 1.9 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 101 | \n",
+ " 5.8 | \n",
+ " 2.7 | \n",
+ " 5.1 | \n",
+ " 1.9 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 67 | \n",
+ " 5.8 | \n",
+ " 2.7 | \n",
+ " 4.1 | \n",
+ " 1.0 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 4.8 | \n",
+ " 3.4 | \n",
+ " 1.6 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 5.2 | \n",
+ " 3.4 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 136 | \n",
+ " 6.3 | \n",
+ " 3.4 | \n",
+ " 5.6 | \n",
+ " 2.4 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 4.8 | \n",
+ " 3.4 | \n",
+ " 1.9 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 5.4 | \n",
+ " 3.4 | \n",
+ " 1.7 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 5.0 | \n",
+ " 3.4 | \n",
+ " 1.6 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 5.1 | \n",
+ " 3.5 | \n",
+ " 1.4 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " 5.0 | \n",
+ " 3.5 | \n",
+ " 1.6 | \n",
+ " 0.6 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 5.2 | \n",
+ " 3.5 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " 5.1 | \n",
+ " 3.5 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 40 | \n",
+ " 5.0 | \n",
+ " 3.5 | \n",
+ " 1.3 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 36 | \n",
+ " 5.5 | \n",
+ " 3.5 | \n",
+ " 1.3 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 109 | \n",
+ " 7.2 | \n",
+ " 3.6 | \n",
+ " 6.1 | \n",
+ " 2.5 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5.0 | \n",
+ " 3.6 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 4.6 | \n",
+ " 3.6 | \n",
+ " 1.0 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 48 | \n",
+ " 5.3 | \n",
+ " 3.7 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 5.1 | \n",
+ " 3.7 | \n",
+ " 1.5 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 5.4 | \n",
+ " 3.7 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 5.7 | \n",
+ " 3.8 | \n",
+ " 1.7 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 5.1 | \n",
+ " 3.8 | \n",
+ " 1.5 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 117 | \n",
+ " 7.7 | \n",
+ " 3.8 | \n",
+ " 6.7 | \n",
+ " 2.2 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 131 | \n",
+ " 7.9 | \n",
+ " 3.8 | \n",
+ " 6.4 | \n",
+ " 2.0 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 46 | \n",
+ " 5.1 | \n",
+ " 3.8 | \n",
+ " 1.6 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 44 | \n",
+ " 5.1 | \n",
+ " 3.8 | \n",
+ " 1.9 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 5.4 | \n",
+ " 3.9 | \n",
+ " 1.3 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 5.4 | \n",
+ " 3.9 | \n",
+ " 1.7 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 5.8 | \n",
+ " 4.0 | \n",
+ " 1.2 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 5.2 | \n",
+ " 4.1 | \n",
+ " 1.5 | \n",
+ " 0.1 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 5.5 | \n",
+ " 4.2 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 5.7 | \n",
+ " 4.4 | \n",
+ " 1.5 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
150 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "60 5.0 2.0 3.5 1.0 versicolor\n",
+ "68 6.2 2.2 4.5 1.5 versicolor\n",
+ "62 6.0 2.2 4.0 1.0 versicolor\n",
+ "119 6.0 2.2 5.0 1.5 virginica\n",
+ "41 4.5 2.3 1.3 0.3 setosa\n",
+ "53 5.5 2.3 4.0 1.3 versicolor\n",
+ "87 6.3 2.3 4.4 1.3 versicolor\n",
+ "93 5.0 2.3 3.3 1.0 versicolor\n",
+ "80 5.5 2.4 3.8 1.1 versicolor\n",
+ "81 5.5 2.4 3.7 1.0 versicolor\n",
+ "57 4.9 2.4 3.3 1.0 versicolor\n",
+ "113 5.7 2.5 5.0 2.0 virginica\n",
+ "146 6.3 2.5 5.0 1.9 virginica\n",
+ "106 4.9 2.5 4.5 1.7 virginica\n",
+ "69 5.6 2.5 3.9 1.1 versicolor\n",
+ "108 6.7 2.5 5.8 1.8 virginica\n",
+ "72 6.3 2.5 4.9 1.5 versicolor\n",
+ "98 5.1 2.5 3.0 1.1 versicolor\n",
+ "89 5.5 2.5 4.0 1.3 versicolor\n",
+ "79 5.7 2.6 3.5 1.0 versicolor\n",
+ "134 6.1 2.6 5.6 1.4 virginica\n",
+ "92 5.8 2.6 4.0 1.2 versicolor\n",
+ "118 7.7 2.6 6.9 2.3 virginica\n",
+ "90 5.5 2.6 4.4 1.2 versicolor\n",
+ "82 5.8 2.7 3.9 1.2 versicolor\n",
+ "111 6.4 2.7 5.3 1.9 virginica\n",
+ "59 5.2 2.7 3.9 1.4 versicolor\n",
+ "142 5.8 2.7 5.1 1.9 virginica\n",
+ "101 5.8 2.7 5.1 1.9 virginica\n",
+ "67 5.8 2.7 4.1 1.0 versicolor\n",
+ ".. ... ... ... ... ...\n",
+ "11 4.8 3.4 1.6 0.2 setosa\n",
+ "28 5.2 3.4 1.4 0.2 setosa\n",
+ "136 6.3 3.4 5.6 2.4 virginica\n",
+ "24 4.8 3.4 1.9 0.2 setosa\n",
+ "20 5.4 3.4 1.7 0.2 setosa\n",
+ "26 5.0 3.4 1.6 0.4 setosa\n",
+ "17 5.1 3.5 1.4 0.3 setosa\n",
+ "43 5.0 3.5 1.6 0.6 setosa\n",
+ "27 5.2 3.5 1.5 0.2 setosa\n",
+ "0 5.1 3.5 1.4 0.2 setosa\n",
+ "40 5.0 3.5 1.3 0.3 setosa\n",
+ "36 5.5 3.5 1.3 0.2 setosa\n",
+ "109 7.2 3.6 6.1 2.5 virginica\n",
+ "4 5.0 3.6 1.4 0.2 setosa\n",
+ "22 4.6 3.6 1.0 0.2 setosa\n",
+ "48 5.3 3.7 1.5 0.2 setosa\n",
+ "21 5.1 3.7 1.5 0.4 setosa\n",
+ "10 5.4 3.7 1.5 0.2 setosa\n",
+ "18 5.7 3.8 1.7 0.3 setosa\n",
+ "19 5.1 3.8 1.5 0.3 setosa\n",
+ "117 7.7 3.8 6.7 2.2 virginica\n",
+ "131 7.9 3.8 6.4 2.0 virginica\n",
+ "46 5.1 3.8 1.6 0.2 setosa\n",
+ "44 5.1 3.8 1.9 0.4 setosa\n",
+ "16 5.4 3.9 1.3 0.4 setosa\n",
+ "5 5.4 3.9 1.7 0.4 setosa\n",
+ "14 5.8 4.0 1.2 0.2 setosa\n",
+ "32 5.2 4.1 1.5 0.1 setosa\n",
+ "33 5.5 4.2 1.4 0.2 setosa\n",
+ "15 5.7 4.4 1.5 0.4 setosa\n",
+ "\n",
+ "[150 rows x 5 columns]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 12
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "9jg_Z4YCoMSV",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### List all the unique species"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "M6EN78ufoJY7",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "outputId": "32b420d2-87c2-472c-d3fb-8d75deea2ed7"
+ },
+ "cell_type": "code",
+ "source": [
+ "species = iris_df['species'].unique()\n",
+ "\n",
+ "print(species)"
+ ],
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "['setosa' 'versicolor' 'virginica']\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "wG1i5nxBodmB",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Selecting a particular species using boolean mask (learnt in previous exercise)"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "gZvpbKBwoVUe",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
+ },
+ "outputId": "e65f62c2-cb2d-4e63-c976-f9fa26653a02"
+ },
+ "cell_type": "code",
+ "source": [
+ "setosa = iris_df[iris_df['species'] == species[0]]\n",
+ "\n",
+ "setosa.head()"
+ ],
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " species | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2 | \n",
+ " 4.7 | \n",
+ " 3.2 | \n",
+ " 1.3 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 5.4 | \n",
+ " 3.9 | \n",
+ " 1.3 | \n",
+ " 0.4 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 42 | \n",
+ " 4.4 | \n",
+ " 3.2 | \n",
+ " 1.3 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 5.7 | \n",
+ " 3.8 | \n",
+ " 1.7 | \n",
+ " 0.3 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ " | 46 | \n",
+ " 5.1 | \n",
+ " 3.8 | \n",
+ " 1.6 | \n",
+ " 0.2 | \n",
+ " setosa | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "2 4.7 3.2 1.3 0.2 setosa\n",
+ "16 5.4 3.9 1.3 0.4 setosa\n",
+ "42 4.4 3.2 1.3 0.2 setosa\n",
+ "18 5.7 3.8 1.7 0.3 setosa\n",
+ "46 5.1 3.8 1.6 0.2 setosa"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 14
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "7tumfZ3DotPG",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
+ },
+ "outputId": "b07f8700-e865-4b7b-c13f-e241438aec1f"
+ },
+ "cell_type": "code",
+ "source": [
+ "# do the same for other 2 species \n",
+ "versicolor = iris_df[iris_df['species'] == species[1]]\n",
+ "\n",
+ "versicolor.head()"
+ ],
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " species | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 53 | \n",
+ " 5.5 | \n",
+ " 2.3 | \n",
+ " 4.0 | \n",
+ " 1.3 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 50 | \n",
+ " 7.0 | \n",
+ " 3.2 | \n",
+ " 4.7 | \n",
+ " 1.4 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 77 | \n",
+ " 6.7 | \n",
+ " 3.0 | \n",
+ " 5.0 | \n",
+ " 1.7 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 90 | \n",
+ " 5.5 | \n",
+ " 2.6 | \n",
+ " 4.4 | \n",
+ " 1.2 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ " | 93 | \n",
+ " 5.0 | \n",
+ " 2.3 | \n",
+ " 3.3 | \n",
+ " 1.0 | \n",
+ " versicolor | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "53 5.5 2.3 4.0 1.3 versicolor\n",
+ "50 7.0 3.2 4.7 1.4 versicolor\n",
+ "77 6.7 3.0 5.0 1.7 versicolor\n",
+ "90 5.5 2.6 4.4 1.2 versicolor\n",
+ "93 5.0 2.3 3.3 1.0 versicolor"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 15
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "cUYm5UqVpDPy",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
+ },
+ "outputId": "f01dc7fe-120b-44e1-aa68-d07eeb8d5ff2"
+ },
+ "cell_type": "code",
+ "source": [
+ "virginica = iris_df[iris_df['species'] == species[2]]\n",
+ "\n",
+ "virginica.head()"
+ ],
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " species | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 114 | \n",
+ " 5.8 | \n",
+ " 2.8 | \n",
+ " 5.1 | \n",
+ " 2.4 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 128 | \n",
+ " 6.4 | \n",
+ " 2.8 | \n",
+ " 5.6 | \n",
+ " 2.1 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 105 | \n",
+ " 7.6 | \n",
+ " 3.0 | \n",
+ " 6.6 | \n",
+ " 2.1 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 146 | \n",
+ " 6.3 | \n",
+ " 2.5 | \n",
+ " 5.0 | \n",
+ " 1.9 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ " | 111 | \n",
+ " 6.4 | \n",
+ " 2.7 | \n",
+ " 5.3 | \n",
+ " 1.9 | \n",
+ " virginica | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width species\n",
+ "114 5.8 2.8 5.1 2.4 virginica\n",
+ "128 6.4 2.8 5.6 2.1 virginica\n",
+ "105 7.6 3.0 6.6 2.1 virginica\n",
+ "146 6.3 2.5 5.0 1.9 virginica\n",
+ "111 6.4 2.7 5.3 1.9 virginica"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 16
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "-y1wDc8SpdQs",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Describe each created species to see the difference\n",
+ "\n"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "eHrn3ZVRpOk5",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 284
+ },
+ "outputId": "424fb1fb-87b4-45da-a0eb-0be8e56d1ae1"
+ },
+ "cell_type": "code",
+ "source": [
+ "setosa.describe()"
+ ],
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 50.00000 | \n",
+ " 50.000000 | \n",
+ " 50.000000 | \n",
+ " 50.00000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 5.00600 | \n",
+ " 3.418000 | \n",
+ " 1.464000 | \n",
+ " 0.24400 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 0.35249 | \n",
+ " 0.381024 | \n",
+ " 0.173511 | \n",
+ " 0.10721 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 4.30000 | \n",
+ " 2.300000 | \n",
+ " 1.000000 | \n",
+ " 0.10000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 4.80000 | \n",
+ " 3.125000 | \n",
+ " 1.400000 | \n",
+ " 0.20000 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 5.00000 | \n",
+ " 3.400000 | \n",
+ " 1.500000 | \n",
+ " 0.20000 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 5.20000 | \n",
+ " 3.675000 | \n",
+ " 1.575000 | \n",
+ " 0.30000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 5.80000 | \n",
+ " 4.400000 | \n",
+ " 1.900000 | \n",
+ " 0.60000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width\n",
+ "count 50.00000 50.000000 50.000000 50.00000\n",
+ "mean 5.00600 3.418000 1.464000 0.24400\n",
+ "std 0.35249 0.381024 0.173511 0.10721\n",
+ "min 4.30000 2.300000 1.000000 0.10000\n",
+ "25% 4.80000 3.125000 1.400000 0.20000\n",
+ "50% 5.00000 3.400000 1.500000 0.20000\n",
+ "75% 5.20000 3.675000 1.575000 0.30000\n",
+ "max 5.80000 4.400000 1.900000 0.60000"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 17
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "GwJFT2GlpwUv",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 284
+ },
+ "outputId": "e048426d-e231-45c3-9ad9-d43cb916d000"
+ },
+ "cell_type": "code",
+ "source": [
+ "versicolor.describe()"
+ ],
+ "execution_count": 18,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 50.000000 | \n",
+ " 50.000000 | \n",
+ " 50.000000 | \n",
+ " 50.000000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 5.936000 | \n",
+ " 2.770000 | \n",
+ " 4.260000 | \n",
+ " 1.326000 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 0.516171 | \n",
+ " 0.313798 | \n",
+ " 0.469911 | \n",
+ " 0.197753 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 4.900000 | \n",
+ " 2.000000 | \n",
+ " 3.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 5.600000 | \n",
+ " 2.525000 | \n",
+ " 4.000000 | \n",
+ " 1.200000 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 5.900000 | \n",
+ " 2.800000 | \n",
+ " 4.350000 | \n",
+ " 1.300000 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 6.300000 | \n",
+ " 3.000000 | \n",
+ " 4.600000 | \n",
+ " 1.500000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 7.000000 | \n",
+ " 3.400000 | \n",
+ " 5.100000 | \n",
+ " 1.800000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width\n",
+ "count 50.000000 50.000000 50.000000 50.000000\n",
+ "mean 5.936000 2.770000 4.260000 1.326000\n",
+ "std 0.516171 0.313798 0.469911 0.197753\n",
+ "min 4.900000 2.000000 3.000000 1.000000\n",
+ "25% 5.600000 2.525000 4.000000 1.200000\n",
+ "50% 5.900000 2.800000 4.350000 1.300000\n",
+ "75% 6.300000 3.000000 4.600000 1.500000\n",
+ "max 7.000000 3.400000 5.100000 1.800000"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 18
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "Ad4qhSZLpztf",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 284
+ },
+ "outputId": "6c755e83-6751-4dbe-8185-31e3097704d4"
+ },
+ "cell_type": "code",
+ "source": [
+ "virginica.describe()"
+ ],
+ "execution_count": 19,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 50.00000 | \n",
+ " 50.000000 | \n",
+ " 50.000000 | \n",
+ " 50.00000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 6.58800 | \n",
+ " 2.974000 | \n",
+ " 5.552000 | \n",
+ " 2.02600 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 0.63588 | \n",
+ " 0.322497 | \n",
+ " 0.551895 | \n",
+ " 0.27465 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 4.90000 | \n",
+ " 2.200000 | \n",
+ " 4.500000 | \n",
+ " 1.40000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 6.22500 | \n",
+ " 2.800000 | \n",
+ " 5.100000 | \n",
+ " 1.80000 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 6.50000 | \n",
+ " 3.000000 | \n",
+ " 5.550000 | \n",
+ " 2.00000 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 6.90000 | \n",
+ " 3.175000 | \n",
+ " 5.875000 | \n",
+ " 2.30000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 7.90000 | \n",
+ " 3.800000 | \n",
+ " 6.900000 | \n",
+ " 2.50000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width\n",
+ "count 50.00000 50.000000 50.000000 50.00000\n",
+ "mean 6.58800 2.974000 5.552000 2.02600\n",
+ "std 0.63588 0.322497 0.551895 0.27465\n",
+ "min 4.90000 2.200000 4.500000 1.40000\n",
+ "25% 6.22500 2.800000 5.100000 1.80000\n",
+ "50% 6.50000 3.000000 5.550000 2.00000\n",
+ "75% 6.90000 3.175000 5.875000 2.30000\n",
+ "max 7.90000 3.800000 6.900000 2.50000"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 19
+ }
+ ]
+ },
+ {
+ "metadata": {
+ "id": "Vdu0ulZWtr09",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "#### Let's plot and see the difference"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "PEVMzRvpttmD",
+ "colab_type": "text"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "##### import matplotlib.pyplot "
+ ]
+ },
+ {
+ "metadata": {
+ "id": "rqDXuuAtt7C3",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 398
+ },
+ "outputId": "39a012f1-d57d-48bb-c668-c98cae6c827b"
+ },
+ "cell_type": "code",
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "#hist creates a histogram there are many more plots(see the documentation) you can play with it.\n",
+ "\n",
+ "plt.hist(setosa['sepal_length'])\n",
+ "plt.hist(versicolor['sepal_length'])\n",
+ "plt.hist(virginica['sepal_length'])"
+ ],
+ "execution_count": 20,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(array([ 1., 0., 5., 5., 8., 9., 10., 5., 1., 6.]),\n",
+ " array([4.9, 5.2, 5.5, 5.8, 6.1, 6.4, 6.7, 7. , 7.3, 7.6, 7.9]),\n",
+ " )"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 20
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAd8AAAFKCAYAAABcq1WoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFGdJREFUeJzt3XuMlGfZwOF72ZHy7bLiFnehaItN\no2lsi4W0TaGALXKwxZ7QcgpFYr9EhAImmIIEAwmJkYY2qK1WW6R+EBIsRdgaI40IiWkBjRgUk4aC\nieHQwlKWM4RD5/vDsBELe5idfYZ997r+6r7z7jv3sw+ZHzNTZsvy+Xw+AIBkupR6AADobMQXABIT\nXwBITHwBIDHxBYDExBcAEsuluJP6+hMp7qYoqqsroqHhdKnHaFdZX6P1dXxZX6P1dXwtWWNNTdVV\nb/PM97/kcuWlHqHdZX2N1tfxZX2N1tfxtXWN4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8AJCa+AJCY\n+AJAYi2K765du2L48OGxcuXKiIh47733YsqUKTFp0qSYMmVK1NfXt+uQAJAlzcb39OnTsWjRohg4\ncGDjsaVLl8bYsWNj5cqVMWLEiFi+fHm7DgkAWdJsfLt27Rovv/xy1NbWNh5bsGBBjBo1KiIiqqur\n4+jRo+03IQBkTLPxzeVy0a1bt8uOVVRURHl5eVy8eDFWrVoVDz/8cLsNCABZU/BvNbp48WI888wz\nce+99172kvSVVFdXdKgP2m7qN1Fcix6evb7N13jjuUeLMMm1o6PtYWtlfX0R2V+j9XV8bVljwfH9\n7ne/G3379o2nn3662XM70q+Wqqmp6lC/ArFYsrTmrO9h1tcXkf01Wl/H15I1Fv1XCtbV1cXHPvax\nmDlzZiHfDgCdWrPPfHfu3BmLFy+O/fv3Ry6Xiw0bNsQHH3wQ1113XTz55JMREXHLLbfEwoUL23tW\nAMiEZuN7++23x4oVK1LMAgCdgk+4AoDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWA\nxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABIT\nXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwB\nIDHxBYDExBcAEhNfAEisRfHdtWtXDB8+PFauXBkREe+99148+eSTMXHixJg1a1acO3euXYcEgCxp\nNr6nT5+ORYsWxcCBAxuP/ehHP4qJEyfGqlWrom/fvrFmzZp2HRIAsqTZ+Hbt2jVefvnlqK2tbTy2\nbdu2+NKXvhQREQ888EBs2bKl/SYEgIzJNXtCLhe53OWnnTlzJrp27RoRET179oz6+vr2mQ4AMqjZ\n+DYnn883e051dUXkcuVtvatkamqqSj1Ccllb86X1jF39rXa9n1+N+2m7Xv9qsrZfV9Iea3zr0a8W\n/ZqFqFn/eub3MOvri2jbGguKb0VFRZw9eza6desWBw8evOwl6StpaDhd0HClUFNTFfX1J0o9RnJZ\nWnPKPSzFz60z/BntDGvM8vo6w/61ZI1Nxbmgf2o0aNCg2LBhQ0REvPnmmzFkyJBCLgMAnVKzz3x3\n7twZixcvjv3790cul4sNGzbEkiVLYu7cubF69ero06dPPPbYYylmBYBMaDa+t99+e6xYseIjx5cv\nX94uAwFA1vmEKwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQX\nABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgsVypBwCyb9f/Tmnd+e0zBlwzPPMFgMTEFwASE18A\nSEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx\n8QWAxMQXABITXwBILFfIN506dSrmzJkTx44di/Pnz8f06dNjyJAhxZ4NADKpoPj++te/jptvvjlm\nz54dBw8ejK9//evxu9/9rtizAUAmFfSyc3V1dRw9ejQiIo4fPx7V1dVFHQoAsqygZ76jR4+OtWvX\nxogRI+L48ePxs5/9rNhzAUBmFRTf9evXR58+fWLZsmXxzjvvxLx582Lt2rVXPb+6uiJyufKCh0yt\npqaq1CMk940f/KHN13jjuUeLMElxpNrD9r6fsau/1eZrzFp1qAiTUGxZf5zJ+voi2rbGguK7ffv2\nGDx4cERE3HrrrXHo0KG4ePFilJdfObANDacLHjC1mpqqqK8/UeoxOqRr5eeWcg+vlTXT8WT5z05n\neBxtyRqbinNB7/n27ds3duzYERER+/fvj8rKyquGFwC4XEHPfMeNGxfz5s2LSZMmxYULF2LhwoVF\nHgsAsqug+FZWVsYPf/jDYs8CAJ2CT7gCgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHx\nBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcA\nEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhM\nfAEgMfEFgMTEFwASE18ASKzg+NbV1cUjjzwSY8aMic2bNxdxJADItoLi29DQEC+++GKsWrUqXnrp\npdi4cWOx5wKAzMoV8k1btmyJgQMHRvfu3aN79+6xaNGiYs8FAJlVUHz37dsXZ8+ejalTp8bx48dj\nxowZMXDgwKueX11dEblcecFDplZTU9Xicx+evb5N9/XGc4+26fuvJa35uf2nsau/VeRJ0il0zZD1\nPzutWd9bj361HSdpufvWv96q89uyhwXFNyLi6NGj8cILL8SBAwdi8uTJsWnTpigrK7viuQ0Npwse\nMLWamqqorz+R7P5S3ld7y9JaWqozrpniyPKfndSPo8XSmplbssam4lzQe749e/aM/v37Ry6Xi5tu\nuikqKyvjyJEjhVwKADqdguI7ePDg2Lp1a3z44YfR0NAQp0+fjurq6mLPBgCZVNDLzr169YpRo0bF\n2LFjIyJi/vz50aWLfzIMAC1R8Hu+48ePj/HjxxdzFgDoFDxdBYDExBcAEhNfAEhMfAEgMfEFgMTE\nFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASy5V6\ngM7uGz/4Q6lHyJxZqw4lu69dq6a06/VntevVKZW3Hv1qqUeIiIjPvfJqqUfotDzzBYDExBcAEhNf\nAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEg\nMfEFgMTEFwASE18ASKxN8T179mwMHz481q5dW6x5ACDz2hTfn/70p9GjR49izQIAnULB8d2zZ0/s\n3r077r///iKOAwDZV3B8Fy9eHHPnzi3mLADQKeQK+aZ169bFnXfeGTfeeGOLzq+urohcrryQu7qq\nh2evb/M13nju0Sser6mpavO1U/qfe37Xrtc/86cvt+i8b/zgDwVd/3/uKejbgDZqz8e61lx7V7tN\n0Tqt/Xm05edXUHw3b94ce/fujc2bN8f7778fXbt2jd69e8egQYOueH5Dw+mCB2xP9fUnPnKspqbq\niscBsqa9Hus66uNoa2ZuyRqbinNB8V26dGnjf//4xz+OT33qU1cNLwBwOf/OFwASK+iZ73+aMWNG\nMeYAgE7DM18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwAS\nE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8\nASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWA\nxHKFfuOzzz4bf/nLX+LChQvxzW9+M0aOHFnMuQAgswqK79atW+Pdd9+N1atXR0NDQzz++OPiCwAt\nVFB877777ujXr19ERHz84x+PM2fOxMWLF6O8vLyowwFAFhUU3/Ly8qioqIiIiDVr1sTQoUObDG91\ndUXkctdemGtqqlp1HCBL2vOxrjXX3tVuU7ROa38ebfn5Ffyeb0TE73//+1izZk384he/aPK8hobT\nbbmbdlNff+Ijx2pqqq54HCBr2uuxrqM+jrZm5passak4FxzfP/7xj/HSSy/FK6+8ElVVnikCQEsV\nFN8TJ07Es88+G6+++mp84hOfKPZMAJBpBcX3t7/9bTQ0NMS3v/3txmOLFy+OPn36FG0wAMiqguI7\nbty4GDduXLFnAYBOwSdcAUBi4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8AJCa+AJCY+AJAYuILAImJ\nLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGLiCwCJ5Uo9QCl94wd/KPUIRTFr1aF2vof/a9FZ\nP5xY285zAMW063+ntM912+Wq2eKZLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8A\nJCa+AJCY+AJAYuILAImJLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGK5Qr/x+9//fuzYsSPK\nyspi3rx50a9fv2LOBQCZVVB8//SnP8W//vWvWL16dezZsyfmzZsXq1evLvZsAJBJBb3svGXLlhg+\nfHhERNxyyy1x7NixOHnyZFEHA4CsKii+hw8fjurq6savr7/++qivry/aUACQZQW/5/uf8vl8k7fX\n1FQV424u88Zzjxb9mh3XtfGzuK/UA1wyrtQDAJ1BW9pW0DPf2traOHz4cOPXhw4dipqamoKHAIDO\npKD43nfffbFhw4aIiPjHP/4RtbW10b1796IOBgBZVdDLzgMGDIjbbrstxo8fH2VlZbFgwYJizwUA\nmVWWb+4NWwCgqHzCFQAkJr4AkFhR/qlRR3b27Nn4yle+EtOmTYsxY8Y0Hh82bFj07t07ysvLIyJi\nyZIl0atXr1KN2Wrbtm2LWbNmxWc/+9mIiPjc5z4X3/ve9xpvf/vtt+P555+P8vLyGDp0aEyfPr1U\noxakufV19P27pK6uLl555ZXI5XIxc+bMuP/++xtv6+h7GNH0+rKwh6+99lrU1dU1fr1z587461//\n2vh1XV1d/PKXv4wuXbrE2LFj44knnijFmAVrbn233XZbDBgwoPHrV199tXE/O4JTp07FnDlz4tix\nY3H+/PmYPn16DBkypPH2Nu1fvpN7/vnn82PGjMm//vrrlx1/4IEH8idPnizRVG23devW/IwZM656\n+4MPPpg/cOBA/uLFi/kJEybk33333YTTtV1z6+vo+5fP5/NHjhzJjxw5Mn/ixIn8wYMH8/Pnz7/s\n9o6+h82tLwt7+J+2bduWX7hwYePXp06dyo8cOTJ//Pjx/JkzZ/KjR4/ONzQ0lHDCtvnv9eXz+fw9\n99xTommKY8WKFfklS5bk8/l8/v3338+PGjWq8ba27l+nftl5z549sXv37sv+tt0Z7N27N3r06BE3\n3HBDdOnSJb74xS/Gli1bSj0W/2XLli0xcODA6N69e9TW1saiRYsab8vCHja1vix68cUXY9q0aY1f\n79ixI+64446oqqqKbt26xYABA2L79u0lnLBt/nt9WVBdXR1Hjx6NiIjjx49f9smObd2/Th3fxYsX\nx9y5c696+4IFC2LChAmxZMmSZj/F61q0e/fumDp1akyYMCHeeuutxuP19fVx/fXXN37dUT8e9Grr\nu6Sj79++ffvi7NmzMXXq1Jg4ceJlcc3CHja1vks6+h5e8re//S1uuOGGyz6M6PDhwx1+Dy+50voi\nIs6dOxezZ8+O8ePHx/Lly0s0XeFGjx4dBw4ciBEjRsSkSZNizpw5jbe1df867Xu+69atizvvvDNu\nvPHGK94+c+bMGDJkSPTo0SOmT58eGzZsiC9/+cuJpyzcZz7zmXj66afjwQcfjL1798bkyZPjzTff\njK5du5Z6tKJobn0dff8uOXr0aLzwwgtx4MCBmDx5cmzatCnKyspKPVbRNLW+rOxhRMSaNWvi8ccf\nb/KcjvyXi6ut75lnnolHHnkkysrKYtKkSXHXXXfFHXfcUYIJC7N+/fro06dPLFu2LN55552YN29e\nrF279orntnb/Ou0z382bN8fGjRtj7Nix8dprr8VPfvKTePvttxtvf+yxx6Jnz56Ry+Vi6NChsWvX\nrhJO23q9evWKhx56KMrKyuKmm26KT37yk3Hw4MGI+OjHgx48eDBqa2tLNWpBmlpfRMffv4iInj17\nRv/+/SOXy8VNN90UlZWVceTIkYjIxh42tb6IbOzhJdu2bYv+/ftfduxKH9Pb0fbwkiutLyJiwoQJ\nUVlZGRUVFXHvvfd2uD3cvn17DB48OCIibr311jh06FBcvHgxItq+f502vkuXLo3XX389fvWrX8UT\nTzwR06ZNi0GDBkVExIkTJ+Kpp56Kc+fORUTEn//858b/q7ajqKuri2XLlkXEv1+i/OCDDxr/T9FP\nf/rTcfLkydi3b19cuHAhNm3aFPfdd838WoQWaWp9Wdi/iIjBgwfH1q1b48MPP4yGhoY4ffp043tO\nWdjDptaXlT2M+PdfjCorKz/yqtMXvvCF+Pvf/x7Hjx+PU6dOxfbt2+Ouu+4q0ZSFu9r6/vnPf8bs\n2bMjn8/HhQsXYvv27R1uD/v27Rs7duyIiIj9+/dHZWVl4/+t3db967QvO1/J2rVro6qqKkaMGBFD\nhw6NcePGxXXXXRef//znO9zLXcOGDYvvfOc7sXHjxjh//nwsXLgwfvOb3zSub+HChTF79uyIiHjo\noYfi5ptvLvHErdPc+jr6/kX8+9n9qFGjYuzYsRERMX/+/Fi3bl1m9rC59WVhDyM++v78z3/+87j7\n7rujf//+MXv27HjqqaeirKwspk+fHlVVxf8NcO2tqfX17t07vva1r0WXLl1i2LBh0a9fvxJO2nrj\nxo2LefPmxaRJk+LChQuxcOHCou2fj5cEgMQ67cvOAFAq4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8A\nJCa+AJDY/wP18IOmMPyNQgAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file