diff --git a/Basic_Pandas.ipynb b/Basic_Pandas.ipynb new file mode 100644 index 0000000..8bd7490 --- /dev/null +++ b/Basic_Pandas.ipynb @@ -0,0 +1,964 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Basic Pandas.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "metadata": { + "id": "cGbE814_Xaf9", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Pandas\n", + "\n", + "Pandas is an open-source, BSD-licensed Python library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language. Python with Pandas is used in a wide range of academic and commercial fields, including finance, economics, statistics, and analytics. In this tutorial, we will learn the various features of Python Pandas and how to use them in practice.\n", + "\n", + "\n", + "## Import pandas and numpy" + ] + }, + { + "metadata": { + "id": "irlVYeeAXPDL", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "BI2J-zdMbGwE", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### This is your playground; feel free to explore other functions in pandas\n", + "\n", + "#### Create Series from numpy array, list and dict\n", + "\n", + "Don't know what a series is?\n", + "\n", + "[Series Doc](https://pandas.pydata.org/pandas-docs/version/0.22/generated/pandas.Series.html)" + ] + }, + { + "metadata": { + "id": "GeEct691YGE3", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 138 + }, + "outputId": 
"fcd30705-266a-4f99-a326-9719d81b2795" + }, + "cell_type": "code", + "source": [ + "a_ascii = ord('A')\n", + "z_ascii = ord('Z')\n", + "alphabets = [chr(i) for i in range(a_ascii, z_ascii+1)]\n", + "\n", + "print(alphabets)\n", + "\n", + "numbers = np.arange(26)\n", + "\n", + "print(numbers)\n", + "\n", + "print(type(alphabets), type(numbers))\n", + "\n", + "alpha_numbers = dict(zip(alphabets, numbers))\n", + "\n", + "print(alpha_numbers)\n", + "\n", + "print(type(alpha_numbers))" + ], + "execution_count": 29, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']\n", + "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23\n", + " 24 25]\n", + " \n", + "{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "6ouDfjWab_Mc", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 470 + }, + "outputId": "7d9365bf-4ac9-46de-b228-796205b6bbb5" + }, + "cell_type": "code", + "source": [ + "series1 = pd.Series(alphabets)\n", + "print(series1)" + ], + "execution_count": 28, + "outputs": [ + { + "output_type": "stream", + "text": [ + "0 A\n", + "1 B\n", + "2 C\n", + "3 D\n", + "4 E\n", + "5 F\n", + "6 G\n", + "7 H\n", + "8 I\n", + "9 J\n", + "10 K\n", + "11 L\n", + "12 M\n", + "13 N\n", + "14 O\n", + "15 P\n", + "16 Q\n", + "17 R\n", + "18 S\n", + "19 T\n", + "20 U\n", + "21 V\n", + "22 W\n", + "23 X\n", + "24 Y\n", + "25 Z\n", + "dtype: object\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "At7nY7vVcBZ3", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 470 + }, + "outputId": 
"63723be2-fd64-447e-ac21-b1e750dcd2da" + }, + "cell_type": "code", + "source": [ + "series2 = pd.Series(numbers)\n", + "print(series2)" + ], + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "text": [ + "0 0\n", + "1 1\n", + "2 2\n", + "3 3\n", + "4 4\n", + "5 5\n", + "6 6\n", + "7 7\n", + "8 8\n", + "9 9\n", + "10 10\n", + "11 11\n", + "12 12\n", + "13 13\n", + "14 14\n", + "15 15\n", + "16 16\n", + "17 17\n", + "18 18\n", + "19 19\n", + "20 20\n", + "21 21\n", + "22 22\n", + "23 23\n", + "24 24\n", + "25 25\n", + "dtype: int64\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "J5z-2CWAdH6N", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 470 + }, + "outputId": "ca1cbb9b-d14b-49f1-8bc9-626613206e6e" + }, + "cell_type": "code", + "source": [ + "series3 = pd.Series(alpha_numbers)\n", + "print(series3)" + ], + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "text": [ + "A 0\n", + "B 1\n", + "C 2\n", + "D 3\n", + "E 4\n", + "F 5\n", + "G 6\n", + "H 7\n", + "I 8\n", + "J 9\n", + "K 10\n", + "L 11\n", + "M 12\n", + "N 13\n", + "O 14\n", + "P 15\n", + "Q 16\n", + "R 17\n", + "S 18\n", + "T 19\n", + "U 20\n", + "V 21\n", + "W 22\n", + "X 23\n", + "Y 24\n", + "Z 25\n", + "dtype: int64\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "fYzblGGudKjO", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 151 + }, + "outputId": "4deecd54-4154-4659-865a-71a14d108cc8" + }, + "cell_type": "code", + "source": [ + "#replace head() with head(n) where n can be any number between [0-25] and observe the output in deach case \n", + "series3.head(7)" + ], + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "A 0\n", + "B 1\n", + "C 2\n", + "D 3\n", + "E 4\n", + "F 5\n", + "G 6\n", + "dtype: int64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 25 + } + ] 
+ }, + { + "metadata": { + "id": "OwsJIf5feTtg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Create DataFrame from lists\n", + "\n", + "[DataFrame Doc](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)" + ] + }, + { + "metadata": { + "id": "73UTZ07EdWki", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 817 + }, + "outputId": "35b4c77d-4428-4274-ad27-ec9dbf334f9f" + }, + "cell_type": "code", + "source": [ + "data = {'alphabets': alphabets, 'values': numbers}\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "#Lets Change the column `values` to `alpha_numbers`\n", + "\n", + "df.columns = ['alphabets', 'alpha_numbers']\n", + "\n", + "df" + ], + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alphabetsalpha_numbers
0A0
1B1
2C2
3D3
4E4
5F5
6G6
7H7
8I8
9J9
10K10
11L11
12M12
13N13
14O14
15P15
16Q16
17R17
18S18
19T19
20U20
21V21
22W22
23X23
24Y24
25Z25
\n", + "
" + ], + "text/plain": [ + " alphabets alpha_numbers\n", + "0 A 0\n", + "1 B 1\n", + "2 C 2\n", + "3 D 3\n", + "4 E 4\n", + "5 F 5\n", + "6 G 6\n", + "7 H 7\n", + "8 I 8\n", + "9 J 9\n", + "10 K 10\n", + "11 L 11\n", + "12 M 12\n", + "13 N 13\n", + "14 O 14\n", + "15 P 15\n", + "16 Q 16\n", + "17 R 17\n", + "18 S 18\n", + "19 T 19\n", + "20 U 20\n", + "21 V 21\n", + "22 W 22\n", + "23 X 23\n", + "24 Y 24\n", + "25 Z 25" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 24 + } + ] + }, + { + "metadata": { + "id": "uaK_1EO9etGS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 77 + }, + "outputId": "0305d8f6-dd4a-4716-a00b-b2bbe27ba9c1" + }, + "cell_type": "code", + "source": [ + "# transpose\n", + "\n", + "df.T\n", + "\n", + "# there are many more operations which we can perform look at the documentation with the subsequent exercises we will learn more" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
vowelsaeiou
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4\n", + "vowels a e i o u" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 23 + } + ] + }, + { + "metadata": { + "id": "ZYonoaW8gEAJ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Extract Items from a series" + ] + }, + { + "metadata": { + "id": "tc1-KX_Bfe7U", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + }, + "outputId": "8bd83635-e2ba-4b40-ab64-dcb5e1e07e36" + }, + "cell_type": "code", + "source": [ + "ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))\n", + "pos = [0, 4, 8, 14, 20]\n", + "\n", + "vowels = ser.take(pos)\n", + "\n", + "df = pd.DataFrame(vowels)#, columns=['vowels'])\n", + "\n", + "df.columns = ['vowels']\n", + "\n", + "df.index = [0, 1, 2, 3, 4]\n", + "\n", + "df" + ], + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
vowels
0a
1e
2i
3o
4u
\n", + "
" + ], + "text/plain": [ + " vowels\n", + "0 a\n", + "1 e\n", + "2 i\n", + "3 o\n", + "4 u" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 22 + } + ] + }, + { + "metadata": { + "id": "cmDxwtDNjWpO", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Change the first character of each word to upper case in each word of ser" + ] + }, + { + "metadata": { + "id": "5KagP9PpgV2F", + "colab_type": "code", + "outputId": "e1d65e7d-e497-4ae6-b810-eba5e7551d78", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + } + }, + "cell_type": "code", + "source": [ + "ser = pd.Series(['we', 'are', 'learning', 'pandas'])\n", + "\n", + "ser.map(lambda x : x.title())\n", + "\n", + "titles = [i.title() for i in ser]\n", + "\n", + "titles" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['We', 'Are', 'Learning', 'Pandas']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 21 + } + ] + }, + { + "metadata": { + "id": "qn47ee-MkZN8", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Reindexing" + ] + }, + { + "metadata": { + "id": "h5R0JL2NjuFS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + }, + "outputId": "36acbb0f-f10e-410f-e53c-7fae09464b69" + }, + "cell_type": "code", + "source": [ + "my_index = [1, 2, 3, 4, 5]\n", + "\n", + "df1 = pd.DataFrame({'upper values': ['A', 'B', 'C', 'D', 'E'],\n", + " 'lower values': ['a', 'b', 'c', 'd', 'e']},\n", + " index = my_index)\n", + "\n", + "df1" + ], + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lower valuesupper values
1aA
2bB
3cC
4dD
5eE
\n", + "
" + ], + "text/plain": [ + " lower values upper values\n", + "1 a A\n", + "2 b B\n", + "3 c C\n", + "4 d D\n", + "5 e E" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 20 + } + ] + }, + { + "metadata": { + "id": "G_Frvc3mk93k", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + }, + "outputId": "8919eda1-4e08-4c10-fe08-52824eef2fd8" + }, + "cell_type": "code", + "source": [ + "new_index = [2, 5, 4, 3, 1]\n", + "\n", + "df1.reindex(index = new_index)" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lower valuesupper values
2bB
5eE
4dD
3cC
1aA
\n", + "
" + ], + "text/plain": [ + " lower values upper values\n", + "2 b B\n", + "5 e E\n", + "4 d D\n", + "3 c C\n", + "1 a A" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 19 + } + ] + } + ] +} \ No newline at end of file diff --git a/Exercise.ipynb b/Exercise.ipynb new file mode 100644 index 0000000..588c590 --- /dev/null +++ b/Exercise.ipynb @@ -0,0 +1,596 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Exercise.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "metadata": { + "id": "2LTtpUJEibjg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Pandas Exercise :\n", + "\n", + "\n", + "#### import necessary modules" + ] + }, + { + "metadata": { + "id": "c3_UBbMRhiKx", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import pandas as pd" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "tp-cTCyWi8mR", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Load url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\" to a dataframe named wine_df\n", + "\n", + "This is a wine dataset\n", + "\n" + ] + }, + { + "metadata": { + "id": "DMojQY3thrRi", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "wine_df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "BF9MMjoZjSlg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### print first five rows" + ] + }, + { + "metadata": { + "id": "1vSMQdnHjYNU", + "colab_type": "code", + 
"colab": { + "base_uri": "https://localhost:8080/", + "height": 198 + }, + "outputId": "c6637b1b-d56e-45f4-e507-9a9e2b660ef9" + }, + "cell_type": "code", + "source": [ + "wine_df.head(5)" + ], + "execution_count": 51, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
114.231.712.4315.61272.83.06.282.295.641.043.921065
0113.201.782.1411.21002.652.760.261.284.381.053.401050
1113.162.362.6718.61012.803.240.302.815.681.033.171185
2114.371.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
4114.201.762.4515.21123.273.390.341.976.751.052.851450
\n", + "
" + ], + "text/plain": [ + " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n", + "1 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n", + "3 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.75 1.05 2.85 \n", + "\n", + " 1065 \n", + "0 1050 \n", + "1 1185 \n", + "2 1480 \n", + "3 735 \n", + "4 1450 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 51 + } + ] + }, + { + "metadata": { + "id": "Tet6P2DvjY3T", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### assign wine_df to a different variable wine_df_copy and then delete all odd rows of wine_df_copy\n", + "\n", + "[Hint](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.drop.html)" + ] + }, + { + "metadata": { + "id": "CMj3qSdJjx0u", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 145 + }, + "outputId": "d783ecf3-be0d-410c-fbb7-40cbeeb028a2" + }, + "cell_type": "code", + "source": [ + "wine_df_copy = wine_df.copy()\n", + "wine_df_copy = wine_df_copy[wine_df_copy.index % 2 == 0]\n", + "print(wine_df_copy.head(2))" + ], + "execution_count": 52, + "outputs": [ + { + "output_type": "stream", + "text": [ + " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n", + "\n", + " 1065 \n", + "0 1050 \n", + "2 1480 \n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "o6Cs6T1Rjz71", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Assign the columns as below:\n", + "\n", + "The attributes are (dontated by Riccardo Leardi, riclea '@' anchem.unige.it): \n", + "1) Alcohol \n", + "2) 
Malic acid \n", + "3) Ash \n", + "4) Alcalinity of ash \n", + "5) Magnesium \n", + "6) Total phenols \n", + "7) Flavanoids \n", + "8) Nonflavanoid phenols \n", + "9) Proanthocyanins \n", + "10)Color intensity \n", + "11)Hue \n", + "12)OD280/OD315 of diluted wines \n", + "13)Proline " + ] + }, + { + "metadata": { + "id": "my8HB4V4j779", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "wine_df.columns = ['Alcohol','Malic Acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','0D280/0D315 of diluted wines','Proline','Acidity']" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Zqi7hwWpkNbH", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Set the values of the first 3 rows of the Alcohol column to NaN\n", + "\n", + "Hint: Use iloc to select 3 rows of wine_df" + ] + }, + { + "metadata": { + "id": "buyT4vX4kPMl", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "wine_df.iloc[[0,1,2]] = np.nan" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "RQMNI2UHkP3o", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Create an array of 10 random numbers up to 10 and assign it to a variable named `random`" + ] + }, + { + "metadata": { + "id": "xunmCjaEmDwZ", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "random = np.random.randint(0,10,10)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "hELUakyXmFSu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Use the random numbers you generated as an index and assign a NaN value to each cell of the column Alcohol" + ] + }, + { + "metadata": { + "id": "zMgaNnNHmP01", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "wine_df = wine_df.reindex(index=random)\n", + 
"wine_df['Alcohol'] = np.nan" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "PHyK_vRsmRwV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### How many missing values do we have? \n", + "\n", + "Hint: you can use isnull() and sum()" + ] + }, + { + "metadata": { + "id": "EnOYhmEqmfKp", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 290 + }, + "outputId": "6b955dd1-f1b3-42ec-851c-a355a4a882cf" + }, + "cell_type": "code", + "source": [ + "wine_df.isnull().sum()" + ], + "execution_count": 58, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Alcohol 10\n", + "Malic Acid 2\n", + "Ash 2\n", + "Alcalinity of ash 2\n", + "Magnesium 2\n", + "Total phenols 2\n", + "Flavanoids 2\n", + "Nonflavanoid phenols 2\n", + "Proanthocyanins 2\n", + "Color intensity 2\n", + "Hue 2\n", + "0D280/0D315 of diluted wines 2\n", + "Proline 2\n", + "Acidity 2\n", + "dtype: int64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 58 + } + ] + }, + { + "metadata": { + "id": "-Fd4WBklmf1_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Delete the rows that contain missing values " + ] + }, + { + "metadata": { + "id": "As7IC6Ktms8-", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 103 + }, + "outputId": "ab2fc901-801c-4aaa-f5b8-03ea95c242dc" + }, + "cell_type": "code", + "source": [ + "wine_df.dropna()" + ], + "execution_count": 59, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AlcoholMalic AcidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHue0D280/0D315 of diluted winesProlineAcidity
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Alcohol, Malic Acid, Ash, Alcalinity of ash, Magnesium, Total phenols, Flavanoids, Nonflavanoid phenols, Proanthocyanins, Color intensity, Hue, 0D280/0D315 of diluted wines, Proline, Acidity]\n", + "Index: []" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 59 + } + ] + }, + { + "metadata": { + "id": "DlpG8drhmz7W", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### BONUS: Play with the data set below" + ] + }, + { + "metadata": { + "id": "mD40T0Cnm5SA", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 108 + }, + "outputId": "25442923-9c27-4ff3-fc7d-f961605ffac5" + }, + "cell_type": "code", + "source": [ + "wine_df.fillna(value='Fill Value',inplace=True)\n", + "wine_df.head(4)['Alcohol']" + ], + "execution_count": 62, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 Fill Value\n", + "5 Fill Value\n", + "3 Fill Value\n", + "8 Fill Value\n", + "Name: Alcohol, dtype: object" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 62 + } + ] + } + ] +} \ No newline at end of file diff --git a/Get_to_know_your_Data.ipynb b/Get_to_know_your_Data.ipynb new file mode 100644 index 0000000..d2ee9de --- /dev/null +++ b/Get_to_know_your_Data.ipynb @@ -0,0 +1,2397 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Get to know your Data.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "metadata": { + "id": "J82LU53m_OU0", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Get to know your Data\n", + "\n", + "\n", + "#### Import necessary modules\n" + ] + }, + { + 
"metadata": { + "id": "ZyO1UXL8mtSj", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "yXTzTowtnwGI", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Loading CSV Data to a DataFrame" + ] + }, + { + "metadata": { + "id": "H1Bjlb5wm9f-", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df = pd.read_csv('https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv')\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "KE-k7b_Mn5iN", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### See the top 10 rows\n" + ] + }, + { + "metadata": { + "id": "HY2Ps7xMn4ao", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 343 + }, + "outputId": "f0213ede-3239-48b7-adae-99d184fd8373" + }, + "cell_type": "code", + "source": [ + "iris_df.head(10)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
55.43.91.70.4setosa
64.63.41.40.3setosa
75.03.41.50.2setosa
84.42.91.40.2setosa
94.93.11.50.1setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "6 4.6 3.4 1.4 0.3 setosa\n", + "7 5.0 3.4 1.5 0.2 setosa\n", + "8 4.4 2.9 1.4 0.2 setosa\n", + "9 4.9 3.1 1.5 0.1 setosa" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 4 + } + ] + }, + { + "metadata": { + "id": "ZQXekIodqOZu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Find number of rows and columns\n" + ] + }, + { + "metadata": { + "id": "6Y-A-lbFqR82", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 67 + }, + "outputId": "312cb316-059a-462a-898d-63387aa2178c" + }, + "cell_type": "code", + "source": [ + "print(iris_df.shape)\n", + "\n", + "#first is row and second is column\n", + "#select row by simple indexing\n", + "\n", + "print(iris_df.shape[0])\n", + "print(iris_df.shape[1])" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(150, 5)\n", + "150\n", + "5\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "4ckCiGPhrC_t", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Print all columns" + ] + }, + { + "metadata": { + "id": "S6jgMyRDrF2a", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 67 + }, + "outputId": "a3e00efb-0ffe-4dcb-f4a7-6cea68671ca2" + }, + "cell_type": "code", + "source": [ + "print(iris_df.columns)" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n", + " 'species'],\n", + " dtype='object')\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "kVav5-ACtIqS", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ 
+ "#### Check Index\n" + ] + }, + { + "metadata": { + "id": "iu3I9zIGtLDX", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "a8b5d13c-18df-4b37-adc4-118e1dfcf5db" + }, + "cell_type": "code", + "source": [ + "print(iris_df.index)" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + "RangeIndex(start=0, stop=150, step=1)\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "psCc7PborOCQ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Right now the iris_data set has all the species grouped together let's shuffle it" + ] + }, + { + "metadata": { + "id": "Bxc8i6avrZPw", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 218 + }, + "outputId": "29403c79-2312-4782-c4c5-d8fd3353ed0c" + }, + "cell_type": "code", + "source": [ + "#generate a random permutaion on index\n", + "\n", + "print(iris_df.head())\n", + "\n", + "new_index = np.random.permutation(iris_df.index)\n", + "iris_df = iris_df.reindex(index = new_index)\n", + "\n", + "print(iris_df.head())" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + " sepal_length sepal_width petal_length petal_width species\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "53 5.5 2.3 4.0 1.3 versicolor\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "114 5.8 2.8 5.1 2.4 virginica\n", + "128 6.4 2.8 5.6 2.1 virginica\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "j32h8022sRT8", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### We can also apply an operation on whole column of iris_df" + ] + }, + { + "metadata": { + "id": "seYXHXsYsYJI", + "colab_type": "code", + 
"colab": { + "base_uri": "https://localhost:8080/", + "height": 319 + }, + "outputId": "b94e8cf9-ee3c-4c76-e9e8-117271d11f82" + }, + "cell_type": "code", + "source": [ + "#original\n", + "\n", + "print(iris_df.head())\n", + "\n", + "iris_df['sepal_width'] *= 10\n", + "\n", + "#changed\n", + "\n", + "print(iris_df.head())\n", + "\n", + "#lets undo the operation\n", + "\n", + "iris_df['sepal_width'] /= 10\n", + "\n", + "print(iris_df.head())" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + " sepal_length sepal_width petal_length petal_width species\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "53 5.5 2.3 4.0 1.3 versicolor\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "114 5.8 2.8 5.1 2.4 virginica\n", + "128 6.4 2.8 5.6 2.1 virginica\n", + " sepal_length sepal_width petal_length petal_width species\n", + "2 4.7 32.0 1.3 0.2 setosa\n", + "53 5.5 23.0 4.0 1.3 versicolor\n", + "16 5.4 39.0 1.3 0.4 setosa\n", + "114 5.8 28.0 5.1 2.4 virginica\n", + "128 6.4 28.0 5.6 2.1 virginica\n", + " sepal_length sepal_width petal_length petal_width species\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "53 5.5 2.3 4.0 1.3 versicolor\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "114 5.8 2.8 5.1 2.4 virginica\n", + "128 6.4 2.8 5.6 2.1 virginica\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "R-Ca-LBLzjiF", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Show all the rows where sepal_width > 3.3" + ] + }, + { + "metadata": { + "id": "WJ7W-F-d0AoZ", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1113 + }, + "outputId": "baef2d61-5538-480f-c3bd-a4294ebf12ba" + }, + "cell_type": "code", + "source": [ + "iris_df[iris_df['sepal_width']>3.3]" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
165.43.91.30.4setosa
185.73.81.70.3setosa
465.13.81.60.2setosa
1097.23.66.12.5virginica
215.13.71.50.4setosa
395.13.41.50.2setosa
75.03.41.50.2setosa
05.13.51.40.2setosa
856.03.44.51.6versicolor
64.63.41.40.3setosa
315.43.41.50.4setosa
445.13.81.90.4setosa
285.23.41.40.2setosa
1177.73.86.72.2virginica
244.83.41.90.2setosa
1366.33.45.62.4virginica
275.23.51.50.2setosa
405.03.51.30.3setosa
155.74.41.50.4setosa
145.84.01.20.2setosa
205.43.41.70.2setosa
45.03.61.40.2setosa
114.83.41.60.2setosa
55.43.91.70.4setosa
265.03.41.60.4setosa
175.13.51.40.3setosa
485.33.71.50.2setosa
1486.23.45.42.3virginica
105.43.71.50.2setosa
224.63.61.00.2setosa
325.24.11.50.1setosa
195.13.81.50.3setosa
335.54.21.40.2setosa
365.53.51.30.2setosa
435.03.51.60.6setosa
1317.93.86.42.0virginica
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "18 5.7 3.8 1.7 0.3 setosa\n", + "46 5.1 3.8 1.6 0.2 setosa\n", + "109 7.2 3.6 6.1 2.5 virginica\n", + "21 5.1 3.7 1.5 0.4 setosa\n", + "39 5.1 3.4 1.5 0.2 setosa\n", + "7 5.0 3.4 1.5 0.2 setosa\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "85 6.0 3.4 4.5 1.6 versicolor\n", + "6 4.6 3.4 1.4 0.3 setosa\n", + "31 5.4 3.4 1.5 0.4 setosa\n", + "44 5.1 3.8 1.9 0.4 setosa\n", + "28 5.2 3.4 1.4 0.2 setosa\n", + "117 7.7 3.8 6.7 2.2 virginica\n", + "24 4.8 3.4 1.9 0.2 setosa\n", + "136 6.3 3.4 5.6 2.4 virginica\n", + "27 5.2 3.5 1.5 0.2 setosa\n", + "40 5.0 3.5 1.3 0.3 setosa\n", + "15 5.7 4.4 1.5 0.4 setosa\n", + "14 5.8 4.0 1.2 0.2 setosa\n", + "20 5.4 3.4 1.7 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "11 4.8 3.4 1.6 0.2 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "26 5.0 3.4 1.6 0.4 setosa\n", + "17 5.1 3.5 1.4 0.3 setosa\n", + "48 5.3 3.7 1.5 0.2 setosa\n", + "148 6.2 3.4 5.4 2.3 virginica\n", + "10 5.4 3.7 1.5 0.2 setosa\n", + "22 4.6 3.6 1.0 0.2 setosa\n", + "32 5.2 4.1 1.5 0.1 setosa\n", + "19 5.1 3.8 1.5 0.3 setosa\n", + "33 5.5 4.2 1.4 0.2 setosa\n", + "36 5.5 3.5 1.3 0.2 setosa\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "131 7.9 3.8 6.4 2.0 virginica" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] + }, + { + "metadata": { + "id": "gH3DnhCq2Cbl", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Club two filters together - Find all samples where sepal_width > 3.3 and species is versicolor" + ] + }, + { + "metadata": { + "id": "4U7ksr_R2H7M", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 77 + }, + "outputId": "9f9cb554-eafe-4ecf-9f6b-c68b94af22b3" + }, + "cell_type": "code", + "source": [ + "iris_df[(iris_df['sepal_width']>3.3) & (iris_df['species'] == 'versicolor')] " + ], + "execution_count": 11, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
856.03.44.51.6versicolor
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "85 6.0 3.4 4.5 1.6 versicolor" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 11 + } + ] + }, + { + "metadata": { + "id": "1lmnB3ot2u7I", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Sorting a column by value" + ] + }, + { + "metadata": { + "id": "K7KIj6fv2zWP", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1882 + }, + "outputId": "0dae1ddf-b2e3-444b-d45a-fb349c32ef9c" + }, + "cell_type": "code", + "source": [ + "iris_df.sort_values(by='sepal_width')#, ascending = False)\n", + "#pass ascending = False for descending order" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
605.02.03.51.0versicolor
686.22.24.51.5versicolor
626.02.24.01.0versicolor
1196.02.25.01.5virginica
414.52.31.30.3setosa
535.52.34.01.3versicolor
876.32.34.41.3versicolor
935.02.33.31.0versicolor
805.52.43.81.1versicolor
815.52.43.71.0versicolor
574.92.43.31.0versicolor
1135.72.55.02.0virginica
1466.32.55.01.9virginica
1064.92.54.51.7virginica
695.62.53.91.1versicolor
1086.72.55.81.8virginica
726.32.54.91.5versicolor
985.12.53.01.1versicolor
895.52.54.01.3versicolor
795.72.63.51.0versicolor
1346.12.65.61.4virginica
925.82.64.01.2versicolor
1187.72.66.92.3virginica
905.52.64.41.2versicolor
825.82.73.91.2versicolor
1116.42.75.31.9virginica
595.22.73.91.4versicolor
1425.82.75.11.9virginica
1015.82.75.11.9virginica
675.82.74.11.0versicolor
..................
114.83.41.60.2setosa
285.23.41.40.2setosa
1366.33.45.62.4virginica
244.83.41.90.2setosa
205.43.41.70.2setosa
265.03.41.60.4setosa
175.13.51.40.3setosa
435.03.51.60.6setosa
275.23.51.50.2setosa
05.13.51.40.2setosa
405.03.51.30.3setosa
365.53.51.30.2setosa
1097.23.66.12.5virginica
45.03.61.40.2setosa
224.63.61.00.2setosa
485.33.71.50.2setosa
215.13.71.50.4setosa
105.43.71.50.2setosa
185.73.81.70.3setosa
195.13.81.50.3setosa
1177.73.86.72.2virginica
1317.93.86.42.0virginica
465.13.81.60.2setosa
445.13.81.90.4setosa
165.43.91.30.4setosa
55.43.91.70.4setosa
145.84.01.20.2setosa
325.24.11.50.1setosa
335.54.21.40.2setosa
155.74.41.50.4setosa
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "60 5.0 2.0 3.5 1.0 versicolor\n", + "68 6.2 2.2 4.5 1.5 versicolor\n", + "62 6.0 2.2 4.0 1.0 versicolor\n", + "119 6.0 2.2 5.0 1.5 virginica\n", + "41 4.5 2.3 1.3 0.3 setosa\n", + "53 5.5 2.3 4.0 1.3 versicolor\n", + "87 6.3 2.3 4.4 1.3 versicolor\n", + "93 5.0 2.3 3.3 1.0 versicolor\n", + "80 5.5 2.4 3.8 1.1 versicolor\n", + "81 5.5 2.4 3.7 1.0 versicolor\n", + "57 4.9 2.4 3.3 1.0 versicolor\n", + "113 5.7 2.5 5.0 2.0 virginica\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "106 4.9 2.5 4.5 1.7 virginica\n", + "69 5.6 2.5 3.9 1.1 versicolor\n", + "108 6.7 2.5 5.8 1.8 virginica\n", + "72 6.3 2.5 4.9 1.5 versicolor\n", + "98 5.1 2.5 3.0 1.1 versicolor\n", + "89 5.5 2.5 4.0 1.3 versicolor\n", + "79 5.7 2.6 3.5 1.0 versicolor\n", + "134 6.1 2.6 5.6 1.4 virginica\n", + "92 5.8 2.6 4.0 1.2 versicolor\n", + "118 7.7 2.6 6.9 2.3 virginica\n", + "90 5.5 2.6 4.4 1.2 versicolor\n", + "82 5.8 2.7 3.9 1.2 versicolor\n", + "111 6.4 2.7 5.3 1.9 virginica\n", + "59 5.2 2.7 3.9 1.4 versicolor\n", + "142 5.8 2.7 5.1 1.9 virginica\n", + "101 5.8 2.7 5.1 1.9 virginica\n", + "67 5.8 2.7 4.1 1.0 versicolor\n", + ".. ... ... ... ... 
...\n", + "11 4.8 3.4 1.6 0.2 setosa\n", + "28 5.2 3.4 1.4 0.2 setosa\n", + "136 6.3 3.4 5.6 2.4 virginica\n", + "24 4.8 3.4 1.9 0.2 setosa\n", + "20 5.4 3.4 1.7 0.2 setosa\n", + "26 5.0 3.4 1.6 0.4 setosa\n", + "17 5.1 3.5 1.4 0.3 setosa\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "27 5.2 3.5 1.5 0.2 setosa\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "40 5.0 3.5 1.3 0.3 setosa\n", + "36 5.5 3.5 1.3 0.2 setosa\n", + "109 7.2 3.6 6.1 2.5 virginica\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "22 4.6 3.6 1.0 0.2 setosa\n", + "48 5.3 3.7 1.5 0.2 setosa\n", + "21 5.1 3.7 1.5 0.4 setosa\n", + "10 5.4 3.7 1.5 0.2 setosa\n", + "18 5.7 3.8 1.7 0.3 setosa\n", + "19 5.1 3.8 1.5 0.3 setosa\n", + "117 7.7 3.8 6.7 2.2 virginica\n", + "131 7.9 3.8 6.4 2.0 virginica\n", + "46 5.1 3.8 1.6 0.2 setosa\n", + "44 5.1 3.8 1.9 0.4 setosa\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "14 5.8 4.0 1.2 0.2 setosa\n", + "32 5.2 4.1 1.5 0.1 setosa\n", + "33 5.5 4.2 1.4 0.2 setosa\n", + "15 5.7 4.4 1.5 0.4 setosa\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 12 + } + ] + }, + { + "metadata": { + "id": "9jg_Z4YCoMSV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### List all the unique species" + ] + }, + { + "metadata": { + "id": "M6EN78ufoJY7", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "32b420d2-87c2-472c-d3fb-8d75deea2ed7" + }, + "cell_type": "code", + "source": [ + "species = iris_df['species'].unique()\n", + "\n", + "print(species)" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['setosa' 'versicolor' 'virginica']\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "wG1i5nxBodmB", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Selecting a particular species using boolean mask (learnt in previous exercise)" + ] + }, + { + "metadata": { + "id": 
"gZvpbKBwoVUe", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + }, + "outputId": "e65f62c2-cb2d-4e63-c976-f9fa26653a02" + }, + "cell_type": "code", + "source": [ + "setosa = iris_df[iris_df['species'] == species[0]]\n", + "\n", + "setosa.head()" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
24.73.21.30.2setosa
165.43.91.30.4setosa
424.43.21.30.2setosa
185.73.81.70.3setosa
465.13.81.60.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "42 4.4 3.2 1.3 0.2 setosa\n", + "18 5.7 3.8 1.7 0.3 setosa\n", + "46 5.1 3.8 1.6 0.2 setosa" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 14 + } + ] + }, + { + "metadata": { + "id": "7tumfZ3DotPG", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + }, + "outputId": "b07f8700-e865-4b7b-c13f-e241438aec1f" + }, + "cell_type": "code", + "source": [ + "# do the same for other 2 species \n", + "versicolor = iris_df[iris_df['species'] == species[1]]\n", + "\n", + "versicolor.head()" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
535.52.34.01.3versicolor
507.03.24.71.4versicolor
776.73.05.01.7versicolor
905.52.64.41.2versicolor
935.02.33.31.0versicolor
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "53 5.5 2.3 4.0 1.3 versicolor\n", + "50 7.0 3.2 4.7 1.4 versicolor\n", + "77 6.7 3.0 5.0 1.7 versicolor\n", + "90 5.5 2.6 4.4 1.2 versicolor\n", + "93 5.0 2.3 3.3 1.0 versicolor" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 15 + } + ] + }, + { + "metadata": { + "id": "cUYm5UqVpDPy", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + }, + "outputId": "f01dc7fe-120b-44e1-aa68-d07eeb8d5ff2" + }, + "cell_type": "code", + "source": [ + "virginica = iris_df[iris_df['species'] == species[2]]\n", + "\n", + "virginica.head()" + ], + "execution_count": 16, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
1145.82.85.12.4virginica
1286.42.85.62.1virginica
1057.63.06.62.1virginica
1466.32.55.01.9virginica
1116.42.75.31.9virginica
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "114 5.8 2.8 5.1 2.4 virginica\n", + "128 6.4 2.8 5.6 2.1 virginica\n", + "105 7.6 3.0 6.6 2.1 virginica\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "111 6.4 2.7 5.3 1.9 virginica" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 16 + } + ] + }, + { + "metadata": { + "id": "-y1wDc8SpdQs", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Describe each created species to see the difference\n", + "\n" + ] + }, + { + "metadata": { + "id": "eHrn3ZVRpOk5", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284 + }, + "outputId": "424fb1fb-87b4-45da-a0eb-0be8e56d1ae1" + }, + "cell_type": "code", + "source": [ + "setosa.describe()" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count50.0000050.00000050.00000050.00000
mean5.006003.4180001.4640000.24400
std0.352490.3810240.1735110.10721
min4.300002.3000001.0000000.10000
25%4.800003.1250001.4000000.20000
50%5.000003.4000001.5000000.20000
75%5.200003.6750001.5750000.30000
max5.800004.4000001.9000000.60000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 50.00000 50.000000 50.000000 50.00000\n", + "mean 5.00600 3.418000 1.464000 0.24400\n", + "std 0.35249 0.381024 0.173511 0.10721\n", + "min 4.30000 2.300000 1.000000 0.10000\n", + "25% 4.80000 3.125000 1.400000 0.20000\n", + "50% 5.00000 3.400000 1.500000 0.20000\n", + "75% 5.20000 3.675000 1.575000 0.30000\n", + "max 5.80000 4.400000 1.900000 0.60000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 17 + } + ] + }, + { + "metadata": { + "id": "GwJFT2GlpwUv", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284 + }, + "outputId": "e048426d-e231-45c3-9ad9-d43cb916d000" + }, + "cell_type": "code", + "source": [ + "versicolor.describe()" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count50.00000050.00000050.00000050.000000
mean5.9360002.7700004.2600001.326000
std0.5161710.3137980.4699110.197753
min4.9000002.0000003.0000001.000000
25%5.6000002.5250004.0000001.200000
50%5.9000002.8000004.3500001.300000
75%6.3000003.0000004.6000001.500000
max7.0000003.4000005.1000001.800000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 50.000000 50.000000 50.000000 50.000000\n", + "mean 5.936000 2.770000 4.260000 1.326000\n", + "std 0.516171 0.313798 0.469911 0.197753\n", + "min 4.900000 2.000000 3.000000 1.000000\n", + "25% 5.600000 2.525000 4.000000 1.200000\n", + "50% 5.900000 2.800000 4.350000 1.300000\n", + "75% 6.300000 3.000000 4.600000 1.500000\n", + "max 7.000000 3.400000 5.100000 1.800000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 18 + } + ] + }, + { + "metadata": { + "id": "Ad4qhSZLpztf", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284 + }, + "outputId": "6c755e83-6751-4dbe-8185-31e3097704d4" + }, + "cell_type": "code", + "source": [ + "virginica.describe()" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count50.0000050.00000050.00000050.00000
mean6.588002.9740005.5520002.02600
std0.635880.3224970.5518950.27465
min4.900002.2000004.5000001.40000
25%6.225002.8000005.1000001.80000
50%6.500003.0000005.5500002.00000
75%6.900003.1750005.8750002.30000
max7.900003.8000006.9000002.50000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 50.00000 50.000000 50.000000 50.00000\n", + "mean 6.58800 2.974000 5.552000 2.02600\n", + "std 0.63588 0.322497 0.551895 0.27465\n", + "min 4.90000 2.200000 4.500000 1.40000\n", + "25% 6.22500 2.800000 5.100000 1.80000\n", + "50% 6.50000 3.000000 5.550000 2.00000\n", + "75% 6.90000 3.175000 5.875000 2.30000\n", + "max 7.90000 3.800000 6.900000 2.50000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 19 + } + ] + }, + { + "metadata": { + "id": "Vdu0ulZWtr09", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Let's plot and see the difference" + ] + }, + { + "metadata": { + "id": "PEVMzRvpttmD", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "##### import matplotlib.pyplot " + ] + }, + { + "metadata": { + "id": "rqDXuuAtt7C3", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 398 + }, + "outputId": "39a012f1-d57d-48bb-c668-c98cae6c827b" + }, + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "#hist creates a histogram; there are many more plot types (see the documentation) you can play with.\n", + "\n", + "plt.hist(setosa['sepal_length'])\n", + "plt.hist(versicolor['sepal_length'])\n", + "plt.hist(virginica['sepal_length'])" + ], + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([ 1., 0., 5., 5., 8., 9., 10., 5., 1., 6.]),\n", + " array([4.9, 5.2, 5.5, 5.8, 6.1, 6.4, 6.7, 7. 
, 7.3, 7.6, 7.9]),\n", + " )" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAd8AAAFKCAYAAABcq1WoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFGdJREFUeJzt3XuMlGfZwOF72ZHy7bLiFnehaItN\no2lsi4W0TaGALXKwxZ7QcgpFYr9EhAImmIIEAwmJkYY2qK1WW6R+EBIsRdgaI40IiWkBjRgUk4aC\nieHQwlKWM4RD5/vDsBELe5idfYZ997r+6r7z7jv3sw+ZHzNTZsvy+Xw+AIBkupR6AADobMQXABIT\nXwBITHwBIDHxBYDExBcAEsuluJP6+hMp7qYoqqsroqHhdKnHaFdZX6P1dXxZX6P1dXwtWWNNTdVV\nb/PM97/kcuWlHqHdZX2N1tfxZX2N1tfxtXWN4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8AJCa+AJCY\n+AJAYi2K765du2L48OGxcuXKiIh47733YsqUKTFp0qSYMmVK1NfXt+uQAJAlzcb39OnTsWjRohg4\ncGDjsaVLl8bYsWNj5cqVMWLEiFi+fHm7DgkAWdJsfLt27Rovv/xy1NbWNh5bsGBBjBo1KiIiqqur\n4+jRo+03IQBkTLPxzeVy0a1bt8uOVVRURHl5eVy8eDFWrVoVDz/8cLsNCABZU/BvNbp48WI888wz\nce+99172kvSVVFdXdKgP2m7qN1Fcix6evb7N13jjuUeLMMm1o6PtYWtlfX0R2V+j9XV8bVljwfH9\n7ne/G3379o2nn3662XM70q+Wqqmp6lC/ArFYsrTmrO9h1tcXkf01Wl/H15I1Fv1XCtbV1cXHPvax\nmDlzZiHfDgCdWrPPfHfu3BmLFy+O/fv3Ry6Xiw0bNsQHH3wQ1113XTz55JMREXHLLbfEwoUL23tW\nAMiEZuN7++23x4oVK1LMAgCdgk+4AoDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWA\nxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABIT\nXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwB\nIDHxBYDExBcAEhNfAEisRfHdtWtXDB8+PFauXBkREe+99148+eSTMXHixJg1a1acO3euXYcEgCxp\nNr6nT5+ORYsWxcCBAxuP/ehHP4qJEyfGqlWrom/fvrFmzZp2HRIAsqTZ+Hbt2jVefvnlqK2tbTy2\nbdu2+NKXvhQREQ888EBs2bKl/SYEgIzJNXtCLhe53OWnnTlzJrp27RoRET179oz6+vr2mQ4AMqjZ\n+DYnn883e051dUXkcuVtvatkamqqSj1Ccllb86X1jF39rXa9n1+N+2m7Xv9qsrZfV9Iea3zr0a8W\n/ZqFqFn/eub3MOvri2jbGguKb0VFRZw9eza6desWBw8evOwl6StpaDhd0HClUFNTFfX1J0o9RnJZ\nWnPKPSzFz60z/BntDGvM8vo6w/61ZI1Nxbmgf2o0aNCg2LBhQ0REvPnmmzFkyJBCLgMAnVKzz3x3\n7twZixcvjv3790cul4sNGzbEkiVLYu7cubF69ero06dPPPbYYylmBYBMaDa+t99+e6xYseIjx5cv\nX94uAwFA1vmEKwBITHwBIDHxBYDExBcAEhNfAEh
MfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQX\nABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgsVypBwCyb9f/Tmnd+e0zBlwzPPMFgMTEFwASE18A\nSEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx\n8QWAxMQXABITXwBILFfIN506dSrmzJkTx44di/Pnz8f06dNjyJAhxZ4NADKpoPj++te/jptvvjlm\nz54dBw8ejK9//evxu9/9rtizAUAmFfSyc3V1dRw9ejQiIo4fPx7V1dVFHQoAsqygZ76jR4+OtWvX\nxogRI+L48ePxs5/9rNhzAUBmFRTf9evXR58+fWLZsmXxzjvvxLx582Lt2rVXPb+6uiJyufKCh0yt\npqaq1CMk940f/KHN13jjuUeLMElxpNrD9r6fsau/1eZrzFp1qAiTUGxZf5zJ+voi2rbGguK7ffv2\nGDx4cERE3HrrrXHo0KG4ePFilJdfObANDacLHjC1mpqqqK8/UeoxOqRr5eeWcg+vlTXT8WT5z05n\neBxtyRqbinNB7/n27ds3duzYERER+/fvj8rKyquGFwC4XEHPfMeNGxfz5s2LSZMmxYULF2LhwoVF\nHgsAsqug+FZWVsYPf/jDYs8CAJ2CT7gCgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHx\nBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcA\nEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhM\nfAEgMfEFgMTEFwASE18ASKzg+NbV1cUjjzwSY8aMic2bNxdxJADItoLi29DQEC+++GKsWrUqXnrp\npdi4cWOx5wKAzMoV8k1btmyJgQMHRvfu3aN79+6xaNGiYs8FAJlVUHz37dsXZ8+ejalTp8bx48dj\nxowZMXDgwKueX11dEblcecFDplZTU9Xicx+evb5N9/XGc4+26fuvJa35uf2nsau/VeRJ0il0zZD1\nPzutWd9bj361HSdpufvWv96q89uyhwXFNyLi6NGj8cILL8SBAwdi8uTJsWnTpigrK7viuQ0Npwse\nMLWamqqorz+R7P5S3ld7y9JaWqozrpniyPKfndSPo8XSmplbssam4lzQe749e/aM/v37Ry6Xi5tu\nuikqKyvjyJEjhVwKADqdguI7ePDg2Lp1a3z44YfR0NAQp0+fjurq6mLPBgCZVNDLzr169YpRo0bF\n2LFjIyJi/vz50aWLfzIMAC1R8Hu+48ePj/HjxxdzFgDoFDxdBYDExBcAEhNfAEhMfAEgMfEFgMTE\nFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASy5V6\ngM7uGz/4Q6lHyJxZqw4lu69dq6a06/VntevVKZW3Hv1qqUeIiIjPvfJqqUfotDzzBYDExBcAEhNf\nAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEg\nMfEFgMTEFwASE18ASKxN8T179mwMHz481q5dW6x5ACDz2hTfn/70p9GjR49izQIAnULB8d2zZ0/s\n3r077r///iKOAwDZV3B8Fy9eHHPnzi3mLADQKeQK+aZ169bFnXfeGTfeeGOLzq+urohcrryQu7qq\nh2evb/M13nju0Sser6mpavO1U/qfe37Xrtc/86cvt+i8b/zgDwVd/3/uKejbgDZqz8e61lx7V7tN\n0Tqt/Xm05edXUHw3b94ce/fujc2bN8f7778fXbt2jd69e8egQYOueH5Dw+mCB2xP9fUnPnKspqbq\niscBsqa9Hus
66uNoa2ZuyRqbinNB8V26dGnjf//4xz+OT33qU1cNLwBwOf/OFwASK+iZ73+aMWNG\nMeYAgE7DM18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwAS\nE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8\nASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWA\nxHKFfuOzzz4bf/nLX+LChQvxzW9+M0aOHFnMuQAgswqK79atW+Pdd9+N1atXR0NDQzz++OPiCwAt\nVFB877777ujXr19ERHz84x+PM2fOxMWLF6O8vLyowwFAFhUU3/Ly8qioqIiIiDVr1sTQoUObDG91\ndUXkctdemGtqqlp1HCBL2vOxrjXX3tVuU7ROa38ebfn5Ffyeb0TE73//+1izZk384he/aPK8hobT\nbbmbdlNff+Ijx2pqqq54HCBr2uuxrqM+jrZm5passak4FxzfP/7xj/HSSy/FK6+8ElVVnikCQEsV\nFN8TJ07Es88+G6+++mp84hOfKPZMAJBpBcX3t7/9bTQ0NMS3v/3txmOLFy+OPn36FG0wAMiqguI7\nbty4GDduXLFnAYBOwSdcAUBi4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8AJCa+AJCY+AJAYuILAImJ\nLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGLiCwCJ5Uo9QCl94wd/KPUIRTFr1aF2vof/a9FZ\nP5xY285zAMW063+ntM912+Wq2eKZLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8A\nJCa+AJCY+AJAYuILAImJLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGK5Qr/x+9//fuzYsSPK\nyspi3rx50a9fv2LOBQCZVVB8//SnP8W//vWvWL16dezZsyfmzZsXq1evLvZsAJBJBb3svGXLlhg+\nfHhERNxyyy1x7NixOHnyZFEHA4CsKii+hw8fjurq6savr7/++qivry/aUACQZQW/5/uf8vl8k7fX\n1FQV424u88Zzjxb9mh3XtfGzuK/UA1wyrtQDAJ1BW9pW0DPf2traOHz4cOPXhw4dipqamoKHAIDO\npKD43nfffbFhw4aIiPjHP/4RtbW10b1796IOBgBZVdDLzgMGDIjbbrstxo8fH2VlZbFgwYJizwUA\nmVWWb+4NWwCgqHzCFQAkJr4AkFhR/qlRR3b27Nn4yle+EtOmTYsxY8Y0Hh82bFj07t07ysvLIyJi\nyZIl0atXr1KN2Wrbtm2LWbNmxWc/+9mIiPjc5z4X3/ve9xpvf/vtt+P555+P8vLyGDp0aEyfPr1U\noxakufV19P27pK6uLl555ZXI5XIxc+bMuP/++xtv6+h7GNH0+rKwh6+99lrU1dU1fr1z587461//\n2vh1XV1d/PKXv4wuXbrE2LFj44knnijFmAVrbn233XZbDBgwoPHrV199tXE/O4JTp07FnDlz4tix\nY3H+/PmYPn16DBkypPH2Nu1fvpN7/vnn82PGjMm//vrrlx1/4IEH8idPnizRVG23devW/IwZM656\n+4MPPpg/cOBA/uLFi/kJEybk33333YTTtV1z6+vo+5fP5/NHjhzJjxw5Mn/ixIn8wYMH8/Pnz7/s\n9o6+h82tLwt7+J+2bduWX7hwYePXp06dyo8cOTJ//Pjx/JkzZ/KjR4/ONzQ0lHDCtvnv9eXz+fw9\n99xTommKY8WKFfklS5bk8/l8/v3338+PGjWq8ba27l+nftl5z549sXv37sv+tt0Z7N27N3r06BE3\n3HBDdOnSJb74xS/Gli1bSj0W/2XLli0xcODA6N69e9TW1saiRYsab8vCHja1v
ix68cUXY9q0aY1f\n79ixI+64446oqqqKbt26xYABA2L79u0lnLBt/nt9WVBdXR1Hjx6NiIjjx49f9smObd2/Th3fxYsX\nx9y5c696+4IFC2LChAmxZMmSZj/F61q0e/fumDp1akyYMCHeeuutxuP19fVx/fXXN37dUT8e9Grr\nu6Sj79++ffvi7NmzMXXq1Jg4ceJlcc3CHja1vks6+h5e8re//S1uuOGGyz6M6PDhwx1+Dy+50voi\nIs6dOxezZ8+O8ePHx/Lly0s0XeFGjx4dBw4ciBEjRsSkSZNizpw5jbe1df867Xu+69atizvvvDNu\nvPHGK94+c+bMGDJkSPTo0SOmT58eGzZsiC9/+cuJpyzcZz7zmXj66afjwQcfjL1798bkyZPjzTff\njK5du5Z6tKJobn0dff8uOXr0aLzwwgtx4MCBmDx5cmzatCnKyspKPVbRNLW+rOxhRMSaNWvi8ccf\nb/KcjvyXi6ut75lnnolHHnkkysrKYtKkSXHXXXfFHXfcUYIJC7N+/fro06dPLFu2LN55552YN29e\nrF279orntnb/Ou0z382bN8fGjRtj7Nix8dprr8VPfvKTePvttxtvf+yxx6Jnz56Ry+Vi6NChsWvX\nrhJO23q9evWKhx56KMrKyuKmm26KT37yk3Hw4MGI+OjHgx48eDBqa2tLNWpBmlpfRMffv4iInj17\nRv/+/SOXy8VNN90UlZWVceTIkYjIxh42tb6IbOzhJdu2bYv+/ftfduxKH9Pb0fbwkiutLyJiwoQJ\nUVlZGRUVFXHvvfd2uD3cvn17DB48OCIibr311jh06FBcvHgxItq+f502vkuXLo3XX389fvWrX8UT\nTzwR06ZNi0GDBkVExIkTJ+Kpp56Kc+fORUTEn//858b/q7ajqKuri2XLlkXEv1+i/OCDDxr/T9FP\nf/rTcfLkydi3b19cuHAhNm3aFPfdd838WoQWaWp9Wdi/iIjBgwfH1q1b48MPP4yGhoY4ffp043tO\nWdjDptaXlT2M+PdfjCorKz/yqtMXvvCF+Pvf/x7Hjx+PU6dOxfbt2+Ouu+4q0ZSFu9r6/vnPf8bs\n2bMjn8/HhQsXYvv27R1uD/v27Rs7duyIiIj9+/dHZWVl4/+t3db967QvO1/J2rVro6qqKkaMGBFD\nhw6NcePGxXXXXRef//znO9zLXcOGDYvvfOc7sXHjxjh//nwsXLgwfvOb3zSub+HChTF79uyIiHjo\noYfi5ptvLvHErdPc+jr6/kX8+9n9qFGjYuzYsRERMX/+/Fi3bl1m9rC59WVhDyM++v78z3/+87j7\n7rujf//+MXv27HjqqaeirKwspk+fHlVVxf8NcO2tqfX17t07vva1r0WXLl1i2LBh0a9fvxJO2nrj\nxo2LefPmxaRJk+LChQuxcOHCou2fj5cEgMQ67cvOAFAq4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8A\nJCa+AJDY/wP18IOmMPyNQgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + } + ] +} \ No newline at end of file