From 3fc2fa85c2373f70aadb2f4442834624180c631d Mon Sep 17 00:00:00 2001
From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com>
Date: Mon, 28 Jan 2019 19:49:49 +0530
Subject: [PATCH 1/3] Created using Colaboratory

---
 MLCC_NeuralNetwork(Single_Layer).ipynb | 496 +++++++++++++++++++++++++
 1 file changed, 496 insertions(+)
 create mode 100644 MLCC_NeuralNetwork(Single_Layer).ipynb
diff --git a/MLCC_NeuralNetwork(Single_Layer).ipynb b/MLCC_NeuralNetwork(Single_Layer).ipynb
new file mode 100644
index 0000000..8ee22d7
--- /dev/null
+++ b/MLCC_NeuralNetwork(Single_Layer).ipynb
@@ -0,0 +1,496 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "MLCC NeuralNetwork(Single Layer).ipynb",
+      "version": "0.3.2",
+      "provenance": [],
+      "collapsed_sections": [],
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/ArnabG99/Assignment-1/blob/ArnabG99/MLCC_NeuralNetwork(Single_Layer).ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "metadata": {
+        "id": "RdfxQ2a3qrD1",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "from sklearn import datasets\n",
+        "from pandas import DataFrame as df\n"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "id": "wmOBi6uvqtzZ",
+        "colab_type": "code",
+        "outputId": "9045f72a-6eb7-4403-8624-88ae3fccc532",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 204
+        }
+      },
+      "cell_type": "code",
+      "source": [
+        "np.random.seed(0)  # fix the global RNG so the shuffle below is repeatable on Restart & Run All\n",
+        "iris = datasets.load_iris()\n",
+        "x = iris.data\n",
+        "y = iris.target\n",
+        "x = np.insert(x,x.shape[1],y,axis=1)  # append the class label as one extra, last column\n",
+        "data = pd.DataFrame(x)\n",
+        "data = data.reindex(np.random.permutation(data.index))  # shuffle the rows; each row keeps its original index label\n",
+        "data.head()"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>0</th>\n",
+              "      <th>1</th>\n",
+              "      <th>2</th>\n",
+              "      <th>3</th>\n",
+              "      <th>4</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>77</th>\n",
+              "      <td>6.7</td>\n",
+              "      <td>3.0</td>\n",
+              "      <td>5.0</td>\n",
+              "      <td>1.7</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>70</th>\n",
+              "      <td>5.9</td>\n",
+              "      <td>3.2</td>\n",
+              "      <td>4.8</td>\n",
+              "      <td>1.8</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>15</th>\n",
+              "      <td>5.7</td>\n",
+              "      <td>4.4</td>\n",
+              "      <td>1.5</td>\n",
+              "      <td>0.4</td>\n",
+              "      <td>0.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>40</th>\n",
+              "      <td>5.0</td>\n",
+              "      <td>3.5</td>\n",
+              "      <td>1.3</td>\n",
+              "      <td>0.3</td>\n",
+              "      <td>0.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>107</th>\n",
+              "      <td>7.3</td>\n",
+              "      <td>2.9</td>\n",
+              "      <td>6.3</td>\n",
+              "      <td>1.8</td>\n",
+              "      <td>2.0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "       0    1    2    3    4\n",
+              "77   6.7  3.0  5.0  1.7  1.0\n",
+              "70   5.9  3.2  4.8  1.8  1.0\n",
+              "15   5.7  4.4  1.5  0.4  0.0\n",
+              "40   5.0  3.5  1.3  0.3  0.0\n",
+              "107  7.3  2.9  6.3  1.8  2.0"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 54
+        }
+      ]
+    },
+    {
+      "metadata": {
+        "id": "9rj7RDGJssVD",
+        "colab_type": "code",
+        "outputId": "c788248b-a8e8-4129-f306-6e6f428d91c1",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 2584
+        }
+      },
+      "cell_type": "code",
+      "source": [
+        "# Columns 0-3 are used as the inputs, column 4 as the class label.\n",
+        "features = data.iloc[:,:4]\n",
+        "target = data.iloc[:,4].values.reshape((-1,1))\n",
+        "\n",
+        "# One-hot encode the labels: one row per sample, one column per class.\n",
+        "n_samples = target.shape[0]\n",
+        "target_class = np.zeros(shape=(n_samples,3))\n",
+        "labels = target[:,0]\n",
+        "is_first = labels == 0\n",
+        "is_second = labels == 1\n",
+        "target_class[is_first,0] = 1\n",
+        "target_class[is_second,1] = 1\n",
+        "# Every label that is neither 0 nor 1 is assigned to the last column.\n",
+        "target_class[~(is_first | is_second),2] = 1\n",
+        "\n",
+        "print(target_class.shape)\n",
+        "print(target_class)\n"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "(150, 3)\n",
+            "[[0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [1. 0. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 0. 1.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]\n",
+            " [0. 1. 0.]]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "metadata": {
+        "id": "yBv3zOuYsArP",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "def sigmoid(x):\n",
+        "  \"\"\"Return the logistic function 1 / (1 + exp(-x)), evaluated elementwise by NumPy.\"\"\"\n  return 1/(1+np.exp(-x))\n",
+        "\n",
+        "def der_sigmoid(x):\n",
+        "  \"\"\"Return the derivative of sigmoid at x, i.e. s * (1 - s) with s = sigmoid(x).\"\"\"\n  s = sigmoid(x)  # evaluate the exponential once instead of twice\n  return s*(1-s)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "id": "9WbZu7ihvSSp",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "input_nodes = features.shape[1]  # one input per feature column\n",
+        "output_nodes = 3  # one output unit per column of the one-hot targets\n",
+        "weight_matrix = np.random.uniform(size=(input_nodes,output_nodes))  # initial weights drawn uniformly from [0, 1)\n",
+        "\n",
+        "epoch = 2000  # number of full-batch weight updates performed by the training loop\n",
+        "lr = 0.015  # learning rate: scales the size of every weight update"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "id": "NJzxuP_X0IIn",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "# Full-batch gradient descent: every epoch uses all samples for one weight update.\n# Re-running this cell continues from the current weight_matrix; re-run the cell that\n# initialises it to start over.\n",
+        "for i in range(epoch):\n",
+        "  output_node_input = features.dot(weight_matrix)\n",
+        "  output = sigmoid(output_node_input)\n",
+        "  error = target_class - output\n",
+        "  drv = der_sigmoid(output_node_input)  # activation slope at each pre-activation\n",
+        "  delta_weight = error*drv\n",
+        "  weight_matrix = weight_matrix + lr*features.T.dot(delta_weight)\n",
+        "\n# The loop leaves output one update behind weight_matrix; redo the forward pass so the\n# predictions read by later cells come from the final weights.\noutput_node_input = features.dot(weight_matrix)\noutput = sigmoid(output_node_input)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "id": "6yfIk5ky1EJt",
+        "colab_type": "code",
+        "outputId": "32cb0d02-4dd7-41db-92fe-e1fa8957ba6b",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 136
+        }
+      },
+      "cell_type": "code",
+      "source": [
+        "# The predicted class is the index of the output unit with the largest activation.\n",
+        "final_output = np.array(output)\n",
+        "# argmax along axis 1 picks one class per row; the result is kept as floats.\n",
+        "output_class = np.argmax(final_output,axis=1).astype(float)\n",
+        "\n",
+        "print(output_class)\n",
+        "\n",
+        "# Zero entries mark samples whose predicted class equals the label in target.\n",
+        "class_diff = output_class - target[:,0]"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "[1. 2. 0. 0. 1. 0. 0. 1. 1. 0. 2. 2. 1. 0. 1. 2. 0. 0. 0. 1. 0. 0. 0. 1.\n",
+            " 1. 1. 2. 1. 0. 2. 1. 2. 2. 0. 1. 0. 2. 2. 0. 1. 0. 0. 0. 0. 0. 0. 1. 2.\n",
+            " 2. 1. 0. 0. 2. 2. 0. 1. 2. 2. 0. 2. 0. 0. 1. 1. 0. 0. 0. 0. 1. 1. 2. 1.\n",
+            " 1. 2. 2. 2. 2. 1. 1. 2. 1. 2. 2. 2. 0. 1. 1. 2. 1. 2. 2. 0. 1. 2. 1. 2.\n",
+            " 0. 0. 0. 1. 2. 2. 0. 1. 0. 0. 2. 2. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 1.\n",
+            " 1. 0. 0. 2. 1. 1. 1. 1. 2. 0. 2. 1. 2. 1. 2. 2. 0. 2. 1. 0. 1. 0. 1. 2.\n",
+            " 2. 1. 2. 1. 2. 1.]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "metadata": {
+        "id": "q9xP2z-dEqQC",
+        "colab_type": "code",
+        "outputId": "397bad9d-7060-4797-b494-e9584b5df4f1",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 323
+        }
+      },
+      "cell_type": "code",
+      "source": [
+        "print('Actual Class')\n",
+        "print(target[:,0])\n",
+        "print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>')\n",
+        "print('Predicted Class')\n",
+        "print(output_class)\n",
+        "\n",
+        "# Each non-zero entry of class_diff is one misclassified sample.\n# Note: class_diff is built from predictions on the same samples the weights were fitted to.\n",
+        "N = len(class_diff)\n",
+        "wrong_prediction = np.count_nonzero(class_diff)\n",
+        "simple_accuracy = 100 * (N-wrong_prediction)/N  # percentage of samples predicted correctly\n",
+        "print('Accuracy : ', (simple_accuracy), '%')"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Actual Class\n",
+            "[1. 1. 0. 0. 2. 0. 0. 2. 1. 0. 2. 2. 1. 0. 1. 2. 0. 0. 0. 1. 0. 0. 0. 1.\n",
+            " 1. 1. 2. 1. 0. 2. 2. 2. 2. 0. 1. 0. 2. 2. 0. 2. 0. 0. 0. 0. 0. 0. 1. 2.\n",
+            " 2. 1. 0. 0. 2. 2. 0. 2. 2. 2. 0. 2. 0. 0. 1. 2. 0. 0. 0. 0. 1. 1. 2. 1.\n",
+            " 2. 2. 2. 2. 2. 1. 1. 2. 1. 2. 2. 2. 0. 1. 1. 2. 1. 2. 2. 0. 1. 1. 1. 2.\n",
+            " 0. 0. 0. 1. 2. 2. 0. 1. 0. 0. 2. 2. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 2. 1.\n",
+            " 1. 0. 0. 2. 1. 1. 1. 1. 2. 0. 2. 1. 2. 1. 2. 1. 0. 2. 1. 0. 1. 0. 1. 2.\n",
+            " 2. 1. 2. 1. 1. 1.]\n",
+            ">>>>>>>>>>>>>>>>>>>>>>>>>>>>\n",
+            "Predicted Class\n",
+            "[1. 2. 0. 0. 1. 0. 0. 1. 1. 0. 2. 2. 1. 0. 1. 2. 0. 0. 0. 1. 0. 0. 0. 1.\n",
+            " 1. 1. 2. 1. 0. 2. 1. 2. 2. 0. 1. 0. 2. 2. 0. 1. 0. 0. 0. 0. 0. 0. 1. 2.\n",
+            " 2. 1. 0. 0. 2. 2. 0. 1. 2. 2. 0. 2. 0. 0. 1. 1. 0. 0. 0. 0. 1. 1. 2. 1.\n",
+            " 1. 2. 2. 2. 2. 1. 1. 2. 1. 2. 2. 2. 0. 1. 1. 2. 1. 2. 2. 0. 1. 2. 1. 2.\n",
+            " 0. 0. 0. 1. 2. 2. 0. 1. 0. 0. 2. 2. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 1.\n",
+            " 1. 0. 0. 2. 1. 1. 1. 1. 2. 0. 2. 1. 2. 1. 2. 2. 0. 2. 1. 0. 1. 0. 1. 2.\n",
+            " 2. 1. 2. 1. 2. 1.]\n",
+            "Accuracy :  92.0 %\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file

From 1fac85faf9238c1614d8339e0a9102fad0ed4ea3 Mon Sep 17 00:00:00 2001
From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com>
Date: Wed, 30 Jan 2019 18:23:02 +0530
Subject: [PATCH 2/3] Created using Colaboratory

---
 intro_to_pandas.ipynb | 660 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 660 insertions(+)
 create mode 100644 intro_to_pandas.ipynb

diff --git a/intro_to_pandas.ipynb b/intro_to_pandas.ipynb
new file mode 100644
index 0000000..aa51c27
--- /dev/null
+++ b/intro_to_pandas.ipynb
@@ -0,0 +1,660 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "intro_to_pandas.ipynb",
+      "version": "0.3.2",
+      "provenance": [],
+      "collapsed_sections": [
+        "JndnmDMp66FL",
+        "YHIWvc9Ms-Ll",
+        "TJffr5_Jwqvd"
+      ],
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python2",
+      "display_name": "Python 2"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/ArnabG99/Assignment-1/blob/ArnabG99/intro_to_pandas.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "JndnmDMp66FL"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "#### Copyright 2017 Google LLC."
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "hMqWDc_m6rUC",
+        "cellView": "both",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "# you may not use this file except in compliance with the License.\n",
+        "# You may obtain a copy of the License at\n",
+        "#\n",
+        "# https://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing, software\n",
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "# See the License for the specific language governing permissions and\n",
+        "# limitations under the License."
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "rHLcriKWLRe4"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "# Intro to pandas"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "QvJBqX8_Bctk"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "**Learning Objectives:**\n",
+        "  * Gain an introduction to the `DataFrame` and `Series` data structures of the *pandas* library\n",
+        "  * Access and manipulate data within a `DataFrame` and `Series`\n",
+        "  * Import CSV data into a *pandas* `DataFrame`\n",
+        "  * Reindex a `DataFrame` to shuffle data"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "TIFJ83ZTBctl"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "[*pandas*](http://pandas.pydata.org/) is a column-oriented data analysis API. It's a great tool for handling and analyzing input data, and many ML frameworks support *pandas* data structures as inputs.\n",
+        "Although a comprehensive introduction to the *pandas* API would span many pages, the core concepts are fairly straightforward, and we'll present them below. For a more complete reference, the [*pandas* docs site](http://pandas.pydata.org/pandas-docs/stable/index.html) contains extensive documentation and many tutorials."
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "s_JOISVgmn9v"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "## Basic Concepts\n",
+        "\n",
+        "The following line imports the *pandas* API and prints the API version:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "aSRYu62xUi3g",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "from __future__ import print_function\n",
+        "\n",
+        "import pandas as pd\n",
+        "pd.__version__"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "daQreKXIUslr"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "The primary data structures in *pandas* are implemented as two classes:\n",
+        "\n",
+        "  * **`DataFrame`**, which you can imagine as a relational data table, with rows and named columns.\n",
+        "  * **`Series`**, which is a single column. A `DataFrame` contains one or more `Series` and a name for each `Series`.\n",
+        "\n",
+        "The data frame is a commonly used abstraction for data manipulation. Similar implementations exist in [Spark](https://spark.apache.org/) and [R](https://www.r-project.org/about.html)."
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "fjnAk1xcU0yc"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "One way to create a `Series` is to construct a `Series` object. For example:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "DFZ42Uq7UFDj",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "pd.Series(['San Francisco', 'San Jose', 'Sacramento'])"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "U5ouUp1cU6pC"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "`DataFrame` objects can be created by passing a `dict` mapping `string` column names to their respective `Series`. If the `Series` don't match in length, missing values are filled with special [NA/NaN](http://pandas.pydata.org/pandas-docs/stable/missing_data.html) values. Example:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "avgr6GfiUh8t",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "city_names = pd.Series(['San Francisco', 'San Jose', 'Sacramento'])\n",
+        "population = pd.Series([852469, 1015785, 485199])\n",
+        "\n",
+        "pd.DataFrame({ 'City name': city_names, 'Population': population })"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "oa5wfZT7VHJl"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "But most of the time, you load an entire file into a `DataFrame`. The following example loads a file with California housing data. Run the following cell to load the data and create feature definitions:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "av6RYOraVG1V",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "california_housing_dataframe = pd.read_csv(\"https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv\", sep=\",\")\n",
+        "california_housing_dataframe.describe()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "WrkBjfz5kEQu"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "The example above used `DataFrame.describe` to show interesting statistics about a `DataFrame`. Another useful function is `DataFrame.head`, which displays the first few records of a `DataFrame`:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "s3ND3bgOkB5k",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "california_housing_dataframe.head()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "w9-Es5Y6laGd"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "Another powerful feature of *pandas* is graphing. For example, `DataFrame.hist` lets you quickly study the distribution of values in a column:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "nqndFVXVlbPN",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "california_housing_dataframe.hist('housing_median_age')"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "XtYZ7114n3b-"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "## Accessing Data\n",
+        "\n",
+        "You can access `DataFrame` data using familiar Python dict/list operations:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "_TFm7-looBFF",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "cities = pd.DataFrame({ 'City name': city_names, 'Population': population })\n",
+        "print(type(cities['City name']))\n",
+        "cities['City name']"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "V5L6xacLoxyv",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "print(type(cities['City name'][1]))\n",
+        "cities['City name'][1]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "gcYX1tBPugZl",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "print(type(cities[0:2]))\n",
+        "cities[0:2]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "65g1ZdGVjXsQ"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "In addition, *pandas* provides an extremely rich API for advanced [indexing and selection](http://pandas.pydata.org/pandas-docs/stable/indexing.html) that is too extensive to be covered here."
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "RM1iaD-ka3Y1"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "## Manipulating Data\n",
+        "\n",
+        "You may apply Python's basic arithmetic operations to `Series`. For example:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "XWmyCFJ5bOv-",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "population / 1000."
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "TQzIVnbnmWGM"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "[NumPy](http://www.numpy.org/) is a popular toolkit for scientific computing. *pandas* `Series` can be used as arguments to most NumPy functions:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "ko6pLK6JmkYP",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "\n",
+        "np.log(population)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "xmxFuQmurr6d"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "For more complex single-column transformations, you can use `Series.apply`. Like the Python [map function](https://docs.python.org/2/library/functions.html#map), \n",
+        "`Series.apply` accepts as an argument a [lambda function](https://docs.python.org/2/tutorial/controlflow.html#lambda-expressions), which is applied to each value.\n",
+        "\n",
+        "The example below creates a new `Series` that indicates whether `population` is over one million:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "Fc1DvPAbstjI",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "population.apply(lambda val: val > 1000000)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "ZeYYLoV9b9fB"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "\n",
+        "Modifying `DataFrames` is also straightforward. For example, the following code adds two `Series` to an existing `DataFrame`:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "0gCEX99Hb8LR",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "cities['Area square miles'] = pd.Series([46.87, 176.53, 97.92])\n",
+        "cities['Population density'] = cities['Population'] / cities['Area square miles']\n",
+        "cities"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "6qh63m-ayb-c"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "## Exercise #1\n",
+        "\n",
+        "Modify the `cities` table by adding a new boolean column that is True if and only if *both* of the following are True:\n",
+        "\n",
+        "  * The city is named after a saint.\n",
+        "  * The city has an area greater than 50 square miles.\n",
+        "\n",
+        "**Note:** Boolean `Series` are combined using the bitwise, rather than the traditional boolean, operators. For example, when performing *logical and*, use `&` instead of `and`.\n",
+        "\n",
+        "**Hint:** \"San\" in Spanish means \"saint.\""
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "zCOn8ftSyddH",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "# Your code here"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "YHIWvc9Ms-Ll"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "### Solution\n",
+        "\n",
+        "Click below for a solution."
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "T5OlrqtdtCIb",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "cities['Is wide and has saint name'] = (cities['Area square miles'] > 50) & cities['City name'].apply(lambda name: name.startswith('San'))\n",
+        "cities"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "f-xAOJeMiXFB"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "## Indexes\n",
+        "Both `Series` and `DataFrame` objects also define an `index` property that assigns an identifier value to each `Series` item or `DataFrame` row. \n",
+        "\n",
+        "By default, at construction, *pandas* assigns index values that reflect the ordering of the source data. Once created, the index values are stable; that is, they do not change when data is reordered."
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "2684gsWNinq9",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "city_names.index"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "F_qPe2TBjfWd",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "cities.index"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "hp2oWY9Slo_h"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "Call `DataFrame.reindex` to manually reorder the rows. For example, the following has the same effect as sorting by city name:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "sN0zUzSAj-U1",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "cities.reindex([2, 0, 1])"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "-GQFz8NZuS06"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "Reindexing is a great way to shuffle (randomize) a `DataFrame`. In the example below, we take the index, which is array-like, and pass it to NumPy's `random.permutation` function, which returns a shuffled copy of its values (the original index is left unchanged). Calling `reindex` with this shuffled array causes the `DataFrame` rows to be shuffled in the same way.\n",
+        "Try running the following cell multiple times!"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "mF8GC0k8uYhz",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "cities.reindex(np.random.permutation(cities.index))"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "fSso35fQmGKb"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "For more information, see the [Index documentation](http://pandas.pydata.org/pandas-docs/stable/indexing.html#index-objects)."
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "8UngIdVhz8C0"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "## Exercise #2\n",
+        "\n",
+        "The `reindex` method allows index values that are not in the original `DataFrame`'s index values. Try it and see what happens if you use such values! Why do you think this is allowed?"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "PN55GrDX0jzO",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "# Your code here"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "TJffr5_Jwqvd"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "### Solution\n",
+        "\n",
+        "Click below for the solution."
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "8oSvi2QWwuDH"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "If your `reindex` input array includes values not in the original `DataFrame` index values, `reindex` will add new rows for these \"missing\" indices and populate all corresponding columns with `NaN` values:"
+      ]
+    },
+    {
+      "metadata": {
+        "colab_type": "code",
+        "id": "yBdkucKCwy4x",
+        "colab": {}
+      },
+      "cell_type": "code",
+      "source": [
+        "cities.reindex([0, 4, 5, 2])"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "metadata": {
+        "colab_type": "text",
+        "id": "2l82PhPbwz7g"
+      },
+      "cell_type": "markdown",
+      "source": [
+        "This behavior is desirable because indexes are often strings pulled from the actual data (see the [*pandas* reindex\n",
+        "documentation](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html) for an example\n",
+        "in which the index values are browser names).\n",
+        "\n",
+        "In this case, allowing \"missing\" indices makes it easy to reindex using an external list, as you don't have to worry about\n",
+        "sanitizing the input."
+      ]
+    }
+  ]
+}
\ No newline at end of file

From a7693a493afff154343bbbfc426d55e2a9ad1775 Mon Sep 17 00:00:00 2001
From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com>
Date: Wed, 30 Jan 2019 18:23:37 +0530
Subject: [PATCH 3/3] Delete intro_to_pandas.ipynb

---
 intro_to_pandas.ipynb | 660 ------------------------------------------
 1 file changed, 660 deletions(-)
 delete mode 100644 intro_to_pandas.ipynb

diff --git a/intro_to_pandas.ipynb b/intro_to_pandas.ipynb
deleted file mode 100644
index aa51c27..0000000
--- a/intro_to_pandas.ipynb
+++ /dev/null
@@ -1,660 +0,0 @@
-{
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "name": "intro_to_pandas.ipynb",
-      "version": "0.3.2",
-      "provenance": [],
-      "collapsed_sections": [
-        "JndnmDMp66FL",
-        "YHIWvc9Ms-Ll",
-        "TJffr5_Jwqvd"
-      ],
-      "include_colab_link": true
-    },
-    "kernelspec": {
-      "name": "python2",
-      "display_name": "Python 2"
-    }
-  },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
-      },
-      "source": [
-        "<a href=\"https://colab.research.google.com/github/ArnabG99/Assignment-1/blob/ArnabG99/intro_to_pandas.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "JndnmDMp66FL"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "#### Copyright 2017 Google LLC."
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "hMqWDc_m6rUC",
-        "cellView": "both",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
-        "# you may not use this file except in compliance with the License.\n",
-        "# You may obtain a copy of the License at\n",
-        "#\n",
-        "# https://www.apache.org/licenses/LICENSE-2.0\n",
-        "#\n",
-        "# Unless required by applicable law or agreed to in writing, software\n",
-        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
-        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
-        "# See the License for the specific language governing permissions and\n",
-        "# limitations under the License."
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "rHLcriKWLRe4"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "# Intro to pandas"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "QvJBqX8_Bctk"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "**Learning Objectives:**\n",
-        "  * Gain an introduction to the `DataFrame` and `Series` data structures of the *pandas* library\n",
-        "  * Access and manipulate data within a `DataFrame` and `Series`\n",
-        "  * Import CSV data into a *pandas* `DataFrame`\n",
-        "  * Reindex a `DataFrame` to shuffle data"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "TIFJ83ZTBctl"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "[*pandas*](http://pandas.pydata.org/) is a column-oriented data analysis API. It's a great tool for handling and analyzing input data, and many ML frameworks support *pandas* data structures as inputs.\n",
-        "Although a comprehensive introduction to the *pandas* API would span many pages, the core concepts are fairly straightforward, and we'll present them below. For a more complete reference, the [*pandas* docs site](http://pandas.pydata.org/pandas-docs/stable/index.html) contains extensive documentation and many tutorials."
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "s_JOISVgmn9v"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "## Basic Concepts\n",
-        "\n",
-        "The following line imports the *pandas* API and prints the API version:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "aSRYu62xUi3g",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "from __future__ import print_function\n",
-        "\n",
-        "import pandas as pd\n",
-        "pd.__version__"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "daQreKXIUslr"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "The primary data structures in *pandas* are implemented as two classes:\n",
-        "\n",
-        "  * **`DataFrame`**, which you can imagine as a relational data table, with rows and named columns.\n",
-        "  * **`Series`**, which is a single column. A `DataFrame` contains one or more `Series` and a name for each `Series`.\n",
-        "\n",
-        "The data frame is a commonly used abstraction for data manipulation. Similar implementations exist in [Spark](https://spark.apache.org/) and [R](https://www.r-project.org/about.html)."
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "fjnAk1xcU0yc"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "One way to create a `Series` is to construct a `Series` object. For example:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "DFZ42Uq7UFDj",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "pd.Series(['San Francisco', 'San Jose', 'Sacramento'])"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "U5ouUp1cU6pC"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "`DataFrame` objects can be created by passing a `dict` mapping `string` column names to their respective `Series`. If the `Series` don't match in length, missing values are filled with special [NA/NaN](http://pandas.pydata.org/pandas-docs/stable/missing_data.html) values. Example:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "avgr6GfiUh8t",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "city_names = pd.Series(['San Francisco', 'San Jose', 'Sacramento'])\n",
-        "population = pd.Series([852469, 1015785, 485199])\n",
-        "\n",
-        "pd.DataFrame({ 'City name': city_names, 'Population': population })"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "oa5wfZT7VHJl"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "But most of the time, you load an entire file into a `DataFrame`. The following example loads a file with California housing data. Run the following cell to load the data and create feature definitions:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "av6RYOraVG1V",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "california_housing_dataframe = pd.read_csv(\"https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv\", sep=\",\")\n",
-        "california_housing_dataframe.describe()"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "WrkBjfz5kEQu"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "The example above used `DataFrame.describe` to show interesting statistics about a `DataFrame`. Another useful function is `DataFrame.head`, which displays the first few records of a `DataFrame`:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "s3ND3bgOkB5k",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "california_housing_dataframe.head()"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "w9-Es5Y6laGd"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "Another powerful feature of *pandas* is graphing. For example, `DataFrame.hist` lets you quickly study the distribution of values in a column:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "nqndFVXVlbPN",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "california_housing_dataframe.hist('housing_median_age')"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "XtYZ7114n3b-"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "## Accessing Data\n",
-        "\n",
-        "You can access `DataFrame` data using familiar Python dict/list operations:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "_TFm7-looBFF",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "cities = pd.DataFrame({ 'City name': city_names, 'Population': population })\n",
-        "print(type(cities['City name']))\n",
-        "cities['City name']"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "V5L6xacLoxyv",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "print(type(cities['City name'][1]))\n",
-        "cities['City name'][1]"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "gcYX1tBPugZl",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "print(type(cities[0:2]))\n",
-        "cities[0:2]"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "65g1ZdGVjXsQ"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "In addition, *pandas* provides an extremely rich API for advanced [indexing and selection](http://pandas.pydata.org/pandas-docs/stable/indexing.html) that is too extensive to be covered here."
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "RM1iaD-ka3Y1"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "## Manipulating Data\n",
-        "\n",
-        "You may apply Python's basic arithmetic operations to `Series`. For example:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "XWmyCFJ5bOv-",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "population / 1000."
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "TQzIVnbnmWGM"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "[NumPy](http://www.numpy.org/) is a popular toolkit for scientific computing. *pandas* `Series` can be used as arguments to most NumPy functions:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "ko6pLK6JmkYP",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "import numpy as np\n",
-        "\n",
-        "np.log(population)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "xmxFuQmurr6d"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "For more complex single-column transformations, you can use `Series.apply`. Like the Python [map function](https://docs.python.org/2/library/functions.html#map), \n",
-        "`Series.apply` accepts as an argument a [lambda function](https://docs.python.org/2/tutorial/controlflow.html#lambda-expressions), which is applied to each value.\n",
-        "\n",
-        "The example below creates a new `Series` that indicates whether `population` is over one million:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "Fc1DvPAbstjI",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "population.apply(lambda val: val > 1000000)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "ZeYYLoV9b9fB"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "\n",
-        "Modifying `DataFrames` is also straightforward. For example, the following code adds two `Series` to an existing `DataFrame`:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "0gCEX99Hb8LR",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "cities['Area square miles'] = pd.Series([46.87, 176.53, 97.92])\n",
-        "cities['Population density'] = cities['Population'] / cities['Area square miles']\n",
-        "cities"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "6qh63m-ayb-c"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "## Exercise #1\n",
-        "\n",
-        "Modify the `cities` table by adding a new boolean column that is True if and only if *both* of the following are True:\n",
-        "\n",
-        "  * The city is named after a saint.\n",
-        "  * The city has an area greater than 50 square miles.\n",
-        "\n",
-        "**Note:** Boolean `Series` are combined using the bitwise, rather than the traditional boolean, operators. For example, when performing *logical and*, use `&` instead of `and`.\n",
-        "\n",
-        "**Hint:** \"San\" in Spanish means \"saint.\""
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "zCOn8ftSyddH",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "# Your code here"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "YHIWvc9Ms-Ll"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "### Solution\n",
-        "\n",
-        "Click below for a solution."
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "T5OlrqtdtCIb",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "cities['Is wide and has saint name'] = (cities['Area square miles'] > 50) & cities['City name'].apply(lambda name: name.startswith('San'))\n",
-        "cities"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "f-xAOJeMiXFB"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "## Indexes\n",
-        "Both `Series` and `DataFrame` objects also define an `index` property that assigns an identifier value to each `Series` item or `DataFrame` row. \n",
-        "\n",
-        "By default, at construction, *pandas* assigns index values that reflect the ordering of the source data. Once created, the index values are stable; that is, they do not change when data is reordered."
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "2684gsWNinq9",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "city_names.index"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "F_qPe2TBjfWd",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "cities.index"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "hp2oWY9Slo_h"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "Call `DataFrame.reindex` to manually reorder the rows. For example, the following has the same effect as sorting by city name:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "sN0zUzSAj-U1",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "cities.reindex([2, 0, 1])"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "-GQFz8NZuS06"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "Reindexing is a great way to shuffle (randomize) a `DataFrame`. In the example below, we take the index, which is array-like, and pass it to NumPy's `random.permutation` function, which returns a shuffled copy of its values (the original index is left unchanged). Calling `reindex` with this shuffled array causes the `DataFrame` rows to be shuffled in the same way.\n",
-        "Try running the following cell multiple times!"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "mF8GC0k8uYhz",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "cities.reindex(np.random.permutation(cities.index))"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "fSso35fQmGKb"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "For more information, see the [Index documentation](http://pandas.pydata.org/pandas-docs/stable/indexing.html#index-objects)."
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "8UngIdVhz8C0"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "## Exercise #2\n",
-        "\n",
-        "The `reindex` method allows index values that are not in the original `DataFrame`'s index values. Try it and see what happens if you use such values! Why do you think this is allowed?"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "PN55GrDX0jzO",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "# Your code here"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "TJffr5_Jwqvd"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "### Solution\n",
-        "\n",
-        "Click below for the solution."
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "8oSvi2QWwuDH"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "If your `reindex` input array includes values not in the original `DataFrame` index values, `reindex` will add new rows for these \"missing\" indices and populate all corresponding columns with `NaN` values:"
-      ]
-    },
-    {
-      "metadata": {
-        "colab_type": "code",
-        "id": "yBdkucKCwy4x",
-        "colab": {}
-      },
-      "cell_type": "code",
-      "source": [
-        "cities.reindex([0, 4, 5, 2])"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "metadata": {
-        "colab_type": "text",
-        "id": "2l82PhPbwz7g"
-      },
-      "cell_type": "markdown",
-      "source": [
-        "This behavior is desirable because indexes are often strings pulled from the actual data (see the [*pandas* reindex\n",
-        "documentation](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html) for an example\n",
-        "in which the index values are browser names).\n",
-        "\n",
-        "In this case, allowing \"missing\" indices makes it easy to reindex using an external list, as you don't have to worry about\n",
-        "sanitizing the input."
-      ]
-    }
-  ]
-}
\ No newline at end of file

	0	1	2	3	4
77	6.7	3.0	5.0	1.7	1.0
70	5.9	3.2	4.8	1.8	1.0
15	5.7	4.4	1.5	0.4	0.0
40	5.0	3.5	1.3	0.3	0.0
107	7.3	2.9	6.3	1.8	2.0