From 51cacf72e4a4b666fe0d5a25f3f697162d67c702 Mon Sep 17 00:00:00 2001 From: AGCreates <43198265+AGCreates@users.noreply.github.com> Date: Thu, 27 Sep 2018 23:39:45 +0530 Subject: [PATCH 1/7] Assignment 2 Completed (List + Numpy Exercise) (Examples have been tried out but not appended here) --- AGCreates.ipynb | 284 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 254 insertions(+), 30 deletions(-) diff --git a/AGCreates.ipynb b/AGCreates.ipynb index 9e2543a..be02be8 100644 --- a/AGCreates.ipynb +++ b/AGCreates.ipynb @@ -1,32 +1,256 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "AGCreates.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "[View in Colaboratory](https://colab.research.google.com/github/AGCreates/Assignment-2/blob/AGCreates/AGCreates.ipynb)" + ] + }, + { + "metadata": { + "id": "FTBef_OJr-Mu", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "outputId": "607f64d3-512a-4b67-8c3c-be3d29a4f5be" + }, + "cell_type": "code", + "source": [ + "import random\n", + "dummy_list = [x for x in range (10)]\n", + "print (\"Dummy List\", dummy_list)\n", + 
"dummy_list.reverse()\n", + "print (\" Dummy List Reversed\", dummy_list)\n", + "dummy_list_2 = [2, 200, 16, 4, 1, 0, 9.45, 45.67, 90, 12.01, 12.02]\n", + "dummy_list= dummy_list + dummy_list_2\n", + "print(\"Dummy List after addition \", dummy_list)\n", + "\n", + "dummy_list_freq = []\n", + "dummy_list_unique = []\n", + "for x in dummy_list:\n", + " if x not in dummy_list_unique:\n", + " dummy_list_unique.append(x)\n", + "print (\" Dummy List Unique \",dummy_list_unique)\n", + "\n", + "for x in (dummy_list_unique):\n", + " counter=0\n", + " for y in (dummy_list):\n", + " if x==y:\n", + " counter= counter+1\n", + " dummy_list_freq.append(counter)\n", + " \n", + "dummy_dict = dict()\n", + "for i in range (len(dummy_list_unique)):\n", + " for j in range (len(dummy_list_freq)):\n", + " if i == j: dummy_dict[dummy_list_unique[i]] = dummy_list_freq[j]\n", + "print (\"Dummy Dictionary\", dummy_dict)\n", + "dummy_list.sort()\n", + "print (\"Dummy List Sorted Ascending Order\", dummy_list)\n", + "dummy_list.reverse()\n", + "print (\"Dummy List Sorted Descending Order\",dummy_list) \n", + "dummy_list.remove(200)\n", + "print (\"Dummy List Sorted After removing x= 200\", dummy_list)\n", + "index_remove = random.randint(0, len(dummy_list)-1)\n", + "print (\"Removing the value at radom index \", index_remove, \" which is \")\n", + "dummy_list.pop(index_remove)\n", + "print (\"Dummy List after removal of value at random index \", dummy_list)\n", + "print (\"Cleared Dummy List \", dummy_list.clear())" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Dummy List [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + " Dummy List Reversed [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]\n", + "Dummy List after addition [9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 2, 200, 16, 4, 1, 0, 9.45, 45.67, 90, 12.01, 12.02]\n", + " Dummy List Unique [9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 200, 16, 9.45, 45.67, 90, 12.01, 12.02]\n", + "Dummy Dictionary {9: 1, 8: 1, 7: 1, 6: 1, 5: 1, 4: 2, 3: 1, 2: 2, 1: 2, 0: 
2, 200: 1, 16: 1, 9.45: 1, 45.67: 1, 90: 1, 12.01: 1, 12.02: 1}\n", + "Dummy List Sorted Ascending Order [0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7, 8, 9, 9.45, 12.01, 12.02, 16, 45.67, 90, 200]\n", + "Dummy List Sorted Descending Order [200, 90, 45.67, 16, 12.02, 12.01, 9.45, 9, 8, 7, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "7wkOxQUpzRl2", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 986 + }, + "outputId": "877ad255-e68e-4042-ea73-8d701553ec76" + }, + "cell_type": "code", + "source": [ + "import numpy as np\n", + "\n", + "#Numpy examples have been tried out but have not been added here.\n", + "#This part consists of the Numpy Exercise only \n", + "\n", + "\n", + "unisub = np.linspace(-1.3, 2.5, 64)\n", + "print (\"Uniform Subdivision: \\n\", unisub) \n", + "\n", + "n = 8\n", + "cyc= np.array([1, 2, 3])\n", + "cyc = np.resize(cyc, 3 * n) \n", + "print(\"Cyclic Pattern \\n\", cyc) \n", + "\n", + "oddarr = np.arange(1, 2 * 10, 2)\n", + "print (\" First 10 odd int array:\\n\", oddarr) \n", + "\n", + "\n", + "a = np.array([1,2,3,2,3,4,3,4,5,6])\n", + "b = np.array([7,2,10,2,7,4,9,4,9,8])\n", + "\n", + "intersect = np.intersect1d(a, b)\n", + "print(\"Intersection array \\n\", intersect)\n", + "\n", + "a = np.arange(10)\n", + " \n", + "a = a.reshape(2, 5)\n", + " \n", + "print(\"Reshaped Array \\n\", a)\n", + "\n", + "\n", + "a = [1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + " \n", + "a = np.array(a)\n", + "print(\"List to Numpy Array: \\n\", a)\n", + " \n", + "a= list(a)\n", + "print(\"Numpy Array to List: \\n\", a)\n", + "\n", + "\n", + "\n", + "\n", + "n = 10\n", + "arr = np.zeros(shape = (n, n))\n", + "print(\"Array of Zeroes: \\n\", arr)\n", + "print()\n", + "arr[n - 1, :] = np.ones(n)\n", + "arr[:, 0] = np.ones(n)\n", + "arr[:, n - 1] = np.ones(n)\n", + "arr[0, :] = np.ones(n)\n", + "print(\"Array of Zeroes bounded by 1:\\n\", arr)\n", + " \n", + "\n", + "n=8 \n", + " \n", +
"print(\"Checkerboard:\")\n", + "x = np.zeros((n, n), dtype = int) \n", + "x[1::2, ::2] = 1\n", + "x[::2, 1::2] = 1\n", + " \n", + "for i in range(n): \n", + " for j in range(n): \n", + " print(x[i][j], end= \" \",) \n", + " print() \n", + " " + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Uniform Subdivision: \n", + " [-1.3 -1.23968254 -1.17936508 -1.11904762 -1.05873016 -0.9984127\n", + " -0.93809524 -0.87777778 -0.81746032 -0.75714286 -0.6968254 -0.63650794\n", + " -0.57619048 -0.51587302 -0.45555556 -0.3952381 -0.33492063 -0.27460317\n", + " -0.21428571 -0.15396825 -0.09365079 -0.03333333 0.02698413 0.08730159\n", + " 0.14761905 0.20793651 0.26825397 0.32857143 0.38888889 0.44920635\n", + " 0.50952381 0.56984127 0.63015873 0.69047619 0.75079365 0.81111111\n", + " 0.87142857 0.93174603 0.99206349 1.05238095 1.11269841 1.17301587\n", + " 1.23333333 1.29365079 1.35396825 1.41428571 1.47460317 1.53492063\n", + " 1.5952381 1.65555556 1.71587302 1.77619048 1.83650794 1.8968254\n", + " 1.95714286 2.01746032 2.07777778 2.13809524 2.1984127 2.25873016\n", + " 2.31904762 2.37936508 2.43968254 2.5 ]\n", + "Cyclic Pattern \n", + " [1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3]\n", + " First 10 odd int array:\n", + " [ 1 3 5 7 9 11 13 15 17 19]\n", + "Intersection array \n", + " [2 4]\n", + "Reshaped Array \n", + " [[0 1 2 3 4]\n", + " [5 6 7 8 9]]\n", + "List to Numpy Array: \n", + " [1 2 3 4 5 6 7 8 9]\n", + "Numpy Array to List: \n", + " [1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + "Array of Zeroes: \n", + " [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 
0.]]\n", + "\n", + "Array of Zeroes bounded by 1:\n", + " [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n", + " [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n", + " [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n", + " [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n", + " [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n", + " [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n", + " [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n", + " [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n", + " [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n", + " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n", + "Checkerboard:\n", + "0 1 0 1 0 1 0 1 \n", + "1 0 1 0 1 0 1 0 \n", + "0 1 0 1 0 1 0 1 \n", + "1 0 1 0 1 0 1 0 \n", + "0 1 0 1 0 1 0 1 \n", + "1 0 1 0 1 0 1 0 \n", + "0 1 0 1 0 1 0 1 \n", + "1 0 1 0 1 0 1 0 \n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "dozwmoLWM_jJ", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From ce1fe30ecc63b281f890878b8dff20c8dcd8d336 Mon Sep 17 00:00:00 2001 From: AGCreates <43198265+AGCreates@users.noreply.github.com> Date: Tue, 2 Oct 2018 15:32:51 +0530 Subject: [PATCH 2/7] Numpy Examples Done by AGCreates. List Exercise and Numpy Exercise Already Done So Total Assignment Complete. 
--- Numpy_Examples_1.ipynb | 524 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 524 insertions(+) create mode 100644 Numpy_Examples_1.ipynb diff --git a/Numpy_Examples_1.ipynb b/Numpy_Examples_1.ipynb new file mode 100644 index 0000000..d2f1cfe --- /dev/null +++ b/Numpy_Examples_1.ipynb @@ -0,0 +1,524 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Numpy_Examples 1.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "[View in Colaboratory](https://colab.research.google.com/github/AGCreates/Assignment-2/blob/AGCreates/Numpy_Examples_1.ipynb)" + ] + }, + { + "metadata": { + "id": "3pSVAeWfuPcq", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Numpy Examples\n", + "\n", + "## What is numpy?\n", + "\n", + "#### Python has built-in:\n", + "\n", + "- containers: lists (costless insertion and append), dictionaries (fast lookup)\n", + "- high-level number objects: integers, floating point\n", + "\n", + "#### Numpy is:\n", + "\n", + " - extension package to Python for multidimensional arrays\n", + " - closer to hardware (efficiency)\n", + " - designed for scientific computation (convenience)\n", + "\n", + "\n", + "#### Import numpy\n", + "\n" + ] + }, + { + "metadata": { + "id": "ozUi4_X55UHE", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "3-1ghFDF5N2z", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Uncomment Print statement and run each cell to see the output\n", + "\n", + "#### Create numpy arrays\n" + ] + }, + { + "metadata": { + "id": "atYpk2ert0b-", + "colab_type": "code", + "colab": { +
"base_uri": "https://localhost:8080/", + "height": 85 + }, + "outputId": "1f4a43a7-7c79-4f79-9da9-8f6c9c6e9397" + }, + "cell_type": "code", + "source": [ + "a = np.array([1, 2, 3]) # Create a rank 1 array\n", + "print(a)\n", + "print(type(a)) #print type of a\n", + "\n", + "b = np.array([[1,2,3],[4,5,6]]) # Create a rank 2 array\n", + "print(b.shape) # Prints \"(2, 3)\"\n", + "print(b[0, 0], b[0, 1], b[1, 0])" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[1 2 3]\n", + "<class 'numpy.ndarray'>\n", + "(2, 3)\n", + "1 2 4\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "Kro5ZOwXue5n", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Some basic functions for creating arrays. Print all the defined arrays and see the results." + ] + }, + { + "metadata": { + "id": "V3rdzgr9uhHS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + }, + "outputId": "82021c72-1d40-4494-d3bd-605a51a607a5" + }, + "cell_type": "code", + "source": [ + "a = np.zeros(shape=(2,2))\n", + "b = np.ones(shape = (3,3))\n", + "c = np.eye(2)\n", + "d = np.full(shape=(3,3), fill_value=5)\n", + "e = np.random.random((2,2))\n", + "\n", + "print('a', a)\n", + "print('b',b)\n", + "print('c',c)\n", + "print('d',d)\n", + "print('e',e)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "a [[0. 0.]\n", + " [0. 0.]]\n", + "b [[1. 1. 1.]\n", + " [1. 1. 1.]\n", + " [1. 1. 1.]]\n", + "c [[1. 0.]\n", + " [0. 
1.]]\n", + "d [[5 5 5]\n", + " [5 5 5]\n", + " [5 5 5]]\n", + "e [[0.97720034 0.28219433]\n", + " [0.05717303 0.44113851]]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "8RPW_SutukjF", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Execute and understand :)" + ] + }, + { + "metadata": { + "id": "-8JuqYt4upeo", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 170 + }, + "outputId": "21f8bc53-208a-40c7-cb4b-59b75437d2e9" + }, + "cell_type": "code", + "source": [ + "a == np.arange(10)\n", + "b == np.linspace(0,10, num=6)\n", + "print(a)\n", + "print(b)" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0. 0.]\n", + " [0. 0.]]\n", + "[[1. 1. 1.]\n", + " [1. 1. 1.]\n", + " [1. 1. 1.]]\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: DeprecationWarning: elementwise == comparison failed; this will raise an error in the future.\n", + " \"\"\"Entry point for launching an IPython kernel.\n", + "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:2: DeprecationWarning: elementwise == comparison failed; this will raise an error in the future.\n", + " \n" + ], + "name": "stderr" + } + ] + }, + { + "metadata": { + "id": "MRHhbjx4uvYN", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Array Indexing" + ] + }, + { + "metadata": { + "id": "grF5_yUSuxVK", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "outputId": "36940c6d-6d74-456b-9481-1f6af2242b3e" + }, + "cell_type": "code", + "source": [ + "a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])\n", + "\n", + "# Use slicing to pull out the subarray consisting of the first 2 rows\n", + "# and columns 1 and 2; b is the following array of shape (2, 2):\n", + "# [[2 3]\n", + "# [6 7]]\n", + "b = a[:2, 1:3]\n", + "\n", + "# A 
slice of an array is a view into the same data, so modifying it\n", + "# will modify the original array.\n", + "\n", + "print(a[0, 1]) # Prints \"2\"\n", + "\n", + "b[0, 0] = 77 # b[0, 0] is the same piece of data as a[0, 1]\n", + "print(a[0, 1]) # Prints \"77\"" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "text": [ + "2\n", + "77\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "s400Gijxu0kO", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Slicing" + ] + }, + { + "metadata": { + "id": "kubpegh2u4zF", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": "d436e463-31fd-41e9-8f2f-12e04f450ef9" + }, + "cell_type": "code", + "source": [ + "a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])\n", + "\n", + "row_r1 = a[1, :] # Rank 1 view of the second row of a\n", + "row_r2 = a[1:2, :] # Rank 2 view of the second row of a\n", + "\n", + "print(row_r1, row_r1.shape) # Prints \"[5 6 7 8] (4,)\"\n", + "print(row_r2, row_r2.shape) # Prints \"[[5 6 7 8]] (1, 4)\"\n", + "\n", + "col_r1 = a[:, 1]\n", + "col_r2 = a[:, 1:2]\n", + "\n", + "print(col_r1, col_r1.shape) # Prints \"[ 2 6 10] (3,)\"\n", + "print(col_r2, col_r2.shape)" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[5 6 7 8] (4,)\n", + "[[5 6 7 8]] (1, 4)\n", + "[ 2 6 10] (3,)\n", + "[[ 2]\n", + " [ 6]\n", + " [10]] (3, 1)\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "TmGnCO3AvE8t", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Arithmetic operations" + ] + }, + { + "metadata": { + "id": "YvBw3ImjvGqD", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "9d182092-4e38-4715-b592-63ea4fd64596" + }, + "cell_type": "code", + "source": [ + "x = np.array([[1,2],[3,4]])\n", + "\n", + "print(np.sum(x)) # Compute sum of all elements; 
prints \"10\"\n", + "print(np.sum(x, axis=0)) # Compute sum of each column; prints \"[4 6]\"\n", + "print(np.sum(x, axis=1)) # Compute sum of each row; prints \"[3 7]\"" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "10\n", + "[4 6]\n", + "[3 7]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "uaVY3ZzD4pC2", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Using Boolean Mask" + ] + }, + { + "metadata": { + "id": "-PNfOMvh4_Gp", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "c6b67d2a-ab61-4b5b-ed1c-2f4201694e66" + }, + "cell_type": "code", + "source": [ + "b = np.arange(10)\n", + "\n", + "print(b)\n", + "\n", + "mask = b%2!=0 #perform computations on the list \n", + "\n", + "print(mask)\n", + "\n", + "print(b[mask]) #applying the mask on the numpy array\n" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[0 1 2 3 4 5 6 7 8 9]\n", + "[False True False True False True False True False True]\n", + "[1 3 5 7 9]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "HbEPBbz-5J9K", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "12a21405-a73f-4174-a819-bae2ee150850" + }, + "cell_type": "code", + "source": [ + "modified_b = b\n", + "modified_b[mask] = -1\n", + "\n", + "print(modified_b)" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[ 0 -1 2 -1 4 -1 6 -1 8 -1]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "zgSd71EEAHC7", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Swapping two columns in a 2d numpy array" + ] + }, + { + "metadata": { + "id": "-cvqeXd_AGo1", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": 
"69b028fc-4ee7-4069-e50b-a456f47098b9" + }, + "cell_type": "code", + "source": [ + "a = np.arange(9).reshape(3,3)\n", + "print(a)\n", + "\n", + "print(a[:, [1,0,2]])" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0 1 2]\n", + " [3 4 5]\n", + " [6 7 8]]\n", + "[[1 0 2]\n", + " [4 3 5]\n", + " [7 6 8]]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "U7ifiLY3Ayky", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Swapping two rows in a 2d numpy array" + ] + }, + { + "metadata": { + "id": "0FrOURRDAZNP", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": "16187094-b86d-48e7-9b1d-1fd9c8f2fc59" + }, + "cell_type": "code", + "source": [ + "a = np.arange(9).reshape(3,3)\n", + "print(a)\n", + "\n", + "print(a[[1,0,2], :]) #Change made by AGCreates in this line instead of a there was arr written" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0 1 2]\n", + " [3 4 5]\n", + " [6 7 8]]\n", + "[[3 4 5]\n", + " [0 1 2]\n", + " [6 7 8]]\n" + ], + "name": "stdout" + } + ] + } + ] +} \ No newline at end of file From 3e480a7bfacf97621ba3d9f836304c114b033bdb Mon Sep 17 00:00:00 2001 From: AGCreates <43198265+AGCreates@users.noreply.github.com> Date: Sat, 26 Jan 2019 16:21:53 +0530 Subject: [PATCH 3/7] Intro to pandas completed (Assigment 5 intro_to_pandas)by AGCreates --- intro_to_pandas.ipynb | 1725 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1725 insertions(+) create mode 100644 intro_to_pandas.ipynb diff --git a/intro_to_pandas.ipynb b/intro_to_pandas.ipynb new file mode 100644 index 0000000..36035dd --- /dev/null +++ b/intro_to_pandas.ipynb @@ -0,0 +1,1725 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "intro_to_pandas.ipynb", + "version": "0.3.2", + "provenance": [], + "collapsed_sections": [ + "JndnmDMp66FL", + "YHIWvc9Ms-Ll", + 
"TJffr5_Jwqvd" + ], + "include_colab_link": true + }, + "kernelspec": { + "name": "python2", + "display_name": "Python 2" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "JndnmDMp66FL" + }, + "cell_type": "markdown", + "source": [ + "#### Copyright 2017 Google LLC." + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "hMqWDc_m6rUC", + "cellView": "both", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "colab_type": "text", + "id": "rHLcriKWLRe4" + }, + "cell_type": "markdown", + "source": [ + "# Intro to pandas" + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "QvJBqX8_Bctk" + }, + "cell_type": "markdown", + "source": [ + "**Learning Objectives:**\n", + " * Gain an introduction to the `DataFrame` and `Series` data structures of the *pandas* library\n", + " * Access and manipulate data within a `DataFrame` and `Series`\n", + " * Import CSV data into a *pandas* `DataFrame`\n", + " * Reindex a `DataFrame` to shuffle data" + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "TIFJ83ZTBctl" + }, + "cell_type": "markdown", + "source": [ + "[*pandas*](http://pandas.pydata.org/) is a column-oriented data analysis API. It's a great tool for handling and analyzing input data, and many ML frameworks support *pandas* data structures as inputs.\n", + "Although a comprehensive introduction to the *pandas* API would span many pages, the core concepts are fairly straightforward, and we'll present them below. For a more complete reference, the [*pandas* docs site](http://pandas.pydata.org/pandas-docs/stable/index.html) contains extensive documentation and many tutorials." 
+ ] + }, + { + "metadata": { + "colab_type": "text", + "id": "s_JOISVgmn9v" + }, + "cell_type": "markdown", + "source": [ + "## Basic Concepts\n", + "\n", + "The following line imports the *pandas* API and prints the API version:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "aSRYu62xUi3g", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "aecdcd96-a616-40b3-8669-3eca04f2d3b2" + }, + "cell_type": "code", + "source": [ + "from __future__ import print_function\n", + "\n", + "import pandas as pd\n", + "pd.__version__" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "u'0.22.0'" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 19 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "daQreKXIUslr" + }, + "cell_type": "markdown", + "source": [ + "The primary data structures in *pandas* are implemented as two classes:\n", + "\n", + " * **`DataFrame`**, which you can imagine as a relational data table, with rows and named columns.\n", + " * **`Series`**, which is a single column. A `DataFrame` contains one or more `Series` and a name for each `Series`.\n", + "\n", + "The data frame is a commonly used abstraction for data manipulation. Similar implementations exist in [Spark](https://spark.apache.org/) and [R](https://www.r-project.org/about.html)." + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "fjnAk1xcU0yc" + }, + "cell_type": "markdown", + "source": [ + "One way to create a `Series` is to construct a `Series` object. 
For example:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "DFZ42Uq7UFDj", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "outputId": "e1f97267-aa6f-449d-bbdc-0e3310e30f9d" + }, + "cell_type": "code", + "source": [ + "pd.Series(['San Francisco', 'San Jose', 'Sacramento'])" + ], + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 San Francisco\n", + "1 San Jose\n", + "2 Sacramento\n", + "dtype: object" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 20 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "U5ouUp1cU6pC" + }, + "cell_type": "markdown", + "source": [ + "`DataFrame` objects can be created by passing a `dict` mapping `string` column names to their respective `Series`. If the `Series` don't match in length, missing values are filled with special [NA/NaN](http://pandas.pydata.org/pandas-docs/stable/missing_data.html) values. Example:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "avgr6GfiUh8t", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 142 + }, + "outputId": "a9fd4201-f678-4151-dbfd-f25ab4f84d27" + }, + "cell_type": "code", + "source": [ + "city_names = pd.Series(['San Francisco', 'San Jose', 'Sacramento'])\n", + "population = pd.Series([852469, 1015785, 485199])\n", + "\n", + "pd.DataFrame({ 'City name': city_names, 'Population': population })" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City namePopulation
0San Francisco852469
1San Jose1015785
2Sacramento485199
\n", + "
" + ], + "text/plain": [ + " City name Population\n", + "0 San Francisco 852469\n", + "1 San Jose 1015785\n", + "2 Sacramento 485199" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 21 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "oa5wfZT7VHJl" + }, + "cell_type": "markdown", + "source": [ + "But most of the time, you load an entire file into a `DataFrame`. The following example loads a file with California housing data. Run the following cell to load the data and create feature definitions:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "av6RYOraVG1V", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 297 + }, + "outputId": "50daf81b-0947-4c5f-e2d7-f647ef5a68a6" + }, + "cell_type": "code", + "source": [ + "california_housing_dataframe = pd.read_csv(\"https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv\", sep=\",\")\n", + "california_housing_dataframe.describe()" + ], + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
count17000.00000017000.00000017000.00000017000.00000017000.00000017000.00000017000.00000017000.00000017000.000000
mean-119.56210835.62522528.5893532643.664412539.4108241429.573941501.2219413.883578207300.912353
std2.0051662.13734012.5869372179.947071421.4994521147.852959384.5208411.908157115983.764387
min-124.35000032.5400001.0000002.0000001.0000003.0000001.0000000.49990014999.000000
25%-121.79000033.93000018.0000001462.000000297.000000790.000000282.0000002.566375119400.000000
50%-118.49000034.25000029.0000002127.000000434.0000001167.000000409.0000003.544600180400.000000
75%-118.00000037.72000037.0000003151.250000648.2500001721.000000605.2500004.767000265000.000000
max-114.31000041.95000052.00000037937.0000006445.00000035682.0000006082.00000015.000100500001.000000
\n", + "
" + ], + "text/plain": [ + " longitude latitude housing_median_age total_rooms \\\n", + "count 17000.000000 17000.000000 17000.000000 17000.000000 \n", + "mean -119.562108 35.625225 28.589353 2643.664412 \n", + "std 2.005166 2.137340 12.586937 2179.947071 \n", + "min -124.350000 32.540000 1.000000 2.000000 \n", + "25% -121.790000 33.930000 18.000000 1462.000000 \n", + "50% -118.490000 34.250000 29.000000 2127.000000 \n", + "75% -118.000000 37.720000 37.000000 3151.250000 \n", + "max -114.310000 41.950000 52.000000 37937.000000 \n", + "\n", + " total_bedrooms population households median_income \\\n", + "count 17000.000000 17000.000000 17000.000000 17000.000000 \n", + "mean 539.410824 1429.573941 501.221941 3.883578 \n", + "std 421.499452 1147.852959 384.520841 1.908157 \n", + "min 1.000000 3.000000 1.000000 0.499900 \n", + "25% 297.000000 790.000000 282.000000 2.566375 \n", + "50% 434.000000 1167.000000 409.000000 3.544600 \n", + "75% 648.250000 1721.000000 605.250000 4.767000 \n", + "max 6445.000000 35682.000000 6082.000000 15.000100 \n", + "\n", + " median_house_value \n", + "count 17000.000000 \n", + "mean 207300.912353 \n", + "std 115983.764387 \n", + "min 14999.000000 \n", + "25% 119400.000000 \n", + "50% 180400.000000 \n", + "75% 265000.000000 \n", + "max 500001.000000 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 22 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "WrkBjfz5kEQu" + }, + "cell_type": "markdown", + "source": [ + "The example above used `DataFrame.describe` to show interesting statistics about a `DataFrame`. 
Another useful function is `DataFrame.head`, which displays the first few records of a `DataFrame`:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "s3ND3bgOkB5k", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "6052fdd2-cd27-4ad0-a782-714236c355c3" + }, + "cell_type": "code", + "source": [ + "california_housing_dataframe.head()" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
0-114.3134.1915.05612.01283.01015.0472.01.493666900.0
1-114.4734.4019.07650.01901.01129.0463.01.820080100.0
2-114.5633.6917.0720.0174.0333.0117.01.650985700.0
3-114.5733.6414.01501.0337.0515.0226.03.191773400.0
4-114.5733.5720.01454.0326.0624.0262.01.925065500.0
\n", + "
" + ], + "text/plain": [ + " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", + "0 -114.31 34.19 15.0 5612.0 1283.0 \n", + "1 -114.47 34.40 19.0 7650.0 1901.0 \n", + "2 -114.56 33.69 17.0 720.0 174.0 \n", + "3 -114.57 33.64 14.0 1501.0 337.0 \n", + "4 -114.57 33.57 20.0 1454.0 326.0 \n", + "\n", + " population households median_income median_house_value \n", + "0 1015.0 472.0 1.4936 66900.0 \n", + "1 1129.0 463.0 1.8200 80100.0 \n", + "2 333.0 117.0 1.6509 85700.0 \n", + "3 515.0 226.0 3.1917 73400.0 \n", + "4 624.0 262.0 1.9250 65500.0 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 23 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "w9-Es5Y6laGd" + }, + "cell_type": "markdown", + "source": [ + "Another powerful feature of *pandas* is graphing. For example, `DataFrame.hist` lets you quickly study the distribution of values in a column:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "nqndFVXVlbPN", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 396 + }, + "outputId": "d40e6503-2585-49f3-b7c5-48bfd20b5581" + }, + "cell_type": "code", + "source": [ + "california_housing_dataframe.hist('housing_median_age')" + ], + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[]],\n", + " dtype=object)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 24 + }, + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeoAAAFZCAYAAABXM2zhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3X1UlHX+//HXMDAH0UEEGTfLarf0\naEmaa5l4U0Iokp7IVRPWdU3q6Iqtlql499WTlajRmmZZmunRU7GNtofcAjJxyyRanT0uuu0p2VOr\neTejKCqgSPP7o9Os/FRguP1Az8dfcTEz1+d6H+3pdQ1zYfF6vV4BAAAjBTT3AgAAwPURagAADEao\nAQAwGKEGAMBghBoAAIMRagAADEaogVo6cuSI7rjjjkbdxz//+U+lpKQ06j4a0h133KEjR47o448/\n1ty5c5t7OUCrZOFz1EDtHDlyREOHDtW//vWv5l6KMe644w7l5ubqpptuau6lAK0WZ9SAn5xOp0aO\nHKn7779f27dv1w8//KA//elPio+PV3x8vNLS0lRaWipJiomJ0d69e33P/enry5cva/78+Ro2bJji\n4uI0bdo0nT9/XgUFBYqLi5MkrV69Ws8++6xSU1MVGxur0aNH6+TJk5KkgwcPaujQoRo6dKheeeUV\njRw5UgUFBdWue/Xq1Vq0aJEmT56sgQMHatasWcrLy9OoUaM0cOBA5eXlSZIuXbqk5557TsOGDVNM\nTIzWrl3re42//e1viouL0/Dhw7V+/Xrf9m3btmnixImSJI/Ho5SUFMXHxysmJkZvvfVWleN/9913\nNXr0aA0cOFDp6ek1zrusrEwzZszwrWfZsmW+71U3hx07dmjkyJGKjY3VpEmTdPr06Rr3BZiIUAN+\n+OGHH1RRUaEPPvhAc+fO1cqVK/XRRx/p008/1bZt2/TXv/5VJSUl2rhxY7Wvs3v3bh05ckTZ2dnK\nzc3V7bffrn/84x9XPS47O1vz5s3Tjh07FBERoa1bt0qSFi5cqIkTJyo3N1ft2rXTt99+W6v179q1\nSy+88II++OADZWdn+9Y9ZcoUrVu3TpK0bt06HTp0SB988IG2b9+unJwc5eXlqbKyUvPnz9eiRYv0\n0UcfKSAgQJWVlVft47XXXtNNN92k7Oxsbdq0SRkZGTp27Jjv+3//+9+VmZmprVu3asuWLTp+/Hi1\na37nnXd04cIFZWdn6/3339e2bdt8//i53hwOHz6s2bNnKyMjQ5988on69eunxYsX12pGgGkINeAH\nr9erxMREST9e9j1+/Lh27dqlxMREhYSEyGq1atSoUfr888+rfZ3w8HAVFRXp448/9p0xDho06KrH\n9e3bVzfeeKMsFot69OihY8eOqby8XAcPHtSIESMkSb/97W9V23ew7r77bkVERKhDhw6KjIzU4MGD\nJUndunXzna3n5eUpOTlZNptNISEhevjhh5Wbm6tvv/1Wly5d0sCBAyVJjzzyyDX3sWDBAi1cuFCS\n1KVLF0VGRurIkSO+748cOVJWq1WdOnVSRERElYhfy6RJk/Tqq6/KYrGoffv26tq1q44cOVLtHD79\n9FPde++96tatmyRp3Lhx2rlz5zX/YQGYLrC5FwC0JFarVW3atJEkBQQE6IcfftDp06fVvn1732Pa\nt2+vU6dOVfs6d911lxYsWKDNmzdrzpw5iomJ0aJFi656nN1ur7LvyspKnT17VhaLRaGhoZKkoKAg\nRURE1Gr9bdu2rfJ6ISEhVY5Fks6dO6elS5fqpZdekvTjpfC77rpLZ8+eVbt27aoc57UUFhb6zqID\nAgLkdrt9ry2pymv8dEzV+fbbb5Wenq7//Oc/CggI0PHjxzVq1Khq53Du3Dnt3btX8fHxVfZ75syZ\nWs8KMAWhBuqpY8eOOnPmjO/rM2fOqGPHjpKqBlCSzp496/vvn
97TPnPmjObNm6c333xT0dHRNe6v\nXbt28nq9KisrU5s2bXT58uUGff/V4XBo0qRJGjJkSJXtRUVFOn/+vO/r6+1z1qxZ+v3vf6+kpCRZ\nLJZrXinwx7PPPqs777xTa9askdVq1bhx4yRVPweHw6Ho6GitWrWqXvsGTMClb6CeHnjgAWVlZams\nrEyXL1+W0+nU/fffL0mKjIzUv//9b0nShx9+qIsXL0qStm7dqjVr1kiSwsLC9Ktf/arW+2vbtq1u\nu+02ffTRR5KkzMxMWSyWBjue2NhYvffee6qsrJTX69Wrr76qTz/9VDfffLOsVqvvh7W2bdt2zf2e\nOnVKPXv2lMVi0fvvv6+ysjLfD9fVxalTp9SjRw9ZrVZ9/vnn+u6771RaWlrtHAYOHKi9e/fq8OHD\nkn782Ntzzz1X5zUAzYlQA/UUHx+vwYMHa9SoURoxYoR+8YtfaMKECZKkqVOnauPGjRoxYoSKiop0\n++23S/oxhj/9xPLw4cN16NAhPfbYY7Xe56JFi7R27Vo99NBDKi0tVadOnRos1snJyercubMeeugh\nxcfHq6ioSL/+9a8VFBSkJUuWaN68eRo+fLgsFovv0vmVpk+frtTUVI0cOVKlpaV69NFHtXDhQv33\nv/+t03r+8Ic/aNmyZRoxYoS+/PJLTZs2TatXr9a+ffuuOweHw6ElS5YoNTVVw4cP17PPPquEhIT6\njgZoFnyOGmihvF6vL8733XefNm7cqO7duzfzqpoec0Brxxk10AL98Y9/9H2cKj8/X16vV7feemvz\nLqoZMAf8HHBGDbRARUVFmjt3rs6ePaugoCDNmjVLN910k1JTU6/5+Ntuu833nrhpioqK6rzua83h\np58PAFoLQg0AgMG49A0AgMEINQAABjPyhidu9zm/Ht+hQ4iKi+v+Oc2fO+ZXd8yufphf3TG7+jFt\nfpGR9ut+r1WcUQcGWpt7CS0a86s7Zlc/zK/umF39tKT5tYpQAwDQWhFqAAAMRqgBADBYjT9MVlZW\nprS0NJ06dUoXL17U1KlT1b17d82ePVuVlZWKjIzUihUrZLPZlJWVpU2bNikgIEBjx47VmDFjVFFR\nobS0NB09elRWq1VLly5Vly5dmuLYAABo8Wo8o87Ly1PPnj21ZcsWrVy5Uunp6Vq1apWSk5P19ttv\n65ZbbpHT6VRpaanWrFmjjRs3avPmzdq0aZPOnDmj7du3KzQ0VO+8846mTJmijIyMpjguAABahRpD\nnZCQoCeeeEKSdOzYMXXq1EkFBQWKjY2VJA0ZMkT5+fnav3+/oqKiZLfbFRwcrD59+sjlcik/P19x\ncXGSpOjoaLlcrkY8HAAAWpdaf4563LhxOn78uNauXavHHntMNptNkhQRESG32y2Px6Pw8HDf48PD\nw6/aHhAQIIvFokuXLvmeDwAArq/WoX733Xf11VdfadasWbry9uDXu1W4v9uv1KFDiN+fcavuw+Ko\nGfOrO2ZXP8yv7phd/bSU+dUY6gMHDigiIkI33HCDevToocrKSrVt21bl5eUKDg7WiRMn5HA45HA4\n5PF4fM87efKkevfuLYfDIbfbre7du6uiokJer7fGs2l/7xYTGWn3+25m+B/mV3fMrn6YX90xu/ox\nbX71ujPZ3r17tWHDBkmSx+NRaWmpoqOjlZOTI0nKzc3VoEGD1KtXLxUWFqqkpEQXLlyQy+VS3759\nNWDAAGVnZ0v68QfT+vXr1xDHBADAz0KNZ9Tjxo3T/PnzlZycrPLycv3f//2fevbsqTlz5igzM1Od\nO3dWYmKigoKCNHPmTKWkpMhisSg1NVV2u10JCQnas2ePkpKSZLPZlJ6e3hTHBQBAq2Dk76P293KE\naZcwWhrmV3fMrn6YX90xu/oxbX7VXfo28rdnAcC1TErf2dxLqNGGtJjmXgJaGW4hCgCAwQg1AAAG\nI9QAABiMUAMAYDBCDQCAw
Qg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCA\nwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMA\nYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QA\nABiMUAMAYDBCDQCAwQg1AAAGC6zNg5YvX659+/bp8uXLmjx5snbu3KmDBw8qLCxMkpSSkqIHHnhA\nWVlZ2rRpkwICAjR27FiNGTNGFRUVSktL09GjR2W1WrV06VJ16dKlUQ8KAIDWosZQf/HFF/rmm2+U\nmZmp4uJiPfLII7rvvvv09NNPa8iQIb7HlZaWas2aNXI6nQoKCtLo0aMVFxenvLw8hYaGKiMjQ7t3\n71ZGRoZWrlzZqAcFAEBrUeOl73vuuUcvv/yyJCk0NFRlZWWqrKy86nH79+9XVFSU7Ha7goOD1adP\nH7lcLuXn5ysuLk6SFB0dLZfL1cCHAABA61VjqK1Wq0JCQiRJTqdTgwcPltVq1ZYtWzRhwgQ99dRT\nOn36tDwej8LDw33PCw8Pl9vtrrI9ICBAFotFly5daqTDAQCgdanVe9SStGPHDjmdTm3YsEEHDhxQ\nWFiYevTooTfeeEOvvPKK7r777iqP93q913yd622/UocOIQoMtNZ2aZKkyEi7X49HVcyv7phd/bS2\n+TXl8bS22TW1ljK/WoX6s88+09q1a7V+/XrZ7Xb179/f972YmBgtXrxYw4YNk8fj8W0/efKkevfu\nLYfDIbfbre7du6uiokJer1c2m63a/RUXl/p1EJGRdrnd5/x6Dv6H+dUds6uf1ji/pjqe1ji7pmTa\n/Kr7R0ONl77PnTun5cuX6/XXX/f9lPeTTz6pw4cPS5IKCgrUtWtX9erVS4WFhSopKdGFCxfkcrnU\nt29fDRgwQNnZ2ZKkvLw89evXryGOCQCAn4Uaz6g//PBDFRcXa8aMGb5to0aN0owZM9SmTRuFhIRo\n6dKlCg4O1syZM5WSkiKLxaLU1FTZ7XYlJCRoz549SkpKks1mU3p6eqMeEAAArYnFW5s3jZuYv5cj\nTLuE0dIwv7pjdvXj7/wmpe9sxNU0jA1pMU2yH/7s1Y9p86vXpW8AANB8CDUAAAYj1AAAGIxQAwBg\nMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAYj1AAA\nGIxQAwBgMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAYLbO4FAA1lUvrO5l5CtTakxTT3\nEgC0QJxRAwBgMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDB\nCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAbj91EDTcT035ct8TuzARNxRg0AgMFqdUa9fPly7du3\nT5cvX9bkyZMVFRWl2bNnq7KyUpGRkVqxYoVsNpuysrK0adMmBQQEaOzYsRozZowqKiqUlpamo0eP\nymq1aunSperSpUtjHxcAAK1CjaH+4osv9M033ygzM1PFxcV65JFH1L9/fyUnJ2v48OF66aWX5HQ6\nlZiYqDVr1sjpdCooKEijR49WXFyc8vLyFBoaqoyMDO3evVsZGRlauXJlUxwbAAAtXo2Xvu+55x69\n/PLLkqTQ0FCVlZWpoKBAsbGxkqQhQ4YoPz9f+/fvV1RUlOx2u4KDg9WnTx+5XC7l5+crLi5OkhQd\nHS2Xy9WIhwMAQOtS4xm11WpVSEiIJMnpdGrw4MHavXu3bDabJCkiIkJut1sej0fh4eG+54WHh1+1\nPSAgQBaLRZcuXfI9/1o6dAhRYKDVrwOJjLT79XhUxfwgNc+fg9b2Z68pj6e1za6ptZT51fq
nvnfs\n2CGn06kNGzZo6NChvu1er/eaj/d3+5WKi0truyxJPw7b7T7n13PwP8wPP2nqPwet8c9eUx1Pa5xd\nUzJtftX9o6FWP/X92Wefae3atVq3bp3sdrtCQkJUXl4uSTpx4oQcDoccDoc8Ho/vOSdPnvRtd7vd\nkqSKigp5vd5qz6YBAMD/1Bjqc+fOafny5Xr99dcVFhYm6cf3mnNyciRJubm5GjRokHr16qXCwkKV\nlJTowoULcrlc6tu3rwYMGKDs7GxJUl5envr169eIhwMAQOtS46XvDz/8UMXFxZoxY4ZvW3p6uhYs\nWKDMzEx17txZiYmJCgoK0syZM5WSkiKLxaLU1FTZ7XYlJCRoz549SkpKks1mU3p6eqMeEAAArUmN\noX700Uf16KOPXrX9rbfeumpbfHy84uPjq2z76bPTAADAf9xCFIBPS7jNKfBzwy1EAQAwGKEGAMBg\nhBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMRagAADEaoAQAwGHcmQ61wxyoAaB6cUQMAYDBCDQCA\nwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMA\nYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABgssLkXAADAlSal72zuJdRoQ1pM\nk+2LM2oAAAxGqAEAMBihBgDAYIQaAACDEWoAAAxGqAEAMBihBgDAYLX6HPXXX3+tqVOnauLEiRo/\nfrzS0tJ08OBBhYWFSZJSUlL0wAMPKCsrS5s2bVJAQIDGjh2rMWPGqKKiQmlpaTp69KisVquWLl2q\nLl26NOpBAUBz4TPAaGg1hrq0tFRLlixR//79q2x/+umnNWTIkCqPW7NmjZxOp4KCgjR69GjFxcUp\nLy9PoaGhysjI0O7du5WRkaGVK1c2/JEAANAK1Xjp22azad26dXI4HNU+bv/+/YqKipLdbldwcLD6\n9Okjl8ul/Px8xcXFSZKio6PlcrkaZuUAAPwM1BjqwMBABQcHX7V9y5YtmjBhgp566imdPn1aHo9H\n4eHhvu+Hh4fL7XZX2R4QECCLxaJLly414CEAANB61ele3w8//LDCwsLUo0cPvfHGG3rllVd09913\nV3mM1+u95nOvt/1KHTqEKDDQ6teaIiPtfj0eVTE/4OeDv+/115QzrFOor3y/OiYmRosXL9awYcPk\n8Xh820+ePKnevXvL4XDI7Xare/fuqqiokNfrlc1mq/b1i4tL/VpPZKRdbvc5/w4CPswP+Hnh73v9\nNfQMqwt/nT6e9eSTT+rw4cOSpIKCAnXt2lW9evVSYWGhSkpKdOHCBblcLvXt21cDBgxQdna2JCkv\nL0/9+vWryy4BAPhZqvGM+sCBA1q2bJm+//57BQYGKicnR+PHj9eMGTPUpk0bhYSEaOnSpQoODtbM\nmTOVkpIii8Wi1NRU2e12JSQkaM+ePUpKSpLNZlN6enpTHBcAAK1CjaHu2bOnNm/efNX2YcOGXbUt\nPj5e8fHxVbb99NlpAADgP+5MBgCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMA\nYDBCDQCAwQg1AAAGI9QAABiMUAMAYLA6/T5qAEDLNSl9Z3MvAX7gjBoAAIMRagAADEaoAQAwGKEG\nAMBghBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMRagAADEao\nAQAwGKEGAMBghBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMR\nagAADFarUH/99dd68MEHtWXLFknSsWPH9Lvf/U7JycmaPn26Ll26JEnKysrSb37zG40ZM0bvvfee\nJKmiokIzZ85UUlKSxo8fr8OHDzfSoQAA0PrUGOrS0lI
tWbJE/fv3921btWqVkpOT9fbbb+uWW26R\n0+lUaWmp1qxZo40bN2rz5s3atGmTzpw5o+3btys0NFTvvPOOpkyZooyMjEY9IAAAWpMaQ22z2bRu\n3To5HA7ftoKCAsXGxkqShgwZovz8fO3fv19RUVGy2+0KDg5Wnz595HK5lJ+fr7i4OElSdHS0XC5X\nIx0KAACtT42hDgwMVHBwcJVtZWVlstlskqSIiAi53W55PB6Fh4f7HhMeHn7V9oCAAFksFt+lcgAA\nUL3A+r6A1+ttkO1X6tAhRIGBVr/WERlp9+vxqIr5AUDtNeX/M+sU6pCQEJWXlys4OFgnTpyQw+GQ\nw+GQx+PxPebkyZPq3bu3HA6H3G63unfvroqKCnm9Xt/Z+PUUF5f6tZ7ISLvc7nN1ORSI+QGAvxr6\n/5nVhb9OH8+Kjo5WTk6OJCk3N1eDBg1Sr169VFhYqJKSEl24cEEul0t9+/bVgAEDlJ2dLUnKy8tT\nv3796rJLAAB+lmo8oz5w4ICWLVum77//XoGBgcrJydGLL76otLQ0ZWZmqnPnzkpMTFRQUJBmzpyp\nlJQUWSwWpaamym63KyEhQXv27FFSUpJsNpvS09Ob4rgAAGgVLN7avGncxPy9pMCl2/qpzfwmpe9s\notUAgPk2pMU06Os1+KVvAADQNOr9U99oGJyxAgCuhTNqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAM\nRqgBADAYoQYAwGCEGgAAgxFqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAMRqgBADAYoQYAwGCEGgAA\ngxFqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAMRqgBADAYoQYA\nwGCEGgAAgxFqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAMFtjcC2gKk9J3NvcSAACoE86oAQAwGKEG\nAMBghBoAAIMRagAADFanHyYrKCjQ9OnT1bVrV0lSt27d9Pjjj2v27NmqrKxUZGSkVqxYIZvNpqys\nLG3atEkBAQEaO3asxowZ06AHAABAa1bnn/q+9957tWrVKt/Xc+fOVXJysoYPH66XXnpJTqdTiYmJ\nWrNmjZxOp4KCgjR69GjFxcUpLCysQRYPAEBr12CXvgsKChQbGytJGjJkiPLz87V//35FRUXJbrcr\nODhYffr0kcvlaqhdAgDQ6tX5jPrQoUOaMmWKzp49q2nTpqmsrEw2m02SFBERIbfbLY/Ho/DwcN9z\nwsPD5Xa7a3ztDh1CFBho9Ws9kZF2/w4AAIA6asrm1CnUt956q6ZNm6bhw4fr8OHDmjBhgiorK33f\n93q913ze9bb//4qLS/1aT2SkXW73Ob+eAwBAXTV0c6oLf50ufXfq1EkJCQmyWCy6+eab1bFjR509\ne1bl5eWSpBMnTsjhcMjhcMjj8fied/LkSTkcjrrsEgCAn6U6hTorK0tvvvmmJMntduvUqVMaNWqU\ncnJyJEm5ubkaNGiQevXqpcLCQpWUlOjChQtyuVzq27dvw60eAIBWrk6XvmNiYvTMM8/ok08+UUVF\nhRYvXqwePXpozpw5yszMVOfOnZWYmKigoCDNnDlTKSkpslgsSk1Nld3Oe8kAANSWxVvbN46bkL/X\n/mt6j5pfygEAaEgb0mIa9PUa/D1qAADQNAg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiM\nUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAG\nI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCA\nwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABgssCl2\n8sILL2j//v2yWCy
aN2+e7rrrrqbYLQAALV6jh/rLL7/Ud999p8zMTBUVFWnevHnKzMxs7N0CANAq\nNPql7/z8fD344IOSpNtuu01nz57V+fPnG3u3AAC0Co0eao/How4dOvi+Dg8Pl9vtbuzdAgDQKjTJ\ne9RX8nq9NT4mMtLu9+tW95wPMh72+/UAADBBo59ROxwOeTwe39cnT55UZGRkY+8WAIBWodFDPWDA\nAOXk5EiSDh48KIfDoXbt2jX2bgEAaBUa/dJ3nz59dOedd2rcuHGyWCxatGhRY+8SAIBWw+KtzZvG\nAACgWXBnMgAADEaoAQAwWJN/PKuhcXtS/3399deaOnWqJk6cqPHjx+vYsWOaPXu2KisrFRkZqRUr\nVshmszX3Mo20fPly7du3T5cvX9bkyZMVFRXF7GqhrKxMaWlpOnXqlC5evKipU6eqe/fuzM5P5eXl\nGjFihKZOnar+/fszv1oqKCjQ9OnT1bVrV0lSt27d9Pjjj7eY+bXoM+orb0/6/PPP6/nnn2/uJRmv\ntLRUS5YsUf/+/X3bVq1apeTkZL399tu65ZZb5HQ6m3GF5vriiy/0zTffKDMzU+vXr9cLL7zA7Gop\nLy9PPXv21JYtW7Ry5Uqlp6czuzp47bXX1L59e0n8vfXXvffeq82bN2vz5s1auHBhi5pfiw41tyf1\nn81m07p16+RwOHzbCgoKFBsbK0kaMmSI8vPzm2t5Rrvnnnv08ssvS5JCQ0NVVlbG7GopISFBTzzx\nhCTp2LFj6tSpE7PzU1FRkQ4dOqQHHnhAEn9v66slza9Fh5rbk/ovMDBQwcHBVbaVlZX5LvlEREQw\nw+uwWq0KCQmRJDmdTg0ePJjZ+WncuHF65plnNG/ePGbnp2XLliktLc33NfPzz6FDhzRlyhQlJSXp\n888/b1Hza/HvUV+JT5rVHzOs2Y4dO+R0OrVhwwYNHTrUt53Z1ezdd9/VV199pVmzZlWZF7Or3l/+\n8hf17t1bXbp0ueb3mV/1br31Vk2bNk3Dhw/X4cOHNWHCBFVWVvq+b/r8WnSouT1pwwgJCVF5ebmC\ng4N14sSJKpfFUdVnn32mtWvXav369bLb7cyulg4cOKCIiAjdcMMN6tGjhyorK9W2bVtmV0u7du3S\n4cOHtWvXLh0/flw2m40/e37o1KmTEhISJEk333yzOnbsqMLCwhYzvxZ96ZvbkzaM6Oho3xxzc3M1\naNCgZl6Rmc6dO6fly5fr9ddfV1hYmCRmV1t79+7Vhg0bJP34llVpaSmz88PKlSu1detW/fnPf9aY\nMWM0depU5ueHrKwsvfnmm5Ikt9utU6dOadSoUS1mfi3+zmQvvvii9u7d67s9affu3Zt7SUY7cOCA\nli1bpu+//16BgYHq1KmTXnytKYqYAAAArElEQVTxRaWlpenixYvq3Lmzli5dqqCgoOZeqnEyMzO1\nevVq/fKXv/RtS09P14IFC5hdDcrLyzV//nwdO3ZM5eXlmjZtmnr27Kk5c+YwOz+tXr1aN954owYO\nHMj8aun8+fN65plnVFJSooqKCk2bNk09evRoMfNr8aEGAKA1a9GXvgEAaO0INQAABiPUAAAYjFAD\nAGAwQg0AgMEINQAABiPUAAAYjFADAGCw/wdkB5RjykY3PgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "XtYZ7114n3b-" + }, + "cell_type": "markdown", + "source": [ + "## Accessing Data\n", + "\n", + "You can access `DataFrame` data using familiar Python dict/list operations:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "_TFm7-looBFF", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102 + }, + "outputId": "ab7fabc4-0baf-4601-cc8c-52cc5ae12bdb" + }, + "cell_type": "code", + "source": [ + "cities = pd.DataFrame({ 'City name': city_names, 'Population': population })\n", + "print(type(cities['City name']))\n", + "cities['City name']" + ], + "execution_count": 25, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 San Francisco\n", + "1 San Jose\n", + "2 Sacramento\n", + "Name: City name, dtype: object" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 25 + } + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "V5L6xacLoxyv", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "outputId": "5bb702e7-4e43-4544-9ed7-3b32af4d0322" + }, + "cell_type": "code", + "source": [ + "print(type(cities['City name'][1]))\n", + "cities['City name'][1]" + ], + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'San Jose'" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 26 + } + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "gcYX1tBPugZl", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 128 + }, + "outputId": "51ffb1ed-6165-47c5-eb21-adbdbf769ee1" + }, + "cell_type": "code", + "source": [ + "print(type(cities[0:2]))\n", + "cities[0:2]" + ], + "execution_count": 27, + "outputs": [ + { + "output_type": 
"stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City namePopulation
0San Francisco852469
1San Jose1015785
\n", + "
" + ], + "text/plain": [ + " City name Population\n", + "0 San Francisco 852469\n", + "1 San Jose 1015785" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 27 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "65g1ZdGVjXsQ" + }, + "cell_type": "markdown", + "source": [ + "In addition, *pandas* provides an extremely rich API for advanced [indexing and selection](http://pandas.pydata.org/pandas-docs/stable/indexing.html) that is too extensive to be covered here." + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "RM1iaD-ka3Y1" + }, + "cell_type": "markdown", + "source": [ + "## Manipulating Data\n", + "\n", + "You may apply Python's basic arithmetic operations to `Series`. For example:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "XWmyCFJ5bOv-", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "outputId": "ea13cfe0-702f-4602-9cb0-25aa0db5a742" + }, + "cell_type": "code", + "source": [ + "population / 1000." + ], + "execution_count": 28, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 852.469\n", + "1 1015.785\n", + "2 485.199\n", + "dtype: float64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 28 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "TQzIVnbnmWGM" + }, + "cell_type": "markdown", + "source": [ + "[NumPy](http://www.numpy.org/) is a popular toolkit for scientific computing. 
*pandas* `Series` can be used as arguments to most NumPy functions:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "ko6pLK6JmkYP", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "outputId": "650a3d8c-ec6c-4daf-c1e3-bafabccfdef3" + }, + "cell_type": "code", + "source": [ + "import numpy as np\n", + "\n", + "np.log(population)" + ], + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 13.655892\n", + "1 13.831172\n", + "2 13.092314\n", + "dtype: float64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 29 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "xmxFuQmurr6d" + }, + "cell_type": "markdown", + "source": [ + "For more complex single-column transformations, you can use `Series.apply`. Like the Python [map function](https://docs.python.org/3/library/functions.html#map), \n", + "`Series.apply` accepts as an argument a [lambda function](https://docs.python.org/3/tutorial/controlflow.html#lambda-expressions), which is applied to each value.\n", + "\n", + "The example below creates a new `Series` that indicates whether `population` is over one million:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "Fc1DvPAbstjI", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "outputId": "981c50d2-4a3a-4409-a01b-57724ce99203" + }, + "cell_type": "code", + "source": [ + "population.apply(lambda val: val > 1000000)" + ], + "execution_count": 30, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 False\n", + "1 True\n", + "2 False\n", + "dtype: bool" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 30 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "ZeYYLoV9b9fB" + }, + "cell_type": "markdown", + "source": [ + "\n", + "Modifying `DataFrames` is also straightforward. 
For example, the following code adds two `Series` to an existing `DataFrame`:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "0gCEX99Hb8LR", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 142 + }, + "outputId": "c92e1248-2987-4eab-b570-0108d7610b3e" + }, + "cell_type": "code", + "source": [ + "cities['Area square miles'] = pd.Series([46.87, 176.53, 97.92])\n", + "cities['Population density'] = cities['Population'] / cities['Area square miles']\n", + "cities" + ], + "execution_count": 31, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City namePopulationArea square milesPopulation density
0San Francisco85246946.8718187.945381
1San Jose1015785176.535754.177760
2Sacramento48519997.924955.055147
\n", + "
" + ], + "text/plain": [ + " City name Population Area square miles Population density\n", + "0 San Francisco 852469 46.87 18187.945381\n", + "1 San Jose 1015785 176.53 5754.177760\n", + "2 Sacramento 485199 97.92 4955.055147" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 31 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "6qh63m-ayb-c" + }, + "cell_type": "markdown", + "source": [ + "## Exercise #1\n", + "\n", + "Modify the `cities` table by adding a new boolean column that is True if and only if *both* of the following are True:\n", + "\n", + " * The city is named after a saint.\n", + " * The city has an area greater than 50 square miles.\n", + "\n", + "**Note:** Boolean `Series` are combined using the bitwise, rather than the traditional boolean, operators. For example, when performing *logical and*, use `&` instead of `and`.\n", + "\n", + "**Hint:** \"San\" in Spanish means \"saint.\"" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "zCOn8ftSyddH", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 142 + }, + "outputId": "d06f79b2-9078-430d-cf74-1446d559c796" + }, + "cell_type": "code", + "source": [ + "# Your code here\n", + "cities['Named after Saint and has Area Greater than 50 sq miles'] = (cities['Area square miles'] > 50) & cities['City name'].apply(lambda name: name.startswith('San'))\n", + "cities" + ], + "execution_count": 33, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City namePopulationArea square milesPopulation densityNamed after Saint and has Area Greater than 50 sq miles
0San Francisco85246946.8718187.945381False
1San Jose1015785176.535754.177760True
2Sacramento48519997.924955.055147False
\n", + "
" + ], + "text/plain": [ + " City name Population Area square miles Population density \\\n", + "0 San Francisco 852469 46.87 18187.945381 \n", + "1 San Jose 1015785 176.53 5754.177760 \n", + "2 Sacramento 485199 97.92 4955.055147 \n", + "\n", + " Named after Saint and has Area Greater than 50 sq miles \n", + "0 False \n", + "1 True \n", + "2 False " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 33 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "YHIWvc9Ms-Ll" + }, + "cell_type": "markdown", + "source": [ + "### Solution\n", + "\n", + "Click below for a solution." + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "T5OlrqtdtCIb", + "colab": {} + }, + "cell_type": "code", + "source": [ + "cities['Is wide and has saint name'] = (cities['Area square miles'] > 50) & cities['City name'].apply(lambda name: name.startswith('San'))\n", + "cities" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "colab_type": "text", + "id": "f-xAOJeMiXFB" + }, + "cell_type": "markdown", + "source": [ + "## Indexes\n", + "Both `Series` and `DataFrame` objects also define an `index` property that assigns an identifier value to each `Series` item or `DataFrame` row. \n", + "\n", + "By default, at construction, *pandas* assigns index values that reflect the ordering of the source data. Once created, the index values are stable; that is, they do not change when data is reordered." 
+ ] + }, + { + "metadata": { + "colab_type": "code", + "id": "2684gsWNinq9", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "7b863b33-378f-4563-e81a-4fd1ffd4dc06" + }, + "cell_type": "code", + "source": [ + "city_names.index" + ], + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=3, step=1)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 34 + } + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "F_qPe2TBjfWd", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "c3f73d57-f7b1-4f82-de93-182660648fff" + }, + "cell_type": "code", + "source": [ + "cities.index" + ], + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=3, step=1)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 35 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "hp2oWY9Slo_h" + }, + "cell_type": "markdown", + "source": [ + "Call `DataFrame.reindex` to manually reorder the rows. For example, the following has the same effect as sorting by city name:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "sN0zUzSAj-U1", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 142 + }, + "outputId": "597f00f1-baea-4a4c-8a75-867c5b76d5ab" + }, + "cell_type": "code", + "source": [ + "cities.reindex([2, 0, 1])" + ], + "execution_count": 36, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City namePopulationArea square milesPopulation densityNamed after Saint and has Area Greater than 50 sq miles
2Sacramento48519997.924955.055147False
0San Francisco85246946.8718187.945381False
1San Jose1015785176.535754.177760True
\n", + "
" + ], + "text/plain": [ + " City name Population Area square miles Population density \\\n", + "2 Sacramento 485199 97.92 4955.055147 \n", + "0 San Francisco 852469 46.87 18187.945381 \n", + "1 San Jose 1015785 176.53 5754.177760 \n", + "\n", + " Named after Saint and has Area Greater than 50 sq miles \n", + "2 False \n", + "0 False \n", + "1 True " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 36 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "-GQFz8NZuS06" + }, + "cell_type": "markdown", + "source": [ + "Reindexing is a great way to shuffle (randomize) a `DataFrame`. In the example below, we take the index, which is array-like, and pass it to NumPy's `random.permutation` function, which returns a randomly shuffled copy of its values (the original index is left unchanged). Calling `reindex` with this shuffled array causes the `DataFrame` rows to be shuffled in the same way.\n", + "Try running the following cell multiple times!" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "mF8GC0k8uYhz", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 142 + }, + "outputId": "96433926-6ec8-41a5-9389-aa53b762403f" + }, + "cell_type": "code", + "source": [ + "cities.reindex(np.random.permutation(cities.index))" + ], + "execution_count": 40, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City namePopulationArea square milesPopulation densityNamed after Saint and has Area Greater than 50 sq miles
2Sacramento48519997.924955.055147False
0San Francisco85246946.8718187.945381False
1San Jose1015785176.535754.177760True
\n", + "
" + ], + "text/plain": [ + " City name Population Area square miles Population density \\\n", + "2 Sacramento 485199 97.92 4955.055147 \n", + "0 San Francisco 852469 46.87 18187.945381 \n", + "1 San Jose 1015785 176.53 5754.177760 \n", + "\n", + " Named after Saint and has Area Greater than 50 sq miles \n", + "2 False \n", + "0 False \n", + "1 True " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 40 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "fSso35fQmGKb" + }, + "cell_type": "markdown", + "source": [ + "For more information, see the [Index documentation](http://pandas.pydata.org/pandas-docs/stable/indexing.html#index-objects)." + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "8UngIdVhz8C0" + }, + "cell_type": "markdown", + "source": [ + "## Exercise #2\n", + "\n", + "The `reindex` method allows index values that are not in the original `DataFrame`'s index values. Try it and see what happens if you use such values! Why do you think this is allowed?" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "PN55GrDX0jzO", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + }, + "outputId": "dc4eb6b6-b0bb-4bcb-ab0f-c09dce9e0422" + }, + "cell_type": "code", + "source": [ + "# Your code here\n", + "cities.reindex([2, 0, 6, 7, 1, 5])\n", + "#The reindex method allows index values that are not in the original DataFrame's index values. This is allowed so as to input new data that is not present currently.\n", + "#Also sometimes index values may be taken from the dataset, hence it may not be convenient to use that index and instead we can use reindex to give a more suitable, logical index to the dataset." + ], + "execution_count": 41, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
City namePopulationArea square milesPopulation densityNamed after Saint and has Area Greater than 50 sq miles
2Sacramento485199.097.924955.055147False
0San Francisco852469.046.8718187.945381False
6NaNNaNNaNNaNNaN
7NaNNaNNaNNaNNaN
1San Jose1015785.0176.535754.177760True
5NaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " City name Population Area square miles Population density \\\n", + "2 Sacramento 485199.0 97.92 4955.055147 \n", + "0 San Francisco 852469.0 46.87 18187.945381 \n", + "6 NaN NaN NaN NaN \n", + "7 NaN NaN NaN NaN \n", + "1 San Jose 1015785.0 176.53 5754.177760 \n", + "5 NaN NaN NaN NaN \n", + "\n", + " Named after Saint and has Area Greater than 50 sq miles \n", + "2 False \n", + "0 False \n", + "6 NaN \n", + "7 NaN \n", + "1 True \n", + "5 NaN " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 41 + } + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "TJffr5_Jwqvd" + }, + "cell_type": "markdown", + "source": [ + "### Solution\n", + "\n", + "Click below for the solution." + ] + }, + { + "metadata": { + "colab_type": "text", + "id": "8oSvi2QWwuDH" + }, + "cell_type": "markdown", + "source": [ + "If your `reindex` input array includes values not in the original `DataFrame` index values, `reindex` will add new rows for these \"missing\" indices and populate all corresponding columns with `NaN` values:" + ] + }, + { + "metadata": { + "colab_type": "code", + "id": "yBdkucKCwy4x", + "colab": {} + }, + "cell_type": "code", + "source": [ + "cities.reindex([0, 4, 5, 2])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "colab_type": "text", + "id": "2l82PhPbwz7g" + }, + "cell_type": "markdown", + "source": [ + "This behavior is desirable because indexes are often strings pulled from the actual data (see the [*pandas* reindex\n", + "documentation](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html) for an example\n", + "in which the index values are browser names).\n", + "\n", + "In this case, allowing \"missing\" indices makes it easy to reindex using an external list, as you don't have to worry about\n", + "sanitizing the input." 
+ ] + } + ] +} \ No newline at end of file From 0ba9aad3752d2ceaef5ede45d8e29e5d421b7efb Mon Sep 17 00:00:00 2001 From: AGCreates <43198265+AGCreates@users.noreply.github.com> Date: Sun, 27 Jan 2019 16:37:54 +0530 Subject: [PATCH 4/7] Assigment 2 Numpy Examples solved, therefore whole assignment completed --- Numpy_Examples_1.ipynb | 90 +++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/Numpy_Examples_1.ipynb b/Numpy_Examples_1.ipynb index d2f1cfe..cc4b079 100644 --- a/Numpy_Examples_1.ipynb +++ b/Numpy_Examples_1.ipynb @@ -5,8 +5,7 @@ "colab": { "name": "Numpy_Examples 1.ipynb", "version": "0.3.2", - "provenance": [], - "include_colab_link": true + "provenance": [] }, "kernelspec": { "name": "python3", @@ -14,16 +13,6 @@ } }, "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "[View in Colaboratory](https://colab.research.google.com/github/AGCreates/Assignment-2/blob/AGCreates/Numpy_Examples_1.ipynb)" - ] - }, { "metadata": { "id": "3pSVAeWfuPcq", @@ -80,11 +69,11 @@ "metadata": { "id": "atYpk2ert0b-", "colab_type": "code", + "outputId": "786cbcf4-2557-46ed-c906-0d3fec25c903", "colab": { "base_uri": "https://localhost:8080/", "height": 85 - }, - "outputId": "1f4a43a7-7c79-4f79-9da9-8f6c9c6e9397" + } }, "cell_type": "code", "source": [ @@ -96,7 +85,7 @@ "print(b.shape) # Prints \"(2, 3)\"\n", "print(b[0, 0], b[0, 1], b[1, 0])" ], - "execution_count": 3, + "execution_count": 2, "outputs": [ { "output_type": "stream", @@ -124,11 +113,11 @@ "metadata": { "id": "V3rdzgr9uhHS", "colab_type": "code", + "outputId": "e408a4dc-b427-4e47-983e-f9cba8a991c0", "colab": { "base_uri": "https://localhost:8080/", "height": 221 - }, - "outputId": "82021c72-1d40-4494-d3bd-605a51a607a5" + } }, "cell_type": "code", "source": [ @@ -144,7 +133,7 @@ "print('d',d)\n", "print('e',e)" ], - "execution_count": 4, + "execution_count": 3, "outputs": [ { 
"output_type": "stream", @@ -159,8 +148,8 @@ "d [[5 5 5]\n", " [5 5 5]\n", " [5 5 5]]\n", - "e [[0.97720034 0.28219433]\n", - " [0.05717303 0.44113851]]\n" + "e [[0.34680347 0.47926927]\n", + " [0.06596635 0.0710514 ]]\n" ], "name": "stdout" } @@ -180,11 +169,11 @@ "metadata": { "id": "-8JuqYt4upeo", "colab_type": "code", + "outputId": "bbf5c19a-dfb1-4b6a-cdd3-7ed06dd7f0a9", "colab": { "base_uri": "https://localhost:8080/", "height": 170 - }, - "outputId": "21f8bc53-208a-40c7-cb4b-59b75437d2e9" + } }, "cell_type": "code", "source": [ @@ -193,7 +182,7 @@ "print(a)\n", "print(b)" ], - "execution_count": 5, + "execution_count": 4, "outputs": [ { "output_type": "stream", @@ -232,11 +221,11 @@ "metadata": { "id": "grF5_yUSuxVK", "colab_type": "code", + "outputId": "bd629ad2-ab71-4bc3-9acf-52e360459aec", "colab": { "base_uri": "https://localhost:8080/", "height": 51 - }, - "outputId": "36940c6d-6d74-456b-9481-1f6af2242b3e" + } }, "cell_type": "code", "source": [ @@ -256,7 +245,7 @@ "b[0, 0] = 77 # b[0, 0] is the same piece of data as a[0, 1]\n", "print(a[0, 1]) # Prints \"77\"" ], - "execution_count": 6, + "execution_count": 5, "outputs": [ { "output_type": "stream", @@ -282,11 +271,11 @@ "metadata": { "id": "kubpegh2u4zF", "colab_type": "code", + "outputId": "eb44409b-2614-4aa6-ee45-6d8d07f5d5f0", "colab": { "base_uri": "https://localhost:8080/", "height": 119 - }, - "outputId": "d436e463-31fd-41e9-8f2f-12e04f450ef9" + } }, "cell_type": "code", "source": [ @@ -304,7 +293,7 @@ "print(col_r1, col_r1.shape) # Prints \"[ 2 6 10] (3,)\"\n", "print(col_r2, col_r2.shape)" ], - "execution_count": 7, + "execution_count": 6, "outputs": [ { "output_type": "stream", @@ -334,11 +323,11 @@ "metadata": { "id": "YvBw3ImjvGqD", "colab_type": "code", + "outputId": "8881c7f7-c8f5-490d-b383-b2f6f6ceaf8a", "colab": { "base_uri": "https://localhost:8080/", "height": 68 - }, - "outputId": "9d182092-4e38-4715-b592-63ea4fd64596" + } }, "cell_type": "code", "source": [ @@ -348,7 +337,7 @@ 
"print(np.sum(x, axis=0)) # Compute sum of each column; prints \"[4 6]\"\n", "print(np.sum(x, axis=1)) # Compute sum of each row; prints \"[3 7]\"" ], - "execution_count": 8, + "execution_count": 7, "outputs": [ { "output_type": "stream", @@ -375,11 +364,11 @@ "metadata": { "id": "-PNfOMvh4_Gp", "colab_type": "code", + "outputId": "b6a97140-8c94-4cef-9019-2422db2bab7a", "colab": { "base_uri": "https://localhost:8080/", "height": 68 - }, - "outputId": "c6b67d2a-ab61-4b5b-ed1c-2f4201694e66" + } }, "cell_type": "code", "source": [ @@ -393,7 +382,7 @@ "\n", "print(b[mask]) #applying the mask on the numpy array\n" ], - "execution_count": 9, + "execution_count": 8, "outputs": [ { "output_type": "stream", @@ -410,11 +399,11 @@ "metadata": { "id": "HbEPBbz-5J9K", "colab_type": "code", + "outputId": "5ba2ae8b-9326-4f40-ef8e-3b2684d9f650", "colab": { "base_uri": "https://localhost:8080/", "height": 34 - }, - "outputId": "12a21405-a73f-4174-a819-bae2ee150850" + } }, "cell_type": "code", "source": [ @@ -423,7 +412,7 @@ "\n", "print(modified_b)" ], - "execution_count": 10, + "execution_count": 9, "outputs": [ { "output_type": "stream", @@ -448,11 +437,11 @@ "metadata": { "id": "-cvqeXd_AGo1", "colab_type": "code", + "outputId": "1015fdcd-da7e-486c-a09b-cf18fb91e134", "colab": { "base_uri": "https://localhost:8080/", "height": 119 - }, - "outputId": "69b028fc-4ee7-4069-e50b-a456f47098b9" + } }, "cell_type": "code", "source": [ @@ -461,7 +450,7 @@ "\n", "print(a[:, [1,0,2]])" ], - "execution_count": 11, + "execution_count": 10, "outputs": [ { "output_type": "stream", @@ -491,11 +480,11 @@ "metadata": { "id": "0FrOURRDAZNP", "colab_type": "code", + "outputId": "f3c1b7d7-044e-4cca-b715-8248a29f4b05", "colab": { "base_uri": "https://localhost:8080/", "height": 119 - }, - "outputId": "16187094-b86d-48e7-9b1d-1fd9c8f2fc59" + } }, "cell_type": "code", "source": [ @@ -504,7 +493,7 @@ "\n", "print(a[[1,0,2], :]) #Change made by AGCreates in this line instead of a there was arr written" 
], - "execution_count": 13, + "execution_count": 11, "outputs": [ { "output_type": "stream", @@ -519,6 +508,19 @@ "name": "stdout" } ] + }, + { + "metadata": { + "id": "lUTzlm9g_Ouk", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] } ] } \ No newline at end of file From d373752aeb27ae48e4b034d9b6fbdd2d6fd98cf6 Mon Sep 17 00:00:00 2001 From: AGCreates <43198265+AGCreates@users.noreply.github.com> Date: Sun, 27 Jan 2019 16:54:36 +0530 Subject: [PATCH 5/7] Delete Numpy_Examples_1.ipynb --- Numpy_Examples_1.ipynb | 480 ----------------------------------------- 1 file changed, 480 deletions(-) delete mode 100644 Numpy_Examples_1.ipynb diff --git a/Numpy_Examples_1.ipynb b/Numpy_Examples_1.ipynb deleted file mode 100644 index d2e7361..0000000 --- a/Numpy_Examples_1.ipynb +++ /dev/null @@ -1,480 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Numpy_Examples 1.ipynb", - "version": "0.3.2", - - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - - "metadata": { - "id": "3pSVAeWfuPcq", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "# Numpy Examples\n", - "\n", - "## What is numpy?\n", - "\n", - "#### Python has built-in:\n", - "\n", - "- containers: lists (costless insertion and append), dictionnaries (fast lookup)\n", - "- high-level number objects: integers, floating point\n", - "\n", - "#### Numpy is:\n", - "\n", - " - extension package to Python for multidimensional arrays\n", - " - closer to hardware (efficiency)\n", - " - designed for scientific computation (convenience)\n", - "\n", - "\n", - "#### Import numpy\n", - "\n" - ] - }, - { - "metadata": { - "id": "ozUi4_X55UHE", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "import numpy as np" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "3-1ghFDF5N2z", - 
"colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "### Uncomment Print statement and run each cell to see the output\n", - "\n", - "#### Create numpy arrays\n" - ] - }, - { - "metadata": { - "id": "atYpk2ert0b-", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "a = np.array([1, 2, 3]) # Create a rank 1 array\n", - "print(a)\n", - "print(type(a)) #print type of a\n", - "\n", - "b = np.array([[1,2,3],[4,5,6]]) # Create a rank 2 array\n", - "print(b.shape) # Prints \"(2, 3)\"\n", - "print(b[0, 0], b[0, 1], b[1, 0])" - ], - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[1 2 3]\n", - "\n", - "(2, 3)\n", - "1 2 4\n" - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "id": "Kro5ZOwXue5n", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Some basic functions for creating arrays. Print all the defined arrays and see the results." - ] - }, - { - "metadata": { - "id": "V3rdzgr9uhHS", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "a = np.zeros(shape=(2,2))\n", - "b = np.ones(shape = (3,3))\n", - "c = np.eye(2)\n", - "d = np.full(shape=(3,3), fill_value=5)\n", - "e = np.random.random((2,2))\n", - "\n", - "print('a', a)\n", - "print('b',b)\n", - "print('c',c)\n", - "print('d',d)\n", - "print('e',e)" - ], - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "text": [ - "a [[0. 0.]\n", - " [0. 0.]]\n", - "b [[1. 1. 1.]\n", - " [1. 1. 1.]\n", - " [1. 1. 1.]]\n", - "c [[1. 0.]\n", - " [0. 
1.]]\n", - "d [[5 5 5]\n", - " [5 5 5]\n", - " [5 5 5]]\n", - - "e [[0.34680347 0.47926927]\n", - " [0.06596635 0.0710514 ]]\n" -======= - - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "id": "8RPW_SutukjF", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Execute and understand :)" - ] - }, - { - "metadata": { - "id": "-8JuqYt4upeo", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "a == np.arange(10)\n", - "b == np.linspace(0,10, num=6)\n", - "print(a)\n", - "print(b)" - ], - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[[0. 0.]\n", - " [0. 0.]]\n", - "[[1. 1. 1.]\n", - " [1. 1. 1.]\n", - " [1. 1. 1.]]\n" - ], - "name": "stdout" - }, - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: DeprecationWarning: elementwise == comparison failed; this will raise an error in the future.\n", - " \"\"\"Entry point for launching an IPython kernel.\n", - "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:2: DeprecationWarning: elementwise == comparison failed; this will raise an error in the future.\n", - " \n" - ], - "name": "stderr" - } - ] - }, - { - "metadata": { - "id": "MRHhbjx4uvYN", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Array Indexing" - ] - }, - { - "metadata": { - "id": "grF5_yUSuxVK", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])\n", - "\n", - "# Use slicing to pull out the subarray consisting of the first 2 rows\n", - "# and columns 1 and 2; b is the following array of shape (2, 2):\n", - "# [[2 3]\n", - "# [6 7]]\n", - "b = a[:2, 1:3]\n", - "\n", - "# A slice of an array is a view into the same data, so modifying it\n", - "# will modify the original array.\n", - "\n", - "print(a[0, 1]) # Prints \"2\"\n", - "\n", - "b[0, 0] = 77 # b[0, 0] is the same piece of data as a[0, 1]\n", - 
"print(a[0, 1]) # Prints \"77\"" - ], - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "text": [ - "2\n", - "77\n" - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "id": "s400Gijxu0kO", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Slicing" - ] - }, - { - "metadata": { - "id": "kubpegh2u4zF", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])\n", - "\n", - "row_r1 = a[1, :] # Rank 1 view of the second row of a\n", - "row_r2 = a[1:2, :] # Rank 2 view of the second row of a\n", - "\n", - "print(row_r1, row_r1.shape) # Prints \"[5 6 7 8] (4,)\"\n", - "print(row_r2, row_r2.shape) # Prints \"[[5 6 7 8]] (1, 4)\"\n", - "\n", - "col_r1 = a[:, 1]\n", - "col_r2 = a[:, 1:2]\n", - "\n", - "print(col_r1, col_r1.shape) # Prints \"[ 2 6 10] (3,)\"\n", - "print(col_r2, col_r2.shape)" - ], - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[5 6 7 8] (4,)\n", - "[[5 6 7 8]] (1, 4)\n", - "[ 2 6 10] (3,)\n", - "[[ 2]\n", - " [ 6]\n", - " [10]] (3, 1)\n" - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "id": "TmGnCO3AvE8t", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Aritmetic operations" - ] - }, - { - "metadata": { - "id": "YvBw3ImjvGqD", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "x = np.array([[1,2],[3,4]])\n", - "\n", - "print(np.sum(x)) # Compute sum of all elements; prints \"10\"\n", - "print(np.sum(x, axis=0)) # Compute sum of each column; prints \"[4 6]\"\n", - "print(np.sum(x, axis=1)) # Compute sum of each row; prints \"[3 7]\"" - ], - "execution_count": 7, - "outputs": [ - { - "output_type": "stream", - "text": [ - "10\n", - "[4 6]\n", - "[3 7]\n" - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "id": "uaVY3ZzD4pC2", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Using Boolean Mask" - ] - }, - { - 
"metadata": { - "id": "-PNfOMvh4_Gp", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "b = np.arange(10)\n", - "\n", - "print(b)\n", - "\n", - "mask = b%2!=0 #perform computations on the list \n", - "\n", - "print(mask)\n", - "\n", - "print(b[mask]) #applying the mask on the numpy array\n" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[0 1 2 3 4 5 6 7 8 9]\n", - "[False True False True False True False True False True]\n", - "[1 3 5 7 9]\n" - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "id": "HbEPBbz-5J9K", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "modified_b = b\n", - "modified_b[mask] = -1\n", - "\n", - "print(modified_b)" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[ 0 -1 2 -1 4 -1 6 -1 8 -1]\n" - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "id": "zgSd71EEAHC7", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Swapping two columns in a 2d numpy array" - ] - }, - { - "metadata": { - "id": "-cvqeXd_AGo1", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "a = np.arange(9).reshape(3,3)\n", - "print(a)\n", - "\n", - "print(a[:, [1,0,2]])" - ], - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[[0 1 2]\n", - " [3 4 5]\n", - " [6 7 8]]\n", - "[[1 0 2]\n", - " [4 3 5]\n", - " [7 6 8]]\n" - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "id": "U7ifiLY3Ayky", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Swapping two rows in a 2d numpy array" - ] - }, - { - "metadata": { - "id": "0FrOURRDAZNP", - "colab_type": "code", - - }, - "cell_type": "code", - "source": [ - "a = np.arange(9).reshape(3,3)\n", - "print(a)\n", - "\n", - - "print(a[[1,0,2], :]) #Change made by AGCreates in this line instead of a there was arr written" - - ], - "execution_count": 11, - "outputs": [ - { - 
"output_type": "stream", - "text": [ - "[[0 1 2]\n", - " [3 4 5]\n", - " [6 7 8]]\n", - "[[3 4 5]\n", - " [0 1 2]\n", - " [6 7 8]]\n" - ], - "name": "stdout" - } - ] - - } - ] -} \ No newline at end of file From 1c2cdc50c078b85de3bbd9c97536b2a63723d671 Mon Sep 17 00:00:00 2001 From: AGCreates <43198265+AGCreates@users.noreply.github.com> Date: Sun, 27 Jan 2019 17:05:04 +0530 Subject: [PATCH 6/7] Assignment 2 whole completed (this consists of the last part Numpy Examples) --- Numpy_Examples_1.ipynb | 522 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 522 insertions(+) create mode 100644 Numpy_Examples_1.ipynb diff --git a/Numpy_Examples_1.ipynb b/Numpy_Examples_1.ipynb new file mode 100644 index 0000000..1d9207e --- /dev/null +++ b/Numpy_Examples_1.ipynb @@ -0,0 +1,522 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Numpy_Examples 1.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "metadata": { + "id": "3pSVAeWfuPcq", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Numpy Examples\n", + "\n", + "## What is numpy?\n", + "\n", + "#### Python has built-in:\n", + "\n", + "- containers: lists (costless insertion and append), dictionnaries (fast lookup)\n", + "- high-level number objects: integers, floating point\n", + "\n", + "#### Numpy is:\n", + "\n", + " - extension package to Python for multidimensional arrays\n", + " - closer to hardware (efficiency)\n", + " - designed for scientific computation (convenience)\n", + "\n", + "\n", + "#### Import numpy\n", + "\n" + ] + }, + { + "metadata": { + "id": "ozUi4_X55UHE", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import numpy as np" + ], + 
"execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "3-1ghFDF5N2z", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Uncomment Print statement and run each cell to see the output\n", + "\n", + "#### Create numpy arrays\n" + ] + }, + { + "metadata": { + "id": "atYpk2ert0b-", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "outputId": "857d3efe-efa5-483e-a249-957096a97eb0" + }, + "cell_type": "code", + "source": [ + "a = np.array([1, 2, 3]) # Create a rank 1 array\n", + "print(a)\n", + "print(type(a)) #print type of a\n", + "\n", + "b = np.array([[1,2,3],[4,5,6]]) # Create a rank 2 array\n", + "print(b.shape) # Prints \"(2, 3)\"\n", + "print(b[0, 0], b[0, 1], b[1, 0])" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[1 2 3]\n", + "\n", + "(2, 3)\n", + "1 2 4\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "Kro5ZOwXue5n", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Some basic functions for creating arrays. Print all the defined arrays and see the results." + ] + }, + { + "metadata": { + "id": "V3rdzgr9uhHS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + }, + "outputId": "7b1964c2-45c4-49b8-9f09-793cd10d6df9" + }, + "cell_type": "code", + "source": [ + "a = np.zeros(shape=(2,2))\n", + "b = np.ones(shape = (3,3))\n", + "c = np.eye(2)\n", + "d = np.full(shape=(3,3), fill_value=5)\n", + "e = np.random.random((2,2))\n", + "\n", + "print('a', a)\n", + "print('b',b)\n", + "print('c',c)\n", + "print('d',d)\n", + "print('e',e)" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "a [[0. 0.]\n", + " [0. 0.]]\n", + "b [[1. 1. 1.]\n", + " [1. 1. 1.]\n", + " [1. 1. 1.]]\n", + "c [[1. 0.]\n", + " [0. 
1.]]\n", + "d [[5 5 5]\n", + " [5 5 5]\n", + " [5 5 5]]\n", + "e [[0.08873862 0.4361318 ]\n", + " [0.19152098 0.6743398 ]]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "8RPW_SutukjF", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Execute and understand :)" + ] + }, + { + "metadata": { + "id": "-8JuqYt4upeo", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "outputId": "447739b9-ef8d-410a-859d-aa684f5dfadf" + }, + "cell_type": "code", + "source": [ + "a = np.arange(10) # assign (not compare): a becomes the integers 0..9\n", + "b = np.linspace(0,10, num=6) # assign (not compare): 6 evenly spaced values from 0 to 10\n", + "print(a)\n", + "print(b)" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[0 1 2 3 4 5 6 7 8 9]\n", + "[ 0. 2. 4. 6. 8. 10.]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "MRHhbjx4uvYN", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Array Indexing" + ] + }, + { + "metadata": { + "id": "grF5_yUSuxVK", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "outputId": "4c371809-e687-44fa-dad4-f07ec9657d3a" + }, + "cell_type": "code", + "source": [ + "a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])\n", + "\n", + "# Use slicing to pull out the subarray consisting of the first 2 rows\n", + "# and columns 1 and 2; b is the following array of shape (2, 2):\n", + "# [[2 3]\n", + "# [6 7]]\n", + "b = a[:2, 1:3]\n", + "\n", + "# A slice of an array is a view into the same data, so modifying it\n", + "# will modify the original array.\n", + "\n", + "print(a[0, 1]) # Prints \"2\"\n", + "\n", + "b[0, 0] = 77 # b[0, 0] is the same piece of data as a[0, 1]\n", + "print(a[0, 1]) # Prints \"77\"" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "2\n", + "77\n" + ], + "name": "stdout" + } + 
] + }, + { + "metadata": { + "id": "s400Gijxu0kO", + "colab_type": "text" + }, + "cell_type":
"markdown", + "source": [ + "#### Slicing" + ] + }, + { + "metadata": { + "id": "kubpegh2u4zF", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": "ee70891b-a521-44f0-d39c-cfa200f283dc" + }, + "cell_type": "code", + "source": [ + "a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])\n", + "\n", + "row_r1 = a[1, :] # Rank 1 view of the second row of a\n", + "row_r2 = a[1:2, :] # Rank 2 view of the second row of a\n", + "\n", + "print(row_r1, row_r1.shape) # Prints \"[5 6 7 8] (4,)\"\n", + "print(row_r2, row_r2.shape) # Prints \"[[5 6 7 8]] (1, 4)\"\n", + "\n", + "col_r1 = a[:, 1]\n", + "col_r2 = a[:, 1:2]\n", + "\n", + "print(col_r1, col_r1.shape) # Prints \"[ 2 6 10] (3,)\"\n", + "print(col_r2, col_r2.shape)" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[5 6 7 8] (4,)\n", + "[[5 6 7 8]] (1, 4)\n", + "[ 2 6 10] (3,)\n", + "[[ 2]\n", + " [ 6]\n", + " [10]] (3, 1)\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "TmGnCO3AvE8t", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Arithmetic operations" + ] + }, + { + "metadata": { + "id": "YvBw3ImjvGqD", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "a6687350-7968-4fa2-d314-23f4eb51b067" + }, + "cell_type": "code", + "source": [ + "x = np.array([[1,2],[3,4]])\n", + "\n", + "print(np.sum(x)) # Compute sum of all elements; prints \"10\"\n", + "print(np.sum(x, axis=0)) # Compute sum of each column; prints \"[4 6]\"\n", + "print(np.sum(x, axis=1)) # Compute sum of each row; prints \"[3 7]\"" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "10\n", + "[4 6]\n", + "[3 7]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "uaVY3ZzD4pC2", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Using Boolean Mask" + ] + }, + { + 
"metadata":
{ + "id": "-PNfOMvh4_Gp", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "aacee99e-0a0d-4511-b5f6-e5dbad8ca3ba" + }, + "cell_type": "code", + "source": [ + "b = np.arange(10)\n", + "\n", + "print(b)\n", + "\n", + "mask = b%2!=0 #perform computations on the list \n", + "\n", + "print(mask)\n", + "print(b[mask]) #applying the mask on the numpy array\n" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[0 1 2 3 4 5 6 7 8 9]\n", + "[False True False True False True False True False True]\n", + "[1 3 5 7 9]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "HbEPBbz-5J9K", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "62a9990a-9114-4bb1-8715-946e93df6a96" + }, + "cell_type": "code", + "source": [ + "modified_b = b\n", + "modified_b[mask] = -1\n", + "\n", + "print(modified_b)" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[ 0 -1 2 -1 4 -1 6 -1 8 -1]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "zgSd71EEAHC7", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Swapping two columns in a 2d numpy array" + ] + }, + { + "metadata": { + "id": "-cvqeXd_AGo1", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": "f709a398-9c0a-450e-b521-989f0dba70a7" + }, + "cell_type": "code", + "source": [ + "a = np.arange(9).reshape(3,3)\n", + "print(a)\n", + "print(a[:, [1,0,2]])" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0 1 2]\n", + " [3 4 5]\n", + " [6 7 8]]\n", + "[[1 0 2]\n", + " [4 3 5]\n", + " [7 6 8]]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "U7ifiLY3Ayky", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Swapping two rows in a 2d numpy 
array" + ] + }, + { + "metadata": { + "id": "0FrOURRDAZNP", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": "eacd1335-eb6f-45f9-c287-d3314758da2f" + }, + "cell_type": "code", + "source": [ + "a = np.arange(9).reshape(3,3)\n", + "print(a)\n", + "\n", + "print(a[[1,0,2], :]) #Changes made by AGCreates, a instead of arr" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0 1 2]\n", + " [3 4 5]\n", + " [6 7 8]]\n", + "[[3 4 5]\n", + " [0 1 2]\n", + " [6 7 8]]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "ASJXUAR5FXl9", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "#This has been done again by AGCreates to resolve merge conflicts." + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From 0ff3979c0a1e15a8cc1095cbecc24ba34824e8a8 Mon Sep 17 00:00:00 2001 From: AGCreates <43198265+AGCreates@users.noreply.github.com> Date: Sun, 27 Jan 2019 17:12:15 +0530 Subject: [PATCH 7/7] Delete intro_to_pandas.ipynb This part belongs to assignment 5 ...
--- intro_to_pandas.ipynb | 1725 ----------------------------------------- 1 file changed, 1725 deletions(-) delete mode 100644 intro_to_pandas.ipynb diff --git a/intro_to_pandas.ipynb b/intro_to_pandas.ipynb deleted file mode 100644 index 36035dd..0000000 --- a/intro_to_pandas.ipynb +++ /dev/null @@ -1,1725 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "intro_to_pandas.ipynb", - "version": "0.3.2", - "provenance": [], - "collapsed_sections": [ - "JndnmDMp66FL", - "YHIWvc9Ms-Ll", - "TJffr5_Jwqvd" - ], - "include_colab_link": true - }, - "kernelspec": { - "name": "python2", - "display_name": "Python 2" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "JndnmDMp66FL" - }, - "cell_type": "markdown", - "source": [ - "#### Copyright 2017 Google LLC." - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "hMqWDc_m6rUC", - "cellView": "both", - "colab": {} - }, - "cell_type": "code", - "source": [ - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." 
- ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "text", - "id": "rHLcriKWLRe4" - }, - "cell_type": "markdown", - "source": [ - "# Intro to pandas" - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "QvJBqX8_Bctk" - }, - "cell_type": "markdown", - "source": [ - "**Learning Objectives:**\n", - " * Gain an introduction to the `DataFrame` and `Series` data structures of the *pandas* library\n", - " * Access and manipulate data within a `DataFrame` and `Series`\n", - " * Import CSV data into a *pandas* `DataFrame`\n", - " * Reindex a `DataFrame` to shuffle data" - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "TIFJ83ZTBctl" - }, - "cell_type": "markdown", - "source": [ - "[*pandas*](http://pandas.pydata.org/) is a column-oriented data analysis API. It's a great tool for handling and analyzing input data, and many ML frameworks support *pandas* data structures as inputs.\n", - "Although a comprehensive introduction to the *pandas* API would span many pages, the core concepts are fairly straightforward, and we'll present them below. For a more complete reference, the [*pandas* docs site](http://pandas.pydata.org/pandas-docs/stable/index.html) contains extensive documentation and many tutorials." 
- ] - }, - { - "metadata": { - "colab_type": "text", - "id": "s_JOISVgmn9v" - }, - "cell_type": "markdown", - "source": [ - "## Basic Concepts\n", - "\n", - "The following line imports the *pandas* API and prints the API version:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "aSRYu62xUi3g", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - }, - "outputId": "aecdcd96-a616-40b3-8669-3eca04f2d3b2" - }, - "cell_type": "code", - "source": [ - "from __future__ import print_function\n", - "\n", - "import pandas as pd\n", - "pd.__version__" - ], - "execution_count": 19, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "u'0.22.0'" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 19 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "daQreKXIUslr" - }, - "cell_type": "markdown", - "source": [ - "The primary data structures in *pandas* are implemented as two classes:\n", - "\n", - " * **`DataFrame`**, which you can imagine as a relational data table, with rows and named columns.\n", - " * **`Series`**, which is a single column. A `DataFrame` contains one or more `Series` and a name for each `Series`.\n", - "\n", - "The data frame is a commonly used abstraction for data manipulation. Similar implementations exist in [Spark](https://spark.apache.org/) and [R](https://www.r-project.org/about.html)." - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "fjnAk1xcU0yc" - }, - "cell_type": "markdown", - "source": [ - "One way to create a `Series` is to construct a `Series` object. 
For example:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "DFZ42Uq7UFDj", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 85 - }, - "outputId": "e1f97267-aa6f-449d-bbdc-0e3310e30f9d" - }, - "cell_type": "code", - "source": [ - "pd.Series(['San Francisco', 'San Jose', 'Sacramento'])" - ], - "execution_count": 20, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0 San Francisco\n", - "1 San Jose\n", - "2 Sacramento\n", - "dtype: object" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 20 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "U5ouUp1cU6pC" - }, - "cell_type": "markdown", - "source": [ - "`DataFrame` objects can be created by passing a `dict` mapping `string` column names to their respective `Series`. If the `Series` don't match in length, missing values are filled with special [NA/NaN](http://pandas.pydata.org/pandas-docs/stable/missing_data.html) values. Example:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "avgr6GfiUh8t", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 142 - }, - "outputId": "a9fd4201-f678-4151-dbfd-f25ab4f84d27" - }, - "cell_type": "code", - "source": [ - "city_names = pd.Series(['San Francisco', 'San Jose', 'Sacramento'])\n", - "population = pd.Series([852469, 1015785, 485199])\n", - "\n", - "pd.DataFrame({ 'City name': city_names, 'Population': population })" - ], - "execution_count": 21, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
City namePopulation
0San Francisco852469
1San Jose1015785
2Sacramento485199
\n", - "
" - ], - "text/plain": [ - " City name Population\n", - "0 San Francisco 852469\n", - "1 San Jose 1015785\n", - "2 Sacramento 485199" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 21 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "oa5wfZT7VHJl" - }, - "cell_type": "markdown", - "source": [ - "But most of the time, you load an entire file into a `DataFrame`. The following example loads a file with California housing data. Run the following cell to load the data and create feature definitions:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "av6RYOraVG1V", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 297 - }, - "outputId": "50daf81b-0947-4c5f-e2d7-f647ef5a68a6" - }, - "cell_type": "code", - "source": [ - "california_housing_dataframe = pd.read_csv(\"https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv\", sep=\",\")\n", - "california_housing_dataframe.describe()" - ], - "execution_count": 22, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
count17000.00000017000.00000017000.00000017000.00000017000.00000017000.00000017000.00000017000.00000017000.000000
mean-119.56210835.62522528.5893532643.664412539.4108241429.573941501.2219413.883578207300.912353
std2.0051662.13734012.5869372179.947071421.4994521147.852959384.5208411.908157115983.764387
min-124.35000032.5400001.0000002.0000001.0000003.0000001.0000000.49990014999.000000
25%-121.79000033.93000018.0000001462.000000297.000000790.000000282.0000002.566375119400.000000
50%-118.49000034.25000029.0000002127.000000434.0000001167.000000409.0000003.544600180400.000000
75%-118.00000037.72000037.0000003151.250000648.2500001721.000000605.2500004.767000265000.000000
max-114.31000041.95000052.00000037937.0000006445.00000035682.0000006082.00000015.000100500001.000000
\n", - "
" - ], - "text/plain": [ - " longitude latitude housing_median_age total_rooms \\\n", - "count 17000.000000 17000.000000 17000.000000 17000.000000 \n", - "mean -119.562108 35.625225 28.589353 2643.664412 \n", - "std 2.005166 2.137340 12.586937 2179.947071 \n", - "min -124.350000 32.540000 1.000000 2.000000 \n", - "25% -121.790000 33.930000 18.000000 1462.000000 \n", - "50% -118.490000 34.250000 29.000000 2127.000000 \n", - "75% -118.000000 37.720000 37.000000 3151.250000 \n", - "max -114.310000 41.950000 52.000000 37937.000000 \n", - "\n", - " total_bedrooms population households median_income \\\n", - "count 17000.000000 17000.000000 17000.000000 17000.000000 \n", - "mean 539.410824 1429.573941 501.221941 3.883578 \n", - "std 421.499452 1147.852959 384.520841 1.908157 \n", - "min 1.000000 3.000000 1.000000 0.499900 \n", - "25% 297.000000 790.000000 282.000000 2.566375 \n", - "50% 434.000000 1167.000000 409.000000 3.544600 \n", - "75% 648.250000 1721.000000 605.250000 4.767000 \n", - "max 6445.000000 35682.000000 6082.000000 15.000100 \n", - "\n", - " median_house_value \n", - "count 17000.000000 \n", - "mean 207300.912353 \n", - "std 115983.764387 \n", - "min 14999.000000 \n", - "25% 119400.000000 \n", - "50% 180400.000000 \n", - "75% 265000.000000 \n", - "max 500001.000000 " - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 22 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "WrkBjfz5kEQu" - }, - "cell_type": "markdown", - "source": [ - "The example above used `DataFrame.describe` to show interesting statistics about a `DataFrame`. 
Another useful function is `DataFrame.head`, which displays the first few records of a `DataFrame`:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "s3ND3bgOkB5k", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 204 - }, - "outputId": "6052fdd2-cd27-4ad0-a782-714236c355c3" - }, - "cell_type": "code", - "source": [ - "california_housing_dataframe.head()" - ], - "execution_count": 23, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
0-114.3134.1915.05612.01283.01015.0472.01.493666900.0
1-114.4734.4019.07650.01901.01129.0463.01.820080100.0
2-114.5633.6917.0720.0174.0333.0117.01.650985700.0
3-114.5733.6414.01501.0337.0515.0226.03.191773400.0
4-114.5733.5720.01454.0326.0624.0262.01.925065500.0
\n", - "
" - ], - "text/plain": [ - " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", - "0 -114.31 34.19 15.0 5612.0 1283.0 \n", - "1 -114.47 34.40 19.0 7650.0 1901.0 \n", - "2 -114.56 33.69 17.0 720.0 174.0 \n", - "3 -114.57 33.64 14.0 1501.0 337.0 \n", - "4 -114.57 33.57 20.0 1454.0 326.0 \n", - "\n", - " population households median_income median_house_value \n", - "0 1015.0 472.0 1.4936 66900.0 \n", - "1 1129.0 463.0 1.8200 80100.0 \n", - "2 333.0 117.0 1.6509 85700.0 \n", - "3 515.0 226.0 3.1917 73400.0 \n", - "4 624.0 262.0 1.9250 65500.0 " - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 23 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "w9-Es5Y6laGd" - }, - "cell_type": "markdown", - "source": [ - "Another powerful feature of *pandas* is graphing. For example, `DataFrame.hist` lets you quickly study the distribution of values in a column:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "nqndFVXVlbPN", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 396 - }, - "outputId": "d40e6503-2585-49f3-b7c5-48bfd20b5581" - }, - "cell_type": "code", - "source": [ - "california_housing_dataframe.hist('housing_median_age')" - ], - "execution_count": 24, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[]],\n", - " dtype=object)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 24 - }, - { - "output_type": "display_data", - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeoAAAFZCAYAAABXM2zhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3X1UlHX+//HXMDAH0UEEGTfLarf0\naEmaa5l4U0Iokp7IVRPWdU3q6Iqtlql499WTlajRmmZZmunRU7GNtofcAjJxyyRanT0uuu0p2VOr\neTejKCqgSPP7o9Os/FRguP1Az8dfcTEz1+d6H+3pdQ1zYfF6vV4BAAAjBTT3AgAAwPURagAADEao\nAQAwGKEGAMBghBoAAIMRagAADEaogVo6cuSI7rjjjkbdxz//+U+lpKQ06j4a0h133KEjR47o448/\n1ty5c5t7OUCrZOFz1EDtHDlyREOHDtW//vWv5l6KMe644w7l5ubqpptuau6lAK0WZ9SAn5xOp0aO\nHKn7779f27dv1w8//KA//elPio+PV3x8vNLS0lRaWipJiomJ0d69e33P/enry5cva/78+Ro2bJji\n4uI0bdo0nT9/XgUFBYqLi5MkrV69Ws8++6xSU1MVGxur0aNH6+TJk5KkgwcPaujQoRo6dKheeeUV\njRw5UgUFBdWue/Xq1Vq0aJEmT56sgQMHatasWcrLy9OoUaM0cOBA5eXlSZIuXbqk5557TsOGDVNM\nTIzWrl3re42//e1viouL0/Dhw7V+/Xrf9m3btmnixImSJI/Ho5SUFMXHxysmJkZvvfVWleN/9913\nNXr0aA0cOFDp6ek1zrusrEwzZszwrWfZsmW+71U3hx07dmjkyJGKjY3VpEmTdPr06Rr3BZiIUAN+\n+OGHH1RRUaEPPvhAc+fO1cqVK/XRRx/p008/1bZt2/TXv/5VJSUl2rhxY7Wvs3v3bh05ckTZ2dnK\nzc3V7bffrn/84x9XPS47O1vz5s3Tjh07FBERoa1bt0qSFi5cqIkTJyo3N1ft2rXTt99+W6v179q1\nSy+88II++OADZWdn+9Y9ZcoUrVu3TpK0bt06HTp0SB988IG2b9+unJwc5eXlqbKyUvPnz9eiRYv0\n0UcfKSAgQJWVlVft47XXXtNNN92k7Oxsbdq0SRkZGTp27Jjv+3//+9+VmZmprVu3asuWLTp+/Hi1\na37nnXd04cIFZWdn6/3339e2bdt8//i53hwOHz6s2bNnKyMjQ5988on69eunxYsX12pGgGkINeAH\nr9erxMREST9e9j1+/Lh27dqlxMREhYSEyGq1atSoUfr888+rfZ3w8HAVFRXp448/9p0xDho06KrH\n9e3bVzfeeKMsFot69OihY8eOqby8XAcPHtSIESMkSb/97W9V23ew7r77bkVERKhDhw6KjIzU4MGD\nJUndunXzna3n5eUpOTlZNptNISEhevjhh5Wbm6tvv/1Wly5d0sCBAyVJjzzyyDX3sWDBAi1cuFCS\n1KVLF0VGRurIkSO+748cOVJWq1WdOnVSRERElYhfy6RJk/Tqq6/KYrGoffv26tq1q44cOVLtHD79\n9FPde++96tatmyRp3Lhx2rlz5zX/YQGYLrC5FwC0JFarVW3atJEkBQQE6IcfftDp06fVvn1732Pa\nt2+vU6dOVfs6d911lxYsWKDNmzdrzpw5iomJ0aJFi656nN1ur7LvyspKnT17VhaLRaGhoZKkoKAg\nRURE1Gr9bdu2rfJ6ISEhVY5Fks6dO6elS5fqpZdekvTjpfC77rpLZ8+eVbt27aoc57UUFhb6zqID\nAgLkdrt9ry2pymv8dEzV+fbbb5Wenq7//Oc/CggI0PHjxzVq1Khq53Du3Dnt3btX8fHxVfZ75syZ\nWs8KMAWhBuqpY8eOOnPmjO/rM2fOqGPHjpKqBlCSzp496/vvn
97TPnPmjObNm6c333xT0dHRNe6v\nXbt28nq9KisrU5s2bXT58uUGff/V4XBo0qRJGjJkSJXtRUVFOn/+vO/r6+1z1qxZ+v3vf6+kpCRZ\nLJZrXinwx7PPPqs777xTa9askdVq1bhx4yRVPweHw6Ho6GitWrWqXvsGTMClb6CeHnjgAWVlZams\nrEyXL1+W0+nU/fffL0mKjIzUv//9b0nShx9+qIsXL0qStm7dqjVr1kiSwsLC9Ktf/arW+2vbtq1u\nu+02ffTRR5KkzMxMWSyWBjue2NhYvffee6qsrJTX69Wrr76qTz/9VDfffLOsVqvvh7W2bdt2zf2e\nOnVKPXv2lMVi0fvvv6+ysjLfD9fVxalTp9SjRw9ZrVZ9/vnn+u6771RaWlrtHAYOHKi9e/fq8OHD\nkn782Ntzzz1X5zUAzYlQA/UUHx+vwYMHa9SoURoxYoR+8YtfaMKECZKkqVOnauPGjRoxYoSKiop0\n++23S/oxhj/9xPLw4cN16NAhPfbYY7Xe56JFi7R27Vo99NBDKi0tVadOnRos1snJyercubMeeugh\nxcfHq6ioSL/+9a8VFBSkJUuWaN68eRo+fLgsFovv0vmVpk+frtTUVI0cOVKlpaV69NFHtXDhQv33\nv/+t03r+8Ic/aNmyZRoxYoS+/PJLTZs2TatXr9a+ffuuOweHw6ElS5YoNTVVw4cP17PPPquEhIT6\njgZoFnyOGmihvF6vL8733XefNm7cqO7duzfzqpoec0Brxxk10AL98Y9/9H2cKj8/X16vV7feemvz\nLqoZMAf8HHBGDbRARUVFmjt3rs6ePaugoCDNmjVLN910k1JTU6/5+Ntuu833nrhpioqK6rzua83h\np58PAFoLQg0AgMG49A0AgMEINQAABjPyhidu9zm/Ht+hQ4iKi+v+Oc2fO+ZXd8yufphf3TG7+jFt\nfpGR9ut+r1WcUQcGWpt7CS0a86s7Zlc/zK/umF39tKT5tYpQAwDQWhFqAAAMRqgBADBYjT9MVlZW\nprS0NJ06dUoXL17U1KlT1b17d82ePVuVlZWKjIzUihUrZLPZlJWVpU2bNikgIEBjx47VmDFjVFFR\nobS0NB09elRWq1VLly5Vly5dmuLYAABo8Wo8o87Ly1PPnj21ZcsWrVy5Uunp6Vq1apWSk5P19ttv\n65ZbbpHT6VRpaanWrFmjjRs3avPmzdq0aZPOnDmj7du3KzQ0VO+8846mTJmijIyMpjguAABahRpD\nnZCQoCeeeEKSdOzYMXXq1EkFBQWKjY2VJA0ZMkT5+fnav3+/oqKiZLfbFRwcrD59+sjlcik/P19x\ncXGSpOjoaLlcrkY8HAAAWpdaf4563LhxOn78uNauXavHHntMNptNkhQRESG32y2Px6Pw8HDf48PD\nw6/aHhAQIIvFokuXLvmeDwAArq/WoX733Xf11VdfadasWbry9uDXu1W4v9uv1KFDiN+fcavuw+Ko\nGfOrO2ZXP8yv7phd/bSU+dUY6gMHDigiIkI33HCDevToocrKSrVt21bl5eUKDg7WiRMn5HA45HA4\n5PF4fM87efKkevfuLYfDIbfbre7du6uiokJer7fGs2l/7xYTGWn3+25m+B/mV3fMrn6YX90xu/ox\nbX71ujPZ3r17tWHDBkmSx+NRaWmpoqOjlZOTI0nKzc3VoEGD1KtXLxUWFqqkpEQXLlyQy+VS3759\nNWDAAGVnZ0v68QfT+vXr1xDHBADAz0KNZ9Tjxo3T/PnzlZycrPLycv3f//2fevbsqTlz5igzM1Od\nO3dWYmKigoKCNHPmTKWkpMhisSg1NVV2u10JCQnas2ePkpKSZLPZlJ6e3hTHBQBAq2Dk76P293KE\naZcwWhrmV3fMrn6YX90xu/oxbX7VXfo28rdnAcC1TErf2dxLqNGGtJjmXgJaGW4hCgCAwQg1AAAG\nI9QAABiMUAMAYDBCDQCAw
Qg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCA\nwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMA\nYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QA\nABiMUAMAYDBCDQCAwQg1AAAGC6zNg5YvX659+/bp8uXLmjx5snbu3KmDBw8qLCxMkpSSkqIHHnhA\nWVlZ2rRpkwICAjR27FiNGTNGFRUVSktL09GjR2W1WrV06VJ16dKlUQ8KAIDWosZQf/HFF/rmm2+U\nmZmp4uJiPfLII7rvvvv09NNPa8iQIb7HlZaWas2aNXI6nQoKCtLo0aMVFxenvLw8hYaGKiMjQ7t3\n71ZGRoZWrlzZqAcFAEBrUeOl73vuuUcvv/yyJCk0NFRlZWWqrKy86nH79+9XVFSU7Ha7goOD1adP\nH7lcLuXn5ysuLk6SFB0dLZfL1cCHAABA61VjqK1Wq0JCQiRJTqdTgwcPltVq1ZYtWzRhwgQ99dRT\nOn36tDwej8LDw33PCw8Pl9vtrrI9ICBAFotFly5daqTDAQCgdanVe9SStGPHDjmdTm3YsEEHDhxQ\nWFiYevTooTfeeEOvvPKK7r777iqP93q913yd622/UocOIQoMtNZ2aZKkyEi7X49HVcyv7phd/bS2\n+TXl8bS22TW1ljK/WoX6s88+09q1a7V+/XrZ7Xb179/f972YmBgtXrxYw4YNk8fj8W0/efKkevfu\nLYfDIbfbre7du6uiokJer1c2m63a/RUXl/p1EJGRdrnd5/x6Dv6H+dUds6uf1ji/pjqe1ji7pmTa\n/Kr7R0ONl77PnTun5cuX6/XXX/f9lPeTTz6pw4cPS5IKCgrUtWtX9erVS4WFhSopKdGFCxfkcrnU\nt29fDRgwQNnZ2ZKkvLw89evXryGOCQCAn4Uaz6g//PBDFRcXa8aMGb5to0aN0owZM9SmTRuFhIRo\n6dKlCg4O1syZM5WSkiKLxaLU1FTZ7XYlJCRoz549SkpKks1mU3p6eqMeEAAArYnFW5s3jZuYv5cj\nTLuE0dIwv7pjdvXj7/wmpe9sxNU0jA1pMU2yH/7s1Y9p86vXpW8AANB8CDUAAAYj1AAAGIxQAwBg\nMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAYj1AAA\nGIxQAwBgMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAYLbO4FAA1lUvrO5l5CtTakxTT3\nEgC0QJxRAwBgMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAYj1AAAGIxQAwBgMEINAIDB\nCDUAAAYj1AAAGIxQAwBgMEINAIDBCDUAAAbj91EDTcT035ct8TuzARNxRg0AgMFqdUa9fPly7du3\nT5cvX9bkyZMVFRWl2bNnq7KyUpGRkVqxYoVsNpuysrK0adMmBQQEaOzYsRozZowqKiqUlpamo0eP\nymq1aunSperSpUtjHxcAAK1CjaH+4osv9M033ygzM1PFxcV65JFH1L9/fyUnJ2v48OF66aWX5HQ6\nlZiYqDVr1sjpdCooKEijR49WXFyc8vLyFBoaqoyMDO3evVsZGRlauXJlUxwbAAAtXo2Xvu+55x69\n/PLLkqTQ0FCVlZWpoKBAsbGxkqQhQ4YoPz9f+/fvV1RUlOx2u4KDg9WnTx+5XC7l5+crLi5OkhQd\nHS2Xy9WIhwMAQOtS4xm11WpVSEiIJMnpdGrw4MHavXu3bDabJCkiIkJut1sej0fh4eG+54WHh1+1\nPSAgQBaLRZcuXfI9/1o6dAhRYKDVrwOJjLT79XhUxfwgNc+fg9b2Z68pj6e1za6ptZT51fq
nvnfs\n2CGn06kNGzZo6NChvu1er/eaj/d3+5WKi0truyxJPw7b7T7n13PwP8wPP2nqPwet8c9eUx1Pa5xd\nUzJtftX9o6FWP/X92Wefae3atVq3bp3sdrtCQkJUXl4uSTpx4oQcDoccDoc8Ho/vOSdPnvRtd7vd\nkqSKigp5vd5qz6YBAMD/1Bjqc+fOafny5Xr99dcVFhYm6cf3mnNyciRJubm5GjRokHr16qXCwkKV\nlJTowoULcrlc6tu3rwYMGKDs7GxJUl5envr169eIhwMAQOtS46XvDz/8UMXFxZoxY4ZvW3p6uhYs\nWKDMzEx17txZiYmJCgoK0syZM5WSkiKLxaLU1FTZ7XYlJCRoz549SkpKks1mU3p6eqMeEAAArUmN\noX700Uf16KOPXrX9rbfeumpbfHy84uPjq2z76bPTAADAf9xCFIBPS7jNKfBzwy1EAQAwGKEGAMBg\nhBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMRagAADEaoAQAwGHcmQ61wxyoAaB6cUQMAYDBCDQCA\nwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMA\nYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABgssLkXAADAlSal72zuJdRoQ1pM\nk+2LM2oAAAxGqAEAMBihBgDAYIQaAACDEWoAAAxGqAEAMBihBgDAYLX6HPXXX3+tqVOnauLEiRo/\nfrzS0tJ08OBBhYWFSZJSUlL0wAMPKCsrS5s2bVJAQIDGjh2rMWPGqKKiQmlpaTp69KisVquWLl2q\nLl26NOpBAUBz4TPAaGg1hrq0tFRLlixR//79q2x/+umnNWTIkCqPW7NmjZxOp4KCgjR69GjFxcUp\nLy9PoaGhysjI0O7du5WRkaGVK1c2/JEAANAK1Xjp22azad26dXI4HNU+bv/+/YqKipLdbldwcLD6\n9Okjl8ul/Px8xcXFSZKio6PlcrkaZuUAAPwM1BjqwMBABQcHX7V9y5YtmjBhgp566imdPn1aHo9H\n4eHhvu+Hh4fL7XZX2R4QECCLxaJLly414CEAANB61ele3w8//LDCwsLUo0cPvfHGG3rllVd09913\nV3mM1+u95nOvt/1KHTqEKDDQ6teaIiPtfj0eVTE/4OeDv+/115QzrFOor3y/OiYmRosXL9awYcPk\n8Xh820+ePKnevXvL4XDI7Xare/fuqqiokNfrlc1mq/b1i4tL/VpPZKRdbvc5/w4CPswP+Hnh73v9\nNfQMqwt/nT6e9eSTT+rw4cOSpIKCAnXt2lW9evVSYWGhSkpKdOHCBblcLvXt21cDBgxQdna2JCkv\nL0/9+vWryy4BAPhZqvGM+sCBA1q2bJm+//57BQYGKicnR+PHj9eMGTPUpk0bhYSEaOnSpQoODtbM\nmTOVkpIii8Wi1NRU2e12JSQkaM+ePUpKSpLNZlN6enpTHBcAAK1CjaHu2bOnNm/efNX2YcOGXbUt\nPj5e8fHxVbb99NlpAADgP+5MBgCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMA\nYDBCDQCAwQg1AAAGI9QAABiMUAMAYLA6/T5qAEDLNSl9Z3MvAX7gjBoAAIMRagAADEaoAQAwGKEG\nAMBghBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMRagAADEao\nAQAwGKEGAMBghBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMRagAADEaoAQAwGKEGAMBghBoAAIMR\nagAADFarUH/99dd68MEHtWXLFknSsWPH9Lvf/U7JycmaPn26Ll26JEnKysrSb37zG40ZM0bvvfee\nJKmiokIzZ85UUlKSxo8fr8OHDzfSoQAA0PrUGOrS0lI
tWbJE/fv3921btWqVkpOT9fbbb+uWW26R\n0+lUaWmp1qxZo40bN2rz5s3atGmTzpw5o+3btys0NFTvvPOOpkyZooyMjEY9IAAAWpMaQ22z2bRu\n3To5HA7ftoKCAsXGxkqShgwZovz8fO3fv19RUVGy2+0KDg5Wnz595HK5lJ+fr7i4OElSdHS0XC5X\nIx0KAACtT42hDgwMVHBwcJVtZWVlstlskqSIiAi53W55PB6Fh4f7HhMeHn7V9oCAAFksFt+lcgAA\nUL3A+r6A1+ttkO1X6tAhRIGBVr/WERlp9+vxqIr5AUDtNeX/M+sU6pCQEJWXlys4OFgnTpyQw+GQ\nw+GQx+PxPebkyZPq3bu3HA6H3G63unfvroqKCnm9Xt/Z+PUUF5f6tZ7ISLvc7nN1ORSI+QGAvxr6\n/5nVhb9OH8+Kjo5WTk6OJCk3N1eDBg1Sr169VFhYqJKSEl24cEEul0t9+/bVgAEDlJ2dLUnKy8tT\nv3796rJLAAB+lmo8oz5w4ICWLVum77//XoGBgcrJydGLL76otLQ0ZWZmqnPnzkpMTFRQUJBmzpyp\nlJQUWSwWpaamym63KyEhQXv27FFSUpJsNpvS09Ob4rgAAGgVLN7avGncxPy9pMCl2/qpzfwmpe9s\notUAgPk2pMU06Os1+KVvAADQNOr9U99oGJyxAgCuhTNqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAM\nRqgBADAYoQYAwGCEGgAAgxFqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAMRqgBADAYoQYAwGCEGgAA\ngxFqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAMRqgBADAYoQYA\nwGCEGgAAgxFqAAAMRqgBADAYoQYAwGCEGgAAgxFqAAAMFtjcC2gKk9J3NvcSAACoE86oAQAwGKEG\nAMBghBoAAIMRagAADFanHyYrKCjQ9OnT1bVrV0lSt27d9Pjjj2v27NmqrKxUZGSkVqxYIZvNpqys\nLG3atEkBAQEaO3asxowZ06AHAABAa1bnn/q+9957tWrVKt/Xc+fOVXJysoYPH66XXnpJTqdTiYmJ\nWrNmjZxOp4KCgjR69GjFxcUpLCysQRYPAEBr12CXvgsKChQbGytJGjJkiPLz87V//35FRUXJbrcr\nODhYffr0kcvlaqhdAgDQ6tX5jPrQoUOaMmWKzp49q2nTpqmsrEw2m02SFBERIbfbLY/Ho/DwcN9z\nwsPD5Xa7a3ztDh1CFBho9Ws9kZF2/w4AAIA6asrm1CnUt956q6ZNm6bhw4fr8OHDmjBhgiorK33f\n93q913ze9bb//4qLS/1aT2SkXW73Ob+eAwBAXTV0c6oLf50ufXfq1EkJCQmyWCy6+eab1bFjR509\ne1bl5eWSpBMnTsjhcMjhcMjj8fied/LkSTkcjrrsEgCAn6U6hTorK0tvvvmmJMntduvUqVMaNWqU\ncnJyJEm5ubkaNGiQevXqpcLCQpWUlOjChQtyuVzq27dvw60eAIBWrk6XvmNiYvTMM8/ok08+UUVF\nhRYvXqwePXpozpw5yszMVOfOnZWYmKigoCDNnDlTKSkpslgsSk1Nld3Oe8kAANSWxVvbN46bkL/X\n/mt6j5pfygEAaEgb0mIa9PUa/D1qAADQNAg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiM\nUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAG\nI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCA\nwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABiMUAMAYDBCDQCAwQg1AAAGI9QAABgssCl2\n8sILL2j//v2yWCy
aN2+e7rrrrqbYLQAALV6jh/rLL7/Ud999p8zMTBUVFWnevHnKzMxs7N0CANAq\nNPql7/z8fD344IOSpNtuu01nz57V+fPnG3u3AAC0Co0eao/How4dOvi+Dg8Pl9vtbuzdAgDQKjTJ\ne9RX8nq9NT4mMtLu9+tW95wPMh72+/UAADBBo59ROxwOeTwe39cnT55UZGRkY+8WAIBWodFDPWDA\nAOXk5EiSDh48KIfDoXbt2jX2bgEAaBUa/dJ3nz59dOedd2rcuHGyWCxatGhRY+8SAIBWw+KtzZvG\nAACgWXBnMgAADEaoAQAwWJN/PKuhcXtS/3399deaOnWqJk6cqPHjx+vYsWOaPXu2KisrFRkZqRUr\nVshmszX3Mo20fPly7du3T5cvX9bkyZMVFRXF7GqhrKxMaWlpOnXqlC5evKipU6eqe/fuzM5P5eXl\nGjFihKZOnar+/fszv1oqKCjQ9OnT1bVrV0lSt27d9Pjjj7eY+bXoM+orb0/6/PPP6/nnn2/uJRmv\ntLRUS5YsUf/+/X3bVq1apeTkZL399tu65ZZb5HQ6m3GF5vriiy/0zTffKDMzU+vXr9cLL7zA7Gop\nLy9PPXv21JYtW7Ry5Uqlp6czuzp47bXX1L59e0n8vfXXvffeq82bN2vz5s1auHBhi5pfiw41tyf1\nn81m07p16+RwOHzbCgoKFBsbK0kaMmSI8vPzm2t5Rrvnnnv08ssvS5JCQ0NVVlbG7GopISFBTzzx\nhCTp2LFj6tSpE7PzU1FRkQ4dOqQHHnhAEn9v66slza9Fh5rbk/ovMDBQwcHBVbaVlZX5LvlEREQw\nw+uwWq0KCQmRJDmdTg0ePJjZ+WncuHF65plnNG/ePGbnp2XLliktLc33NfPzz6FDhzRlyhQlJSXp\n888/b1Hza/HvUV+JT5rVHzOs2Y4dO+R0OrVhwwYNHTrUt53Z1ezdd9/VV199pVmzZlWZF7Or3l/+\n8hf17t1bXbp0ueb3mV/1br31Vk2bNk3Dhw/X4cOHNWHCBFVWVvq+b/r8WnSouT1pwwgJCVF5ebmC\ng4N14sSJKpfFUdVnn32mtWvXav369bLb7cyulg4cOKCIiAjdcMMN6tGjhyorK9W2bVtmV0u7du3S\n4cOHtWvXLh0/flw2m40/e37o1KmTEhISJEk333yzOnbsqMLCwhYzvxZ96ZvbkzaM6Oho3xxzc3M1\naNCgZl6Rmc6dO6fly5fr9ddfV1hYmCRmV1t79+7Vhg0bJP34llVpaSmz88PKlSu1detW/fnPf9aY\nMWM0depU5ueHrKwsvfnmm5Ikt9utU6dOadSoUS1mfi3+zmQvvvii9u7d67s9affu3Zt7SUY7cOCA\nli1bpu+//16BgYHq1KmTXnytKYqYAAAArElEQVTxRaWlpenixYvq3Lmzli5dqqCgoOZeqnEyMzO1\nevVq/fKXv/RtS09P14IFC5hdDcrLyzV//nwdO3ZM5eXlmjZtmnr27Kk5c+YwOz+tXr1aN954owYO\nHMj8aun8+fN65plnVFJSooqKCk2bNk09evRoMfNr8aEGAKA1a9GXvgEAaO0INQAABiPUAAAYjFAD\nAGAwQg0AgMEINQAABiPUAAAYjFADAGCw/wdkB5RjykY3PgAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [] - } - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "XtYZ7114n3b-" - }, - "cell_type": "markdown", - "source": [ - "## Accessing Data\n", - "\n", - "You can access `DataFrame` data using familiar Python dict/list operations:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "_TFm7-looBFF", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 102 - }, - "outputId": "ab7fabc4-0baf-4601-cc8c-52cc5ae12bdb" - }, - "cell_type": "code", - "source": [ - "cities = pd.DataFrame({ 'City name': city_names, 'Population': population })\n", - "print(type(cities['City name']))\n", - "cities['City name']" - ], - "execution_count": 25, - "outputs": [ - { - "output_type": "stream", - "text": [ - "\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0 San Francisco\n", - "1 San Jose\n", - "2 Sacramento\n", - "Name: City name, dtype: object" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 25 - } - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "V5L6xacLoxyv", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - }, - "outputId": "5bb702e7-4e43-4544-9ed7-3b32af4d0322" - }, - "cell_type": "code", - "source": [ - "print(type(cities['City name'][1]))\n", - "cities['City name'][1]" - ], - "execution_count": 26, - "outputs": [ - { - "output_type": "stream", - "text": [ - "\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "'San Jose'" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 26 - } - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "gcYX1tBPugZl", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 128 - }, - "outputId": "51ffb1ed-6165-47c5-eb21-adbdbf769ee1" - }, - "cell_type": "code", - "source": [ - "print(type(cities[0:2]))\n", - "cities[0:2]" - ], - "execution_count": 27, - "outputs": [ - { - "output_type": 
"stream", - "text": [ - "\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
City namePopulation
0San Francisco852469
1San Jose1015785
\n", - "
" - ], - "text/plain": [ - " City name Population\n", - "0 San Francisco 852469\n", - "1 San Jose 1015785" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 27 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "65g1ZdGVjXsQ" - }, - "cell_type": "markdown", - "source": [ - "In addition, *pandas* provides an extremely rich API for advanced [indexing and selection](http://pandas.pydata.org/pandas-docs/stable/indexing.html) that is too extensive to be covered here." - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "RM1iaD-ka3Y1" - }, - "cell_type": "markdown", - "source": [ - "## Manipulating Data\n", - "\n", - "You may apply Python's basic arithmetic operations to `Series`. For example:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "XWmyCFJ5bOv-", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 85 - }, - "outputId": "ea13cfe0-702f-4602-9cb0-25aa0db5a742" - }, - "cell_type": "code", - "source": [ - "population / 1000." - ], - "execution_count": 28, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0 852.469\n", - "1 1015.785\n", - "2 485.199\n", - "dtype: float64" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 28 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "TQzIVnbnmWGM" - }, - "cell_type": "markdown", - "source": [ - "[NumPy](http://www.numpy.org/) is a popular toolkit for scientific computing. 
*pandas* `Series` can be used as arguments to most NumPy functions:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "ko6pLK6JmkYP", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 85 - }, - "outputId": "650a3d8c-ec6c-4daf-c1e3-bafabccfdef3" - }, - "cell_type": "code", - "source": [ - "import numpy as np\n", - "\n", - "np.log(population)" - ], - "execution_count": 29, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0 13.655892\n", - "1 13.831172\n", - "2 13.092314\n", - "dtype: float64" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 29 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "xmxFuQmurr6d" - }, - "cell_type": "markdown", - "source": [ - "For more complex single-column transformations, you can use `Series.apply`. Like the Python [map function](https://docs.python.org/2/library/functions.html#map), \n", - "`Series.apply` accepts as an argument a [lambda function](https://docs.python.org/2/tutorial/controlflow.html#lambda-expressions), which is applied to each value.\n", - "\n", - "The example below creates a new `Series` that indicates whether `population` is over one million:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "Fc1DvPAbstjI", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 85 - }, - "outputId": "981c50d2-4a3a-4409-a01b-57724ce99203" - }, - "cell_type": "code", - "source": [ - "population.apply(lambda val: val > 1000000)" - ], - "execution_count": 30, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0 False\n", - "1 True\n", - "2 False\n", - "dtype: bool" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 30 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "ZeYYLoV9b9fB" - }, - "cell_type": "markdown", - "source": [ - "\n", - "Modifying `DataFrames` is also straightforward. 
For example, the following code adds two `Series` to an existing `DataFrame`:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "0gCEX99Hb8LR", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 142 - }, - "outputId": "c92e1248-2987-4eab-b570-0108d7610b3e" - }, - "cell_type": "code", - "source": [ - "cities['Area square miles'] = pd.Series([46.87, 176.53, 97.92])\n", - "cities['Population density'] = cities['Population'] / cities['Area square miles']\n", - "cities" - ], - "execution_count": 31, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
City namePopulationArea square milesPopulation density
0San Francisco85246946.8718187.945381
1San Jose1015785176.535754.177760
2Sacramento48519997.924955.055147
\n", - "
" - ], - "text/plain": [ - " City name Population Area square miles Population density\n", - "0 San Francisco 852469 46.87 18187.945381\n", - "1 San Jose 1015785 176.53 5754.177760\n", - "2 Sacramento 485199 97.92 4955.055147" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 31 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "6qh63m-ayb-c" - }, - "cell_type": "markdown", - "source": [ - "## Exercise #1\n", - "\n", - "Modify the `cities` table by adding a new boolean column that is True if and only if *both* of the following are True:\n", - "\n", - " * The city is named after a saint.\n", - " * The city has an area greater than 50 square miles.\n", - "\n", - "**Note:** Boolean `Series` are combined using the bitwise, rather than the traditional boolean, operators. For example, when performing *logical and*, use `&` instead of `and`.\n", - "\n", - "**Hint:** \"San\" in Spanish means \"saint.\"" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "zCOn8ftSyddH", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 142 - }, - "outputId": "d06f79b2-9078-430d-cf74-1446d559c796" - }, - "cell_type": "code", - "source": [ - "# Your code here\n", - "cities['Named after Saint and has Area Greater than 50 sq miles'] = (cities['Area square miles'] > 50) & cities['City name'].apply(lambda name: name.startswith('San'))\n", - "cities" - ], - "execution_count": 33, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
City namePopulationArea square milesPopulation densityNamed after Saint and has Area Greater than 50 sq miles
0San Francisco85246946.8718187.945381False
1San Jose1015785176.535754.177760True
2Sacramento48519997.924955.055147False
\n", - "
" - ], - "text/plain": [ - " City name Population Area square miles Population density \\\n", - "0 San Francisco 852469 46.87 18187.945381 \n", - "1 San Jose 1015785 176.53 5754.177760 \n", - "2 Sacramento 485199 97.92 4955.055147 \n", - "\n", - " Named after Saint and has Area Greater than 50 sq miles \n", - "0 False \n", - "1 True \n", - "2 False " - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 33 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "YHIWvc9Ms-Ll" - }, - "cell_type": "markdown", - "source": [ - "### Solution\n", - "\n", - "Click below for a solution." - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "T5OlrqtdtCIb", - "colab": {} - }, - "cell_type": "code", - "source": [ - "cities['Is wide and has saint name'] = (cities['Area square miles'] > 50) & cities['City name'].apply(lambda name: name.startswith('San'))\n", - "cities" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "text", - "id": "f-xAOJeMiXFB" - }, - "cell_type": "markdown", - "source": [ - "## Indexes\n", - "Both `Series` and `DataFrame` objects also define an `index` property that assigns an identifier value to each `Series` item or `DataFrame` row. \n", - "\n", - "By default, at construction, *pandas* assigns index values that reflect the ordering of the source data. Once created, the index values are stable; that is, they do not change when data is reordered." 
- ] - }, - { - "metadata": { - "colab_type": "code", - "id": "2684gsWNinq9", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - }, - "outputId": "7b863b33-378f-4563-e81a-4fd1ffd4dc06" - }, - "cell_type": "code", - "source": [ - "city_names.index" - ], - "execution_count": 34, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "RangeIndex(start=0, stop=3, step=1)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 34 - } - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "F_qPe2TBjfWd", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - }, - "outputId": "c3f73d57-f7b1-4f82-de93-182660648fff" - }, - "cell_type": "code", - "source": [ - "cities.index" - ], - "execution_count": 35, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "RangeIndex(start=0, stop=3, step=1)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 35 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "hp2oWY9Slo_h" - }, - "cell_type": "markdown", - "source": [ - "Call `DataFrame.reindex` to manually reorder the rows. For example, the following has the same effect as sorting by city name:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "sN0zUzSAj-U1", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 142 - }, - "outputId": "597f00f1-baea-4a4c-8a75-867c5b76d5ab" - }, - "cell_type": "code", - "source": [ - "cities.reindex([2, 0, 1])" - ], - "execution_count": 36, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
City namePopulationArea square milesPopulation densityNamed after Saint and has Area Greater than 50 sq miles
2Sacramento48519997.924955.055147False
0San Francisco85246946.8718187.945381False
1San Jose1015785176.535754.177760True
\n", - "
" - ], - "text/plain": [ - " City name Population Area square miles Population density \\\n", - "2 Sacramento 485199 97.92 4955.055147 \n", - "0 San Francisco 852469 46.87 18187.945381 \n", - "1 San Jose 1015785 176.53 5754.177760 \n", - "\n", - " Named after Saint and has Area Greater than 50 sq miles \n", - "2 False \n", - "0 False \n", - "1 True " - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 36 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "-GQFz8NZuS06" - }, - "cell_type": "markdown", - "source": [ - "Reindexing is a great way to shuffle (randomize) a `DataFrame`. In the example below, we take the index, which is array-like, and pass it to NumPy's `random.permutation` function, which shuffles its values in place. Calling `reindex` with this shuffled array causes the `DataFrame` rows to be shuffled in the same way.\n", - "Try running the following cell multiple times!" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "mF8GC0k8uYhz", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 142 - }, - "outputId": "96433926-6ec8-41a5-9389-aa53b762403f" - }, - "cell_type": "code", - "source": [ - "cities.reindex(np.random.permutation(cities.index))" - ], - "execution_count": 40, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
City namePopulationArea square milesPopulation densityNamed after Saint and has Area Greater than 50 sq miles
2Sacramento48519997.924955.055147False
0San Francisco85246946.8718187.945381False
1San Jose1015785176.535754.177760True
\n", - "
" - ], - "text/plain": [ - " City name Population Area square miles Population density \\\n", - "2 Sacramento 485199 97.92 4955.055147 \n", - "0 San Francisco 852469 46.87 18187.945381 \n", - "1 San Jose 1015785 176.53 5754.177760 \n", - "\n", - " Named after Saint and has Area Greater than 50 sq miles \n", - "2 False \n", - "0 False \n", - "1 True " - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 40 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "fSso35fQmGKb" - }, - "cell_type": "markdown", - "source": [ - "For more information, see the [Index documentation](http://pandas.pydata.org/pandas-docs/stable/indexing.html#index-objects)." - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "8UngIdVhz8C0" - }, - "cell_type": "markdown", - "source": [ - "## Exercise #2\n", - "\n", - "The `reindex` method allows index values that are not in the original `DataFrame`'s index values. Try it and see what happens if you use such values! Why do you think this is allowed?" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "PN55GrDX0jzO", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 235 - }, - "outputId": "dc4eb6b6-b0bb-4bcb-ab0f-c09dce9e0422" - }, - "cell_type": "code", - "source": [ - "# Your code here\n", - "cities.reindex([2, 0, 6, 7, 1, 5])\n", - "#The reindex method allows index values that are not in the original DataFrame's index values. This is allowed so as to input new datas that are not present currently.\n", - "#Also sometimes index values may be taken from the dataset, hence it may not be convenient to use that index and instead we can use reindex to give a more suitable, logical index tro the dataset." - ], - "execution_count": 41, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
City namePopulationArea square milesPopulation densityNamed after Saint and has Area Greater than 50 sq miles
2Sacramento485199.097.924955.055147False
0San Francisco852469.046.8718187.945381False
6NaNNaNNaNNaNNaN
7NaNNaNNaNNaNNaN
1San Jose1015785.0176.535754.177760True
5NaNNaNNaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " City name Population Area square miles Population density \\\n", - "2 Sacramento 485199.0 97.92 4955.055147 \n", - "0 San Francisco 852469.0 46.87 18187.945381 \n", - "6 NaN NaN NaN NaN \n", - "7 NaN NaN NaN NaN \n", - "1 San Jose 1015785.0 176.53 5754.177760 \n", - "5 NaN NaN NaN NaN \n", - "\n", - " Named after Saint and has Area Greater than 50 sq miles \n", - "2 False \n", - "0 False \n", - "6 NaN \n", - "7 NaN \n", - "1 True \n", - "5 NaN " - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 41 - } - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "TJffr5_Jwqvd" - }, - "cell_type": "markdown", - "source": [ - "### Solution\n", - "\n", - "Click below for the solution." - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "8oSvi2QWwuDH" - }, - "cell_type": "markdown", - "source": [ - "If your `reindex` input array includes values not in the original `DataFrame` index values, `reindex` will add new rows for these \"missing\" indices and populate all corresponding columns with `NaN` values:" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "yBdkucKCwy4x", - "colab": {} - }, - "cell_type": "code", - "source": [ - "cities.reindex([0, 4, 5, 2])" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "text", - "id": "2l82PhPbwz7g" - }, - "cell_type": "markdown", - "source": [ - "This behavior is desirable because indexes are often strings pulled from the actual data (see the [*pandas* reindex\n", - "documentation](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html) for an example\n", - "in which the index values are browser names).\n", - "\n", - "In this case, allowing \"missing\" indices makes it easy to reindex using an external list, as you don't have to worry about\n", - "sanitizing the input." - ] - } - ] -} \ No newline at end of file