diff --git a/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/Clustering.ipynb b/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/Clustering.ipynb index 790b04d3..8f3236a1 100644 --- a/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/Clustering.ipynb +++ b/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/Clustering.ipynb @@ -46,7 +46,7 @@ "id": "-eODEP6nPA96" }, "source": [ - "K-means clustering is a simple and popular type of unsupervised machine learning algorithm, which is used on unlabeled data. The goal of this algorithm is tofind groups in the data, with the number of groups represented by the variable K. The algorithm works iteratively to assign each data point to one of K groups according to provided features similarity.The K-means algorithm aims to choose centroids that minimise the inertia, or within-cluster sum of squared criterion:\n", + "K-means clustering is a simple and popular type of unsupervised machine learning algorithm, which is used on unlabeled data. The goal of this algorithm is to find groups in the data, with the number of groups represented by the variable K. 
The algorithm works iteratively to assign each data point to one of K groups according to provided features similarity. The K-means algorithm aims to choose centroids that minimise the inertia, or within-cluster sum-of-squares criterion:\n", "\n", "$$\n", "\\sum_{i=0}^{n}\\min_{\\mu_j \\in C}(||x_j - \\mu_i||^2)\n", @@ -471,7 +471,7 @@ } ], "source": [ - "home_data = pd.read_csv('./housing.csv', usecols = ['longitude', 'latitude', 'median_house_value'])\n", + "home_data = pd.read_csv('./housing.csv.zip', usecols = ['longitude', 'latitude', 'median_house_value'])\n", "home_data.head()" ] }, @@ -580,8 +580,8 @@ "source": [ "X_train, X_test, y_train, y_test = train_test_split(home_data[['latitude', 'longitude']], home_data[['median_house_value']], test_size=0.33, random_state=0)\n", "\n", - "X_train_norm = preprocessing.normalize(X_train)\n", - "X_test_norm = preprocessing.normalize(X_test)" + "X_train_norm = normalize(X_train)\n", + "X_test_norm = normalize(X_test)" ] }, { @@ -1784,7 +1784,7 @@ "output_type": "stream", "text": [ ":1: DtypeWarning: Columns (25,108) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", - " players = pd.read_csv(\"players_22.csv\")\n" + " players = pd.read_csv(\"players_22.csv.zip\")\n" ] }, { @@ -2237,7 +2237,7 @@ } ], "source": [ - "players = pd.read_csv(\"players_22.csv\")\n", + "players = pd.read_csv(\"players_22.csv.zip\")\n", "players.head()" ] }, @@ -4810,7 +4810,8 @@ "outputs": [], "source": [ "from sklearn.metrics import pairwise_distances_argmin\n", - "import matplotlib.animation as animation" + "import matplotlib.animation as animation\n", + "from IPython.display import Image" ] }, { diff --git a/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/Mall_Customers.csv b/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/Mall_Customers.csv new file mode 100644 index 00000000..b324941f --- /dev/null +++ b/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/Mall_Customers.csv @@ -0,0 +1,201 @@ +CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100) +0001,Male,19,15,39 +0002,Male,21,15,81 +0003,Female,20,16,6 +0004,Female,23,16,77 +0005,Female,31,17,40 +0006,Female,22,17,76 +0007,Female,35,18,6 +0008,Female,23,18,94 +0009,Male,64,19,3 +0010,Female,30,19,72 +0011,Male,67,19,14 +0012,Female,35,19,99 +0013,Female,58,20,15 +0014,Female,24,20,77 +0015,Male,37,20,13 +0016,Male,22,20,79 +0017,Female,35,21,35 +0018,Male,20,21,66 +0019,Male,52,23,29 +0020,Female,35,23,98 +0021,Male,35,24,35 +0022,Male,25,24,73 +0023,Female,46,25,5 +0024,Male,31,25,73 +0025,Female,54,28,14 +0026,Male,29,28,82 +0027,Female,45,28,32 +0028,Male,35,28,61 +0029,Female,40,29,31 +0030,Female,23,29,87 +0031,Male,60,30,4 +0032,Female,21,30,73 +0033,Male,53,33,4 +0034,Male,18,33,92 +0035,Female,49,33,14 +0036,Female,21,33,81 +0037,Female,42,34,17 +0038,Female,30,34,73 +0039,Female,36,37,26 +0040,Female,20,37,75 +0041,Female,65,38,35 +0042,Male,24,38,92 +0043,Male,48,39,36 +0044,Female,31,39,61 +0045,Female,49,39,28 +0046,Female,24,39,65 +0047,Female,50,40,55 +0048,Female,27,40,47 +0049,Female,29,40,42 
+0050,Female,31,40,42 +0051,Female,49,42,52 +0052,Male,33,42,60 +0053,Female,31,43,54 +0054,Male,59,43,60 +0055,Female,50,43,45 +0056,Male,47,43,41 +0057,Female,51,44,50 +0058,Male,69,44,46 +0059,Female,27,46,51 +0060,Male,53,46,46 +0061,Male,70,46,56 +0062,Male,19,46,55 +0063,Female,67,47,52 +0064,Female,54,47,59 +0065,Male,63,48,51 +0066,Male,18,48,59 +0067,Female,43,48,50 +0068,Female,68,48,48 +0069,Male,19,48,59 +0070,Female,32,48,47 +0071,Male,70,49,55 +0072,Female,47,49,42 +0073,Female,60,50,49 +0074,Female,60,50,56 +0075,Male,59,54,47 +0076,Male,26,54,54 +0077,Female,45,54,53 +0078,Male,40,54,48 +0079,Female,23,54,52 +0080,Female,49,54,42 +0081,Male,57,54,51 +0082,Male,38,54,55 +0083,Male,67,54,41 +0084,Female,46,54,44 +0085,Female,21,54,57 +0086,Male,48,54,46 +0087,Female,55,57,58 +0088,Female,22,57,55 +0089,Female,34,58,60 +0090,Female,50,58,46 +0091,Female,68,59,55 +0092,Male,18,59,41 +0093,Male,48,60,49 +0094,Female,40,60,40 +0095,Female,32,60,42 +0096,Male,24,60,52 +0097,Female,47,60,47 +0098,Female,27,60,50 +0099,Male,48,61,42 +0100,Male,20,61,49 +0101,Female,23,62,41 +0102,Female,49,62,48 +0103,Male,67,62,59 +0104,Male,26,62,55 +0105,Male,49,62,56 +0106,Female,21,62,42 +0107,Female,66,63,50 +0108,Male,54,63,46 +0109,Male,68,63,43 +0110,Male,66,63,48 +0111,Male,65,63,52 +0112,Female,19,63,54 +0113,Female,38,64,42 +0114,Male,19,64,46 +0115,Female,18,65,48 +0116,Female,19,65,50 +0117,Female,63,65,43 +0118,Female,49,65,59 +0119,Female,51,67,43 +0120,Female,50,67,57 +0121,Male,27,67,56 +0122,Female,38,67,40 +0123,Female,40,69,58 +0124,Male,39,69,91 +0125,Female,23,70,29 +0126,Female,31,70,77 +0127,Male,43,71,35 +0128,Male,40,71,95 +0129,Male,59,71,11 +0130,Male,38,71,75 +0131,Male,47,71,9 +0132,Male,39,71,75 +0133,Female,25,72,34 +0134,Female,31,72,71 +0135,Male,20,73,5 +0136,Female,29,73,88 +0137,Female,44,73,7 +0138,Male,32,73,73 +0139,Male,19,74,10 +0140,Female,35,74,72 +0141,Female,57,75,5 +0142,Male,32,75,93 +0143,Female,28,76,40 +0144,Female,32,76,87 
+0145,Male,25,77,12 +0146,Male,28,77,97 +0147,Male,48,77,36 +0148,Female,32,77,74 +0149,Female,34,78,22 +0150,Male,34,78,90 +0151,Male,43,78,17 +0152,Male,39,78,88 +0153,Female,44,78,20 +0154,Female,38,78,76 +0155,Female,47,78,16 +0156,Female,27,78,89 +0157,Male,37,78,1 +0158,Female,30,78,78 +0159,Male,34,78,1 +0160,Female,30,78,73 +0161,Female,56,79,35 +0162,Female,29,79,83 +0163,Male,19,81,5 +0164,Female,31,81,93 +0165,Male,50,85,26 +0166,Female,36,85,75 +0167,Male,42,86,20 +0168,Female,33,86,95 +0169,Female,36,87,27 +0170,Male,32,87,63 +0171,Male,40,87,13 +0172,Male,28,87,75 +0173,Male,36,87,10 +0174,Male,36,87,92 +0175,Female,52,88,13 +0176,Female,30,88,86 +0177,Male,58,88,15 +0178,Male,27,88,69 +0179,Male,59,93,14 +0180,Male,35,93,90 +0181,Female,37,97,32 +0182,Female,32,97,86 +0183,Male,46,98,15 +0184,Female,29,98,88 +0185,Female,41,99,39 +0186,Male,30,99,97 +0187,Female,54,101,24 +0188,Male,28,101,68 +0189,Female,41,103,17 +0190,Female,36,103,85 +0191,Female,34,103,23 +0192,Female,32,103,69 +0193,Male,33,113,8 +0194,Female,38,113,91 +0195,Female,47,120,16 +0196,Female,35,120,79 +0197,Female,45,126,28 +0198,Male,32,126,74 +0199,Male,32,137,18 +0200,Male,30,137,83 \ No newline at end of file diff --git a/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/housing.csv.zip b/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/housing.csv.zip new file mode 100644 index 00000000..7f12f4d3 Binary files /dev/null and b/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/housing.csv.zip differ diff --git a/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/players_22.csv.zip b/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/players_22.csv.zip new file mode 100644 index 00000000..c1337439 Binary files /dev/null and b/Jupyter_Notebooks/Chapter_02_Unsupervised_Learning/players_22.csv.zip differ