"""Customer segmentation with k-means.

Script flow:
  1. Download the customer CSV and drop the ``Address`` column.
  2. Build a feature matrix from every column after the first, replace NaNs
     with 0 and standardize each feature.
  3. Fit k-means (3 clusters) and attach the labels to the DataFrame.
  4. Print per-cluster feature means and draw a 2D and a 3D scatter plot.
"""
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (registers the "3d" projection on older Matplotlib)
from sklearn.cluster import KMeans
# `sklearn.datasets.samples_generator` was removed in scikit-learn 0.24;
# the public location of make_blobs is `sklearn.datasets`.
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

cust_df = pd.read_csv(
    "https://raw.githubusercontent.com/sumyak/ML-Algos-and-Techniques/master/Clustering/K-Means%20Clustering/Cust_Segmentation.csv"
)

# Data pre-processing: drop the 'Address' column before building the
# numeric feature matrix.
df = cust_df.drop('Address', axis=1)

# Feature matrix: skip the first column, then replace NaNs with 0 so that
# the scaler and k-means receive finite values only.
# NOTE(review): presumably the first column is a customer identifier --
# confirm against the CSV schema.
X = df.values[:, 1:]
X = np.nan_to_num(X)

# Normalize each feature to zero mean / unit variance so that no single
# large-magnitude feature dominates the Euclidean distances used by k-means.
Clus_dataSet = StandardScaler().fit_transform(X)

# Modeling
clusterNum = 3
k_means = KMeans(init="k-means++", n_clusters=clusterNum, n_init=12)
# Fit on the standardized features. The original script computed
# `Clus_dataSet` but then fit on the raw `X`, leaving the scaling unused.
k_means.fit(Clus_dataSet)
labels = k_means.labels_

df["Clus_km"] = labels

# Per-cluster mean of every feature, in original (unscaled) units.
print(df.groupby('Clus_km').mean())

# 2D plot. Column roles follow the axis labels used below:
# X[:, 0] -> 'Age', X[:, 3] -> 'Income'; X[:, 1] ('Education' in the 3D
# plot) drives the marker area.
# NOTE(review): column positions depend on the CSV layout -- confirm.
area = np.pi * (X[:, 1]) ** 2
# `np.float` was removed in NumPy 1.24; the builtin `float` is the
# documented replacement and yields the same float64 array.
plt.scatter(X[:, 0], X[:, 3], s=area, c=labels.astype(float), alpha=0.5)
plt.xlabel('Age', fontsize=18)
plt.ylabel('Income', fontsize=16)

plt.show()

# 3D plot of Education / Age / Income coloured by cluster.
fig = plt.figure(1, figsize=(8, 6))
plt.clf()
# `Axes3D(fig, ...)` no longer attaches itself to the figure on current
# Matplotlib, which results in an empty plot; create the axes through the
# figure instead, keeping the same rectangle and view angles.
ax = fig.add_axes([0, 0, .95, 1], projection='3d', elev=48, azim=134)

ax.set_xlabel('Education')
ax.set_ylabel('Age')
ax.set_zlabel('Income')

ax.scatter(X[:, 1], X[:, 0], X[:, 3], c=labels.astype(float))

# Without this call the 3D figure is never displayed when run as a script.
plt.show()