@@ -0,0 +1,206 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"from mirapy.data.load_dataset import load_atlas_star_data\n",
"from mirapy.classifiers.models import AtlasVarStarClassifier\n",
"import mirapy\n",
"\n",
"import os\n",
"from os import walk\n",
"import pandas as pd\n",
"from sklearn.metrics import classification_report, accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"from keras.optimizers import Adam\n",
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
"from sklearn.preprocessing import StandardScaler"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"path = 'D:\\MTP\\ATLAS\\dataset'\n",
"csv_file = os.path.join(path, \"non_dub.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ignore feature list to use features selected using feature selection\n",
"\n",
"Numpy array `y` is the respective class labels.\n",
"\n",
"`BH` Black Hole\n",
"`P` Pulsar\n",
"`NP` Non-pulsar"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"x, y = load_atlas_star_data(csv_file, 0.2)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Admin\\Anaconda3\\envs\\gpu\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:363: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
"If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
"In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
" warnings.warn(msg, FutureWarning)\n"
]
}
],
"source": [
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n",
"\n",
"label_encoder = LabelEncoder()\n",
"integer_encoded = label_encoder.fit_transform(y_train)\n",
"\n",
"onehot_encoder = OneHotEncoder(sparse=False)\n",
"integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)\n",
"onehot_encoded = onehot_encoder.fit_transform(integer_encoded)\n",
"y_train = onehot_encoded"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"classifier = AtlasVarStarClassifier('relu', input_size=x_train[0].shape[0], num_classes=y_train[0].shape[0])\n",
"classifier.compile(optimizer='adam', loss='mean_squared_error')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n",
" - 6s - loss: 0.0238 - acc: 0.8593\n",
"Epoch 2/10\n",
" - 3s - loss: 0.0118 - acc: 0.9318\n",
"Epoch 3/10\n",
" - 3s - loss: 0.0095 - acc: 0.9436\n",
"Epoch 4/10\n",
" - 3s - loss: 0.0087 - acc: 0.9482\n",
"Epoch 5/10\n",
" - 3s - loss: 0.0083 - acc: 0.9502\n",
"Epoch 6/10\n",
" - 3s - loss: 0.0081 - acc: 0.9518\n",
"Epoch 7/10\n",
" - 3s - loss: 0.0079 - acc: 0.9527\n",
"Epoch 8/10\n",
" - 3s - loss: 0.0077 - acc: 0.9543\n",
"Epoch 9/10\n",
" - 3s - loss: 0.0076 - acc: 0.9549\n",
"Epoch 10/10\n",
" - 3s - loss: 0.0074 - acc: 0.9558\n"
]
}
],
"source": [
"classifier.train(x_train, y_train, epochs=10,\n",
" batch_size=100, verbose=2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"convert string classes to integer encoded"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.95 0.95 0.95 3373\n",
" 1 0.97 0.98 0.97 2977\n",
" 2 0.84 0.87 0.85 840\n",
" 3 0.94 0.91 0.93 1406\n",
" 4 0.98 0.99 0.99 439\n",
" 5 0.86 0.80 0.83 396\n",
" 6 0.94 0.97 0.96 2655\n",
" 7 0.98 0.97 0.98 1839\n",
" 8 1.00 0.98 0.99 2472\n",
"\n",
" micro avg 0.95 0.95 0.95 16397\n",
" macro avg 0.94 0.94 0.94 16397\n",
"weighted avg 0.95 0.95 0.95 16397\n",
"\n",
"Accuracy: 95.45 %\n"
]
}
],
"source": [
"label_encoder = LabelEncoder()\n",
"integer_encoded = label_encoder.fit_transform(y_test)\n",
"y_test = integer_encoded\n",
"\n",
"y_predicted = classifier.test(x_test)\n",
"print(classification_report(y_test, y_predicted))\n",
"print(\"Accuracy:\", round(accuracy_score(y_test, y_predicted)*100, 2), \"%\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -2,129 +2,110 @@
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"from mirapy.data.load_dataset import load_ogle_dataset\n",
"from mirapy.classifiers.models import OGLEClassifier\n",
"from keras.utils.np_utils import to_categorical\n",
"import mirapy"
"import mirapy\n",
"\n",
"from sklearn.metrics import classification_report, accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"from keras.optimizers import Adam\n",
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder"
]
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"path = 'D:\\MTP\\ogle'"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"x_train, y_train, x_test, y_test = load_ogle_dataset(path, classes = [\"cep\" , \"dsct\" ,\"lpv (empty)\", \"rrlyr\" ,\"t2cep\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"50 is the optimal length to minimize class inequality"
"50 is the optimal length to minimize class inequality\n",
"\n",
"Numpy array `y` is the respective class labels."
]
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"classifier = OgleClassifier('relu', input_size=50, num_classes=5)\n",
"classifier.model.compile(optimizer='adam', loss=\"categorical_crossentropy\", metrics=['accuracy'])"
"x, y = load_ogle_dataset(path, classes = [\"cep\" , \"dsct\" ,\"lpv (empty)\", \"rrlyr\" ,\"t2cep\"])"
]
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(38431,)\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"lstm_9 (LSTM) (None, 64) 16896 \n",
"_________________________________________________________________\n",
"dense_25 (Dense) (None, 64) 4160 \n",
"_________________________________________________________________\n",
"dropout_9 (Dropout) (None, 64) 0 \n",
"_________________________________________________________________\n",
"dense_26 (Dense) (None, 16) 1040 \n",
"_________________________________________________________________\n",
"dense_27 (Dense) (None, 5) 85 \n",
"=================================================================\n",
"Total params: 22,181\n",
"Trainable params: 22,181\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"None\n"
"ename": "NameError",
"evalue": "name 'train_test_split' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-7-4e29ffc2e028>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mx_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m42\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mlabel_encoder\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0minteger_encoded\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlabel_encoder\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'train_test_split' is not defined"
]
}
],
"source": [
"classifier.compile(optimizer='adam', loss='categorical_crossentropy')\n",
"print(classifier.model.summary())"
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n",
"\n",
"label_encoder = LabelEncoder()\n",
"integer_encoded = label_encoder.fit_transform(y_train)\n",
"\n",
"onehot_encoder = OneHotEncoder(sparse=False)\n",
"integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)\n",
"onehot_encoded = onehot_encoder.fit_transform(integer_encoded)\n",
"y_train = onehot_encoded"
]
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"classifier = OGLEClassifier('relu', input_size=50, num_classes=5)\n",
"classifier.compile(optimizer='adam', loss=\"categorical_crossentropy\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(38431, 5) (38431, 50, 1)\n",
"Train on 38431 samples, validate on 9608 samples\n",
"Epoch 1/10\n",
" - 222s - loss: 0.5010 - acc: 0.8372 - val_loss: 0.3996 - val_acc: 0.8605\n",
"Epoch 2/10\n",
" - 178s - loss: 0.4137 - acc: 0.8615 - val_loss: 0.4001 - val_acc: 0.8598\n",
"Epoch 3/10\n",
" - 139s - loss: 0.4100 - acc: 0.8608 - val_loss: 0.4016 - val_acc: 0.8599\n",
"Epoch 4/10\n",
" - 134s - loss: 0.4028 - acc: 0.8634 - val_loss: 0.4003 - val_acc: 0.8591\n",
"Epoch 5/10\n",
" - 140s - loss: 0.3988 - acc: 0.8632 - val_loss: 0.3939 - val_acc: 0.8620\n",
"Epoch 6/10\n",
" - 134s - loss: 0.4000 - acc: 0.8631 - val_loss: 0.3895 - val_acc: 0.8629\n",
"Epoch 7/10\n",
" - 137s - loss: 0.3956 - acc: 0.8638 - val_loss: 0.3937 - val_acc: 0.8612\n",
"Epoch 8/10\n",
" - 135s - loss: 0.3958 - acc: 0.8638 - val_loss: 0.3941 - val_acc: 0.8589\n",
"Epoch 9/10\n",
" - 137s - loss: 0.3931 - acc: 0.8647 - val_loss: 0.4103 - val_acc: 0.8582\n",
"Epoch 10/10\n",
" - 136s - loss: 0.3950 - acc: 0.8646 - val_loss: 0.3847 - val_acc: 0.8625\n"
"ename": "NameError",
"evalue": "name 'x_train' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-6-c6eb6a8f7453>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m classifier.train(x_train, to_categorical(y_train),\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m40\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m verbose=2)\n",
"\u001b[1;31mNameError\u001b[0m: name 'x_train' is not defined"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x17a952b1400>"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [