diff --git a/Classifiers/.ipynb_checkpoints/ATLAS variable star Classification-checkpoint.ipynb b/Classifiers/.ipynb_checkpoints/ATLAS variable star Classification-checkpoint.ipynb new file mode 100644 index 0000000..60478dd --- /dev/null +++ b/Classifiers/.ipynb_checkpoints/ATLAS variable star Classification-checkpoint.ipynb @@ -0,0 +1,206 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "from mirapy.data.load_dataset import load_atlas_star_data\n", + "from mirapy.classifiers.models import AtlasVarStarClassifier\n", + "import mirapy\n", + "\n", + "import os\n", + "from os import walk\n", + "import pandas as pd\n", + "from sklearn.metrics import classification_report, accuracy_score\n", + "from sklearn.model_selection import train_test_split\n", + "from keras.optimizers import Adam\n", + "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", + "from sklearn.preprocessing import StandardScaler" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "path = 'D:\\MTP\\ATLAS\\dataset'\n", + "csv_file = os.path.join(path, \"non_dub.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ignore feature list to use features selected using feature selection\n", + "\n", + "Numpy array `y` is the respective class labels.\n", + "\n", + "`BH` Black Hole\n", + "`P` Pulsar\n", + "`NP` Non-pulsar" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "x, y = load_atlas_star_data(csv_file, 0.2)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Admin\\Anaconda3\\envs\\gpu\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:363: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", + "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", + "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", + " warnings.warn(msg, FutureWarning)\n" + ] + } + ], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n", + "\n", + "label_encoder = LabelEncoder()\n", + "integer_encoded = label_encoder.fit_transform(y_train)\n", + "\n", + "onehot_encoder = OneHotEncoder(sparse=False)\n", + "integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)\n", + "onehot_encoded = onehot_encoder.fit_transform(integer_encoded)\n", + "y_train = onehot_encoded" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "classifier = AtlasVarStarClassifier('relu', input_size=x_train[0].shape[0], num_classes=y_train[0].shape[0])\n", + "classifier.compile(optimizer='adam', loss='mean_squared_error')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + " - 6s - loss: 0.0238 - acc: 0.8593\n", + "Epoch 2/10\n", + " - 3s - loss: 0.0118 - acc: 0.9318\n", + "Epoch 3/10\n", + " - 3s - loss: 0.0095 - acc: 0.9436\n", + "Epoch 4/10\n", + " - 3s - loss: 0.0087 - acc: 0.9482\n", + "Epoch 5/10\n", + " - 3s - loss: 0.0083 - acc: 0.9502\n", + "Epoch 6/10\n", + " - 3s - loss: 0.0081 - acc: 0.9518\n", + "Epoch 7/10\n", + " - 3s - loss: 0.0079 - acc: 0.9527\n", + "Epoch 8/10\n", + " - 3s - loss: 0.0077 - acc: 0.9543\n", + "Epoch 9/10\n", + " - 3s - loss: 0.0076 - acc: 0.9549\n", + "Epoch 10/10\n", + " - 3s - loss: 0.0074 - acc: 0.9558\n" + ] + } + ], + "source": [ + "classifier.train(x_train, y_train, epochs=10,\n", + " batch_size=100, verbose=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "convert string classes to integer encoded" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.95 0.95 0.95 3373\n", + " 1 0.97 0.98 0.97 2977\n", + " 2 0.84 0.87 0.85 840\n", + " 3 0.94 0.91 0.93 1406\n", + " 4 0.98 0.99 0.99 439\n", + " 5 0.86 0.80 0.83 396\n", + " 6 0.94 0.97 0.96 2655\n", + " 7 0.98 0.97 0.98 1839\n", + " 8 1.00 0.98 0.99 2472\n", + "\n", + " micro avg 0.95 0.95 0.95 16397\n", + " macro avg 0.94 0.94 0.94 16397\n", + "weighted avg 0.95 0.95 0.95 16397\n", + "\n", + "Accuracy: 95.45 %\n" + ] + } + ], + "source": [ + "label_encoder = LabelEncoder()\n", + "integer_encoded = label_encoder.fit_transform(y_test)\n", + "y_test = integer_encoded\n", + "\n", + "y_predicted = classifier.test(x_test)\n", + "print(classification_report(y_test, y_predicted))\n", + "print(\"Accuracy:\", round(accuracy_score(y_test, y_predicted)*100, 2), \"%\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Classifiers/.ipynb_checkpoints/OGLE variable star classification-checkpoint.ipynb b/Classifiers/.ipynb_checkpoints/OGLE variable star classification-checkpoint.ipynb index b77d1bc..0086a6f 100644 --- a/Classifiers/.ipynb_checkpoints/OGLE variable star classification-checkpoint.ipynb +++ b/Classifiers/.ipynb_checkpoints/OGLE variable star classification-checkpoint.ipynb @@ -2,129 +2,110 @@ "cells": [ { "cell_type": "code", - "execution_count": 20, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], "source": [ "from mirapy.data.load_dataset import load_ogle_dataset\n", "from mirapy.classifiers.models import OGLEClassifier\n", "from keras.utils.np_utils import to_categorical\n", - "import mirapy" + "import mirapy\n", + "\n", + "from sklearn.metrics import classification_report, accuracy_score\n", + "from sklearn.model_selection import train_test_split\n", + "from keras.optimizers import Adam\n", + "from sklearn.preprocessing import LabelEncoder, OneHotEncoder" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "path = 'D:\\MTP\\ogle'" ] }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "x_train, y_train, x_test, y_test = load_ogle_dataset(path, classes = [\"cep\" , \"dsct\" ,\"lpv (empty)\", \"rrlyr\" ,\"t2cep\"])" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "50 is the optimal length to minimize class inequality" + "50 is the optimal length to minimize class inequality\n", + "\n", + "Numpy array `y` is the respective class labels." ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "classifier = OgleClassifier('relu', input_size=50, num_classes=5)\n", - "classifier.model.compile(optimizer='adam', loss=\"categorical_crossentropy\", metrics=['accuracy'])" + "x, y = load_ogle_dataset(path, classes = [\"cep\" , \"dsct\" ,\"lpv (empty)\", \"rrlyr\" ,\"t2cep\"])" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 7, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "(38431,)\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "lstm_9 (LSTM) (None, 64) 16896 \n", - "_________________________________________________________________\n", - "dense_25 (Dense) (None, 64) 4160 \n", - "_________________________________________________________________\n", - "dropout_9 (Dropout) (None, 64) 0 \n", - "_________________________________________________________________\n", - "dense_26 (Dense) (None, 16) 1040 \n", - "_________________________________________________________________\n", - "dense_27 (Dense) (None, 5) 85 \n", - "=================================================================\n", - "Total params: 22,181\n", - "Trainable params: 22,181\n", - "Non-trainable params: 0\n", - "_________________________________________________________________\n", - "None\n" + "ename": "NameError", + "evalue": "name 'train_test_split' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mx_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m42\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mlabel_encoder\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0minteger_encoded\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlabel_encoder\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mNameError\u001b[0m: name 'train_test_split' is not defined" ] } ], "source": [ - "classifier.compile(optimizer='adam', loss='categorical_crossentropy')\n", - "print(classifier.model.summary())" + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n", + "\n", + "label_encoder = LabelEncoder()\n", + "integer_encoded = label_encoder.fit_transform(y_train)\n", + "\n", + "onehot_encoder = OneHotEncoder(sparse=False)\n", + "integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)\n", + "onehot_encoded = onehot_encoder.fit_transform(integer_encoded)\n", + "y_train = onehot_encoded" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "classifier = OGLEClassifier('relu', input_size=50, num_classes=5)\n", + "classifier.compile(optimizer='adam', loss=\"categorical_crossentropy\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "(38431, 5) (38431, 50, 1)\n", - "Train on 38431 samples, validate on 9608 samples\n", - "Epoch 1/10\n", - " - 222s - loss: 0.5010 - acc: 0.8372 - val_loss: 0.3996 - val_acc: 0.8605\n", - "Epoch 2/10\n", - " - 178s - loss: 0.4137 - acc: 0.8615 - val_loss: 0.4001 - val_acc: 0.8598\n", - "Epoch 3/10\n", - " - 139s - loss: 0.4100 - acc: 0.8608 - val_loss: 0.4016 - val_acc: 0.8599\n", - "Epoch 4/10\n", - " - 134s - loss: 0.4028 - acc: 0.8634 - val_loss: 0.4003 - val_acc: 0.8591\n", - "Epoch 5/10\n", - " - 140s - loss: 0.3988 - acc: 0.8632 - val_loss: 0.3939 - val_acc: 0.8620\n", - "Epoch 6/10\n", - " - 134s - loss: 0.4000 - acc: 0.8631 - val_loss: 0.3895 - val_acc: 0.8629\n", - "Epoch 7/10\n", - " - 137s - loss: 0.3956 - acc: 0.8638 - val_loss: 0.3937 - val_acc: 0.8612\n", - "Epoch 8/10\n", - " - 135s - loss: 0.3958 - acc: 0.8638 - val_loss: 0.3941 - val_acc: 0.8589\n", - "Epoch 9/10\n", - " - 137s - loss: 0.3931 - acc: 0.8647 - val_loss: 0.4103 - val_acc: 0.8582\n", - "Epoch 10/10\n", - " - 136s - loss: 0.3950 - acc: 0.8646 - val_loss: 0.3847 - val_acc: 0.8625\n" + "ename": "NameError", + "evalue": "name 'x_train' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m classifier.train(x_train, to_categorical(y_train),\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m40\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m verbose=2)\n", + "\u001b[1;31mNameError\u001b[0m: name 'x_train' is not defined" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ diff --git a/Classifiers/.ipynb_checkpoints/XRay Binary Classification-checkpoint.ipynb b/Classifiers/.ipynb_checkpoints/XRay Binary Classification-checkpoint.ipynb index 652211a..73a2f2d 100644 --- a/Classifiers/.ipynb_checkpoints/XRay Binary Classification-checkpoint.ipynb +++ b/Classifiers/.ipynb_checkpoints/XRay Binary Classification-checkpoint.ipynb @@ -1,8 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# XRay Binary Classification" + ] + }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -12,33 +19,63 @@ "\n", "from sklearn.metrics import classification_report, accuracy_score\n", "from sklearn.model_selection import train_test_split\n", - "from keras.optimizers import Adam" + "from keras.optimizers import Adam\n", + "from keras.utils.np_utils import to_categorical\n", + "from sklearn.preprocessing import LabelEncoder, OneHotEncoder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Numpy array `y` is the respective class labels.\n", + "\n", + "`BH` Black Hole\n", + "`P` Pulsar\n", + "`NP` Non-pulsar" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\mtp\\mirapy_0\\mirapy\\mirapy\\data\\load_dataset.py:71: FutureWarning: convert_objects is deprecated. To re-infer data dtypes for object columns, use DataFrame.infer_objects()\n", + "For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.\n", + " rawdf = rawdf.convert_objects(convert_numeric=True)\n" + ] + } + ], "source": [ - "data_dir = '.\\XRay-Binary\\'\n", - "x_train, y_train, x_test, y_test = load_xray_binary_data(data_dir, 0.2, True)" + "data_dir = 'D:\\MTP\\XRay-Binary-Classification\\XRay-Binary-Classification\\Training'\n", + "x, y = load_xray_binary_data(data_dir, True)\n", + "\n", + "label_encoder = LabelEncoder()\n", + "integer_encoded = label_encoder.fit_transform(y)\n", + "y = integer_encoded\n", + "\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n", + "\n", + "y_train = to_categorical(y_train)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "classifier = XRayBinaryClassifier('relu', 'adam')\n", - "\n", - "classifier.compile('mean_squared_error')" + "classifier = XRayBinaryClassifier('relu')\n", + "classifier.compile(optimizer=Adam(lr=0.0001, decay=1e-6), loss='mean_squared_error')" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -46,46 +83,36 @@ "output_type": "stream", "text": [ "Epoch 1/10\n", - " - 4s - loss: 0.0132 - acc: 0.9747\n", + " - 35s - loss: 0.1778 - acc: 0.6895\n", "Epoch 2/10\n", - " - 4s - loss: 0.0133 - acc: 0.9740\n", + " - 10s - loss: 0.0604 - acc: 0.9104\n", "Epoch 3/10\n", - " - 4s - loss: 0.0133 - acc: 0.9742\n", + " - 13s - loss: 0.0308 - acc: 0.9507\n", "Epoch 4/10\n", - " - 4s - loss: 0.0131 - acc: 0.9752\n", + " - 9s - loss: 0.0239 - acc: 0.9589\n", "Epoch 5/10\n", - " - 4s - loss: 0.0132 - acc: 0.9746\n", + " - 9s - loss: 0.0216 - acc: 0.9611\n", "Epoch 6/10\n", - " - 4s - loss: 0.0132 - acc: 0.9740\n", + " - 8s - loss: 0.0204 - acc: 0.9632\n", "Epoch 7/10\n", - " - 4s - loss: 0.0132 - acc: 0.9740\n", + " - 9s - loss: 0.0197 - acc: 0.9644\n", "Epoch 8/10\n", - " - 4s - loss: 0.0132 - acc: 0.9744\n", + " - 9s - loss: 0.0191 - acc: 0.9659\n", "Epoch 9/10\n", - " - 4s - loss: 0.0132 - acc: 0.9745\n", + " - 9s - loss: 0.0187 - acc: 0.9658\n", "Epoch 10/10\n", - " - 4s - loss: 0.0131 - acc: 0.9743\n" + " - 9s - loss: 0.0184 - acc: 0.9666\n" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "classifier.model.fit(x_train, y_train, epochs=10,\n", - " batch_size=32, verbose=1)" + "classifier.train(x_train, y_train, epochs=10,\n", + " batch_size=32, verbose=2)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -94,20 +121,20 @@ "text": [ " precision recall f1-score support\n", "\n", - " 0 0.96 0.97 0.97 2637\n", - " 1 0.91 0.87 0.89 484\n", - " 2 0.98 0.98 0.98 5056\n", + " 0 0.96 0.95 0.96 2637\n", + " 1 0.97 0.98 0.98 5056\n", + " 2 0.95 0.84 0.89 484\n", "\n", " micro avg 0.97 0.97 0.97 8177\n", - " macro avg 0.95 0.94 0.95 8177\n", - "weighted avg 0.97 0.97 0.97 8177\n", + " macro avg 0.96 0.93 0.94 8177\n", + "weighted avg 0.96 0.97 0.96 8177\n", "\n", - "Accuracy: 97.05 %\n" + "Accuracy: 96.5 %\n" ] } ], "source": [ - "y_predicted = classifier.test(x_test)\n", + "y_predicted = classifier.predict(x_test)\n", "print(classification_report(y_test, y_predicted))\n", "print(\"Accuracy:\", round(accuracy_score(y_test, y_predicted)*100, 2), \"%\")" ] diff --git a/Classifiers/ATLAS variable star Classification.ipynb b/Classifiers/ATLAS variable star Classification.ipynb index 8cc200e..f382836 100644 --- a/Classifiers/ATLAS variable star Classification.ipynb +++ b/Classifiers/ATLAS variable star Classification.ipynb @@ -24,8 +24,7 @@ "from sklearn.metrics import classification_report, accuracy_score\n", "from sklearn.model_selection import train_test_split\n", "from keras.optimizers import Adam\n", - "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", - "from sklearn.preprocessing import StandardScaler" + "from sklearn.preprocessing import LabelEncoder, OneHotEncoder" ] }, { @@ -42,7 +41,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Ignore feature list to use features selected using feature selection" + "Ignore feature list to use features selected using feature selection\n", + "\n", + "Numpy array `y` is the respective class labels.\n", + "\n", + "`BH` Black Hole\n", + "`P` Pulsar\n", + "`NP` Non-pulsar" ] }, { @@ -51,22 +56,50 @@ "metadata": {}, "outputs": [], "source": [ - "x_train, y_train, x_test, y_test = load_atlas_star_data(csv_file, 0.2)" + "x, y = load_atlas_star_data(csv_file, 0.2)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Admin\\Anaconda3\\envs\\gpu\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:363: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", + "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", + "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", + " warnings.warn(msg, FutureWarning)\n" + ] + } + ], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n", + "\n", + "label_encoder = LabelEncoder()\n", + "integer_encoded = label_encoder.fit_transform(y_train)\n", + "\n", + "onehot_encoder = OneHotEncoder(sparse=False)\n", + "integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)\n", + "onehot_encoded = onehot_encoder.fit_transform(integer_encoded)\n", + "y_train = onehot_encoded" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "classifier = AtlasVarStarClassifier('relu', 'adam', input_size=x_train[0].shape[0], num_classes=y_train[0].shape[0])\n", - "classifier.compile('mean_squared_error')" + "classifier = AtlasVarStarClassifier('relu', input_size=x_train[0].shape[0], num_classes=y_train[0].shape[0])\n", + "classifier.compile(optimizer='adam', loss='mean_squared_error')" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -74,43 +107,31 @@ "output_type": "stream", "text": [ "Epoch 1/10\n", - " - 10s - loss: 0.0072 - acc: 0.9576\n", + " - 6s - loss: 0.0238 - acc: 0.8593\n", "Epoch 2/10\n", - " - 9s - loss: 0.0073 - acc: 0.9567\n", + " - 3s - loss: 0.0118 - acc: 0.9318\n", "Epoch 3/10\n", - " - 9s - loss: 0.0072 - acc: 0.9569\n", + " - 3s - loss: 0.0095 - acc: 0.9436\n", "Epoch 4/10\n", - " - 10s - loss: 0.0071 - acc: 0.9583\n", + " - 3s - loss: 0.0087 - acc: 0.9482\n", "Epoch 5/10\n", - " - 10s - loss: 0.0070 - acc: 0.9585\n", + " - 3s - loss: 0.0083 - acc: 0.9502\n", "Epoch 6/10\n", - " - 10s - loss: 0.0069 - acc: 0.9592\n", + " - 3s - loss: 0.0081 - acc: 0.9518\n", "Epoch 7/10\n", - " - 9s - loss: 0.0069 - acc: 0.9587\n", + " - 3s - loss: 0.0079 - acc: 0.9527\n", "Epoch 8/10\n", - " - 9s - loss: 0.0068 - acc: 0.9596\n", + " - 3s - loss: 0.0077 - acc: 0.9543\n", "Epoch 9/10\n", - " - 9s - loss: 0.0068 - acc: 0.9599\n", + " - 3s - loss: 0.0076 - acc: 0.9549\n", "Epoch 10/10\n", - " - 9s - loss: 0.0067 - acc: 0.9606\n" + " - 3s - loss: 0.0074 - acc: 0.9558\n" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "# classifier.train(x_train, y_train, epochs=50,\n", - "# batch_size=32)\n", - "classifier.model.fit(x_train, y_train, epochs=10,\n", - " batch_size=32, verbose=2)" + "classifier.train(x_train, y_train, epochs=10,\n", + " batch_size=100, verbose=2)" ] }, { diff --git a/Classifiers/OGLE variable star classification.ipynb b/Classifiers/OGLE variable star classification.ipynb index 31f36c1..8ee7c93 100644 --- a/Classifiers/OGLE variable star classification.ipynb +++ b/Classifiers/OGLE variable star classification.ipynb @@ -2,14 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from mirapy.data.load_dataset import load_ogle_dataset\n", "from mirapy.classifiers.models import OGLEClassifier\n", "from keras.utils.np_utils import to_categorical\n", - "import mirapy" + "import mirapy\n", + "\n", + "from sklearn.metrics import classification_report, accuracy_score\n", + "from sklearn.model_selection import train_test_split\n", + "from keras.optimizers import Adam\n", + "from sklearn.preprocessing import LabelEncoder, OneHotEncoder" ] }, { @@ -21,20 +26,13 @@ "path = 'D:\\MTP\\ogle'" ] }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "x_train, y_train, x_test, y_test = load_ogle_dataset(path, classes = [\"cep\" , \"dsct\" ,\"lpv (empty)\", \"rrlyr\" ,\"t2cep\"])" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "50 is the optimal length to minimize class inequality" + "50 is the optimal length to minimize class inequality\n", + "\n", + "Numpy array `y` is the respective class labels." ] }, { @@ -43,92 +41,76 @@ "metadata": {}, "outputs": [], "source": [ - "classifier = OGLEClassifier('relu', input_size=50, num_classes=5)\n", - "classifier.model.compile(optimizer='adam', loss=\"categorical_crossentropy\", metrics=['accuracy'])" + "x, y = load_ogle_dataset(path, classes = [\"cep\" , \"dsct\" ,\"lpv (empty)\", \"rrlyr\" ,\"t2cep\"])" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 10, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "(38431,)\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "lstm_9 (LSTM) (None, 64) 16896 \n", - "_________________________________________________________________\n", - "dense_25 (Dense) (None, 64) 4160 \n", - "_________________________________________________________________\n", - "dropout_9 (Dropout) (None, 64) 0 \n", - "_________________________________________________________________\n", - "dense_26 (Dense) (None, 16) 1040 \n", - "_________________________________________________________________\n", - "dense_27 (Dense) (None, 5) 85 \n", - "=================================================================\n", - "Total params: 22,181\n", - "Trainable params: 22,181\n", - "Non-trainable params: 0\n", - "_________________________________________________________________\n", - "None\n" + "C:\\Users\\Admin\\Anaconda3\\envs\\gpu\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:363: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", + "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", + "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", + " warnings.warn(msg, FutureWarning)\n" ] } ], "source": [ - "classifier.compile(optimizer='adam', loss='categorical_crossentropy')\n", - "print(classifier.model.summary())" + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n", + "\n", + "label_encoder = LabelEncoder()\n", + "integer_encoded = label_encoder.fit_transform(y_train)\n", + "\n", + "onehot_encoder = OneHotEncoder(sparse=False)\n", + "integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)\n", + "onehot_encoded = onehot_encoder.fit_transform(integer_encoded)\n", + "y_train = onehot_encoded" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "classifier = OGLEClassifier('relu', input_size=50, num_classes=5)\n", + "classifier.compile(optimizer='adam', loss=\"categorical_crossentropy\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(38431, 5) (38431, 50, 1)\n", - "Train on 38431 samples, validate on 9608 samples\n", "Epoch 1/10\n", - " - 222s - loss: 0.5010 - acc: 0.8372 - val_loss: 0.3996 - val_acc: 0.8605\n", + " - 103s - loss: 0.5112 - acc: 0.8354\n", "Epoch 2/10\n", - " - 178s - loss: 0.4137 - acc: 0.8615 - val_loss: 0.4001 - val_acc: 0.8598\n", + " - 100s - loss: 0.4120 - acc: 0.8618\n", "Epoch 3/10\n", - " - 139s - loss: 0.4100 - acc: 0.8608 - val_loss: 0.4016 - val_acc: 0.8599\n", + " - 103s - loss: 0.4055 - acc: 0.8622\n", "Epoch 4/10\n", - " - 134s - loss: 0.4028 - acc: 0.8634 - val_loss: 0.4003 - val_acc: 0.8591\n", + " - 103s - loss: 0.4015 - acc: 0.8627\n", "Epoch 5/10\n", - " - 140s - loss: 0.3988 - acc: 0.8632 - val_loss: 0.3939 - val_acc: 0.8620\n", + " - 103s - loss: 0.3966 - acc: 0.8634\n", "Epoch 6/10\n", - " - 134s - loss: 0.4000 - acc: 0.8631 - val_loss: 0.3895 - val_acc: 0.8629\n", + " - 109s - loss: 0.3979 - acc: 0.8631\n", "Epoch 7/10\n", - " - 137s - loss: 0.3956 - acc: 0.8638 - val_loss: 0.3937 - val_acc: 0.8612\n", - "Epoch 8/10\n", - " - 135s - loss: 0.3958 - acc: 0.8638 - val_loss: 0.3941 - val_acc: 0.8589\n", - "Epoch 9/10\n", - " - 137s - loss: 0.3931 - acc: 0.8647 - val_loss: 0.4103 - val_acc: 0.8582\n", - "Epoch 10/10\n", - " - 136s - loss: 0.3950 - acc: 0.8646 - val_loss: 0.3847 - val_acc: 0.8625\n" + " - 99s - loss: 0.3948 - acc: 0.8647\n", + "Epoch 8/10\n" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "classifier.train(x_train, to_categorical(y_train),\n", + "classifier.train(x_train, y_train,\n", " epochs=10,\n", " batch_size=40,\n", " verbose=2)" diff --git a/Classifiers/XRay Binary Classification.ipynb b/Classifiers/XRay Binary Classification.ipynb index 050fd04..73a2f2d 100644 --- a/Classifiers/XRay Binary Classification.ipynb +++ b/Classifiers/XRay Binary Classification.ipynb @@ -9,17 +9,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 14, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], + "outputs": [], "source": [ "from mirapy.data.load_dataset import load_xray_binary_data\n", "from mirapy.classifiers.models import XRayBinaryClassifier\n", @@ -27,40 +19,63 @@ "\n", "from sklearn.metrics import classification_report, accuracy_score\n", "from sklearn.model_selection import train_test_split\n", - "from keras.optimizers import Adam" + "from keras.optimizers import Adam\n", + "from keras.utils.np_utils import to_categorical\n", + "from sklearn.preprocessing import LabelEncoder, OneHotEncoder" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "`y_train` is categorical and `y_test` is numeric value of the class." + "Numpy array `y` is the respective class labels.\n", + "\n", + "`BH` Black Hole\n", + "`P` Pulsar\n", + "`NP` Non-pulsar" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\mtp\\mirapy_0\\mirapy\\mirapy\\data\\load_dataset.py:71: FutureWarning: convert_objects is deprecated. To re-infer data dtypes for object columns, use DataFrame.infer_objects()\n", + "For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.\n", + " rawdf = rawdf.convert_objects(convert_numeric=True)\n" + ] + } + ], "source": [ "data_dir = 'D:\\MTP\\XRay-Binary-Classification\\XRay-Binary-Classification\\Training'\n", - "x_train, y_train, x_test, y_test = load_xray_binary_data(data_dir, 0.2, True)" + "x, y = load_xray_binary_data(data_dir, True)\n", + "\n", + "label_encoder = LabelEncoder()\n", + "integer_encoded = label_encoder.fit_transform(y)\n", + "y = integer_encoded\n", + "\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n", + "\n", + "y_train = to_categorical(y_train)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "classifier = XRayBinaryClassifier('relu', 'adam')\n", - "\n", - "classifier.compile('mean_squared_error')" + "classifier = XRayBinaryClassifier('relu')\n", + "classifier.compile(optimizer=Adam(lr=0.0001, decay=1e-6), loss='mean_squared_error')" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -68,46 +83,36 @@ "output_type": "stream", "text": [ "Epoch 1/10\n", - " - 5s - loss: 0.0153 - acc: 0.9713\n", + " - 35s - loss: 0.1778 - acc: 0.6895\n", "Epoch 2/10\n", - " - 7s - loss: 0.0154 - acc: 0.9709\n", + " - 10s - loss: 0.0604 - acc: 0.9104\n", "Epoch 3/10\n", - " - 6s - loss: 0.0152 - acc: 0.9714\n", + " - 13s - loss: 0.0308 - acc: 0.9507\n", "Epoch 4/10\n", - " - 7s - loss: 0.0150 - acc: 0.9711\n", + " - 9s - loss: 0.0239 - acc: 0.9589\n", "Epoch 5/10\n", - " - 6s - loss: 0.0151 - acc: 0.9709\n", + " - 9s - loss: 0.0216 - acc: 0.9611\n", "Epoch 6/10\n", - " - 6s - loss: 0.0150 - acc: 0.9710\n", + " - 8s - loss: 0.0204 - acc: 0.9632\n", "Epoch 7/10\n", - " - 6s - loss: 0.0149 - acc: 0.9710\n", + " - 9s - loss: 0.0197 - acc: 0.9644\n", "Epoch 8/10\n", - " - 5s - loss: 0.0150 - acc: 0.9714\n", + " - 9s - loss: 0.0191 - acc: 0.9659\n", "Epoch 9/10\n", - " - 4s - loss: 0.0149 - acc: 0.9710\n", + " - 9s - loss: 0.0187 - acc: 0.9658\n", "Epoch 10/10\n", - " - 4s - loss: 0.0148 - acc: 0.9715\n" + " - 9s - loss: 0.0184 - acc: 0.9666\n" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ "classifier.train(x_train, y_train, epochs=10,\n", - " batch_size=32)" + " batch_size=32, verbose=2)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -116,20 +121,20 @@ "text": [ " precision recall f1-score support\n", "\n", - " 0 0.96 0.97 0.97 2637\n", - " 1 0.91 0.87 0.89 484\n", - " 2 0.98 0.98 0.98 5056\n", + " 0 0.96 0.95 0.96 2637\n", + " 1 0.97 0.98 0.98 5056\n", + " 2 0.95 0.84 0.89 484\n", "\n", " micro avg 0.97 0.97 0.97 8177\n", - " macro avg 0.95 0.94 0.95 8177\n", - "weighted avg 0.97 0.97 0.97 8177\n", + " macro avg 0.96 0.93 0.94 8177\n", + "weighted avg 0.96 0.97 0.96 8177\n", "\n", - "Accuracy: 97.05 %\n" + "Accuracy: 96.5 %\n" ] } ], "source": [ - "y_predicted = classifier.test(x_test)\n", + "y_predicted = classifier.predict(x_test)\n", "print(classification_report(y_test, y_predicted))\n", "print(\"Accuracy:\", round(accuracy_score(y_test, y_predicted)*100, 2), \"%\")" ]