@@ -0,0 +1,206 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"from mirapy.data.load_dataset import load_atlas_star_data\n",
"from mirapy.classifiers.models import AtlasVarStarClassifier\n",
"import mirapy\n",
"\n",
"import os\n",
"from os import walk\n",
"import pandas as pd\n",
"from sklearn.metrics import classification_report, accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"from keras.optimizers import Adam\n",
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
"from sklearn.preprocessing import StandardScaler"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"path = 'D:\\MTP\\ATLAS\\dataset'\n",
"csv_file = os.path.join(path, \"non_dub.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ignore feature list to use features selected using feature selection\n",
"\n",
"Numpy array `y` is the respective class labels.\n",
"\n",
"`BH` Black Hole\n",
"`P` Pulsar\n",
"`NP` Non-pulsar"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"x, y = load_atlas_star_data(csv_file, 0.2)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Admin\\Anaconda3\\envs\\gpu\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:363: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
"If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
"In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
" warnings.warn(msg, FutureWarning)\n"
]
}
],
"source": [
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n",
"\n",
"label_encoder = LabelEncoder()\n",
"integer_encoded = label_encoder.fit_transform(y_train)\n",
"\n",
"onehot_encoder = OneHotEncoder(sparse=False)\n",
"integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)\n",
"onehot_encoded = onehot_encoder.fit_transform(integer_encoded)\n",
"y_train = onehot_encoded"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"classifier = AtlasVarStarClassifier('relu', input_size=x_train[0].shape[0], num_classes=y_train[0].shape[0])\n",
"classifier.compile(optimizer='adam', loss='mean_squared_error')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n",
" - 6s - loss: 0.0238 - acc: 0.8593\n",
"Epoch 2/10\n",
" - 3s - loss: 0.0118 - acc: 0.9318\n",
"Epoch 3/10\n",
" - 3s - loss: 0.0095 - acc: 0.9436\n",
"Epoch 4/10\n",
" - 3s - loss: 0.0087 - acc: 0.9482\n",
"Epoch 5/10\n",
" - 3s - loss: 0.0083 - acc: 0.9502\n",
"Epoch 6/10\n",
" - 3s - loss: 0.0081 - acc: 0.9518\n",
"Epoch 7/10\n",
" - 3s - loss: 0.0079 - acc: 0.9527\n",
"Epoch 8/10\n",
" - 3s - loss: 0.0077 - acc: 0.9543\n",
"Epoch 9/10\n",
" - 3s - loss: 0.0076 - acc: 0.9549\n",
"Epoch 10/10\n",
" - 3s - loss: 0.0074 - acc: 0.9558\n"
]
}
],
"source": [
"classifier.train(x_train, y_train, epochs=10,\n",
" batch_size=100, verbose=2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"convert string classes to integer encoded"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.95 0.95 0.95 3373\n",
" 1 0.97 0.98 0.97 2977\n",
" 2 0.84 0.87 0.85 840\n",
" 3 0.94 0.91 0.93 1406\n",
" 4 0.98 0.99 0.99 439\n",
" 5 0.86 0.80 0.83 396\n",
" 6 0.94 0.97 0.96 2655\n",
" 7 0.98 0.97 0.98 1839\n",
" 8 1.00 0.98 0.99 2472\n",
"\n",
" micro avg 0.95 0.95 0.95 16397\n",
" macro avg 0.94 0.94 0.94 16397\n",
"weighted avg 0.95 0.95 0.95 16397\n",
"\n",
"Accuracy: 95.45 %\n"
]
}
],
"source": [
"label_encoder = LabelEncoder()\n",
"integer_encoded = label_encoder.fit_transform(y_test)\n",
"y_test = integer_encoded\n",
"\n",
"y_predicted = classifier.test(x_test)\n",
"print(classification_report(y_test, y_predicted))\n",
"print(\"Accuracy:\", round(accuracy_score(y_test, y_predicted)*100, 2), \"%\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -2,129 +2,110 @@
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"from mirapy.data.load_dataset import load_ogle_dataset\n",
"from mirapy.classifiers.models import OGLEClassifier\n",
"from keras.utils.np_utils import to_categorical\n",
"import mirapy"
"import mirapy\n",
"\n",
"from sklearn.metrics import classification_report, accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"from keras.optimizers import Adam\n",
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder"
]
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"path = 'D:\\MTP\\ogle'"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"x_train, y_train, x_test, y_test = load_ogle_dataset(path, classes = [\"cep\" , \"dsct\" ,\"lpv (empty)\", \"rrlyr\" ,\"t2cep\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"50 is the optimal length to minimize class inequality"
"50 is the optimal length to minimize class inequality\n",
"\n",
"Numpy array `y` is the respective class labels."
]
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"classifier = OgleClassifier('relu', input_size=50, num_classes=5)\n",
"classifier.model.compile(optimizer='adam', loss=\"categorical_crossentropy\", metrics=['accuracy'])"
"x, y = load_ogle_dataset(path, classes = [\"cep\" , \"dsct\" ,\"lpv (empty)\", \"rrlyr\" ,\"t2cep\"])"
]
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(38431,)\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"lstm_9 (LSTM) (None, 64) 16896 \n",
"_________________________________________________________________\n",
"dense_25 (Dense) (None, 64) 4160 \n",
"_________________________________________________________________\n",
"dropout_9 (Dropout) (None, 64) 0 \n",
"_________________________________________________________________\n",
"dense_26 (Dense) (None, 16) 1040 \n",
"_________________________________________________________________\n",
"dense_27 (Dense) (None, 5) 85 \n",
"=================================================================\n",
"Total params: 22,181\n",
"Trainable params: 22,181\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"None\n"
"ename": "NameError",
"evalue": "name 'train_test_split' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-7-4e29ffc2e028>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mx_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m42\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mlabel_encoder\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0minteger_encoded\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlabel_encoder\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'train_test_split' is not defined"
]
}
],
"source": [
"classifier.compile(optimizer='adam', loss='categorical_crossentropy')\n",
"print(classifier.model.summary())"
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n",
"\n",
"label_encoder = LabelEncoder()\n",
"integer_encoded = label_encoder.fit_transform(y_train)\n",
"\n",
"onehot_encoder = OneHotEncoder(sparse=False)\n",
"integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)\n",
"onehot_encoded = onehot_encoder.fit_transform(integer_encoded)\n",
"y_train = onehot_encoded"
]
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"classifier = OGLEClassifier('relu', input_size=50, num_classes=5)\n",
"classifier.compile(optimizer='adam', loss=\"categorical_crossentropy\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(38431, 5) (38431, 50, 1)\n",
"Train on 38431 samples, validate on 9608 samples\n",
"Epoch 1/10\n",
" - 222s - loss: 0.5010 - acc: 0.8372 - val_loss: 0.3996 - val_acc: 0.8605\n",
"Epoch 2/10\n",
" - 178s - loss: 0.4137 - acc: 0.8615 - val_loss: 0.4001 - val_acc: 0.8598\n",
"Epoch 3/10\n",
" - 139s - loss: 0.4100 - acc: 0.8608 - val_loss: 0.4016 - val_acc: 0.8599\n",
"Epoch 4/10\n",
" - 134s - loss: 0.4028 - acc: 0.8634 - val_loss: 0.4003 - val_acc: 0.8591\n",
"Epoch 5/10\n",
" - 140s - loss: 0.3988 - acc: 0.8632 - val_loss: 0.3939 - val_acc: 0.8620\n",
"Epoch 6/10\n",
" - 134s - loss: 0.4000 - acc: 0.8631 - val_loss: 0.3895 - val_acc: 0.8629\n",
"Epoch 7/10\n",
" - 137s - loss: 0.3956 - acc: 0.8638 - val_loss: 0.3937 - val_acc: 0.8612\n",
"Epoch 8/10\n",
" - 135s - loss: 0.3958 - acc: 0.8638 - val_loss: 0.3941 - val_acc: 0.8589\n",
"Epoch 9/10\n",
" - 137s - loss: 0.3931 - acc: 0.8647 - val_loss: 0.4103 - val_acc: 0.8582\n",
"Epoch 10/10\n",
" - 136s - loss: 0.3950 - acc: 0.8646 - val_loss: 0.3847 - val_acc: 0.8625\n"
"ename": "NameError",
"evalue": "name 'x_train' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-6-c6eb6a8f7453>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m classifier.train(x_train, to_categorical(y_train),\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m40\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m verbose=2)\n",
"\u001b[1;31mNameError\u001b[0m: name 'x_train' is not defined"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x17a952b1400>"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [