From 068510ae3975754d4ddff2e6a8c46ca9bc4f2e53 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 14:46:31 +0100 Subject: [PATCH 01/35] Add notebook to load data from open day --- load_data_open-day.ipynb | 1806 +++++++++++++++++ ...ten_Tomato_Merlin_1519148528.2417703.json} | 0 2 files changed, 1806 insertions(+) create mode 100644 load_data_open-day.ipynb rename rated_datasets/{Rotten Tomato_Merlin_1519148528.2417703.json => Rotten_Tomato_Merlin_1519148528.2417703.json} (100%) diff --git a/load_data_open-day.ipynb b/load_data_open-day.ipynb new file mode 100644 index 0000000..a7a6c58 --- /dev/null +++ b/load_data_open-day.ipynb @@ -0,0 +1,1806 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "path = 'rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dataset': 'Rotten Tomato',\n", + " 'edge_type_selection': [['PRODUCED', True],\n", + " ['DIRECTED', True],\n", + " ['WROTE', True],\n", + " ['ACTED_IN', True]],\n", + " 'meta_paths': [{'time_to_rate': 0.024361},\n", + " {'id': 1,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.1'},\n", + " {'id': 2,\n", + " 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.25'},\n", + " {'id': 3,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.15'},\n", + " {'id': 4,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.15'},\n", + " {'id': 5,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.2'},\n", + " {'time_to_rate': 150.249221},\n", + " {'id': 6,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.16'},\n", + " {'id': 7,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.26'},\n", + " {'id': 8,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.32'},\n", + " {'id': 9,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.33'},\n", + " {'id': 10,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.19'},\n", + " {'time_to_rate': 145.500076},\n", + " {'id': 11,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.46'},\n", + " {'id': 12,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'],\n", + " 'rating': '0.33'},\n", + " {'id': 13,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.12'},\n", + " {'id': 14,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.27'},\n", + " {'id': 15,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.2'},\n", + " {'time_to_rate': 135.839568},\n", + " {'id': 16,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.29'},\n", + " {'id': 17,\n", + " 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.72'},\n", + " {'id': 18,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.5'},\n", + " {'id': 19,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.31'},\n", + " {'id': 20,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.52'},\n", + " {'time_to_rate': 385.761841},\n", + " {'id': 21,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.38'},\n", + " {'id': 22,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'],\n", + " 'rating': '0.44'},\n", + " {'id': 23,\n", + " 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'],\n", + " 'rating': '0.6'},\n", + " {'id': 24,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.55'},\n", + " {'id': 25,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.39'},\n", + " {'time_to_rate': 105.28709},\n", + " {'id': 26,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.52'},\n", + " {'id': 27,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.42'},\n", + " {'id': 28,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.55'},\n", + " {'id': 29,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.48'},\n", + " {'id': 30,\n", + " 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.52'},\n", + " {'time_to_rate': 95.974948},\n", + " {'id': 31,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.33'},\n", + " {'id': 32,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.22'},\n", + " {'id': 33,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.7'},\n", + " {'id': 34,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.26'},\n", + " {'id': 35,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.54'},\n", + " {'time_to_rate': 130.046159},\n", + " {'id': 36,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.54'},\n", + " {'id': 37,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.34'},\n", + " {'id': 38,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.54'},\n", + " {'id': 39,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.36'},\n", + " {'id': 40,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.28'},\n", + " {'time_to_rate': 98.257121},\n", + " {'id': 41,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.55'},\n", + " {'id': 42,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.34'},\n", + " {'id': 43,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.52'},\n", + " {'id': 44,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.42'},\n", + " {'id': 45,\n", + " 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.56'},\n", + " {'time_to_rate': 39.029786},\n", + " {'id': 46,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.69'},\n", + " {'id': 47,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.57'},\n", + " {'id': 48,\n", + " 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.42'},\n", + " {'id': 49,\n", + " 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'],\n", + " 'rating': '0.75'},\n", + " {'id': 50,\n", + " 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.67'},\n", + " {'time_to_rate': 69.869488},\n", + " {'id': 51,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.62'},\n", + " {'time_to_rate': 21.587904}],\n", + " 'node_type_selection': [['Person', True], ['Movie', True]],\n", + " 'purpose': '',\n", + " 'username': 'Merlin'}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data =json.load(open(path, \"r\", encoding=\"utf8\"))\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'time_to_rate': 0.024361},\n", + " {'id': 1,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.1'},\n", + " {'id': 2,\n", + " 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.25'},\n", + " {'id': 3,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.15'},\n", + " {'id': 4,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.15'},\n", + " {'id': 5,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.2'},\n", + " {'time_to_rate': 150.249221},\n", + " {'id': 6,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.16'},\n", + " {'id': 7,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.26'},\n", + " {'id': 8,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.32'},\n", + " {'id': 9,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.33'},\n", + " {'id': 10,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.19'},\n", + " {'time_to_rate': 145.500076},\n", + " {'id': 11,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.46'},\n", + " {'id': 12,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'],\n", + " 'rating': '0.33'},\n", + " {'id': 13,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.12'},\n", + " {'id': 14,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.27'},\n", + " {'id': 15,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.2'},\n", + " {'time_to_rate': 135.839568},\n", + " {'id': 16,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.29'},\n", + " {'id': 17,\n", + " 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.72'},\n", + " {'id': 18,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.5'},\n", + " {'id': 19,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.31'},\n", + " {'id': 20,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.52'},\n", + " {'time_to_rate': 385.761841},\n", + " {'id': 21,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.38'},\n", + " {'id': 22,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'],\n", + " 'rating': '0.44'},\n", + " {'id': 23,\n", + " 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'],\n", + " 'rating': '0.6'},\n", + " {'id': 24,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.55'},\n", + " {'id': 25,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.39'},\n", + " {'time_to_rate': 105.28709},\n", + " {'id': 26,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.52'},\n", + " {'id': 27,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.42'},\n", + " {'id': 28,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.55'},\n", + " {'id': 29,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.48'},\n", + " {'id': 30,\n", + " 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.52'},\n", + " {'time_to_rate': 95.974948},\n", + " {'id': 31,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.33'},\n", + " {'id': 32,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.22'},\n", + " {'id': 33,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.7'},\n", + " {'id': 34,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.26'},\n", + " {'id': 35,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.54'},\n", + " {'time_to_rate': 130.046159},\n", + " {'id': 36,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.54'},\n", + " {'id': 37,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.34'},\n", + " {'id': 38,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.54'},\n", + " {'id': 39,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.36'},\n", + " {'id': 40,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.28'},\n", + " {'time_to_rate': 98.257121},\n", + " {'id': 41,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.55'},\n", + " {'id': 42,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.34'},\n", + " {'id': 43,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.52'},\n", + " {'id': 44,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.42'},\n", + " {'id': 45,\n", + " 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.56'},\n", + " {'time_to_rate': 39.029786},\n", + " {'id': 46,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.69'},\n", + " {'id': 47,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.57'},\n", + " {'id': 48,\n", + " 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.42'},\n", + " {'id': 49,\n", + " 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'],\n", + " 'rating': '0.75'},\n", + " {'id': 50,\n", + " 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.67'},\n", + " {'time_to_rate': 69.869488},\n", + " {'id': 51,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.62'},\n", + " {'time_to_rate': 21.587904}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"meta_paths\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 1,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.1'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"meta_paths\"][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 1, 'rating': '0.1'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 2, 'rating': '0.25'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 3, 'rating': '0.15'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 4, 'rating': '0.15'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 5, 'rating': '0.2'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 6, 'rating': '0.16'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 7, 'rating': '0.26'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 8, 'rating': '0.32'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 9, 'rating': '0.33'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 10, 'rating': '0.19'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 11, 'rating': '0.46'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'id': 12, 'rating': '0.33'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 13, 'rating': '0.12'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 14, 'rating': '0.27'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 15, 'rating': '0.2'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 16, 'rating': '0.29'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 17, 'rating': '0.72'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'id': 18, 'rating': '0.5'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 19, 'rating': '0.31'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 20, 'rating': '0.52'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 21, 'rating': '0.38'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 22, 'rating': '0.44'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 23, 'rating': '0.6'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 24, 'rating': '0.55'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 25, 'rating': '0.39'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 26, 'rating': '0.52'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 27, 'rating': '0.42'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 28, 'rating': '0.55'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'id': 29, 'rating': '0.48'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 30, 'rating': '0.52'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 31, 'rating': '0.33'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 32, 'rating': '0.22'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 33, 'rating': '0.7'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 34, 'rating': '0.26'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 35, 'rating': '0.54'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 36, 'rating': '0.54'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'id': 37, 'rating': '0.34'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 38, 'rating': '0.54'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 39, 'rating': '0.36'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 40, 'rating': '0.28'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 41, 'rating': '0.55'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 42, 'rating': '0.34'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 43, 'rating': '0.52'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 44, 'rating': '0.42'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 45, 'rating': '0.56'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 46, 'rating': '0.69'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 47, 'rating': '0.57'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 48, 'rating': '0.42'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 49, 'rating': '0.75'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'id': 50, 'rating': '0.67'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'id': 51, 'rating': '0.62'}\n" + ] + } + ], + "source": [ + "i = 0\n", + "first = True\n", + "for probably_path in data[\"meta_paths\"]:\n", + " # Ignore first time_to_rate\n", + " if first:\n", + " first = False\n", + " continue\n", + " i += 1\n", + " if i == 6:\n", + " # Ignore time_to_rate\n", + " i = 0\n", + " else:\n", + " if 'time_to_rate' not in probably_path.keys():\n", + " print(probably_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "i = 0\n", + "first = True\n", + "batches = []\n", + "batch = []\n", + "for probably_path in data[\"meta_paths\"]:\n", + " # Ignore first time_to_rate\n", + " if first:\n", + " first = False\n", + " continue\n", + " i += 1\n", + " if i == 6:\n", + " # Ignore time_to_rate\n", + " i = 0\n", + " batches.append(batch)\n", + " batch = []\n", + " else:\n", + " if 'time_to_rate' not in probably_path.keys():\n", + " batch.append(probably_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[{'id': 1,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.1'},\n", + " {'id': 2,\n", + " 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.25'},\n", + " {'id': 3,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.15'},\n", + " {'id': 4,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.15'},\n", + " {'id': 5,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.2'}],\n", + " [{'id': 6,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.16'},\n", + " {'id': 7,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.26'},\n", + " {'id': 8,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.32'},\n", + " {'id': 9,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.33'},\n", + " {'id': 10,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.19'}],\n", + " [{'id': 11,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.46'},\n", + " {'id': 12,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'],\n", + " 'rating': '0.33'},\n", + " {'id': 13,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.12'},\n", + " {'id': 14,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.27'},\n", + " {'id': 15,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.2'}],\n", + " [{'id': 16,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.29'},\n", + " {'id': 17,\n", + " 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.72'},\n", + " {'id': 18,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.5'},\n", + " {'id': 19,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.31'},\n", + " {'id': 20,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.52'}],\n", + " [{'id': 21,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.38'},\n", + " {'id': 22,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'],\n", + " 'rating': '0.44'},\n", + " {'id': 23,\n", + " 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'],\n", + " 'rating': '0.6'},\n", + " {'id': 24,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.55'},\n", + " {'id': 25,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.39'}],\n", + " [{'id': 26,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.52'},\n", + " {'id': 27,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.42'},\n", + " {'id': 28,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.55'},\n", + " {'id': 29,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.48'},\n", + " {'id': 30,\n", + " 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.52'}],\n", + " [{'id': 31,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.33'},\n", + " {'id': 32,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.22'},\n", + " {'id': 33,\n", + " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.7'},\n", + " {'id': 34,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.26'},\n", + " {'id': 35,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.54'}],\n", + " [{'id': 36,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.54'},\n", + " {'id': 37,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person'],\n", + " 'rating': '0.34'},\n", + " {'id': 38,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.54'},\n", + " {'id': 39,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.36'},\n", + " {'id': 40,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.28'}],\n", + " [{'id': 41,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.55'},\n", + " {'id': 42,\n", + " 'metapath': ['Person',\n", + " 'WROTE',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.34'},\n", + " {'id': 43,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.52'},\n", + " {'id': 44,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.42'},\n", + " {'id': 45,\n", + " 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.56'}],\n", + " [{'id': 46,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'PRODUCED',\n", + " 'Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'WROTE',\n", + " 'Person'],\n", + " 'rating': '0.69'},\n", + " {'id': 47,\n", + " 'metapath': ['Person',\n", + " 'DIRECTED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person'],\n", + " 'rating': '0.57'},\n", + " {'id': 48,\n", + " 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'],\n", + " 'rating': '0.42'},\n", + " {'id': 49,\n", + " 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'],\n", + " 'rating': '0.75'},\n", + " {'id': 50,\n", + " 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'],\n", + " 'rating': '0.67'}]]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "batches" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.15\n", + "0.04999999999999999\n", + "0.04999999999999999\n", + "0.1\n", + "0.1\n", + "0.0\n", + "0.05000000000000002\n", + "0.1\n", + "0.0\n", + "0.05000000000000002\n", + "0.04999999999999999\n", + "0.1\n", + "0.16\n", + "0.17\n", + "0.03\n", + "0.06\n", + "0.07\n", + "0.010000000000000009\n", + "0.07\n", + "0.13\n", + "0.14\n", + "0.13\n", + "0.34\n", + "0.21000000000000002\n", + "0.15000000000000002\n", + "0.08000000000000002\n", + "0.19\n", + "0.06\n", + "0.26\n", + "0.13\n", + "0.07\n", + "0.43\n", + "0.21000000000000002\n", + "0.020000000000000018\n", + "0.23000000000000004\n", + "0.21999999999999997\n", + "0.020000000000000018\n", + "0.41\n", + "0.19\n", + "0.21000000000000002\n", + "0.19999999999999996\n", + "0.06\n", + "0.21999999999999997\n", + "0.17000000000000004\n", + "0.010000000000000009\n", + "0.15999999999999998\n", + "0.11000000000000004\n", + "0.04999999999999993\n", + "0.04999999999999999\n", + "0.20999999999999996\n", + "0.16000000000000003\n", + "0.030000000000000027\n", + "0.0\n", + "0.10000000000000003\n", + "0.13000000000000006\n", + "0.06\n", + "0.10000000000000003\n", + "0.040000000000000036\n", + "0.07000000000000006\n", + "0.040000000000000036\n", + "0.0\n", + "0.030000000000000027\n", + "0.36999999999999994\n", + "0.21000000000000002\n", + "0.11000000000000001\n", + "0.48\n", + "0.04000000000000001\n", + "0.32000000000000006\n", + "0.07\n", + "0.43999999999999995\n", + "0.28\n", + "0.15999999999999992\n", + "0.0\n", + "0.2\n", + "0.2\n", + "0.019999999999999962\n", + "0.0\n", + "0.18000000000000005\n", + "0.18000000000000005\n", + "0.26\n", + "0.06\n", + "0.26\n", + "0.07999999999999996\n", + "0.010000000000000009\n", + "0.21000000000000002\n", + "0.18\n", + "0.07999999999999996\n", + "0.22000000000000003\n", + "0.030000000000000027\n", + "0.040000000000000036\n", + "0.13000000000000006\n", + "0.10000000000000003\n", + "0.14000000000000007\n", + "0.06000000000000005\n", + "0.12\n", + "0.18000000000000005\n", + "0.10000000000000009\n", + "0.26999999999999996\n", + "0.14999999999999997\n", + "0.33\n", + "0.25000000000000006\n", + "0.019999999999999907\n", + "0.07999999999999996\n" + ] + } + ], + "source": [ + "from util.datastructures import MetaPathRatingGraph, MetaPath\n", + "graph = MetaPathRatingGraph()\n", + "\n", + "for batch in batches:\n", + " #ordered = sorted(batch, key=lambda x: float(x['rating']))\n", + " for metapath in batch:\n", + " for another_metapath in batch:\n", + " if metapath is another_metapath:\n", + " continue\n", + " if float(metapath['rating']) <= float(another_metapath['rating']):\n", + " graph.add_user_rating(MetaPath.from_list(another_metapath), MetaPath.from_list(metapath), \n", + " distance=float(another_metapath['rating']) - float(metapath['rating']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/rated_datasets/Rotten Tomato_Merlin_1519148528.2417703.json b/rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json similarity index 100% rename from rated_datasets/Rotten Tomato_Merlin_1519148528.2417703.json rename to rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json From b0674ee81e16c92108df0b34c532430dc70c79f4 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 15:09:31 +0100 Subject: [PATCH 02/35] Add fitting of graph --- load_data_open-day.ipynb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/load_data_open-day.ipynb b/load_data_open-day.ipynb index a7a6c58..00d744c 100644 --- a/load_data_open-day.ipynb +++ b/load_data_open-day.ipynb @@ -1779,7 +1779,11 @@ "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "import domain_scoring.domain_scoring as domain_scoring\n", + "domain_score = domain_scoring.DomainScoring(rated_paths, mode=args.mode)\n", + "domain_score.fit(graph)" + ] } ], "metadata": { From 07052525b68f4f1e4796878bd5697bab880a44c0 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 23:37:15 +0100 Subject: [PATCH 03/35] Correct creation of MetaPathRatingGraph --- load_data_open-day.ipynb | 325 +++++++++++++++++---------------------- 1 file changed, 144 insertions(+), 181 deletions(-) diff --git a/load_data_open-day.ipynb b/load_data_open-day.ipynb index 00d744c..8251149 100644 --- a/load_data_open-day.ipynb +++ b/load_data_open-day.ipynb @@ -3,9 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import json" @@ -14,9 +12,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "path = 'rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" @@ -1062,57 +1058,57 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 1, 'rating': '0.1'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 2, 'rating': '0.25'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 3, 'rating': '0.15'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 4, 'rating': '0.15'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 5, 'rating': '0.2'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 6, 'rating': '0.16'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 7, 'rating': '0.26'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 8, 'rating': '0.32'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 9, 'rating': '0.33'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 10, 'rating': '0.19'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 11, 'rating': '0.46'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'id': 12, 'rating': '0.33'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 13, 'rating': '0.12'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 14, 'rating': '0.27'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 15, 'rating': '0.2'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 16, 'rating': '0.29'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 17, 'rating': '0.72'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'id': 18, 'rating': '0.5'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 19, 'rating': '0.31'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 20, 'rating': '0.52'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 21, 'rating': '0.38'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 22, 'rating': '0.44'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 23, 'rating': '0.6'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 24, 'rating': '0.55'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 25, 'rating': '0.39'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 26, 'rating': '0.52'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 27, 'rating': '0.42'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 28, 'rating': '0.55'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'id': 29, 'rating': '0.48'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 30, 'rating': '0.52'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 31, 'rating': '0.33'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 32, 'rating': '0.22'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 33, 'rating': '0.7'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 34, 'rating': '0.26'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 35, 'rating': '0.54'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 36, 'rating': '0.54'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'id': 37, 'rating': '0.34'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 38, 'rating': '0.54'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 39, 'rating': '0.36'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 40, 'rating': '0.28'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 41, 'rating': '0.55'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 42, 'rating': '0.34'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 43, 'rating': '0.52'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 44, 'rating': '0.42'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 45, 'rating': '0.56'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 46, 'rating': '0.69'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 47, 'rating': '0.57'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 48, 'rating': '0.42'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 49, 'rating': '0.75'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'id': 50, 'rating': '0.67'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'id': 51, 'rating': '0.62'}\n" + "{'id': 1, 'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.1'}\n", + "{'id': 2, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.25'}\n", + "{'id': 3, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15'}\n", + "{'id': 4, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15'}\n", + "{'id': 5, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.2'}\n", + "{'id': 6, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.16'}\n", + "{'id': 7, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26'}\n", + "{'id': 8, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.32'}\n", + "{'id': 9, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33'}\n", + "{'id': 10, 'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.19'}\n", + "{'id': 11, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.46'}\n", + "{'id': 12, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.33'}\n", + "{'id': 13, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.12'}\n", + "{'id': 14, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.27'}\n", + "{'id': 15, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.2'}\n", + "{'id': 16, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.29'}\n", + "{'id': 17, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.72'}\n", + "{'id': 18, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.5'}\n", + "{'id': 19, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.31'}\n", + "{'id': 20, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.52'}\n", + "{'id': 21, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.38'}\n", + "{'id': 22, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.44'}\n", + "{'id': 23, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.6'}\n", + "{'id': 24, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55'}\n", + "{'id': 25, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.39'}\n", + "{'id': 26, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", + "{'id': 27, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", + "{'id': 28, 'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.55'}\n", + "{'id': 29, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.48'}\n", + "{'id': 30, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", + "{'id': 31, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33'}\n", + "{'id': 32, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.22'}\n", + "{'id': 33, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.7'}\n", + "{'id': 34, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26'}\n", + "{'id': 35, 'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.54'}\n", + "{'id': 36, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54'}\n", + "{'id': 37, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.34'}\n", + "{'id': 38, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54'}\n", + "{'id': 39, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.36'}\n", + "{'id': 40, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.28'}\n", + "{'id': 41, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55'}\n", + "{'id': 42, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.34'}\n", + "{'id': 43, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", + "{'id': 44, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", + "{'id': 45, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.56'}\n", + "{'id': 46, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.69'}\n", + "{'id': 47, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.57'}\n", + "{'id': 48, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", + "{'id': 49, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.75'}\n", + "{'id': 50, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.67'}\n", + "{'id': 51, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.62'}\n" ] } ], @@ -1136,9 +1132,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "i = 0\n", @@ -1644,119 +1638,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 10, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.15\n", - "0.04999999999999999\n", - "0.04999999999999999\n", - "0.1\n", - "0.1\n", - "0.0\n", - "0.05000000000000002\n", - "0.1\n", - "0.0\n", - "0.05000000000000002\n", - "0.04999999999999999\n", - "0.1\n", - "0.16\n", - "0.17\n", - "0.03\n", - "0.06\n", - "0.07\n", - "0.010000000000000009\n", - "0.07\n", - "0.13\n", - "0.14\n", - "0.13\n", - "0.34\n", - "0.21000000000000002\n", - "0.15000000000000002\n", - "0.08000000000000002\n", - "0.19\n", - "0.06\n", - "0.26\n", - "0.13\n", - "0.07\n", - "0.43\n", - "0.21000000000000002\n", - "0.020000000000000018\n", - "0.23000000000000004\n", - "0.21999999999999997\n", - "0.020000000000000018\n", - "0.41\n", - "0.19\n", - "0.21000000000000002\n", - "0.19999999999999996\n", - "0.06\n", - "0.21999999999999997\n", - "0.17000000000000004\n", - "0.010000000000000009\n", - "0.15999999999999998\n", - "0.11000000000000004\n", - "0.04999999999999993\n", - "0.04999999999999999\n", - "0.20999999999999996\n", - "0.16000000000000003\n", - "0.030000000000000027\n", - "0.0\n", - "0.10000000000000003\n", - "0.13000000000000006\n", - "0.06\n", - "0.10000000000000003\n", - "0.040000000000000036\n", - "0.07000000000000006\n", - "0.040000000000000036\n", - "0.0\n", - "0.030000000000000027\n", - "0.36999999999999994\n", - "0.21000000000000002\n", - "0.11000000000000001\n", - "0.48\n", - "0.04000000000000001\n", - "0.32000000000000006\n", - "0.07\n", - "0.43999999999999995\n", - "0.28\n", - "0.15999999999999992\n", - "0.0\n", - "0.2\n", - "0.2\n", - "0.019999999999999962\n", - "0.0\n", - "0.18000000000000005\n", - "0.18000000000000005\n", - "0.26\n", - "0.06\n", - "0.26\n", - "0.07999999999999996\n", - "0.010000000000000009\n", - "0.21000000000000002\n", - "0.18\n", - "0.07999999999999996\n", - "0.22000000000000003\n", - "0.030000000000000027\n", - "0.040000000000000036\n", - "0.13000000000000006\n", - "0.10000000000000003\n", - "0.14000000000000007\n", - "0.06000000000000005\n", - "0.12\n", - "0.18000000000000005\n", - "0.10000000000000009\n", - "0.26999999999999996\n", - "0.14999999999999997\n", - "0.33\n", - "0.25000000000000006\n", - "0.019999999999999907\n", - "0.07999999999999996\n" - ] - } - ], + "outputs": [], "source": [ "from util.datastructures import MetaPathRatingGraph, MetaPath\n", "graph = MetaPathRatingGraph()\n", @@ -1768,22 +1652,101 @@ " if metapath is another_metapath:\n", " continue\n", " if float(metapath['rating']) <= float(another_metapath['rating']):\n", - " graph.add_user_rating(MetaPath.from_list(another_metapath), MetaPath.from_list(metapath), \n", + " graph.add_user_rating(MetaPath.from_list(another_metapath['metapath']), MetaPath.from_list(metapath['metapath']), \n", " distance=float(another_metapath['rating']) - float(metapath['rating']))" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "ename": "NotFittedError", + "evalue": "This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotFittedError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdomain_scoring\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdomain_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDomainScoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdomain_score\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/32de-python/domain_scoring/domain_scoring.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, metapath_graph, test_size)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 44\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Test accuracy is {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_preprocess\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetapath_unrated\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMetaPath\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTuple\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMetaPath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/base.py\u001b[0m in \u001b[0;36mscore\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 347\u001b[0m \"\"\"\n\u001b[1;32m 348\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 349\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 350\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/tree/tree.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X, check_input)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mpredicted\u001b[0m \u001b[0mclasses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mpredict\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \"\"\"\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0mcheck_is_fitted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'tree_'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_X_predict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheck_input\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0mproba\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtree_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_is_fitted\u001b[0;34m(estimator, attributes, msg, all_or_any)\u001b[0m\n\u001b[1;32m 766\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 767\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mall_or_any\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mattr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mattributes\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 768\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotFittedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'name'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 769\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 770\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNotFittedError\u001b[0m: This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this method." + ] + } + ], "source": [ "import domain_scoring.domain_scoring as domain_scoring\n", - "domain_score = domain_scoring.DomainScoring(rated_paths, mode=args.mode)\n", - "domain_score.fit(graph)" + "domain_score = domain_scoring.DomainScoring()\n", + "domain_score.fit(graph, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature ranking:\n", + "1. feature 8 (0.252934)\n", + "2. feature 0 (0.241715)\n", + "3. feature 6 (0.141816)\n", + "4. feature 1 (0.069899)\n", + "5. feature 9 (0.061769)\n", + "6. feature 2 (0.059382)\n", + "7. feature 4 (0.059261)\n", + "8. feature 10 (0.036441)\n", + "9. feature 3 (0.034537)\n", + "10. feature 7 (0.021453)\n", + "11. feature 5 (0.014518)\n", + "12. feature 11 (0.006274)\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEKCAYAAADpfBXhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAGNlJREFUeJzt3X+0XWV95/H3J8RggUFQaiOBBBxEAW0lMpFVRS6iEiwadKyGqUIdppVFmVKm1kBdU4JdHYtdM+paDKWdImPRNhZGBbFIbOH6oxUIvwpCfsmPkAS8/AYRkRg+88d+EjY35+aem3tO7r3P/bzW2uuevfez9/c599x8zj7P3vtEtomIiLrMmOgORERE7yXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPaUfSX0r65ET3I6KflOvco1uS7gNeCfwCEGDgYNs/Hsc+jwa+ZHv/nnRyipF0CbDe9p9MdF+iLjMnugMxpRj4DdvX9XCfW94kdmxjaRfbm3vYn51GUj45R9/kjyvGSh0XSkdK+hdJj0u6tRyRb1n325LukvSUpB9J+t2yfDfgH4F9Jf2krJ8t6RJJn2ptf7Sk9a35eyV9QtK/AU9LmiHpVZIul/SQpLsl/dcRn0Br/1v2LemPJA1J2ihpkaTjJa2W9Iikc1rbnivpMknLSn9vkvSrrfWvk3Rd+T3cIek9w+peKOmbkn4CnAr8FvCJsq8rSrsl5ff0lKQfSjqxtY9TJH1P0l9Ieqw814Wt9XtL+kJ5Ho9K+mpr3QnltXlc0vclvaG1bomkDaXmSknHjPT7iynCdqZMXU3AvcDbOyzfF3gEOK7MH1vmX1HmjwcOKI+PAn4KvLHMHw3cP2x/lwCfas2/qE3pxy2l7q40bzg3AZ8EdgEOAH4EvHOE57F1/2Xfm1rb/hfgIeBLwG7AocAzwLzS/lzg58D7Svs/BO4pj2cCa4El5fExwFPAa1p1HweOLPO7Dn+uZfl/BH6lPP5N4OnW/Cml/n8uz/s0YGNr228Cfw/sWfp0VFl+ODAEHFG2+0j5Pb4EOBi4v1VjLnDgRP+9ZRrflCP3GKuvlyPGx1pHhR8Gvmn7GgDb/0wTtu8u81fbvq88/h6wnCbkx+Pzth+w/XPgPwD72P4z25tLrb8BFne5r+eA/+FmeGcZsA/wOdvP2L4LuAv4tVb7m21/rbT/XzQhfWSZdrd9vu1fuBm+ugo4qbXtFbavByh934bt/2d7qDy+jOYNY0GryTrbX7Bt4IvAqyS9UtJs4DjgY7afKr+L75Vtfge4yPZNblxK8yZxJLAZmAW8XtJM2/fbvrfL311MUhlzj7Fa5G3H3OcBH2wNQYjmb+taAEnHA39Cc4Q4A/gl4PZx9mPDsPpzJD3Wqj8D+G6X+3q0BCXAz8rPh1rrfwbs0ZrfOkRk25I20nyKUHtdsQ6Y02nbkUg6GTiL5hMIwO40bzhbbD2Bbftnkij9ewXwmO2nOux2HnBya7hKNEft+9r+nqQ/AJYCh0q6BvhD2w+O1teYvBLuMVadxtzXA39r+2PbNJZmAZfTHN1fYft5SV9r7afTydSf0gyJbPGqDm3a260H7rH92i763wtbr+xRk6z7AQ/QPKe5w9rOBVa35oc/3xfNS5oL/DVwjO0flGW3MsK5jmHWAy+XtGeHgF8P/JntT3fa0PYyYJmkPUr9P6cZAoopKsMy0QtfAt4j6V3l5OZLy4nKfWk+7s8CHinBfjzwrta2Q8ArJO3ZWnYb8O5ycnA2cOYo9W8EflJOsr5U0i6SDpN0RO+e4ou8SdKJknahOcJ+FrgeuAH4aenHTEkDwAk0Y+AjGQJe3ZrfHXgeeKT8Lj8KvL6bTrm5JPVq4EJJe5U+bBn++j/AaZIWAEjaXdK7y8+DJR1T3oifo/mk8nxXv4mYtBLuMRYdL1m0vQFYBPwx8DDNUMTHgRm2nwZ+H7isDJssBq5obbuaJvzuKeP4s4FLaYZt7gO+RTMOPmI/bD9PE6JvpDlJ+BBNmO3Jjtnu0XXp/4doTo7+FvC+Mr69CXgPzbmGR4ALgI/YXjvCfgAuBg7bcg7D9kqacfzraYZfDgO+P4b+foTmPoRVNG8cZwLYvplm3P2C8jqs4YUj811pjtQfpvkE8svAOcSU1tVNTOVSq8/RvBlcbPv8Yes/BvwezYmZnwC/a3tVWXcOzZn9XwBn2l7e02cQsRNJOhf497ZPnui+RGzPqEfuam60uIDmLPxhwEmSXjes2Zdt/6rtw4G/AD5btj0U+CBwCM3lcBeWMcqIiOijboZlFgBrba8rHzuX0XwE36p89N5iD14Yr3svsKxcFnYf217SFRERfdDN1TJzePHlWxvoENCSTgf+G83lVW9vbfuDVrONvPiysIgpxfZ5E92HiG707ISq7QttH0Rzd95/79V+IyJi7Lo5ct/Ii6/d3a8sG8lXgIta27a/7a/jtpLy1ZQRETvAdsfzmN0cua8ADpI0r1wHuxi4st1A0kGt2RNoLrOitFssaZakA4GDaK5J7tTBvk/nnntu6kzCGqkzeWukzuStYW//mHjUI3fbmyWdQfN9IFsuhVwp6Txghe2rgDMkvYPmBojHKdfP2r5L0j/QfDfHJuB0j9ajiIgYt66+fsD2t4DXDlt2buvxH2xn208DHW95joiI/phWd6gODAykziSskTqTt0bqTN4ao5kU/82epIzWRESMkSQ8jhOqERExxSTcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIq1FW4S1ooaZWkNZKWdFh/lqQ7Jd0m6duS9m+t2yzpFkm3Svp6LzsfERGdyfb2G0gzgDXAscADwApgse1VrTZHAzfYflbSacCA7cVl3VO29xylhkfrR0REvJgkbKvTum6O3BcAa22vs70JWAYsajew/R3bz5bZ64E57fo70OeIiBiHbsJ9DrC+Nb+BF4f3cKcCV7fmd5V0o6R/lbRopI3G44DZs5HU0+mA2bP70dWIiJ1iZi93JunDwJuAo1uL59l+UNKBwLWSbrd9by/rrhsaoteDOhoa6vEeIyJ2nm7CfSMwtzW/X1n2IpLeAZwDvK0M3wBg+8Hy815Jg8DhwDbhvnTp0q2PBwYGGBgY6Kb/ERHTxuDgIIODg1217eaE6i7AapoTqg8CNwIn2V7ZanM4cBlwnO27W8v3Ap6x/ZykfYB/ARa1T8aWduM6oSqp90fuQE7yRsRktr0TqqMeudveLOkMYDnNGP3FtldKOg9YYfsq4DPA7sBlkgSss30icAjwV5I2l20/PTzYIyKi90Y9ct8pnciRe0TEmI33UsiIiJhiEu4RERVKuI9Br6+nz7X0EdEvGXMfaZ9sO+be6zoZ14+I8ciYe0TENJNwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKtRVuEtaKGmVpDWSlnRYf5akOyXdJunbkvZvrTulbLda0sm97HxERHQm29tvIM0A1gDHAg8AK4DFtle12hwN3GD7WUmnAQO2F0vaG7gJmA8IuBmYb/vJYTU8Wj9G6SM7vvUI+wSG96nXdTrViIjoliRsq9O6bo7cFwBrba+zvQlYBixqN7D9HdvPltnrgTnl8XHActtP2n4CWA4s3JEnERER3esm3OcA61vzG3ghvDs5Fbh6hG03jrJtRET0wMxe7kzSh4E3AUePddulS5dufTwwMMDAwEDP+hURUYPBwUEGBwe7atvNmPuRwFLbC8v82YBtnz+s3TuAzwNvs/1oWbaYZvz9tDJ/EXCd7a8M2zZj7hERY7S9Mfduwn0XYDXNCdUHgRuBk2yvbLU5HLgMOM723a3l7ROqM8rjN5Xx93aNhHtExBhtL9xHHZaxvVnSGTQnQ2cAF9teKek8YIXtq4DPALsDl0kSsM72ibYfl/SnNKFu4LzhwR4REb036pH7TulEjtwjIsZsvJdCRkTEFJNwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAp1Fe6SFkpaJWmNpCUd1h8l6WZJmyS9f9i6zZJukXSrpK/3quMRETGymaM1kDQDuAA4FngAWCHpCturWs3WAacAH++wi5/ant+LzkZERHdGDXdgAbDW9joAScuARcDWcLd9f1nnDturB/2MiIgx6GZYZg6wvjW/oSzr1q6SbpT0r5IWjal3ERGxQ7o5ch+vebYflHQgcK2k223fO7zR0qVLtz4eGBhgYGBgJ3QtImLqGBwcZHBwsKu2sjuNpLQaSEcCS20vLPNnA7Z9foe2lwDfsP3VEfbVcb0kj9aPUfrIjm89wj6B4X3qdZ1ONSIiuiUJ2x2HvrsZllkBHCRpnqRZwGLgyu3VaxXeq2yDpH2AXwfu6rrnERGxQ0YNd9ubgTOA5cCdwDLbKyWdJ+kEAElHSFoPfAC4SNIdZfNDgJsk3Qr8M/DpYVfZREREH4w6LLNTOpFhmYiIMRvvsExEREwxCfdJ5oDZs5HU0+mA2bMn+mlFxE6WYZmR9snEDMvsrOcSEVNfhmUiIqaZhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhrsJd0kJJqyStkbSkw/qjJN0saZOk9w9bd0rZbrWkk3vV8YiIGJlsb7+BNANYAxwLPACsABbbXtVqMxfYE/g4cKXtr5blewM3AfMBATcD820/OayGR+vHKH1kx7ceYZ/A8D71us7OqDFSnYiY+iRhW53WdXPkvgBYa3ud7U3AMmBRu4Ht+23/ELbJpeOA5baftP0EsBxYOOZnEBERY9JNuM8B1rfmN5Rl3Ri+7cYxbBsRETto5kR3YIulS5dufTwwMMDAwMCE9SUiYjIaHBxkcHCwq7bdjLkfCSy1vbDMnw3Y9vkd2l4CfKM15r4YGLB9Wpm/CLjO9leGbZcx9z7VGKlOREx94x1zXwEcJGmepFnAYuDK7dVrPb4GeKekl5WTq+8syyIioo9GDXfbm4EzaE6G3gkss71S0nmSTgCQdISk9cAHgIsk3VG2fRz4U5orZm4AzisnViMioo9GHZbZKZ3IsEzfaoxUJyKmvvEOy0RExBSTcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCfZo6YPZsJPV0OmD27Il+WhFR5P9QHWmf1P1/qOb/ao2Y+vJ/qEZETDMJ94iICiXcIyIqlHCPiKhQwj36KlflREyMXC0z0j7J1TJTqU7EdJSrZaJ6vf6E0OnTQT6FxFSSI/eR9kmO3KdznXwKialg3EfukhZKWiVpjaQlHdbPkrRM0lpJP5A0tyyfJ+kZSbeU6cLxPZWIiOjGzNEaSJoBXAAcCzwArJB0he1VrWanAo/Zfo2kDwGfARaXdT+yPb/H/Y6IiO3o5sh9AbDW9jrbm4BlwKJhbRYBXyyPL6d5I9ii40eGiIjon27CfQ6wvjW/oSzr2Mb2ZuAJSS8v6w6QdLOk6yS9dbwdjoiI0Y06LLODthytPwjMtf24pPnA1yUdavvpPtWNiAi6C/eNwNzW/H5lWdsGYH/gAUm7AHvafqysew7A9i2S7gYOBm4ZXmTp0qVbHw8MDDAwMNDdM4iImCYGBwcZHBzsqu2ol0KWsF5NM47+IHAjcJLtla02pwOvt326pMXAibYXS9qH5kTr85JeDXwHeIPtJ4bVyKWQfaqROpO3xkh1Irq1vUshRz1yt71Z0hnAcpox+ottr5R0HrDC9lXAxcClktYCj/LClTJvAz4l6TngeeBjw4M9IiJ6LzcxjbRP6j46TJ2JrzFSnYhu5esHIiKmmYR7RESFEu4RERVKuEdEVCjhHhFRoYR7RESFEu4Rk0z+U5DohYR7xCSzbmgIQ0+ndUND29TJm0jdchPTSPuk7htlUmfia0yXOtE/uYkpImKaSbhHRFQo4R4RUaGEe0REhRLuEREVSrhHRFQo4R4RUaGEe0REhRLuEdFXvb4TNnfBdid3qI60T3IX5HSuU9Nzqa1O7oJ9Qe5QjYiYZhLuEREVSrhHRFQo4R4RU16+vnhbOaE60j7JSbvpXKem51JbnXx98QtyQjUiYppJuEdEVCjhHhFRoYR7RESFugp3SQslrZK0RtKSDutnSVomaa2kH0ia21p3Tlm+UtK7etn5iIidaSpdlTNquEuaAVwAHAccBpwk6XXDmp0KPGb7NcDngM+UbQ8FPggcAhwPXCip45ndnWEwdSZljdSZvDVS58XWDQ1hGHW6ros2W6Z1Q0Pj6NHIujlyXwCstb3O9iZgGbBoWJtFwBfL48uBt5fH7wWW2f6F7fuAtWV/E2IwdSZljdSZvDVSZ/LWGE034T4HWN+a31CWdWxjezPwpKSXd9h2Y4dtIyKix/p1QnXChl4iIoLmDqztTcCRwLda82cDS4a1uRp4c3m8C/BQp7bAt7a0G7Z9t8NTmTJlypSpNY2U3TMZ3QrgIEnzgAeBxcBJw9p8AzgFuAH4TeDasvxK4MuSPkszHHMQcOPwAiPdPhsRETtm1HC3vVnSGcBymmGci22vlHQesML2VcDFwKWS1gKP0rwBYPsuSf8A3AVsAk4f15fIREREVybFF4dFRERvTZs7VCWdJemHkm6X9GVJs/pQY7s3e/WwzsskXVZuDLtT0pv7UONiSUOSbu/1vjvUOlPSHWX6/T7V2E/SteX31bc6pdYMSbdIurKH+9zm9ZC0t6TlklZLukbSy3pVr+x/V0k3SLq1/M7O7eX+W3UOLjVuKT+f7MfrI+k+Sf9WamwzPDyO/XZ6bT5Q8mazpPm9qjUmo51QrWEC9gXuAWaV+a8AJ/e4xgzgR8A84CXAbcDr+vR8/i/w0fJ4JrBnH2q8FXgjcHufX5vDgNuBXWlOxi8HXt2HOrOBN5bHewCr+/j6nAV8Cbiyn68HcD7wifJ4CfDnfXguu5WfuwDXAwv6/PcwA3gA2L8P+74H2LsP++302rwWeA3N+cf5/fydjTRNmyN3mj/O3SXNBHaj+QPqpW5u9ho3SXsCR9m+BMDNDWJP9bqO7e8Dj/d6vx0cAtxg++du7pH4LvD+Xhex/WPbt5XHTwMr6cM9F5L2A94N/E0v9zvC69G+efCLwIm9rFnqPlMe7kpzINHvcdx3AHfbXj9qy7ETfRit6PTa2F5tey0TeFn4tAh32w8A/xO4n+ZGqids/1OPy3Rzs1cvHAg8IumS8jH2ryX9Uh/q7Cw/BI4qQwy70QTj/v0sKOkAmiOtG/qw+88Cf0T/QxDglbaHoHnzAl7Z6wJliOlW4MfAt22v6HWNYT4E/H2f9m3gGkkrJP1On2pMGtMi3CXtRXOUM49miGYPSf9pYnu1w2YC84H/bXs+8AzN/QRTku1VNMML3wb+EbgV2NyvepL2oPmKjDPLEXwv9/0bwFD5hCB2/lFbz99QbD9v+3BgP+DN5fui+kLSS2i+suSyPpV4i+0jaA4gfk/SW/tUZ1KYFuFO81HvHtuPlY/+XwV+vcc1NgJzW/P7lWW9tgFYb/umMn85TdhPWbYvsX2E7QHgCWBNP+qUIbnLgUttX9GHEm8B3ivpHpqjz2Mk/W0f6mwxJOlXACTNBh7qV6Ey9HcdsLBfNWi+XPBm2w/3Y+e2Hyw/Hwa+xgR+z9XOMF3C/X7gSEkvLd9KeSzNmGsvbb3Zq1yJs5jmJq6eKh/D10s6uCw6luY+gn7YKUefkn65/JwLvA/4uz6V+gJwl+3P92Pntv/Y9lzbr6Z5/a+1fXIPSwx/Pa4Efrs8PgXo6RuWpH22XIFThv7eCazqZY1hTqJPQzKSdiuf2pC0O/AumiHBnpVg5H8rEzPuPhFncSdiAs6lCfTbaU4+vaQPNRbSXIWxFji7j8/l12jeTG6j+RTysj7U+Duak84/p3lz/Ggfn893af6h3QoM9KnGW2iGe24rdW4BFvbxOR1Nb6+W2eb1APYG/qn8zS0H9urxc3hD+T3dVv7dfLKPv6/dgIeBf9en/R/Yeu3v6OW/zxFemxNpzsH9jObO/qv79bsbacpNTBERFZouwzIREdNKwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIq9P8BGXZbYfGzeTAAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "importances = domain_score.classifier.feature_importances_\n", + "indices = np.argsort(importances)[::-1]\n", + "\n", + "# Print the feature ranking\n", + "print(\"Feature ranking:\")\n", + "\n", + "for f in range(len(domain_score.classifier.feature_importances_)):\n", + " print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n", + "\n", + "# Plot the feature importances of the forest\n", + "plt.figure()\n", + "plt.title(\"Feature importances\")\n", + "plt.bar(range(len(domain_score.classifier.feature_importances_)), importances[indices],\n", + " color=\"r\", align=\"center\")\n", + "plt.xticks(range(len(domain_score.classifier.feature_importances_)), indices)\n", + "plt.xlim([-1, len(domain_score.classifier.feature_importances_)])\n", + "plt.show()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -1802,7 +1765,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.5.2" } }, "nbformat": 4, From f33683621a214f0e95039dfefbaf351eb639e156 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 23:40:40 +0100 Subject: [PATCH 04/35] Add exporting of decision tree graph and add feature names --- load_data_open-day.ipynb | 325 ++++++++++++++++++++++----------------- 1 file changed, 181 insertions(+), 144 deletions(-) diff --git a/load_data_open-day.ipynb b/load_data_open-day.ipynb index 8251149..00d744c 100644 --- a/load_data_open-day.ipynb +++ b/load_data_open-day.ipynb @@ -3,7 +3,9 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import json" @@ -12,7 +14,9 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "path = 'rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" @@ -1058,57 +1062,57 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'id': 1, 'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.1'}\n", - "{'id': 2, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.25'}\n", - "{'id': 3, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15'}\n", - "{'id': 4, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15'}\n", - "{'id': 5, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.2'}\n", - "{'id': 6, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.16'}\n", - "{'id': 7, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26'}\n", - "{'id': 8, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.32'}\n", - "{'id': 9, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33'}\n", - "{'id': 10, 'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.19'}\n", - "{'id': 11, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.46'}\n", - "{'id': 12, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.33'}\n", - "{'id': 13, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.12'}\n", - "{'id': 14, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.27'}\n", - "{'id': 15, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.2'}\n", - "{'id': 16, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.29'}\n", - "{'id': 17, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.72'}\n", - "{'id': 18, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.5'}\n", - "{'id': 19, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.31'}\n", - "{'id': 20, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.52'}\n", - "{'id': 21, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.38'}\n", - "{'id': 22, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.44'}\n", - "{'id': 23, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.6'}\n", - "{'id': 24, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55'}\n", - "{'id': 25, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.39'}\n", - "{'id': 26, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", - "{'id': 27, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", - "{'id': 28, 'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.55'}\n", - "{'id': 29, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.48'}\n", - "{'id': 30, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", - "{'id': 31, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33'}\n", - "{'id': 32, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.22'}\n", - "{'id': 33, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.7'}\n", - "{'id': 34, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26'}\n", - "{'id': 35, 'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.54'}\n", - "{'id': 36, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54'}\n", - "{'id': 37, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.34'}\n", - "{'id': 38, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54'}\n", - "{'id': 39, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.36'}\n", - "{'id': 40, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.28'}\n", - "{'id': 41, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55'}\n", - "{'id': 42, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.34'}\n", - "{'id': 43, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", - "{'id': 44, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", - "{'id': 45, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.56'}\n", - "{'id': 46, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.69'}\n", - "{'id': 47, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.57'}\n", - "{'id': 48, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", - "{'id': 49, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.75'}\n", - "{'id': 50, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.67'}\n", - "{'id': 51, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.62'}\n" + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 1, 'rating': '0.1'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 2, 'rating': '0.25'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 3, 'rating': '0.15'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 4, 'rating': '0.15'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 5, 'rating': '0.2'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 6, 'rating': '0.16'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 7, 'rating': '0.26'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 8, 'rating': '0.32'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 9, 'rating': '0.33'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 10, 'rating': '0.19'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 11, 'rating': '0.46'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'id': 12, 'rating': '0.33'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 13, 'rating': '0.12'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 14, 'rating': '0.27'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 15, 'rating': '0.2'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 16, 'rating': '0.29'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 17, 'rating': '0.72'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'id': 18, 'rating': '0.5'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 19, 'rating': '0.31'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 20, 'rating': '0.52'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 21, 'rating': '0.38'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 22, 'rating': '0.44'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 23, 'rating': '0.6'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 24, 'rating': '0.55'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 25, 'rating': '0.39'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 26, 'rating': '0.52'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 27, 'rating': '0.42'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 28, 'rating': '0.55'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'id': 29, 'rating': '0.48'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 30, 'rating': '0.52'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 31, 'rating': '0.33'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 32, 'rating': '0.22'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 33, 'rating': '0.7'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 34, 'rating': '0.26'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 35, 'rating': '0.54'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 36, 'rating': '0.54'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'id': 37, 'rating': '0.34'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 38, 'rating': '0.54'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 39, 'rating': '0.36'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 40, 'rating': '0.28'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 41, 'rating': '0.55'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 42, 'rating': '0.34'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 43, 'rating': '0.52'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 44, 'rating': '0.42'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 45, 'rating': '0.56'}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 46, 'rating': '0.69'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 47, 'rating': '0.57'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 48, 'rating': '0.42'}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 49, 'rating': '0.75'}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'id': 50, 'rating': '0.67'}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'id': 51, 'rating': '0.62'}\n" ] } ], @@ -1132,7 +1136,9 @@ { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "i = 0\n", @@ -1638,9 +1644,119 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.15\n", + "0.04999999999999999\n", + "0.04999999999999999\n", + "0.1\n", + "0.1\n", + "0.0\n", + "0.05000000000000002\n", + "0.1\n", + "0.0\n", + "0.05000000000000002\n", + "0.04999999999999999\n", + "0.1\n", + "0.16\n", + "0.17\n", + "0.03\n", + "0.06\n", + "0.07\n", + "0.010000000000000009\n", + "0.07\n", + "0.13\n", + "0.14\n", + "0.13\n", + "0.34\n", + "0.21000000000000002\n", + "0.15000000000000002\n", + "0.08000000000000002\n", + "0.19\n", + "0.06\n", + "0.26\n", + "0.13\n", + "0.07\n", + "0.43\n", + "0.21000000000000002\n", + "0.020000000000000018\n", + "0.23000000000000004\n", + "0.21999999999999997\n", + "0.020000000000000018\n", + "0.41\n", + "0.19\n", + "0.21000000000000002\n", + "0.19999999999999996\n", + "0.06\n", + "0.21999999999999997\n", + "0.17000000000000004\n", + "0.010000000000000009\n", + "0.15999999999999998\n", + "0.11000000000000004\n", + "0.04999999999999993\n", + "0.04999999999999999\n", + "0.20999999999999996\n", + "0.16000000000000003\n", + "0.030000000000000027\n", + "0.0\n", + "0.10000000000000003\n", + "0.13000000000000006\n", + "0.06\n", + "0.10000000000000003\n", + "0.040000000000000036\n", + "0.07000000000000006\n", + "0.040000000000000036\n", + "0.0\n", + "0.030000000000000027\n", + "0.36999999999999994\n", + "0.21000000000000002\n", + "0.11000000000000001\n", + "0.48\n", + "0.04000000000000001\n", + "0.32000000000000006\n", + "0.07\n", + "0.43999999999999995\n", + "0.28\n", + "0.15999999999999992\n", + "0.0\n", + "0.2\n", + "0.2\n", + "0.019999999999999962\n", + "0.0\n", + "0.18000000000000005\n", + "0.18000000000000005\n", + "0.26\n", + "0.06\n", + "0.26\n", + "0.07999999999999996\n", + "0.010000000000000009\n", + "0.21000000000000002\n", + "0.18\n", + "0.07999999999999996\n", + "0.22000000000000003\n", + "0.030000000000000027\n", + "0.040000000000000036\n", + "0.13000000000000006\n", + "0.10000000000000003\n", + "0.14000000000000007\n", + "0.06000000000000005\n", + "0.12\n", + "0.18000000000000005\n", + "0.10000000000000009\n", + "0.26999999999999996\n", + "0.14999999999999997\n", + "0.33\n", + "0.25000000000000006\n", + "0.019999999999999907\n", + "0.07999999999999996\n" + ] + } + ], "source": [ "from util.datastructures import MetaPathRatingGraph, MetaPath\n", "graph = MetaPathRatingGraph()\n", @@ -1652,101 +1768,22 @@ " if metapath is another_metapath:\n", " continue\n", " if float(metapath['rating']) <= float(another_metapath['rating']):\n", - " graph.add_user_rating(MetaPath.from_list(another_metapath['metapath']), MetaPath.from_list(metapath['metapath']), \n", + " graph.add_user_rating(MetaPath.from_list(another_metapath), MetaPath.from_list(metapath), \n", " distance=float(another_metapath['rating']) - float(metapath['rating']))" ] }, { "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "ename": "NotFittedError", - "evalue": "This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotFittedError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdomain_scoring\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdomain_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDomainScoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdomain_score\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/32de-python/domain_scoring/domain_scoring.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, metapath_graph, test_size)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 44\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Test accuracy is {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_preprocess\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetapath_unrated\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMetaPath\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTuple\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMetaPath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/base.py\u001b[0m in \u001b[0;36mscore\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 347\u001b[0m \"\"\"\n\u001b[1;32m 348\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 349\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 350\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/tree/tree.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X, check_input)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mpredicted\u001b[0m \u001b[0mclasses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mpredict\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \"\"\"\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0mcheck_is_fitted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'tree_'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_X_predict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheck_input\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0mproba\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtree_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_is_fitted\u001b[0;34m(estimator, attributes, msg, all_or_any)\u001b[0m\n\u001b[1;32m 766\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 767\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mall_or_any\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mattr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mattributes\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 768\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotFittedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'name'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 769\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 770\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNotFittedError\u001b[0m: This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this method." - ] - } - ], + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], "source": [ "import domain_scoring.domain_scoring as domain_scoring\n", - "domain_score = domain_scoring.DomainScoring()\n", - "domain_score.fit(graph, test_size=0.3)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feature ranking:\n", - "1. feature 8 (0.252934)\n", - "2. feature 0 (0.241715)\n", - "3. feature 6 (0.141816)\n", - "4. feature 1 (0.069899)\n", - "5. feature 9 (0.061769)\n", - "6. feature 2 (0.059382)\n", - "7. feature 4 (0.059261)\n", - "8. feature 10 (0.036441)\n", - "9. feature 3 (0.034537)\n", - "10. feature 7 (0.021453)\n", - "11. feature 5 (0.014518)\n", - "12. feature 11 (0.006274)\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEKCAYAAADpfBXhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAGNlJREFUeJzt3X+0XWV95/H3J8RggUFQaiOBBBxEAW0lMpFVRS6iEiwadKyGqUIdppVFmVKm1kBdU4JdHYtdM+paDKWdImPRNhZGBbFIbOH6oxUIvwpCfsmPkAS8/AYRkRg+88d+EjY35+aem3tO7r3P/bzW2uuevfez9/c599x8zj7P3vtEtomIiLrMmOgORERE7yXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPaUfSX0r65ET3I6KflOvco1uS7gNeCfwCEGDgYNs/Hsc+jwa+ZHv/nnRyipF0CbDe9p9MdF+iLjMnugMxpRj4DdvX9XCfW94kdmxjaRfbm3vYn51GUj45R9/kjyvGSh0XSkdK+hdJj0u6tRyRb1n325LukvSUpB9J+t2yfDfgH4F9Jf2krJ8t6RJJn2ptf7Sk9a35eyV9QtK/AU9LmiHpVZIul/SQpLsl/dcRn0Br/1v2LemPJA1J2ihpkaTjJa2W9Iikc1rbnivpMknLSn9vkvSrrfWvk3Rd+T3cIek9w+peKOmbkn4CnAr8FvCJsq8rSrsl5ff0lKQfSjqxtY9TJH1P0l9Ieqw814Wt9XtL+kJ5Ho9K+mpr3QnltXlc0vclvaG1bomkDaXmSknHjPT7iynCdqZMXU3AvcDbOyzfF3gEOK7MH1vmX1HmjwcOKI+PAn4KvLHMHw3cP2x/lwCfas2/qE3pxy2l7q40bzg3AZ8EdgEOAH4EvHOE57F1/2Xfm1rb/hfgIeBLwG7AocAzwLzS/lzg58D7Svs/BO4pj2cCa4El5fExwFPAa1p1HweOLPO7Dn+uZfl/BH6lPP5N4OnW/Cml/n8uz/s0YGNr228Cfw/sWfp0VFl+ODAEHFG2+0j5Pb4EOBi4v1VjLnDgRP+9ZRrflCP3GKuvlyPGx1pHhR8Gvmn7GgDb/0wTtu8u81fbvq88/h6wnCbkx+Pzth+w/XPgPwD72P4z25tLrb8BFne5r+eA/+FmeGcZsA/wOdvP2L4LuAv4tVb7m21/rbT/XzQhfWSZdrd9vu1fuBm+ugo4qbXtFbavByh934bt/2d7qDy+jOYNY0GryTrbX7Bt4IvAqyS9UtJs4DjgY7afKr+L75Vtfge4yPZNblxK8yZxJLAZmAW8XtJM2/fbvrfL311MUhlzj7Fa5G3H3OcBH2wNQYjmb+taAEnHA39Cc4Q4A/gl4PZx9mPDsPpzJD3Wqj8D+G6X+3q0BCXAz8rPh1rrfwbs0ZrfOkRk25I20nyKUHtdsQ6Y02nbkUg6GTiL5hMIwO40bzhbbD2Bbftnkij9ewXwmO2nOux2HnBya7hKNEft+9r+nqQ/AJYCh0q6BvhD2w+O1teYvBLuMVadxtzXA39r+2PbNJZmAZfTHN1fYft5SV9r7afTydSf0gyJbPGqDm3a260H7rH92i763wtbr+xRk6z7AQ/QPKe5w9rOBVa35oc/3xfNS5oL/DVwjO0flGW3MsK5jmHWAy+XtGeHgF8P/JntT3fa0PYyYJmkPUr9P6cZAoopKsMy0QtfAt4j6V3l5OZLy4nKfWk+7s8CHinBfjzwrta2Q8ArJO3ZWnYb8O5ycnA2cOYo9W8EflJOsr5U0i6SDpN0RO+e4ou8SdKJknahOcJ+FrgeuAH4aenHTEkDwAk0Y+AjGQJe3ZrfHXgeeKT8Lj8KvL6bTrm5JPVq4EJJe5U+bBn++j/AaZIWAEjaXdK7y8+DJR1T3oifo/mk8nxXv4mYtBLuMRYdL1m0vQFYBPwx8DDNUMTHgRm2nwZ+H7isDJssBq5obbuaJvzuKeP4s4FLaYZt7gO+RTMOPmI/bD9PE6JvpDlJ+BBNmO3Jjtnu0XXp/4doTo7+FvC+Mr69CXgPzbmGR4ALgI/YXjvCfgAuBg7bcg7D9kqacfzraYZfDgO+P4b+foTmPoRVNG8cZwLYvplm3P2C8jqs4YUj811pjtQfpvkE8svAOcSU1tVNTOVSq8/RvBlcbPv8Yes/BvwezYmZnwC/a3tVWXcOzZn9XwBn2l7e02cQsRNJOhf497ZPnui+RGzPqEfuam60uIDmLPxhwEmSXjes2Zdt/6rtw4G/AD5btj0U+CBwCM3lcBeWMcqIiOijboZlFgBrba8rHzuX0XwE36p89N5iD14Yr3svsKxcFnYf217SFRERfdDN1TJzePHlWxvoENCSTgf+G83lVW9vbfuDVrONvPiysIgpxfZ5E92HiG707ISq7QttH0Rzd95/79V+IyJi7Lo5ct/Ii6/d3a8sG8lXgIta27a/7a/jtpLy1ZQRETvAdsfzmN0cua8ADpI0r1wHuxi4st1A0kGt2RNoLrOitFssaZakA4GDaK5J7tTBvk/nnntu6kzCGqkzeWukzuStYW//mHjUI3fbmyWdQfN9IFsuhVwp6Txghe2rgDMkvYPmBojHKdfP2r5L0j/QfDfHJuB0j9ajiIgYt66+fsD2t4DXDlt2buvxH2xn208DHW95joiI/phWd6gODAykziSskTqTt0bqTN4ao5kU/82epIzWRESMkSQ8jhOqERExxSTcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIq1FW4S1ooaZWkNZKWdFh/lqQ7Jd0m6duS9m+t2yzpFkm3Svp6LzsfERGdyfb2G0gzgDXAscADwApgse1VrTZHAzfYflbSacCA7cVl3VO29xylhkfrR0REvJgkbKvTum6O3BcAa22vs70JWAYsajew/R3bz5bZ64E57fo70OeIiBiHbsJ9DrC+Nb+BF4f3cKcCV7fmd5V0o6R/lbRopI3G44DZs5HU0+mA2bP70dWIiJ1iZi93JunDwJuAo1uL59l+UNKBwLWSbrd9by/rrhsaoteDOhoa6vEeIyJ2nm7CfSMwtzW/X1n2IpLeAZwDvK0M3wBg+8Hy815Jg8DhwDbhvnTp0q2PBwYGGBgY6Kb/ERHTxuDgIIODg1217eaE6i7AapoTqg8CNwIn2V7ZanM4cBlwnO27W8v3Ap6x/ZykfYB/ARa1T8aWduM6oSqp90fuQE7yRsRktr0TqqMeudveLOkMYDnNGP3FtldKOg9YYfsq4DPA7sBlkgSss30icAjwV5I2l20/PTzYIyKi90Y9ct8pnciRe0TEmI33UsiIiJhiEu4RERVKuI9Br6+nz7X0EdEvGXMfaZ9sO+be6zoZ14+I8ciYe0TENJNwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKtRVuEtaKGmVpDWSlnRYf5akOyXdJunbkvZvrTulbLda0sm97HxERHQm29tvIM0A1gDHAg8AK4DFtle12hwN3GD7WUmnAQO2F0vaG7gJmA8IuBmYb/vJYTU8Wj9G6SM7vvUI+wSG96nXdTrViIjoliRsq9O6bo7cFwBrba+zvQlYBixqN7D9HdvPltnrgTnl8XHActtP2n4CWA4s3JEnERER3esm3OcA61vzG3ghvDs5Fbh6hG03jrJtRET0wMxe7kzSh4E3AUePddulS5dufTwwMMDAwEDP+hURUYPBwUEGBwe7atvNmPuRwFLbC8v82YBtnz+s3TuAzwNvs/1oWbaYZvz9tDJ/EXCd7a8M2zZj7hERY7S9Mfduwn0XYDXNCdUHgRuBk2yvbLU5HLgMOM723a3l7ROqM8rjN5Xx93aNhHtExBhtL9xHHZaxvVnSGTQnQ2cAF9teKek8YIXtq4DPALsDl0kSsM72ibYfl/SnNKFu4LzhwR4REb036pH7TulEjtwjIsZsvJdCRkTEFJNwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAp1Fe6SFkpaJWmNpCUd1h8l6WZJmyS9f9i6zZJukXSrpK/3quMRETGymaM1kDQDuAA4FngAWCHpCturWs3WAacAH++wi5/ant+LzkZERHdGDXdgAbDW9joAScuARcDWcLd9f1nnDturB/2MiIgx6GZYZg6wvjW/oSzr1q6SbpT0r5IWjal3ERGxQ7o5ch+vebYflHQgcK2k223fO7zR0qVLtz4eGBhgYGBgJ3QtImLqGBwcZHBwsKu2sjuNpLQaSEcCS20vLPNnA7Z9foe2lwDfsP3VEfbVcb0kj9aPUfrIjm89wj6B4X3qdZ1ONSIiuiUJ2x2HvrsZllkBHCRpnqRZwGLgyu3VaxXeq2yDpH2AXwfu6rrnERGxQ0YNd9ubgTOA5cCdwDLbKyWdJ+kEAElHSFoPfAC4SNIdZfNDgJsk3Qr8M/DpYVfZREREH4w6LLNTOpFhmYiIMRvvsExEREwxCfdJ5oDZs5HU0+mA2bMn+mlFxE6WYZmR9snEDMvsrOcSEVNfhmUiIqaZhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhrsJd0kJJqyStkbSkw/qjJN0saZOk9w9bd0rZbrWkk3vV8YiIGJlsb7+BNANYAxwLPACsABbbXtVqMxfYE/g4cKXtr5blewM3AfMBATcD820/OayGR+vHKH1kx7ceYZ/A8D71us7OqDFSnYiY+iRhW53WdXPkvgBYa3ud7U3AMmBRu4Ht+23/ELbJpeOA5baftP0EsBxYOOZnEBERY9JNuM8B1rfmN5Rl3Ri+7cYxbBsRETto5kR3YIulS5dufTwwMMDAwMCE9SUiYjIaHBxkcHCwq7bdjLkfCSy1vbDMnw3Y9vkd2l4CfKM15r4YGLB9Wpm/CLjO9leGbZcx9z7VGKlOREx94x1zXwEcJGmepFnAYuDK7dVrPb4GeKekl5WTq+8syyIioo9GDXfbm4EzaE6G3gkss71S0nmSTgCQdISk9cAHgIsk3VG2fRz4U5orZm4AzisnViMioo9GHZbZKZ3IsEzfaoxUJyKmvvEOy0RExBSTcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCfZo6YPZsJPV0OmD27Il+WhFR5P9QHWmf1P1/qOb/ao2Y+vJ/qEZETDMJ94iICiXcIyIqlHCPiKhQwj36KlflREyMXC0z0j7J1TJTqU7EdJSrZaJ6vf6E0OnTQT6FxFSSI/eR9kmO3KdznXwKialg3EfukhZKWiVpjaQlHdbPkrRM0lpJP5A0tyyfJ+kZSbeU6cLxPZWIiOjGzNEaSJoBXAAcCzwArJB0he1VrWanAo/Zfo2kDwGfARaXdT+yPb/H/Y6IiO3o5sh9AbDW9jrbm4BlwKJhbRYBXyyPL6d5I9ii40eGiIjon27CfQ6wvjW/oSzr2Mb2ZuAJSS8v6w6QdLOk6yS9dbwdjoiI0Y06LLODthytPwjMtf24pPnA1yUdavvpPtWNiAi6C/eNwNzW/H5lWdsGYH/gAUm7AHvafqysew7A9i2S7gYOBm4ZXmTp0qVbHw8MDDAwMNDdM4iImCYGBwcZHBzsqu2ol0KWsF5NM47+IHAjcJLtla02pwOvt326pMXAibYXS9qH5kTr85JeDXwHeIPtJ4bVyKWQfaqROpO3xkh1Irq1vUshRz1yt71Z0hnAcpox+ottr5R0HrDC9lXAxcClktYCj/LClTJvAz4l6TngeeBjw4M9IiJ6LzcxjbRP6j46TJ2JrzFSnYhu5esHIiKmmYR7RESFEu4RERVKuEdEVCjhHhFRoYR7RESFEu4Rk0z+U5DohYR7xCSzbmgIQ0+ndUND29TJm0jdchPTSPuk7htlUmfia0yXOtE/uYkpImKaSbhHRFQo4R4RUaGEe0REhRLuEREVSrhHRFQo4R4RUaGEe0REhRLuEdFXvb4TNnfBdid3qI60T3IX5HSuU9Nzqa1O7oJ9Qe5QjYiYZhLuEREVSrhHRFQo4R4RU16+vnhbOaE60j7JSbvpXKem51JbnXx98QtyQjUiYppJuEdEVCjhHhFRoYR7RESFugp3SQslrZK0RtKSDutnSVomaa2kH0ia21p3Tlm+UtK7etn5iIidaSpdlTNquEuaAVwAHAccBpwk6XXDmp0KPGb7NcDngM+UbQ8FPggcAhwPXCip45ndnWEwdSZljdSZvDVS58XWDQ1hGHW6ros2W6Z1Q0Pj6NHIujlyXwCstb3O9iZgGbBoWJtFwBfL48uBt5fH7wWW2f6F7fuAtWV/E2IwdSZljdSZvDVSZ/LWGE034T4HWN+a31CWdWxjezPwpKSXd9h2Y4dtIyKix/p1QnXChl4iIoLmDqztTcCRwLda82cDS4a1uRp4c3m8C/BQp7bAt7a0G7Z9t8NTmTJlypSpNY2U3TMZ3QrgIEnzgAeBxcBJw9p8AzgFuAH4TeDasvxK4MuSPkszHHMQcOPwAiPdPhsRETtm1HC3vVnSGcBymmGci22vlHQesML2VcDFwKWS1gKP0rwBYPsuSf8A3AVsAk4f15fIREREVybFF4dFRERvTZs7VCWdJemHkm6X9GVJs/pQY7s3e/WwzsskXVZuDLtT0pv7UONiSUOSbu/1vjvUOlPSHWX6/T7V2E/SteX31bc6pdYMSbdIurKH+9zm9ZC0t6TlklZLukbSy3pVr+x/V0k3SLq1/M7O7eX+W3UOLjVuKT+f7MfrI+k+Sf9WamwzPDyO/XZ6bT5Q8mazpPm9qjUmo51QrWEC9gXuAWaV+a8AJ/e4xgzgR8A84CXAbcDr+vR8/i/w0fJ4JrBnH2q8FXgjcHufX5vDgNuBXWlOxi8HXt2HOrOBN5bHewCr+/j6nAV8Cbiyn68HcD7wifJ4CfDnfXguu5WfuwDXAwv6/PcwA3gA2L8P+74H2LsP++302rwWeA3N+cf5/fydjTRNmyN3mj/O3SXNBHaj+QPqpW5u9ho3SXsCR9m+BMDNDWJP9bqO7e8Dj/d6vx0cAtxg++du7pH4LvD+Xhex/WPbt5XHTwMr6cM9F5L2A94N/E0v9zvC69G+efCLwIm9rFnqPlMe7kpzINHvcdx3AHfbXj9qy7ETfRit6PTa2F5tey0TeFn4tAh32w8A/xO4n+ZGqids/1OPy3Rzs1cvHAg8IumS8jH2ryX9Uh/q7Cw/BI4qQwy70QTj/v0sKOkAmiOtG/qw+88Cf0T/QxDglbaHoHnzAl7Z6wJliOlW4MfAt22v6HWNYT4E/H2f9m3gGkkrJP1On2pMGtMi3CXtRXOUM49miGYPSf9pYnu1w2YC84H/bXs+8AzN/QRTku1VNMML3wb+EbgV2NyvepL2oPmKjDPLEXwv9/0bwFD5hCB2/lFbz99QbD9v+3BgP+DN5fui+kLSS2i+suSyPpV4i+0jaA4gfk/SW/tUZ1KYFuFO81HvHtuPlY/+XwV+vcc1NgJzW/P7lWW9tgFYb/umMn85TdhPWbYvsX2E7QHgCWBNP+qUIbnLgUttX9GHEm8B3ivpHpqjz2Mk/W0f6mwxJOlXACTNBh7qV6Ey9HcdsLBfNWi+XPBm2w/3Y+e2Hyw/Hwa+xgR+z9XOMF3C/X7gSEkvLd9KeSzNmGsvbb3Zq1yJs5jmJq6eKh/D10s6uCw6luY+gn7YKUefkn65/JwLvA/4uz6V+gJwl+3P92Pntv/Y9lzbr6Z5/a+1fXIPSwx/Pa4Efrs8PgXo6RuWpH22XIFThv7eCazqZY1hTqJPQzKSdiuf2pC0O/AumiHBnpVg5H8rEzPuPhFncSdiAs6lCfTbaU4+vaQPNRbSXIWxFji7j8/l12jeTG6j+RTysj7U+Duak84/p3lz/Ggfn893af6h3QoM9KnGW2iGe24rdW4BFvbxOR1Nb6+W2eb1APYG/qn8zS0H9urxc3hD+T3dVv7dfLKPv6/dgIeBf9en/R/Yeu3v6OW/zxFemxNpzsH9jObO/qv79bsbacpNTBERFZouwzIREdNKwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIq9P8BGXZbYfGzeTAAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "importances = domain_score.classifier.feature_importances_\n", - "indices = np.argsort(importances)[::-1]\n", - "\n", - "# Print the feature ranking\n", - "print(\"Feature ranking:\")\n", - "\n", - "for f in range(len(domain_score.classifier.feature_importances_)):\n", - " print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n", - "\n", - "# Plot the feature importances of the forest\n", - "plt.figure()\n", - "plt.title(\"Feature importances\")\n", - "plt.bar(range(len(domain_score.classifier.feature_importances_)), importances[indices],\n", - " color=\"r\", align=\"center\")\n", - "plt.xticks(range(len(domain_score.classifier.feature_importances_)), indices)\n", - "plt.xlim([-1, len(domain_score.classifier.feature_importances_)])\n", - "plt.show()" + "domain_score = domain_scoring.DomainScoring(rated_paths, mode=args.mode)\n", + "domain_score.fit(graph)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1765,7 +1802,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.1" } }, "nbformat": 4, From 192bb85865cf1610a260367cae512d2249c598c3 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 23:41:35 +0100 Subject: [PATCH 05/35] Revert "Add exporting of decision tree graph and add feature names" This reverts commit f33683621a214f0e95039dfefbaf351eb639e156. --- load_data_open-day.ipynb | 325 +++++++++++++++++---------------------- 1 file changed, 144 insertions(+), 181 deletions(-) diff --git a/load_data_open-day.ipynb b/load_data_open-day.ipynb index 00d744c..8251149 100644 --- a/load_data_open-day.ipynb +++ b/load_data_open-day.ipynb @@ -3,9 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import json" @@ -14,9 +12,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "path = 'rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" @@ -1062,57 +1058,57 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 1, 'rating': '0.1'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 2, 'rating': '0.25'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 3, 'rating': '0.15'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 4, 'rating': '0.15'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 5, 'rating': '0.2'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 6, 'rating': '0.16'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 7, 'rating': '0.26'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 8, 'rating': '0.32'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 9, 'rating': '0.33'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 10, 'rating': '0.19'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 11, 'rating': '0.46'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'id': 12, 'rating': '0.33'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 13, 'rating': '0.12'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 14, 'rating': '0.27'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 15, 'rating': '0.2'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 16, 'rating': '0.29'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 17, 'rating': '0.72'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'id': 18, 'rating': '0.5'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 19, 'rating': '0.31'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 20, 'rating': '0.52'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 21, 'rating': '0.38'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 22, 'rating': '0.44'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 23, 'rating': '0.6'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 24, 'rating': '0.55'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 25, 'rating': '0.39'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 26, 'rating': '0.52'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 27, 'rating': '0.42'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 28, 'rating': '0.55'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'id': 29, 'rating': '0.48'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 30, 'rating': '0.52'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 31, 'rating': '0.33'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'id': 32, 'rating': '0.22'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 33, 'rating': '0.7'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 34, 'rating': '0.26'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'id': 35, 'rating': '0.54'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 36, 'rating': '0.54'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'id': 37, 'rating': '0.34'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 38, 'rating': '0.54'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 39, 'rating': '0.36'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 40, 'rating': '0.28'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 41, 'rating': '0.55'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'id': 42, 'rating': '0.34'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'id': 43, 'rating': '0.52'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'id': 44, 'rating': '0.42'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'id': 45, 'rating': '0.56'}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 46, 'rating': '0.69'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'id': 47, 'rating': '0.57'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'id': 48, 'rating': '0.42'}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'id': 49, 'rating': '0.75'}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'id': 50, 'rating': '0.67'}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'id': 51, 'rating': '0.62'}\n" + "{'id': 1, 'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.1'}\n", + "{'id': 2, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.25'}\n", + "{'id': 3, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15'}\n", + "{'id': 4, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15'}\n", + "{'id': 5, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.2'}\n", + "{'id': 6, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.16'}\n", + "{'id': 7, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26'}\n", + "{'id': 8, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.32'}\n", + "{'id': 9, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33'}\n", + "{'id': 10, 'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.19'}\n", + "{'id': 11, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.46'}\n", + "{'id': 12, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.33'}\n", + "{'id': 13, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.12'}\n", + "{'id': 14, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.27'}\n", + "{'id': 15, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.2'}\n", + "{'id': 16, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.29'}\n", + "{'id': 17, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.72'}\n", + "{'id': 18, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.5'}\n", + "{'id': 19, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.31'}\n", + "{'id': 20, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.52'}\n", + "{'id': 21, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.38'}\n", + "{'id': 22, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.44'}\n", + "{'id': 23, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.6'}\n", + "{'id': 24, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55'}\n", + "{'id': 25, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.39'}\n", + "{'id': 26, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", + "{'id': 27, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", + "{'id': 28, 'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.55'}\n", + "{'id': 29, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.48'}\n", + "{'id': 30, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", + "{'id': 31, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33'}\n", + "{'id': 32, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.22'}\n", + "{'id': 33, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.7'}\n", + "{'id': 34, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26'}\n", + "{'id': 35, 'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.54'}\n", + "{'id': 36, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54'}\n", + "{'id': 37, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.34'}\n", + "{'id': 38, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54'}\n", + "{'id': 39, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.36'}\n", + "{'id': 40, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.28'}\n", + "{'id': 41, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55'}\n", + "{'id': 42, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.34'}\n", + "{'id': 43, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", + "{'id': 44, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", + "{'id': 45, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.56'}\n", + "{'id': 46, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.69'}\n", + "{'id': 47, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.57'}\n", + "{'id': 48, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", + "{'id': 49, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.75'}\n", + "{'id': 50, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.67'}\n", + "{'id': 51, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.62'}\n" ] } ], @@ -1136,9 +1132,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "i = 0\n", @@ -1644,119 +1638,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 10, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.15\n", - "0.04999999999999999\n", - "0.04999999999999999\n", - "0.1\n", - "0.1\n", - "0.0\n", - "0.05000000000000002\n", - "0.1\n", - "0.0\n", - "0.05000000000000002\n", - "0.04999999999999999\n", - "0.1\n", - "0.16\n", - "0.17\n", - "0.03\n", - "0.06\n", - "0.07\n", - "0.010000000000000009\n", - "0.07\n", - "0.13\n", - "0.14\n", - "0.13\n", - "0.34\n", - "0.21000000000000002\n", - "0.15000000000000002\n", - "0.08000000000000002\n", - "0.19\n", - "0.06\n", - "0.26\n", - "0.13\n", - "0.07\n", - "0.43\n", - "0.21000000000000002\n", - "0.020000000000000018\n", - "0.23000000000000004\n", - "0.21999999999999997\n", - "0.020000000000000018\n", - "0.41\n", - "0.19\n", - "0.21000000000000002\n", - "0.19999999999999996\n", - "0.06\n", - "0.21999999999999997\n", - "0.17000000000000004\n", - "0.010000000000000009\n", - "0.15999999999999998\n", - "0.11000000000000004\n", - "0.04999999999999993\n", - "0.04999999999999999\n", - "0.20999999999999996\n", - "0.16000000000000003\n", - "0.030000000000000027\n", - "0.0\n", - "0.10000000000000003\n", - "0.13000000000000006\n", - "0.06\n", - "0.10000000000000003\n", - "0.040000000000000036\n", - "0.07000000000000006\n", - "0.040000000000000036\n", - "0.0\n", - "0.030000000000000027\n", - "0.36999999999999994\n", - "0.21000000000000002\n", - "0.11000000000000001\n", - "0.48\n", - "0.04000000000000001\n", - "0.32000000000000006\n", - "0.07\n", - "0.43999999999999995\n", - "0.28\n", - "0.15999999999999992\n", - "0.0\n", - "0.2\n", - "0.2\n", - "0.019999999999999962\n", - "0.0\n", - "0.18000000000000005\n", - "0.18000000000000005\n", - "0.26\n", - "0.06\n", - "0.26\n", - "0.07999999999999996\n", - "0.010000000000000009\n", - "0.21000000000000002\n", - "0.18\n", - "0.07999999999999996\n", - "0.22000000000000003\n", - "0.030000000000000027\n", - "0.040000000000000036\n", - "0.13000000000000006\n", - "0.10000000000000003\n", - "0.14000000000000007\n", - "0.06000000000000005\n", - "0.12\n", - "0.18000000000000005\n", - "0.10000000000000009\n", - "0.26999999999999996\n", - "0.14999999999999997\n", - "0.33\n", - "0.25000000000000006\n", - "0.019999999999999907\n", - "0.07999999999999996\n" - ] - } - ], + "outputs": [], "source": [ "from util.datastructures import MetaPathRatingGraph, MetaPath\n", "graph = MetaPathRatingGraph()\n", @@ -1768,22 +1652,101 @@ " if metapath is another_metapath:\n", " continue\n", " if float(metapath['rating']) <= float(another_metapath['rating']):\n", - " graph.add_user_rating(MetaPath.from_list(another_metapath), MetaPath.from_list(metapath), \n", + " graph.add_user_rating(MetaPath.from_list(another_metapath['metapath']), MetaPath.from_list(metapath['metapath']), \n", " distance=float(another_metapath['rating']) - float(metapath['rating']))" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "ename": "NotFittedError", + "evalue": "This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotFittedError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdomain_scoring\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdomain_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDomainScoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdomain_score\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/32de-python/domain_scoring/domain_scoring.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, metapath_graph, test_size)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 44\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Test accuracy is {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_preprocess\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetapath_unrated\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMetaPath\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTuple\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMetaPath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/base.py\u001b[0m in \u001b[0;36mscore\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 347\u001b[0m \"\"\"\n\u001b[1;32m 348\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 349\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 350\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/tree/tree.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X, check_input)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mpredicted\u001b[0m \u001b[0mclasses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mpredict\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \"\"\"\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0mcheck_is_fitted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'tree_'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_X_predict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheck_input\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0mproba\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtree_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_is_fitted\u001b[0;34m(estimator, attributes, msg, all_or_any)\u001b[0m\n\u001b[1;32m 766\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 767\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mall_or_any\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mattr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mattributes\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 768\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotFittedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'name'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 769\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 770\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNotFittedError\u001b[0m: This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this method." + ] + } + ], "source": [ "import domain_scoring.domain_scoring as domain_scoring\n", - "domain_score = domain_scoring.DomainScoring(rated_paths, mode=args.mode)\n", - "domain_score.fit(graph)" + "domain_score = domain_scoring.DomainScoring()\n", + "domain_score.fit(graph, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature ranking:\n", + "1. feature 8 (0.252934)\n", + "2. feature 0 (0.241715)\n", + "3. feature 6 (0.141816)\n", + "4. feature 1 (0.069899)\n", + "5. feature 9 (0.061769)\n", + "6. feature 2 (0.059382)\n", + "7. feature 4 (0.059261)\n", + "8. feature 10 (0.036441)\n", + "9. feature 3 (0.034537)\n", + "10. feature 7 (0.021453)\n", + "11. feature 5 (0.014518)\n", + "12. feature 11 (0.006274)\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEKCAYAAADpfBXhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAGNlJREFUeJzt3X+0XWV95/H3J8RggUFQaiOBBBxEAW0lMpFVRS6iEiwadKyGqUIdppVFmVKm1kBdU4JdHYtdM+paDKWdImPRNhZGBbFIbOH6oxUIvwpCfsmPkAS8/AYRkRg+88d+EjY35+aem3tO7r3P/bzW2uuevfez9/c599x8zj7P3vtEtomIiLrMmOgORERE7yXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPaUfSX0r65ET3I6KflOvco1uS7gNeCfwCEGDgYNs/Hsc+jwa+ZHv/nnRyipF0CbDe9p9MdF+iLjMnugMxpRj4DdvX9XCfW94kdmxjaRfbm3vYn51GUj45R9/kjyvGSh0XSkdK+hdJj0u6tRyRb1n325LukvSUpB9J+t2yfDfgH4F9Jf2krJ8t6RJJn2ptf7Sk9a35eyV9QtK/AU9LmiHpVZIul/SQpLsl/dcRn0Br/1v2LemPJA1J2ihpkaTjJa2W9Iikc1rbnivpMknLSn9vkvSrrfWvk3Rd+T3cIek9w+peKOmbkn4CnAr8FvCJsq8rSrsl5ff0lKQfSjqxtY9TJH1P0l9Ieqw814Wt9XtL+kJ5Ho9K+mpr3QnltXlc0vclvaG1bomkDaXmSknHjPT7iynCdqZMXU3AvcDbOyzfF3gEOK7MH1vmX1HmjwcOKI+PAn4KvLHMHw3cP2x/lwCfas2/qE3pxy2l7q40bzg3AZ8EdgEOAH4EvHOE57F1/2Xfm1rb/hfgIeBLwG7AocAzwLzS/lzg58D7Svs/BO4pj2cCa4El5fExwFPAa1p1HweOLPO7Dn+uZfl/BH6lPP5N4OnW/Cml/n8uz/s0YGNr228Cfw/sWfp0VFl+ODAEHFG2+0j5Pb4EOBi4v1VjLnDgRP+9ZRrflCP3GKuvlyPGx1pHhR8Gvmn7GgDb/0wTtu8u81fbvq88/h6wnCbkx+Pzth+w/XPgPwD72P4z25tLrb8BFne5r+eA/+FmeGcZsA/wOdvP2L4LuAv4tVb7m21/rbT/XzQhfWSZdrd9vu1fuBm+ugo4qbXtFbavByh934bt/2d7qDy+jOYNY0GryTrbX7Bt4IvAqyS9UtJs4DjgY7afKr+L75Vtfge4yPZNblxK8yZxJLAZmAW8XtJM2/fbvrfL311MUhlzj7Fa5G3H3OcBH2wNQYjmb+taAEnHA39Cc4Q4A/gl4PZx9mPDsPpzJD3Wqj8D+G6X+3q0BCXAz8rPh1rrfwbs0ZrfOkRk25I20nyKUHtdsQ6Y02nbkUg6GTiL5hMIwO40bzhbbD2Bbftnkij9ewXwmO2nOux2HnBya7hKNEft+9r+nqQ/AJYCh0q6BvhD2w+O1teYvBLuMVadxtzXA39r+2PbNJZmAZfTHN1fYft5SV9r7afTydSf0gyJbPGqDm3a260H7rH92i763wtbr+xRk6z7AQ/QPKe5w9rOBVa35oc/3xfNS5oL/DVwjO0flGW3MsK5jmHWAy+XtGeHgF8P/JntT3fa0PYyYJmkPUr9P6cZAoopKsMy0QtfAt4j6V3l5OZLy4nKfWk+7s8CHinBfjzwrta2Q8ArJO3ZWnYb8O5ycnA2cOYo9W8EflJOsr5U0i6SDpN0RO+e4ou8SdKJknahOcJ+FrgeuAH4aenHTEkDwAk0Y+AjGQJe3ZrfHXgeeKT8Lj8KvL6bTrm5JPVq4EJJe5U+bBn++j/AaZIWAEjaXdK7y8+DJR1T3oifo/mk8nxXv4mYtBLuMRYdL1m0vQFYBPwx8DDNUMTHgRm2nwZ+H7isDJssBq5obbuaJvzuKeP4s4FLaYZt7gO+RTMOPmI/bD9PE6JvpDlJ+BBNmO3Jjtnu0XXp/4doTo7+FvC+Mr69CXgPzbmGR4ALgI/YXjvCfgAuBg7bcg7D9kqacfzraYZfDgO+P4b+foTmPoRVNG8cZwLYvplm3P2C8jqs4YUj811pjtQfpvkE8svAOcSU1tVNTOVSq8/RvBlcbPv8Yes/BvwezYmZnwC/a3tVWXcOzZn9XwBn2l7e02cQsRNJOhf497ZPnui+RGzPqEfuam60uIDmLPxhwEmSXjes2Zdt/6rtw4G/AD5btj0U+CBwCM3lcBeWMcqIiOijboZlFgBrba8rHzuX0XwE36p89N5iD14Yr3svsKxcFnYf217SFRERfdDN1TJzePHlWxvoENCSTgf+G83lVW9vbfuDVrONvPiysIgpxfZ5E92HiG707ISq7QttH0Rzd95/79V+IyJi7Lo5ct/Ii6/d3a8sG8lXgIta27a/7a/jtpLy1ZQRETvAdsfzmN0cua8ADpI0r1wHuxi4st1A0kGt2RNoLrOitFssaZakA4GDaK5J7tTBvk/nnntu6kzCGqkzeWukzuStYW//mHjUI3fbmyWdQfN9IFsuhVwp6Txghe2rgDMkvYPmBojHKdfP2r5L0j/QfDfHJuB0j9ajiIgYt66+fsD2t4DXDlt2buvxH2xn208DHW95joiI/phWd6gODAykziSskTqTt0bqTN4ao5kU/82epIzWRESMkSQ8jhOqERExxSTcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIq1FW4S1ooaZWkNZKWdFh/lqQ7Jd0m6duS9m+t2yzpFkm3Svp6LzsfERGdyfb2G0gzgDXAscADwApgse1VrTZHAzfYflbSacCA7cVl3VO29xylhkfrR0REvJgkbKvTum6O3BcAa22vs70JWAYsajew/R3bz5bZ64E57fo70OeIiBiHbsJ9DrC+Nb+BF4f3cKcCV7fmd5V0o6R/lbRopI3G44DZs5HU0+mA2bP70dWIiJ1iZi93JunDwJuAo1uL59l+UNKBwLWSbrd9by/rrhsaoteDOhoa6vEeIyJ2nm7CfSMwtzW/X1n2IpLeAZwDvK0M3wBg+8Hy815Jg8DhwDbhvnTp0q2PBwYGGBgY6Kb/ERHTxuDgIIODg1217eaE6i7AapoTqg8CNwIn2V7ZanM4cBlwnO27W8v3Ap6x/ZykfYB/ARa1T8aWduM6oSqp90fuQE7yRsRktr0TqqMeudveLOkMYDnNGP3FtldKOg9YYfsq4DPA7sBlkgSss30icAjwV5I2l20/PTzYIyKi90Y9ct8pnciRe0TEmI33UsiIiJhiEu4RERVKuI9Br6+nz7X0EdEvGXMfaZ9sO+be6zoZ14+I8ciYe0TENJNwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKtRVuEtaKGmVpDWSlnRYf5akOyXdJunbkvZvrTulbLda0sm97HxERHQm29tvIM0A1gDHAg8AK4DFtle12hwN3GD7WUmnAQO2F0vaG7gJmA8IuBmYb/vJYTU8Wj9G6SM7vvUI+wSG96nXdTrViIjoliRsq9O6bo7cFwBrba+zvQlYBixqN7D9HdvPltnrgTnl8XHActtP2n4CWA4s3JEnERER3esm3OcA61vzG3ghvDs5Fbh6hG03jrJtRET0wMxe7kzSh4E3AUePddulS5dufTwwMMDAwEDP+hURUYPBwUEGBwe7atvNmPuRwFLbC8v82YBtnz+s3TuAzwNvs/1oWbaYZvz9tDJ/EXCd7a8M2zZj7hERY7S9Mfduwn0XYDXNCdUHgRuBk2yvbLU5HLgMOM723a3l7ROqM8rjN5Xx93aNhHtExBhtL9xHHZaxvVnSGTQnQ2cAF9teKek8YIXtq4DPALsDl0kSsM72ibYfl/SnNKFu4LzhwR4REb036pH7TulEjtwjIsZsvJdCRkTEFJNwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAp1Fe6SFkpaJWmNpCUd1h8l6WZJmyS9f9i6zZJukXSrpK/3quMRETGymaM1kDQDuAA4FngAWCHpCturWs3WAacAH++wi5/ant+LzkZERHdGDXdgAbDW9joAScuARcDWcLd9f1nnDturB/2MiIgx6GZYZg6wvjW/oSzr1q6SbpT0r5IWjal3ERGxQ7o5ch+vebYflHQgcK2k223fO7zR0qVLtz4eGBhgYGBgJ3QtImLqGBwcZHBwsKu2sjuNpLQaSEcCS20vLPNnA7Z9foe2lwDfsP3VEfbVcb0kj9aPUfrIjm89wj6B4X3qdZ1ONSIiuiUJ2x2HvrsZllkBHCRpnqRZwGLgyu3VaxXeq2yDpH2AXwfu6rrnERGxQ0YNd9ubgTOA5cCdwDLbKyWdJ+kEAElHSFoPfAC4SNIdZfNDgJsk3Qr8M/DpYVfZREREH4w6LLNTOpFhmYiIMRvvsExEREwxCfdJ5oDZs5HU0+mA2bMn+mlFxE6WYZmR9snEDMvsrOcSEVNfhmUiIqaZhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhrsJd0kJJqyStkbSkw/qjJN0saZOk9w9bd0rZbrWkk3vV8YiIGJlsb7+BNANYAxwLPACsABbbXtVqMxfYE/g4cKXtr5blewM3AfMBATcD820/OayGR+vHKH1kx7ceYZ/A8D71us7OqDFSnYiY+iRhW53WdXPkvgBYa3ud7U3AMmBRu4Ht+23/ELbJpeOA5baftP0EsBxYOOZnEBERY9JNuM8B1rfmN5Rl3Ri+7cYxbBsRETto5kR3YIulS5dufTwwMMDAwMCE9SUiYjIaHBxkcHCwq7bdjLkfCSy1vbDMnw3Y9vkd2l4CfKM15r4YGLB9Wpm/CLjO9leGbZcx9z7VGKlOREx94x1zXwEcJGmepFnAYuDK7dVrPb4GeKekl5WTq+8syyIioo9GDXfbm4EzaE6G3gkss71S0nmSTgCQdISk9cAHgIsk3VG2fRz4U5orZm4AzisnViMioo9GHZbZKZ3IsEzfaoxUJyKmvvEOy0RExBSTcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCfZo6YPZsJPV0OmD27Il+WhFR5P9QHWmf1P1/qOb/ao2Y+vJ/qEZETDMJ94iICiXcIyIqlHCPiKhQwj36KlflREyMXC0z0j7J1TJTqU7EdJSrZaJ6vf6E0OnTQT6FxFSSI/eR9kmO3KdznXwKialg3EfukhZKWiVpjaQlHdbPkrRM0lpJP5A0tyyfJ+kZSbeU6cLxPZWIiOjGzNEaSJoBXAAcCzwArJB0he1VrWanAo/Zfo2kDwGfARaXdT+yPb/H/Y6IiO3o5sh9AbDW9jrbm4BlwKJhbRYBXyyPL6d5I9ii40eGiIjon27CfQ6wvjW/oSzr2Mb2ZuAJSS8v6w6QdLOk6yS9dbwdjoiI0Y06LLODthytPwjMtf24pPnA1yUdavvpPtWNiAi6C/eNwNzW/H5lWdsGYH/gAUm7AHvafqysew7A9i2S7gYOBm4ZXmTp0qVbHw8MDDAwMNDdM4iImCYGBwcZHBzsqu2ol0KWsF5NM47+IHAjcJLtla02pwOvt326pMXAibYXS9qH5kTr85JeDXwHeIPtJ4bVyKWQfaqROpO3xkh1Irq1vUshRz1yt71Z0hnAcpox+ottr5R0HrDC9lXAxcClktYCj/LClTJvAz4l6TngeeBjw4M9IiJ6LzcxjbRP6j46TJ2JrzFSnYhu5esHIiKmmYR7RESFEu4RERVKuEdEVCjhHhFRoYR7RESFEu4Rk0z+U5DohYR7xCSzbmgIQ0+ndUND29TJm0jdchPTSPuk7htlUmfia0yXOtE/uYkpImKaSbhHRFQo4R4RUaGEe0REhRLuEREVSrhHRFQo4R4RUaGEe0REhRLuEdFXvb4TNnfBdid3qI60T3IX5HSuU9Nzqa1O7oJ9Qe5QjYiYZhLuEREVSrhHRFQo4R4RU16+vnhbOaE60j7JSbvpXKem51JbnXx98QtyQjUiYppJuEdEVCjhHhFRoYR7RESFugp3SQslrZK0RtKSDutnSVomaa2kH0ia21p3Tlm+UtK7etn5iIidaSpdlTNquEuaAVwAHAccBpwk6XXDmp0KPGb7NcDngM+UbQ8FPggcAhwPXCip45ndnWEwdSZljdSZvDVS58XWDQ1hGHW6ros2W6Z1Q0Pj6NHIujlyXwCstb3O9iZgGbBoWJtFwBfL48uBt5fH7wWW2f6F7fuAtWV/E2IwdSZljdSZvDVSZ/LWGE034T4HWN+a31CWdWxjezPwpKSXd9h2Y4dtIyKix/p1QnXChl4iIoLmDqztTcCRwLda82cDS4a1uRp4c3m8C/BQp7bAt7a0G7Z9t8NTmTJlypSpNY2U3TMZ3QrgIEnzgAeBxcBJw9p8AzgFuAH4TeDasvxK4MuSPkszHHMQcOPwAiPdPhsRETtm1HC3vVnSGcBymmGci22vlHQesML2VcDFwKWS1gKP0rwBYPsuSf8A3AVsAk4f15fIREREVybFF4dFRERvTZs7VCWdJemHkm6X9GVJs/pQY7s3e/WwzsskXVZuDLtT0pv7UONiSUOSbu/1vjvUOlPSHWX6/T7V2E/SteX31bc6pdYMSbdIurKH+9zm9ZC0t6TlklZLukbSy3pVr+x/V0k3SLq1/M7O7eX+W3UOLjVuKT+f7MfrI+k+Sf9WamwzPDyO/XZ6bT5Q8mazpPm9qjUmo51QrWEC9gXuAWaV+a8AJ/e4xgzgR8A84CXAbcDr+vR8/i/w0fJ4JrBnH2q8FXgjcHufX5vDgNuBXWlOxi8HXt2HOrOBN5bHewCr+/j6nAV8Cbiyn68HcD7wifJ4CfDnfXguu5WfuwDXAwv6/PcwA3gA2L8P+74H2LsP++302rwWeA3N+cf5/fydjTRNmyN3mj/O3SXNBHaj+QPqpW5u9ho3SXsCR9m+BMDNDWJP9bqO7e8Dj/d6vx0cAtxg++du7pH4LvD+Xhex/WPbt5XHTwMr6cM9F5L2A94N/E0v9zvC69G+efCLwIm9rFnqPlMe7kpzINHvcdx3AHfbXj9qy7ETfRit6PTa2F5tey0TeFn4tAh32w8A/xO4n+ZGqids/1OPy3Rzs1cvHAg8IumS8jH2ryX9Uh/q7Cw/BI4qQwy70QTj/v0sKOkAmiOtG/qw+88Cf0T/QxDglbaHoHnzAl7Z6wJliOlW4MfAt22v6HWNYT4E/H2f9m3gGkkrJP1On2pMGtMi3CXtRXOUM49miGYPSf9pYnu1w2YC84H/bXs+8AzN/QRTku1VNMML3wb+EbgV2NyvepL2oPmKjDPLEXwv9/0bwFD5hCB2/lFbz99QbD9v+3BgP+DN5fui+kLSS2i+suSyPpV4i+0jaA4gfk/SW/tUZ1KYFuFO81HvHtuPlY/+XwV+vcc1NgJzW/P7lWW9tgFYb/umMn85TdhPWbYvsX2E7QHgCWBNP+qUIbnLgUttX9GHEm8B3ivpHpqjz2Mk/W0f6mwxJOlXACTNBh7qV6Ey9HcdsLBfNWi+XPBm2w/3Y+e2Hyw/Hwa+xgR+z9XOMF3C/X7gSEkvLd9KeSzNmGsvbb3Zq1yJs5jmJq6eKh/D10s6uCw6luY+gn7YKUefkn65/JwLvA/4uz6V+gJwl+3P92Pntv/Y9lzbr6Z5/a+1fXIPSwx/Pa4Efrs8PgXo6RuWpH22XIFThv7eCazqZY1hTqJPQzKSdiuf2pC0O/AumiHBnpVg5H8rEzPuPhFncSdiAs6lCfTbaU4+vaQPNRbSXIWxFji7j8/l12jeTG6j+RTysj7U+Duak84/p3lz/Ggfn893af6h3QoM9KnGW2iGe24rdW4BFvbxOR1Nb6+W2eb1APYG/qn8zS0H9urxc3hD+T3dVv7dfLKPv6/dgIeBf9en/R/Yeu3v6OW/zxFemxNpzsH9jObO/qv79bsbacpNTBERFZouwzIREdNKwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIq9P8BGXZbYfGzeTAAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "importances = domain_score.classifier.feature_importances_\n", + "indices = np.argsort(importances)[::-1]\n", + "\n", + "# Print the feature ranking\n", + "print(\"Feature ranking:\")\n", + "\n", + "for f in range(len(domain_score.classifier.feature_importances_)):\n", + " print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n", + "\n", + "# Plot the feature importances of the forest\n", + "plt.figure()\n", + "plt.title(\"Feature importances\")\n", + "plt.bar(range(len(domain_score.classifier.feature_importances_)), importances[indices],\n", + " color=\"r\", align=\"center\")\n", + "plt.xticks(range(len(domain_score.classifier.feature_importances_)), indices)\n", + "plt.xlim([-1, len(domain_score.classifier.feature_importances_)])\n", + "plt.show()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -1802,7 +1765,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.5.2" } }, "nbformat": 4, From bd18e05ae90e1797fb273a4d814bf5def77baf61 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 23:42:47 +0100 Subject: [PATCH 06/35] Add exporting of decision tree graph and add feature names --- load_data_open-day.ipynb | 186 ++++++++++++++++++++++----------------- 1 file changed, 105 insertions(+), 81 deletions(-) diff --git a/load_data_open-day.ipynb b/load_data_open-day.ipynb index 8251149..64ddc5b 100644 --- a/load_data_open-day.ipynb +++ b/load_data_open-day.ipynb @@ -1058,57 +1058,57 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'id': 1, 'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.1'}\n", - "{'id': 2, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.25'}\n", - "{'id': 3, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15'}\n", - "{'id': 4, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15'}\n", - "{'id': 5, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.2'}\n", - "{'id': 6, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.16'}\n", - "{'id': 7, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26'}\n", - "{'id': 8, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.32'}\n", - "{'id': 9, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33'}\n", - "{'id': 10, 'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.19'}\n", - "{'id': 11, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.46'}\n", - "{'id': 12, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.33'}\n", - "{'id': 13, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.12'}\n", - "{'id': 14, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.27'}\n", - "{'id': 15, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.2'}\n", - "{'id': 16, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.29'}\n", - "{'id': 17, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.72'}\n", - "{'id': 18, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.5'}\n", - "{'id': 19, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.31'}\n", - "{'id': 20, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.52'}\n", - "{'id': 21, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.38'}\n", - "{'id': 22, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.44'}\n", - "{'id': 23, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.6'}\n", - "{'id': 24, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55'}\n", - "{'id': 25, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.39'}\n", - "{'id': 26, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", - "{'id': 27, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", - "{'id': 28, 'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.55'}\n", - "{'id': 29, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.48'}\n", - "{'id': 30, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", - "{'id': 31, 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33'}\n", - "{'id': 32, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.22'}\n", - "{'id': 33, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.7'}\n", - "{'id': 34, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26'}\n", - "{'id': 35, 'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.54'}\n", - "{'id': 36, 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54'}\n", - "{'id': 37, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.34'}\n", - "{'id': 38, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54'}\n", - "{'id': 39, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.36'}\n", - "{'id': 40, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.28'}\n", - "{'id': 41, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55'}\n", - "{'id': 42, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.34'}\n", - "{'id': 43, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52'}\n", - "{'id': 44, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", - "{'id': 45, 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.56'}\n", - "{'id': 46, 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.69'}\n", - "{'id': 47, 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.57'}\n", - "{'id': 48, 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42'}\n", - "{'id': 49, 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.75'}\n", - "{'id': 50, 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.67'}\n", - "{'id': 51, 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.62'}\n" + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.1', 'id': 1}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.25', 'id': 2}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15', 'id': 3}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15', 'id': 4}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.2', 'id': 5}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.16', 'id': 6}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26', 'id': 7}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.32', 'id': 8}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33', 'id': 9}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.19', 'id': 10}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.46', 'id': 11}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.33', 'id': 12}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.12', 'id': 13}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.27', 'id': 14}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.2', 'id': 15}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.29', 'id': 16}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.72', 'id': 17}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.5', 'id': 18}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.31', 'id': 19}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.52', 'id': 20}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.38', 'id': 21}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.44', 'id': 22}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.6', 'id': 23}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55', 'id': 24}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.39', 'id': 25}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52', 'id': 26}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42', 'id': 27}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.55', 'id': 28}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.48', 'id': 29}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52', 'id': 30}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33', 'id': 31}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.22', 'id': 32}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.7', 'id': 33}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26', 'id': 34}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.54', 'id': 35}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54', 'id': 36}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.34', 'id': 37}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54', 'id': 38}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.36', 'id': 39}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.28', 'id': 40}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55', 'id': 41}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.34', 'id': 42}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52', 'id': 43}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42', 'id': 44}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.56', 'id': 45}\n", + "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.69', 'id': 46}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.57', 'id': 47}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42', 'id': 48}\n", + "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.75', 'id': 49}\n", + "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.67', 'id': 50}\n", + "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.62', 'id': 51}\n" ] } ], @@ -1638,7 +1638,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1658,22 +1658,16 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 10, "metadata": {}, "outputs": [ { - "ename": "NotFittedError", - "evalue": "This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotFittedError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdomain_scoring\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdomain_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDomainScoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdomain_score\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/32de-python/domain_scoring/domain_scoring.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, metapath_graph, test_size)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 44\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Test accuracy is {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_preprocess\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetapath_unrated\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMetaPath\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTuple\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMetaPath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/base.py\u001b[0m in \u001b[0;36mscore\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 347\u001b[0m \"\"\"\n\u001b[1;32m 348\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 349\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 350\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/tree/tree.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X, check_input)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mpredicted\u001b[0m \u001b[0mclasses\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mpredict\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \"\"\"\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0mcheck_is_fitted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'tree_'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_X_predict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheck_input\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0mproba\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtree_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_is_fitted\u001b[0;34m(estimator, attributes, msg, all_or_any)\u001b[0m\n\u001b[1;32m 766\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 767\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mall_or_any\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mattr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mattributes\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 768\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotFittedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'name'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 769\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 770\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNotFittedError\u001b[0m: This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this method." + "name": "stdout", + "output_type": "stream", + "text": [ + "135\n", + "59\n", + "Test accuracy is 0.7288135593220338\n" ] } ], @@ -1685,7 +1679,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -1693,25 +1687,25 @@ "output_type": "stream", "text": [ "Feature ranking:\n", - "1. feature 8 (0.252934)\n", - "2. feature 0 (0.241715)\n", - "3. feature 6 (0.141816)\n", - "4. feature 1 (0.069899)\n", - "5. feature 9 (0.061769)\n", - "6. feature 2 (0.059382)\n", - "7. feature 4 (0.059261)\n", - "8. feature 10 (0.036441)\n", - "9. feature 3 (0.034537)\n", - "10. feature 7 (0.021453)\n", - "11. feature 5 (0.014518)\n", - "12. feature 11 (0.006274)\n" + "1. feature 2 (0.281819)\n", + "2. feature 6 (0.262366)\n", + "3. feature 4 (0.128521)\n", + "4. feature 11 (0.093004)\n", + "5. feature 3 (0.081649)\n", + "6. feature 10 (0.043133)\n", + "7. feature 1 (0.032640)\n", + "8. feature 7 (0.027219)\n", + "9. feature 9 (0.022623)\n", + "10. feature 5 (0.013184)\n", + "11. feature 0 (0.008306)\n", + "12. feature 8 (0.005537)\n" ] }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEKCAYAAADpfBXhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAGNlJREFUeJzt3X+0XWV95/H3J8RggUFQaiOBBBxEAW0lMpFVRS6iEiwadKyGqUIdppVFmVKm1kBdU4JdHYtdM+paDKWdImPRNhZGBbFIbOH6oxUIvwpCfsmPkAS8/AYRkRg+88d+EjY35+aem3tO7r3P/bzW2uuevfez9/c599x8zj7P3vtEtomIiLrMmOgORERE7yXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPaUfSX0r65ET3I6KflOvco1uS7gNeCfwCEGDgYNs/Hsc+jwa+ZHv/nnRyipF0CbDe9p9MdF+iLjMnugMxpRj4DdvX9XCfW94kdmxjaRfbm3vYn51GUj45R9/kjyvGSh0XSkdK+hdJj0u6tRyRb1n325LukvSUpB9J+t2yfDfgH4F9Jf2krJ8t6RJJn2ptf7Sk9a35eyV9QtK/AU9LmiHpVZIul/SQpLsl/dcRn0Br/1v2LemPJA1J2ihpkaTjJa2W9Iikc1rbnivpMknLSn9vkvSrrfWvk3Rd+T3cIek9w+peKOmbkn4CnAr8FvCJsq8rSrsl5ff0lKQfSjqxtY9TJH1P0l9Ieqw814Wt9XtL+kJ5Ho9K+mpr3QnltXlc0vclvaG1bomkDaXmSknHjPT7iynCdqZMXU3AvcDbOyzfF3gEOK7MH1vmX1HmjwcOKI+PAn4KvLHMHw3cP2x/lwCfas2/qE3pxy2l7q40bzg3AZ8EdgEOAH4EvHOE57F1/2Xfm1rb/hfgIeBLwG7AocAzwLzS/lzg58D7Svs/BO4pj2cCa4El5fExwFPAa1p1HweOLPO7Dn+uZfl/BH6lPP5N4OnW/Cml/n8uz/s0YGNr228Cfw/sWfp0VFl+ODAEHFG2+0j5Pb4EOBi4v1VjLnDgRP+9ZRrflCP3GKuvlyPGx1pHhR8Gvmn7GgDb/0wTtu8u81fbvq88/h6wnCbkx+Pzth+w/XPgPwD72P4z25tLrb8BFne5r+eA/+FmeGcZsA/wOdvP2L4LuAv4tVb7m21/rbT/XzQhfWSZdrd9vu1fuBm+ugo4qbXtFbavByh934bt/2d7qDy+jOYNY0GryTrbX7Bt4IvAqyS9UtJs4DjgY7afKr+L75Vtfge4yPZNblxK8yZxJLAZmAW8XtJM2/fbvrfL311MUhlzj7Fa5G3H3OcBH2wNQYjmb+taAEnHA39Cc4Q4A/gl4PZx9mPDsPpzJD3Wqj8D+G6X+3q0BCXAz8rPh1rrfwbs0ZrfOkRk25I20nyKUHtdsQ6Y02nbkUg6GTiL5hMIwO40bzhbbD2Bbftnkij9ewXwmO2nOux2HnBya7hKNEft+9r+nqQ/AJYCh0q6BvhD2w+O1teYvBLuMVadxtzXA39r+2PbNJZmAZfTHN1fYft5SV9r7afTydSf0gyJbPGqDm3a260H7rH92i763wtbr+xRk6z7AQ/QPKe5w9rOBVa35oc/3xfNS5oL/DVwjO0flGW3MsK5jmHWAy+XtGeHgF8P/JntT3fa0PYyYJmkPUr9P6cZAoopKsMy0QtfAt4j6V3l5OZLy4nKfWk+7s8CHinBfjzwrta2Q8ArJO3ZWnYb8O5ycnA2cOYo9W8EflJOsr5U0i6SDpN0RO+e4ou8SdKJknahOcJ+FrgeuAH4aenHTEkDwAk0Y+AjGQJe3ZrfHXgeeKT8Lj8KvL6bTrm5JPVq4EJJe5U+bBn++j/AaZIWAEjaXdK7y8+DJR1T3oifo/mk8nxXv4mYtBLuMRYdL1m0vQFYBPwx8DDNUMTHgRm2nwZ+H7isDJssBq5obbuaJvzuKeP4s4FLaYZt7gO+RTMOPmI/bD9PE6JvpDlJ+BBNmO3Jjtnu0XXp/4doTo7+FvC+Mr69CXgPzbmGR4ALgI/YXjvCfgAuBg7bcg7D9kqacfzraYZfDgO+P4b+foTmPoRVNG8cZwLYvplm3P2C8jqs4YUj811pjtQfpvkE8svAOcSU1tVNTOVSq8/RvBlcbPv8Yes/BvwezYmZnwC/a3tVWXcOzZn9XwBn2l7e02cQsRNJOhf497ZPnui+RGzPqEfuam60uIDmLPxhwEmSXjes2Zdt/6rtw4G/AD5btj0U+CBwCM3lcBeWMcqIiOijboZlFgBrba8rHzuX0XwE36p89N5iD14Yr3svsKxcFnYf217SFRERfdDN1TJzePHlWxvoENCSTgf+G83lVW9vbfuDVrONvPiysIgpxfZ5E92HiG707ISq7QttH0Rzd95/79V+IyJi7Lo5ct/Ii6/d3a8sG8lXgIta27a/7a/jtpLy1ZQRETvAdsfzmN0cua8ADpI0r1wHuxi4st1A0kGt2RNoLrOitFssaZakA4GDaK5J7tTBvk/nnntu6kzCGqkzeWukzuStYW//mHjUI3fbmyWdQfN9IFsuhVwp6Txghe2rgDMkvYPmBojHKdfP2r5L0j/QfDfHJuB0j9ajiIgYt66+fsD2t4DXDlt2buvxH2xn208DHW95joiI/phWd6gODAykziSskTqTt0bqTN4ao5kU/82epIzWRESMkSQ8jhOqERExxSTcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIq1FW4S1ooaZWkNZKWdFh/lqQ7Jd0m6duS9m+t2yzpFkm3Svp6LzsfERGdyfb2G0gzgDXAscADwApgse1VrTZHAzfYflbSacCA7cVl3VO29xylhkfrR0REvJgkbKvTum6O3BcAa22vs70JWAYsajew/R3bz5bZ64E57fo70OeIiBiHbsJ9DrC+Nb+BF4f3cKcCV7fmd5V0o6R/lbRopI3G44DZs5HU0+mA2bP70dWIiJ1iZi93JunDwJuAo1uL59l+UNKBwLWSbrd9by/rrhsaoteDOhoa6vEeIyJ2nm7CfSMwtzW/X1n2IpLeAZwDvK0M3wBg+8Hy815Jg8DhwDbhvnTp0q2PBwYGGBgY6Kb/ERHTxuDgIIODg1217eaE6i7AapoTqg8CNwIn2V7ZanM4cBlwnO27W8v3Ap6x/ZykfYB/ARa1T8aWduM6oSqp90fuQE7yRsRktr0TqqMeudveLOkMYDnNGP3FtldKOg9YYfsq4DPA7sBlkgSss30icAjwV5I2l20/PTzYIyKi90Y9ct8pnciRe0TEmI33UsiIiJhiEu4RERVKuI9Br6+nz7X0EdEvGXMfaZ9sO+be6zoZ14+I8ciYe0TENJNwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKtRVuEtaKGmVpDWSlnRYf5akOyXdJunbkvZvrTulbLda0sm97HxERHQm29tvIM0A1gDHAg8AK4DFtle12hwN3GD7WUmnAQO2F0vaG7gJmA8IuBmYb/vJYTU8Wj9G6SM7vvUI+wSG96nXdTrViIjoliRsq9O6bo7cFwBrba+zvQlYBixqN7D9HdvPltnrgTnl8XHActtP2n4CWA4s3JEnERER3esm3OcA61vzG3ghvDs5Fbh6hG03jrJtRET0wMxe7kzSh4E3AUePddulS5dufTwwMMDAwEDP+hURUYPBwUEGBwe7atvNmPuRwFLbC8v82YBtnz+s3TuAzwNvs/1oWbaYZvz9tDJ/EXCd7a8M2zZj7hERY7S9Mfduwn0XYDXNCdUHgRuBk2yvbLU5HLgMOM723a3l7ROqM8rjN5Xx93aNhHtExBhtL9xHHZaxvVnSGTQnQ2cAF9teKek8YIXtq4DPALsDl0kSsM72ibYfl/SnNKFu4LzhwR4REb036pH7TulEjtwjIsZsvJdCRkTEFJNwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAol3CMiKpRwj4ioUMI9IqJCCfeIiAp1Fe6SFkpaJWmNpCUd1h8l6WZJmyS9f9i6zZJukXSrpK/3quMRETGymaM1kDQDuAA4FngAWCHpCturWs3WAacAH++wi5/ant+LzkZERHdGDXdgAbDW9joAScuARcDWcLd9f1nnDturB/2MiIgx6GZYZg6wvjW/oSzr1q6SbpT0r5IWjal3ERGxQ7o5ch+vebYflHQgcK2k223fO7zR0qVLtz4eGBhgYGBgJ3QtImLqGBwcZHBwsKu2sjuNpLQaSEcCS20vLPNnA7Z9foe2lwDfsP3VEfbVcb0kj9aPUfrIjm89wj6B4X3qdZ1ONSIiuiUJ2x2HvrsZllkBHCRpnqRZwGLgyu3VaxXeq2yDpH2AXwfu6rrnERGxQ0YNd9ubgTOA5cCdwDLbKyWdJ+kEAElHSFoPfAC4SNIdZfNDgJsk3Qr8M/DpYVfZREREH4w6LLNTOpFhmYiIMRvvsExEREwxCfdJ5oDZs5HU0+mA2bMn+mlFxE6WYZmR9snEDMvsrOcSEVNfhmUiIqaZhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhhHtERIUS7hERFUq4R0RUKOEeEVGhrsJd0kJJqyStkbSkw/qjJN0saZOk9w9bd0rZbrWkk3vV8YiIGJlsb7+BNANYAxwLPACsABbbXtVqMxfYE/g4cKXtr5blewM3AfMBATcD820/OayGR+vHKH1kx7ceYZ/A8D71us7OqDFSnYiY+iRhW53WdXPkvgBYa3ud7U3AMmBRu4Ht+23/ELbJpeOA5baftP0EsBxYOOZnEBERY9JNuM8B1rfmN5Rl3Ri+7cYxbBsRETto5kR3YIulS5dufTwwMMDAwMCE9SUiYjIaHBxkcHCwq7bdjLkfCSy1vbDMnw3Y9vkd2l4CfKM15r4YGLB9Wpm/CLjO9leGbZcx9z7VGKlOREx94x1zXwEcJGmepFnAYuDK7dVrPb4GeKekl5WTq+8syyIioo9GDXfbm4EzaE6G3gkss71S0nmSTgCQdISk9cAHgIsk3VG2fRz4U5orZm4AzisnViMioo9GHZbZKZ3IsEzfaoxUJyKmvvEOy0RExBSTcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCPSKiQgn3iIgKJdwjIiqUcI+IqFDCfZo6YPZsJPV0OmD27Il+WhFR5P9QHWmf1P1/qOb/ao2Y+vJ/qEZETDMJ94iICiXcIyIqlHCPiKhQwj36KlflREyMXC0z0j7J1TJTqU7EdJSrZaJ6vf6E0OnTQT6FxFSSI/eR9kmO3KdznXwKialg3EfukhZKWiVpjaQlHdbPkrRM0lpJP5A0tyyfJ+kZSbeU6cLxPZWIiOjGzNEaSJoBXAAcCzwArJB0he1VrWanAo/Zfo2kDwGfARaXdT+yPb/H/Y6IiO3o5sh9AbDW9jrbm4BlwKJhbRYBXyyPL6d5I9ii40eGiIjon27CfQ6wvjW/oSzr2Mb2ZuAJSS8v6w6QdLOk6yS9dbwdjoiI0Y06LLODthytPwjMtf24pPnA1yUdavvpPtWNiAi6C/eNwNzW/H5lWdsGYH/gAUm7AHvafqysew7A9i2S7gYOBm4ZXmTp0qVbHw8MDDAwMNDdM4iImCYGBwcZHBzsqu2ol0KWsF5NM47+IHAjcJLtla02pwOvt326pMXAibYXS9qH5kTr85JeDXwHeIPtJ4bVyKWQfaqROpO3xkh1Irq1vUshRz1yt71Z0hnAcpox+ottr5R0HrDC9lXAxcClktYCj/LClTJvAz4l6TngeeBjw4M9IiJ6LzcxjbRP6j46TJ2JrzFSnYhu5esHIiKmmYR7RESFEu4RERVKuEdEVCjhHhFRoYR7RESFEu4Rk0z+U5DohYR7xCSzbmgIQ0+ndUND29TJm0jdchPTSPuk7htlUmfia0yXOtE/uYkpImKaSbhHRFQo4R4RUaGEe0REhRLuEREVSrhHRFQo4R4RUaGEe0REhRLuEdFXvb4TNnfBdid3qI60T3IX5HSuU9Nzqa1O7oJ9Qe5QjYiYZhLuEREVSrhHRFQo4R4RU16+vnhbOaE60j7JSbvpXKem51JbnXx98QtyQjUiYppJuEdEVCjhHhFRoYR7RESFugp3SQslrZK0RtKSDutnSVomaa2kH0ia21p3Tlm+UtK7etn5iIidaSpdlTNquEuaAVwAHAccBpwk6XXDmp0KPGb7NcDngM+UbQ8FPggcAhwPXCip45ndnWEwdSZljdSZvDVS58XWDQ1hGHW6ros2W6Z1Q0Pj6NHIujlyXwCstb3O9iZgGbBoWJtFwBfL48uBt5fH7wWW2f6F7fuAtWV/E2IwdSZljdSZvDVSZ/LWGE034T4HWN+a31CWdWxjezPwpKSXd9h2Y4dtIyKix/p1QnXChl4iIoLmDqztTcCRwLda82cDS4a1uRp4c3m8C/BQp7bAt7a0G7Z9t8NTmTJlypSpNY2U3TMZ3QrgIEnzgAeBxcBJw9p8AzgFuAH4TeDasvxK4MuSPkszHHMQcOPwAiPdPhsRETtm1HC3vVnSGcBymmGci22vlHQesML2VcDFwKWS1gKP0rwBYPsuSf8A3AVsAk4f15fIREREVybFF4dFRERvTZs7VCWdJemHkm6X9GVJs/pQY7s3e/WwzsskXVZuDLtT0pv7UONiSUOSbu/1vjvUOlPSHWX6/T7V2E/SteX31bc6pdYMSbdIurKH+9zm9ZC0t6TlklZLukbSy3pVr+x/V0k3SLq1/M7O7eX+W3UOLjVuKT+f7MfrI+k+Sf9WamwzPDyO/XZ6bT5Q8mazpPm9qjUmo51QrWEC9gXuAWaV+a8AJ/e4xgzgR8A84CXAbcDr+vR8/i/w0fJ4JrBnH2q8FXgjcHufX5vDgNuBXWlOxi8HXt2HOrOBN5bHewCr+/j6nAV8Cbiyn68HcD7wifJ4CfDnfXguu5WfuwDXAwv6/PcwA3gA2L8P+74H2LsP++302rwWeA3N+cf5/fydjTRNmyN3mj/O3SXNBHaj+QPqpW5u9ho3SXsCR9m+BMDNDWJP9bqO7e8Dj/d6vx0cAtxg++du7pH4LvD+Xhex/WPbt5XHTwMr6cM9F5L2A94N/E0v9zvC69G+efCLwIm9rFnqPlMe7kpzINHvcdx3AHfbXj9qy7ETfRit6PTa2F5tey0TeFn4tAh32w8A/xO4n+ZGqids/1OPy3Rzs1cvHAg8IumS8jH2ryX9Uh/q7Cw/BI4qQwy70QTj/v0sKOkAmiOtG/qw+88Cf0T/QxDglbaHoHnzAl7Z6wJliOlW4MfAt22v6HWNYT4E/H2f9m3gGkkrJP1On2pMGtMi3CXtRXOUM49miGYPSf9pYnu1w2YC84H/bXs+8AzN/QRTku1VNMML3wb+EbgV2NyvepL2oPmKjDPLEXwv9/0bwFD5hCB2/lFbz99QbD9v+3BgP+DN5fui+kLSS2i+suSyPpV4i+0jaA4gfk/SW/tUZ1KYFuFO81HvHtuPlY/+XwV+vcc1NgJzW/P7lWW9tgFYb/umMn85TdhPWbYvsX2E7QHgCWBNP+qUIbnLgUttX9GHEm8B3ivpHpqjz2Mk/W0f6mwxJOlXACTNBh7qV6Ey9HcdsLBfNWi+XPBm2w/3Y+e2Hyw/Hwa+xgR+z9XOMF3C/X7gSEkvLd9KeSzNmGsvbb3Zq1yJs5jmJq6eKh/D10s6uCw6luY+gn7YKUefkn65/JwLvA/4uz6V+gJwl+3P92Pntv/Y9lzbr6Z5/a+1fXIPSwx/Pa4Efrs8PgXo6RuWpH22XIFThv7eCazqZY1hTqJPQzKSdiuf2pC0O/AumiHBnpVg5H8rEzPuPhFncSdiAs6lCfTbaU4+vaQPNRbSXIWxFji7j8/l12jeTG6j+RTysj7U+Duak84/p3lz/Ggfn893af6h3QoM9KnGW2iGe24rdW4BFvbxOR1Nb6+W2eb1APYG/qn8zS0H9urxc3hD+T3dVv7dfLKPv6/dgIeBf9en/R/Yeu3v6OW/zxFemxNpzsH9jObO/qv79bsbacpNTBERFZouwzIREdNKwj0iokIJ94iICiXcIyIqlHCPiKhQwj0iokIJ94iICiXcIyIq9P8BGXZbYfGzeTAAAAAASUVORK5CYII=\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAE2CAYAAACaxNI3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucXVV99/HPN0RALhEQNQqEAAoIKHIRaQUZpBWwarD1AopQxYpFKmpVykP7EGzVR33qpVIUBVFEm4oIWCwXCwwgAoFwhyREgRACD4hcVTAQfs8fa51kz8mZzJnM2mdm9vm+X6/zmrP32Xv99pnLb/ZZ67fXVkRgZmbNMmW8D8DMzMpzcjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3frO5K+Ien48T4OszrJde7WLUn3AC8GngUEBLBtRPy/MbS5D3BmRGxR5CAnGUmnA0si4n+P97FYs0wd7wOwSSWAv4iIywq22fonsWY7S2tFxPKCx9MzkvzJ2WrjXy4bLXVcKe0p6SpJj0q6MZ+Rt177a0l3SHpC0q8kfSivXw/4b+Blkp7Mr0+XdLqkz1T230fSksry3ZI+Lelm4HeSpkh6qaQfS3pI0q8l/d2wb6DSfqttSZ+S9KCkpZJmSTpQ0kJJD0s6rrLvCZLOkjQnH+/1kl5deX17SZfl78Otkt7aFvdkST+T9CRwBPBe4NO5rfPydsfm79MTkm6TdFCljcMlXSnpS5Ieye/1gMrrG0v6Tn4fv5X0k8prb8k/m0cl/ULSqyqvHSvpvhxzvqR9h/v+2SQREX740dUDuBt4Y4f1LwMeBvbPy/vl5Rfm5QOBmfn53sDvgdfk5X2Ae9vaOx34TGV5yDb5OG7Icdch/cO5HjgeWAuYCfwK+PNh3seK9nPbz1T2/SDwEHAmsB6wA/AHYMu8/QnAH4G35+3/HrgrP58KLAKOzc/3BZ4AXlGJ+yiwZ15ep/295vV/BbwkP38n8LvK8uE5/gfy+/4wsLSy78+A/wCm5WPaO6/fBXgQ2D3v9778fXwesC1wbyXGDGCr8f5982NsD5+522idm88YH6mcFR4K/CwiLgKIiEtIyfbNefmCiLgnP78SuJiU5MfiaxFxf0T8EXgtsGlEfDYiludYpwIHd9nWMuBzkbp35gCbAl+NiD9ExB3AHcDOle3nRcQ5efsvk5L0nvmxfkR8ISKejdR9dT5wSGXf8yLiGoB87KuIiLMj4sH8/CzSP4w9KpssjojvREQA3wNeKunFkqYD+wNHRsQT+XtxZd7nb4BvRsT1kXyf9E9iT2A5sDawk6SpEXFvRNzd5ffOJij3udtozYpV+9y3BN5V6YIQ6XfrUgBJBwL/m3SGOAV4PnDLGI/jvrb4m0l6pBJ/CnBFl239NidKgKfy14cqrz8FbFBZXtFFFBEhaSnpU4Sqr2WLgc067TscSYcBHyd9AgFYn/QPp2XFAHZEPCWJfHwvBB6JiCc6NLslcFilu0qks/aXRcSVkj4GzAZ2kHQR8PcR8cBIx2oTl5O7jVanPvclwBkRceQqG0trAz8mnd2fFxHPSTqn0k6nwdTfk7pEWl7aYZvqfkuAuyJiuy6Ov4QVlT1KmXVz4H7Se5rRtu0MYGFluf39DlmWNAP4FrBvRFyd193IMGMdbZYAm0ia1iHBLwE+GxGf77RjRMwB5kjaIMf/P6QuIJuk3C1jJZwJvFXSm/Lg5rp5oPJlpI/7awMP58R+IPCmyr4PAi+UNK2y7ibgzXlwcDpwzAjx5wJP5kHWdSWtJWlHSbuXe4tD7CbpIElrkc6wnwauAa4Ffp+PY6qkAeAtpD7w4TwIbF1ZXh94Dng4fy/fD+zUzUFFKkm9ADhZ0kb5GFrdX98GPixpDwBJ60t6c/66raR98z/iZaRPKs919Z2wCcvJ3UajY8liRNwHzAL+F/AbUlfEJ4EpEfE74KPAWbnb5GDgvMq+C0nJ767cjz8d+D6p2+Ye4EJSP/iwxxERz5GS6GtIg4QPkZLZNNbMas+u8/G/mzQ4+l7g7bl/+xngraSxhoeBk4D3RcSiYdoBOA3YsTWGERHzSf3415C6X3YEfjGK430f6TqEBaR/HMcARMQ8Ur/7SfnncCcrz8zXIZ2p/4b0CeRFwHHYpNbVRUy51OqrpH8Gp0XEF9pePxL4CGlg5kngQxGxIL92HGlk/1ngmIi4uOg7MOshSScA20TEYeN9LGarM+KZu9KFFieRRuF3BA6RtH3bZj+IiFdHxC7Al4Cv5H13AN4FvJJUDndy7qM0M7MaddMtswewKCIW54+dc0gfwVfIH71bNmBlf93bgDm5LOweVi3pMjOzGnRTLbMZQ8u37qNDgpZ0FPAJUnnVGyv7Xl3ZbClDy8LMJpWIOHG8j8GsG8UGVCPi5Ih4OenqvH8q1a6ZmY1eN2fuSxlau7t5Xjec/wS+Wdm3Ottfx30leWpKM7M1EBEdxzG7OXO/Dni5pC1zHezBwE+rG0h6eWXxLaQyK/J2B0taW9JWwMtJNcmdDrD2xwknnOA4EzCG40zcGI4zcWNErP6ceMQz94hYLulo0nwgrVLI+ZJOBK6LiPOBoyX9GekCiEfJ9bMRcYekH5Hm5ngGOCpGOiIzMxuzrqYfiIgLge3a1p1Qef6x1ez7eaDjJc9mZlaPvrpCdWBgwHEmYAzHmbgxHGfixhjJhLjNniT31piZjZIkYgwDqmZmNsk4uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNVAjkvvM6dORVPQxc/r08X5bZmZrrBFXqErqfOfmMRCMOOuamdl48hWqZmZ9xsndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzcR6H07JOeedLM6uJZIYdrk1VnhSwdxzNPmtlYeFZIM7M+4+RuZtZAXSV3SQdIWiDpTknHdnj945Jul3STpJ9L2qLy2nJJN0i6UdK5JQ/ezMw6G7HPXdIU4E5gP+B+4Drg4IhYUNlmH+DaiHha0oeBgYg4OL/2RERMGyGG+9zNzEZprH3uewCLImJxRDwDzAFmVTeIiMsj4um8eA2wWTX+GhyzmZmNQTfJfTNgSWX5PoYm73ZHABdUlteRNFfSLyXNGm4nMzMrZ2rJxiQdCuwG7FNZvWVEPCBpK+BSSbdExN0l45qZ2VDdJPelwIzK8uZ53RCS/gw4DnhD7r4BICIeyF/vljQI7AKsktxnz5694vnAwAADAwPdHL+ZWd8YHBxkcHCwq227GVBdC1hIGlB9AJgLHBIR8yvb7AKcBewfEb+urN8I+ENELJO0KXAVMKs6GJu384CqmdkorW5AdcQz94hYLulo4GJSH/1pETFf0onAdRFxPvBFYH3gLEkCFkfEQcArgVMkLc/7fr49sZuZWXmefmC4NvGZu5lNbJ5+wMyszzi5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQN1ldwlHSBpgaQ7JR3b4fWPS7pd0k2Sfi5pi8prh+f9Fko6rOTBm5lZZ4qI1W8gTQHuBPYD7geuAw6OiAWVbfYBro2IpyV9GBiIiIMlbQxcD+wKCJgH7BoRj7fFiJGOY4RjZM33HqZNoP2YSsfpFMPMrFuSiAh1eq2bM/c9gEURsTgingHmALOqG0TE5RHxdF68BtgsP98fuDgiHo+Ix4CLgQPW5E2YmVn3uknumwFLKsv3sTJ5d3IEcMEw+y4dYV8zMytgasnGJB0K7AbsU7JdMzMbnW6S+1JgRmV587xuCEl/BhwHvCF337T2HWjb97JOQWbPnr3i+cDAAAMDA502MzPrW4ODgwwODna1bTcDqmsBC0kDqg8Ac4FDImJ+ZZtdgLOA/SPi15X11QHVKfn5brn/vRrDA6pmZqO0ugHVEc/cI2K5pKNJg6FTgNMiYr6kE4HrIuJ84IvA+sBZkgQsjoiDIuJRSf9MSuoBnNie2M3MrLwRz9x7chA+czczG7WxlkKamdkk4+RuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBdJXdJB0haIOlOScd2eH1vSfMkPSPpL9teWy7pBkk3Sjq31IGbmdnwpo60gaQpwEnAfsD9wHWSzouIBZXNFgOHA5/s0MTvI2LXEgdrZmbdGTG5A3sAiyJiMYCkOcAsYEVyj4h782vRYX8VOE4zMxuFbrplNgOWVJbvy+u6tY6kuZJ+KWnWqI7OzMzWSDdn7mO1ZUQ8IGkr4FJJt0TE3T2Ia2bWt7pJ7kuBGZXlzfO6rkTEA/nr3ZIGgV2AVZL77NmzVzwfGBhgYGCg2xBmZn1hcHCQwcHBrrZVRKdu8soG0lrAQtKA6gPAXOCQiJjfYdvTgfMj4uy8vBHwh4hYJmlT4CpgVttgLJJipOMY4RhZ872HaRNoP6bScTrFMDPrliQiouO45oh97hGxHDgauBi4HZgTEfMlnSjpLTnA7pKWAO8Avinp1rz7K4HrJd0IXAJ8vj2xm5lZeSOeuffkIHzmbmY2amM6czczs8nHyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNwnmJnTpyOp6GPm9Onj/bbMrMc8K+RwbTI+s0L26r2Y2eTnWSHNzPqMk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQN1ldwlHSBpgaQ7JR3b4fW9Jc2T9Iykv2x77fC830JJh5U6cDMzG96I87lLmgLcCewH3A9cBxwcEQsq28wApgGfBH4aET/J6zcGrgd2JU0rPg/YNSIeb4vh+dxrijFcHDOb/MY6n/sewKKIWBwRzwBzgFnVDSLi3oi4DVbJS/sDF0fE4xHxGHAxcMCo34GZmY1KN8l9M2BJZfm+vK4b7fsuHcW+Zma2hjygambWQFO72GYpMKOyvHle142lwEDbvpd12nD27Nkrng8MDDAwMNBpMzOzvjU4OMjg4GBX23YzoLoWsJA0oPoAMBc4JCLmd9j2dOD8iDg7L1cHVKfk57vl/vfqfh5QrSnGcHHMbPIb04BqRCwHjiYNht4OzImI+ZJOlPSWHGB3SUuAdwDflHRr3vdR4J9JSf1a4MT2xG5mZuWNeObek4PwmXttMYaLY2aT31hLIc3MbJJxcu9TM6dPR1LRx8zp08f7bZlZ5m6Z4dqk2d0y7v4xm/zcLWNm1mec3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3K1WvljKbHz4Iqbh2sQXMU2mOGb9yBcxmZn1GSd3M7MGcnI3M2sgJ3czswZycjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3czswZycjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3czswbqKrlLOkDSAkl3Sjq2w+trS5ojaZGkqyXNyOu3lPQHSTfkx8ml34CZma1q6kgbSJoCnATsB9wPXCfpvIhYUNnsCOCRiHiFpHcDXwQOzq/9KiJ2LXzcZma2Gt2cue8BLIqIxRHxDDAHmNW2zSzge/n5j0n/CFo63gLKzMzq001y3wxYUlm+L6/ruE1ELAcek7RJfm2mpHmSLpO011gP2MzMRjZit8waap2tPwDMiIhHJe0KnCtph4j4XU1xzcyM7pL7UmBGZXnzvK7qPmAL4H5JawHTIuKR/NoygIi4QdKvgW2BG9qDzJ49e8XzgYEBBgYGunsHZmZ9YnBwkMHBwa62VUSsfoOUrBeS+tEfAOYCh0TE/Mo2RwE7RcRRkg4GDoqIgyVtShpofU7S1sDlwKsi4rG2GDHScYxwjKz53sO0CbQfU+k4vYjRL3HM+pEkIqLjuOaIZ+4RsVzS0cDFpD760yJivqQTgesi4nzgNOD7khYBv2VlpcwbgM9IWgY8BxzZntjNzKy8Ec/ce3IQPnOvLUa/xDHrR6s7c/cVqmZmDeTkbo0wc/p0JBV7zJw+fbzfktmYuFtmuDZxt0w/x3HXj00G7pYxM+szTu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORu1qXStfSup7c6uc59uDZxnXs/x/FUCjYZuM7dzKzPOLmbmTWQk7vZBOO+fSvBfe7DtUmz+3UdZ/xjjHccm/zc525m1mec3M36lLt/ms3dMsO1iT/693OcJr2X8Y5j9XG3jJlZn3FyNzNrICd3M7MGcnI3M2sgJ3czswZycjezWpUuuXS5ZXdcCjlcm7jcrp/jNOm9NC2Oyy1XcimkmVmfcXI3M2sgJ3czm/Q8lcKqukrukg6QtEDSnZKO7fD62pLmSFok6WpJMyqvHZfXz5f0ppIHb2YGsPjBBwko+lj84IO9fROFjZjcJU0BTgL2B3YEDpG0fdtmRwCPRMQrgK8CX8z77gC8C3glcCBwsqSOnf+9MOg4EzKG40zcGI4z1GT6hNDNmfsewKKIWBwRzwBzgFlt28wCvpef/xh4Y37+NmBORDwbEfcAi3J742LQcSZkDMeZuDEcZ6huPyGc0MU2dX9C6Ca5bwYsqSzfl9d13CYilgOPS9qkw75LO+xrZmaF1TWgOm5dL2ZmRroYYHUPYE/gwsryPwDHtm1zAfC6/Hwt4KFO2wIXtrZr27/0WIgffvjhR188hsvdUxnZdcDLJW0JPAAcDBzSts1/AYcD1wLvBC7N638K/EDSV0jdMS8H5rYHGO4KKzMzWzMjJveIWC7paOBiUjfOaRExX9KJwHURcT5wGvB9SYuA35L+ARARd0j6EXAH8Axw1JjmGTAzs65MiLllzMysLF+hambWQE7uZmYN5ORuZtZA3VTLTGqStgW+AbwkInaS9GrgbRHxL4XaXwv4ILA5qWT0qspr/1gwzq6rez0ibigQoyfvpS3mZsCWVH4XI+KKgu3vT3o/l+SrpFvrPxAR3ynQ/idW93pEfHmsMXKcacBxpPdyQUT8sPLayRFxVIk4ub1av2eV9tYD/h6YERF/I+kVwHa5SKM4SX8KzGTo79oZBdrdAvgSqSLwAuBL+Wp+JJ0bEQeNNcYaHVfTB1QlXQ58CjglInbJ626LiJ0KtX8qsB6pxPN9wOUR8Yn82g0RsdqkPIo4l+Wn6wK7AzeTLhZ7NXB9RPxJgRg9eS+VeF8A3k2qplqeV0dEvK1Q+58D9gJuAN4KfDUivp5fK/J+JJ2Qn24HvJZU/kuONzciDh1rjBznbNL0HdcAHyBVn70nIv5Y+Pes9u9ZJdZ/AvOAw/KJ13rALyPiNaViVGJ9H9gGuImhv2sfLdD2z4GzST+bI4DdgLdGxG8l3djKOz030kVMk/1BKtcEuLGy7qaC7d9SeT4V+BbwE2CdasyC8X4CvKqyvBPw40n6XhYC69T4s78VmJqfbwT8N/CV9t+HQrGuADasLG8IXFGw/Zvalo8HrgJeCNwwSb9n17e3C9xc0+/CfPLJbA1tt/9sDgVuJ/0zKfazGe2jH/rcH5a0DelqLiS9g3QxVilrt55EmiDtQ6Szg0uBDQrGadkuIm6txLyNNOtmCb1+L3cBz6uh3ZapEfEsQEQ8RjoTnSbpLCrvtZCXAMsqy8vyulLWyTO0AhARnwW+Tfqn8sKCcXr5PVsm6fms/NvcBvhj4RgttwF1TdD+PEnrthYi4kzgGOAi4KU1xRxRPyT3jwCnANtLWgp8DPjbgu1fL+mA6oqI+AxwOql/r7RbJJ0qaSA/vg3cUqjtXr+XPwA3STpF0r+1HgXb/7WkfVoLEbE8Io4gfWIo9Q+x5QxgrqTZkmaTrtb+3up3GZX/YuVsqwBExHdJfdbLOu2whnr5PZtNmpJkC0k/AC4BVrlfRCGbAndIukjST1uPQm2fCryuuiIi/od0tf5thWKMWuP73FskrQ9MiYgnx/tYxiKfIfwt8Ia86grgGxHx9Pgd1ZqRdHin9RFRJCnms0Ii4qkOr20WEUtLxKm0uSuwd168IiJuLNl+L4zD9+yFpPmrBFwTEQ+XbL8SZ59O6yPi8jriTQSNTe6SDo2IM4erZohCVQxtMWsZje8Q5/mkCoOFpdvO7fesikHS2sC2eXFh5CqDQm3XXmHUFm8v4BURcbqkFwEbRMTdhdruSVVOW8y6K5kuiYj9Rlo3WdRdmTdaTS6FXD9/3bAXwYYbjSd9XC8Z522ksqu1ga0kvQb4TBSqMMlOJ1UxtCpwlgJnAUWTu6QBUtfFPaQzty0kHV4wgfxr/tqxwoiV72/MctXM7qSqmdNJYwlnAq8vFKL1e9yxKqdQjBWGq2QifVIca9vrkqqyNpW0MSunCJ9G4fs9SPpFROwl6Uly337rJVK1zLSC4b5NrswjNX6LpB8C45Lcx2UUt5cP4EU9ilPbaHxbnHnACxhaYXBr4Rg9qWLI72W7yvK2wLwa4tRWYVRp8yZSwqh+z24pGSO3WWtVTqXd2iqZSIONd5MGT++uPG4Gjq4jZhfHtHGBNmqtzBvtox8GVK+SdLGkI/JZQl3qHI2veiYiHm9bV7pvrVdVDM+LStdSRNxJPdUzdVYYtSyL9Nfc+p6tP8L2a6ruqpyW2iqZIuJrEbEV8MmI2Kry2DkiTqojZhcuKdBG3ZV5o9LkbhkAImJbSXuQpiE+XtIdpPu6nlk4VGs0fi6VRBhlu0sAbpf0HmCt3Bf+UeCXhWPMZmgVw+uB9xeOAak651RS9wWk+uDra4hzS1uc91KuwqjlR5JOATaS9DekC41OLRwDVlblnJOXD6JsVU5Lq5LpEob+Po/5op+KUyR9lJXFAYOkiw2LjbuMQol7SnyEdG1IqzLvbtLv9Lho7IBqJ5I2Bb4MvDci1ircdk9G4/Ng5/HAm/Kqi4B/icLVMr2oYpC0DukPYq+86krg5Igo+imhVxVGkv6c9HMRcFFE/Lxk+5U4tVfl1F3JlGOcSvp00GrzfcDyiPhgqRijOJaSV/lOiMq8xid3pTk53k46c98GOAf4UUTMG9cDm8DGo4pB6Ybqm0dE6TPqVvt1Vxh9ISKOHWldoVi1VeW0xamtkim3f3NE7DzSul4YS3Ifj8q8bvRDn/vNQKuiZNuIOLZkYpf0i/z1SUlPVB5PSnqiVJxKvJ9L2qiyvLGkiwq1vW5OspvmdjfJj5kUrmLI8QYlTcsx5wHfVrolY+k4byMNeF6Yl19T8AKWlj/vsO7AwjFaVTnHkiYRg5VVOaXjDJDmsvl34GTgTklvWO1Oo7c891G3Ym7NysqcXhtLt0y1Mq/TY1w0vs8d2DoiQtIGkjaIiN+VbDwi9spfV/tDlLRxRDxaIOSmkS4Lb8V/VNKLC7QLcCTpCt6XkSaOankCqGOg6wUR8YSkDwJnRMQJkuo4cz8B2IPUp0tE3CRpqxINS/pb4Chgm7Zj35DyYyGQPoXuQv75RMT9kupIIP8KvKn1SSfXcP8HaVKsUj4FXCbpLlJy3ZLCYzv5xGFYEfFIfrrGn0oj4pT89OSI+M2atlNaPyT3HXMN+iaAJP0GODxXTPTSJUCJPr3nJM2IiHsBlG5cXqRvLSK+BnxN0t9FngmwZlMlvRR4F2kcoS7PRMTj0pCTs1L9kT8kTfP6eeAfKuufrCSOkpblk5W6q3JWqWSSVKx6RmmenKeAV5Bq9yF1/ZSuyppH+lkLmAE8mp9vBNwLbAVDkvxYXCXpHuA/gZ8UOplbY/2Q3L8FfCIiLoMVHze/Bfxpj4+jxGg8pCT4C6WpjEUaWPtQobZbelXFcCJpQPgXEXFd/li+qHAMqLHCKJelPi7pa8AjrUG03N30uoi4tkScil5V5dRayRQRz0n690jT4dYyzpLjbAWgNAfTORHx33n5QFKlUclYvarM60o/DKhOiEGbwqPxm5IqWaCGSpZeVDEo3RjkoxFRvI+9Q6zaK4wk3QjsmmvdW2em15f6mbfFqr0qpxeVTJL+L3A16Sy31kQk6daIeNVI6wrGq60yr+tj6IPkfg6pf/L7edWhwG4R8fYeH0epm0N0HNSKsnN+9OQfoqS5EbFHyTbHi6Sbou0mE5JuiYhXF47Ts6qcSvu1VDIpTQmwHmkQtfVPI6LslACtWBeR/kFVr3V4Q0TsXzDGhKrM64dqmQ8ALyLdKeVs0sVGdVyQM5JS3TKfqjz+iTQV7OxCbbf0qorhKkknSdpb0q6tR+kgdVYYVdwl6aOSnpcfx5Cu8iytV1U5vahkOo90/cHOEbFhfhRP7NkhpDxwDmk6ihfldSXVWpk3Wv1w5r476SP5TFaOMUSpM6puR+MlbVLHAJvS/Ru/GhF/VbDN/UiTXw2pYmiNWxSM06m9iIg3dlg/ljir3Oqs07oxxngx8G+kOdeDNID+sYh4qFD7K6pygF9VXtqQdGu695aIU4l3Y0TskiuZtmhVMpX8JCJpX9KY0d7kuxYBV+aB/VpIWj8ifl9T22pV5gGUrswb9fH0QXJfCHySNPfLc631EbG4UPt3s5rR+NaATl2USkBuj4gdCrU3hdSfP496qxh6RtI84O1tFUbn1NEfXhdJLwA2pkdVOZJuJfXrfw84Pg9419HNtBZplst9gQ8DT0XE9iVj5Dh/Shp43iAiZkjaGTgyyt5YfCdS9+8mpBwwXpV5QH9Uy/wmIv6rrsZ7ORqf2/06K8v4ppA+Bhabl7xXVQwAkl4CfA54WUQcKGkH4E8i4rTCoWqvMFLNc3mPQ1VO7ZVMSvPWrE8aVL0SeG2pTzodfAXYnzxVckTcPNyt6nKAAAAHg0lEQVT41RhMlMo8oD/O3Pcj9a21T4D0k8JxejIar6FzfjwL3BMRVxWO0ZMqBkkXkLp/jo+InSVNJU2XWryCoQcVRpeT5/JudfdIui0idiocp/aqnF5VMuU+/N1If5dXkeb8uTo63AWqQKxrI+J11e640kUCE6Uyr6UfztzfD2xPKu1rdcsEaVClpPsl/SNDR+PvLxyj6MRNq3Ek8HHSwGqdVQybRsSPJB2XAzwrqfjAbeUMrTUdxA6SilYYAetFxFwNvVDq2YLtt6j6Dzd/0ir6dxwRyyUdQjrbrU1EfBxA6Qrbvyb9o58OrFNDuCW5ayaULsY6hnQPhpLukvRPDK3Mq2NQvSv9kNxfGxHbjbzZmB1Cusz9HFbesabYaHzuAx32LLpwX+h5pOO/MiJK/wFU/V5p9snWWeieQPtc9SV8qvJ8XdJUBPNou+H0GPVqLu+7lC4w+0ZePop6EshVkk4iXW25YgAyCt6aUNLRpC6y3Uh34/oOqXumDh8GvkaaI2kpcDHpe1fSB0jdWWfn5SsZn8o8oD+6ZU4HvhQRd/QoXi2j8XkQENKFJTD07CAi4h9W3WuNY/WkiiGXPX4d2BG4nVSe9o7S9dQd4tZRYbQ1K/tXHyXN5f3eUgP3lTi1VuVU4tReySTpk6QEOC8i6viUU431+vbuy07rxhij1sq8UR9PHyT3+aQE1bqtV+veiaVH/Wsfjc9xOpX1Fbv6tdJm7VUMSvOsH00a6HqS1M//9Sg8z3qHuHVUGL0jdzFNiLm8bahOfyOl/27qrswbrX7oljmgR3F6MRoPKTetOOPI/1SKXozWwyqGM0j94J/Ly+8hfSJ5Z8kgPaow+jTpasRaaqhb6q7KqcTpVSVTrST9CenT1Is0dL71aUDpaQFqrcwbrcYn917+14yIJW0DanVc1XkE8J1c9yxSF8AHCse4hdQPuhOpD/wxSXVUMezUdvZ8mdJkS6VVJ7x6FviP0hVGwP/kbob2PurSNejfJlfl5PZvkfRDoGhyB75LrmTKy3eS3tukSu7A2sAGpFxXnRr5CeAdhWOdoDQvU62Ved1qfHLvoV6MxhPpcuadc3Jv1T+XjtGrKoYbJO0ZEdfkeK+jhnuo9qjC6N3560cq6wLYunCcXlXl9KSSqW6RbnN5uaTv9uBEr1eVeV1xci+nF6PxrSsVTyBPx5vrqz9TMsn3sIphN+CXku7NyzOAha3KoLGOi/SywihqvhK5oldVOb2qZOqVUyW9M/KNbiRtTJqOt9jEYfSuMq8rTu7lbBdt83tIej3p4oySvkMasHlXXn4f6cz6LwvGWJc0XWndVQx1j4e8JX/tWGFUIoCkN0bEpZI6fv9r+Ej+EVJVzvaSlpKrcgrHAPgEafxoa0lXkSuZaojTK3Xewazll5J26FVl3kgaXy3TK70Yjc9tdppadpV1tlKdFUaSZkfE7Fxy25pjaMXXiCg2HtLLqpzxqmSqi1adX2gm6QrsktUyPanM65bP3Meox6PxAE9J2isiWjfmfj3pdmU2vDorjJ7MP/fbWJnUodxt/FboZVUOPapk6qFe3MGsV5V5XXFyH7tejsZD6ts/ozWgSqqWOXw121u9FUYb5K/bka4LOC/HeCswt1CMql5V5fSqkqknIuLCfJHRh4AbgXMpfFI0XvXsw3G3TCGStqz7h9v2sXwaQEQ8McJultVZYSTpCuAvYuVsjRsCP4uIotc6KE0x3S4iomhVjqQzgZPaKpk+EhGHlYzTK0rz0h8DbA7cRJpE7uqSV9xONP1wJ6ZeOVU13+0nIp4DPp2fP+HE3h1JL5D0ZVL98SWS/rXyyaeUlwDLKsvL8rqiImKrDo/S5ZawspLpHkn3kPrcXyvpVkm1Tg9Rk2NIn6wWR8S+wC7AY6vfZXJzt0w5vRiNh959LG+SXlQYnQHMVbpnL6S5/L9bqvFxqMqZUP3HBTwdEU9LQtI6EbFA0oQpW6yDk3s5z0ma0TYaX0ef17tzu+019HWcvTXFNm2ThJ0o6aaSASLis0rz0++dV70/Im4sGOINwKWkvvxVqnIofKHMROs/LuC+/Mn6XODnkh4FmvYeh3ByL6cXo/EAO5AS+16kP+orgW/WEKdJelJhFGk63GJz1rTpWVVOE0XE2/PT2UozXr4AuHAcD6l2HlAtKHfDtEbjnw88FGVvCIGkH5EqcX6QV70HeEFEvGv4vfqb0gydZ5D+oCFXGEXNUwuXJOmE/LRjVU5EHDpex2YTk5N7Ib0ajZd0R/tUtZ3WWdK0CqNeVeXY5OdqmXJ6NRp/Q57nA6hvsq2maGCFUU+qcmzyc597Ob0aja91sq2GalKFUa1VOdYc7pYpJP+xvR/4GOkWaI8Cz4uINxeOs+XqXm9glcOY5Qt/VvlFr6k+vHZKtydsVeVcUbgqxxrCyb0GkvYhj8ZHxLKRtrd6SXo+HSqMarj5iNmE4eRujecKI+tHTu7WeK4wsn7kahnrB64wsr7jM3drvHwThe2AIRVGpHuPusLIGsnJ3RrPFUbWj5zczcwayH3uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDfT/AeacRzne/39WAAAAAElFTkSuQmCC\n", "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1736,11 +1730,41 @@ "plt.title(\"Feature importances\")\n", "plt.bar(range(len(domain_score.classifier.feature_importances_)), importances[indices],\n", " color=\"r\", align=\"center\")\n", - "plt.xticks(range(len(domain_score.classifier.feature_importances_)), indices)\n", + "#dict_as_list = list(domain_score.vectorizer.vocabulary_.keys())[list(domain_score.vectorizer.vocabulary_.values())\n", + "#dict_as_list = dict_as_list * 2 \n", + "a = list(domain_score.vectorizer.vocabulary_.values())\n", + "a.extend([i + 6 for i in list(domain_score.vectorizer.vocabulary_.values())])\n", + "\n", + "features = list(domain_score.vectorizer.vocabulary_.keys())\n", + "features.extend([i + '2' for i in list(domain_score.vectorizer.vocabulary_.keys())])\n", + "features_ordered = [features[a.index(i)] for i in indices]\n", + "\n", + "plt.xticks(range(0,len(features_ordered)), \n", + " features_ordered,\n", + " rotation=90)\n", "plt.xlim([-1, len(domain_score.classifier.feature_importances_)])\n", "plt.show()" ] }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.tree import export_graphviz\n", + "export_graphviz(domain_score.classifier, out_file='tree.dot', feature_names=features_ordered)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is **included manually**! **Feature names aren't correct**, somewhere they get messed up!\n", + "\n", + "![tree](https://user-images.githubusercontent.com/3034832/36925096-86f2bba6-1e71-11e8-8dd4-4974146d9ca5.png)\n" + ] + }, { "cell_type": "code", "execution_count": null, From 0b48d1d3eea7eb256162bbc8531e36fcab7e5590 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 23:45:39 +0100 Subject: [PATCH 07/35] Tidy up notebook --- load_data_open-day.ipynb | 1572 +------------------------------------- 1 file changed, 9 insertions(+), 1563 deletions(-) diff --git a/load_data_open-day.ipynb b/load_data_open-day.ipynb index 64ddc5b..5f5d481 100644 --- a/load_data_open-day.ipynb +++ b/load_data_open-day.ipynb @@ -22,1007 +22,15 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'dataset': 'Rotten Tomato',\n", - " 'edge_type_selection': [['PRODUCED', True],\n", - " ['DIRECTED', True],\n", - " ['WROTE', True],\n", - " ['ACTED_IN', True]],\n", - " 'meta_paths': [{'time_to_rate': 0.024361},\n", - " {'id': 1,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.1'},\n", - " {'id': 2,\n", - " 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.25'},\n", - " {'id': 3,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.15'},\n", - " {'id': 4,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.15'},\n", - " {'id': 5,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.2'},\n", - " {'time_to_rate': 150.249221},\n", - " {'id': 6,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.16'},\n", - " {'id': 7,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.26'},\n", - " {'id': 8,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.32'},\n", - " {'id': 9,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.33'},\n", - " {'id': 10,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.19'},\n", - " {'time_to_rate': 145.500076},\n", - " {'id': 11,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.46'},\n", - " {'id': 12,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'],\n", - " 'rating': '0.33'},\n", - " {'id': 13,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.12'},\n", - " {'id': 14,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.27'},\n", - " {'id': 15,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.2'},\n", - " {'time_to_rate': 135.839568},\n", - " {'id': 16,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.29'},\n", - " {'id': 17,\n", - " 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.72'},\n", - " {'id': 18,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.5'},\n", - " {'id': 19,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.31'},\n", - " {'id': 20,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.52'},\n", - " {'time_to_rate': 385.761841},\n", - " {'id': 21,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.38'},\n", - " {'id': 22,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'],\n", - " 'rating': '0.44'},\n", - " {'id': 23,\n", - " 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'],\n", - " 'rating': '0.6'},\n", - " {'id': 24,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.55'},\n", - " {'id': 25,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.39'},\n", - " {'time_to_rate': 105.28709},\n", - " {'id': 26,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.52'},\n", - " {'id': 27,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.42'},\n", - " {'id': 28,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.55'},\n", - " {'id': 29,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.48'},\n", - " {'id': 30,\n", - " 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.52'},\n", - " {'time_to_rate': 95.974948},\n", - " {'id': 31,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.33'},\n", - " {'id': 32,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.22'},\n", - " {'id': 33,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.7'},\n", - " {'id': 34,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.26'},\n", - " {'id': 35,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.54'},\n", - " {'time_to_rate': 130.046159},\n", - " {'id': 36,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.54'},\n", - " {'id': 37,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.34'},\n", - " {'id': 38,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.54'},\n", - " {'id': 39,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.36'},\n", - " {'id': 40,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.28'},\n", - " {'time_to_rate': 98.257121},\n", - " {'id': 41,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.55'},\n", - " {'id': 42,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.34'},\n", - " {'id': 43,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.52'},\n", - " {'id': 44,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.42'},\n", - " {'id': 45,\n", - " 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.56'},\n", - " {'time_to_rate': 39.029786},\n", - " {'id': 46,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.69'},\n", - " {'id': 47,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.57'},\n", - " {'id': 48,\n", - " 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.42'},\n", - " {'id': 49,\n", - " 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'],\n", - " 'rating': '0.75'},\n", - " {'id': 50,\n", - " 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.67'},\n", - " {'time_to_rate': 69.869488},\n", - " {'id': 51,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.62'},\n", - " {'time_to_rate': 21.587904}],\n", - " 'node_type_selection': [['Person', True], ['Movie', True]],\n", - " 'purpose': '',\n", - " 'username': 'Merlin'}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "data =json.load(open(path, \"r\", encoding=\"utf8\"))\n", - "data" + "data =json.load(open(path, \"r\", encoding=\"utf8\"))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'time_to_rate': 0.024361},\n", - " {'id': 1,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.1'},\n", - " {'id': 2,\n", - " 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.25'},\n", - " {'id': 3,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.15'},\n", - " {'id': 4,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.15'},\n", - " {'id': 5,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.2'},\n", - " {'time_to_rate': 150.249221},\n", - " {'id': 6,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.16'},\n", - " {'id': 7,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.26'},\n", - " {'id': 8,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.32'},\n", - " {'id': 9,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.33'},\n", - " {'id': 10,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.19'},\n", - " {'time_to_rate': 145.500076},\n", - " {'id': 11,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.46'},\n", - " {'id': 12,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'],\n", - " 'rating': '0.33'},\n", - " {'id': 13,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.12'},\n", - " {'id': 14,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.27'},\n", - " {'id': 15,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.2'},\n", - " {'time_to_rate': 135.839568},\n", - " {'id': 16,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.29'},\n", - " {'id': 17,\n", - " 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.72'},\n", - " {'id': 18,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.5'},\n", - " {'id': 19,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.31'},\n", - " {'id': 20,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.52'},\n", - " {'time_to_rate': 385.761841},\n", - " {'id': 21,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.38'},\n", - " {'id': 22,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'],\n", - " 'rating': '0.44'},\n", - " {'id': 23,\n", - " 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'],\n", - " 'rating': '0.6'},\n", - " {'id': 24,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.55'},\n", - " {'id': 25,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.39'},\n", - " {'time_to_rate': 105.28709},\n", - " {'id': 26,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.52'},\n", - " {'id': 27,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.42'},\n", - " {'id': 28,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.55'},\n", - " {'id': 29,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.48'},\n", - " {'id': 30,\n", - " 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.52'},\n", - " {'time_to_rate': 95.974948},\n", - " {'id': 31,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.33'},\n", - " {'id': 32,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.22'},\n", - " {'id': 33,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.7'},\n", - " {'id': 34,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.26'},\n", - " {'id': 35,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.54'},\n", - " {'time_to_rate': 130.046159},\n", - " {'id': 36,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.54'},\n", - " {'id': 37,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.34'},\n", - " {'id': 38,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.54'},\n", - " {'id': 39,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.36'},\n", - " {'id': 40,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.28'},\n", - " {'time_to_rate': 98.257121},\n", - " {'id': 41,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.55'},\n", - " {'id': 42,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.34'},\n", - " {'id': 43,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.52'},\n", - " {'id': 44,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.42'},\n", - " {'id': 45,\n", - " 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.56'},\n", - " {'time_to_rate': 39.029786},\n", - " {'id': 46,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.69'},\n", - " {'id': 47,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.57'},\n", - " {'id': 48,\n", - " 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.42'},\n", - " {'id': 49,\n", - " 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'],\n", - " 'rating': '0.75'},\n", - " {'id': 50,\n", - " 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.67'},\n", - " {'time_to_rate': 69.869488},\n", - " {'id': 51,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.62'},\n", - " {'time_to_rate': 21.587904}]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data[\"meta_paths\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, "outputs": [ { "data": { @@ -1040,7 +48,7 @@ " 'rating': '0.1'}" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -1051,87 +59,7 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.1', 'id': 1}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.25', 'id': 2}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15', 'id': 3}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.15', 'id': 4}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.2', 'id': 5}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.16', 'id': 6}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26', 'id': 7}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.32', 'id': 8}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33', 'id': 9}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.19', 'id': 10}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.46', 'id': 11}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.33', 'id': 12}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.12', 'id': 13}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.27', 'id': 14}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.2', 'id': 15}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.29', 'id': 16}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.72', 'id': 17}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.5', 'id': 18}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.31', 'id': 19}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.52', 'id': 20}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.38', 'id': 21}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.44', 'id': 22}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.6', 'id': 23}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55', 'id': 24}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.39', 'id': 25}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52', 'id': 26}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42', 'id': 27}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.55', 'id': 28}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'], 'rating': '0.48', 'id': 29}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52', 'id': 30}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.33', 'id': 31}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'], 'rating': '0.22', 'id': 32}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.7', 'id': 33}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.26', 'id': 34}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'DIRECTED', 'Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.54', 'id': 35}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54', 'id': 36}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.34', 'id': 37}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.54', 'id': 38}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.36', 'id': 39}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.28', 'id': 40}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.55', 'id': 41}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.34', 'id': 42}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.52', 'id': 43}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42', 'id': 44}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.56', 'id': 45}\n", - "{'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.69', 'id': 46}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'], 'rating': '0.57', 'id': 47}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'], 'rating': '0.42', 'id': 48}\n", - "{'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'], 'rating': '0.75', 'id': 49}\n", - "{'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.67', 'id': 50}\n", - "{'metapath': ['Person', 'ACTED_IN', 'Movie', 'PRODUCED', 'Person'], 'rating': '0.62', 'id': 51}\n" - ] - } - ], - "source": [ - "i = 0\n", - "first = True\n", - "for probably_path in data[\"meta_paths\"]:\n", - " # Ignore first time_to_rate\n", - " if first:\n", - " first = False\n", - " continue\n", - " i += 1\n", - " if i == 6:\n", - " # Ignore time_to_rate\n", - " i = 0\n", - " else:\n", - " if 'time_to_rate' not in probably_path.keys():\n", - " print(probably_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -1157,488 +85,7 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[{'id': 1,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.1'},\n", - " {'id': 2,\n", - " 'metapath': ['Person', 'PRODUCED', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.25'},\n", - " {'id': 3,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.15'},\n", - " {'id': 4,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.15'},\n", - " {'id': 5,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.2'}],\n", - " [{'id': 6,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.16'},\n", - " {'id': 7,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.26'},\n", - " {'id': 8,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.32'},\n", - " {'id': 9,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.33'},\n", - " {'id': 10,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.19'}],\n", - " [{'id': 11,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.46'},\n", - " {'id': 12,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'WROTE', 'Person'],\n", - " 'rating': '0.33'},\n", - " {'id': 13,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.12'},\n", - " {'id': 14,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.27'},\n", - " {'id': 15,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.2'}],\n", - " [{'id': 16,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.29'},\n", - " {'id': 17,\n", - " 'metapath': ['Person', 'PRODUCED', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.72'},\n", - " {'id': 18,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.5'},\n", - " {'id': 19,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.31'},\n", - " {'id': 20,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.52'}],\n", - " [{'id': 21,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.38'},\n", - " {'id': 22,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'DIRECTED', 'Person'],\n", - " 'rating': '0.44'},\n", - " {'id': 23,\n", - " 'metapath': ['Person', 'PRODUCED', 'Movie', 'WROTE', 'Person'],\n", - " 'rating': '0.6'},\n", - " {'id': 24,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.55'},\n", - " {'id': 25,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.39'}],\n", - " [{'id': 26,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.52'},\n", - " {'id': 27,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.42'},\n", - " {'id': 28,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.55'},\n", - " {'id': 29,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.48'},\n", - " {'id': 30,\n", - " 'metapath': ['Person', 'DIRECTED', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.52'}],\n", - " [{'id': 31,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.33'},\n", - " {'id': 32,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.22'},\n", - " {'id': 33,\n", - " 'metapath': ['Person', 'ACTED_IN', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.7'},\n", - " {'id': 34,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.26'},\n", - " {'id': 35,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.54'}],\n", - " [{'id': 36,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.54'},\n", - " {'id': 37,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person'],\n", - " 'rating': '0.34'},\n", - " {'id': 38,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.54'},\n", - " {'id': 39,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.36'},\n", - " {'id': 40,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.28'}],\n", - " [{'id': 41,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.55'},\n", - " {'id': 42,\n", - " 'metapath': ['Person',\n", - " 'WROTE',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.34'},\n", - " {'id': 43,\n", - " 'metapath': ['Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.52'},\n", - " {'id': 44,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person'],\n", - " 'rating': '0.42'},\n", - " {'id': 45,\n", - " 'metapath': ['Person', 'DIRECTED', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.56'}],\n", - " [{'id': 46,\n", - " 'metapath': ['Person',\n", - " 'PRODUCED',\n", - " 'Movie',\n", - " 'PRODUCED',\n", - " 'Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'WROTE',\n", - " 'Person'],\n", - " 'rating': '0.69'},\n", - " {'id': 47,\n", - " 'metapath': ['Person',\n", - " 'DIRECTED',\n", - " 'Movie',\n", - " 'ACTED_IN',\n", - " 'Person',\n", - " 'ACTED_IN',\n", - " 'Movie',\n", - " 'DIRECTED',\n", - " 'Person'],\n", - " 'rating': '0.57'},\n", - " {'id': 48,\n", - " 'metapath': ['Person', 'WROTE', 'Movie', 'ACTED_IN', 'Person'],\n", - " 'rating': '0.42'},\n", - " {'id': 49,\n", - " 'metapath': ['Person', 'DIRECTED', 'Movie', 'WROTE', 'Person'],\n", - " 'rating': '0.75'},\n", - " {'id': 50,\n", - " 'metapath': ['Person', 'WROTE', 'Movie', 'PRODUCED', 'Person'],\n", - " 'rating': '0.67'}]]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "batches" - ] - }, - { - "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -1658,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -1679,7 +126,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1703,9 +150,8 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAE2CAYAAACaxNI3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucXVV99/HPN0RALhEQNQqEAAoIKHIRaQUZpBWwarD1AopQxYpFKmpVykP7EGzVR33qpVIUBVFEm4oIWCwXCwwgAoFwhyREgRACD4hcVTAQfs8fa51kz8mZzJnM2mdm9vm+X6/zmrP32Xv99pnLb/ZZ67fXVkRgZmbNMmW8D8DMzMpzcjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3frO5K+Ien48T4OszrJde7WLUn3AC8GngUEBLBtRPy/MbS5D3BmRGxR5CAnGUmnA0si4n+P97FYs0wd7wOwSSWAv4iIywq22fonsWY7S2tFxPKCx9MzkvzJ2WrjXy4bLXVcKe0p6SpJj0q6MZ+Rt177a0l3SHpC0q8kfSivXw/4b+Blkp7Mr0+XdLqkz1T230fSksry3ZI+Lelm4HeSpkh6qaQfS3pI0q8l/d2wb6DSfqttSZ+S9KCkpZJmSTpQ0kJJD0s6rrLvCZLOkjQnH+/1kl5deX17SZfl78Otkt7aFvdkST+T9CRwBPBe4NO5rfPydsfm79MTkm6TdFCljcMlXSnpS5Ieye/1gMrrG0v6Tn4fv5X0k8prb8k/m0cl/ULSqyqvHSvpvhxzvqR9h/v+2SQREX740dUDuBt4Y4f1LwMeBvbPy/vl5Rfm5QOBmfn53sDvgdfk5X2Ae9vaOx34TGV5yDb5OG7Icdch/cO5HjgeWAuYCfwK+PNh3seK9nPbz1T2/SDwEHAmsB6wA/AHYMu8/QnAH4G35+3/HrgrP58KLAKOzc/3BZ4AXlGJ+yiwZ15ep/295vV/BbwkP38n8LvK8uE5/gfy+/4wsLSy78+A/wCm5WPaO6/fBXgQ2D3v9778fXwesC1wbyXGDGCr8f5982NsD5+522idm88YH6mcFR4K/CwiLgKIiEtIyfbNefmCiLgnP78SuJiU5MfiaxFxf0T8EXgtsGlEfDYiludYpwIHd9nWMuBzkbp35gCbAl+NiD9ExB3AHcDOle3nRcQ5efsvk5L0nvmxfkR8ISKejdR9dT5wSGXf8yLiGoB87KuIiLMj4sH8/CzSP4w9KpssjojvREQA3wNeKunFkqYD+wNHRsQT+XtxZd7nb4BvRsT1kXyf9E9iT2A5sDawk6SpEXFvRNzd5ffOJij3udtozYpV+9y3BN5V6YIQ6XfrUgBJBwL/m3SGOAV4PnDLGI/jvrb4m0l6pBJ/CnBFl239NidKgKfy14cqrz8FbFBZXtFFFBEhaSnpU4Sqr2WLgc067TscSYcBHyd9AgFYn/QPp2XFAHZEPCWJfHwvBB6JiCc6NLslcFilu0qks/aXRcSVkj4GzAZ2kHQR8PcR8cBIx2oTl5O7jVanPvclwBkRceQqG0trAz8mnd2fFxHPSTqn0k6nwdTfk7pEWl7aYZvqfkuAuyJiuy6Ov4QVlT1KmXVz4H7Se5rRtu0MYGFluf39DlmWNAP4FrBvRFyd193IMGMdbZYAm0ia1iHBLwE+GxGf77RjRMwB5kjaIMf/P6QuIJuk3C1jJZwJvFXSm/Lg5rp5oPJlpI/7awMP58R+IPCmyr4PAi+UNK2y7ibgzXlwcDpwzAjx5wJP5kHWdSWtJWlHSbuXe4tD7CbpIElrkc6wnwauAa4Ffp+PY6qkAeAtpD7w4TwIbF1ZXh94Dng4fy/fD+zUzUFFKkm9ADhZ0kb5GFrdX98GPixpDwBJ60t6c/66raR98z/iZaRPKs919Z2wCcvJ3UajY8liRNwHzAL+F/AbUlfEJ4EpEfE74KPAWbnb5GDgvMq+C0nJ767cjz8d+D6p2+Ye4EJSP/iwxxERz5GS6GtIg4QPkZLZNNbMas+u8/G/mzQ4+l7g7bl/+xngraSxhoeBk4D3RcSiYdoBOA3YsTWGERHzSf3415C6X3YEfjGK430f6TqEBaR/HMcARMQ8Ur/7SfnncCcrz8zXIZ2p/4b0CeRFwHHYpNbVRUy51OqrpH8Gp0XEF9pePxL4CGlg5kngQxGxIL92HGlk/1ngmIi4uOg7MOshSScA20TEYeN9LGarM+KZu9KFFieRRuF3BA6RtH3bZj+IiFdHxC7Al4Cv5H13AN4FvJJUDndy7qM0M7MaddMtswewKCIW54+dc0gfwVfIH71bNmBlf93bgDm5LOweVi3pMjOzGnRTLbMZQ8u37qNDgpZ0FPAJUnnVGyv7Xl3ZbClDy8LMJpWIOHG8j8GsG8UGVCPi5Ih4OenqvH8q1a6ZmY1eN2fuSxlau7t5Xjec/wS+Wdm3Ottfx30leWpKM7M1EBEdxzG7OXO/Dni5pC1zHezBwE+rG0h6eWXxLaQyK/J2B0taW9JWwMtJNcmdDrD2xwknnOA4EzCG40zcGI4zcWNErP6ceMQz94hYLulo0nwgrVLI+ZJOBK6LiPOBoyX9GekCiEfJ9bMRcYekH5Hm5ngGOCpGOiIzMxuzrqYfiIgLge3a1p1Qef6x1ez7eaDjJc9mZlaPvrpCdWBgwHEmYAzHmbgxHGfixhjJhLjNniT31piZjZIkYgwDqmZmNsk4uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNVAjkvvM6dORVPQxc/r08X5bZmZrrBFXqErqfOfmMRCMOOuamdl48hWqZmZ9xsndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzcR6H07JOeedLM6uJZIYdrk1VnhSwdxzNPmtlYeFZIM7M+4+RuZtZAXSV3SQdIWiDpTknHdnj945Jul3STpJ9L2qLy2nJJN0i6UdK5JQ/ezMw6G7HPXdIU4E5gP+B+4Drg4IhYUNlmH+DaiHha0oeBgYg4OL/2RERMGyGG+9zNzEZprH3uewCLImJxRDwDzAFmVTeIiMsj4um8eA2wWTX+GhyzmZmNQTfJfTNgSWX5PoYm73ZHABdUlteRNFfSLyXNGm4nMzMrZ2rJxiQdCuwG7FNZvWVEPCBpK+BSSbdExN0l45qZ2VDdJPelwIzK8uZ53RCS/gw4DnhD7r4BICIeyF/vljQI7AKsktxnz5694vnAwAADAwPdHL+ZWd8YHBxkcHCwq227GVBdC1hIGlB9AJgLHBIR8yvb7AKcBewfEb+urN8I+ENELJO0KXAVMKs6GJu384CqmdkorW5AdcQz94hYLulo4GJSH/1pETFf0onAdRFxPvBFYH3gLEkCFkfEQcArgVMkLc/7fr49sZuZWXmefmC4NvGZu5lNbJ5+wMyszzi5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQN1ldwlHSBpgaQ7JR3b4fWPS7pd0k2Sfi5pi8prh+f9Fko6rOTBm5lZZ4qI1W8gTQHuBPYD7geuAw6OiAWVbfYBro2IpyV9GBiIiIMlbQxcD+wKCJgH7BoRj7fFiJGOY4RjZM33HqZNoP2YSsfpFMPMrFuSiAh1eq2bM/c9gEURsTgingHmALOqG0TE5RHxdF68BtgsP98fuDgiHo+Ix4CLgQPW5E2YmVn3uknumwFLKsv3sTJ5d3IEcMEw+y4dYV8zMytgasnGJB0K7AbsU7JdMzMbnW6S+1JgRmV587xuCEl/BhwHvCF337T2HWjb97JOQWbPnr3i+cDAAAMDA502MzPrW4ODgwwODna1bTcDqmsBC0kDqg8Ac4FDImJ+ZZtdgLOA/SPi15X11QHVKfn5brn/vRrDA6pmZqO0ugHVEc/cI2K5pKNJg6FTgNMiYr6kE4HrIuJ84IvA+sBZkgQsjoiDIuJRSf9MSuoBnNie2M3MrLwRz9x7chA+czczG7WxlkKamdkk4+RuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBdJXdJB0haIOlOScd2eH1vSfMkPSPpL9teWy7pBkk3Sjq31IGbmdnwpo60gaQpwEnAfsD9wHWSzouIBZXNFgOHA5/s0MTvI2LXEgdrZmbdGTG5A3sAiyJiMYCkOcAsYEVyj4h782vRYX8VOE4zMxuFbrplNgOWVJbvy+u6tY6kuZJ+KWnWqI7OzMzWSDdn7mO1ZUQ8IGkr4FJJt0TE3T2Ia2bWt7pJ7kuBGZXlzfO6rkTEA/nr3ZIGgV2AVZL77NmzVzwfGBhgYGCg2xBmZn1hcHCQwcHBrrZVRKdu8soG0lrAQtKA6gPAXOCQiJjfYdvTgfMj4uy8vBHwh4hYJmlT4CpgVttgLJJipOMY4RhZ872HaRNoP6bScTrFMDPrliQiouO45oh97hGxHDgauBi4HZgTEfMlnSjpLTnA7pKWAO8Avinp1rz7K4HrJd0IXAJ8vj2xm5lZeSOeuffkIHzmbmY2amM6czczs8nHyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNwnmJnTpyOp6GPm9Onj/bbMrMc8K+RwbTI+s0L26r2Y2eTnWSHNzPqMk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQN1ldwlHSBpgaQ7JR3b4fW9Jc2T9Iykv2x77fC830JJh5U6cDMzG96I87lLmgLcCewH3A9cBxwcEQsq28wApgGfBH4aET/J6zcGrgd2JU0rPg/YNSIeb4vh+dxrijFcHDOb/MY6n/sewKKIWBwRzwBzgFnVDSLi3oi4DVbJS/sDF0fE4xHxGHAxcMCo34GZmY1KN8l9M2BJZfm+vK4b7fsuHcW+Zma2hjygambWQFO72GYpMKOyvHle142lwEDbvpd12nD27Nkrng8MDDAwMNBpMzOzvjU4OMjg4GBX23YzoLoWsJA0oPoAMBc4JCLmd9j2dOD8iDg7L1cHVKfk57vl/vfqfh5QrSnGcHHMbPIb04BqRCwHjiYNht4OzImI+ZJOlPSWHGB3SUuAdwDflHRr3vdR4J9JSf1a4MT2xG5mZuWNeObek4PwmXttMYaLY2aT31hLIc3MbJJxcu9TM6dPR1LRx8zp08f7bZlZ5m6Z4dqk2d0y7v4xm/zcLWNm1mec3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3K1WvljKbHz4Iqbh2sQXMU2mOGb9yBcxmZn1GSd3M7MGcnI3M2sgJ3czswZycjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3czswZycjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3czswbqKrlLOkDSAkl3Sjq2w+trS5ojaZGkqyXNyOu3lPQHSTfkx8ml34CZma1q6kgbSJoCnATsB9wPXCfpvIhYUNnsCOCRiHiFpHcDXwQOzq/9KiJ2LXzcZma2Gt2cue8BLIqIxRHxDDAHmNW2zSzge/n5j0n/CFo63gLKzMzq001y3wxYUlm+L6/ruE1ELAcek7RJfm2mpHmSLpO011gP2MzMRjZit8waap2tPwDMiIhHJe0KnCtph4j4XU1xzcyM7pL7UmBGZXnzvK7qPmAL4H5JawHTIuKR/NoygIi4QdKvgW2BG9qDzJ49e8XzgYEBBgYGunsHZmZ9YnBwkMHBwa62VUSsfoOUrBeS+tEfAOYCh0TE/Mo2RwE7RcRRkg4GDoqIgyVtShpofU7S1sDlwKsi4rG2GDHScYxwjKz53sO0CbQfU+k4vYjRL3HM+pEkIqLjuOaIZ+4RsVzS0cDFpD760yJivqQTgesi4nzgNOD7khYBv2VlpcwbgM9IWgY8BxzZntjNzKy8Ec/ce3IQPnOvLUa/xDHrR6s7c/cVqmZmDeTkbo0wc/p0JBV7zJw+fbzfktmYuFtmuDZxt0w/x3HXj00G7pYxM+szTu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORu1qXStfSup7c6uc59uDZxnXs/x/FUCjYZuM7dzKzPOLmbmTWQk7vZBOO+fSvBfe7DtUmz+3UdZ/xjjHccm/zc525m1mec3M36lLt/ms3dMsO1iT/693OcJr2X8Y5j9XG3jJlZn3FyNzNrICd3M7MGcnI3M2sgJ3czswZycjezWpUuuXS5ZXdcCjlcm7jcrp/jNOm9NC2Oyy1XcimkmVmfcXI3M2sgJ3czm/Q8lcKqukrukg6QtEDSnZKO7fD62pLmSFok6WpJMyqvHZfXz5f0ppIHb2YGsPjBBwko+lj84IO9fROFjZjcJU0BTgL2B3YEDpG0fdtmRwCPRMQrgK8CX8z77gC8C3glcCBwsqSOnf+9MOg4EzKG40zcGI4z1GT6hNDNmfsewKKIWBwRzwBzgFlt28wCvpef/xh4Y37+NmBORDwbEfcAi3J742LQcSZkDMeZuDEcZ6huPyGc0MU2dX9C6Ca5bwYsqSzfl9d13CYilgOPS9qkw75LO+xrZmaF1TWgOm5dL2ZmRroYYHUPYE/gwsryPwDHtm1zAfC6/Hwt4KFO2wIXtrZr27/0WIgffvjhR188hsvdUxnZdcDLJW0JPAAcDBzSts1/AYcD1wLvBC7N638K/EDSV0jdMS8H5rYHGO4KKzMzWzMjJveIWC7paOBiUjfOaRExX9KJwHURcT5wGvB9SYuA35L+ARARd0j6EXAH8Axw1JjmGTAzs65MiLllzMysLF+hambWQE7uZmYN5ORuZtZA3VTLTGqStgW+AbwkInaS9GrgbRHxL4XaXwv4ILA5qWT0qspr/1gwzq6rez0ibigQoyfvpS3mZsCWVH4XI+KKgu3vT3o/l+SrpFvrPxAR3ynQ/idW93pEfHmsMXKcacBxpPdyQUT8sPLayRFxVIk4ub1av2eV9tYD/h6YERF/I+kVwHa5SKM4SX8KzGTo79oZBdrdAvgSqSLwAuBL+Wp+JJ0bEQeNNcYaHVfTB1QlXQ58CjglInbJ626LiJ0KtX8qsB6pxPN9wOUR8Yn82g0RsdqkPIo4l+Wn6wK7AzeTLhZ7NXB9RPxJgRg9eS+VeF8A3k2qplqeV0dEvK1Q+58D9gJuAN4KfDUivp5fK/J+JJ2Qn24HvJZU/kuONzciDh1rjBznbNL0HdcAHyBVn70nIv5Y+Pes9u9ZJdZ/AvOAw/KJ13rALyPiNaViVGJ9H9gGuImhv2sfLdD2z4GzST+bI4DdgLdGxG8l3djKOz030kVMk/1BKtcEuLGy7qaC7d9SeT4V+BbwE2CdasyC8X4CvKqyvBPw40n6XhYC69T4s78VmJqfbwT8N/CV9t+HQrGuADasLG8IXFGw/Zvalo8HrgJeCNwwSb9n17e3C9xc0+/CfPLJbA1tt/9sDgVuJ/0zKfazGe2jH/rcH5a0DelqLiS9g3QxVilrt55EmiDtQ6Szg0uBDQrGadkuIm6txLyNNOtmCb1+L3cBz6uh3ZapEfEsQEQ8RjoTnSbpLCrvtZCXAMsqy8vyulLWyTO0AhARnwW+Tfqn8sKCcXr5PVsm6fms/NvcBvhj4RgttwF1TdD+PEnrthYi4kzgGOAi4KU1xRxRPyT3jwCnANtLWgp8DPjbgu1fL+mA6oqI+AxwOql/r7RbJJ0qaSA/vg3cUqjtXr+XPwA3STpF0r+1HgXb/7WkfVoLEbE8Io4gfWIo9Q+x5QxgrqTZkmaTrtb+3up3GZX/YuVsqwBExHdJfdbLOu2whnr5PZtNmpJkC0k/AC4BVrlfRCGbAndIukjST1uPQm2fCryuuiIi/od0tf5thWKMWuP73FskrQ9MiYgnx/tYxiKfIfwt8Ia86grgGxHx9Pgd1ZqRdHin9RFRJCnms0Ii4qkOr20WEUtLxKm0uSuwd168IiJuLNl+L4zD9+yFpPmrBFwTEQ+XbL8SZ59O6yPi8jriTQSNTe6SDo2IM4erZohCVQxtMWsZje8Q5/mkCoOFpdvO7fesikHS2sC2eXFh5CqDQm3XXmHUFm8v4BURcbqkFwEbRMTdhdruSVVOW8y6K5kuiYj9Rlo3WdRdmTdaTS6FXD9/3bAXwYYbjSd9XC8Z522ksqu1ga0kvQb4TBSqMMlOJ1UxtCpwlgJnAUWTu6QBUtfFPaQzty0kHV4wgfxr/tqxwoiV72/MctXM7qSqmdNJYwlnAq8vFKL1e9yxKqdQjBWGq2QifVIca9vrkqqyNpW0MSunCJ9G4fs9SPpFROwl6Uly337rJVK1zLSC4b5NrswjNX6LpB8C45Lcx2UUt5cP4EU9ilPbaHxbnHnACxhaYXBr4Rg9qWLI72W7yvK2wLwa4tRWYVRp8yZSwqh+z24pGSO3WWtVTqXd2iqZSIONd5MGT++uPG4Gjq4jZhfHtHGBNmqtzBvtox8GVK+SdLGkI/JZQl3qHI2veiYiHm9bV7pvrVdVDM+LStdSRNxJPdUzdVYYtSyL9Nfc+p6tP8L2a6ruqpyW2iqZIuJrEbEV8MmI2Kry2DkiTqojZhcuKdBG3ZV5o9LkbhkAImJbSXuQpiE+XtIdpPu6nlk4VGs0fi6VRBhlu0sAbpf0HmCt3Bf+UeCXhWPMZmgVw+uB9xeOAak651RS9wWk+uDra4hzS1uc91KuwqjlR5JOATaS9DekC41OLRwDVlblnJOXD6JsVU5Lq5LpEob+Po/5op+KUyR9lJXFAYOkiw2LjbuMQol7SnyEdG1IqzLvbtLv9Lho7IBqJ5I2Bb4MvDci1ircdk9G4/Ng5/HAm/Kqi4B/icLVMr2oYpC0DukPYq+86krg5Igo+imhVxVGkv6c9HMRcFFE/Lxk+5U4tVfl1F3JlGOcSvp00GrzfcDyiPhgqRijOJaSV/lOiMq8xid3pTk53k46c98GOAf4UUTMG9cDm8DGo4pB6Ybqm0dE6TPqVvt1Vxh9ISKOHWldoVi1VeW0xamtkim3f3NE7DzSul4YS3Ifj8q8bvRDn/vNQKuiZNuIOLZkYpf0i/z1SUlPVB5PSnqiVJxKvJ9L2qiyvLGkiwq1vW5OspvmdjfJj5kUrmLI8QYlTcsx5wHfVrolY+k4byMNeF6Yl19T8AKWlj/vsO7AwjFaVTnHkiYRg5VVOaXjDJDmsvl34GTgTklvWO1Oo7c891G3Ym7NysqcXhtLt0y1Mq/TY1w0vs8d2DoiQtIGkjaIiN+VbDwi9spfV/tDlLRxRDxaIOSmkS4Lb8V/VNKLC7QLcCTpCt6XkSaOankCqGOg6wUR8YSkDwJnRMQJkuo4cz8B2IPUp0tE3CRpqxINS/pb4Chgm7Zj35DyYyGQPoXuQv75RMT9kupIIP8KvKn1SSfXcP8HaVKsUj4FXCbpLlJy3ZLCYzv5xGFYEfFIfrrGn0oj4pT89OSI+M2atlNaPyT3HXMN+iaAJP0GODxXTPTSJUCJPr3nJM2IiHsBlG5cXqRvLSK+BnxN0t9FngmwZlMlvRR4F2kcoS7PRMTj0pCTs1L9kT8kTfP6eeAfKuufrCSOkpblk5W6q3JWqWSSVKx6RmmenKeAV5Bq9yF1/ZSuyppH+lkLmAE8mp9vBNwLbAVDkvxYXCXpHuA/gZ8UOplbY/2Q3L8FfCIiLoMVHze/Bfxpj4+jxGg8pCT4C6WpjEUaWPtQobZbelXFcCJpQPgXEXFd/li+qHAMqLHCKJelPi7pa8AjrUG03N30uoi4tkScil5V5dRayRQRz0n690jT4dYyzpLjbAWgNAfTORHx33n5QFKlUclYvarM60o/DKhOiEGbwqPxm5IqWaCGSpZeVDEo3RjkoxFRvI+9Q6zaK4wk3QjsmmvdW2em15f6mbfFqr0qpxeVTJL+L3A16Sy31kQk6daIeNVI6wrGq60yr+tj6IPkfg6pf/L7edWhwG4R8fYeH0epm0N0HNSKsnN+9OQfoqS5EbFHyTbHi6Sbou0mE5JuiYhXF47Ts6qcSvu1VDIpTQmwHmkQtfVPI6LslACtWBeR/kFVr3V4Q0TsXzDGhKrM64dqmQ8ALyLdKeVs0sVGdVyQM5JS3TKfqjz+iTQV7OxCbbf0qorhKkknSdpb0q6tR+kgdVYYVdwl6aOSnpcfx5Cu8iytV1U5vahkOo90/cHOEbFhfhRP7NkhpDxwDmk6ihfldSXVWpk3Wv1w5r476SP5TFaOMUSpM6puR+MlbVLHAJvS/Ru/GhF/VbDN/UiTXw2pYmiNWxSM06m9iIg3dlg/ljir3Oqs07oxxngx8G+kOdeDNID+sYh4qFD7K6pygF9VXtqQdGu695aIU4l3Y0TskiuZtmhVMpX8JCJpX9KY0d7kuxYBV+aB/VpIWj8ifl9T22pV5gGUrswb9fH0QXJfCHySNPfLc631EbG4UPt3s5rR+NaATl2USkBuj4gdCrU3hdSfP496qxh6RtI84O1tFUbn1NEfXhdJLwA2pkdVOZJuJfXrfw84Pg9419HNtBZplst9gQ8DT0XE9iVj5Dh/Shp43iAiZkjaGTgyyt5YfCdS9+8mpBwwXpV5QH9Uy/wmIv6rrsZ7ORqf2/06K8v4ppA+Bhabl7xXVQwAkl4CfA54WUQcKGkH4E8i4rTCoWqvMFLNc3mPQ1VO7ZVMSvPWrE8aVL0SeG2pTzodfAXYnzxVckTcPNyt6nKAAAAHg0lEQVT41RhMlMo8oD/O3Pcj9a21T4D0k8JxejIar6FzfjwL3BMRVxWO0ZMqBkkXkLp/jo+InSVNJU2XWryCoQcVRpeT5/JudfdIui0idiocp/aqnF5VMuU+/N1If5dXkeb8uTo63AWqQKxrI+J11e640kUCE6Uyr6UfztzfD2xPKu1rdcsEaVClpPsl/SNDR+PvLxyj6MRNq3Ek8HHSwGqdVQybRsSPJB2XAzwrqfjAbeUMrTUdxA6SilYYAetFxFwNvVDq2YLtt6j6Dzd/0ir6dxwRyyUdQjrbrU1EfBxA6Qrbvyb9o58OrFNDuCW5ayaULsY6hnQPhpLukvRPDK3Mq2NQvSv9kNxfGxHbjbzZmB1Cusz9HFbesabYaHzuAx32LLpwX+h5pOO/MiJK/wFU/V5p9snWWeieQPtc9SV8qvJ8XdJUBPNou+H0GPVqLu+7lC4w+0ZePop6EshVkk4iXW25YgAyCt6aUNLRpC6y3Uh34/oOqXumDh8GvkaaI2kpcDHpe1fSB0jdWWfn5SsZn8o8oD+6ZU4HvhQRd/QoXi2j8XkQENKFJTD07CAi4h9W3WuNY/WkiiGXPX4d2BG4nVSe9o7S9dQd4tZRYbQ1K/tXHyXN5f3eUgP3lTi1VuVU4tReySTpk6QEOC8i6viUU431+vbuy07rxhij1sq8UR9PHyT3+aQE1bqtV+veiaVH/Wsfjc9xOpX1Fbv6tdJm7VUMSvOsH00a6HqS1M//9Sg8z3qHuHVUGL0jdzFNiLm8bahOfyOl/27qrswbrX7oljmgR3F6MRoPKTetOOPI/1SKXozWwyqGM0j94J/Ly+8hfSJ5Z8kgPaow+jTpasRaaqhb6q7KqcTpVSVTrST9CenT1Is0dL71aUDpaQFqrcwbrcYn917+14yIJW0DanVc1XkE8J1c9yxSF8AHCse4hdQPuhOpD/wxSXVUMezUdvZ8mdJkS6VVJ7x6FviP0hVGwP/kbob2PurSNejfJlfl5PZvkfRDoGhyB75LrmTKy3eS3tukSu7A2sAGpFxXnRr5CeAdhWOdoDQvU62Ved1qfHLvoV6MxhPpcuadc3Jv1T+XjtGrKoYbJO0ZEdfkeK+jhnuo9qjC6N3560cq6wLYunCcXlXl9KSSqW6RbnN5uaTv9uBEr1eVeV1xci+nF6PxrSsVTyBPx5vrqz9TMsn3sIphN+CXku7NyzOAha3KoLGOi/SywihqvhK5oldVOb2qZOqVUyW9M/KNbiRtTJqOt9jEYfSuMq8rTu7lbBdt83tIej3p4oySvkMasHlXXn4f6cz6LwvGWJc0XWndVQx1j4e8JX/tWGFUIoCkN0bEpZI6fv9r+Ej+EVJVzvaSlpKrcgrHAPgEafxoa0lXkSuZaojTK3Xewazll5J26FVl3kgaXy3TK70Yjc9tdppadpV1tlKdFUaSZkfE7Fxy25pjaMXXiCg2HtLLqpzxqmSqi1adX2gm6QrsktUyPanM65bP3Meox6PxAE9J2isiWjfmfj3pdmU2vDorjJ7MP/fbWJnUodxt/FboZVUOPapk6qFe3MGsV5V5XXFyH7tejsZD6ts/ozWgSqqWOXw121u9FUYb5K/bka4LOC/HeCswt1CMql5V5fSqkqknIuLCfJHRh4AbgXMpfFI0XvXsw3G3TCGStqz7h9v2sXwaQEQ8McJultVZYSTpCuAvYuVsjRsCP4uIotc6KE0x3S4iomhVjqQzgZPaKpk+EhGHlYzTK0rz0h8DbA7cRJpE7uqSV9xONP1wJ6ZeOVU13+0nIp4DPp2fP+HE3h1JL5D0ZVL98SWS/rXyyaeUlwDLKsvL8rqiImKrDo/S5ZawspLpHkn3kPrcXyvpVkm1Tg9Rk2NIn6wWR8S+wC7AY6vfZXJzt0w5vRiNh959LG+SXlQYnQHMVbpnL6S5/L9bqvFxqMqZUP3HBTwdEU9LQtI6EbFA0oQpW6yDk3s5z0ma0TYaX0ef17tzu+019HWcvTXFNm2ThJ0o6aaSASLis0rz0++dV70/Im4sGOINwKWkvvxVqnIofKHMROs/LuC+/Mn6XODnkh4FmvYeh3ByL6cXo/EAO5AS+16kP+orgW/WEKdJelJhFGk63GJz1rTpWVVOE0XE2/PT2UozXr4AuHAcD6l2HlAtKHfDtEbjnw88FGVvCIGkH5EqcX6QV70HeEFEvGv4vfqb0gydZ5D+oCFXGEXNUwuXJOmE/LRjVU5EHDpex2YTk5N7Ib0ajZd0R/tUtZ3WWdK0CqNeVeXY5OdqmXJ6NRp/Q57nA6hvsq2maGCFUU+qcmzyc597Ob0aja91sq2GalKFUa1VOdYc7pYpJP+xvR/4GOkWaI8Cz4uINxeOs+XqXm9glcOY5Qt/VvlFr6k+vHZKtydsVeVcUbgqxxrCyb0GkvYhj8ZHxLKRtrd6SXo+HSqMarj5iNmE4eRujecKI+tHTu7WeK4wsn7kahnrB64wsr7jM3drvHwThe2AIRVGpHuPusLIGsnJ3RrPFUbWj5zczcwayH3uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDfT/AeacRzne/39WAAAAAElFTkSuQmCC\n", "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1748,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ From f2c8358b7fb28d076d6f9a2d083654abaa9b0a03 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 23:47:45 +0100 Subject: [PATCH 08/35] Move notebook --- .../sb-1.0-load_data_open-day.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename load_data_open-day.ipynb => notebooks/sb-1.0-load_data_open-day.ipynb (100%) diff --git a/load_data_open-day.ipynb b/notebooks/sb-1.0-load_data_open-day.ipynb similarity index 100% rename from load_data_open-day.ipynb rename to notebooks/sb-1.0-load_data_open-day.ipynb From 086de3de427a0bb083c56e192afd45eb6efc4591 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 23:52:31 +0100 Subject: [PATCH 09/35] Rename notebook --- ...d_data_open-day.ipynb => sb-1.0-classification_open-day.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename notebooks/{sb-1.0-load_data_open-day.ipynb => sb-1.0-classification_open-day.ipynb} (100%) diff --git a/notebooks/sb-1.0-load_data_open-day.ipynb b/notebooks/sb-1.0-classification_open-day.ipynb similarity index 100% rename from notebooks/sb-1.0-load_data_open-day.ipynb rename to notebooks/sb-1.0-classification_open-day.ipynb From fdbc5cfae0e678842f184c6e8821499f7b904c46 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Fri, 2 Mar 2018 23:53:05 +0100 Subject: [PATCH 10/35] Add pruned notebook --- notebooks/sb-1.0-load_data_open-day.ipynb | 138 ++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 notebooks/sb-1.0-load_data_open-day.ipynb diff --git a/notebooks/sb-1.0-load_data_open-day.ipynb b/notebooks/sb-1.0-load_data_open-day.ipynb new file mode 100644 index 0000000..ac25dd2 --- /dev/null +++ b/notebooks/sb-1.0-load_data_open-day.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "path = 'rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "data =json.load(open(path, \"r\", encoding=\"utf8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 1,\n", + " 'metapath': ['Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.1'}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"meta_paths\"][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "i = 0\n", + "first = True\n", + "batches = []\n", + "batch = []\n", + "for probably_path in data[\"meta_paths\"]:\n", + " # Ignore first time_to_rate\n", + " if first:\n", + " first = False\n", + " continue\n", + " i += 1\n", + " if i == 6:\n", + " # Ignore time_to_rate\n", + " i = 0\n", + " batches.append(batch)\n", + " batch = []\n", + " else:\n", + " if 'time_to_rate' not in probably_path.keys():\n", + " batch.append(probably_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from util.datastructures import MetaPathRatingGraph, MetaPath\n", + "graph = MetaPathRatingGraph()\n", + "\n", + "for batch in batches:\n", + " #ordered = sorted(batch, key=lambda x: float(x['rating']))\n", + " for metapath in batch:\n", + " for another_metapath in batch:\n", + " if metapath is another_metapath:\n", + " continue\n", + " if float(metapath['rating']) <= float(another_metapath['rating']):\n", + " graph.add_user_rating(MetaPath.from_list(another_metapath['metapath']), MetaPath.from_list(metapath['metapath']), \n", + " distance=float(another_metapath['rating']) - float(metapath['rating']))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 7690ee433df3a1f3bacbc5d882b5c55ea39c42ee Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Sat, 3 Mar 2018 00:24:41 +0100 Subject: [PATCH 11/35] Add dynamic rendering of decision tree --- Dockerfile | 4 +- .../sb-1.0-classification_open-day.ipynb | 848 +++++++++++++++++- requirements.txt | 1 + 3 files changed, 837 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index d931d29..aefcfd4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:16.04 EXPOSE 8000 # TODO: Do we really need python3-dev? -RUN apt-get update && apt-get install -y python3-pip python3 dirmngr +RUN apt-get update && apt-get install -y python3-pip python3 dirmngr graphviz RUN apt-key adv --keyserver pgp.skewed.de --recv-key 612DEFB798507F25 RUN echo "deb http://downloads.skewed.de/apt/xenial xenial universe" | tee -a /etc/apt/sources.list @@ -9,6 +9,8 @@ RUN echo "deb-src http://downloads.skewed.de/apt/xenial xenial universe" | tee - RUN apt-get update && apt-get install -y libboost-all-dev RUN apt-get update -qq && apt-get install -y python3-graph-tool +RUN pip3 install jupyter + COPY . /32de-python/ WORKDIR /32de-python diff --git a/notebooks/sb-1.0-classification_open-day.ipynb b/notebooks/sb-1.0-classification_open-day.ipynb index 5f5d481..76ff5dc 100644 --- a/notebooks/sb-1.0-classification_open-day.ipynb +++ b/notebooks/sb-1.0-classification_open-day.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = 'rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" ] }, { @@ -88,6 +88,16 @@ "execution_count": 6, "metadata": {}, "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('..')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], "source": [ "from util.datastructures import MetaPathRatingGraph, MetaPath\n", "graph = MetaPathRatingGraph()\n", @@ -105,15 +115,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "135\n", - "59\n", "Test accuracy is 0.7288135593220338\n" ] } @@ -126,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -150,8 +158,9 @@ }, { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAE2CAYAAACaxNI3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucXVV99/HPN0RALhEQNQqEAAoIKHIRaQUZpBWwarD1AopQxYpFKmpVykP7EGzVR33qpVIUBVFEm4oIWCwXCwwgAoFwhyREgRACD4hcVTAQfs8fa51kz8mZzJnM2mdm9vm+X6/zmrP32Xv99pnLb/ZZ67fXVkRgZmbNMmW8D8DMzMpzcjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3frO5K+Ien48T4OszrJde7WLUn3AC8GngUEBLBtRPy/MbS5D3BmRGxR5CAnGUmnA0si4n+P97FYs0wd7wOwSSWAv4iIywq22fonsWY7S2tFxPKCx9MzkvzJ2WrjXy4bLXVcKe0p6SpJj0q6MZ+Rt177a0l3SHpC0q8kfSivXw/4b+Blkp7Mr0+XdLqkz1T230fSksry3ZI+Lelm4HeSpkh6qaQfS3pI0q8l/d2wb6DSfqttSZ+S9KCkpZJmSTpQ0kJJD0s6rrLvCZLOkjQnH+/1kl5deX17SZfl78Otkt7aFvdkST+T9CRwBPBe4NO5rfPydsfm79MTkm6TdFCljcMlXSnpS5Ieye/1gMrrG0v6Tn4fv5X0k8prb8k/m0cl/ULSqyqvHSvpvhxzvqR9h/v+2SQREX740dUDuBt4Y4f1LwMeBvbPy/vl5Rfm5QOBmfn53sDvgdfk5X2Ae9vaOx34TGV5yDb5OG7Icdch/cO5HjgeWAuYCfwK+PNh3seK9nPbz1T2/SDwEHAmsB6wA/AHYMu8/QnAH4G35+3/HrgrP58KLAKOzc/3BZ4AXlGJ+yiwZ15ep/295vV/BbwkP38n8LvK8uE5/gfy+/4wsLSy78+A/wCm5WPaO6/fBXgQ2D3v9778fXwesC1wbyXGDGCr8f5982NsD5+522idm88YH6mcFR4K/CwiLgKIiEtIyfbNefmCiLgnP78SuJiU5MfiaxFxf0T8EXgtsGlEfDYiludYpwIHd9nWMuBzkbp35gCbAl+NiD9ExB3AHcDOle3nRcQ5efsvk5L0nvmxfkR8ISKejdR9dT5wSGXf8yLiGoB87KuIiLMj4sH8/CzSP4w9KpssjojvREQA3wNeKunFkqYD+wNHRsQT+XtxZd7nb4BvRsT1kXyf9E9iT2A5sDawk6SpEXFvRNzd5ffOJij3udtozYpV+9y3BN5V6YIQ6XfrUgBJBwL/m3SGOAV4PnDLGI/jvrb4m0l6pBJ/CnBFl239NidKgKfy14cqrz8FbFBZXtFFFBEhaSnpU4Sqr2WLgc067TscSYcBHyd9AgFYn/QPp2XFAHZEPCWJfHwvBB6JiCc6NLslcFilu0qks/aXRcSVkj4GzAZ2kHQR8PcR8cBIx2oTl5O7jVanPvclwBkRceQqG0trAz8mnd2fFxHPSTqn0k6nwdTfk7pEWl7aYZvqfkuAuyJiuy6Ov4QVlT1KmXVz4H7Se5rRtu0MYGFluf39DlmWNAP4FrBvRFyd193IMGMdbZYAm0ia1iHBLwE+GxGf77RjRMwB5kjaIMf/P6QuIJuk3C1jJZwJvFXSm/Lg5rp5oPJlpI/7awMP58R+IPCmyr4PAi+UNK2y7ibgzXlwcDpwzAjx5wJP5kHWdSWtJWlHSbuXe4tD7CbpIElrkc6wnwauAa4Ffp+PY6qkAeAtpD7w4TwIbF1ZXh94Dng4fy/fD+zUzUFFKkm9ADhZ0kb5GFrdX98GPixpDwBJ60t6c/66raR98z/iZaRPKs919Z2wCcvJ3UajY8liRNwHzAL+F/AbUlfEJ4EpEfE74KPAWbnb5GDgvMq+C0nJ767cjz8d+D6p2+Ye4EJSP/iwxxERz5GS6GtIg4QPkZLZNNbMas+u8/G/mzQ4+l7g7bl/+xngraSxhoeBk4D3RcSiYdoBOA3YsTWGERHzSf3415C6X3YEfjGK430f6TqEBaR/HMcARMQ8Ur/7SfnncCcrz8zXIZ2p/4b0CeRFwHHYpNbVRUy51OqrpH8Gp0XEF9pePxL4CGlg5kngQxGxIL92HGlk/1ngmIi4uOg7MOshSScA20TEYeN9LGarM+KZu9KFFieRRuF3BA6RtH3bZj+IiFdHxC7Al4Cv5H13AN4FvJJUDndy7qM0M7MaddMtswewKCIW54+dc0gfwVfIH71bNmBlf93bgDm5LOweVi3pMjOzGnRTLbMZQ8u37qNDgpZ0FPAJUnnVGyv7Xl3ZbClDy8LMJpWIOHG8j8GsG8UGVCPi5Ih4OenqvH8q1a6ZmY1eN2fuSxlau7t5Xjec/wS+Wdm3Ottfx30leWpKM7M1EBEdxzG7OXO/Dni5pC1zHezBwE+rG0h6eWXxLaQyK/J2B0taW9JWwMtJNcmdDrD2xwknnOA4EzCG40zcGI4zcWNErP6ceMQz94hYLulo0nwgrVLI+ZJOBK6LiPOBoyX9GekCiEfJ9bMRcYekH5Hm5ngGOCpGOiIzMxuzrqYfiIgLge3a1p1Qef6x1ez7eaDjJc9mZlaPvrpCdWBgwHEmYAzHmbgxHGfixhjJhLjNniT31piZjZIkYgwDqmZmNsk4uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNVAjkvvM6dORVPQxc/r08X5bZmZrrBFXqErqfOfmMRCMOOuamdl48hWqZmZ9xsndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzcR6H07JOeedLM6uJZIYdrk1VnhSwdxzNPmtlYeFZIM7M+4+RuZtZAXSV3SQdIWiDpTknHdnj945Jul3STpJ9L2qLy2nJJN0i6UdK5JQ/ezMw6G7HPXdIU4E5gP+B+4Drg4IhYUNlmH+DaiHha0oeBgYg4OL/2RERMGyGG+9zNzEZprH3uewCLImJxRDwDzAFmVTeIiMsj4um8eA2wWTX+GhyzmZmNQTfJfTNgSWX5PoYm73ZHABdUlteRNFfSLyXNGm4nMzMrZ2rJxiQdCuwG7FNZvWVEPCBpK+BSSbdExN0l45qZ2VDdJPelwIzK8uZ53RCS/gw4DnhD7r4BICIeyF/vljQI7AKsktxnz5694vnAwAADAwPdHL+ZWd8YHBxkcHCwq227GVBdC1hIGlB9AJgLHBIR8yvb7AKcBewfEb+urN8I+ENELJO0KXAVMKs6GJu384CqmdkorW5AdcQz94hYLulo4GJSH/1pETFf0onAdRFxPvBFYH3gLEkCFkfEQcArgVMkLc/7fr49sZuZWXmefmC4NvGZu5lNbJ5+wMyszzi5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQN1ldwlHSBpgaQ7JR3b4fWPS7pd0k2Sfi5pi8prh+f9Fko6rOTBm5lZZ4qI1W8gTQHuBPYD7geuAw6OiAWVbfYBro2IpyV9GBiIiIMlbQxcD+wKCJgH7BoRj7fFiJGOY4RjZM33HqZNoP2YSsfpFMPMrFuSiAh1eq2bM/c9gEURsTgingHmALOqG0TE5RHxdF68BtgsP98fuDgiHo+Ix4CLgQPW5E2YmVn3uknumwFLKsv3sTJ5d3IEcMEw+y4dYV8zMytgasnGJB0K7AbsU7JdMzMbnW6S+1JgRmV587xuCEl/BhwHvCF337T2HWjb97JOQWbPnr3i+cDAAAMDA502MzPrW4ODgwwODna1bTcDqmsBC0kDqg8Ac4FDImJ+ZZtdgLOA/SPi15X11QHVKfn5brn/vRrDA6pmZqO0ugHVEc/cI2K5pKNJg6FTgNMiYr6kE4HrIuJ84IvA+sBZkgQsjoiDIuJRSf9MSuoBnNie2M3MrLwRz9x7chA+czczG7WxlkKamdkk4+RuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBdJXdJB0haIOlOScd2eH1vSfMkPSPpL9teWy7pBkk3Sjq31IGbmdnwpo60gaQpwEnAfsD9wHWSzouIBZXNFgOHA5/s0MTvI2LXEgdrZmbdGTG5A3sAiyJiMYCkOcAsYEVyj4h782vRYX8VOE4zMxuFbrplNgOWVJbvy+u6tY6kuZJ+KWnWqI7OzMzWSDdn7mO1ZUQ8IGkr4FJJt0TE3T2Ia2bWt7pJ7kuBGZXlzfO6rkTEA/nr3ZIGgV2AVZL77NmzVzwfGBhgYGCg2xBmZn1hcHCQwcHBrrZVRKdu8soG0lrAQtKA6gPAXOCQiJjfYdvTgfMj4uy8vBHwh4hYJmlT4CpgVttgLJJipOMY4RhZ872HaRNoP6bScTrFMDPrliQiouO45oh97hGxHDgauBi4HZgTEfMlnSjpLTnA7pKWAO8Avinp1rz7K4HrJd0IXAJ8vj2xm5lZeSOeuffkIHzmbmY2amM6czczs8nHyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNwnmJnTpyOp6GPm9Onj/bbMrMc8K+RwbTI+s0L26r2Y2eTnWSHNzPqMk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQN1ldwlHSBpgaQ7JR3b4fW9Jc2T9Iykv2x77fC830JJh5U6cDMzG96I87lLmgLcCewH3A9cBxwcEQsq28wApgGfBH4aET/J6zcGrgd2JU0rPg/YNSIeb4vh+dxrijFcHDOb/MY6n/sewKKIWBwRzwBzgFnVDSLi3oi4DVbJS/sDF0fE4xHxGHAxcMCo34GZmY1KN8l9M2BJZfm+vK4b7fsuHcW+Zma2hjygambWQFO72GYpMKOyvHle142lwEDbvpd12nD27Nkrng8MDDAwMNBpMzOzvjU4OMjg4GBX23YzoLoWsJA0oPoAMBc4JCLmd9j2dOD8iDg7L1cHVKfk57vl/vfqfh5QrSnGcHHMbPIb04BqRCwHjiYNht4OzImI+ZJOlPSWHGB3SUuAdwDflHRr3vdR4J9JSf1a4MT2xG5mZuWNeObek4PwmXttMYaLY2aT31hLIc3MbJJxcu9TM6dPR1LRx8zp08f7bZlZ5m6Z4dqk2d0y7v4xm/zcLWNm1mec3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3K1WvljKbHz4Iqbh2sQXMU2mOGb9yBcxmZn1GSd3M7MGcnI3M2sgJ3czswZycjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3czswZycjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3czswbqKrlLOkDSAkl3Sjq2w+trS5ojaZGkqyXNyOu3lPQHSTfkx8ml34CZma1q6kgbSJoCnATsB9wPXCfpvIhYUNnsCOCRiHiFpHcDXwQOzq/9KiJ2LXzcZma2Gt2cue8BLIqIxRHxDDAHmNW2zSzge/n5j0n/CFo63gLKzMzq001y3wxYUlm+L6/ruE1ELAcek7RJfm2mpHmSLpO011gP2MzMRjZit8waap2tPwDMiIhHJe0KnCtph4j4XU1xzcyM7pL7UmBGZXnzvK7qPmAL4H5JawHTIuKR/NoygIi4QdKvgW2BG9qDzJ49e8XzgYEBBgYGunsHZmZ9YnBwkMHBwa62VUSsfoOUrBeS+tEfAOYCh0TE/Mo2RwE7RcRRkg4GDoqIgyVtShpofU7S1sDlwKsi4rG2GDHScYxwjKz53sO0CbQfU+k4vYjRL3HM+pEkIqLjuOaIZ+4RsVzS0cDFpD760yJivqQTgesi4nzgNOD7khYBv2VlpcwbgM9IWgY8BxzZntjNzKy8Ec/ce3IQPnOvLUa/xDHrR6s7c/cVqmZmDeTkbo0wc/p0JBV7zJw+fbzfktmYuFtmuDZxt0w/x3HXj00G7pYxM+szTu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORu1qXStfSup7c6uc59uDZxnXs/x/FUCjYZuM7dzKzPOLmbmTWQk7vZBOO+fSvBfe7DtUmz+3UdZ/xjjHccm/zc525m1mec3M36lLt/ms3dMsO1iT/693OcJr2X8Y5j9XG3jJlZn3FyNzNrICd3M7MGcnI3M2sgJ3czswZycjezWpUuuXS5ZXdcCjlcm7jcrp/jNOm9NC2Oyy1XcimkmVmfcXI3M2sgJ3czm/Q8lcKqukrukg6QtEDSnZKO7fD62pLmSFok6WpJMyqvHZfXz5f0ppIHb2YGsPjBBwko+lj84IO9fROFjZjcJU0BTgL2B3YEDpG0fdtmRwCPRMQrgK8CX8z77gC8C3glcCBwsqSOnf+9MOg4EzKG40zcGI4z1GT6hNDNmfsewKKIWBwRzwBzgFlt28wCvpef/xh4Y37+NmBORDwbEfcAi3J742LQcSZkDMeZuDEcZ6huPyGc0MU2dX9C6Ca5bwYsqSzfl9d13CYilgOPS9qkw75LO+xrZmaF1TWgOm5dL2ZmRroYYHUPYE/gwsryPwDHtm1zAfC6/Hwt4KFO2wIXtrZr27/0WIgffvjhR188hsvdUxnZdcDLJW0JPAAcDBzSts1/AYcD1wLvBC7N638K/EDSV0jdMS8H5rYHGO4KKzMzWzMjJveIWC7paOBiUjfOaRExX9KJwHURcT5wGvB9SYuA35L+ARARd0j6EXAH8Axw1JjmGTAzs65MiLllzMysLF+hambWQE7uZmYN5ORuZtZA3VTLTGqStgW+AbwkInaS9GrgbRHxL4XaXwv4ILA5qWT0qspr/1gwzq6rez0ibigQoyfvpS3mZsCWVH4XI+KKgu3vT3o/l+SrpFvrPxAR3ynQ/idW93pEfHmsMXKcacBxpPdyQUT8sPLayRFxVIk4ub1av2eV9tYD/h6YERF/I+kVwHa5SKM4SX8KzGTo79oZBdrdAvgSqSLwAuBL+Wp+JJ0bEQeNNcYaHVfTB1QlXQ58CjglInbJ626LiJ0KtX8qsB6pxPN9wOUR8Yn82g0RsdqkPIo4l+Wn6wK7AzeTLhZ7NXB9RPxJgRg9eS+VeF8A3k2qplqeV0dEvK1Q+58D9gJuAN4KfDUivp5fK/J+JJ2Qn24HvJZU/kuONzciDh1rjBznbNL0HdcAHyBVn70nIv5Y+Pes9u9ZJdZ/AvOAw/KJ13rALyPiNaViVGJ9H9gGuImhv2sfLdD2z4GzST+bI4DdgLdGxG8l3djKOz030kVMk/1BKtcEuLGy7qaC7d9SeT4V+BbwE2CdasyC8X4CvKqyvBPw40n6XhYC69T4s78VmJqfbwT8N/CV9t+HQrGuADasLG8IXFGw/Zvalo8HrgJeCNwwSb9n17e3C9xc0+/CfPLJbA1tt/9sDgVuJ/0zKfazGe2jH/rcH5a0DelqLiS9g3QxVilrt55EmiDtQ6Szg0uBDQrGadkuIm6txLyNNOtmCb1+L3cBz6uh3ZapEfEsQEQ8RjoTnSbpLCrvtZCXAMsqy8vyulLWyTO0AhARnwW+Tfqn8sKCcXr5PVsm6fms/NvcBvhj4RgttwF1TdD+PEnrthYi4kzgGOAi4KU1xRxRPyT3jwCnANtLWgp8DPjbgu1fL+mA6oqI+AxwOql/r7RbJJ0qaSA/vg3cUqjtXr+XPwA3STpF0r+1HgXb/7WkfVoLEbE8Io4gfWIo9Q+x5QxgrqTZkmaTrtb+3up3GZX/YuVsqwBExHdJfdbLOu2whnr5PZtNmpJkC0k/AC4BVrlfRCGbAndIukjST1uPQm2fCryuuiIi/od0tf5thWKMWuP73FskrQ9MiYgnx/tYxiKfIfwt8Ia86grgGxHx9Pgd1ZqRdHin9RFRJCnms0Ii4qkOr20WEUtLxKm0uSuwd168IiJuLNl+L4zD9+yFpPmrBFwTEQ+XbL8SZ59O6yPi8jriTQSNTe6SDo2IM4erZohCVQxtMWsZje8Q5/mkCoOFpdvO7fesikHS2sC2eXFh5CqDQm3XXmHUFm8v4BURcbqkFwEbRMTdhdruSVVOW8y6K5kuiYj9Rlo3WdRdmTdaTS6FXD9/3bAXwYYbjSd9XC8Z522ksqu1ga0kvQb4TBSqMMlOJ1UxtCpwlgJnAUWTu6QBUtfFPaQzty0kHV4wgfxr/tqxwoiV72/MctXM7qSqmdNJYwlnAq8vFKL1e9yxKqdQjBWGq2QifVIca9vrkqqyNpW0MSunCJ9G4fs9SPpFROwl6Uly337rJVK1zLSC4b5NrswjNX6LpB8C45Lcx2UUt5cP4EU9ilPbaHxbnHnACxhaYXBr4Rg9qWLI72W7yvK2wLwa4tRWYVRp8yZSwqh+z24pGSO3WWtVTqXd2iqZSIONd5MGT++uPG4Gjq4jZhfHtHGBNmqtzBvtox8GVK+SdLGkI/JZQl3qHI2veiYiHm9bV7pvrVdVDM+LStdSRNxJPdUzdVYYtSyL9Nfc+p6tP8L2a6ruqpyW2iqZIuJrEbEV8MmI2Kry2DkiTqojZhcuKdBG3ZV5o9LkbhkAImJbSXuQpiE+XtIdpPu6nlk4VGs0fi6VRBhlu0sAbpf0HmCt3Bf+UeCXhWPMZmgVw+uB9xeOAak651RS9wWk+uDra4hzS1uc91KuwqjlR5JOATaS9DekC41OLRwDVlblnJOXD6JsVU5Lq5LpEob+Po/5op+KUyR9lJXFAYOkiw2LjbuMQol7SnyEdG1IqzLvbtLv9Lho7IBqJ5I2Bb4MvDci1ircdk9G4/Ng5/HAm/Kqi4B/icLVMr2oYpC0DukPYq+86krg5Igo+imhVxVGkv6c9HMRcFFE/Lxk+5U4tVfl1F3JlGOcSvp00GrzfcDyiPhgqRijOJaSV/lOiMq8xid3pTk53k46c98GOAf4UUTMG9cDm8DGo4pB6Ybqm0dE6TPqVvt1Vxh9ISKOHWldoVi1VeW0xamtkim3f3NE7DzSul4YS3Ifj8q8bvRDn/vNQKuiZNuIOLZkYpf0i/z1SUlPVB5PSnqiVJxKvJ9L2qiyvLGkiwq1vW5OspvmdjfJj5kUrmLI8QYlTcsx5wHfVrolY+k4byMNeF6Yl19T8AKWlj/vsO7AwjFaVTnHkiYRg5VVOaXjDJDmsvl34GTgTklvWO1Oo7c891G3Ym7NysqcXhtLt0y1Mq/TY1w0vs8d2DoiQtIGkjaIiN+VbDwi9spfV/tDlLRxRDxaIOSmkS4Lb8V/VNKLC7QLcCTpCt6XkSaOankCqGOg6wUR8YSkDwJnRMQJkuo4cz8B2IPUp0tE3CRpqxINS/pb4Chgm7Zj35DyYyGQPoXuQv75RMT9kupIIP8KvKn1SSfXcP8HaVKsUj4FXCbpLlJy3ZLCYzv5xGFYEfFIfrrGn0oj4pT89OSI+M2atlNaPyT3HXMN+iaAJP0GODxXTPTSJUCJPr3nJM2IiHsBlG5cXqRvLSK+BnxN0t9FngmwZlMlvRR4F2kcoS7PRMTj0pCTs1L9kT8kTfP6eeAfKuufrCSOkpblk5W6q3JWqWSSVKx6RmmenKeAV5Bq9yF1/ZSuyppH+lkLmAE8mp9vBNwLbAVDkvxYXCXpHuA/gZ8UOplbY/2Q3L8FfCIiLoMVHze/Bfxpj4+jxGg8pCT4C6WpjEUaWPtQobZbelXFcCJpQPgXEXFd/li+qHAMqLHCKJelPi7pa8AjrUG03N30uoi4tkScil5V5dRayRQRz0n690jT4dYyzpLjbAWgNAfTORHx33n5QFKlUclYvarM60o/DKhOiEGbwqPxm5IqWaCGSpZeVDEo3RjkoxFRvI+9Q6zaK4wk3QjsmmvdW2em15f6mbfFqr0qpxeVTJL+L3A16Sy31kQk6daIeNVI6wrGq60yr+tj6IPkfg6pf/L7edWhwG4R8fYeH0epm0N0HNSKsnN+9OQfoqS5EbFHyTbHi6Sbou0mE5JuiYhXF47Ts6qcSvu1VDIpTQmwHmkQtfVPI6LslACtWBeR/kFVr3V4Q0TsXzDGhKrM64dqmQ8ALyLdKeVs0sVGdVyQM5JS3TKfqjz+iTQV7OxCbbf0qorhKkknSdpb0q6tR+kgdVYYVdwl6aOSnpcfx5Cu8iytV1U5vahkOo90/cHOEbFhfhRP7NkhpDxwDmk6ihfldSXVWpk3Wv1w5r476SP5TFaOMUSpM6puR+MlbVLHAJvS/Ru/GhF/VbDN/UiTXw2pYmiNWxSM06m9iIg3dlg/ljir3Oqs07oxxngx8G+kOdeDNID+sYh4qFD7K6pygF9VXtqQdGu695aIU4l3Y0TskiuZtmhVMpX8JCJpX9KY0d7kuxYBV+aB/VpIWj8ifl9T22pV5gGUrswb9fH0QXJfCHySNPfLc631EbG4UPt3s5rR+NaATl2USkBuj4gdCrU3hdSfP496qxh6RtI84O1tFUbn1NEfXhdJLwA2pkdVOZJuJfXrfw84Pg9419HNtBZplst9gQ8DT0XE9iVj5Dh/Shp43iAiZkjaGTgyyt5YfCdS9+8mpBwwXpV5QH9Uy/wmIv6rrsZ7ORqf2/06K8v4ppA+Bhabl7xXVQwAkl4CfA54WUQcKGkH4E8i4rTCoWqvMFLNc3mPQ1VO7ZVMSvPWrE8aVL0SeG2pTzodfAXYnzxVckTcPNyt6nKAAAAHg0lEQVT41RhMlMo8oD/O3Pcj9a21T4D0k8JxejIar6FzfjwL3BMRVxWO0ZMqBkkXkLp/jo+InSVNJU2XWryCoQcVRpeT5/JudfdIui0idiocp/aqnF5VMuU+/N1If5dXkeb8uTo63AWqQKxrI+J11e640kUCE6Uyr6UfztzfD2xPKu1rdcsEaVClpPsl/SNDR+PvLxyj6MRNq3Ek8HHSwGqdVQybRsSPJB2XAzwrqfjAbeUMrTUdxA6SilYYAetFxFwNvVDq2YLtt6j6Dzd/0ir6dxwRyyUdQjrbrU1EfBxA6Qrbvyb9o58OrFNDuCW5ayaULsY6hnQPhpLukvRPDK3Mq2NQvSv9kNxfGxHbjbzZmB1Cusz9HFbesabYaHzuAx32LLpwX+h5pOO/MiJK/wFU/V5p9snWWeieQPtc9SV8qvJ8XdJUBPNou+H0GPVqLu+7lC4w+0ZePop6EshVkk4iXW25YgAyCt6aUNLRpC6y3Uh34/oOqXumDh8GvkaaI2kpcDHpe1fSB0jdWWfn5SsZn8o8oD+6ZU4HvhQRd/QoXi2j8XkQENKFJTD07CAi4h9W3WuNY/WkiiGXPX4d2BG4nVSe9o7S9dQd4tZRYbQ1K/tXHyXN5f3eUgP3lTi1VuVU4tReySTpk6QEOC8i6viUU431+vbuy07rxhij1sq8UR9PHyT3+aQE1bqtV+veiaVH/Wsfjc9xOpX1Fbv6tdJm7VUMSvOsH00a6HqS1M//9Sg8z3qHuHVUGL0jdzFNiLm8bahOfyOl/27qrswbrX7oljmgR3F6MRoPKTetOOPI/1SKXozWwyqGM0j94J/Ly+8hfSJ5Z8kgPaow+jTpasRaaqhb6q7KqcTpVSVTrST9CenT1Is0dL71aUDpaQFqrcwbrcYn917+14yIJW0DanVc1XkE8J1c9yxSF8AHCse4hdQPuhOpD/wxSXVUMezUdvZ8mdJkS6VVJ7x6FviP0hVGwP/kbob2PurSNejfJlfl5PZvkfRDoGhyB75LrmTKy3eS3tukSu7A2sAGpFxXnRr5CeAdhWOdoDQvU62Ved1qfHLvoV6MxhPpcuadc3Jv1T+XjtGrKoYbJO0ZEdfkeK+jhnuo9qjC6N3560cq6wLYunCcXlXl9KSSqW6RbnN5uaTv9uBEr1eVeV1xci+nF6PxrSsVTyBPx5vrqz9TMsn3sIphN+CXku7NyzOAha3KoLGOi/SywihqvhK5oldVOb2qZOqVUyW9M/KNbiRtTJqOt9jEYfSuMq8rTu7lbBdt83tIej3p4oySvkMasHlXXn4f6cz6LwvGWJc0XWndVQx1j4e8JX/tWGFUIoCkN0bEpZI6fv9r+Ej+EVJVzvaSlpKrcgrHAPgEafxoa0lXkSuZaojTK3Xewazll5J26FVl3kgaXy3TK70Yjc9tdppadpV1tlKdFUaSZkfE7Fxy25pjaMXXiCg2HtLLqpzxqmSqi1adX2gm6QrsktUyPanM65bP3Meox6PxAE9J2isiWjfmfj3pdmU2vDorjJ7MP/fbWJnUodxt/FboZVUOPapk6qFe3MGsV5V5XXFyH7tejsZD6ts/ozWgSqqWOXw121u9FUYb5K/bka4LOC/HeCswt1CMql5V5fSqkqknIuLCfJHRh4AbgXMpfFI0XvXsw3G3TCGStqz7h9v2sXwaQEQ8McJultVZYSTpCuAvYuVsjRsCP4uIotc6KE0x3S4iomhVjqQzgZPaKpk+EhGHlYzTK0rz0h8DbA7cRJpE7uqSV9xONP1wJ6ZeOVU13+0nIp4DPp2fP+HE3h1JL5D0ZVL98SWS/rXyyaeUlwDLKsvL8rqiImKrDo/S5ZawspLpHkn3kPrcXyvpVkm1Tg9Rk2NIn6wWR8S+wC7AY6vfZXJzt0w5vRiNh959LG+SXlQYnQHMVbpnL6S5/L9bqvFxqMqZUP3HBTwdEU9LQtI6EbFA0oQpW6yDk3s5z0ma0TYaX0ef17tzu+019HWcvTXFNm2ThJ0o6aaSASLis0rz0++dV70/Im4sGOINwKWkvvxVqnIofKHMROs/LuC+/Mn6XODnkh4FmvYeh3ByL6cXo/EAO5AS+16kP+orgW/WEKdJelJhFGk63GJz1rTpWVVOE0XE2/PT2UozXr4AuHAcD6l2HlAtKHfDtEbjnw88FGVvCIGkH5EqcX6QV70HeEFEvGv4vfqb0gydZ5D+oCFXGEXNUwuXJOmE/LRjVU5EHDpex2YTk5N7Ib0ajZd0R/tUtZ3WWdK0CqNeVeXY5OdqmXJ6NRp/Q57nA6hvsq2maGCFUU+qcmzyc597Ob0aja91sq2GalKFUa1VOdYc7pYpJP+xvR/4GOkWaI8Cz4uINxeOs+XqXm9glcOY5Qt/VvlFr6k+vHZKtydsVeVcUbgqxxrCyb0GkvYhj8ZHxLKRtrd6SXo+HSqMarj5iNmE4eRujecKI+tHTu7WeK4wsn7kahnrB64wsr7jM3drvHwThe2AIRVGpHuPusLIGsnJ3RrPFUbWj5zczcwayH3uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDfT/AeacRzne/39WAAAAAElFTkSuQmCC\n", "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -176,10 +185,8 @@ "plt.title(\"Feature importances\")\n", "plt.bar(range(len(domain_score.classifier.feature_importances_)), importances[indices],\n", " color=\"r\", align=\"center\")\n", - "#dict_as_list = list(domain_score.vectorizer.vocabulary_.keys())[list(domain_score.vectorizer.vocabulary_.values())\n", - "#dict_as_list = dict_as_list * 2 \n", "a = list(domain_score.vectorizer.vocabulary_.values())\n", - "a.extend([i + 6 for i in list(domain_score.vectorizer.vocabulary_.values())])\n", + "a.extend([i + len(list(domain_score.vectorizer.vocabulary_.values())) for i in list(domain_score.vectorizer.vocabulary_.values())])\n", "\n", "features = list(domain_score.vectorizer.vocabulary_.keys())\n", "features.extend([i + '2' for i in list(domain_score.vectorizer.vocabulary_.keys())])\n", @@ -194,21 +201,832 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Tree\n", + "\n", + "\n", + "0\n", + "\n", + "directed <= 0.117\n", + "gini = 0.498\n", + "samples = 135\n", + "value = [72, 63]\n", + "\n", + "\n", + "1\n", + "\n", + "produced <= 0.33\n", + "gini = 0.185\n", + "samples = 29\n", + "value = [26, 3]\n", + "\n", + "\n", + "0->1\n", + "\n", + "\n", + "True\n", + "\n", + "\n", + "8\n", + "\n", + "produced <= 0.429\n", + "gini = 0.491\n", + "samples = 106\n", + "value = [46, 60]\n", + "\n", + "\n", + "0->8\n", + "\n", + "\n", + "False\n", + "\n", + "\n", + "2\n", + "\n", + "person <= 0.237\n", + "gini = 0.48\n", + "samples = 5\n", + "value = [2, 3]\n", + "\n", + "\n", + "1->2\n", + "\n", + "\n", + "\n", + "\n", + "7\n", + "\n", + "gini = 0.0\n", + "samples = 24\n", + "value = [24, 0]\n", + "\n", + "\n", + "1->7\n", + "\n", + "\n", + "\n", + "\n", + "3\n", + "\n", + "gini = 0.0\n", + "samples = 2\n", + "value = [0, 2]\n", + "\n", + "\n", + "2->3\n", + "\n", + "\n", + "\n", + "\n", + "4\n", + "\n", + "produced <= 0.294\n", + "gini = 0.444\n", + "samples = 3\n", + "value = [2, 1]\n", + "\n", + "\n", + "2->4\n", + "\n", + "\n", + "\n", + "\n", + "5\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [0, 1]\n", + "\n", + "\n", + "4->5\n", + "\n", + "\n", + "\n", + "\n", + "6\n", + "\n", + "gini = 0.0\n", + "samples = 2\n", + "value = [2, 0]\n", + "\n", + "\n", + "4->6\n", + "\n", + "\n", + "\n", + "\n", + "9\n", + "\n", + "directed <= 0.705\n", + "gini = 0.447\n", + "samples = 86\n", + "value = [29, 57]\n", + "\n", + "\n", + "8->9\n", + "\n", + "\n", + "\n", + "\n", + "54\n", + "\n", + "movie2 <= 0.345\n", + "gini = 0.255\n", + "samples = 20\n", + "value = [17, 3]\n", + "\n", + "\n", + "8->54\n", + "\n", + "\n", + "\n", + "\n", + "10\n", + "\n", + "produced <= 0.346\n", + "gini = 0.41\n", + "samples = 80\n", + "value = [23, 57]\n", + "\n", + "\n", + "9->10\n", + "\n", + "\n", + "\n", + "\n", + "53\n", + "\n", + "gini = 0.0\n", + "samples = 6\n", + "value = [6, 0]\n", + "\n", + "\n", + "9->53\n", + "\n", + "\n", + "\n", + "\n", + "11\n", + "\n", + "gini = 0.0\n", + "samples = 19\n", + "value = [0, 19]\n", + "\n", + "\n", + "10->11\n", + "\n", + "\n", + "\n", + "\n", + "12\n", + "\n", + "person <= 0.588\n", + "gini = 0.47\n", + "samples = 61\n", + "value = [23, 38]\n", + "\n", + "\n", + "10->12\n", + "\n", + "\n", + "\n", + "\n", + "13\n", + "\n", + "wrote2 <= 0.58\n", + "gini = 0.426\n", + "samples = 52\n", + "value = [16, 36]\n", + "\n", + "\n", + "12->13\n", + "\n", + "\n", + "\n", + "\n", + "46\n", + "\n", + "movie2 <= 0.168\n", + "gini = 0.346\n", + "samples = 9\n", + "value = [7, 2]\n", + "\n", + "\n", + "12->46\n", + "\n", + "\n", + "\n", + "\n", + "14\n", + "\n", + "person <= 0.14\n", + "gini = 0.444\n", + "samples = 12\n", + "value = [8, 4]\n", + "\n", + "\n", + "13->14\n", + "\n", + "\n", + "\n", + "\n", + "21\n", + "\n", + "directed2 <= 0.282\n", + "gini = 0.32\n", + "samples = 40\n", + "value = [8, 32]\n", + "\n", + "\n", + "13->21\n", + "\n", + "\n", + "\n", + "\n", + "15\n", + "\n", + "movie2 <= 0.335\n", + "gini = 0.32\n", + "samples = 10\n", + "value = [8, 2]\n", + "\n", + "\n", + "14->15\n", + "\n", + "\n", + "\n", + "\n", + "20\n", + "\n", + "gini = 0.0\n", + "samples = 2\n", + "value = [0, 2]\n", + "\n", + "\n", + "14->20\n", + "\n", + "\n", + "\n", + "\n", + "16\n", + "\n", + "gini = 0.0\n", + "samples = 5\n", + "value = [5, 0]\n", + "\n", + "\n", + "15->16\n", + "\n", + "\n", + "\n", + "\n", + "17\n", + "\n", + "directed2 <= 0.281\n", + "gini = 0.48\n", + "samples = 5\n", + "value = [3, 2]\n", + "\n", + "\n", + "15->17\n", + "\n", + "\n", + "\n", + "\n", + "18\n", + "\n", + "gini = 0.444\n", + "samples = 3\n", + "value = [2, 1]\n", + "\n", + "\n", + "17->18\n", + "\n", + "\n", + "\n", + "\n", + "19\n", + "\n", + "gini = 0.5\n", + "samples = 2\n", + "value = [1, 1]\n", + "\n", + "\n", + "17->19\n", + "\n", + "\n", + "\n", + "\n", + "22\n", + "\n", + "acted_in2 <= 0.529\n", + "gini = 0.444\n", + "samples = 21\n", + "value = [7, 14]\n", + "\n", + "\n", + "21->22\n", + "\n", + "\n", + "\n", + "\n", + "41\n", + "\n", + "wrote2 <= 0.666\n", + "gini = 0.1\n", + "samples = 19\n", + "value = [1, 18]\n", + "\n", + "\n", + "21->41\n", + "\n", + "\n", + "\n", + "\n", + "23\n", + "\n", + "person <= 0.581\n", + "gini = 0.388\n", + "samples = 19\n", + "value = [5, 14]\n", + "\n", + "\n", + "22->23\n", + "\n", + "\n", + "\n", + "\n", + "40\n", + "\n", + "gini = 0.0\n", + "samples = 2\n", + "value = [2, 0]\n", + "\n", + "\n", + "22->40\n", + "\n", + "\n", + "\n", + "\n", + "24\n", + "\n", + "produced2 <= 0.335\n", + "gini = 0.346\n", + "samples = 18\n", + "value = [4, 14]\n", + "\n", + "\n", + "23->24\n", + "\n", + "\n", + "\n", + "\n", + "39\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [1, 0]\n", + "\n", + "\n", + "23->39\n", + "\n", + "\n", + "\n", + "\n", + "25\n", + "\n", + "wrote <= 0.61\n", + "gini = 0.245\n", + "samples = 14\n", + "value = [2, 12]\n", + "\n", + "\n", + "24->25\n", + "\n", + "\n", + "\n", + "\n", + "34\n", + "\n", + "movie2 <= 0.167\n", + "gini = 0.5\n", + "samples = 4\n", + "value = [2, 2]\n", + "\n", + "\n", + "24->34\n", + "\n", + "\n", + "\n", + "\n", + "26\n", + "\n", + "gini = 0.0\n", + "samples = 9\n", + "value = [0, 9]\n", + "\n", + "\n", + "25->26\n", + "\n", + "\n", + "\n", + "\n", + "27\n", + "\n", + "acted_in <= 0.153\n", + "gini = 0.48\n", + "samples = 5\n", + "value = [2, 3]\n", + "\n", + "\n", + "25->27\n", + "\n", + "\n", + "\n", + "\n", + "28\n", + "\n", + "movie <= 0.486\n", + "gini = 0.375\n", + "samples = 4\n", + "value = [1, 3]\n", + "\n", + "\n", + "27->28\n", + "\n", + "\n", + "\n", + "\n", + "33\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [1, 0]\n", + "\n", + "\n", + "27->33\n", + "\n", + "\n", + "\n", + "\n", + "29\n", + "\n", + "gini = 0.0\n", + "samples = 2\n", + "value = [0, 2]\n", + "\n", + "\n", + "28->29\n", + "\n", + "\n", + "\n", + "\n", + "30\n", + "\n", + "movie2 <= 0.437\n", + "gini = 0.5\n", + "samples = 2\n", + "value = [1, 1]\n", + "\n", + "\n", + "28->30\n", + "\n", + "\n", + "\n", + "\n", + "31\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [0, 1]\n", + "\n", + "\n", + "30->31\n", + "\n", + "\n", + "\n", + "\n", + "32\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [1, 0]\n", + "\n", + "\n", + "30->32\n", + "\n", + "\n", + "\n", + "\n", + "35\n", + "\n", + "wrote <= 0.587\n", + "gini = 0.444\n", + "samples = 3\n", + "value = [2, 1]\n", + "\n", + "\n", + "34->35\n", + "\n", + "\n", + "\n", + "\n", + "38\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [0, 1]\n", + "\n", + "\n", + "34->38\n", + "\n", + "\n", + "\n", + "\n", + "36\n", + "\n", + "gini = 0.5\n", + "samples = 2\n", + "value = [1, 1]\n", + "\n", + "\n", + "35->36\n", + "\n", + "\n", + "\n", + "\n", + "37\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [1, 0]\n", + "\n", + "\n", + "35->37\n", + "\n", + "\n", + "\n", + "\n", + "42\n", + "\n", + "gini = 0.0\n", + "samples = 17\n", + "value = [0, 17]\n", + "\n", + "\n", + "41->42\n", + "\n", + "\n", + "\n", + "\n", + "43\n", + "\n", + "acted_in <= 0.162\n", + "gini = 0.5\n", + "samples = 2\n", + "value = [1, 1]\n", + "\n", + "\n", + "41->43\n", + "\n", + "\n", + "\n", + "\n", + "44\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [1, 0]\n", + "\n", + "\n", + "43->44\n", + "\n", + "\n", + "\n", + "\n", + "45\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [0, 1]\n", + "\n", + "\n", + "43->45\n", + "\n", + "\n", + "\n", + "\n", + "47\n", + "\n", + "gini = 0.0\n", + "samples = 4\n", + "value = [4, 0]\n", + "\n", + "\n", + "46->47\n", + "\n", + "\n", + "\n", + "\n", + "48\n", + "\n", + "movie2 <= 0.345\n", + "gini = 0.48\n", + "samples = 5\n", + "value = [3, 2]\n", + "\n", + "\n", + "46->48\n", + "\n", + "\n", + "\n", + "\n", + "49\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [0, 1]\n", + "\n", + "\n", + "48->49\n", + "\n", + "\n", + "\n", + "\n", + "50\n", + "\n", + "person2 <= 0.385\n", + "gini = 0.375\n", + "samples = 4\n", + "value = [3, 1]\n", + "\n", + "\n", + "48->50\n", + "\n", + "\n", + "\n", + "\n", + "51\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [1, 0]\n", + "\n", + "\n", + "50->51\n", + "\n", + "\n", + "\n", + "\n", + "52\n", + "\n", + "gini = 0.444\n", + "samples = 3\n", + "value = [2, 1]\n", + "\n", + "\n", + "50->52\n", + "\n", + "\n", + "\n", + "\n", + "55\n", + "\n", + "acted_in <= 0.588\n", + "gini = 0.188\n", + "samples = 19\n", + "value = [17, 2]\n", + "\n", + "\n", + "54->55\n", + "\n", + "\n", + "\n", + "\n", + "62\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [0, 1]\n", + "\n", + "\n", + "54->62\n", + "\n", + "\n", + "\n", + "\n", + "56\n", + "\n", + "directed <= 0.679\n", + "gini = 0.111\n", + "samples = 17\n", + "value = [16, 1]\n", + "\n", + "\n", + "55->56\n", + "\n", + "\n", + "\n", + "\n", + "61\n", + "\n", + "gini = 0.5\n", + "samples = 2\n", + "value = [1, 1]\n", + "\n", + "\n", + "55->61\n", + "\n", + "\n", + "\n", + "\n", + "57\n", + "\n", + "gini = 0.0\n", + "samples = 13\n", + "value = [13, 0]\n", + "\n", + "\n", + "56->57\n", + "\n", + "\n", + "\n", + "\n", + "58\n", + "\n", + "person2 <= 0.358\n", + "gini = 0.375\n", + "samples = 4\n", + "value = [3, 1]\n", + "\n", + "\n", + "56->58\n", + "\n", + "\n", + "\n", + "\n", + "59\n", + "\n", + "gini = 0.0\n", + "samples = 1\n", + "value = [1, 0]\n", + "\n", + "\n", + "58->59\n", + "\n", + "\n", + "\n", + "\n", + "60\n", + "\n", + "gini = 0.444\n", + "samples = 3\n", + "value = [2, 1]\n", + "\n", + "\n", + "58->60\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.tree import export_graphviz\n", - "export_graphviz(domain_score.classifier, out_file='tree.dot', feature_names=features_ordered)" + "import graphviz\n", + "dot_data = export_graphviz(domain_score.classifier, out_file=None, feature_names=features_ordered)\n", + "graph = graphviz.Source(dot_data) \n", + "graph " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This is **included manually**! **Feature names aren't correct**, somewhere they get messed up!\n", - "\n", - "![tree](https://user-images.githubusercontent.com/3034832/36925096-86f2bba6-1e71-11e8-8dd4-4974146d9ca5.png)\n" + "**Feature names aren't correct**, somewhere they get messed up!\n" ] }, { diff --git a/requirements.txt b/requirements.txt index 3be8357..9ff1d67 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ scipy # TODO: Introduct development (non-deployment) dependencies pytest-cov Flask-Session +graphviz \ No newline at end of file From b821fc34ba20ac2b36a726d10ead44c3d04d5c14 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Thu, 8 Mar 2018 16:17:28 +0100 Subject: [PATCH 12/35] Allow enabling development mode for server. --- deployment/run-server.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/deployment/run-server.sh b/deployment/run-server.sh index 7af236e..6f8f64e 100755 --- a/deployment/run-server.sh +++ b/deployment/run-server.sh @@ -4,4 +4,5 @@ docker rm server-container docker run --name server-container \ --publish=${1:-8000}:8000 \ -d \ + -e METAEXP_DEV=${2:-'false'}\ server \ No newline at end of file From c3b0d82eff324438e299bb516e497921079fd9c6 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Mon, 12 Mar 2018 14:03:36 +0100 Subject: [PATCH 13/35] Add exploration notebook. --- notebooks/pl-1-1.0-exploration_open-day.ipynb | 585 ++++++++++++++++++ 1 file changed, 585 insertions(+) create mode 100644 notebooks/pl-1-1.0-exploration_open-day.ipynb diff --git a/notebooks/pl-1-1.0-exploration_open-day.ipynb b/notebooks/pl-1-1.0-exploration_open-day.ipynb new file mode 100644 index 0000000..71451f2 --- /dev/null +++ b/notebooks/pl-1-1.0-exploration_open-day.ipynb @@ -0,0 +1,585 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "## Help Python find our packages\n", + "import sys\n", + "sys.path.append('..')\n", + "\n", + "import json\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import domain_scoring.domain_scoring as domain_scoring" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "potato = json.load(open(path, \"r\", encoding=\"utf8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "merlin = json.load(open(path, \"r\", encoding=\"utf8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "type_selection = merlin[\"edge_type_selection\"] + merlin[\"node_type_selection\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['PRODUCED', 'DIRECTED', 'WROTE', 'ACTED_IN', 'Person', 'Movie']" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "types = []\n", + "for pair in type_selection:\n", + " types.append(pair[0])\n", + "types" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 1,\n", + " 'metapath': ['Person',\n", + " 'ACTED_IN',\n", + " 'Movie',\n", + " 'DIRECTED',\n", + " 'Person',\n", + " 'PRODUCED',\n", + " 'Movie',\n", + " 'ACTED_IN',\n", + " 'Person'],\n", + " 'rating': '0.11'}" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"meta_paths\"][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_mps(data):\n", + " i = 0\n", + " first = True\n", + " batches = []\n", + " batch = []\n", + " for probably_path in data[\"meta_paths\"]:\n", + " # Ignore first time_to_rate\n", + " if first:\n", + " first = False\n", + " continue\n", + " i += 1\n", + " if i == 6:\n", + " # Ignore time_to_rate\n", + " i = 0\n", + " batches.append(batch)\n", + " batch = []\n", + " else:\n", + " if 'time_to_rate' not in probably_path.keys():\n", + " batch.append(probably_path)\n", + " return batches" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "def construct_graph(batches):\n", + " ## Construct rating graph\n", + " from util.datastructures import MetaPathRatingGraph, MetaPath\n", + " graph = MetaPathRatingGraph()\n", + "\n", + " for batch in batches:\n", + " #ordered = sorted(batch, key=lambda x: float(x['rating']))\n", + " for metapath in batch:\n", + " for another_metapath in batch:\n", + " if metapath is another_metapath:\n", + " continue\n", + " if float(metapath['rating']) <= float(another_metapath['rating']):\n", + " graph.add_user_rating(MetaPath.from_list(another_metapath['metapath']), MetaPath.from_list(metapath['metapath']), \n", + " distance=float(another_metapath['rating']) - float(metapath['rating']))\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "## Clean up data // remove time_to_rate from array of ratings.\n", + "def clean_up(data):\n", + " batches = extract_mps(data)\n", + " return batches, construct_graph(batches)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "m_batches, m_graph = clean_up(merlin)\n", + "p_batches, p_graph = clean_up(potato)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "domain_score = domain_scoring.DomainScoring()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy is 0.7288135593220338\n" + ] + } + ], + "source": [ + "domain_score.fit(m_graph, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy is 0.6491228070175439\n" + ] + } + ], + "source": [ + "domain_score.fit(p_graph, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "def to_dataframe(batches):\n", + " data = {\"id\": [], \"meta_path\": [], \"rating\": [], \"batch_number\": []}\n", + " i = 0\n", + " for batch in batches:\n", + " for rating in batch:\n", + " data[\"id\"].append(rating[\"id\"])\n", + " data[\"meta_path\"].append(rating[\"metapath\"])\n", + " data[\"rating\"].append(rating[\"rating\"])\n", + " data[\"batch_number\"].append(i)\n", + " i += 1\n", + " return pd.DataFrame(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "dataframe = to_dataframe(m_batches)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(dataframe.iloc[0][\"meta_path\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
batch_numberidmeta_pathrating
001[Person, PRODUCED, Movie, DIRECTED, Person, PR...0.1
102[Person, PRODUCED, Movie, ACTED_IN, Person]0.25
203[Person, DIRECTED, Movie, WROTE, Person, PRODU...0.15
304[Person, DIRECTED, Movie, PRODUCED, Person, WR...0.15
405[Person, PRODUCED, Movie, ACTED_IN, Person, AC...0.2
\n", + "
" + ], + "text/plain": [ + " batch_number id meta_path rating\n", + "0 0 1 [Person, PRODUCED, Movie, DIRECTED, Person, PR... 0.1\n", + "1 0 2 [Person, PRODUCED, Movie, ACTED_IN, Person] 0.25\n", + "2 0 3 [Person, DIRECTED, Movie, WROTE, Person, PRODU... 0.15\n", + "3 0 4 [Person, DIRECTED, Movie, PRODUCED, Person, WR... 0.15\n", + "4 0 5 [Person, PRODUCED, Movie, ACTED_IN, Person, AC... 0.2" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [], + "source": [ + "## Let's add some features\n", + "# Length\n", + "dataframe[\"length\"] = dataframe.apply(lambda row: len(row[\"meta_path\"]), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Edge/Node Type counts\n", + "for mp_type in types:\n", + " dataframe[mp_type + \"_count\"] = dataframe.apply(lambda row: row[\"meta_path\"].count(mp_type), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert rating to a number\n", + "dataframe[\"rating\"] = dataframe[\"rating\"].apply(pd.to_numeric)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
batch_numberidmeta_pathratinglengthPRODUCED_countDIRECTED_countWROTE_countACTED_IN_countPerson_countMovie_count
001[Person, PRODUCED, Movie, DIRECTED, Person, PR...0.19210132
102[Person, PRODUCED, Movie, ACTED_IN, Person]0.255100121
203[Person, DIRECTED, Movie, WROTE, Person, PRODU...0.159111132
304[Person, DIRECTED, Movie, PRODUCED, Person, WR...0.159111132
405[Person, PRODUCED, Movie, ACTED_IN, Person, AC...0.29100332
\n", + "
" + ], + "text/plain": [ + " batch_number id meta_path rating \\\n", + "0 0 1 [Person, PRODUCED, Movie, DIRECTED, Person, PR... 0.1 \n", + "1 0 2 [Person, PRODUCED, Movie, ACTED_IN, Person] 0.25 \n", + "2 0 3 [Person, DIRECTED, Movie, WROTE, Person, PRODU... 0.15 \n", + "3 0 4 [Person, DIRECTED, Movie, PRODUCED, Person, WR... 0.15 \n", + "4 0 5 [Person, PRODUCED, Movie, ACTED_IN, Person, AC... 0.2 \n", + "\n", + " length PRODUCED_count DIRECTED_count WROTE_count ACTED_IN_count \\\n", + "0 9 2 1 0 1 \n", + "1 5 1 0 0 1 \n", + "2 9 1 1 1 1 \n", + "3 9 1 1 1 1 \n", + "4 9 1 0 0 3 \n", + "\n", + " Person_count Movie_count \n", + "0 3 2 \n", + "1 2 1 \n", + "2 3 2 \n", + "3 3 2 \n", + "4 3 2 " + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.40560000000000002" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe[\"rating\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From ed7a369a991b19d45da448f325780edfd97f8a67 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Mon, 12 Mar 2018 14:11:59 +0100 Subject: [PATCH 14/35] Update exploration notebook. --- notebooks/pl-1-1.0-exploration_open-day.ipynb | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/notebooks/pl-1-1.0-exploration_open-day.ipynb b/notebooks/pl-1-1.0-exploration_open-day.ipynb index 71451f2..eea9bd9 100644 --- a/notebooks/pl-1-1.0-exploration_open-day.ipynb +++ b/notebooks/pl-1-1.0-exploration_open-day.ipynb @@ -553,6 +553,47 @@ "dataframe[\"rating\"].mean()" ] }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAADGdJREFUeJzt3V+Infldx/H3pwm50VayZATJn02QtBBqcfGYCoJW2YUsQiJYJQuChWoQjBWrYkTZrvFuBXuVi0ZY9GYb173QUSIBteIfupLZulSTEB1ia4Zc7HQ3tRdCt9GvFzurp7MnOc+ZnMlJvn2/YGCe5/nteb6BzDsPz+xzTqoKSVIv71n0AJKk+TPuktSQcZekhoy7JDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIa2rmoE+/Zs6cOHjy4qNNL0iPp1Vdf/UpVLU1bt7C4Hzx4kJWVlUWdXpIeSUm+PGSdt2UkqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhoy7JDW0sIeYpAclyQM5j59HrIeJcVd7s0Y3iaHWI8/bMpLUkHGXpIaMuyQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhgbFPcmxJNeTrCY5M+H4p5O8tvH1r0m+Ov9RJUlDTX1vmSQ7gHPAU8AacDnJclVdfWdNVf3y2PpfBJ7YhlklSQMNuXI/CqxW1Y2qegu4AJy4x/pngM/OYzhJ0tYMifte4ObY9trGvndJ8jhwCPjruxw/lWQlycr6+vqss0qSBhoS90lvhn2390M9CbxcVf896WBVna+qUVWNlpaWhs4oSZrRkLivAfvHtvcBt+6y9iTekpGkhRsS98vA4SSHkuzi7YAvb16U5APAbuDz8x1RkjSrqXGvqjvAaeAScA14qaquJDmb5PjY0meAC+VH2EjSwg36mL2qughc3LTv2U3bz81vLEnS/fAJVUlqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDRl3SWrIuEtSQ8Zdkhoy7pLUkHGXpIaMuyQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhgbFPcmxJNeTrCY5c5c1P5XkapIrSV6c75iSpFnsnLYgyQ7gHPAUsAZcTrJcVVfH1hwGfgP4waq6neQ7t2tgSdJ0Q67cjwKrVXWjqt4CLgAnNq35OeBcVd0GqKrX5zumJGkWQ+K+F7g5tr22sW/c+4H3J/mHJK8kOTavASVJs5t6WwbIhH014XUOAx8B9gF/l+SDVfXVb3qh5BRwCuDAgQMzDytJGmbIlfsasH9sex9wa8KaP62qb1TVvwPXeTv236SqzlfVqKpGS0tLW51ZkjTFkLhfBg4nOZRkF3ASWN605k+AHwFIsoe3b9PcmOegkqThpsa9qu4Ap4FLwDXgpaq6kuRskuMbyy4BbyS5CnwO+LWqemO7hpYk3VuqNt8+fzBGo1GtrKws5NzSvSRhUT8X0jRJXq2q0bR1PqEqSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDRl3SWrIuEtSQ8Zdkhoy7pLU0JBPYpIeGo899hi3b9/e9vMkkz6AbL52797Nm2++ue3n0bcm465Hyu3bt9u8He+D+AdE37q8LSNJDRl3SWrIuEtSQ8Zdkhoy7pLUkHGXpIaMuyQ1NCjuSY4luZ5kNcmZCcc/lmQ9yWsbXz87/1ElSUNNfYgpyQ7gHPAUsAZcTrJcVVc3Lf2jqjq9DTNKkmY05Mr9KLBaVTeq6i3gAnBie8eSJN2PIXHfC9wc217b2LfZTyT5YpKXk+yf9EJJTiVZSbKyvr6+hXElSUMMifukN8DY/OYefwYcrKoPAX8J/OGkF6qq81U1qqrR0tLSbJNKkgYbEvc1YPxKfB9wa3xBVb1RVV/f2Px94PvmM54kaSuGxP0ycDjJoSS7gJPA8viCJN81tnkcuDa/ESVJs5r6f8tU1Z0kp4FLwA7ghaq6kuQssFJVy8AnkhwH7gBvAh/bxpklSVNkUe+NPRqNamVlZSHn1qMrSav3c+/yZ9GDk+TVqhpNW+cTqpLUkHGXpIaMuyQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhoy7JDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDQ2Ke5JjSa4nWU1y5h7rPpqkkkz9ZG5J0vaZGvckO4BzwNPAEeCZJEcmrHsv8AngH+c9pCRpNjsHrDkKrFbVDYAkF4ATwNVN634HeB741blOKI2pT70PnvuORY8xF/Wp9y16BDU2JO57gZtj22vAh8cXJHkC2F9Vf57EuGvb5Le/RlUteoy5SEI9t+gp1NWQe+6ZsO//frqSvAf4NPArU18oOZVkJcnK+vr68CklSTMZEvc1YP/Y9j7g1tj2e4EPAn+T5EvADwDLk36pWlXnq2pUVaOlpaWtTy1Juqchcb8MHE5yKMku4CSw/M7BqvrPqtpTVQer6iDwCnC8qla2ZWJJ0lRT415Vd4DTwCXgGvBSVV1JcjbJ8e0eUJI0uyG/UKWqLgIXN+179i5rP3L/Y0mS7odPqEpSQ8Zdkhoy7pLUkHGXpIaMuyQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhoy7JDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNTQo7kmOJbmeZDXJmQnHfz7JPyd5LcnfJzky/1ElSUNNjXuSHcA54GngCPDMhHi/WFXfU1XfCzwP/N7cJ5UkDTbkyv0osFpVN6rqLeACcGJ8QVV9bWzz24Ca34iSpFntHLBmL3BzbHsN+PDmRUl+AfgksAv40blMJ0nakiFX7pmw711X5lV1rqq+G/h14LcmvlByKslKkpX19fXZJpUkDTYk7mvA/rHtfcCte6y/APz4pANVdb6qRlU1WlpaGj6lJGkmQ+J+GTic5FCSXcBJYHl8QZLDY5s/Bvzb/EaUJM1q6j33qrqT5DRwCdgBvFBVV5KcBVaqahk4neRJ4BvAbeBntnNoSdK9DfmFKlV1Ebi4ad+zY9//0pznkiTdB59QlaSGjLskNWTcJamhQffcpYdJMunRi0fP7t27Fz2CGjPueqRUbf87WyR5IOeRtpO3ZSSpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDRl3SWrIuEtSQ8Zdkhoy7pLUkHGXpIaMuyQ1ZNwlqaFBcU9yLMn1JKtJzkw4/skkV5N8MclfJXl8/qNKkoaaGvckO4BzwNPAEeCZJEc2LfsnYFRVHwJeBp6f96CSpOGGXLkfBVar6kZVvQVcAE6ML6iqz1XVf21svgLsm++YkqRZDIn7XuDm2Pbaxr67+TjwF/czlCTp/uwcsCYT9k38aPgkPw2MgB++y/FTwCmAAwcODBxRkjSrIVfua8D+se19wK3Ni5I8CfwmcLyqvj7pharqfFWNqmq0tLS0lXklSQMMiftl4HCSQ0l2ASeB5fEFSZ4APsPbYX99/mNKkmYxNe5VdQc4DVwCrgEvVdWVJGeTHN9Y9rvAtwN/nOS1JMt3eTlJ0gMw5J47VXURuLhp37Nj3z8557kkSffBJ1QlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhoy7JDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDRl3SWrIuEtSQ8ZdkhoaFPckx5JcT7Ka5MyE4z+U5AtJ7iT56PzHlCTNYmrck+wAzgFPA0eAZ5Ic2bTsP4CPAS/Oe0BJ0ux2DlhzFFitqhsASS4AJ4Cr7yyoqi9tHPufbZhRkjSjIbdl9gI3x7bXNvZJkh5SQ67cM2FfbeVkSU4BpwAOHDiwlZeQZpZM+is8//+maks/FtK2GHLlvgbsH9veB9zaysmq6nxVjapqtLS0tJWXkGZWVQ/kS3qYDIn7ZeBwkkNJdgEngeXtHUuSdD+mxr2q7gCngUvANeClqrqS5GyS4wBJvj/JGvCTwGeSXNnOoSVJ9zbknjtVdRG4uGnfs2PfX+bt2zWSpIeAT6hKUkPGXZIaMu6S1JBxl6SGjLskNZRFPXyRZB348kJOLt3bHuArix5CuovHq2rqU6ALi7v0sEqyUlWjRc8h3Q9vy0hSQ8Zdkhoy7tK7nV/0ANL98p67JDXklbskNWTcpQ1JXkjyepJ/WfQs0v0y7tL/+wPg2KKHkObBuEsbqupvgTcXPYc0D8Zdkhoy7pLUkHGXpIaMuyQ1ZNylDUk+C3we+ECStSQfX/RM0lb5hKokNeSVuyQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhv4X13IX60hXcW4AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.boxplot(dataframe[\"rating\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([9, 5])" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe[\"length\"].unique()" + ] + }, { "cell_type": "code", "execution_count": null, From df4d49040ef6f0cd7116f6eb6d513f92f52a76ff Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Mon, 12 Mar 2018 15:16:01 +0100 Subject: [PATCH 15/35] Add Dockerfile and jupyter notebook config --- Dockerfile-Notebook | 9 +++++++++ deployment/jupyter_notebook_config.py | 7 +++++++ 2 files changed, 16 insertions(+) create mode 100644 Dockerfile-Notebook create mode 100644 deployment/jupyter_notebook_config.py diff --git a/Dockerfile-Notebook b/Dockerfile-Notebook new file mode 100644 index 0000000..24badff --- /dev/null +++ b/Dockerfile-Notebook @@ -0,0 +1,9 @@ +FROM server:latest +EXPOSE 8888 + +RUN apt-get update && apt-get install -y graphviz +RUN pip3 install jupyter + +COPY . /32de-python/ + +ENTRYPOINT ["jupyter", "notebook", "--config", "deployment/jupyter_notebook_config.py"] diff --git a/deployment/jupyter_notebook_config.py b/deployment/jupyter_notebook_config.py new file mode 100644 index 0000000..ea7f629 --- /dev/null +++ b/deployment/jupyter_notebook_config.py @@ -0,0 +1,7 @@ +# Set ip to '*' to bind on all interfaces (ips) for the public server +c.NotebookApp.ip = '*' +c.NotebookApp.password = u'sha1:ba8ffcde0b68:95fa25d7225a3915db1db76799f1695a0483afb4' +c.NotebookApp.open_browser = False + +c.NotebookApp.port = 8888 +c.NotebookApp.allow_root = True From 3a9a53932450fe0b3f7a2a171cd337dca1feb459 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Mon, 12 Mar 2018 17:17:33 +0100 Subject: [PATCH 16/35] Rename notebook --- ...ploration_open-day.ipynb => pl-1.0-exploration_open-day.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename notebooks/{pl-1-1.0-exploration_open-day.ipynb => pl-1.0-exploration_open-day.ipynb} (100%) diff --git a/notebooks/pl-1-1.0-exploration_open-day.ipynb b/notebooks/pl-1.0-exploration_open-day.ipynb similarity index 100% rename from notebooks/pl-1-1.0-exploration_open-day.ipynb rename to notebooks/pl-1.0-exploration_open-day.ipynb From 67815560a7be551e9a567d7833570cbd0a39dea6 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Mon, 12 Mar 2018 17:17:51 +0100 Subject: [PATCH 17/35] Add regression notebook. --- domain_scoring/domain_scoring.py | 39 ++- notebooks/pl-1.0-regression_open-day.ipynb | 298 +++++++++++++++++++++ 2 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 notebooks/pl-1.0-regression_open-day.ipynb diff --git a/domain_scoring/domain_scoring.py b/domain_scoring/domain_scoring.py index a66621c..68f56bf 100644 --- a/domain_scoring/domain_scoring.py +++ b/domain_scoring/domain_scoring.py @@ -1,5 +1,7 @@ from typing import List, Tuple import numpy +from sklearn.linear_model import HuberRegressor + from util.datastructures import MetaPathRatingGraph from util.datastructures import MetaPath from util.lists import all_pairs @@ -73,8 +75,8 @@ def _transform_to_domain_values(self, """ Transforms the classified ordering of all meta-paths pairs to the domain values. - :param inferred_ratings: user-defined and inferred rating for all meta-paths - :return: Total order of all meta-paths with values in [0,1] + :param inferred_ratings: user-defined and inferred rating for all meta-paths. + :return: Total order of all meta-paths with values in [0,1]. """ return self.domain_value_transformer.transform(metapaths_pairs, classification) @@ -89,10 +91,10 @@ def _fit_vectorizer(self, metapath_graph: MetaPathRatingGraph) -> None: def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tuple[MetaPath]], List[int]): """ - Computes all pairwise tuples (a, b) of the meta-paths with their feature vector. If a is ranked higher than b + Computes all pairwise tuples (a, b) of the meta-paths. If a is ranked higher than b a > b then the label is 1, 0 otherwise. - :param metapath_graph: The meta-path graph representing the ordering of all meta-path + :param metapath_graph: The meta-path graph representing the ordering of all meta-path. :return: (x, y) The feature vector and class labels. """ @@ -107,3 +109,32 @@ def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tup metapath_labels.append(LARGER) # > return metapath_pairs, metapath_labels + +class DomainScoringRegressor(DomainScoring): + + def __init__(self): + """ + Extracts the domain value of meta-paths via regression. + """ + super() + self.classifier = HuberRegressor() + + def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tuple[MetaPath]], List[int]): + """ + Computes all pairwise distances (a, b) of the meta-paths. + + :param metapath_graph: The meta-path graph representing the ordering of all meta-path. + :return: (x, y) The meta-paths pairs and their respective distance. + """ + + metapath_pairs = [] + metapath_labels = [] + + for superior, inferior, distance in metapath_graph.stream_meta_path_distances(): + metapath_pairs.append((inferior, superior)) + metapath_labels.append(distance) # < + + metapath_pairs.append((superior, inferior)) + metapath_labels.append(-distance) # > + + return metapath_pairs, metapath_labels \ No newline at end of file diff --git a/notebooks/pl-1.0-regression_open-day.ipynb b/notebooks/pl-1.0-regression_open-day.ipynb new file mode 100644 index 0000000..de0170f --- /dev/null +++ b/notebooks/pl-1.0-regression_open-day.ipynb @@ -0,0 +1,298 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "## Help Python find our packages\n", + "import sys\n", + "sys.path.append('..')\n", + "\n", + "import json\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import domain_scoring.domain_scoring as domain_scoring" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "merlin =json.load(open(path, \"r\", encoding=\"utf8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "potato = json.load(open(path, \"r\", encoding=\"utf8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_mps(data):\n", + " i = 0\n", + " first = True\n", + " batches = []\n", + " batch = []\n", + " for probably_path in data[\"meta_paths\"]:\n", + " # Ignore first time_to_rate\n", + " if first:\n", + " first = False\n", + " continue\n", + " i += 1\n", + " if i == 6:\n", + " # Ignore time_to_rate\n", + " i = 0\n", + " batches.append(batch)\n", + " batch = []\n", + " else:\n", + " if 'time_to_rate' not in probably_path.keys():\n", + " batch.append(probably_path)\n", + " return batches" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def construct_graph(batches):\n", + " ## Construct rating graph\n", + " from util.datastructures import MetaPathRatingGraph, MetaPath\n", + " graph = MetaPathRatingGraph()\n", + "\n", + " for batch in batches:\n", + " #ordered = sorted(batch, key=lambda x: float(x['rating']))\n", + " for metapath in batch:\n", + " for another_metapath in batch:\n", + " if metapath is another_metapath:\n", + " continue\n", + " if float(metapath['rating']) <= float(another_metapath['rating']):\n", + " graph.add_user_rating(MetaPath.from_list(another_metapath['metapath']), MetaPath.from_list(metapath['metapath']), \n", + " distance=float(another_metapath['rating']) - float(metapath['rating']))\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "## Clean up data // remove time_to_rate from array of ratings.\n", + "def clean_up(data):\n", + " batches = extract_mps(data)\n", + " return batches, construct_graph(batches)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "m_batches, m_graph = clean_up(merlin)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "domain_score = domain_scoring.DomainScoringRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import ARDRegression\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.ensemble import RandomForestRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure other predictor and transformer\n", + "domain_score.vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1, 1), token_pattern='\\\\b\\\\w+\\\\b')\n", + "domain_score.classifier = RandomForestRegressor(random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy is 0.673905722753528\n" + ] + } + ], + "source": [ + "domain_score.fit(m_graph, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature ranking:\n", + "1. feature 2 (0.229121)\n", + "2. feature 8 (0.208065)\n", + "3. feature 3 (0.122093)\n", + "4. feature 6 (0.110971)\n", + "5. feature 11 (0.080615)\n", + "6. feature 0 (0.064824)\n", + "7. feature 9 (0.062290)\n", + "8. feature 10 (0.052530)\n", + "9. feature 1 (0.046091)\n", + "10. feature 4 (0.023400)\n", + "11. feature 7 (0.000000)\n", + "12. feature 5 (0.000000)\n" + ] + }, + { + "data": { + "text/plain": [ + "(-1, 12)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAE0CAYAAADALuP1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAIABJREFUeJzt3Xu8XFV5//HPl3C/B5NagYQARSxSuXgALQjWCxdbQFpsoaLRomgrRWu10tr+oKn601rrz6ooiFzUKuAN01ZFfoCgIMgJBOQiGgKaGKsoQSgiMfD0j7UO2RkmOXPOWXufc7K+79drXpm9Z89+9sycPLNmrWevrYjAzMzqsNFkH4CZmXXHSd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9Vk/RRSf8w2cdh1hW5Tt/GQ9K9wFOBxxqrnx4RKyawz+cDn4qInSd2dNOTpAuA5RHx95N9LLbhckvfJuLoiNi6cRt3wi9B0saTGX8iJM2Y7GOwOjjpW3GSniPpOkkPSLolt+BHHnu1pDslPSRpqaTX5fVbAV8BdpT0P/m2o6QLJL2j8fznS1reWL5X0tsk3Qo8LGnj/LzPS7pP0j2STlvPsT6x/5F9S/obST+V9GNJL5X0Eknfk3S/pL9rPPdMSZ+TdHF+PTdJ2qfx+G9L+np+H26XdExP3I9I+rKkh4GTgZcDf5Nf+3/k7U6XdHfe/x2Sjmvs41WSvinpXyStzK/1qMbjO0g6X9KK/Piljcf+QNLifGzXSXpW47G3SfpRjnmXpBcO8LHbdBERvvk25htwL/CiPut3An4OvITUqHhxXp6dH/99YHdAwGHAL4H982PPJ3VvNPd3AfCOxvJa2+TjWAzMAbbIMRcB/wfYFNgNWAocsY7X8cT+875X5+duArwWuA/4NLAN8EzgV8BuefszgV8Dx+ft3wLck+9vAiwB/i4fxwuAh4A9G3F/ARycj3nz3teat3sZsGPe5k+Ah4Gn5cdeleO/FpgB/DmwgjXdtv8FXAzMzMdzWF6/P/BT4KD8vPn5fdwM2BNYBuyYt50H7D7Zf2++lbu5pW8TcWluKT7QaEWeBHw5Ir4cEY9HxOXAMOlLgIj4r4i4O5Krga8Bz5vgcfxbRCyLiEeAA0hfMAsiYlVELAU+Bpww4L5+DbwzIn4NXATMAj4QEQ9FxO3A7cCzGtsviojP5e3/lZS8n5NvWwPvzsdxJfCfwImN534pIq7N79Ov+h1MRHw2IlbkbS4Gvg8c2NjkBxHxsYh4DLgQeBrwVElPA44CXh8RKyPi1/n9hvQlcXZE3BARj0XEhcCj+ZgfIyX/vSRtEhH3RsTdA753Ng046dtEvDQits+3l+Z1uwAva3wZPAAcQkpGSDpK0vW5q+QB0pfBrAkex7LG/V1IXUTN+H9HGnQexM9zAgV4JP/7k8bjj5CS+ZNiR8TjwHJSy3xHYFleN+IHpF9C/Y67L0mvbHTDPADszdrv13834v8y392a9Mvn/ohY2We3uwB/3fMezSG17pcAbyL9ivmppIsk7Tjacdr04aRvpS0DPtn4Mtg+IraKiHdL2gz4PPAvwFMjYnvgy6SuHoB+pWQPA1s2ln+zzzbN5y0D7umJv01EvGTCr6y/OSN3JG0E7EzqYlkBzMnrRswFfrSO437SsqRdSL9STgWekt+v21jzfq3PMmAHSduv47F39rxHW0bEZwAi4tMRcQjpyyGA9wwQz6YJJ30r7VPA0ZKOkDRD0uZ5gHRnUt/2ZqR+8tV50PHwxnN/AjxF0naNdYuBl+RByd8ktULX59vAg3kwcot8DHtLOqDYK1zbsyX9oVLl0JtI3STXAzeQvrD+RtImeTD7aFKX0br8hDQGMWIrUtK9D9IgOKmlP6qI+DFpYPwsSTPzMRyaH/4Y8HpJBynZStLvS9pG0p6SXpC/oH9F+mXz2DrC2DTkpG9FRcQy4FhSl8p9pFblW4GNIuIh4DTgEmAl8KfAwsZzvwt8Bliaux12BD4J3EIaaPwaaWByffEfIyXXfUmDqj8DzgW2W9/zJuBLpAHWlcArgD/M/eergGNI/eo/A84CXplf47p8nNSX/oCkSyPiDuB9wLdIXwi/A1w7hmN7BWmM4rukgds3AUTEMKlf/0P5uJeQBoUhfSm/Ox/zfwO/QfosbQPhk7PMxknSmcBvRcRJk30sZoNyS9/MrCJO+mZmFXH3jplZRdzSNzOriJO+mVlFptyshLNmzYp58+ZN9mGYmU0rixYt+llEzB5tuymX9OfNm8fw8PBkH4aZ2bQi6QeDbOfuHTOzijjpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRZz0zcwq4qRvZlaRKXdy1oRpkCvJjZEnpTOzDYRb+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzijjpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRZz0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6vIhne5xK6UviyjL8loZh1wS9/MrCJO+mZmFXHSNzOryEBJX9KRku6StETS6X0ef7OkOyTdKukKSbs0Hpsv6fv5Nr/kwZuZ2diMmvQlzQA+DBwF7AWcKGmvns1uBoYi4lnA54B/zs/dATgDOAg4EDhD0sxyh29mZmMxSEv/QGBJRCyNiFXARcCxzQ0i4qqI+GVevB7YOd8/Arg8Iu6PiJXA5cCRZQ7dzMzGapCkvxOwrLG8PK9bl5OBr4zzuWZm1qJB6vT7FaT3LSqXdBIwBBw2ludKOgU4BWDu3LkDHJKZmY3HIC395cCcxvLOwIrejSS9CHg7cExEPDqW50bEORExFBFDs2fPHvTYzcxsjAZJ+jcCe0jaVdKmwAnAwuYGkvYDziYl/J82HroMOFzSzDyAe3heZ2Zmk2DU7p2IWC3pVFKyngGcFxG3S1oADEfEQuC9wNbAZ5WmJ/hhRBwTEfdL+ifSFwfAgoi4v5VXYmZmo1JMsTlfhoaGYnh4ePw7KD0nDvSfF8dz75jZFCJpUUQMjbadz8g1M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzijjpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRZz0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVpGBkr6kIyXdJWmJpNP7PH6opJskrZZ0fM9jj0lanG8LSx24mZmN3cajbSBpBvBh4MXAcuBGSQsj4o7GZj8EXgW8pc8uHomIfQscq5mZTdCoSR84EFgSEUsBJF0EHAs8kfQj4t782OMtHKOZmRUySPfOTsCyxvLyvG5Qm0salnS9pJeO6ejMzKyoQVr66rMuxhBjbkSskLQbcKWk70TE3WsFkE4BTgGYO3fuGHZtZmZjMUhLfzkwp7G8M7Bi0AARsSL/uxT4OrBfn23OiYihiBiaPXv2oLs2M7MxGiTp3wjsIWlXSZsCJwADVeFImilps3x/FnAwjbEAMzPr1qhJPyJWA6cClwF3ApdExO2SFkg6BkDSAZKWAy8DzpZ0e376bwPDkm4BrgLe3VP1Y2ZmHVLEWLrn2zc0NBTDw8Pj34H6DUFMUL/3qHScKfY5mNn0ImlRRAyNtp3PyDUzq4iTvplZRZz0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVpGNJ/sAbD26usi7mVXDLX0zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OK+Ixc85m/ZhVxS9/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCoyUNKXdKSkuyQtkXR6n8cPlXSTpNWSju95bL6k7+fb/FIHbmZmYzdq0pc0A/gwcBSwF3CipL16Nvsh8Crg0z3P3QE4AzgIOBA4Q9LMiR+2mZmNxyAt/QOBJRGxNCJWARcBxzY3iIh7I+JW4PGe5x4BXB4R90fESuBy4MgCx21mZuMwSNLfCVjWWF6e1w1iIs81M7PCBkn6/U7XHPR0y4GeK+kUScOShu+7774Bd21mZmM1SNJfDsxpLO8MrBhw/wM9NyLOiYihiBiaPXv2gLs2M7OxGiTp3wjsIWlXSZsCJwALB9z/ZcDhkmbmAdzD8zozM5sEoyb9iFgNnEpK1ncCl0TE7ZIWSDoGQNIBkpYDLwPOlnR7fu79wD+RvjhuBBbkdWZmNgkUU2w2xKGhoRgeHh7/DrqaMbJ0nC5iTHYcM2uNpEURMTTadj4j18ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFNp7sA7CKeGI3s0nnlr6ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzijjpm5lVxEnfzKwinlrZNjylp3D29M22AXFL38ysIm7pm42HLwhj05Rb+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzigyU9CUdKekuSUsknd7n8c0kXZwfv0HSvLx+nqRHJC3Ot4+WPXwzMxuLUev0Jc0APgy8GFgO3ChpYUTc0djsZGBlRPyWpBOA9wB/kh+7OyL2LXzcZmY2DoO09A8ElkTE0ohYBVwEHNuzzbHAhfn+54AXSm2cvWJWGan8zao2SNLfCVjWWF6e1/XdJiJWA78AnpIf21XSzZKulvS8CR6vmZlNwCDTMPRrGvSeL76ubX4MzI2In0t6NnCppGdGxINrPVk6BTgFYO7cuQMckpmZjccgLf3lwJzG8s7AinVtI2ljYDvg/oh4NCJ+DhARi4C7gaf3BoiIcyJiKCKGZs+ePfZXYWZmAxkk6d8I7CFpV0mbAicAC3u2WQjMz/ePB66MiJA0Ow8EI2k3YA9gaZlDNzOzsRq1eyciVks6FbgMmAGcFxG3S1oADEfEQuDjwCclLQHuJ30xABwKLJC0GngMeH1E3N/GCzEzs9Eppth0rkNDQzE8PDz+HXQ15W0XF+qYrq9lQ4uzIb2WyY5jrZG0KCKGRtvOZ+SamVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzijjpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRZz0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVGfXC6GZmxXRx/WJbL7f0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCoyUNKXdKSkuyQtkXR6n8c3k3RxfvwGSfMaj/1tXn+XpCPKHbqZmY3VqElf0gzgw8BRwF7AiZL26tnsZGBlRPwW8H7gPfm5ewEnAM8EjgTOyvszM7NJMEhL/0BgSUQsjYhVwEXAsT3bHAtcmO9/DnihJOX1F0XEoxFxD7Ak78/MzCbBIBdG3wlY1lheDhy0rm0iYrWkXwBPyeuv73nuTr0BJJ0CnJIX/0fSXQMd/cTMAn420JYTu5jzYHEmfsHoLuJMrfesqzj+bKZunK4+m6kfA2CXQTYaJOn3e1d7L0G/rm0GeS4RcQ5wzgDHUoyk4YgYcpypFcNxpm4Mx5m6McZikO6d5cCcxvLOwIp1bSNpY2A74P4Bn2tmZh0ZJOnfCOwhaVdJm5IGZhf2bLMQmJ/vHw9cGRGR15+Qq3t2BfYAvl3m0M3MbKxG7d7JffSnApcBM4DzIuJ2SQuA4YhYCHwc+KSkJaQW/gn5ubdLugS4A1gNvCEiHmvptYxVV91JG1KcDem1bGhxNqTXsqHF6bTrejRKDXIzM6uBz8g1M6uIk76ZWUWc9M3MKuKkb2ZWkUFOztpgSHo68BHgqRGxt6RnAcdExDsm+dDGTdJOpDPxnvgsI+KaQvueAbyGdH7FVyPi2sZjf1/ifesiRp+YvwvMY+337BMF938E6fVcERH3Ntb/WUScN41ivHl9j0fEvxaKs/8ocW4qEGNb4G9J79lXIuLTjcfOioi/mGiMnnhbAn8NzI2I10raA9gzIv6zZJzxqKp6R9LVwFuBsyNiv7zutojYu9D+5wDvJU018RXgvRHx6/zYpRHx0hJxGvHeA/wJqSR2pBQ2IuKYQvs/F9iSdG7FK4CrI+LN+bGbImK9/1mnSoyeeJ8EdgcWs/Z7dlqh/b8LOAS4CTga+H8R8cH8WKn3rPUYeV9n5Lt7Agew5vyco4FrIuI1heJcle9uDgwBt5DO5n8WcENEHFIgxueB75Omhfkz4NfAn0bEoy39nV0MLAJemRuYWwDfioh9S8YZl4io5gbcmP+9ubFuccH9Xw68HtgX+CBwHfCU3pgF490FbNbi+3Vr4/7GpHrjLwCblXo9XcToiXcnubHT0nv2HWDjfH974MvA+0v+DXQRoyfe14BtGsvbkH6VlY5zEfA7jeW9gQsK7Xtxz/LbgWtJc4Td1MJrGe79PIBbSscZz622Pv2fSdqdPP+PpOOBHxfc/+yI+GhELI6IvwTOAq5pxixsKbBJC/sdsenInYhYHRGnkFrIVwJbT6MYTbcBv9nCfkdsHBGrASLiAVKreFtJn6XxWqdBjKa5wKrG8ipS91hpz4iI74wsRMRtpAZUCZtJeiLfRcQ7SQ2Ma0iJv7RVuXU/kmt2Bx5tIc6YVdWnD7yB9EE/Q9KPgHuAkwrufxNJm0fErwAi4lOS/pt0NvNWBeOM+CWwWNIVNP6golBXBTAs6ciI+Gpj3wskrSCNjUyXGE2zgDskfZu137MiXWLA3ZIOi4ir834fA06W9A7gj6ZRjKZPAt+W9EVSEjsOKDYG0nBn7u77VI5zEumXWQn/AbwA+P8jKyLiQkk/If0qL+1M4KvAHEn/DhwMvLqFOGNWVZ/+CElbARtFxEOF9/tXpJ+KV/es3w/454h4ceF48/utj4gL+603kHRYv/W9n9kE9r9F3t8jfR7bKSJ+NB1i9Nnv/sDz8uI1EXFzCzE2B/4cOHQkDvCRkUbUdCPpKcBzSOMT10dEF9Mrj6qKpC/ppNzq7luNEIWqECZDngTv6XnxrsgDxy3EabviZcpWO4xXW5VVXVS79Il5CLBHRJwvaTawdaQLI5WOswXpb6DoNTW6qkRqxLsiIl442rrJUEv3zkjXyjZdBOuqNFTS80lXLLuX1JqYI2l+icTSE6dvxQtlf+KfT6p2eG5eXg58FiiS9CV9MyIOkfQQa4+viFS9s22JOI14fSurSK3XiXpf/rdvtQupsqeYXMUzRKriOZ80jvQpUpdFyTjHkKrfNgV2lbQvsKBQ19vI//2+lUgF9g888WtlS2CWpJmsuabItsCOpeJMyGSPJHd5Iw20dhHnatJlIZsj97e1EGcRqTU8svx0YFELcVqteMkxpkS1AzCz0H5arazKMVqrdumJs5iUvJqfza0txFlEuhZHa3FouRIJeCNprPDR/O/I7Rbg1Db/Hga91Va9c52kr0k6OX8Lt2XLiOi9bsDqFuJsEo2fwRHxPdqp5mm74gWmTrXDFYX203ZlFbRb7dK0KlJGG/ls2ihKAFgdEb9oad8jWq1EiogPRMSuwFsiYtfGbZ+I+FCpOBNRS/cOABGxh6QDSfP9v13SHaQLt3+qcKi2S0NHDEv6OKm6AlK1w6IW4rRd8QJTp9phwhddzdqurIJ2q12aLpF0NrC9pNeSTm46t4U4t0n6U2BGHtM5jXSuS0ldVSKdLek01gxKf510UmgrY25jUcVAbj+SZgH/Crw8ImYU3vdupNLQ3wVWkktDo3G6fKE4m5HKUA8hJatrgLMiomgLue2Kl0acSa92KHjWbOuVVV1Wu0h6MXA46bO5LCIubyHGlqSTpg7Pqy4D3lH69XRUiXQu6ZfeyOf9CuCxKHQW80RUlfTz/BvHkVr6uwNfBC6JiDZax62Vhq4j1g7AzhFxa9ux2jBVqh0KT2PQemVVW9UuPTHeExFvG23ddNFFJZKkWyJin9HWTYba+vRvIfV5LoiIp0fE20omfEkn5X/fnEvEXge8trFclKSvS9o2J/zFwPmSipWeSfpm/vchSQ82bg9JerBQjM3z8c+SNFPSDvk2j8mpdijSvZMrq74PfJh0Zvb3JB263ieNPcYxpM/9q3l5X0m9168uod/5JUeVDiLpcknbN5ZnSrqscIwzgLeRJl+DNZVIpT2Wu3hH4u7GmiquSVVVnz6wW0SEpG0kbR0R/1N4/52WhgLbRcSDkl4DnB8RZ0gq1tKPPNFVRKz39UiaGRErxxnmdcCbSAm+WV/+IClhFpG/WNYpIu7Pd0v9sngfcPhICzyX8X4GeHah/QOcQaoS+zpARCzOX5ZFSPpz4C+A3Xv+rrahfF87wKxI00oAEBErJf1G4RjHAfuR/9YiYoWkNv6/vhW4StJSUkNiF6bIGbm1Jf1n5przHQBJug+Yn6seJiwizs53z4qI+0rscxQbS3oa8MekvtDJcgUwri6RiPgA8AFJfxl5psiWLCIN3IlUwbEy398e+CGwaz6e+9e1gzF6UmWVpNLVPKsj4hdSqbHnJ/k0abbY/wuc3lj/UMH3qelxSXMj4ocAknah/JxVq3LDr7VKJKU5fh4B9iCdFyDgu6XH2sartqR/DvDmiLgKnvgJPjLgWtJ1ku4BLga+MIFW8Gj+kTTY9c2IuDH/hPx+S7HWp0TWabXaIZfRIemjwMKI+HJePgp4UYkYPbqorGq12iWXT/5C0geA+0fGpvIv5YMi4oZSsbK3A99UmgId0t/CKYVjtF6JFBGPS3pfRDwXmHJjbLUN5HY2uNIoDX0p6azMoqWhShcfOS0i3l9qnxM4lgkPfnZV7SBpUUQ8u2fdcEQMFY7TemVVh9UuNwP751r9kZbscKkB755Ys1hTwfWtNiq4OqpE+kdSwv9CTLEkW1vS/yKpL6/Z+hqKwhc36YnZZmnoVRHxeyX3Oc7jKJH0O/lCzgOD32Dt2vZDI+KIknF6Yk73yqrF0XPxD0m3RsSzCsfpO9AdBacV6aoSSWm6jy1Jg7cjX/QRhaf7GI/aqnf+DJgNfJ50oY5ZwKtKB8kVNfMlfYX0c/vHpAG30q6T9CFJz5O0/8ithTijKdG901W1w4mkv4Ev5tvsvK6otiurcozWq12ypZJOk7RJvr2RdMZxaW9t3P6BNB3ymYVjdFKJBFxKOodin4jYJt8mPeFDfS39IdLP4XmsGc+IFlos95A+9Esi4lsl990T56o+qyMiXlBo/wNVvEjaYaIDe5JeSJrMa61qh5Hxl9Jaqt5q7v/miNgvV1bNGamsKvm3NhJjtHUF4vwG8G+k+eiDNHD/poj4ack4feLOIU1JPuEv5WYlErCk8dA2wHUR8fKJxuiJ9wJS197zgN2Am4Fv5MKFSVVb0r8LeAtpLpnHR9ZHxA8Kx9FIaWjafXvJpU35y2udFS8jg6MF4mxE6sddRMvVDkpTRJ9LOiFnrqR9gNdF+Qtjf4fUb3wh8PY80F466S8CjuupdvliG33tk0GpLOnWiPidAvvaDphJd5VII+NuBwC/R7qM6iMR8Yw2Yo1FbdU790XEf3QQp9XS0BGSngq8C9gxIo6StBfw3Ij4eIn9d1Xx0nG1w/uBI8hT60bELevqS56gLiqruqh26XKq8A+ypkRzI9KJlLeU2HfXlUhKcy5tBXyLNIZ0QNu/jAZVW0v/haT+295JsL5QOM51pNZdszT0XRFRtDQ0jxmcn2PtI2lj0rS0E24Z9cRpveKlq2oHSTdExEHNbpDSA8ZdVlZ1VO1yNamf/ezGe3ZbROxdOE5zvqLVwL0RcW3hGJ1UIkl6P+lEvEdJF2C/hvT5POlqZ12rraX/auAZpNLAke6dIA3qlrRVsy86Ir6udqajnRURl0j62xxntaQ2Bj9/JunvWbvi5eeFY7yZXO0gqc1qh2W5iyeU5sY5jcIzU0bEY0pTJLSa9Bu/UEamxNhLUtFql2zLiPi21j4JrPhU4dHNZT7VbFTkX5nF82BE/BWksSNS3jmfND35ZqVjjVVtSX+f0q3gdVgq6R9YuzS0+KXlgIeVZqYcabU8B2hjPvITSaf8j0xHew3lK14uJf0M/kZEtDE98IjXAx8AdiJdnetrpAG+0q6T9CHSCXoPj6yMspcyfGvj/uakCrFFpAHXklqdKjyPf6zz113hQoulSicBfiQv/wUtVCJJOpU0iPts4AfAeaS/70lXW/fOx4D3R8QdLceZSerTPZg1J+acGY15RQrF2R/4IPBM4HZS+eHxbdWDt1nx0lW1g6SDe7sM+q0rEKfVyqp1xCxW7dKz335Thb+8VAFEHoCGdDIbrGksvRz4ZUQsKBEnx+qkEknSW0n/7xdFRBsXUBq32pL+naSSrZHLmY1cH7V0yWZXpaGbA6eSBiYfIg0afTDKn5HZVcVL69UO6nMiWb9101HJapfGPjciNSQuUctThUu6NiIOHm2dTUxt3TtHdhTn3+lTGtqCT5D6c9+Vl08ktZJeVjhO6xUvbVc7SHouqaU6W2tPc70tUPRM6Ryv1cqqHKO1apcRuc/7VNI5Jw+P+oSJ2UrSIRExMqX377Jm5toiuqpEmsqqSvql6/HXo6vS0D17qk6uklT0P/2IiFjWM5BXesD4VlL/596kcYkHJJWsdtgU2Jr0N9+cSvdB4PhCMZouIFdW5eXvkfr3iyV9YLhxfzXwmdLdVNnlkt7Ck8cnSte3nwycl2vqAR4gnUVf0sfIlUgAEXGrpE8DTvo2IWcoTSDWamkocLOk50TE9QCSDiKVh5XWRcVLq9UOkS7teLWkCzr68m+9sqqjahdYk3jf0FgXpLGXYiJd0GgfpSvcKdq5SHqiFFnzAAAEQ0lEQVQnlUhTmZN+O7oqDT0IeKWkH+bluaSLZX+HsmMIrVe8dFjtcK6kl40MqudB94ui/IRrrVVWdVzt8sRJem3LLfwzyNNr5/MDFhRO/q1WIk0HTvrt6Ko0tKsxij2jZ24SSQdT9lfFFqTZSNuuduji6kyQzjtYCOwm6VpyZVWhff9B/rdvtUuhGEh6QURcKekP+z3ewi/X80jjYH+cl19B+sXXN/44vYFUifQMST8iVyIV3P+U56Tfjusl7dV2aWiHYxQf5MlXxuq3btwi4r2l9jWK3qszzaP81ZkgXUPhi6Qk/BDpPITvldjxyOeeS02blS2n5y+YUiWOhwJXAkezZg6m5r+lk/7uEfFHjeV/lLS41M5zJdJQRLyo7UqkqcxJvx2HAPOVJixrrTS0bV1XvHSkk/lq6Kayqu1ql4fy534ba5I9tPMlCfBIz+s5mHTZwSI6rkSaspz029FVt0vbuq54aV1EfDWfR3EKaZ77L1EwsTR0UVnVdrXL1vnfPUnnT3yJlPiPJp14VNrrgU80Xs9KYP56th+PriqRpqyqTs6y8ZG0S4ddSa1Smt/+jcDOpKT/HNJEWEXPlJV0AfDRnsqq+aVPaMv7brPaBUlfA/4oGjNTAp+NiGKNm56TwLYFiIgHR3naeOL0mw4lIqJoJdJUVtuVs2x8zlU3V2jqwhtJrdYfRLrU5H7AfS3EOYg0/869ku4lnXR2mKTvSCoyTYak7ZSuxnUlcIWk9zVaySXNBVY1lleRzjYvJiIeJ51dTkQ82EbCz/vetc+tmoQP7t6xwXRV8dKFX0XEryQhabOI+K6kPVuI00UXXxfVLpDGIr6tdI3pAI5jzQXsS2qt62USKpGmLCd9G0RXFS9dWJ5/tVxKSjIrgRWlg3TUHdZqtcuIiHin0rUbnpdXvToibi4dhzQeETz5HJASLfGuK5GmLCd9G0RXFS+ti4jj8t0zlWbC3A746iQe0kS0Wu3SFGlK6JLTQvezFynhH0JKxN8APlpo311XIk1ZHsi1geTunJGKl82Bn0b5i3XYGOTZTj9B+uKCXO0SLU2t3TZJl5Aqw/49rzoR2D4i/njdzxp432fku30rkSLiNRONMV046duouqp4scF1Ve3SJfW5bGW/dROM0Xol0lTn6h0bRFcVLzagrqpdOnZznqMIaG0CwdYrkaY69+nbILqqeLGx2dBONOpiAsGuKpGmLHfv2Kjyf5BXA28iXWZuJbBJRLxkUg+scvlEoyf9B56udedac9nEvkpVRCldZnSkEumaliqRpiwnfRsTSYeRK14iYtVo21t7JG1Bn2qXgheesQ2Qk77ZNNVmtYttuJz0zaapLqpdbMPj6h2z6auLahfbwLilbzZNSbqTdLLRWtUupEt0TrvrN1g3nPTNpqmuql1sw+Kkb2ZWEffpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRf4XmrL2XrlCWaAAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "importances = domain_score.classifier.feature_importances_\n", + "indices = np.argsort(importances)[::-1]\n", + "\n", + "# Print the feature ranking\n", + "print(\"Feature ranking:\")\n", + "\n", + "for f in range(len(domain_score.classifier.feature_importances_)):\n", + " print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n", + "\n", + "# Plot the feature importances of the forest\n", + "plt.figure()\n", + "plt.title(\"Feature importances\")\n", + "plt.bar(range(len(domain_score.classifier.feature_importances_)), importances[indices],\n", + " color=\"r\", align=\"center\")\n", + "a = list(domain_score.vectorizer.vocabulary_.values())\n", + "a.extend([i + len(list(domain_score.vectorizer.vocabulary_.values())) for i in list(domain_score.vectorizer.vocabulary_.values())])\n", + "\n", + "features = list(domain_score.vectorizer.vocabulary_.keys())\n", + "features.extend([i + '2' for i in list(domain_score.vectorizer.vocabulary_.keys())])\n", + "features_ordered = [features[a.index(i)] for i in indices]\n", + "\n", + "plt.xticks(range(0,len(features_ordered)), \n", + " features_ordered,\n", + " rotation=90)\n", + "plt.xlim([-1, len(domain_score.classifier.feature_importances_)])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c42fcc35c35343115631dff4245fc115e08d4f3d Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Mon, 12 Mar 2018 17:41:16 +0100 Subject: [PATCH 18/35] Add accuracy score to regressor. --- domain_scoring/domain_scoring.py | 20 ++++-- notebooks/pl-1.0-regression_open-day.ipynb | 71 ++++++++++++---------- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/domain_scoring/domain_scoring.py b/domain_scoring/domain_scoring.py index 68f56bf..721372d 100644 --- a/domain_scoring/domain_scoring.py +++ b/domain_scoring/domain_scoring.py @@ -1,6 +1,6 @@ from typing import List, Tuple import numpy -from sklearn.linear_model import HuberRegressor +from sklearn.ensemble import RandomForestRegressor from util.datastructures import MetaPathRatingGraph from util.datastructures import MetaPath @@ -43,7 +43,7 @@ def fit(self, metapath_graph: MetaPathRatingGraph, test_size: float = False) -> self.classifier = self.classifier.fit(self._preprocess(x_train), y_train) if test_size: - print('Test accuracy is {}'.format(self.classifier.score(X=self._preprocess(x_test), y=y_test))) + self._test_score(x_test, y_test) def predict(self, metapath_unrated: List[MetaPath]) -> List[Tuple[MetaPath, int]]: """ @@ -110,6 +110,9 @@ def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tup return metapath_pairs, metapath_labels + def _test_score(self, x_test, y_test): + print('Test accuracy is {}'.format(self.classifier.score(X=self._preprocess(x_test), y=y_test))) + class DomainScoringRegressor(DomainScoring): def __init__(self): @@ -117,7 +120,7 @@ def __init__(self): Extracts the domain value of meta-paths via regression. """ super() - self.classifier = HuberRegressor() + self.classifier = RandomForestRegressor() def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tuple[MetaPath]], List[int]): """ @@ -137,4 +140,13 @@ def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tup metapath_pairs.append((superior, inferior)) metapath_labels.append(-distance) # > - return metapath_pairs, metapath_labels \ No newline at end of file + return metapath_pairs, metapath_labels + + def _test_score(self, x_test, y_test): + """ + Converts regression result into a binary classification and uses mean accuracy. + """ + test_predict = self.classifier.predict(self._preprocess(x_test)) + score = numpy.mean(numpy.logical_and(numpy.array(y_test) > 0, numpy.array(test_predict) > 0)) + print('Test accuracy is {}'.format(score)) + print('R^2 is {}'.format(self.classifier.score(X=self._preprocess(x_test), y=y_test))) \ No newline at end of file diff --git a/notebooks/pl-1.0-regression_open-day.ipynb b/notebooks/pl-1.0-regression_open-day.ipynb index de0170f..d82b06e 100644 --- a/notebooks/pl-1.0-regression_open-day.ipynb +++ b/notebooks/pl-1.0-regression_open-day.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 91, "metadata": {}, "outputs": [], "source": [ @@ -136,16 +136,17 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 83, "metadata": {}, "outputs": [], "source": [ + "p_batches, p_graph = clean_up(potato)\n", "m_batches, m_graph = clean_up(merlin)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 94, "metadata": {}, "outputs": [], "source": [ @@ -154,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 95, "metadata": {}, "outputs": [], "source": [ @@ -165,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 96, "metadata": {}, "outputs": [], "source": [ @@ -176,14 +177,14 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 97, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Test accuracy is 0.673905722753528\n" + "Test accuracy is 0.3389830508474576\n" ] } ], @@ -193,7 +194,24 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy is 0.42105263157894735\n" + ] + } + ], + "source": [ + "domain_score.fit(p_graph, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 89, "metadata": {}, "outputs": [ { @@ -201,18 +219,18 @@ "output_type": "stream", "text": [ "Feature ranking:\n", - "1. feature 2 (0.229121)\n", - "2. feature 8 (0.208065)\n", - "3. feature 3 (0.122093)\n", - "4. feature 6 (0.110971)\n", - "5. feature 11 (0.080615)\n", - "6. feature 0 (0.064824)\n", - "7. feature 9 (0.062290)\n", - "8. feature 10 (0.052530)\n", - "9. feature 1 (0.046091)\n", - "10. feature 4 (0.023400)\n", - "11. feature 7 (0.000000)\n", - "12. feature 5 (0.000000)\n" + "1. feature 8 (0.345370)\n", + "2. feature 2 (0.245315)\n", + "3. feature 4 (0.067725)\n", + "4. feature 6 (0.049114)\n", + "5. feature 5 (0.043297)\n", + "6. feature 9 (0.041178)\n", + "7. feature 0 (0.040368)\n", + "8. feature 11 (0.036193)\n", + "9. feature 7 (0.035172)\n", + "10. feature 10 (0.033848)\n", + "11. feature 3 (0.033179)\n", + "12. feature 1 (0.029241)\n" ] }, { @@ -221,13 +239,13 @@ "(-1, 12)" ] }, - "execution_count": 30, + "execution_count": 89, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAE0CAYAAADALuP1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAIABJREFUeJzt3Xu8XFV5//HPl3C/B5NagYQARSxSuXgALQjWCxdbQFpsoaLRomgrRWu10tr+oKn601rrz6ooiFzUKuAN01ZFfoCgIMgJBOQiGgKaGKsoQSgiMfD0j7UO2RkmOXPOWXufc7K+79drXpm9Z89+9sycPLNmrWevrYjAzMzqsNFkH4CZmXXHSd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9Vk/RRSf8w2cdh1hW5Tt/GQ9K9wFOBxxqrnx4RKyawz+cDn4qInSd2dNOTpAuA5RHx95N9LLbhckvfJuLoiNi6cRt3wi9B0saTGX8iJM2Y7GOwOjjpW3GSniPpOkkPSLolt+BHHnu1pDslPSRpqaTX5fVbAV8BdpT0P/m2o6QLJL2j8fznS1reWL5X0tsk3Qo8LGnj/LzPS7pP0j2STlvPsT6x/5F9S/obST+V9GNJL5X0Eknfk3S/pL9rPPdMSZ+TdHF+PTdJ2qfx+G9L+np+H26XdExP3I9I+rKkh4GTgZcDf5Nf+3/k7U6XdHfe/x2Sjmvs41WSvinpXyStzK/1qMbjO0g6X9KK/Piljcf+QNLifGzXSXpW47G3SfpRjnmXpBcO8LHbdBERvvk25htwL/CiPut3An4OvITUqHhxXp6dH/99YHdAwGHAL4H982PPJ3VvNPd3AfCOxvJa2+TjWAzMAbbIMRcB/wfYFNgNWAocsY7X8cT+875X5+duArwWuA/4NLAN8EzgV8BuefszgV8Dx+ft3wLck+9vAiwB/i4fxwuAh4A9G3F/ARycj3nz3teat3sZsGPe5k+Ah4Gn5cdeleO/FpgB/DmwgjXdtv8FXAzMzMdzWF6/P/BT4KD8vPn5fdwM2BNYBuyYt50H7D7Zf2++lbu5pW8TcWluKT7QaEWeBHw5Ir4cEY9HxOXAMOlLgIj4r4i4O5Krga8Bz5vgcfxbRCyLiEeAA0hfMAsiYlVELAU+Bpww4L5+DbwzIn4NXATMAj4QEQ9FxO3A7cCzGtsviojP5e3/lZS8n5NvWwPvzsdxJfCfwImN534pIq7N79Ov+h1MRHw2IlbkbS4Gvg8c2NjkBxHxsYh4DLgQeBrwVElPA44CXh8RKyPi1/n9hvQlcXZE3BARj0XEhcCj+ZgfIyX/vSRtEhH3RsTdA753Ng046dtEvDQits+3l+Z1uwAva3wZPAAcQkpGSDpK0vW5q+QB0pfBrAkex7LG/V1IXUTN+H9HGnQexM9zAgV4JP/7k8bjj5CS+ZNiR8TjwHJSy3xHYFleN+IHpF9C/Y67L0mvbHTDPADszdrv13834v8y392a9Mvn/ohY2We3uwB/3fMezSG17pcAbyL9ivmppIsk7Tjacdr04aRvpS0DPtn4Mtg+IraKiHdL2gz4PPAvwFMjYnvgy6SuHoB+pWQPA1s2ln+zzzbN5y0D7umJv01EvGTCr6y/OSN3JG0E7EzqYlkBzMnrRswFfrSO437SsqRdSL9STgWekt+v21jzfq3PMmAHSduv47F39rxHW0bEZwAi4tMRcQjpyyGA9wwQz6YJJ30r7VPA0ZKOkDRD0uZ5gHRnUt/2ZqR+8tV50PHwxnN/AjxF0naNdYuBl+RByd8ktULX59vAg3kwcot8DHtLOqDYK1zbsyX9oVLl0JtI3STXAzeQvrD+RtImeTD7aFKX0br8hDQGMWIrUtK9D9IgOKmlP6qI+DFpYPwsSTPzMRyaH/4Y8HpJBynZStLvS9pG0p6SXpC/oH9F+mXz2DrC2DTkpG9FRcQy4FhSl8p9pFblW4GNIuIh4DTgEmAl8KfAwsZzvwt8Bliaux12BD4J3EIaaPwaaWByffEfIyXXfUmDqj8DzgW2W9/zJuBLpAHWlcArgD/M/eergGNI/eo/A84CXplf47p8nNSX/oCkSyPiDuB9wLdIXwi/A1w7hmN7BWmM4rukgds3AUTEMKlf/0P5uJeQBoUhfSm/Ox/zfwO/QfosbQPhk7PMxknSmcBvRcRJk30sZoNyS9/MrCJO+mZmFXH3jplZRdzSNzOriJO+mVlFptyshLNmzYp58+ZN9mGYmU0rixYt+llEzB5tuymX9OfNm8fw8PBkH4aZ2bQi6QeDbOfuHTOzijjpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRZz0zcwq4qRvZlaRKXdy1oRpkCvJjZEnpTOzDYRb+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzijjpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRZz0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6vIhne5xK6UviyjL8loZh1wS9/MrCJO+mZmFXHSNzOryEBJX9KRku6StETS6X0ef7OkOyTdKukKSbs0Hpsv6fv5Nr/kwZuZ2diMmvQlzQA+DBwF7AWcKGmvns1uBoYi4lnA54B/zs/dATgDOAg4EDhD0sxyh29mZmMxSEv/QGBJRCyNiFXARcCxzQ0i4qqI+GVevB7YOd8/Arg8Iu6PiJXA5cCRZQ7dzMzGapCkvxOwrLG8PK9bl5OBr4zzuWZm1qJB6vT7FaT3LSqXdBIwBBw2ludKOgU4BWDu3LkDHJKZmY3HIC395cCcxvLOwIrejSS9CHg7cExEPDqW50bEORExFBFDs2fPHvTYzcxsjAZJ+jcCe0jaVdKmwAnAwuYGkvYDziYl/J82HroMOFzSzDyAe3heZ2Zmk2DU7p2IWC3pVFKyngGcFxG3S1oADEfEQuC9wNbAZ5WmJ/hhRBwTEfdL+ifSFwfAgoi4v5VXYmZmo1JMsTlfhoaGYnh4ePw7KD0nDvSfF8dz75jZFCJpUUQMjbadz8g1M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzijjpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRZz0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVpGBkr6kIyXdJWmJpNP7PH6opJskrZZ0fM9jj0lanG8LSx24mZmN3cajbSBpBvBh4MXAcuBGSQsj4o7GZj8EXgW8pc8uHomIfQscq5mZTdCoSR84EFgSEUsBJF0EHAs8kfQj4t782OMtHKOZmRUySPfOTsCyxvLyvG5Qm0salnS9pJeO6ejMzKyoQVr66rMuxhBjbkSskLQbcKWk70TE3WsFkE4BTgGYO3fuGHZtZmZjMUhLfzkwp7G8M7Bi0AARsSL/uxT4OrBfn23OiYihiBiaPXv2oLs2M7MxGiTp3wjsIWlXSZsCJwADVeFImilps3x/FnAwjbEAMzPr1qhJPyJWA6cClwF3ApdExO2SFkg6BkDSAZKWAy8DzpZ0e376bwPDkm4BrgLe3VP1Y2ZmHVLEWLrn2zc0NBTDw8Pj34H6DUFMUL/3qHScKfY5mNn0ImlRRAyNtp3PyDUzq4iTvplZRZz0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVpGNJ/sAbD26usi7mVXDLX0zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OK+Ixc85m/ZhVxS9/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCoyUNKXdKSkuyQtkXR6n8cPlXSTpNWSju95bL6k7+fb/FIHbmZmYzdq0pc0A/gwcBSwF3CipL16Nvsh8Crg0z3P3QE4AzgIOBA4Q9LMiR+2mZmNxyAt/QOBJRGxNCJWARcBxzY3iIh7I+JW4PGe5x4BXB4R90fESuBy4MgCx21mZuMwSNLfCVjWWF6e1w1iIs81M7PCBkn6/U7XHPR0y4GeK+kUScOShu+7774Bd21mZmM1SNJfDsxpLO8MrBhw/wM9NyLOiYihiBiaPXv2gLs2M7OxGiTp3wjsIWlXSZsCJwALB9z/ZcDhkmbmAdzD8zozM5sEoyb9iFgNnEpK1ncCl0TE7ZIWSDoGQNIBkpYDLwPOlnR7fu79wD+RvjhuBBbkdWZmNgkUU2w2xKGhoRgeHh7/DrqaMbJ0nC5iTHYcM2uNpEURMTTadj4j18ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFNp7sA7CKeGI3s0nnlr6ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzijjpm5lVxEnfzKwinlrZNjylp3D29M22AXFL38ysIm7pm42HLwhj05Rb+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzigyU9CUdKekuSUsknd7n8c0kXZwfv0HSvLx+nqRHJC3Ot4+WPXwzMxuLUev0Jc0APgy8GFgO3ChpYUTc0djsZGBlRPyWpBOA9wB/kh+7OyL2LXzcZmY2DoO09A8ElkTE0ohYBVwEHNuzzbHAhfn+54AXSm2cvWJWGan8zao2SNLfCVjWWF6e1/XdJiJWA78AnpIf21XSzZKulvS8CR6vmZlNwCDTMPRrGvSeL76ubX4MzI2In0t6NnCppGdGxINrPVk6BTgFYO7cuQMckpmZjccgLf3lwJzG8s7AinVtI2ljYDvg/oh4NCJ+DhARi4C7gaf3BoiIcyJiKCKGZs+ePfZXYWZmAxkk6d8I7CFpV0mbAicAC3u2WQjMz/ePB66MiJA0Ow8EI2k3YA9gaZlDNzOzsRq1eyciVks6FbgMmAGcFxG3S1oADEfEQuDjwCclLQHuJ30xABwKLJC0GngMeH1E3N/GCzEzs9Eppth0rkNDQzE8PDz+HXQ15W0XF+qYrq9lQ4uzIb2WyY5jrZG0KCKGRtvOZ+SamVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCripG9mVhEnfTOzijjpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRZz0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVGfXC6GZmxXRx/WJbL7f0zcwq4qRvZlYRJ30zs4o46ZuZVcRJ38ysIk76ZmYVcdI3M6uIk76ZWUWc9M3MKuKkb2ZWESd9M7OKOOmbmVXESd/MrCJO+mZmFXHSNzOriJO+mVlFnPTNzCoyUNKXdKSkuyQtkXR6n8c3k3RxfvwGSfMaj/1tXn+XpCPKHbqZmY3VqElf0gzgw8BRwF7AiZL26tnsZGBlRPwW8H7gPfm5ewEnAM8EjgTOyvszM7NJMEhL/0BgSUQsjYhVwEXAsT3bHAtcmO9/DnihJOX1F0XEoxFxD7Ak78/MzCbBIBdG3wlY1lheDhy0rm0iYrWkXwBPyeuv73nuTr0BJJ0CnJIX/0fSXQMd/cTMAn420JYTu5jzYHEmfsHoLuJMrfesqzj+bKZunK4+m6kfA2CXQTYaJOn3e1d7L0G/rm0GeS4RcQ5wzgDHUoyk4YgYcpypFcNxpm4Mx5m6McZikO6d5cCcxvLOwIp1bSNpY2A74P4Bn2tmZh0ZJOnfCOwhaVdJm5IGZhf2bLMQmJ/vHw9cGRGR15+Qq3t2BfYAvl3m0M3MbKxG7d7JffSnApcBM4DzIuJ2SQuA4YhYCHwc+KSkJaQW/gn5ubdLugS4A1gNvCEiHmvptYxVV91JG1KcDem1bGhxNqTXsqHF6bTrejRKDXIzM6uBz8g1M6uIk76ZWUWc9M3MKuKkb2ZWkUFOztpgSHo68BHgqRGxt6RnAcdExDsm+dDGTdJOpDPxnvgsI+KaQvueAbyGdH7FVyPi2sZjf1/ifesiRp+YvwvMY+337BMF938E6fVcERH3Ntb/WUScN41ivHl9j0fEvxaKs/8ocW4qEGNb4G9J79lXIuLTjcfOioi/mGiMnnhbAn8NzI2I10raA9gzIv6zZJzxqKp6R9LVwFuBsyNiv7zutojYu9D+5wDvJU018RXgvRHx6/zYpRHx0hJxGvHeA/wJqSR2pBQ2IuKYQvs/F9iSdG7FK4CrI+LN+bGbImK9/1mnSoyeeJ8EdgcWs/Z7dlqh/b8LOAS4CTga+H8R8cH8WKn3rPUYeV9n5Lt7Agew5vyco4FrIuI1heJcle9uDgwBt5DO5n8WcENEHFIgxueB75Omhfkz4NfAn0bEoy39nV0MLAJemRuYWwDfioh9S8YZl4io5gbcmP+9ubFuccH9Xw68HtgX+CBwHfCU3pgF490FbNbi+3Vr4/7GpHrjLwCblXo9XcToiXcnubHT0nv2HWDjfH974MvA+0v+DXQRoyfe14BtGsvbkH6VlY5zEfA7jeW9gQsK7Xtxz/LbgWtJc4Td1MJrGe79PIBbSscZz622Pv2fSdqdPP+PpOOBHxfc/+yI+GhELI6IvwTOAq5pxixsKbBJC/sdsenInYhYHRGnkFrIVwJbT6MYTbcBv9nCfkdsHBGrASLiAVKreFtJn6XxWqdBjKa5wKrG8ipS91hpz4iI74wsRMRtpAZUCZtJeiLfRcQ7SQ2Ma0iJv7RVuXU/kmt2Bx5tIc6YVdWnD7yB9EE/Q9KPgHuAkwrufxNJm0fErwAi4lOS/pt0NvNWBeOM+CWwWNIVNP6golBXBTAs6ciI+Gpj3wskrSCNjUyXGE2zgDskfZu137MiXWLA3ZIOi4ir834fA06W9A7gj6ZRjKZPAt+W9EVSEjsOKDYG0nBn7u77VI5zEumXWQn/AbwA+P8jKyLiQkk/If0qL+1M4KvAHEn/DhwMvLqFOGNWVZ/+CElbARtFxEOF9/tXpJ+KV/es3w/454h4ceF48/utj4gL+603kHRYv/W9n9kE9r9F3t8jfR7bKSJ+NB1i9Nnv/sDz8uI1EXFzCzE2B/4cOHQkDvCRkUbUdCPpKcBzSOMT10dEF9Mrj6qKpC/ppNzq7luNEIWqECZDngTv6XnxrsgDxy3EabviZcpWO4xXW5VVXVS79Il5CLBHRJwvaTawdaQLI5WOswXpb6DoNTW6qkRqxLsiIl442rrJUEv3zkjXyjZdBOuqNFTS80lXLLuX1JqYI2l+icTSE6dvxQtlf+KfT6p2eG5eXg58FiiS9CV9MyIOkfQQa4+viFS9s22JOI14fSurSK3XiXpf/rdvtQupsqeYXMUzRKriOZ80jvQpUpdFyTjHkKrfNgV2lbQvsKBQ19vI//2+lUgF9g888WtlS2CWpJmsuabItsCOpeJMyGSPJHd5Iw20dhHnatJlIZsj97e1EGcRqTU8svx0YFELcVqteMkxpkS1AzCz0H5arazKMVqrdumJs5iUvJqfza0txFlEuhZHa3FouRIJeCNprPDR/O/I7Rbg1Db/Hga91Va9c52kr0k6OX8Lt2XLiOi9bsDqFuJsEo2fwRHxPdqp5mm74gWmTrXDFYX203ZlFbRb7dK0KlJGG/ls2ihKAFgdEb9oad8jWq1EiogPRMSuwFsiYtfGbZ+I+FCpOBNRS/cOABGxh6QDSfP9v13SHaQLt3+qcKi2S0NHDEv6OKm6AlK1w6IW4rRd8QJTp9phwhddzdqurIJ2q12aLpF0NrC9pNeSTm46t4U4t0n6U2BGHtM5jXSuS0ldVSKdLek01gxKf510UmgrY25jUcVAbj+SZgH/Crw8ImYU3vdupNLQ3wVWkktDo3G6fKE4m5HKUA8hJatrgLMiomgLue2Kl0acSa92KHjWbOuVVV1Wu0h6MXA46bO5LCIubyHGlqSTpg7Pqy4D3lH69XRUiXQu6ZfeyOf9CuCxKHQW80RUlfTz/BvHkVr6uwNfBC6JiDZax62Vhq4j1g7AzhFxa9ux2jBVqh0KT2PQemVVW9UuPTHeExFvG23ddNFFJZKkWyJin9HWTYba+vRvIfV5LoiIp0fE20omfEkn5X/fnEvEXge8trFclKSvS9o2J/zFwPmSipWeSfpm/vchSQ82bg9JerBQjM3z8c+SNFPSDvk2j8mpdijSvZMrq74PfJh0Zvb3JB263ieNPcYxpM/9q3l5X0m9168uod/5JUeVDiLpcknbN5ZnSrqscIwzgLeRJl+DNZVIpT2Wu3hH4u7GmiquSVVVnz6wW0SEpG0kbR0R/1N4/52WhgLbRcSDkl4DnB8RZ0gq1tKPPNFVRKz39UiaGRErxxnmdcCbSAm+WV/+IClhFpG/WNYpIu7Pd0v9sngfcPhICzyX8X4GeHah/QOcQaoS+zpARCzOX5ZFSPpz4C+A3Xv+rrahfF87wKxI00oAEBErJf1G4RjHAfuR/9YiYoWkNv6/vhW4StJSUkNiF6bIGbm1Jf1n5przHQBJug+Yn6seJiwizs53z4qI+0rscxQbS3oa8MekvtDJcgUwri6RiPgA8AFJfxl5psiWLCIN3IlUwbEy398e+CGwaz6e+9e1gzF6UmWVpNLVPKsj4hdSqbHnJ/k0abbY/wuc3lj/UMH3qelxSXMj4ocAknah/JxVq3LDr7VKJKU5fh4B9iCdFyDgu6XH2sartqR/DvDmiLgKnvgJPjLgWtJ1ku4BLga+MIFW8Gj+kTTY9c2IuDH/hPx+S7HWp0TWabXaIZfRIemjwMKI+HJePgp4UYkYPbqorGq12iWXT/5C0geA+0fGpvIv5YMi4oZSsbK3A99UmgId0t/CKYVjtF6JFBGPS3pfRDwXmHJjbLUN5HY2uNIoDX0p6azMoqWhShcfOS0i3l9qnxM4lgkPfnZV7SBpUUQ8u2fdcEQMFY7TemVVh9UuNwP751r9kZbscKkB755Ys1hTwfWtNiq4OqpE+kdSwv9CTLEkW1vS/yKpL6/Z+hqKwhc36YnZZmnoVRHxeyX3Oc7jKJH0O/lCzgOD32Dt2vZDI+KIknF6Yk73yqrF0XPxD0m3RsSzCsfpO9AdBacV6aoSSWm6jy1Jg7cjX/QRhaf7GI/aqnf+DJgNfJ50oY5ZwKtKB8kVNfMlfYX0c/vHpAG30q6T9CFJz5O0/8ithTijKdG901W1w4mkv4Ev5tvsvK6otiurcozWq12ypZJOk7RJvr2RdMZxaW9t3P6BNB3ymYVjdFKJBFxKOodin4jYJt8mPeFDfS39IdLP4XmsGc+IFlos95A+9Esi4lsl990T56o+qyMiXlBo/wNVvEjaYaIDe5JeSJrMa61qh5Hxl9Jaqt5q7v/miNgvV1bNGamsKvm3NhJjtHUF4vwG8G+k+eiDNHD/poj4ack4feLOIU1JPuEv5WYlErCk8dA2wHUR8fKJxuiJ9wJS197zgN2Am4Fv5MKFSVVb0r8LeAtpLpnHR9ZHxA8Kx9FIaWjafXvJpU35y2udFS8jg6MF4mxE6sddRMvVDkpTRJ9LOiFnrqR9gNdF+Qtjf4fUb3wh8PY80F466S8CjuupdvliG33tk0GpLOnWiPidAvvaDphJd5VII+NuBwC/R7qM6iMR8Yw2Yo1FbdU790XEf3QQp9XS0BGSngq8C9gxIo6StBfw3Ij4eIn9d1Xx0nG1w/uBI8hT60bELevqS56gLiqruqh26XKq8A+ypkRzI9KJlLeU2HfXlUhKcy5tBXyLNIZ0QNu/jAZVW0v/haT+295JsL5QOM51pNZdszT0XRFRtDQ0jxmcn2PtI2lj0rS0E24Z9cRpveKlq2oHSTdExEHNbpDSA8ZdVlZ1VO1yNamf/ezGe3ZbROxdOE5zvqLVwL0RcW3hGJ1UIkl6P+lEvEdJF2C/hvT5POlqZ12rraX/auAZpNLAke6dIA3qlrRVsy86Ir6udqajnRURl0j62xxntaQ2Bj9/JunvWbvi5eeFY7yZXO0gqc1qh2W5iyeU5sY5jcIzU0bEY0pTJLSa9Bu/UEamxNhLUtFql2zLiPi21j4JrPhU4dHNZT7VbFTkX5nF82BE/BWksSNS3jmfND35ZqVjjVVtSX+f0q3gdVgq6R9YuzS0+KXlgIeVZqYcabU8B2hjPvITSaf8j0xHew3lK14uJf0M/kZEtDE98IjXAx8AdiJdnetrpAG+0q6T9CHSCXoPj6yMspcyfGvj/uakCrFFpAHXklqdKjyPf6zz113hQoulSicBfiQv/wUtVCJJOpU0iPts4AfAeaS/70lXW/fOx4D3R8QdLceZSerTPZg1J+acGY15RQrF2R/4IPBM4HZS+eHxbdWDt1nx0lW1g6SDe7sM+q0rEKfVyqp1xCxW7dKz335Thb+8VAFEHoCGdDIbrGksvRz4ZUQsKBEnx+qkEknSW0n/7xdFRBsXUBq32pL+naSSrZHLmY1cH7V0yWZXpaGbA6eSBiYfIg0afTDKn5HZVcVL69UO6nMiWb9101HJapfGPjciNSQuUctThUu6NiIOHm2dTUxt3TtHdhTn3+lTGtqCT5D6c9+Vl08ktZJeVjhO6xUvbVc7SHouqaU6W2tPc70tUPRM6Ryv1cqqHKO1apcRuc/7VNI5Jw+P+oSJ2UrSIRExMqX377Jm5toiuqpEmsqqSvql6/HXo6vS0D17qk6uklT0P/2IiFjWM5BXesD4VlL/596kcYkHJJWsdtgU2Jr0N9+cSvdB4PhCMZouIFdW5eXvkfr3iyV9YLhxfzXwmdLdVNnlkt7Ck8cnSte3nwycl2vqAR4gnUVf0sfIlUgAEXGrpE8DTvo2IWcoTSDWamkocLOk50TE9QCSDiKVh5XWRcVLq9UOkS7teLWkCzr68m+9sqqjahdYk3jf0FgXpLGXYiJd0GgfpSvcKdq5SHqiFFnzAAAEQ0lEQVQnlUhTmZN+O7oqDT0IeKWkH+bluaSLZX+HsmMIrVe8dFjtcK6kl40MqudB94ui/IRrrVVWdVzt8sRJem3LLfwzyNNr5/MDFhRO/q1WIk0HTvrt6Ko0tKsxij2jZ24SSQdT9lfFFqTZSNuuduji6kyQzjtYCOwm6VpyZVWhff9B/rdvtUuhGEh6QURcKekP+z3ewi/X80jjYH+cl19B+sXXN/44vYFUifQMST8iVyIV3P+U56Tfjusl7dV2aWiHYxQf5MlXxuq3btwi4r2l9jWK3qszzaP81ZkgXUPhi6Qk/BDpPITvldjxyOeeS02blS2n5y+YUiWOhwJXAkezZg6m5r+lk/7uEfFHjeV/lLS41M5zJdJQRLyo7UqkqcxJvx2HAPOVJixrrTS0bV1XvHSkk/lq6Kayqu1ql4fy534ba5I9tPMlCfBIz+s5mHTZwSI6rkSaspz029FVt0vbuq54aV1EfDWfR3EKaZ77L1EwsTR0UVnVdrXL1vnfPUnnT3yJlPiPJp14VNrrgU80Xs9KYP56th+PriqRpqyqTs6y8ZG0S4ddSa1Smt/+jcDOpKT/HNJEWEXPlJV0AfDRnsqq+aVPaMv7brPaBUlfA/4oGjNTAp+NiGKNm56TwLYFiIgHR3naeOL0mw4lIqJoJdJUVtuVs2x8zlU3V2jqwhtJrdYfRLrU5H7AfS3EOYg0/869ku4lnXR2mKTvSCoyTYak7ZSuxnUlcIWk9zVaySXNBVY1lleRzjYvJiIeJ51dTkQ82EbCz/vetc+tmoQP7t6xwXRV8dKFX0XEryQhabOI+K6kPVuI00UXXxfVLpDGIr6tdI3pAI5jzQXsS2qt62USKpGmLCd9G0RXFS9dWJ5/tVxKSjIrgRWlg3TUHdZqtcuIiHin0rUbnpdXvToibi4dhzQeETz5HJASLfGuK5GmLCd9G0RXFS+ti4jj8t0zlWbC3A746iQe0kS0Wu3SFGlK6JLTQvezFynhH0JKxN8APlpo311XIk1ZHsi1geTunJGKl82Bn0b5i3XYGOTZTj9B+uKCXO0SLU2t3TZJl5Aqw/49rzoR2D4i/njdzxp432fku30rkSLiNRONMV046duouqp4scF1Ve3SJfW5bGW/dROM0Xol0lTn6h0bRFcVLzagrqpdOnZznqMIaG0CwdYrkaY69+nbILqqeLGx2dBONOpiAsGuKpGmLHfv2Kjyf5BXA28iXWZuJbBJRLxkUg+scvlEoyf9B56udedac9nEvkpVRCldZnSkEumaliqRpiwnfRsTSYeRK14iYtVo21t7JG1Bn2qXgheesQ2Qk77ZNNVmtYttuJz0zaapLqpdbMPj6h2z6auLahfbwLilbzZNSbqTdLLRWtUupEt0TrvrN1g3nPTNpqmuql1sw+Kkb2ZWEffpm5lVxEnfzKwiTvpmZhVx0jczq4iTvplZRf4XmrL2XrlCWaAAAAAASUVORK5CYII=\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAE0CAYAAADALuP1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAIABJREFUeJzt3X28XVV95/HP1/CkPAhKastzoFGLVkCvYAui4xNgR8AWW6i2aK2UVsZaqpVWO9jUdnwY6zgWK6io1VJEW2k6oyLjA1QR5UYCCpYaIpoYW6MEoYpg4Dt/rHXNzuUk99x79943yf6+X6/zytlPa+1zTu7v7LPWb68l20RExDA8aKFPICIi+pOgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+jFokt4h6U8X+jwi+qLk6cdcSLoNeARwX2P1I22vm0eZTwU+YPuA+Z3d9knSe4G1tl+z0OcSO65c6cd8PMf2Ho3HnAN+GyTttJD1z4ekRQt9DjEMCfrROklPknSNpDsk3VCv4Ke2vUjSVyXdJWm1pN+p63cHPgbsJ+k/62M/Se+V9LrG8U+VtLaxfJukV0m6EfiBpJ3qcf8gab2kr0t62VbO9SflT5Ut6Y8kfUfStyWdKunZkv5N0u2S/qRx7GslfVjSB+vr+ZKkIxrbf07SZ+r7cJOkk6fV+zeSPirpB8CLgecDf1Rf+z/X/c6TdGst/2ZJz22U8UJJn5X0PyVtqK/1pMb2h0l6j6R1dfvljW3/VdLKem7XSHpcY9urJH2r1nmLpKeP8bHH9sJ2HnnM+gHcBjxjxPr9ge8Bz6ZcVDyzLi+u238JOAwQ8BTgh8Dj67anUpo3muW9F3hdY3mzfep5rAQOBB5c61wB/HdgF+BQYDVwwhZex0/Kr2VvrMfuDLwEWA9cAuwJPAb4EXBo3f+1wI+B0+r+rwC+Xp/vDKwC/qSex9OAu4BHNer9PnBsPefdpr/Wut/zgP3qPr8G/AD4mbrthbX+lwCLgN8F1rGp2fb/Ah8E9qnn85S6/vHAd4Bj6nFn1vdxV+BRwBpgv7rvIcBhC/3/LY/2HrnSj/m4vF4p3tG4inwB8FHbH7V9v+0rgUnKlwC2/6/tW11cBXwCePI8z+N/215j+27giZQvmGW277W9GngncPqYZf0Y+AvbPwYuBfYF3mr7Lts3ATcBj2vsv8L2h+v+f0UJ3k+qjz2A19fz+BTwf4AzGsf+k+3P1ffpR6NOxvaHbK+r+3wQ+BpwdGOXb9h+p+37gPcBPwM8QtLPACcBZ9veYPvH9f2G8iVxoe0v2L7P9vuAe+o530cJ/odL2tn2bbZvHfO9i+1Agn7Mx6m2966PU+u6g4HnNb4M7gCOowQjJJ0k6draVHIH5ctg33mex5rG84MpTUTN+v+E0uk8ju/VAApwd/33Pxrb76YE8wfUbft+YC3lynw/YE1dN+UblF9Co857JEm/2WiGuQN4LJu/X//eqP+H9ekelF8+t9veMKLYg4E/nPYeHUi5ul8FvJzyK+Y7ki6VtN9M5xnbjwT9aNsa4P2NL4O9be9u+/WSdgX+AfifwCNs7w18lNLUAzAqlewHwEMayz89Yp/mcWuAr0+rf0/bz573KxvtwKknkh4EHEBpYlkHHFjXTTkI+NYWzvsBy5IOpvxKOQd4eH2/vsKm92tr1gAPk7T3Frb9xbT36CG2/x7A9iW2j6N8ORh4wxj1xXYiQT/a9gHgOZJOkLRI0m61g/QAStv2rpR28o210/FZjWP/A3i4pIc21q0Enl07JX+achW6NV8E7qydkQ+u5/BYSU9s7RVu7gmSflklc+jllGaSa4EvUL6w/kjSzrUz+zmUJqMt+Q9KH8SU3SlBdz2UTnDKlf6MbH+b0jH+dkn71HM4vm5+J3C2pGNU7C7plyTtKelRkp5Wv6B/RPllc98WqontUIJ+tMr2GuAUSpPKespV5SuBB9m+C3gZcBmwAfh1YHnj2H8F/h5YXZsd9gPeD9xA6Wj8BKVjcmv130cJrkdSOlW/C7wLeOjWjpuHf6J0sG4AfgP45dp+fi9wMqVd/bvA24HfrK9xS95NaUu/Q9Lltm8G3gx8nvKF8PPA52Zxbr9B6aP4V0rH7csBbE9S2vX/up73KkqnMJQv5dfXc/534Kcon2XsIHJzVsQcSXot8LO2X7DQ5xIxrlzpR0QMSIJ+RMSApHknImJAcqUfETEgCfoREQOyzY1KuO+++/qQQw5Z6NOIiNiurFix4ru2F8+03zYX9A855BAmJycX+jQiIrYrkr4xzn5p3omIGJAE/YiIARkr6Es6sU6msErSeSO2ny3py3U0wM9KOryuP0TS3XX9SknvaPsFRETE+GZs01eZxu0CymQYa4HrJC2v44JMucT2O+r+J1PGFT+xbrvV9pHtnnZERMzFOFf6RwOrbK+ug0hdShlQ6yds39lYnBoZMCIitjHjBP392Xyyh7VsPhEEAJJeKulW4I2UkRSnLJF0vaSrJI2cIUnSWZImJU2uX79+FqcfERGzMU7QHzVhwwOu5G1fYPsw4FXAa+rqbwMH2T4KOBe4RNJeI469yPaE7YnFi2dMM42IiDkaJ+ivpTE7EJtmBtqSS4FTAWzfY/t79fkK4FbgkXM71YiImK9xgv51wFJJSyTtQplgenlzB0lLG4u/RJm8GUmLa0cwkg4FlgKr2zjxLZLaf0RE7CBmzN6xvVHSOcAVwCLgYts3SVoGTNpeDpwj6RmUWXo2AGfWw48HlknaSJly7Wzbt3fxQiIiYmbb3NDKExMTntcwDF1cmW9j71FExHSSVtiemGm/3JEbETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSBjBX1JJ0q6RdIqSeeN2H62pC9LWinps5IOb2z743rcLZJOaPPkIyJidmYM+pIWARcAJwGHA2c0g3p1ie2ft30k8Ebgr+qxhwOnA48BTgTeXsuLiIgFMM6V/tHAKturbd8LXAqc0tzB9p2Nxd0B1+enAJfavsf214FVtbyIiFgAO42xz/7AmsbyWuCY6TtJeilwLrAL8LTGsddOO3b/EceeBZwFcNBBB41z3hERMQfjXOlrxDo/YIV9ge3DgFcBr5nlsRfZnrA9sXjx4jFOKSIi5mKcoL8WOLCxfACwbiv7XwqcOsdjIyKiQ+ME/euApZKWSNqF0jG7vLmDpKWNxV8CvlafLwdOl7SrpCXAUuCL8z/tiIiYixnb9G1vlHQOcAWwCLjY9k2SlgGTtpcD50h6BvBjYANwZj32JkmXATcDG4GX2r6vo9cSEREzkP2AJvYFNTEx4cnJybkXoFHdCPO0jb1HERHTSVphe2Km/XJHbkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAjBX0JZ0o6RZJqySdN2L7uZJulnSjpE9KOrix7T5JK+tjeZsnHxERs7PTTDtIWgRcADwTWAtcJ2m57Zsbu10PTNj+oaTfBd4I/FrddrftI1s+74iImINxrvSPBlbZXm37XuBS4JTmDrY/bfuHdfFa4IB2TzMiItowTtDfH1jTWF5b123Ji4GPNZZ3kzQp6VpJp446QNJZdZ/J9evXj3FKERExFzM27wAasc4jd5ReAEwAT2msPsj2OkmHAp+S9GXbt25WmH0RcBHAxMTEyLIjImL+xrnSXwsc2Fg+AFg3fSdJzwBeDZxs+56p9bbX1X9XA58BjprH+UZExDyME/SvA5ZKWiJpF+B0YLMsHElHARdSAv53Guv3kbRrfb4vcCzQ7ADefkntPiIiejBj847tjZLOAa4AFgEX275J0jJg0vZy4E3AHsCHVALYN22fDPwccKGk+ylfMK+flvUTERE9kr1tNaFPTEx4cnJy7gV0cdU86j1qu55t7HOIiO2LpBW2J2baL3fkRkQMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAzIWEFf0omSbpG0StJ5I7afK+lmSTdK+qSkgxvbzpT0tfo4s82Tj4iI2Zkx6EtaBFwAnAQcDpwh6fBpu10PTNh+HPBh4I312IcB5wPHAEcD50vap73Tj4iI2RjnSv9oYJXt1bbvBS4FTmnuYPvTtn9YF68FDqjPTwCutH277Q3AlcCJ7Zx6RETM1jhBf39gTWN5bV23JS8GPjabYyWdJWlS0uT69evHOKWIiJiLcYK+RqzzyB2lFwATwJtmc6zti2xP2J5YvHjxGKcUERFzMU7QXwsc2Fg+AFg3fSdJzwBeDZxs+57ZHBsREf0YJ+hfByyVtETSLsDpwPLmDpKOAi6kBPzvNDZdATxL0j61A/dZdV1ERCyAnWbawfZGSedQgvUi4GLbN0laBkzaXk5pztkD+JAkgG/aPtn27ZL+nPLFAbDM9u2dvJKIiJiR7JHN8wtmYmLCk5OTcy9Ao7oR5mnUe9R2PdvY5xAR2xdJK2xPzLRf7siNiBiQBP2IiAFJ0I+IGJAE/YiIAUnQj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJAE/YiIAUnQj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJAE/YiIAUnQj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJCxgr6kEyXdImmVpPNGbD9e0pckbZR02rRt90laWR/L2zrxiIiYvZ1m2kHSIuAC4JnAWuA6Sctt39zY7ZvAC4FXjCjibttHtnCuERExTzMGfeBoYJXt1QCSLgVOAX4S9G3fVrfd38E5RkRES8Zp3tkfWNNYXlvXjWs3SZOSrpV06qgdJJ1V95lcv379LIqOiIjZGCfoa8Q6z6KOg2xPAL8O/C9Jhz2gMPsi2xO2JxYvXjyLoiMiYjbGCfprgQMbywcA68atwPa6+u9q4DPAUbM4v4iIaNE4Qf86YKmkJZJ2AU4HxsrCkbSPpF3r832BY2n0BURERL9mDPq2NwLnAFcAXwUus32TpGWSTgaQ9ERJa4HnARdKuqke/nPApKQbgE8Dr5+W9RMRET2SPZvm+e5NTEx4cnJy7gVoVBfEPI16j9quZxv7HCJi+yJpRe0/3arckRsRMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIGMFfUknSrpF0ipJ543YfrykL0naKOm0advOlPS1+jizrROPiIjZmzHoS1oEXACcBBwOnCHp8Gm7fRN4IXDJtGMfBpwPHAMcDZwvaZ/5n3ZERMzFOFf6RwOrbK+2fS9wKXBKcwfbt9m+Ebh/2rEnAFfavt32BuBK4MQWzjsiIuZgnKC/P7Cmsby2rhvHWMdKOkvSpKTJ9evXj1l0RETM1jhBXyPWeczyxzrW9kW2J2xPLF68eMyiIyJitsYJ+muBAxvLBwDrxix/PsdGRETLxgn61wFLJS2RtAtwOrB8zPKvAJ4laZ/agfusui4iIhbAjEHf9kbgHEqw/ipwme2bJC2TdDKApCdKWgs8D7hQ0k312NuBP6d8cVwHLKvrIiJiAcget3m+HxMTE56cnJx7ARrVjTBPo96jtuvZxj6HiNi+SFphe2Km/XJHbkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMyE4LfQKxFX0NEx0Rg5Er/YiIAUnQj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJCxgr6kEyXdImmVpPNGbN9V0gfr9i9IOqSuP0TS3ZJW1sc72j39iIiYjRnz9CUtAi4AngmsBa6TtNz2zY3dXgxssP2zkk4H3gD8Wt12q+0jWz7viIiYg3Gu9I8GVtlebfte4FLglGn7nAK8rz7/MPB0qYs7iyIiYj7GCfr7A2say2vrupH72N4IfB94eN22RNL1kq6S9ORRFUg6S9KkpMn169fP6gVERMT4xhmGYdQV+/R7+be0z7eBg2x/T9ITgMslPcb2nZvtaF8EXAQwMTGRcQL6luEeIgZjnCv9tcCBjeUDgHVb2kfSTsBDgdtt32P7ewC2VwC3Ao+c70lHRMTcjBP0rwOWSloiaRfgdGD5tH2WA2fW56cBn7JtSYtrRzCSDgWWAqvbOfWIiJitGZt3bG+UdA5wBbAIuNj2TZKWAZO2lwPvBt4vaRVwO+WLAeB4YJmkjcB9wNm2b+/ihcR2IM1IEQtO3sb+aCYmJjw5OTn3AvoKLG3X00cdQ6knYoAkrbA9MdN+GU8/djw70hdyRMsyDENExIAk6EdEDEiadyK2ZelviZYl6EdEf9LfsuDSvBMRMSAJ+hERA5LmnYiIudhOm5FypR8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSBjBX1JJ0q6RdIqSeeN2L6rpA/W7V+QdEhj2x/X9bdIOqG9U4+IiNmaMehLWgRcAJwEHA6cIenwabu9GNhg+2eBtwBvqMceDpwOPAY4EXh7LS8iIhbAOFf6RwOrbK+2fS9wKXDKtH1OAd5Xn38YeLok1fWX2r7H9teBVbW8iIhYAONMorI/sKaxvBY4Zkv72N4o6fvAw+v6a6cdu//0CiSdBZxVF/9T0i1jnf387At8d6w95zdZwnj1zH9Chj7q2bbes77qyWez7daTz6bp4HF2GifojzqL6dO7bGmfcY7F9kXARWOcS2skTdqeSD3bVh2pZ9utI/Vsu3XMxjjNO2uBAxvLBwDrtrSPpJ2AhwK3j3lsRET0ZJygfx2wVNISSbtQOmaXT9tnOXBmfX4a8CnbrutPr9k9S4ClwBfbOfWIiJitGZt3ahv9OcAVwCLgYts3SVoGTNpeDrwbeL+kVZQr/NPrsTdJugy4GdgIvNT2fR29ltnqqzlpR6pnR3otO1o9O9Jr2dHq6bXpeiZyD7OvR0TEtiF35EZEDEiCfkTEgCToR0QMSIJ+RMSAjHNzVsySpEcCfwM8wvZjJT0OONn261oq//Fb2277Sy3Vswj4bcr9FR+3/bnGtte09XpqeQ8B/hA4yPZLJC0FHmX7/7RYxwmU1/JJ27c11v+W7YvbqqdR7i8Ch9D4O7P9ty2U28vnImkv4I9rPR+zfUlj29tt/14b9dTy+v5s9qfcwdr8bK5uodxzt7bd9l/Nt475GkT2jqQDgTdRhoD4GPAm2z+u2y63fWrL9V0FvBK40PZRdd1XbD+2pfI/XZ/uBkwAN1Dufn4c8AXbx7VUz7uAh1DurfgN4Crb59ZtX7K91S+fWdb1QWAF8Jv1i/LBwOdtH9lS+X8JHAd8CXgO8L9sv61ua/W11DLfDxwGrASm0pRt+2UtlN3L5yLpH4CvUYZS+S3gx8Cv276n5Xr6/mzeAPwaJZW8+dmc3ELZ59enjwKeyKZ7mp4DXG37t+dbx7zZ3uEfwJXA2cCRwNuAa4CH123Xd1DfddPLBlZ2UM+lwM83lh8LvLfF8m9sPN+Jkm/8j8Cubb9vlHs+pr9nN7RY/peBnerzvYGPAm/p8P/AV6kXVR2U3cvnMv3/LPBq4HOUcbW+tB1/NrcAu3bx2TTq+ASwZ2N5T8qvss7qHPcxlDb9xbbfYXul7f8GvB24WtJhjBgLqAXfbZYt6TTg2x3U82jbX55asP0VyhdbW3ZplL3R9lmUK9dPAXu0WA/AvfXqfuo9Owy4p8Xyd7K9EcD2HZQrr70kfYjG62zRV4Cf7qBc6O9z2VXST2KE7b+gfMFcTQn8ben7s1kN7NxBuU0HAfc2lu+lNPUtuKG06e8saTfbPwKw/QFJ/065y3j3Dup7KeWP49GSvgV8HXhBB/V8tf7U/wAlWL6AcoXZlklJJ9r++NQK28skraP0WbTptcDHgQMl/R1wLPCiFsu/VdJTbF8F4HJn+IslvQ74lRbrmbIvcLOkL9L48nILTQj097n8M/A04P816nmfpP+g/GJuS9+fzQ+BlZI+yeafzbyb3hreD3xR0kcof5vPBebdn9OGobTp/wHl5+hV09YfBbzR9jM7qnd34EG27+qo/N2A3wWOr6uuBv5m6stteyPp4cCTKP0T19oebzja8cp+MIDtu0ds29/2t9qqq5b5lFHrp/8fjAX5bM4ctd72+0atn0c9jweeXBevtn19m+XP1SCCfl8kvaD+ihjZg+8Oeu7rH8xBtjudg6CrTJRG+Z+0/fSZ1rVUVyeZG33rOuOpr0yUvrLRptW5C/DIuniLa2JHy3UcByy1/R5Ji4E9XCaTWlBDad4Buk+lZFNT0Z4tlbdVkk6mZCXtAiyRdCSwrKUmhGY9IzNRaOHnav218hBgX0n7sGkOhr2A/eZb/oj6RmZuUH4ltVH+Z20fJ+kuNu8vEiVDZK826qneQ8l4+oW6vBb4ENBWmuvU/+ORmSgt1QHw5vrvyGw0SmZPayQ9lTLT3221ngMlndnmF3/N4pmgvHfvofQhfIDSbLmwFronuc8HcBVlusZmhshXOqhncU+vZwVl7oLm67mxg3q6zET5fUqfxz3136nHDcA5HdTXeebGmOexTwtldJrx1Cizl0wUOs5Ga5S7gvKLaGr5kcCKlutYSflC6fRvcy6PoWTvTHmI7enj+W/soJ5rJH1C0ovr1WtXNtr+foflT+ksE8X2W20vAV5he0njcYTtv+6gyj4yN8bxyRbK6DrjaUpfmShdZ6NN2dmN5lDb/0b7/yfudYn0U59NFwkjczKo5h16SqW0vVTS0ZR5BV4t6WbKBPEfaLmqr0j6dWBRbc99GeUehLZ1mYky5UJJL2NTp/RnKDe3td3W2kfmxjjmPbkr3Wc8TekrE6XrbLQpk5LeTXld1HpWtFzHZZIuBPaW9BLKzW3varmOORlUR66kQymplL8IbKCmUrpx23cHde4L/BXwfNuLWi77IZQbZp5VV10BvM4tZ+/0kYlS/9h3prS1QrnT9D63fAdjX5kbY5xHK3eadpnxNK2ezjNR+spGk7QrJa36OMr7djXwdtut/kqS9EzK36aAK2xf2Wb5czWooD+lh1TKvShXQ6dTOkA/Alxmu+2riR2GpBtsHzHTupbq6jxzY4xzmHfQ7znjqZdMlL6y0Rr1PQw4wPaNLZf7BtuvmmndQhhEm76kF9R/z61paL8DvKSx3LYbKG2Ry2w/0varugj4kq6UtHdjeR9JV7RY/mfrv3dJurPxuEvSnW3VU91Xm96m6j6UTdk1ramZG18DLqDcmf1vko7f6kHdmHPzjqTdarDat37mD6uPQ+gm4+l84FWUwddgUyZK2/WcTOkA/XhdPlLS9Pm426jnM5L2qu/hSuA9ktpOpx51789JLdcxJ0Np0+81lRI41LYl7SlpD9v/2VE9+7rctg6A7Q2Sfqqtwl0HbrO91fdN0j62N8yzulcCn5a0mhIQD6ab9uk3A8+aupKsabx/DzyhjcJrINki27fXp/O5Gv8d4OWUAN/MYb+T8mXWtucCR03VZXudpC7+ls6nZNd9ptazsn6Rte2htu+U9NvAe2yfL6mVK31Jvwv8HnDYtDL3pJv+tlkbRNC3fWF9+nbb63uo8jE1t/1hgCStB86s2Qhtul/SQba/SanoYLoZS2gmnwTm3FShMr7L3cBSSl6zgH9tu421ekDmhqQ2MzdWUD4DUbJeNtTnewPfBJbUem/fUgEzsf1W4K2S/pvraJQdu7dexHSdibLR9velNvq4t2onST8D/CqlT6xNl1BG8v0fwHmN9XfN5zNv0yCCfsM1kr4OfBD4xxauTrfkIuBc25+GnzQpTHUgt+nVwGdVhnKG0gF2Vst1jGNef6W275f0Ztu/ALTatjpCp5kbNf0USe8Altv+aF0+CXhGW/VUfWU89ZWJ0lc22p9Rkh4+a/u62pT4tTYKrinU35f0VuD2qX7D+qv/GNtfaKOe+RhcR24jlfJUyl2ZradS9twpuS+bsjc+31X2xgzn0Ean5J9RAv4/usP/lD1mbqyw/YRp6yZtT7RYRy8ZT7WuzjNR+shGU5mA5mW239JWmVuo53rg8VP/l+uv2ck2Mrbma3BBf0rHqZQfobR/Nq8mJ9z+ZC0jOyDd8zgyLQX9uyjDMdzHpvx5u91hC6bX2UnmRi37CuBf2Dzn/HjbJ7RYRy8XF9tyJspcSPq07f/ScR0rPW0CIEk32n5cl/WOYxDZO1Nqj/2Zkj5G+dn4bUrHUdt+C1gM/ANlcot9gRd2UM8rG48/pQyF+9oO6plJG42wl1NytI+wvWd9tB7we8rcADiD8n/gI/WxuK5rUy8ZT/SUidJ1NlrDNZL+WtKTJT1+6tFyHaslvUzSzvXx+5S7wRfcoK70a3v+5ZSc+c93WM8E5WfqIWzqN3HX3/Iq00K+0XYrwWXcTBRJD5tvJ5Wkp1GaXJ4MHApcD/xL7bRsjaTrbR9VMzcOnMrc6Oqz6TJ7S9LTKYN5bZbxNNWX1EL5P8lEAVY1Nu0JXGP7+W3U06jvetfpRbe2roV6Rr0/tv20Fuv4KeB/U+YjMCXZ4eW2v9NWHXM1tKCvqVRKyofc1R/jLcArKGPW3D+13vY3uqivUa8ogzr9fEvlfZ2tZKJMdVq2pba3PhH4L5TpLe+2/eiW6/gypc34fcCra0de60FfZSjqd1FuYjpI0hHA77ilycRrG/GTqIOH0UHGk6SHAvvQUyaKpBXAc6dlo31kW2igIm2+AAAIHElEQVQH35EMLXunr1TK9bb/ueUyH0DS29iUovkgyg1hN7RVfp+ZKCpj4ewOfJ7SFv7Ejq6KOsvcmOYtwAnU4Yht37ClPpi56CPjaQEyUXrJRpP0COAvgf1snyTpcOAXbL+7xTq6HsZ9zoZ2pX8N5equmUr5l7ZbTaWsP7vPoPykaw7q9Y8t19McR2YjcJvtz7VZR62nj0yUt1BukLqHMvn21ZRspAfMpjSPOnrJ3Kh1fcH2Mc3mibY7WXvMeOotE6WPbLTap/ceSiw4QtJOlCGQW/mFXOu4itLXdmHj8/+K7ce2VcdcDe1Kf/dme6ftz6ibG01eBDyakk431bxjSqdua9zfIGHflfQaNs9E+V6bFdj+Ayht4JT37z2U4Zx3bbGO+1Ru9e886ANrahOPVcb6eRntjxh5LjXjSVKXGU9qfqnUXxmtx47GL6GpIT4Ol9RFNtq+ti+T9MdQJpeX1HYH+ENsf1Gb32jWxTDusza0oL9a0p+yeSplF9OXHdHmVcN0tV16i1d2HXRKnkG5RX5qaN2raTkTRdI5lE7cJwDfAC6mNPO07RpJf025Qe8HUyvd/pR8ZwNvBfanzGj1CUqnaJsup7xH/2K7iyGIp6xWuQlsatL136ObTJRXNp7vRsmsW0HpDG3TD1RGJ5365fIkoO15KXoZxn0uhta8sw+lTfdYNt2Y81o3xq9pqZ53Am+xfXOb5TbKP7g+fWn9d+pL7PnAD20v66jeLjNRXkn5PFbY7uyKqI/MjVrPsdOb2katm2cdfWU8LUgmStvZaI1yHw+8DXgMcBMlnfa0Nu/X0Ohh3J/fdTLHOIYW9HtJpZT0VUqa29Q0gFPzo7Zdz+dsHzvTuhbq6TQTZUekETesjVrXQj2dZzwtlLaz0Rrl7gacQ+lov4uSPPA2t3Tnb+3zOK02IXU6jPtcDK155+8YkUrZgRM7LLtpd0nH2Z4aAvkX2TSiaJs6zUTpU9eZG5J+gXJ1t1ibD9u9F9D2nd+9ZDz1lYnSdTZaw99S+g3+si6fQfm1/Lw2Cq99HudQ7gf6wYwH9GxoQb+XVMoef8K9GLi45lMD3EG5G7h1ttdM65Tq4s7PPryXmrlRl/+N0r7fVrreLsAelL+t5vDDdwKntVTHlBspfSCPpbRJ3yGp1Yyn6p3UTBQA2zdKugRoO/1wsvF8I/D3XWSjUSZFb2ZRfVpS218uV0p6BQ/sO1rwkTaHFvTPVxmkqtNUyr64TMxyhMpMXXJ3k6T3kYnSl04zN1ymkLxK0nu7/vLvI+Op6iUTpcdstOslPcn2tQCSjqGkCbdp6uLrpY11pvS9LKihBf1eUin7Uq/wz6cOrVtzg5d1EPz7yETpSx+ZGwDvkvS8qSSBmkRwqdsdcK2vjKdOM1EWIBvtGOA3JX2zLh9EmZT9y7TU9+aW71Zv09CCfqeplAvgYkr/xK/W5d+gXO39csv1PMrTxlmRdCztXx314VxK38Shkj5HzdzooJ5OZzWrHkwZKbbTjCfK1epFwKMlfYuaidJi+f+1UQ9My0ZrsZ4pnfW5SXqa7U9JGvk3uC20Kgwt6F8r6fCuUikXwGG2f6Wx/GeSVnZQz9t44MxYo9ZtD26m3G/wQ0rmxuWUdv22TZ/V7BBantXM9pvaLG+UmokyYfsZXWWiTDWD1ZTWZubZefWLudUU5I6b3Y4HPgU8h03jVjX/TdDv2XHAmSoDiXWWStmju6dl7xxLmXawFX1movSo08yNhm1lVrN56TkTpa9stC7dVf9WvsKmYA8LM43pSEML+n2lUvblbOBvG9k7G4Azt7L/bPWZidKXPjI3sP3xel/IWZRx+/+JFr+Qe9ZXJkpv2Wgd2qP++yjK/RP/RAn8z6HcfLjgBnVz1o5k2g0gewHYvnOGw+Za18Hbwp2EbZD0XuAd0zI3zmz7RjOV8fp/HziAEvSfRBlArO0hBTpXfxlPZ9udZKL0kI3WOUmfAH7FjZFJgQ/ZXvALz0HNnLUjsX0/5a5CbN/ZVcCv3qV+ZjTqwzGU8Xduk3Qb5camp0j6sqQ2hyj+fcqV3jdcpuY7CljfYvm9sb1kxKP1gC/poSqzmH0K+KSkNzeu+rc3BwH3NpbvpYwEsOCG1ryzo+nrZ3cfmSh96etK60e2fyQJSbva/ldJj+qp7lYsQCZKX9lofXg/8EWV+bINPJdNE9gvqAT97dtvUf5DTW+aaPsqrPNMlL702Ey1tv46upzy5bwBWNdT3W3pOxOlr2y0ztn+C5Vx+59cV73I9vULeU5TEvS3b4dTAv5xlD/CfwHe0UE9O0QmSp9sP7c+fa3KyJ4PBT6+gKc0F31nonSajdY3l+G62x6ye97Skbsdk3QZJZPm7+qqM4C9bf/qlo+ac10/xaZMlN2A77j9yS1iGyLp/Pp0ZCaK7d9uub4jKCm1m2WjtTnkcSTob9c0Yvq9UetaqGeHyUSJ2esjE6XPbLShS/bO9u36OnYM0NnAUbADZaLEnHSeidJzNtqgpU1/+9b5wFHVdp+JEvPSVybKNjsc8Y4kzTvbMW2aNnGktjJV6h/7i4CXU6bM2wDsbPvZbZQf2z6VKQanMlGu7iITpd4E9oCA1NVNYEOVoB+zIukp1EwU2/fOtH/EuCQ9mBHZaB1MCjNoCfoRsU3oMxttyBL0I2Kb0Fc22tAleycithV9ZaMNWq70I2KbIOmrlBvBNstGo0xtuj3Pe7FNSdCPiG1CX9loQ5egHxExIGnTj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJD/DyZ3eMLAXQPQAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -263,15 +281,6 @@ " rotation=90)\n", "plt.xlim([-1, len(domain_score.classifier.feature_importances_)])\n" ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "plt.show()" - ] } ], "metadata": { From 0b8f9303d81616ece4da822f0132e715e952101d Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Tue, 13 Mar 2018 10:08:02 +0100 Subject: [PATCH 19/35] Add tini to Docker image --- Dockerfile-Notebook | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Dockerfile-Notebook b/Dockerfile-Notebook index 24badff..f6f72d8 100644 --- a/Dockerfile-Notebook +++ b/Dockerfile-Notebook @@ -1,9 +1,16 @@ FROM server:latest -EXPOSE 8888 RUN apt-get update && apt-get install -y graphviz RUN pip3 install jupyter +# Add Tini. Tini operates as a process subreaper for jupyter. This prevents +# kernel crashes. +ENV TINI_VERSION v0.6.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/bin/tini +RUN chmod +x /usr/bin/tini +ENTRYPOINT ["/usr/bin/tini", "--"] + COPY . /32de-python/ -ENTRYPOINT ["jupyter", "notebook", "--config", "deployment/jupyter_notebook_config.py"] +EXPOSE 8888 +CMD ["jupyter", "notebook", "--config", "deployment/jupyter_notebook_config.py"] From 619c5a164ecbcbd9e3808930ec89a5d29ae302c5 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Fri, 9 Mar 2018 17:08:19 +0100 Subject: [PATCH 20/35] Introduce script to create Jupyter Notebook Docker image. --- Dockerfile.jupyter | 13 +++++++++++++ deployment/build-notebook.sh | 2 ++ deployment/run-notebook.sh | 7 +++++++ deployment/stop-notebook.sh | 2 ++ 4 files changed, 24 insertions(+) create mode 100644 Dockerfile.jupyter create mode 100755 deployment/build-notebook.sh create mode 100755 deployment/run-notebook.sh create mode 100755 deployment/stop-notebook.sh diff --git a/Dockerfile.jupyter b/Dockerfile.jupyter new file mode 100644 index 0000000..d7cae36 --- /dev/null +++ b/Dockerfile.jupyter @@ -0,0 +1,13 @@ +# Based on Jupyter's Scipy-Notebook +# https://github.com/jupyter/docker-stacks/blob/master/scipy-notebook/Dockerfile +FROM jupyter/scipy-notebook + +USER root + +RUN conda install -c ostrokach-forge graph-tool --quiet --yes +RUN pip install graphviz + +COPY requirements.txt /32de-python/ +RUN pip install -r /32de-python/requirements.txt + +USER $NB_UID \ No newline at end of file diff --git a/deployment/build-notebook.sh b/deployment/build-notebook.sh new file mode 100755 index 0000000..aec3614 --- /dev/null +++ b/deployment/build-notebook.sh @@ -0,0 +1,2 @@ +#!/bin/bash +docker build -t notebook -f Dockerfile.jupyter ${1:-.} \ No newline at end of file diff --git a/deployment/run-notebook.sh b/deployment/run-notebook.sh new file mode 100755 index 0000000..19a9baa --- /dev/null +++ b/deployment/run-notebook.sh @@ -0,0 +1,7 @@ +#!/bin/bash +docker stop notebook-container +docker rm notebook-container +docker run --name notebook-container \ + --publish=${1:-8888}:8888 \ + -d \ + notebook \ No newline at end of file diff --git a/deployment/stop-notebook.sh b/deployment/stop-notebook.sh new file mode 100755 index 0000000..27c1c2f --- /dev/null +++ b/deployment/stop-notebook.sh @@ -0,0 +1,2 @@ +#!/bin/bash +docker stop notebook-container \ No newline at end of file From d6defb4e6a7e0153833deac6a0d7bc845947422b Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Tue, 13 Mar 2018 10:13:51 +0100 Subject: [PATCH 21/35] Correct script for other Dockerfile --- Dockerfile.jupyter | 13 ------------- deployment/build-notebook.sh | 2 +- 2 files changed, 1 insertion(+), 14 deletions(-) delete mode 100644 Dockerfile.jupyter diff --git a/Dockerfile.jupyter b/Dockerfile.jupyter deleted file mode 100644 index d7cae36..0000000 --- a/Dockerfile.jupyter +++ /dev/null @@ -1,13 +0,0 @@ -# Based on Jupyter's Scipy-Notebook -# https://github.com/jupyter/docker-stacks/blob/master/scipy-notebook/Dockerfile -FROM jupyter/scipy-notebook - -USER root - -RUN conda install -c ostrokach-forge graph-tool --quiet --yes -RUN pip install graphviz - -COPY requirements.txt /32de-python/ -RUN pip install -r /32de-python/requirements.txt - -USER $NB_UID \ No newline at end of file diff --git a/deployment/build-notebook.sh b/deployment/build-notebook.sh index aec3614..00bd32f 100755 --- a/deployment/build-notebook.sh +++ b/deployment/build-notebook.sh @@ -1,2 +1,2 @@ #!/bin/bash -docker build -t notebook -f Dockerfile.jupyter ${1:-.} \ No newline at end of file +docker build -t notebook -f Dockerfile-Notebook ${1:-.} From 23880052e7a83e33035fda16902cc542bb85e31e Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Tue, 13 Mar 2018 12:22:14 +0100 Subject: [PATCH 22/35] Fix not initializing with super. --- domain_scoring/domain_scoring.py | 2 +- notebooks/pl-1.0-regression_open-day.ipynb | 113 +++++++-------------- 2 files changed, 36 insertions(+), 79 deletions(-) diff --git a/domain_scoring/domain_scoring.py b/domain_scoring/domain_scoring.py index 5a4a3d3..c9e618f 100644 --- a/domain_scoring/domain_scoring.py +++ b/domain_scoring/domain_scoring.py @@ -124,7 +124,7 @@ def __init__(self): """ Extracts the domain value of meta-paths via regression. """ - super() + super().__init__() self.classifier = RandomForestRegressor() def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tuple[MetaPath]], List[int]): diff --git a/notebooks/pl-1.0-regression_open-day.ipynb b/notebooks/pl-1.0-regression_open-day.ipynb index d82b06e..ae232cd 100644 --- a/notebooks/pl-1.0-regression_open-day.ipynb +++ b/notebooks/pl-1.0-regression_open-day.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 28, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -64,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -101,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -124,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -136,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -146,16 +146,29 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "TypeError", + "evalue": "descriptor '__init__' requires a 'super' object but received a 'DomainScoringRegressor'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdomain_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDomainScoringRegressor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/32de-python/domain_scoring/domain_scoring.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0mExtracts\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mdomain\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0mof\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpaths\u001b[0m \u001b[0mvia\u001b[0m \u001b[0mregression\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 126\u001b[0m \"\"\"\n\u001b[0;32m--> 127\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 128\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassifier\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomForestRegressor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: descriptor '__init__' requires a 'super' object but received a 'DomainScoringRegressor'" + ] + } + ], "source": [ "domain_score = domain_scoring.DomainScoringRegressor()" ] }, { "cell_type": "code", - "execution_count": 95, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -166,94 +179,38 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Configure other predictor and transformer\n", - "domain_score.vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1, 1), token_pattern='\\\\b\\\\w+\\\\b')\n", + "domain_score.vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), token_pattern='\\\\b\\\\w+\\\\b')\n", "domain_score.classifier = RandomForestRegressor(random_state=42)" ] }, { "cell_type": "code", - "execution_count": 97, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Test accuracy is 0.3389830508474576\n" - ] - } - ], + "outputs": [], "source": [ "domain_score.fit(m_graph, test_size=0.3)" ] }, { "cell_type": "code", - "execution_count": 98, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Test accuracy is 0.42105263157894735\n" - ] - } - ], + "outputs": [], "source": [ "domain_score.fit(p_graph, test_size=0.3)" ] }, { "cell_type": "code", - "execution_count": 89, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feature ranking:\n", - "1. feature 8 (0.345370)\n", - "2. feature 2 (0.245315)\n", - "3. feature 4 (0.067725)\n", - "4. feature 6 (0.049114)\n", - "5. feature 5 (0.043297)\n", - "6. feature 9 (0.041178)\n", - "7. feature 0 (0.040368)\n", - "8. feature 11 (0.036193)\n", - "9. feature 7 (0.035172)\n", - "10. feature 10 (0.033848)\n", - "11. feature 3 (0.033179)\n", - "12. feature 1 (0.029241)\n" - ] - }, - { - "data": { - "text/plain": [ - "(-1, 12)" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAE0CAYAAADALuP1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAIABJREFUeJzt3X28XVV95/HP1/CkPAhKastzoFGLVkCvYAui4xNgR8AWW6i2aK2UVsZaqpVWO9jUdnwY6zgWK6io1VJEW2k6oyLjA1QR5UYCCpYaIpoYW6MEoYpg4Dt/rHXNzuUk99x79943yf6+X6/zytlPa+1zTu7v7LPWb68l20RExDA8aKFPICIi+pOgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+jFokt4h6U8X+jwi+qLk6cdcSLoNeARwX2P1I22vm0eZTwU+YPuA+Z3d9knSe4G1tl+z0OcSO65c6cd8PMf2Ho3HnAN+GyTttJD1z4ekRQt9DjEMCfrROklPknSNpDsk3VCv4Ke2vUjSVyXdJWm1pN+p63cHPgbsJ+k/62M/Se+V9LrG8U+VtLaxfJukV0m6EfiBpJ3qcf8gab2kr0t62VbO9SflT5Ut6Y8kfUfStyWdKunZkv5N0u2S/qRx7GslfVjSB+vr+ZKkIxrbf07SZ+r7cJOkk6fV+zeSPirpB8CLgecDf1Rf+z/X/c6TdGst/2ZJz22U8UJJn5X0PyVtqK/1pMb2h0l6j6R1dfvljW3/VdLKem7XSHpcY9urJH2r1nmLpKeP8bHH9sJ2HnnM+gHcBjxjxPr9ge8Bz6ZcVDyzLi+u238JOAwQ8BTgh8Dj67anUpo3muW9F3hdY3mzfep5rAQOBB5c61wB/HdgF+BQYDVwwhZex0/Kr2VvrMfuDLwEWA9cAuwJPAb4EXBo3f+1wI+B0+r+rwC+Xp/vDKwC/qSex9OAu4BHNer9PnBsPefdpr/Wut/zgP3qPr8G/AD4mbrthbX+lwCLgN8F1rGp2fb/Ah8E9qnn85S6/vHAd4Bj6nFn1vdxV+BRwBpgv7rvIcBhC/3/LY/2HrnSj/m4vF4p3tG4inwB8FHbH7V9v+0rgUnKlwC2/6/tW11cBXwCePI8z+N/215j+27giZQvmGW277W9GngncPqYZf0Y+AvbPwYuBfYF3mr7Lts3ATcBj2vsv8L2h+v+f0UJ3k+qjz2A19fz+BTwf4AzGsf+k+3P1ffpR6NOxvaHbK+r+3wQ+BpwdGOXb9h+p+37gPcBPwM8QtLPACcBZ9veYPvH9f2G8iVxoe0v2L7P9vuAe+o530cJ/odL2tn2bbZvHfO9i+1Agn7Mx6m2966PU+u6g4HnNb4M7gCOowQjJJ0k6draVHIH5ctg33mex5rG84MpTUTN+v+E0uk8ju/VAApwd/33Pxrb76YE8wfUbft+YC3lynw/YE1dN+UblF9Co857JEm/2WiGuQN4LJu/X//eqP+H9ekelF8+t9veMKLYg4E/nPYeHUi5ul8FvJzyK+Y7ki6VtN9M5xnbjwT9aNsa4P2NL4O9be9u+/WSdgX+AfifwCNs7w18lNLUAzAqlewHwEMayz89Yp/mcWuAr0+rf0/bz573KxvtwKknkh4EHEBpYlkHHFjXTTkI+NYWzvsBy5IOpvxKOQd4eH2/vsKm92tr1gAPk7T3Frb9xbT36CG2/x7A9iW2j6N8ORh4wxj1xXYiQT/a9gHgOZJOkLRI0m61g/QAStv2rpR28o210/FZjWP/A3i4pIc21q0Enl07JX+achW6NV8E7qydkQ+u5/BYSU9s7RVu7gmSflklc+jllGaSa4EvUL6w/kjSzrUz+zmUJqMt+Q9KH8SU3SlBdz2UTnDKlf6MbH+b0jH+dkn71HM4vm5+J3C2pGNU7C7plyTtKelRkp5Wv6B/RPllc98WqontUIJ+tMr2GuAUSpPKespV5SuBB9m+C3gZcBmwAfh1YHnj2H8F/h5YXZsd9gPeD9xA6Wj8BKVjcmv130cJrkdSOlW/C7wLeOjWjpuHf6J0sG4AfgP45dp+fi9wMqVd/bvA24HfrK9xS95NaUu/Q9Lltm8G3gx8nvKF8PPA52Zxbr9B6aP4V0rH7csBbE9S2vX/up73KkqnMJQv5dfXc/534Kcon2XsIHJzVsQcSXot8LO2X7DQ5xIxrlzpR0QMSIJ+RMSApHknImJAcqUfETEgCfoREQOyzY1KuO+++/qQQw5Z6NOIiNiurFix4ru2F8+03zYX9A855BAmJycX+jQiIrYrkr4xzn5p3omIGJAE/YiIARkr6Es6sU6msErSeSO2ny3py3U0wM9KOryuP0TS3XX9SknvaPsFRETE+GZs01eZxu0CymQYa4HrJC2v44JMucT2O+r+J1PGFT+xbrvV9pHtnnZERMzFOFf6RwOrbK+ug0hdShlQ6yds39lYnBoZMCIitjHjBP392Xyyh7VsPhEEAJJeKulW4I2UkRSnLJF0vaSrJI2cIUnSWZImJU2uX79+FqcfERGzMU7QHzVhwwOu5G1fYPsw4FXAa+rqbwMH2T4KOBe4RNJeI469yPaE7YnFi2dMM42IiDkaJ+ivpTE7EJtmBtqSS4FTAWzfY/t79fkK4FbgkXM71YiImK9xgv51wFJJSyTtQplgenlzB0lLG4u/RJm8GUmLa0cwkg4FlgKr2zjxLZLaf0RE7CBmzN6xvVHSOcAVwCLgYts3SVoGTNpeDpwj6RmUWXo2AGfWw48HlknaSJly7Wzbt3fxQiIiYmbb3NDKExMTntcwDF1cmW9j71FExHSSVtiemGm/3JEbETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSBjBX1JJ0q6RdIqSeeN2H62pC9LWinps5IOb2z743rcLZJOaPPkIyJidmYM+pIWARcAJwGHA2c0g3p1ie2ft30k8Ebgr+qxhwOnA48BTgTeXsuLiIgFMM6V/tHAKturbd8LXAqc0tzB9p2Nxd0B1+enAJfavsf214FVtbyIiFgAO42xz/7AmsbyWuCY6TtJeilwLrAL8LTGsddOO3b/EceeBZwFcNBBB41z3hERMQfjXOlrxDo/YIV9ge3DgFcBr5nlsRfZnrA9sXjx4jFOKSIi5mKcoL8WOLCxfACwbiv7XwqcOsdjIyKiQ+ME/euApZKWSNqF0jG7vLmDpKWNxV8CvlafLwdOl7SrpCXAUuCL8z/tiIiYixnb9G1vlHQOcAWwCLjY9k2SlgGTtpcD50h6BvBjYANwZj32JkmXATcDG4GX2r6vo9cSEREzkP2AJvYFNTEx4cnJybkXoFHdCPO0jb1HERHTSVphe2Km/XJHbkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAjBX0JZ0o6RZJqySdN2L7uZJulnSjpE9KOrix7T5JK+tjeZsnHxERs7PTTDtIWgRcADwTWAtcJ2m57Zsbu10PTNj+oaTfBd4I/FrddrftI1s+74iImINxrvSPBlbZXm37XuBS4JTmDrY/bfuHdfFa4IB2TzMiItowTtDfH1jTWF5b123Ji4GPNZZ3kzQp6VpJp446QNJZdZ/J9evXj3FKERExFzM27wAasc4jd5ReAEwAT2msPsj2OkmHAp+S9GXbt25WmH0RcBHAxMTEyLIjImL+xrnSXwsc2Fg+AFg3fSdJzwBeDZxs+56p9bbX1X9XA58BjprH+UZExDyME/SvA5ZKWiJpF+B0YLMsHElHARdSAv53Guv3kbRrfb4vcCzQ7ADefkntPiIiejBj847tjZLOAa4AFgEX275J0jJg0vZy4E3AHsCHVALYN22fDPwccKGk+ylfMK+flvUTERE9kr1tNaFPTEx4cnJy7gV0cdU86j1qu55t7HOIiO2LpBW2J2baL3fkRkQMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAzIWEFf0omSbpG0StJ5I7afK+lmSTdK+qSkgxvbzpT0tfo4s82Tj4iI2Zkx6EtaBFwAnAQcDpwh6fBpu10PTNh+HPBh4I312IcB5wPHAEcD50vap73Tj4iI2RjnSv9oYJXt1bbvBS4FTmnuYPvTtn9YF68FDqjPTwCutH277Q3AlcCJ7Zx6RETM1jhBf39gTWN5bV23JS8GPjabYyWdJWlS0uT69evHOKWIiJiLcYK+RqzzyB2lFwATwJtmc6zti2xP2J5YvHjxGKcUERFzMU7QXwsc2Fg+AFg3fSdJzwBeDZxs+57ZHBsREf0YJ+hfByyVtETSLsDpwPLmDpKOAi6kBPzvNDZdATxL0j61A/dZdV1ERCyAnWbawfZGSedQgvUi4GLbN0laBkzaXk5pztkD+JAkgG/aPtn27ZL+nPLFAbDM9u2dvJKIiJiR7JHN8wtmYmLCk5OTcy9Ao7oR5mnUe9R2PdvY5xAR2xdJK2xPzLRf7siNiBiQBP2IiAFJ0I+IGJAE/YiIAUnQj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJAE/YiIAUnQj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJAE/YiIAUnQj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJCxgr6kEyXdImmVpPNGbD9e0pckbZR02rRt90laWR/L2zrxiIiYvZ1m2kHSIuAC4JnAWuA6Sctt39zY7ZvAC4FXjCjibttHtnCuERExTzMGfeBoYJXt1QCSLgVOAX4S9G3fVrfd38E5RkRES8Zp3tkfWNNYXlvXjWs3SZOSrpV06qgdJJ1V95lcv379LIqOiIjZGCfoa8Q6z6KOg2xPAL8O/C9Jhz2gMPsi2xO2JxYvXjyLoiMiYjbGCfprgQMbywcA68atwPa6+u9q4DPAUbM4v4iIaNE4Qf86YKmkJZJ2AU4HxsrCkbSPpF3r832BY2n0BURERL9mDPq2NwLnAFcAXwUus32TpGWSTgaQ9ERJa4HnARdKuqke/nPApKQbgE8Dr5+W9RMRET2SPZvm+e5NTEx4cnJy7gVoVBfEPI16j9quZxv7HCJi+yJpRe0/3arckRsRMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIGMFfUknSrpF0ipJ543YfrykL0naKOm0advOlPS1+jizrROPiIjZmzHoS1oEXACcBBwOnCHp8Gm7fRN4IXDJtGMfBpwPHAMcDZwvaZ/5n3ZERMzFOFf6RwOrbK+2fS9wKXBKcwfbt9m+Ebh/2rEnAFfavt32BuBK4MQWzjsiIuZgnKC/P7Cmsby2rhvHWMdKOkvSpKTJ9evXj1l0RETM1jhBXyPWeczyxzrW9kW2J2xPLF68eMyiIyJitsYJ+muBAxvLBwDrxix/PsdGRETLxgn61wFLJS2RtAtwOrB8zPKvAJ4laZ/agfusui4iIhbAjEHf9kbgHEqw/ipwme2bJC2TdDKApCdKWgs8D7hQ0k312NuBP6d8cVwHLKvrIiJiAcget3m+HxMTE56cnJx7ARrVjTBPo96jtuvZxj6HiNi+SFphe2Km/XJHbkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMSIJ+RMSAJOhHRAxIgn5ExIAk6EdEDEiCfkTEgCToR0QMyE4LfQKxFX0NEx0Rg5Er/YiIAUnQj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJCxgr6kEyXdImmVpPNGbN9V0gfr9i9IOqSuP0TS3ZJW1sc72j39iIiYjRnz9CUtAi4AngmsBa6TtNz2zY3dXgxssP2zkk4H3gD8Wt12q+0jWz7viIiYg3Gu9I8GVtlebfte4FLglGn7nAK8rz7/MPB0qYs7iyIiYj7GCfr7A2say2vrupH72N4IfB94eN22RNL1kq6S9ORRFUg6S9KkpMn169fP6gVERMT4xhmGYdQV+/R7+be0z7eBg2x/T9ITgMslPcb2nZvtaF8EXAQwMTGRcQL6luEeIgZjnCv9tcCBjeUDgHVb2kfSTsBDgdtt32P7ewC2VwC3Ao+c70lHRMTcjBP0rwOWSloiaRfgdGD5tH2WA2fW56cBn7JtSYtrRzCSDgWWAqvbOfWIiJitGZt3bG+UdA5wBbAIuNj2TZKWAZO2lwPvBt4vaRVwO+WLAeB4YJmkjcB9wNm2b+/ihcR2IM1IEQtO3sb+aCYmJjw5OTn3AvoKLG3X00cdQ6knYoAkrbA9MdN+GU8/djw70hdyRMsyDENExIAk6EdEDEiadyK2ZelviZYl6EdEf9LfsuDSvBMRMSAJ+hERA5LmnYiIudhOm5FypR8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSAJ+hERA5KgHxExIAn6EREDkqAfETEgCfoREQOSoB8RMSBjBX1JJ0q6RdIqSeeN2L6rpA/W7V+QdEhj2x/X9bdIOqG9U4+IiNmaMehLWgRcAJwEHA6cIenwabu9GNhg+2eBtwBvqMceDpwOPAY4EXh7LS8iIhbAOFf6RwOrbK+2fS9wKXDKtH1OAd5Xn38YeLok1fWX2r7H9teBVbW8iIhYAONMorI/sKaxvBY4Zkv72N4o6fvAw+v6a6cdu//0CiSdBZxVF/9T0i1jnf387At8d6w95zdZwnj1zH9Chj7q2bbes77qyWez7daTz6bp4HF2GifojzqL6dO7bGmfcY7F9kXARWOcS2skTdqeSD3bVh2pZ9utI/Vsu3XMxjjNO2uBAxvLBwDrtrSPpJ2AhwK3j3lsRET0ZJygfx2wVNISSbtQOmaXT9tnOXBmfX4a8CnbrutPr9k9S4ClwBfbOfWIiJitGZt3ahv9OcAVwCLgYts3SVoGTNpeDrwbeL+kVZQr/NPrsTdJugy4GdgIvNT2fR29ltnqqzlpR6pnR3otO1o9O9Jr2dHq6bXpeiZyD7OvR0TEtiF35EZEDEiCfkTEgCToR0QMSIJ+RMSAjHNzVsySpEcCfwM8wvZjJT0OONn261oq//Fb2277Sy3Vswj4bcr9FR+3/bnGtte09XpqeQ8B/hA4yPZLJC0FHmX7/7RYxwmU1/JJ27c11v+W7YvbqqdR7i8Ch9D4O7P9ty2U28vnImkv4I9rPR+zfUlj29tt/14b9dTy+v5s9qfcwdr8bK5uodxzt7bd9l/Nt475GkT2jqQDgTdRhoD4GPAm2z+u2y63fWrL9V0FvBK40PZRdd1XbD+2pfI/XZ/uBkwAN1Dufn4c8AXbx7VUz7uAh1DurfgN4Crb59ZtX7K91S+fWdb1QWAF8Jv1i/LBwOdtH9lS+X8JHAd8CXgO8L9sv61ua/W11DLfDxwGrASm0pRt+2UtlN3L5yLpH4CvUYZS+S3gx8Cv276n5Xr6/mzeAPwaJZW8+dmc3ELZ59enjwKeyKZ7mp4DXG37t+dbx7zZ3uEfwJXA2cCRwNuAa4CH123Xd1DfddPLBlZ2UM+lwM83lh8LvLfF8m9sPN+Jkm/8j8Cubb9vlHs+pr9nN7RY/peBnerzvYGPAm/p8P/AV6kXVR2U3cvnMv3/LPBq4HOUcbW+tB1/NrcAu3bx2TTq+ASwZ2N5T8qvss7qHPcxlDb9xbbfYXul7f8GvB24WtJhjBgLqAXfbZYt6TTg2x3U82jbX55asP0VyhdbW3ZplL3R9lmUK9dPAXu0WA/AvfXqfuo9Owy4p8Xyd7K9EcD2HZQrr70kfYjG62zRV4Cf7qBc6O9z2VXST2KE7b+gfMFcTQn8ben7s1kN7NxBuU0HAfc2lu+lNPUtuKG06e8saTfbPwKw/QFJ/065y3j3Dup7KeWP49GSvgV8HXhBB/V8tf7U/wAlWL6AcoXZlklJJ9r++NQK28skraP0WbTptcDHgQMl/R1wLPCiFsu/VdJTbF8F4HJn+IslvQ74lRbrmbIvcLOkL9L48nILTQj097n8M/A04P816nmfpP+g/GJuS9+fzQ+BlZI+yeafzbyb3hreD3xR0kcof5vPBebdn9OGobTp/wHl5+hV09YfBbzR9jM7qnd34EG27+qo/N2A3wWOr6uuBv5m6stteyPp4cCTKP0T19oebzja8cp+MIDtu0ds29/2t9qqq5b5lFHrp/8fjAX5bM4ctd72+0atn0c9jweeXBevtn19m+XP1SCCfl8kvaD+ihjZg+8Oeu7rH8xBtjudg6CrTJRG+Z+0/fSZ1rVUVyeZG33rOuOpr0yUvrLRptW5C/DIuniLa2JHy3UcByy1/R5Ji4E9XCaTWlBDad4Buk+lZFNT0Z4tlbdVkk6mZCXtAiyRdCSwrKUmhGY9IzNRaOHnav218hBgX0n7sGkOhr2A/eZb/oj6RmZuUH4ltVH+Z20fJ+kuNu8vEiVDZK826qneQ8l4+oW6vBb4ENBWmuvU/+ORmSgt1QHw5vrvyGw0SmZPayQ9lTLT3221ngMlndnmF3/N4pmgvHfvofQhfIDSbLmwFronuc8HcBVlusZmhshXOqhncU+vZwVl7oLm67mxg3q6zET5fUqfxz3136nHDcA5HdTXeebGmOexTwtldJrx1Cizl0wUOs5Ga5S7gvKLaGr5kcCKlutYSflC6fRvcy6PoWTvTHmI7enj+W/soJ5rJH1C0ovr1WtXNtr+foflT+ksE8X2W20vAV5he0njcYTtv+6gyj4yN8bxyRbK6DrjaUpfmShdZ6NN2dmN5lDb/0b7/yfudYn0U59NFwkjczKo5h16SqW0vVTS0ZR5BV4t6WbKBPEfaLmqr0j6dWBRbc99GeUehLZ1mYky5UJJL2NTp/RnKDe3td3W2kfmxjjmPbkr3Wc8TekrE6XrbLQpk5LeTXld1HpWtFzHZZIuBPaW9BLKzW3varmOORlUR66kQymplL8IbKCmUrpx23cHde4L/BXwfNuLWi77IZQbZp5VV10BvM4tZ+/0kYlS/9h3prS1QrnT9D63fAdjX5kbY5xHK3eadpnxNK2ezjNR+spGk7QrJa36OMr7djXwdtut/kqS9EzK36aAK2xf2Wb5czWooD+lh1TKvShXQ6dTOkA/Alxmu+2riR2GpBtsHzHTupbq6jxzY4xzmHfQ7znjqZdMlL6y0Rr1PQw4wPaNLZf7BtuvmmndQhhEm76kF9R/z61paL8DvKSx3LYbKG2Ry2w/0varugj4kq6UtHdjeR9JV7RY/mfrv3dJurPxuEvSnW3VU91Xm96m6j6UTdk1ramZG18DLqDcmf1vko7f6kHdmHPzjqTdarDat37mD6uPQ+gm4+l84FWUwddgUyZK2/WcTOkA/XhdPlLS9Pm426jnM5L2qu/hSuA9ktpOpx51789JLdcxJ0Np0+81lRI41LYl7SlpD9v/2VE9+7rctg6A7Q2Sfqqtwl0HbrO91fdN0j62N8yzulcCn5a0mhIQD6ab9uk3A8+aupKsabx/DzyhjcJrINki27fXp/O5Gv8d4OWUAN/MYb+T8mXWtucCR03VZXudpC7+ls6nZNd9ptazsn6Rte2htu+U9NvAe2yfL6mVK31Jvwv8HnDYtDL3pJv+tlkbRNC3fWF9+nbb63uo8jE1t/1hgCStB86s2Qhtul/SQba/SanoYLoZS2gmnwTm3FShMr7L3cBSSl6zgH9tu421ekDmhqQ2MzdWUD4DUbJeNtTnewPfBJbUem/fUgEzsf1W4K2S/pvraJQdu7dexHSdibLR9velNvq4t2onST8D/CqlT6xNl1BG8v0fwHmN9XfN5zNv0yCCfsM1kr4OfBD4xxauTrfkIuBc25+GnzQpTHUgt+nVwGdVhnKG0gF2Vst1jGNef6W275f0Ztu/ALTatjpCp5kbNf0USe8Altv+aF0+CXhGW/VUfWU89ZWJ0lc22p9Rkh4+a/u62pT4tTYKrinU35f0VuD2qX7D+qv/GNtfaKOe+RhcR24jlfJUyl2ZradS9twpuS+bsjc+31X2xgzn0Ean5J9RAv4/usP/lD1mbqyw/YRp6yZtT7RYRy8ZT7WuzjNR+shGU5mA5mW239JWmVuo53rg8VP/l+uv2ck2Mrbma3BBf0rHqZQfobR/Nq8mJ9z+ZC0jOyDd8zgyLQX9uyjDMdzHpvx5u91hC6bX2UnmRi37CuBf2Dzn/HjbJ7RYRy8XF9tyJspcSPq07f/ScR0rPW0CIEk32n5cl/WOYxDZO1Nqj/2Zkj5G+dn4bUrHUdt+C1gM/ANlcot9gRd2UM8rG48/pQyF+9oO6plJG42wl1NytI+wvWd9tB7we8rcADiD8n/gI/WxuK5rUy8ZT/SUidJ1NlrDNZL+WtKTJT1+6tFyHaslvUzSzvXx+5S7wRfcoK70a3v+5ZSc+c93WM8E5WfqIWzqN3HX3/Iq00K+0XYrwWXcTBRJD5tvJ5Wkp1GaXJ4MHApcD/xL7bRsjaTrbR9VMzcOnMrc6Oqz6TJ7S9LTKYN5bZbxNNWX1EL5P8lEAVY1Nu0JXGP7+W3U06jvetfpRbe2roV6Rr0/tv20Fuv4KeB/U+YjMCXZ4eW2v9NWHXM1tKCvqVRKyofc1R/jLcArKGPW3D+13vY3uqivUa8ogzr9fEvlfZ2tZKJMdVq2pba3PhH4L5TpLe+2/eiW6/gypc34fcCra0de60FfZSjqd1FuYjpI0hHA77ilycRrG/GTqIOH0UHGk6SHAvvQUyaKpBXAc6dlo31kW2igIm2+AAAIHElEQVQH35EMLXunr1TK9bb/ueUyH0DS29iUovkgyg1hN7RVfp+ZKCpj4ewOfJ7SFv7Ejq6KOsvcmOYtwAnU4Yht37ClPpi56CPjaQEyUXrJRpP0COAvgf1snyTpcOAXbL+7xTq6HsZ9zoZ2pX8N5equmUr5l7ZbTaWsP7vPoPykaw7q9Y8t19McR2YjcJvtz7VZR62nj0yUt1BukLqHMvn21ZRspAfMpjSPOnrJ3Kh1fcH2Mc3mibY7WXvMeOotE6WPbLTap/ceSiw4QtJOlCGQW/mFXOu4itLXdmHj8/+K7ce2VcdcDe1Kf/dme6ftz6ibG01eBDyakk431bxjSqdua9zfIGHflfQaNs9E+V6bFdj+Ayht4JT37z2U4Zx3bbGO+1Ru9e886ANrahOPVcb6eRntjxh5LjXjSVKXGU9qfqnUXxmtx47GL6GpIT4Ol9RFNtq+ti+T9MdQJpeX1HYH+ENsf1Gb32jWxTDusza0oL9a0p+yeSplF9OXHdHmVcN0tV16i1d2HXRKnkG5RX5qaN2raTkTRdI5lE7cJwDfAC6mNPO07RpJf025Qe8HUyvd/pR8ZwNvBfanzGj1CUqnaJsup7xH/2K7iyGIp6xWuQlsatL136ObTJRXNp7vRsmsW0HpDG3TD1RGJ5365fIkoO15KXoZxn0uhta8sw+lTfdYNt2Y81o3xq9pqZ53Am+xfXOb5TbKP7g+fWn9d+pL7PnAD20v66jeLjNRXkn5PFbY7uyKqI/MjVrPsdOb2katm2cdfWU8LUgmStvZaI1yHw+8DXgMcBMlnfa0Nu/X0Ohh3J/fdTLHOIYW9HtJpZT0VUqa29Q0gFPzo7Zdz+dsHzvTuhbq6TQTZUekETesjVrXQj2dZzwtlLaz0Rrl7gacQ+lov4uSPPA2t3Tnb+3zOK02IXU6jPtcDK155+8YkUrZgRM7LLtpd0nH2Z4aAvkX2TSiaJs6zUTpU9eZG5J+gXJ1t1ibD9u9F9D2nd+9ZDz1lYnSdTZaw99S+g3+si6fQfm1/Lw2Cq99HudQ7gf6wYwH9GxoQb+XVMoef8K9GLi45lMD3EG5G7h1ttdM65Tq4s7PPryXmrlRl/+N0r7fVrreLsAelL+t5vDDdwKntVTHlBspfSCPpbRJ3yGp1Yyn6p3UTBQA2zdKugRoO/1wsvF8I/D3XWSjUSZFb2ZRfVpS218uV0p6BQ/sO1rwkTaHFvTPVxmkqtNUyr64TMxyhMpMXXJ3k6T3kYnSl04zN1ymkLxK0nu7/vLvI+Op6iUTpcdstOslPcn2tQCSjqGkCbdp6uLrpY11pvS9LKihBf1eUin7Uq/wz6cOrVtzg5d1EPz7yETpSx+ZGwDvkvS8qSSBmkRwqdsdcK2vjKdOM1EWIBvtGOA3JX2zLh9EmZT9y7TU9+aW71Zv09CCfqeplAvgYkr/xK/W5d+gXO39csv1PMrTxlmRdCztXx314VxK38Shkj5HzdzooJ5OZzWrHkwZKbbTjCfK1epFwKMlfYuaidJi+f+1UQ9My0ZrsZ4pnfW5SXqa7U9JGvk3uC20Kgwt6F8r6fCuUikXwGG2f6Wx/GeSVnZQz9t44MxYo9ZtD26m3G/wQ0rmxuWUdv22TZ/V7BBantXM9pvaLG+UmokyYfsZXWWiTDWD1ZTWZubZefWLudUU5I6b3Y4HPgU8h03jVjX/TdDv2XHAmSoDiXWWStmju6dl7xxLmXawFX1movSo08yNhm1lVrN56TkTpa9stC7dVf9WvsKmYA8LM43pSEML+n2lUvblbOBvG9k7G4Azt7L/bPWZidKXPjI3sP3xel/IWZRx+/+JFr+Qe9ZXJkpv2Wgd2qP++yjK/RP/RAn8z6HcfLjgBnVz1o5k2g0gewHYvnOGw+Za18Hbwp2EbZD0XuAd0zI3zmz7RjOV8fp/HziAEvSfRBlArO0hBTpXfxlPZ9udZKL0kI3WOUmfAH7FjZFJgQ/ZXvALz0HNnLUjsX0/5a5CbN/ZVcCv3qV+ZjTqwzGU8Xduk3Qb5camp0j6sqQ2hyj+fcqV3jdcpuY7CljfYvm9sb1kxKP1gC/poSqzmH0K+KSkNzeu+rc3BwH3NpbvpYwEsOCG1ryzo+nrZ3cfmSh96etK60e2fyQJSbva/ldJj+qp7lYsQCZKX9lofXg/8EWV+bINPJdNE9gvqAT97dtvUf5DTW+aaPsqrPNMlL702Ey1tv46upzy5bwBWNdT3W3pOxOlr2y0ztn+C5Vx+59cV73I9vULeU5TEvS3b4dTAv5xlD/CfwHe0UE9O0QmSp9sP7c+fa3KyJ4PBT6+gKc0F31nonSajdY3l+G62x6ye97Skbsdk3QZJZPm7+qqM4C9bf/qlo+ac10/xaZMlN2A77j9yS1iGyLp/Pp0ZCaK7d9uub4jKCm1m2WjtTnkcSTob9c0Yvq9UetaqGeHyUSJ2esjE6XPbLShS/bO9u36OnYM0NnAUbADZaLEnHSeidJzNtqgpU1/+9b5wFHVdp+JEvPSVybKNjsc8Y4kzTvbMW2aNnGktjJV6h/7i4CXU6bM2wDsbPvZbZQf2z6VKQanMlGu7iITpd4E9oCA1NVNYEOVoB+zIukp1EwU2/fOtH/EuCQ9mBHZaB1MCjNoCfoRsU3oMxttyBL0I2Kb0Fc22tAleycithV9ZaMNWq70I2KbIOmrlBvBNstGo0xtuj3Pe7FNSdCPiG1CX9loQ5egHxExIGnTj4gYkAT9iIgBSdCPiBiQBP2IiAFJ0I+IGJD/DyZ3eMLAXQPQAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "importances = domain_score.classifier.feature_importances_\n", "indices = np.argsort(importances)[::-1]\n", @@ -299,7 +256,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.5.2" } }, "nbformat": 4, From f11c646d83e329648ebcae08be97f764675c66db Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Tue, 13 Mar 2018 12:24:45 +0100 Subject: [PATCH 23/35] Updated notebook. --- notebooks/pl-1.0-regression_open-day.ipynb | 124 +++++++++++++-------- 1 file changed, 80 insertions(+), 44 deletions(-) diff --git a/notebooks/pl-1.0-regression_open-day.ipynb b/notebooks/pl-1.0-regression_open-day.ipynb index ae232cd..97f1af5 100644 --- a/notebooks/pl-1.0-regression_open-day.ipynb +++ b/notebooks/pl-1.0-regression_open-day.ipynb @@ -2,18 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 15, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -21,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -46,16 +37,16 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "merlin =json.load(open(path, \"r\", encoding=\"utf8\"))" + "merlin = json.load(open(path, \"r\", encoding=\"utf8\"))" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -64,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -73,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -101,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -124,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -136,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -146,29 +137,16 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 11, "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "descriptor '__init__' requires a 'super' object but received a 'DomainScoringRegressor'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdomain_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdomain_scoring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDomainScoringRegressor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/32de-python/domain_scoring/domain_scoring.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0mExtracts\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mdomain\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0mof\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpaths\u001b[0m \u001b[0mvia\u001b[0m \u001b[0mregression\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 126\u001b[0m \"\"\"\n\u001b[0;32m--> 127\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 128\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassifier\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomForestRegressor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mTypeError\u001b[0m: descriptor '__init__' requires a 'super' object but received a 'DomainScoringRegressor'" - ] - } - ], + "outputs": [], "source": [ "domain_score = domain_scoring.DomainScoringRegressor()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -179,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -190,27 +168,85 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy is 0.3898305084745763\n", + "R^2 is 0.6858428553949141\n" + ] + } + ], "source": [ "domain_score.fit(m_graph, test_size=0.3)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy is 0.3157894736842105\n", + "R^2 is 0.7736986555155622\n" + ] + } + ], "source": [ "domain_score.fit(p_graph, test_size=0.3)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature ranking:\n", + "1. feature 8 (0.345370)\n", + "2. feature 2 (0.245315)\n", + "3. feature 4 (0.067725)\n", + "4. feature 6 (0.049114)\n", + "5. feature 5 (0.043297)\n", + "6. feature 9 (0.041178)\n", + "7. feature 0 (0.040368)\n", + "8. feature 11 (0.036193)\n", + "9. feature 7 (0.035172)\n", + "10. feature 10 (0.033848)\n", + "11. feature 3 (0.033179)\n", + "12. feature 1 (0.029241)\n" + ] + }, + { + "data": { + "text/plain": [ + "(-1, 12)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAE2CAYAAACaxNI3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcXFWZ//HPN4RFQRREiQIJKIKDjAgIMiNLK46AI0ZnXEARRnHEQUbckZ/jkDCj/tTfuIwMioK4oBNFRRQHwQEaEIWEALJlUyCEgHFhdcFAeH5/nFPJTaU6Xd197u307e/79apX193Oc6ur+6lb5zz3XkUEZmbWLlPGewfMzKw8J3czsxZycjczayEndzOzFnJyNzNrISd3M7MWcnK3SUfS5yR9cLz3w6xOcp279UvSHcBTgUcBAQHsEhG/GkObBwHnRMQORXZygpF0NrAsIv51vPfF2mXqeO+ATSgB/G1EXFawzc6HxOg2ljaKiFUF96cxkvzN2WrjPy4bKfWcKe0n6SpJ90m6Ph+Rd5b9g6RbJT0o6ReS3prnPx74H+Dpkh7Ky6dJOlvSqZXtD5K0rDJ9u6T3S/o58HtJUyQ9TdK3Jf1a0i8l/fOQL6DSfqdtSe+TtELSckkzJR0maZGk30o6ubLtKZLOlTQn7++1kp5bWf5sSZfl38NNkg7vinu6pB9Kegg4FngD8P7c1vl5vZPy7+lBSTdLemWljWMkXSnpE5Luza/10MryrSR9Kb+O30n6bmXZy/N7c5+kn0j6y8qykyTdlWMukPSioX5/NkFEhB9+9PUAbgde3GP+04HfAofk6YPz9JPz9GHAjvn5AcAfgOfl6YOAO7vaOxs4tTK91jp5P67LcTclfeBcC3wQ2AjYEfgF8DdDvI7V7ee2H6ls+xbg18A5wOOB3YA/AjPy+qcAfwZeldd/D3Bbfj4VWAKclJ+/CHgQeFYl7n3Afnl60+7Xmuf/PbBtfv4a4PeV6WNy/Dfn1/02YHll2x8C/w1smffpgDx/T2AF8Py83Rvz73FjYBfgzkqM6cBO4/335sfYHj5yt5H6Xj5ivLdyVHgU8MOIuAggIi4hJduX5ekLI+KO/PxK4GJSkh+Lz0TE3RHxZ2AfYJuI+HBErMqxzgSO6LOtlcBHInXvzAG2AT4dEX+MiFuBW4E9KuvPj4jz8vqfJCXp/fJj84j4WEQ8Gqn76gLgyMq250fE1QB539cREd+JiBX5+bmkD4x9K6ssjYgvRUQAXwGeJumpkqYBhwDHRcSD+XdxZd7mH4HPR8S1kXyN9CGxH7AK2ATYXdLUiLgzIm7v83dnGyj3udtIzYx1+9xnAK+tdEGI9Ld1KYCkw4B/JR0hTgEeB9w4xv24qyv+dpLurcSfAlzRZ1u/y4kS4E/5568ry/8EbFGZXt1FFBEhaTnpW4Sqy7KlwHa9th2KpKOBd5G+gQBsTvrA6Vg9gB0Rf5JE3r8nA/dGxIM9mp0BHF3prhLpqP3pEXGlpHcCs4DdJF0EvCci7hluX23D5eRuI9Wrz30Z8NWIOG6dlaVNgG+Tju7Pj4jHJJ1XaafXYOofSF0iHU/rsU51u2XAbRGxax/7X8Lqyh6lzLo9cDfpNU3vWnc6sKgy3f1615qWNB34AvCiiPhZnnc9Q4x1dFkGbC1pyx4Jfhnw4Yj4aK8NI2IOMEfSFjn+/yV1AdkE5W4ZK+Ec4HBJL82Dm5vlgcqnk77ubwL8Nif2w4CXVrZdATxZ0paVeTcAL8uDg9OAE4eJPxd4KA+ybiZpI0nPkfT8ci9xLXtLeqWkjUhH2A8DVwPXAH/I+zFV0gDwclIf+FBWAM+oTG8OPAb8Nv8u3wTs3s9ORSpJvRA4XdKT8j50ur++CLxN0r4AkjaX9LL8cxdJL8ofxCtJ31Qe6+s3YRssJ3cbiZ4lixFxFzAT+D/Ab0hdEe8FpkTE74F3AOfmbpMjgPMr2y4iJb/bcj/+NOBrpG6bO4AfkfrBh9yPiHiMlESfRxok/DUpmW3J6Kz36Drv/+tIg6NvAF6V+7cfAQ4njTX8FjgNeGNELBmiHYCzgOd0xjAiYgGpH/9qUvfLc4CfjGB/30g6D2Eh6YPjRICImE/qdz8tvw+LWXNkvinpSP03pG8gTwFOxia0vk5iyqVWnyZ9GJwVER/rWn4c8HbSwMxDwFsjYqGkGcAC0h8awNURcXzB/TdrlKRTgGdGxNHjvS9m6zNsn7vSiRankcrb7gbmSTo/IhZWVvt6RJyR1z8c+BSp/A3gFxGxV9ndNjOz9emnW2ZfYElELM1fO+eQvoKvlr96d2zB2v11/QwEmZlZQf1Uy2zH2uVbd7F2zS0Ako4H3k0qr3pxZdGOkuaTTub4UEQM139otsGKiNnjvQ9m/Sg2oBoRp0fEzqSz8z6UZ98DTI+IvUln8n0jl1qZmVmN+jlyX87atbvb53lD+SbweYCIWEkqrSIirpP0S9KJLNdVN5DkS1OamY1CRPTs+u7nyH0esLOkGbkO9gjg+9UVJO1cmXw5qcwKSdvkAVkkPQPYmXQdjl47WPvjlFNOcZwNMIbjbLgxHGfDjRGx/mPiYY/cI2KVpBNI1wPplEIukDQbmBcRFwAnSHoJ6Sj9PtbUzx4InCppJWmQ9biIuH+4mGZmNjZ9XX4gIn4E7No175TK83cOsd13ge/2WlbSjtOmsXTFir7WnT27v/GwGdtuyx2/GvU9KMzMxlUrzlBdumIFAcM+Lutjnc6j3w+LXgYGBka97YYWp02vpW1x2vRa2hanqdeyPhvEbfYkxVj2Q9Lob+UzVJswbJ+Wmdl4kkSMYUDVzMwmGCd3M7MWcnI3M2shJ3czsxZycjczayEndzOzFnJyNzNrISd3M7MWcnI3M2shJ3czsxZycjczayEndzOzFnJyNzNrISd3M7MWcnI3M2shJ3czsxZycjczayEndzOzFnJyNzNrISd3M7MWcnI3M2uhvpK7pEMlLZS0WNJJPZYfJ+lGSddLukLSsyvLTpa0RNICSS8tufNmZtabImL9K0hTgMXAwcDdwDzgiIhYWFlni4j4fX5+OHB8RBwmaTfg68A+wPbA/wLPiq6gkrpnjexFSIx+6yHaBMayT2ZmdZNERKjXsn6O3PcFlkTE0oh4BJgDzKyu0Ens2RbAY/n5K4A5EfFoRNwBLMntmZlZjab2sc52wLLK9F30SNCSjgfeDWwMvLiy7c8qqy3P88zMrEbFBlQj4vSI2Bk4CfhQqXbNzGzk+jlyXw5Mr0xvn+cN5ZvA5yvb7tDPtrNmzVr9fGBggIGBgT52zcxs8hgcHGRwcLCvdfsZUN0IWEQaUL0HmAscGRELKuvsHBG/yM8PBz4UEftWBlRfQOqO+TEeUDUzK2J9A6rDHrlHxCpJJwAXk7pxzoqIBZJmA/Mi4gLgBEkvAVYC9wHH5G1vlfQt4FbgEVIVjTOmmVnNhj1yb2QnfORuZjZiYy2FNDOzCcbJ3cyshZzczcxayMndzKyFnNzNzFrIyd3MrIWc3M3MWsjJ3cyshZzczcxayMndzKyFnNzNzFrIyd3MrIWc3M3MWsjJ3cyshZzczcxayMndzKyFnNzNzFrIyd3MrIWc3M3MWsjJ3cyshZzczcxayMndzKyFnNzNzFqor+Qu6VBJCyUtlnRSj+XvknSLpBsk/VjSDpVlqyRdJ+l6Sd8rufNmZtabImL9K0hTgMXAwcDdwDzgiIhYWFnnIOCaiHhY0tuAgYg4Ii97MCK2HCZGDLcfw2zP6Lceok1gLPtkZlY3SUSEei3r58h9X2BJRCyNiEeAOcDM6goRcXlEPJwnrwa2q8YfxT6bmdkY9JPctwOWVabvYu3k3e1Y4MLK9KaS5kr6qaSZQ21kZmblTC3ZmKSjgL2BgyqzZ0TEPZJ2Ai6VdGNE3F4yrpmZra2f5L4cmF6Z3j7PW4uklwAnAwfm7hsAIuKe/PN2SYPAnsA6yX3WrFmrnw8MDDAwMNDP/puZTRqDg4MMDg72tW4/A6obAYtIA6r3AHOBIyNiQWWdPYFzgUMi4peV+U8C/hgRKyVtA1wFzKwOxub1PKBqZjZC6xtQHfbIPSJWSToBuJjUR39WRCyQNBuYFxEXAB8HNgfOlSRgaUS8EvgL4AxJq/K2H+1O7GZmVt6wR+6N7MQEOXLfcdo0lq5YUSzGjG235Y5f/apYe2Y2uazvyN3Jfag2WTe5l47jrh8zG4ux1rmbmdkE4+RuZtZCTu5mZi3k5G5m1kJO7mZmLeTkbmbWQk7uZmYt5ORuZtZCTu5mZi3k5G5m1kJO7mZmLeTkbmbWQk7uZmYt5ORuZtZCTu5mZi3k5G5m1kJO7mZmLeTkbmbWQk7uZmYt5ORuZtZCTu5mZi3k5G5m1kJ9JXdJh0paKGmxpJN6LH+XpFsk3SDpx5J2qCw7Jm+3SNLRJXfezMx6U0SsfwVpCrAYOBi4G5gHHBERCyvrHARcExEPS3obMBARR0jaCrgW2AsQMB/YKyIe6IoRw+3HMPvI6Lceok2ge59Kx+kVw8ysX5KICPVa1s+R+77AkohYGhGPAHOAmdUVIuLyiHg4T14NbJefHwJcHBEPRMT9wMXAoaN5EWZm1r9+kvt2wLLK9F2sSd69HAtcOMS2y4fZ1szMCphasjFJRwF7AweVbNfMzEamn+S+HJhemd4+z1uLpJcAJwMH5u6bzrYDXdte1ivIrFmzVj8fGBhgYGCg12pmZpPW4OAgg4ODfa3bz4DqRsAi0oDqPcBc4MiIWFBZZ0/gXOCQiPhlZX51QHVKfr537n+vxvCAqpnZCK1vQHXYI/eIWCXpBNJg6BTgrIhYIGk2MC8iLgA+DmwOnCtJwNKIeGVE3Cfp30hJPYDZ3YndzMzKG/bIvZGd8JG7mdmIjbUU0szMJhgndzOzFnJyNzNrISd3M7MWcnI3M2shJ3czsxZycjczayEndzOzFnJyNzNrISd3M7MWcnI3M2shJ3czsxZycjczayEndzOzFnJyNzNrISd3M7MWcnI3M2shJ3czsxZycjczayEndzOzFnJyNzNrISd3M7MWcnI3M2uhvpK7pEMlLZS0WNJJPZYfIGm+pEck/V3XslWSrpN0vaTvldpxMzMb2tThVpA0BTgNOBi4G5gn6fyIWFhZbSlwDPDeHk38ISL2KrGzZmbWn2GTO7AvsCQilgJImgPMBFYn94i4My+LHturwH6amdkI9NMtsx2wrDJ9V57Xr00lzZX0U0kzR7R3ZmY2Kv0cuY/VjIi4R9JOwKWSboyI2xuIa2Y2afWT3JcD0yvT2+d5fYmIe/LP2yUNAnsC6yT3WbNmrX4+MDDAwMBAvyHMzCaFwcFBBgcH+1pXEb26ySsrSBsBi0gDqvcAc4EjI2JBj3XPBi6IiO/k6ScBf4yIlZK2Aa4CZnYNxiIphtuPYfaR0W89RJtA9z6VjtMrhplZvyQRET3HNYftc4+IVcAJwMXALcCciFggabakl+cAz5e0DHg18HlJN+XN/wK4VtL1wCXAR7sTu5mZlTfskXsjO+EjdzOzERvTkbuZmU08Tu5mZi3k5G5m1kJO7mZmLeTkbmbWQk7uZmYt5ORuZtZCTu5mZi3k5G5m1kJO7mZmLeTkbmbWQk7uZmYt5ORuZtZCTu5mZi3k5G5m1kJO7mZmLeTkbmbWQk7uZmYt5ORuZtZCTu5mZi3k5G5m1kJO7mZmLeTkbmbWQn0ld0mHSlooabGkk3osP0DSfEmPSPq7rmXH5O0WSTq61I6bmdnQFBHrX0GaAiwGDgbuBuYBR0TEwso604EtgfcC34+I7+b5WwHXAnsBAuYDe0XEA10xYrj9GGYfGf3WQ7QJdO9T6Ti9YpiZ9UsSEaFey/o5ct8XWBIRSyPiEWAOMLO6QkTcGRE3wzq57xDg4oh4ICLuBy4GDh3xKzAzsxHpJ7lvByyrTN+V5/Wje9vlI9jWzMxGyQOqZmYtNLWPdZYD0yvT2+d5/VgODHRte1mvFWfNmrX6+cDAAAMDA71WMzObtAYHBxkcHOxr3X4GVDcCFpEGVO8B5gJHRsSCHuueDVwQEd/J09UB1Sn5+d65/726nQdUzcxGaEwDqhGxCjiBNBh6CzAnIhZImi3p5TnA8yUtA14NfF7STXnb+4B/IyX1a4DZ3YndzMzKG/bIvZGd8JG7mdmIjbUU0szMJhgndzOzFnJyNzNrISd3M7MWcnI3M2shJ3czsxZycjczayEndzOzFnJyNzNrISd3M7MWcnI3M2shJ3czsxZycjczayEndzOzFnJyNzNrISd3M7MWcnLfwOw4bRqSij52nDZtvF+WmTXMd2Iaqk3G505MTb0WM5v4fCcmM7NJxsndzKyFnNzNzFrIyd3MrIWc3M3MWqiv5C7pUEkLJS2WdFKP5ZtImiNpiaSfSZqe58+Q9EdJ1+XH6aVfgJmZrWvqcCtImgKcBhwM3A3Mk3R+RCysrHYscG9EPEvS64CPA0fkZb+IiL0K77eZma1HP0fu+wJLImJpRDwCzAFmdq0zE/hKfv5t0gdBR88aTDMzq08/yX07YFll+q48r+c6EbEKuF/S1nnZjpLmS7pM0v5j3WEzMxvesN0yo9Q5Wr8HmB4R90naC/iepN0i4vfdG8yaNWv184GBAQYGBmraNTOziWlwcJDBwcG+1h328gOS9gNmRcShefoDQETExyrrXJjXuUbSRsA9EfHUHm1dBrwnIq7rmu/LD9QUY6g4O06bxtIVK4rGmbHtttzxq18VbdPMhjbWyw/MA3bOlS+bkAZKv9+1zg+AY/Lz1wCX5sDb5AFZJD0D2Bm4beQvwUpbumIFAUUfpT8szGz0hu2WiYhVkk4ALiZ9GJwVEQskzQbmRcQFwFnA1yQtAX7HmkqZA4FTJa0EHgOOi4j763ghZma2hq8KOVSbtLtbxlefNJv4fFVIGze+Pr3Z+PCR+1Bt4iP3iRSn9ACxB4dtIljfkbuT+1Bt4uQ+meO4wsgmAif30bSJk/tkjuNxCpsI3OduNoF4nMJKcHI328A0dQ6CP0TazcndbJKaqB8ivT5A/EG1Lve5D9Um7e7XdZzxj+E4G26MoeJsaNznbmY2yTi5m5n1aSJ1/7hbZqg2affXS8cZ/xiOs+HGGO84fW/rbhkzs8nFyd3MrIWc3M3MWsjJ3cyshZzczcxayMndzKyFnNzNzFrIyd3MrIWc3M3MWsjJ3cyshZzczcxaqK/kLulQSQslLZZ0Uo/lm0iaI2mJpJ9Jml5ZdnKev0DSS0vuvJmZ9TZscpc0BTgNOAR4DnCkpGd3rXYscG9EPAv4NPDxvO1uwGuBvwAOA06X1PMiN00YdJwNMobjbLgxHGfDjTGcfo7c9wWWRMTSiHgEmAPM7FpnJvCV/PzbwIvz81cAcyLi0Yi4A1iS2xsXg46zQcZwnA03huNsuDGG009y3w5YVpm+K8/ruU5ErAIekLR1j22X99jWzMwKq2tAddy6XszMjHSR+PU9gP2AH1WmPwCc1LXOhcAL8vONgF/3Whf4UWe9ru1L36fXDz/88GNSPIbK3VMZ3jxgZ0kzgHuAI4Aju9b5AXAMcA3wGuDSPP/7wNclfYrUHbMzMLc7wFB3EjEzs9EZNrlHxCpJJwAXk7pxzoqIBZJmA/Mi4gLgLOBrkpYAvyN9ABARt0r6FnAr8Ahw/Jjup2dmZn3ZIO6hamZmZfkMVTOzFnJyNzNrISd3M7MW6qdaZkKStAPwCVKVzoXAJ/IZtkj6XkS8snC8XYDPAdtGxO6Sngu8IiL+vVD7e61veURcVyjORsBbgO1JJbBXVZb9S6nXk9t7PPAeYHpE/KOkZwG75kH6UjEOIb2WS/JZ0p35b46IL5WKU2n3r4EdqfxvRcRXC7TbyPsiaUvg5Bznwoj4RmXZ6RFxfIk4ub2m35vtgBms/d5cUaDdd69veUR8cqwxRqO1A6qSfgx8B7iadO2bvYHDI+J3kq6PiD0Lx7sceB9wRqdtSTdHxO6F2r8sP90MeD7wc9LJYs8Fro2IvyoU50zg8aSS1TcCl0fEu/Oy6yJivR8yI4z1TWA+cHT+QHw88NOIeF6h9j8C7A9cBxwOfDoiPpuXFX0tuc2vAc8EbgBW5dkREe8o0HYj74uk75AuE3I18GZSldvrI+LPheM0/d58DHgdqXKv+t68okDbp+SnuwL7kErAIb2uuRFx1FhjjMpwJzFN1AdwQ9f0UcAtpH++62qINy//vH6ofSgU57vAX1amdwe+XbD9GyvPpwJfyDE3rb62QrGu7fE7+3nB9m8CpubnTwL+B/hUd8yC8RaQD5hqaLuR96XH/80HgauAJ5f8vxmH92YRsGkd700lxhXAEyrTTwCuqDPm+h5t7nPfWNJmnYmIOAc4EbgIeFoN8X4r6Zmks8aQ9GrSSV+l7RoRN3UmIuJm0lU3S9mk0vajEfFW0pHopcAWBeMArJT0ONb8zp4J/Llg+1Mj4lGAiLifdCS1paRzqbzOgm4GptXQLjT3vmyarwTbifVh4IukxPXkgnGafm9uAzauod2qbYGVlemVed64aHNyPxN4QXVGRPwv6Qzam2uI93bgDODZkpYD7wT+qYY4N0o6U9JAfnwRuLFg+9dKOrQ6IyJOBc4m9SWXNIt0SYodJH0duARY534BY/BLSQd1JiJiVUQcSzqKK/mB2LENcKukiyR9v/Mo1HZT78sPWHNV106cL5PGRlb22mCUmn5v/gjcIOkMSf/ZeRSO8VVgrqRZkmaRztj/yvo3qU9r+9zHi6TNgSkR8VBN7W9G+tA4MM+6AvhcRDxcR7y6SXoy6fpFAq6OiN8WbPtxABHxpx7LtouI5aVi5TYP6jU/Ii4vGacNxuG9OabX/Igomnxz4cMBefKKiLi+ZPsj2pe2J/cGqliOiohzhhoxjxpGyvM/xvSIWFS67a44tVR+VNq/JCIOHm5eoVi1VEo0re4Ko6YqP5qq/uqKuQmwS55cFLl6rnCM/YFnRcTZkp4CbBERt5eO04/WlkJWfJFcxQIQETdK+gZQqqRv8/zzCYXaWy9JryCVeG4C7CTpecCpUWDUvytOz8oP0lfPsba9GanyYxtJW7HmEtFbUsP1/oeqlCB96ynR/k8iYn9JD+V2Vy8iVWRsWSJOdjapwqhTHbUcOBcoVT7a+TvuWflRKAbAf+SfPau/WPP6ipA0QOoiuSPH2UHSMSU/4HPVzPNJv7uzSX385wAvLBVjRMZrJLepB81VsTylodczH3hi1+u5qYY4dVZ+nAjcTho8vb3y+DlwQg3xaq+U6HM/tirQRq0VRpU2G6n8oObqr0q780nfcDrTuwDzC8e4gfTBUX1vbiwZYySPNg+odjRVxXKVpIslHZuPRuvySEQ80DWvjr612io/IuIzEbET8N6I2Kny2CMiTqshZBOVEv24pEAbdVcYdTRV+VF39VfHxlHpxoyIxZT/m1gZKaN33pvNh1m/VpOhW+btpJrgThXL7aSa96IiYhdJ+5Iud/xBSbeS7h97TuFQt0h6PbBR7m99B/DTwjFgTeXHXCrJI8p2/5wh6R2sGRweJJ0EVrovtFMpcQlrv5Yxn1w0QiXuWzCLtSuMXgi8qUC73TqVH+fl6VdST+XHjfkErc7/yRsoW/3VcW1XnKNI3T8lfUvSGcCTJP0j6SSwMwvH6FvrB1Q76q5i6Yq1DfBJ4A0RsVHhth9POrHkpXnWRcC/R+FqmSYqP/I/28asSRpvBFZFxFtKxchxGqmU6GM/ipx5WWeFUVec2is/mqr+krQp6UBv/zzrSuD0iCj6rUfS35D+NwVcFBE/Ltn+iPalrcm96SoWpWtyvIp05P5M4DzgWxExv2ScNpH084jYY7h5hWLVXinRxz6MObk3XGHUSOVHU9VflXhbA9tHRNFvCJI+FhEnDTevKW3uc69WsfR6lPZzoFO5sktEnFRHYpf0Y0lPqkxvJemigu3/JP98SNKDlcdDkh4sFSdblfuMO7GfwZpqlmJypcQS4L+A04HFkg5c70b1GHW3jKTNclLaJr/nW+fHjtRTYXQK6YSyk/OsTuVH6TivIA1E/ihPP6/giV/VOIOStsy/w/nAF5Vu/1nS3/SYd1jhGH1rbZ97RJyRn54eEb9pIOQzIiIkbSFpi4j4fU1xtol0ujYAEXGfpKeWajwi9s8/1/sBKGmriLhvjOHeB1wm6TZS4ptBPf3H/wG8tHNkmM99+G/SxeTGLCeMIUXEvfnpWI6ujyOd9fx00sW2Oh4E6hiEfhWwZydWRNwtqY6DolOAfUnjLUTEDZJ2qiHOEyPiQUlvAb4aEadIKnLkLumfgOOBZ3a1+QTqGQ/rS2uTe8VVku4Avgl8t0BCGspzcm341oAk/QY4Jo/+l/SYpOkRcScp0AzqqZYZziXAqLsYlK5f8ifgWaS6YEjdJXVUfqxTKSGpZKXEfNJ7IGA6cF9+/iTgTmCnHPfeoRoYTkR8BviMpH+OfPXEmq3MByt1V348EhEPSGt9qanj73mqpKcBryWNWZX0DdJlxT8KfKAy/6GxvOdj1frk3mAVyxeAd0fEZbC6K+ALwF8XjvNB4CdKlxgWacDrrYVj9GNMlR8R8Zik/4p0eeQ6qiOqaq2UyGWdKF3n57yI+J88fRipyqSkpiqMmqr8aKr6azap+OAnETEvdwEuKdFwLk1+QNJngHs7RRu5G+gFEXFNiTgj1doB1V5qrmJpcnBwG1K1BNRYLTHMPpQYHPx/wM9I36hq+0NssFLipoj4y+HmjTFGIxVGOVbtlR9NVH8p3ejkHRFRuo+9O871wF6dv+X87fTaEhVSo9qftif3pqpYcj3wdcDX8qyjgL0j4lWF4/QcCIyGr5NSKLk/RLoMwSrW1J9HlD1dvztmLZUSue2LSB8c1ZrtAyPikIIxGjmI2NAqP8ZK0tyI2LfmGDdE141mJN0YEc+tM+5Q2lwt09FIFQvpa+tTSHd/+g7pJKA6BgffV3l8iHSJ1lk1xBlOiRNyzifVOO8REU/Ij+KJvaFKCYAjSX8D55FOq39KnldSIxVGNFT5UXf1V8VVkk6TdICkvTqPwjFuk/QOSRvnx4mks6PHxWQ4clenigWgrioWSc8nfb3ckTVjGVH3p7bSvWI/HREt/pEuAAAI8UlEQVR/X6i9vio/JG091sEiSS8ijRkcQL5DFnBlHjwsRvm2irlSYodOpURd742kzSPiDzW1fTDpolRrVRh1xnoKtL+68gP4RWXRE0i3QHxDiTiVeOvc8rLXvAJxev1+IiJe3GP+aGM8FfhP0vXwg1R08M6I+HWpGCPan0mQ3HcndZVsTfpnqKWKRdIi4L2ka7I81pkfEUtLxukRV8AtEbFbofZuZz2VH53Bw1Jyf+g+wIuAtwF/iohnF45xE6lP9yvAB/OAWvHkrnSJ5DNJJ/tMl7QHcFwUuql07sPdj3wRrDy7aIWRpCcCW9FQ5Yek+cCruqq/zhuvfuo2aX21DM1VsfwmIn5QuM11SPosa0rFppC6nIpd+7rJyg+la71sThpUvRLYp6ajnNoqJbp8CjiEfJnciPj5UGMko9FEhdE4VH40Uv0laVvgI8DTI+IwSbsBfxURZxWMUeu9I0a8P5PgyL2pAaiDSf2r3Ren+m7hONXrpDwK3BERV5WMkeM0UfnxKdKJRH8m3YT5CuBn0ePuPGOI0UilRI51TUS8oNqtUPpvrcEKo8YqP5qo/pJ0Iak764MRsYekqaRL85b8e76cfO+Iyvt/c0TsXirGSEyGI/fbJH2ItatY6hjkeBPwbFKZWqdbJkgDa8VEcxe7ulvSv7B25cfdJQNExLsAlM58/AfSP980YNOCMVZJOpJ0VF23ZblrJpROkjqRdF38ko4D3kUaWK2zwkjVD4/8raF4vqh8s+lc2mI3SXVUf20TEd+SdDKkm4xLKj0Q/fiImKu1T8h6tHCMvk2G5P5m0tfy7+TpK6mnimWfiNh1+NVGJ/cbD3mkVsPg4JGkU8PPY81di4pWfkg6gfQ1fG/SHXK+RHp/SrtK0mmks5RXD3RG+Vu5vQ34DOlaL8uBi0mDkyWdT3ovroyI0h8cVbcpnSz1uTx9PPUcFL2v8nwz0qUI5tN1k+4C/qB0Nc3ON5H9gO77IoxVU/eO6Mtk6JZppIpF0tnAJyLi1pLtVtqfkZ++Pf+sfhOJiPjAulsViVtn5cd7Scl8fkTUdoTTRKVEjvPC7i6yXvPGGKOpCqNxqfwoXf1VaXcv4LPAc4BbSGWqry55vkMey+mM591HunfEG+ouqhhyfyZBcm+kikXSAtI/W+f2cZ37Z5b+EOlVOlbkOuFdbdZa+dFGvd6Hmt6b2iuMxkvp6q9Ku5sBJ5AGvB8ijVt8NgqdCZvHJF6du34au3fE+kyGbplGqliAQxuIAenvf/XRYE7CdZyMVmvlR5PqrpSQ9Feko7WnaO37B2wJlL7MRSMVRk1VftRd/VXxVVK//kfy9OtJ335fU6LxPCbxftLZ77V80x2pyZDcT1G6HketVSwNfvU6FvhSrkcW6evfm+sIFBHLugaH6jgTsglfJldK5OnFpP73UmVwmwBbkP6fqpfFfRB4daEYHTeSxih2J/UZ3y+paIVR9kVy5QdARNwo6RtA6bK+6gXcHgX+u47qL2D3rm8DlyldRLCk/81djd1jO+NyZcjJkNwbqWJpSqRLJ+yRk3unLrkOTVR+NKXWSolItx68XNKX6/6Qb6LCKGuk8qPB6q/rJO0XEVcDSHoB5e+h+rr88+2VeQE8o3CcvkyG5F5rFUvTclI/hXzJ11xbe2oNSb6Jyo+mNFEpAXCmpNdEvpmKpK1Il5cueeGwpiqMaq38GIfqr72Bn0q6M09PBxZ19qNEvCh89vZYTYbk/lNJu9VVxTIOvkQaHH5tnn4j6ejt7wrH2TW6riMi6YWkk40mmneTxg6eIekqcqVEDXFqvUtWthnpstW1VhiRjj6/ADxb0nJy5UfB9l9eiQNd1V8F43TUNiYm6cURcamknv+DpbuA+zUZqmUaqWJpinpfVnSdeQXiNFL50YS6KyUqcbqvk7Ij6UzSCfU7a7Lyo6nqrzpJmhURs3I5dOe6TKt/RkQtY2LDmQxH7k1VsTTlT5L2j4jOjaxfSLpdXRFNVn40qNZKiYoN5S5ZY9Jw5UdT1V91eij/r9zMmqQO43P7y9Van9zH6wSCGr0N+GpnQJVULXPMetYfqSYrP5rSRKUEEfGjfNLcW4Hrge9R8IO3YU1VfjRW/VWjLfLPXUnnH5xPei2HA3PHa6da3y3TJl1fl7cEiIgHh9lstLFmtOWDUdI5wGldlRJvj4ijC8d5C6mqaHvgBtLFsH5W+kzYJihd+rlbREQtlR8NVH/VTtIVwN/GmitpPgH4YUSMy/khE+3rz6QWEY8B78/PH6wrsWdnqpk75DShUylxh6Q7SH3u+0i6SVLJS+eeSDpyWxoRLwL2BO5f/yYbpojYqcejeGKX9ERJnySdh3KJpP+ofCudaLYFVlamV+Z546L13TIt1NTX5SYqP5rS1LjLwxHxsCQkbRoRCyVNqDLccaj8aKr6qwlfBeYq3U8Z0v0PvjxeO+PkPvG8jjRQ011zXvqo6jFJ07sqPyZkH16D3Ut35W873wN+LOk+YKJ1bR0IXErqL16n8oPyJ/89s+siYbMl3VA4RiMi4sNK140/IM96U0RcP1774+Q+8exGSuz7k/7ZrgQ+X0OcVlR+NCkiXpWfzlK6EuUTgR+N4y6NRtOVH7VWfzUt0mWk67g2zoh5QHWCkfQtUuXK1/Os1wNPjIjXDr3VqGM9lTWVH48Dfh3lb6JgGxBJp+SnPSs/IuKowvH2IHVnrFX9VfJSvJOVk/sEI+nW7suh9ppXIE5rKj9s5Jqo/Giy+msycrXMxHNdvjYKUNsFkKBFlR82KrVXfjRc/TXpuM994qn9AkjZhK/8sDFpqvJjg7pMbpu4W2aC0Zrb7fVUqjIk/1O/CXgn6VZr9wEbR8TLSrRvGz6lW9N1Kj+uqKPyI58stU4SqutkqcnEyd2GJekgcuVHRKwcbn2zfkl6HD2qv2q4+cik4+RuZuOmyeqvycbJ3czGTVPVX5ORq2XMbDw1Vf016fjI3czGTb6Zzq7AWtVfpPu1Ttib6mwInNzNbNw0Vf01GTm5m5m1kPvczcxayMndzKyFnNzNzFrIyd3MrIWc3M3MWuj/AxwE3RCu5dwrAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "importances = domain_score.classifier.feature_importances_\n", "indices = np.argsort(importances)[::-1]\n", From 35208f05afe09f536898448ef8b49b99e73889b7 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Tue, 13 Mar 2018 12:26:08 +0100 Subject: [PATCH 24/35] Add script to copy notebooks from container to local directory. --- deployment/copy-notebooks.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100755 deployment/copy-notebooks.sh diff --git a/deployment/copy-notebooks.sh b/deployment/copy-notebooks.sh new file mode 100755 index 0000000..2440fca --- /dev/null +++ b/deployment/copy-notebooks.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +docker cp notebook-container:/32de-python/notebooks/. ${1:-.}/notebooks/. From a12c260f3d76e4280e0a4866ec4a34874f48fd26 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Tue, 13 Mar 2018 12:48:31 +0100 Subject: [PATCH 25/35] Add missing random state. --- domain_scoring/domain_scoring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/domain_scoring/domain_scoring.py b/domain_scoring/domain_scoring.py index c9e618f..f26e608 100644 --- a/domain_scoring/domain_scoring.py +++ b/domain_scoring/domain_scoring.py @@ -125,7 +125,7 @@ def __init__(self): Extracts the domain value of meta-paths via regression. """ super().__init__() - self.classifier = RandomForestRegressor() + self.classifier = RandomForestRegressor(random_state=self.random_state) def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tuple[MetaPath]], List[int]): """ From 48b83b3f5b1933921ad6b0a98edf9d7274dfc373 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Wed, 14 Mar 2018 11:59:01 +0100 Subject: [PATCH 26/35] Add rnn notebook --- notebooks/pl-1.0-regression_open-day.ipynb | 31 +- notebooks/pl-1.0-rnn_open-day.ipynb | 387 +++++++++++++++++++++ 2 files changed, 407 insertions(+), 11 deletions(-) create mode 100644 notebooks/pl-1.0-rnn_open-day.ipynb diff --git a/notebooks/pl-1.0-regression_open-day.ipynb b/notebooks/pl-1.0-regression_open-day.ipynb index 97f1af5..bc75241 100644 --- a/notebooks/pl-1.0-regression_open-day.ipynb +++ b/notebooks/pl-1.0-regression_open-day.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -12,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -28,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -137,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -146,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -157,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -168,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -186,15 +195,15 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Test accuracy is 0.3157894736842105\n", - "R^2 is 0.7736986555155622\n" + "Test accuracy is 0.3684210526315789\n", + "R^2 is 0.7153305150809386\n" ] } ], diff --git a/notebooks/pl-1.0-rnn_open-day.ipynb b/notebooks/pl-1.0-rnn_open-day.ipynb new file mode 100644 index 0000000..0bc1c07 --- /dev/null +++ b/notebooks/pl-1.0-rnn_open-day.ipynb @@ -0,0 +1,387 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "from keras.models import Sequential\n", + "from keras.layers import Dense, Activation" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [], + "source": [ + "## Help Python find our packages\n", + "import sys\n", + "sys.path.append('..')\n", + "\n", + "import json\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import domain_scoring.domain_scoring as domain_scoring\n", + "\n", + "# Randomness\n", + "import random as rn\n", + "import tensorflow as tf\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "merlin = json.load(open(path, \"r\", encoding=\"utf8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "potato = json.load(open(path, \"r\", encoding=\"utf8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_mps(data):\n", + " count = 0\n", + " first = True\n", + " batches = []\n", + " batch = []\n", + " for probably_path in data[\"meta_paths\"]:\n", + " if count % 6 == 0:\n", + " # Don't add empty batches\n", + " if len(batch) > 0:\n", + " batches.append(batch)\n", + " batch = []\n", + " else:\n", + " if 'time_to_rate' not in probably_path.keys():\n", + " batch.append(probably_path)\n", + " count += 1\n", + " # append last batch\n", + " if len(batch) > 0:\n", + " batches.append(batch)\n", + " print('#meta-paths:', count - len(batches) - 1)\n", + " return batches" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "def construct_graph(batches):\n", + " ## Construct rating graph\n", + " from util.datastructures import MetaPathRatingGraph, MetaPath\n", + " graph = MetaPathRatingGraph()\n", + "\n", + " for batch in batches:\n", + " #ordered = sorted(batch, key=lambda x: float(x['rating']))\n", + " for metapath in batch:\n", + " for another_metapath in batch:\n", + " if metapath is another_metapath:\n", + " continue\n", + " if float(metapath['rating']) <= float(another_metapath['rating']):\n", + " graph.add_user_rating(MetaPath.from_list(another_metapath['metapath']), MetaPath.from_list(metapath['metapath']), \n", + " distance=float(another_metapath['rating']) - float(metapath['rating']))\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "## Clean up data // remove time_to_rate from array of ratings.\n", + "def clean_up(data):\n", + " batches = extract_mps(data)\n", + " return batches, construct_graph(batches)" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#meta-paths: 51\n", + "#meta-paths: 51\n" + ] + } + ], + "source": [ + "p_batches, p_graph = clean_up(potato)\n", + "m_batches, m_graph = clean_up(merlin)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "domain_score = domain_scoring.DomainScoring()" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "# Preprocess meta-paths\n", + "domain_score.vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), token_pattern='\\\\b\\\\w+\\\\b')" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "domain_score.fit(m_graph)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "x, y = domain_score._extract_data_labels(m_graph)\n", + "x = domain_score._preprocess(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "test_size = 0.3\n", + "random_state = 42" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y,\n", + " test_size=test_size,\n", + " random_state=random_state,\n", + " shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "131" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(x_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(131, 44)" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(x_train).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "131/131 [==============================] - 0s 3ms/step - loss: 0.6947 - acc: 0.5191\n", + "Epoch 2/10\n", + "131/131 [==============================] - 0s 172us/step - loss: 0.6815 - acc: 0.5878\n", + "Epoch 3/10\n", + "131/131 [==============================] - 0s 237us/step - loss: 0.6762 - acc: 0.5573\n", + "Epoch 4/10\n", + "131/131 [==============================] - 0s 220us/step - loss: 0.6721 - acc: 0.6260\n", + "Epoch 5/10\n", + "131/131 [==============================] - 0s 183us/step - loss: 0.6684 - acc: 0.6641\n", + "Epoch 6/10\n", + "131/131 [==============================] - 0s 196us/step - loss: 0.6656 - acc: 0.6870\n", + "Epoch 7/10\n", + "131/131 [==============================] - 0s 131us/step - loss: 0.6614 - acc: 0.6870\n", + "Epoch 8/10\n", + "131/131 [==============================] - 0s 193us/step - loss: 0.6601 - acc: 0.6794\n", + "Epoch 9/10\n", + "131/131 [==============================] - 0s 240us/step - loss: 0.6551 - acc: 0.6870\n", + "Epoch 10/10\n", + "131/131 [==============================] - 0s 170us/step - loss: 0.6537 - acc: 0.6641\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reproducible results\n", + "np.random.seed(random_state)\n", + "rn.seed(random_state)\n", + "tf.set_random_seed(random_state)\n", + "os.environ['PYTHONHASHSEED'] = '0'\n", + "\n", + "# Build model\n", + "model = Sequential()\n", + "model.add(Dense(44, activation='relu', input_dim=44))\n", + "model.add(Dense(1, activation='sigmoid'))\n", + "model.compile(optimizer='rmsprop',\n", + " loss='binary_crossentropy',\n", + " metrics=['accuracy'])\n", + "\n", + "# Train model\n", + "model.fit(np.array(x_train), np.array(y_train), epochs=10, batch_size=15)" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "57/57 [==============================] - 0s 3ms/step\n" + ] + }, + { + "data": { + "text/plain": [ + "[0.6610821872426752, 0.6666666698037532]" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.evaluate(np.array(x_test), np.array(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From d91bec1cba4220d9afadbf05f92a781017b1d67f Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Wed, 14 Mar 2018 11:59:17 +0100 Subject: [PATCH 27/35] Refactor domain scoring class. --- domain_scoring/domain_scoring.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/domain_scoring/domain_scoring.py b/domain_scoring/domain_scoring.py index f26e608..6e3d6e3 100644 --- a/domain_scoring/domain_scoring.py +++ b/domain_scoring/domain_scoring.py @@ -35,6 +35,7 @@ def fit(self, metapath_graph: MetaPathRatingGraph, test_size: float = None) -> N """ self._fit_vectorizer(metapath_graph) x, y = self._extract_data_labels(metapath_graph) + x = self._preprocess(x) if test_size is not None: x_train, x_test, y_train, y_test = train_test_split(x, y, @@ -45,11 +46,19 @@ def fit(self, metapath_graph: MetaPathRatingGraph, test_size: float = None) -> N x_train = x y_train = y - self.classifier = self.classifier.fit(self._preprocess(x_train), y_train) + self._fit(x_train, y_train) if test_size: self._test_score(x_test, y_test) + def _fit(self, x, y) -> None: + """ + Executes the actual fitting of the classifier. Overwrite in subclasses if necessary. + :param x: The preprocessed features. + :param y: The labels. + """ + self.classifier.fit(x, y) + def predict(self, metapath_unrated: List[MetaPath]) -> List[Tuple[MetaPath, int]]: """ Predict the domain value of the given meta-paths. @@ -116,7 +125,7 @@ def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tup return metapath_pairs, metapath_labels def _test_score(self, x_test, y_test): - print('Test accuracy is {}'.format(self.classifier.score(X=self._preprocess(x_test), y=y_test))) + print('Test accuracy is {}'.format(self.classifier.score(X=x_test, y=y_test))) class DomainScoringRegressor(DomainScoring): @@ -151,7 +160,16 @@ def _test_score(self, x_test, y_test): """ Converts regression result into a binary classification and uses mean accuracy. """ - test_predict = self.classifier.predict(self._preprocess(x_test)) + test_predict = self.classifier.predict(x_test) score = numpy.mean(numpy.logical_and(numpy.array(y_test) > 0, numpy.array(test_predict) > 0)) print('Test accuracy is {}'.format(score)) - print('R^2 is {}'.format(self.classifier.score(X=self._preprocess(x_test), y=y_test))) \ No newline at end of file + print('R^2 is {}'.format(self.classifier.score(X=x_test, y=y_test))) + +# TODO: WIP +class DomainScoringNeuralNet(DomainScoring): + + def __init__(self): + """ + Extracts the domain value of meta-paths by training a neural network. + """ + super().__init__() \ No newline at end of file From 9512986e2a01f00d4a72be8539353d17dded48f6 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Wed, 14 Mar 2018 14:22:07 +0100 Subject: [PATCH 28/35] Add simple neural networks to notebook. --- notebooks/pl-1.0-exploration_open-day.ipynb | 12 +- notebooks/pl-1.0-rnn_open-day.ipynb | 301 +++++++++++++++++--- 2 files changed, 265 insertions(+), 48 deletions(-) diff --git a/notebooks/pl-1.0-exploration_open-day.ipynb b/notebooks/pl-1.0-exploration_open-day.ipynb index eea9bd9..fe2e350 100644 --- a/notebooks/pl-1.0-exploration_open-day.ipynb +++ b/notebooks/pl-1.0-exploration_open-day.ipynb @@ -61,15 +61,6 @@ "merlin = json.load(open(path, \"r\", encoding=\"utf8\"))" ] }, - { - "cell_type": "code", - "execution_count": 91, - "metadata": {}, - "outputs": [], - "source": [ - "type_selection = merlin[\"edge_type_selection\"] + merlin[\"node_type_selection\"]" - ] - }, { "cell_type": "code", "execution_count": 95, @@ -87,6 +78,7 @@ } ], "source": [ + "type_selection = merlin[\"edge_type_selection\"] + merlin[\"node_type_selection\"]\n", "types = []\n", "for pair in type_selection:\n", " types.append(pair[0])\n", @@ -618,7 +610,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.5.2" } }, "nbformat": 4, diff --git a/notebooks/pl-1.0-rnn_open-day.ipynb b/notebooks/pl-1.0-rnn_open-day.ipynb index 0bc1c07..2895da9 100644 --- a/notebooks/pl-1.0-rnn_open-day.ipynb +++ b/notebooks/pl-1.0-rnn_open-day.ipynb @@ -2,20 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 165, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], + "outputs": [], "source": [ "from keras.models import Sequential\n", - "from keras.layers import Dense, Activation" + "from keras.layers import *" ] }, { @@ -75,6 +67,30 @@ "potato = json.load(open(path, \"r\", encoding=\"utf8\"))" ] }, + { + "cell_type": "code", + "execution_count": 191, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ACTED_IN', 'PRODUCED', 'DIRECTED', 'WROTE', 'Person', 'Movie']" + ] + }, + "execution_count": 191, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type_selection = merlin[\"edge_type_selection\"] + merlin[\"node_type_selection\"]\n", + "types = []\n", + "for pair in type_selection:\n", + " types.append(pair[0])\n", + "types" + ] + }, { "cell_type": "code", "execution_count": 86, @@ -197,14 +213,104 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 175, "metadata": {}, "outputs": [], "source": [ "x, y = domain_score._extract_data_labels(m_graph)\n", + "x_raw = x\n", "x = domain_score._preprocess(x)" ] }, + { + "cell_type": "code", + "execution_count": 202, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import preprocessing\n", + "from keras.preprocessing.sequence import pad_sequences" + ] + }, + { + "cell_type": "code", + "execution_count": 199, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LabelEncoder()" + ] + }, + "execution_count": 199, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labeler = preprocessing.LabelEncoder()\n", + "labeler.fit(types)" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "metadata": {}, + "outputs": [], + "source": [ + "def to_sequence(in_list):\n", + " return in_list.reshape((-1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 272, + "metadata": {}, + "outputs": [], + "source": [ + "def one_hot(in_sequence, distinct_values):\n", + " sequence = np.zeros((len(in_sequence), distinct_values))\n", + " i = 0\n", + " for point in in_sequence: \n", + " sequence[i][point] = 1\n", + " i += 1\n", + " return sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 299, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess_raw(raw, distinct_values, max_len=9):\n", + " data = []\n", + " for a,b in raw:\n", + " # encode labels to integers (0 is reserved for padding)\n", + " a = np.array(labeler.transform(a.as_list())) + 1\n", + " b = np.array(labeler.transform(b.as_list())) + 1\n", + " # pad to same length\n", + " a, b = pad_sequences([a, b], max_len, padding='post', value=0)\n", + " # merge a and b\n", + " sequence = np.append(a, b)\n", + " # to sequence\n", + " sequence = to_sequence(sequence)\n", + " # one-hot encode because we don't have distances/embeddings\n", + " sequence = one_hot(sequence, distinct_values + 1)\n", + " data.append(sequence)\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": 300, + "metadata": {}, + "outputs": [], + "source": [ + "x_preprocess = preprocess_raw(x_raw, len(types))" + ] + }, { "cell_type": "code", "execution_count": 97, @@ -269,7 +375,20 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 173, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ['PYTHONHASHSEED'] = '0'\n", + "def reset_seed():\n", + " np.random.seed(random_state)\n", + " rn.seed(random_state)\n", + " tf.set_random_seed(random_state)" + ] + }, + { + "cell_type": "code", + "execution_count": 319, "metadata": {}, "outputs": [ { @@ -277,45 +396,41 @@ "output_type": "stream", "text": [ "Epoch 1/10\n", - "131/131 [==============================] - 0s 3ms/step - loss: 0.6947 - acc: 0.5191\n", + "131/131 [==============================] - 1s 4ms/step - loss: 0.6956 - acc: 0.5191\n", "Epoch 2/10\n", - "131/131 [==============================] - 0s 172us/step - loss: 0.6815 - acc: 0.5878\n", + "131/131 [==============================] - 0s 106us/step - loss: 0.6822 - acc: 0.5878\n", "Epoch 3/10\n", - "131/131 [==============================] - 0s 237us/step - loss: 0.6762 - acc: 0.5573\n", + "131/131 [==============================] - 0s 163us/step - loss: 0.6758 - acc: 0.5954\n", "Epoch 4/10\n", - "131/131 [==============================] - 0s 220us/step - loss: 0.6721 - acc: 0.6260\n", + "131/131 [==============================] - 0s 141us/step - loss: 0.6717 - acc: 0.6183\n", "Epoch 5/10\n", - "131/131 [==============================] - 0s 183us/step - loss: 0.6684 - acc: 0.6641\n", + "131/131 [==============================] - 0s 137us/step - loss: 0.6690 - acc: 0.6718\n", "Epoch 6/10\n", - "131/131 [==============================] - 0s 196us/step - loss: 0.6656 - acc: 0.6870\n", + "131/131 [==============================] - 0s 149us/step - loss: 0.6659 - acc: 0.6794\n", "Epoch 7/10\n", - "131/131 [==============================] - 0s 131us/step - loss: 0.6614 - acc: 0.6870\n", + "131/131 [==============================] - 0s 171us/step - loss: 0.6623 - acc: 0.6794\n", "Epoch 8/10\n", - "131/131 [==============================] - 0s 193us/step - loss: 0.6601 - acc: 0.6794\n", + "131/131 [==============================] - 0s 145us/step - loss: 0.6610 - acc: 0.6565\n", "Epoch 9/10\n", - "131/131 [==============================] - 0s 240us/step - loss: 0.6551 - acc: 0.6870\n", + "131/131 [==============================] - 0s 228us/step - loss: 0.6575 - acc: 0.6870\n", "Epoch 10/10\n", - "131/131 [==============================] - 0s 170us/step - loss: 0.6537 - acc: 0.6641\n" + "131/131 [==============================] - 0s 204us/step - loss: 0.6559 - acc: 0.6794\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 155, + "execution_count": 319, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Reproducible results\n", - "np.random.seed(random_state)\n", - "rn.seed(random_state)\n", - "tf.set_random_seed(random_state)\n", - "os.environ['PYTHONHASHSEED'] = '0'\n", - "\n", + "reset_seed()\n", "# Build model\n", "model = Sequential()\n", "model.add(Dense(44, activation='relu', input_dim=44))\n", @@ -325,42 +440,152 @@ " metrics=['accuracy'])\n", "\n", "# Train model\n", - "model.fit(np.array(x_train), np.array(y_train), epochs=10, batch_size=15)" + "model.fit(np.array(x_train), np.array(y_train), epochs=10, batch_size=16)" ] }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 320, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "57/57 [==============================] - 0s 3ms/step\n" + "57/57 [==============================] - 0s 5ms/step\n" ] }, { "data": { "text/plain": [ - "[0.6610821872426752, 0.6666666698037532]" + "[0.6638993683614229, 0.6491228101546305]" ] }, - "execution_count": 156, + "execution_count": 320, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "model.evaluate(np.array(x_test), np.array(y_test))" + "model.evaluate(np.array(x_test), np.array(y_test), batch_size=16)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 321, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "x_p_train, x_p_test, y_p_train, y_p_test = train_test_split(x_preprocess, y,\n", + " test_size=test_size,\n", + " random_state=random_state,\n", + " shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 322, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(131, 18, 7)" + ] + }, + "execution_count": 322, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(x_p_train).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 323, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "131/131 [==============================] - 1s 5ms/step - loss: 0.7035 - acc: 0.5725\n", + "Epoch 2/10\n", + "131/131 [==============================] - 0s 390us/step - loss: 0.5776 - acc: 0.6641\n", + "Epoch 3/10\n", + "131/131 [==============================] - 0s 354us/step - loss: 0.5706 - acc: 0.6641\n", + "Epoch 4/10\n", + "131/131 [==============================] - 0s 340us/step - loss: 0.4936 - acc: 0.7252\n", + "Epoch 5/10\n", + "131/131 [==============================] - 0s 275us/step - loss: 0.5130 - acc: 0.7176\n", + "Epoch 6/10\n", + "131/131 [==============================] - 0s 256us/step - loss: 0.5067 - acc: 0.7328\n", + "Epoch 7/10\n", + "131/131 [==============================] - 0s 247us/step - loss: 0.5142 - acc: 0.7176\n", + "Epoch 8/10\n", + "131/131 [==============================] - 0s 272us/step - loss: 0.4981 - acc: 0.7481\n", + "Epoch 9/10\n", + "131/131 [==============================] - 0s 275us/step - loss: 0.4619 - acc: 0.7939\n", + "Epoch 10/10\n", + "131/131 [==============================] - 0s 257us/step - loss: 0.4563 - acc: 0.7634\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 323, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reproducible results\n", + "reset_seed()\n", + "# Use sequence classification (RNN/LSTM)\n", + "model_rnn = Sequential()\n", + "model_rnn.add(SimpleRNN(128, input_shape=(18, 7)))\n", + "model_rnn.add(Dropout(0.5))\n", + "model_rnn.add(Dense(1, activation='sigmoid'))\n", + "\n", + "model_rnn.compile(loss='binary_crossentropy',\n", + " optimizer='rmsprop',\n", + " metrics=['accuracy'])\n", + "\n", + "model_rnn.fit(np.array(x_p_train), np.array(y_p_train), batch_size=35, epochs=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 324, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "57/57 [==============================] - 0s 5ms/step\n" + ] + }, + { + "data": { + "text/plain": [ + "[0.5831738752231264, 0.7017544121073004]" + ] + }, + "execution_count": 324, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_rnn.evaluate(np.array(x_p_test), np.array(y_p_test), batch_size=35)" + ] } ], "metadata": { From 2bc0905c21c0cdea1e1e96a444341e4ac35b9445 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Wed, 14 Mar 2018 14:22:50 +0100 Subject: [PATCH 29/35] Add newly rendered image. --- notebooks/sb-1.0-classification_open-day.ipynb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/notebooks/sb-1.0-classification_open-day.ipynb b/notebooks/sb-1.0-classification_open-day.ipynb index 76ff5dc..1635890 100644 --- a/notebooks/sb-1.0-classification_open-day.ipynb +++ b/notebooks/sb-1.0-classification_open-day.ipynb @@ -134,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -158,9 +158,8 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAE2CAYAAACaxNI3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucXVV99/HPN0RALhEQNQqEAAoIKHIRaQUZpBWwarD1AopQxYpFKmpVykP7EGzVR33qpVIUBVFEm4oIWCwXCwwgAoFwhyREgRACD4hcVTAQfs8fa51kz8mZzJnM2mdm9vm+X6/zmrP32Xv99pnLb/ZZ67fXVkRgZmbNMmW8D8DMzMpzcjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3frO5K+Ien48T4OszrJde7WLUn3AC8GngUEBLBtRPy/MbS5D3BmRGxR5CAnGUmnA0si4n+P97FYs0wd7wOwSSWAv4iIywq22fonsWY7S2tFxPKCx9MzkvzJ2WrjXy4bLXVcKe0p6SpJj0q6MZ+Rt177a0l3SHpC0q8kfSivXw/4b+Blkp7Mr0+XdLqkz1T230fSksry3ZI+Lelm4HeSpkh6qaQfS3pI0q8l/d2wb6DSfqttSZ+S9KCkpZJmSTpQ0kJJD0s6rrLvCZLOkjQnH+/1kl5deX17SZfl78Otkt7aFvdkST+T9CRwBPBe4NO5rfPydsfm79MTkm6TdFCljcMlXSnpS5Ieye/1gMrrG0v6Tn4fv5X0k8prb8k/m0cl/ULSqyqvHSvpvhxzvqR9h/v+2SQREX740dUDuBt4Y4f1LwMeBvbPy/vl5Rfm5QOBmfn53sDvgdfk5X2Ae9vaOx34TGV5yDb5OG7Icdch/cO5HjgeWAuYCfwK+PNh3seK9nPbz1T2/SDwEHAmsB6wA/AHYMu8/QnAH4G35+3/HrgrP58KLAKOzc/3BZ4AXlGJ+yiwZ15ep/295vV/BbwkP38n8LvK8uE5/gfy+/4wsLSy78+A/wCm5WPaO6/fBXgQ2D3v9778fXwesC1wbyXGDGCr8f5982NsD5+522idm88YH6mcFR4K/CwiLgKIiEtIyfbNefmCiLgnP78SuJiU5MfiaxFxf0T8EXgtsGlEfDYiludYpwIHd9nWMuBzkbp35gCbAl+NiD9ExB3AHcDOle3nRcQ5efsvk5L0nvmxfkR8ISKejdR9dT5wSGXf8yLiGoB87KuIiLMj4sH8/CzSP4w9KpssjojvREQA3wNeKunFkqYD+wNHRsQT+XtxZd7nb4BvRsT1kXyf9E9iT2A5sDawk6SpEXFvRNzd5ffOJij3udtozYpV+9y3BN5V6YIQ6XfrUgBJBwL/m3SGOAV4PnDLGI/jvrb4m0l6pBJ/CnBFl239NidKgKfy14cqrz8FbFBZXtFFFBEhaSnpU4Sqr2WLgc067TscSYcBHyd9AgFYn/QPp2XFAHZEPCWJfHwvBB6JiCc6NLslcFilu0qks/aXRcSVkj4GzAZ2kHQR8PcR8cBIx2oTl5O7jVanPvclwBkRceQqG0trAz8mnd2fFxHPSTqn0k6nwdTfk7pEWl7aYZvqfkuAuyJiuy6Ov4QVlT1KmXVz4H7Se5rRtu0MYGFluf39DlmWNAP4FrBvRFyd193IMGMdbZYAm0ia1iHBLwE+GxGf77RjRMwB5kjaIMf/P6QuIJuk3C1jJZwJvFXSm/Lg5rp5oPJlpI/7awMP58R+IPCmyr4PAi+UNK2y7ibgzXlwcDpwzAjx5wJP5kHWdSWtJWlHSbuXe4tD7CbpIElrkc6wnwauAa4Ffp+PY6qkAeAtpD7w4TwIbF1ZXh94Dng4fy/fD+zUzUFFKkm9ADhZ0kb5GFrdX98GPixpDwBJ60t6c/66raR98z/iZaRPKs919Z2wCcvJ3UajY8liRNwHzAL+F/AbUlfEJ4EpEfE74KPAWbnb5GDgvMq+C0nJ767cjz8d+D6p2+Ye4EJSP/iwxxERz5GS6GtIg4QPkZLZNNbMas+u8/G/mzQ4+l7g7bl/+xngraSxhoeBk4D3RcSiYdoBOA3YsTWGERHzSf3415C6X3YEfjGK430f6TqEBaR/HMcARMQ8Ur/7SfnncCcrz8zXIZ2p/4b0CeRFwHHYpNbVRUy51OqrpH8Gp0XEF9pePxL4CGlg5kngQxGxIL92HGlk/1ngmIi4uOg7MOshSScA20TEYeN9LGarM+KZu9KFFieRRuF3BA6RtH3bZj+IiFdHxC7Al4Cv5H13AN4FvJJUDndy7qM0M7MaddMtswewKCIW54+dc0gfwVfIH71bNmBlf93bgDm5LOweVi3pMjOzGnRTLbMZQ8u37qNDgpZ0FPAJUnnVGyv7Xl3ZbClDy8LMJpWIOHG8j8GsG8UGVCPi5Ih4OenqvH8q1a6ZmY1eN2fuSxlau7t5Xjec/wS+Wdm3Ottfx30leWpKM7M1EBEdxzG7OXO/Dni5pC1zHezBwE+rG0h6eWXxLaQyK/J2B0taW9JWwMtJNcmdDrD2xwknnOA4EzCG40zcGI4zcWNErP6ceMQz94hYLulo0nwgrVLI+ZJOBK6LiPOBoyX9GekCiEfJ9bMRcYekH5Hm5ngGOCpGOiIzMxuzrqYfiIgLge3a1p1Qef6x1ez7eaDjJc9mZlaPvrpCdWBgwHEmYAzHmbgxHGfixhjJhLjNniT31piZjZIkYgwDqmZmNsk4uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNVAjkvvM6dORVPQxc/r08X5bZmZrrBFXqErqfOfmMRCMOOuamdl48hWqZmZ9xsndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzcR6H07JOeedLM6uJZIYdrk1VnhSwdxzNPmtlYeFZIM7M+4+RuZtZAXSV3SQdIWiDpTknHdnj945Jul3STpJ9L2qLy2nJJN0i6UdK5JQ/ezMw6G7HPXdIU4E5gP+B+4Drg4IhYUNlmH+DaiHha0oeBgYg4OL/2RERMGyGG+9zNzEZprH3uewCLImJxRDwDzAFmVTeIiMsj4um8eA2wWTX+GhyzmZmNQTfJfTNgSWX5PoYm73ZHABdUlteRNFfSLyXNGm4nMzMrZ2rJxiQdCuwG7FNZvWVEPCBpK+BSSbdExN0l45qZ2VDdJPelwIzK8uZ53RCS/gw4DnhD7r4BICIeyF/vljQI7AKsktxnz5694vnAwAADAwPdHL+ZWd8YHBxkcHCwq227GVBdC1hIGlB9AJgLHBIR8yvb7AKcBewfEb+urN8I+ENELJO0KXAVMKs6GJu384CqmdkorW5AdcQz94hYLulo4GJSH/1pETFf0onAdRFxPvBFYH3gLEkCFkfEQcArgVMkLc/7fr49sZuZWXmefmC4NvGZu5lNbJ5+wMyszzi5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQN1ldwlHSBpgaQ7JR3b4fWPS7pd0k2Sfi5pi8prh+f9Fko6rOTBm5lZZ4qI1W8gTQHuBPYD7geuAw6OiAWVbfYBro2IpyV9GBiIiIMlbQxcD+wKCJgH7BoRj7fFiJGOY4RjZM33HqZNoP2YSsfpFMPMrFuSiAh1eq2bM/c9gEURsTgingHmALOqG0TE5RHxdF68BtgsP98fuDgiHo+Ix4CLgQPW5E2YmVn3uknumwFLKsv3sTJ5d3IEcMEw+y4dYV8zMytgasnGJB0K7AbsU7JdMzMbnW6S+1JgRmV587xuCEl/BhwHvCF337T2HWjb97JOQWbPnr3i+cDAAAMDA502MzPrW4ODgwwODna1bTcDqmsBC0kDqg8Ac4FDImJ+ZZtdgLOA/SPi15X11QHVKfn5brn/vRrDA6pmZqO0ugHVEc/cI2K5pKNJg6FTgNMiYr6kE4HrIuJ84IvA+sBZkgQsjoiDIuJRSf9MSuoBnNie2M3MrLwRz9x7chA+czczG7WxlkKamdkk4+RuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORuZtZATu5mZg3k5G5m1kBdJXdJB0haIOlOScd2eH1vSfMkPSPpL9teWy7pBkk3Sjq31IGbmdnwpo60gaQpwEnAfsD9wHWSzouIBZXNFgOHA5/s0MTvI2LXEgdrZmbdGTG5A3sAiyJiMYCkOcAsYEVyj4h782vRYX8VOE4zMxuFbrplNgOWVJbvy+u6tY6kuZJ+KWnWqI7OzMzWSDdn7mO1ZUQ8IGkr4FJJt0TE3T2Ia2bWt7pJ7kuBGZXlzfO6rkTEA/nr3ZIGgV2AVZL77NmzVzwfGBhgYGCg2xBmZn1hcHCQwcHBrrZVRKdu8soG0lrAQtKA6gPAXOCQiJjfYdvTgfMj4uy8vBHwh4hYJmlT4CpgVttgLJJipOMY4RhZ872HaRNoP6bScTrFMDPrliQiouO45oh97hGxHDgauBi4HZgTEfMlnSjpLTnA7pKWAO8Avinp1rz7K4HrJd0IXAJ8vj2xm5lZeSOeuffkIHzmbmY2amM6czczs8nHyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3M3MGsjJ3cysgZzczcwayMndzKyBnNwnmJnTpyOp6GPm9Onj/bbMrMc8K+RwbTI+s0L26r2Y2eTnWSHNzPqMk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQM5uZuZNZCTu5lZAzm5m5k1kJO7mVkDObmbmTWQk7uZWQN1ldwlHSBpgaQ7JR3b4fW9Jc2T9Iykv2x77fC830JJh5U6cDMzG96I87lLmgLcCewH3A9cBxwcEQsq28wApgGfBH4aET/J6zcGrgd2JU0rPg/YNSIeb4vh+dxrijFcHDOb/MY6n/sewKKIWBwRzwBzgFnVDSLi3oi4DVbJS/sDF0fE4xHxGHAxcMCo34GZmY1KN8l9M2BJZfm+vK4b7fsuHcW+Zma2hjygambWQFO72GYpMKOyvHle142lwEDbvpd12nD27Nkrng8MDDAwMNBpMzOzvjU4OMjg4GBX23YzoLoWsJA0oPoAMBc4JCLmd9j2dOD8iDg7L1cHVKfk57vl/vfqfh5QrSnGcHHMbPIb04BqRCwHjiYNht4OzImI+ZJOlPSWHGB3SUuAdwDflHRr3vdR4J9JSf1a4MT2xG5mZuWNeObek4PwmXttMYaLY2aT31hLIc3MbJJxcu9TM6dPR1LRx8zp08f7bZlZ5m6Z4dqk2d0y7v4xm/zcLWNm1mec3M3MGsjJ3cysgZzczcwayMndzKyBnNzNzBrIyd3MrIGc3K1WvljKbHz4Iqbh2sQXMU2mOGb9yBcxmZn1GSd3M7MGcnI3M2sgJ3czswZycjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3czswZycjczayAndzOzBnJyNzNrICd3M7MGcnI3M2sgJ3czswbqKrlLOkDSAkl3Sjq2w+trS5ojaZGkqyXNyOu3lPQHSTfkx8ml34CZma1q6kgbSJoCnATsB9wPXCfpvIhYUNnsCOCRiHiFpHcDXwQOzq/9KiJ2LXzcZma2Gt2cue8BLIqIxRHxDDAHmNW2zSzge/n5j0n/CFo63gLKzMzq001y3wxYUlm+L6/ruE1ELAcek7RJfm2mpHmSLpO011gP2MzMRjZit8waap2tPwDMiIhHJe0KnCtph4j4XU1xzcyM7pL7UmBGZXnzvK7qPmAL4H5JawHTIuKR/NoygIi4QdKvgW2BG9qDzJ49e8XzgYEBBgYGunsHZmZ9YnBwkMHBwa62VUSsfoOUrBeS+tEfAOYCh0TE/Mo2RwE7RcRRkg4GDoqIgyVtShpofU7S1sDlwKsi4rG2GDHScYxwjKz53sO0CbQfU+k4vYjRL3HM+pEkIqLjuOaIZ+4RsVzS0cDFpD760yJivqQTgesi4nzgNOD7khYBv2VlpcwbgM9IWgY8BxzZntjNzKy8Ec/ce3IQPnOvLUa/xDHrR6s7c/cVqmZmDeTkbo0wc/p0JBV7zJw+fbzfktmYuFtmuDZxt0w/x3HXj00G7pYxM+szTu5mZg3k5G5m1kBO7mZmDeTkbmbWQE7uZmYN5ORu1qXStfSup7c6uc59uDZxnXs/x/FUCjYZuM7dzKzPOLmbmTWQk7vZBOO+fSvBfe7DtUmz+3UdZ/xjjHccm/zc525m1mec3M36lLt/ms3dMsO1iT/693OcJr2X8Y5j9XG3jJlZn3FyNzNrICd3M7MGcnI3M2sgJ3czswZycjezWpUuuXS5ZXdcCjlcm7jcrp/jNOm9NC2Oyy1XcimkmVmfcXI3M2sgJ3czm/Q8lcKqukrukg6QtEDSnZKO7fD62pLmSFok6WpJMyqvHZfXz5f0ppIHb2YGsPjBBwko+lj84IO9fROFjZjcJU0BTgL2B3YEDpG0fdtmRwCPRMQrgK8CX8z77gC8C3glcCBwsqSOnf+9MOg4EzKG40zcGI4z1GT6hNDNmfsewKKIWBwRzwBzgFlt28wCvpef/xh4Y37+NmBORDwbEfcAi3J742LQcSZkDMeZuDEcZ6huPyGc0MU2dX9C6Ca5bwYsqSzfl9d13CYilgOPS9qkw75LO+xrZmaF1TWgOm5dL2ZmRroYYHUPYE/gwsryPwDHtm1zAfC6/Hwt4KFO2wIXtrZr27/0WIgffvjhR188hsvdUxnZdcDLJW0JPAAcDBzSts1/AYcD1wLvBC7N638K/EDSV0jdMS8H5rYHGO4KKzMzWzMjJveIWC7paOBiUjfOaRExX9KJwHURcT5wGvB9SYuA35L+ARARd0j6EXAH8Axw1JjmGTAzs65MiLllzMysLF+hambWQE7uZmYN5ORuZtZA3VTLTGqStgW+AbwkInaS9GrgbRHxL4XaXwv4ILA5qWT0qspr/1gwzq6rez0ibigQoyfvpS3mZsCWVH4XI+KKgu3vT3o/l+SrpFvrPxAR3ynQ/idW93pEfHmsMXKcacBxpPdyQUT8sPLayRFxVIk4ub1av2eV9tYD/h6YERF/I+kVwHa5SKM4SX8KzGTo79oZBdrdAvgSqSLwAuBL+Wp+JJ0bEQeNNcYaHVfTB1QlXQ58CjglInbJ626LiJ0KtX8qsB6pxPN9wOUR8Yn82g0RsdqkPIo4l+Wn6wK7AzeTLhZ7NXB9RPxJgRg9eS+VeF8A3k2qplqeV0dEvK1Q+58D9gJuAN4KfDUivp5fK/J+JJ2Qn24HvJZU/kuONzciDh1rjBznbNL0HdcAHyBVn70nIv5Y+Pes9u9ZJdZ/AvOAw/KJ13rALyPiNaViVGJ9H9gGuImhv2sfLdD2z4GzST+bI4DdgLdGxG8l3djKOz030kVMk/1BKtcEuLGy7qaC7d9SeT4V+BbwE2CdasyC8X4CvKqyvBPw40n6XhYC69T4s78VmJqfbwT8N/CV9t+HQrGuADasLG8IXFGw/Zvalo8HrgJeCNwwSb9n17e3C9xc0+/CfPLJbA1tt/9sDgVuJ/0zKfazGe2jH/rcH5a0DelqLiS9g3QxVilrt55EmiDtQ6Szg0uBDQrGadkuIm6txLyNNOtmCb1+L3cBz6uh3ZapEfEsQEQ8RjoTnSbpLCrvtZCXAMsqy8vyulLWyTO0AhARnwW+Tfqn8sKCcXr5PVsm6fms/NvcBvhj4RgttwF1TdD+PEnrthYi4kzgGOAi4KU1xRxRPyT3jwCnANtLWgp8DPjbgu1fL+mA6oqI+AxwOql/r7RbJJ0qaSA/vg3cUqjtXr+XPwA3STpF0r+1HgXb/7WkfVoLEbE8Io4gfWIo9Q+x5QxgrqTZkmaTrtb+3up3GZX/YuVsqwBExHdJfdbLOu2whnr5PZtNmpJkC0k/AC4BVrlfRCGbAndIukjST1uPQm2fCryuuiIi/od0tf5thWKMWuP73FskrQ9MiYgnx/tYxiKfIfwt8Ia86grgGxHx9Pgd1ZqRdHin9RFRJCnms0Ii4qkOr20WEUtLxKm0uSuwd168IiJuLNl+L4zD9+yFpPmrBFwTEQ+XbL8SZ59O6yPi8jriTQSNTe6SDo2IM4erZohCVQxtMWsZje8Q5/mkCoOFpdvO7fesikHS2sC2eXFh5CqDQm3XXmHUFm8v4BURcbqkFwEbRMTdhdruSVVOW8y6K5kuiYj9Rlo3WdRdmTdaTS6FXD9/3bAXwYYbjSd9XC8Z522ksqu1ga0kvQb4TBSqMMlOJ1UxtCpwlgJnAUWTu6QBUtfFPaQzty0kHV4wgfxr/tqxwoiV72/MctXM7qSqmdNJYwlnAq8vFKL1e9yxKqdQjBWGq2QifVIca9vrkqqyNpW0MSunCJ9G4fs9SPpFROwl6Uly337rJVK1zLSC4b5NrswjNX6LpB8C45Lcx2UUt5cP4EU9ilPbaHxbnHnACxhaYXBr4Rg9qWLI72W7yvK2wLwa4tRWYVRp8yZSwqh+z24pGSO3WWtVTqXd2iqZSIONd5MGT++uPG4Gjq4jZhfHtHGBNmqtzBvtox8GVK+SdLGkI/JZQl3qHI2veiYiHm9bV7pvrVdVDM+LStdSRNxJPdUzdVYYtSyL9Nfc+p6tP8L2a6ruqpyW2iqZIuJrEbEV8MmI2Kry2DkiTqojZhcuKdBG3ZV5o9LkbhkAImJbSXuQpiE+XtIdpPu6nlk4VGs0fi6VRBhlu0sAbpf0HmCt3Bf+UeCXhWPMZmgVw+uB9xeOAak651RS9wWk+uDra4hzS1uc91KuwqjlR5JOATaS9DekC41OLRwDVlblnJOXD6JsVU5Lq5LpEob+Po/5op+KUyR9lJXFAYOkiw2LjbuMQol7SnyEdG1IqzLvbtLv9Lho7IBqJ5I2Bb4MvDci1ircdk9G4/Ng5/HAm/Kqi4B/icLVMr2oYpC0DukPYq+86krg5Igo+imhVxVGkv6c9HMRcFFE/Lxk+5U4tVfl1F3JlGOcSvp00GrzfcDyiPhgqRijOJaSV/lOiMq8xid3pTk53k46c98GOAf4UUTMG9cDm8DGo4pB6Ybqm0dE6TPqVvt1Vxh9ISKOHWldoVi1VeW0xamtkim3f3NE7DzSul4YS3Ifj8q8bvRDn/vNQKuiZNuIOLZkYpf0i/z1SUlPVB5PSnqiVJxKvJ9L2qiyvLGkiwq1vW5OspvmdjfJj5kUrmLI8QYlTcsx5wHfVrolY+k4byMNeF6Yl19T8AKWlj/vsO7AwjFaVTnHkiYRg5VVOaXjDJDmsvl34GTgTklvWO1Oo7c891G3Ym7NysqcXhtLt0y1Mq/TY1w0vs8d2DoiQtIGkjaIiN+VbDwi9spfV/tDlLRxRDxaIOSmkS4Lb8V/VNKLC7QLcCTpCt6XkSaOankCqGOg6wUR8YSkDwJnRMQJkuo4cz8B2IPUp0tE3CRpqxINS/pb4Chgm7Zj35DyYyGQPoXuQv75RMT9kupIIP8KvKn1SSfXcP8HaVKsUj4FXCbpLlJy3ZLCYzv5xGFYEfFIfrrGn0oj4pT89OSI+M2atlNaPyT3HXMN+iaAJP0GODxXTPTSJUCJPr3nJM2IiHsBlG5cXqRvLSK+BnxN0t9FngmwZlMlvRR4F2kcoS7PRMTj0pCTs1L9kT8kTfP6eeAfKuufrCSOkpblk5W6q3JWqWSSVKx6RmmenKeAV5Bq9yF1/ZSuyppH+lkLmAE8mp9vBNwLbAVDkvxYXCXpHuA/gZ8UOplbY/2Q3L8FfCIiLoMVHze/Bfxpj4+jxGg8pCT4C6WpjEUaWPtQobZbelXFcCJpQPgXEXFd/li+qHAMqLHCKJelPi7pa8AjrUG03N30uoi4tkScil5V5dRayRQRz0n690jT4dYyzpLjbAWgNAfTORHx33n5QFKlUclYvarM60o/DKhOiEGbwqPxm5IqWaCGSpZeVDEo3RjkoxFRvI+9Q6zaK4wk3QjsmmvdW2em15f6mbfFqr0qpxeVTJL+L3A16Sy31kQk6daIeNVI6wrGq60yr+tj6IPkfg6pf/L7edWhwG4R8fYeH0epm0N0HNSKsnN+9OQfoqS5EbFHyTbHi6Sbou0mE5JuiYhXF47Ts6qcSvu1VDIpTQmwHmkQtfVPI6LslACtWBeR/kFVr3V4Q0TsXzDGhKrM64dqmQ8ALyLdKeVs0sVGdVyQM5JS3TKfqjz+iTQV7OxCbbf0qorhKkknSdpb0q6tR+kgdVYYVdwl6aOSnpcfx5Cu8iytV1U5vahkOo90/cHOEbFhfhRP7NkhpDxwDmk6ihfldSXVWpk3Wv1w5r476SP5TFaOMUSpM6puR+MlbVLHAJvS/Ru/GhF/VbDN/UiTXw2pYmiNWxSM06m9iIg3dlg/ljir3Oqs07oxxngx8G+kOdeDNID+sYh4qFD7K6pygF9VXtqQdGu695aIU4l3Y0TskiuZtmhVMpX8JCJpX9KY0d7kuxYBV+aB/VpIWj8ifl9T22pV5gGUrswb9fH0QXJfCHySNPfLc631EbG4UPt3s5rR+NaATl2USkBuj4gdCrU3hdSfP496qxh6RtI84O1tFUbn1NEfXhdJLwA2pkdVOZJuJfXrfw84Pg9419HNtBZplst9gQ8DT0XE9iVj5Dh/Shp43iAiZkjaGTgyyt5YfCdS9+8mpBwwXpV5QH9Uy/wmIv6rrsZ7ORqf2/06K8v4ppA+Bhabl7xXVQwAkl4CfA54WUQcKGkH4E8i4rTCoWqvMFLNc3mPQ1VO7ZVMSvPWrE8aVL0SeG2pTzodfAXYnzxVckTcPNyt6nKAAAAHg0lEQVT41RhMlMo8oD/O3Pcj9a21T4D0k8JxejIar6FzfjwL3BMRVxWO0ZMqBkkXkLp/jo+InSVNJU2XWryCoQcVRpeT5/JudfdIui0idiocp/aqnF5VMuU+/N1If5dXkeb8uTo63AWqQKxrI+J11e640kUCE6Uyr6UfztzfD2xPKu1rdcsEaVClpPsl/SNDR+PvLxyj6MRNq3Ek8HHSwGqdVQybRsSPJB2XAzwrqfjAbeUMrTUdxA6SilYYAetFxFwNvVDq2YLtt6j6Dzd/0ir6dxwRyyUdQjrbrU1EfBxA6Qrbvyb9o58OrFNDuCW5ayaULsY6hnQPhpLukvRPDK3Mq2NQvSv9kNxfGxHbjbzZmB1Cusz9HFbesabYaHzuAx32LLpwX+h5pOO/MiJK/wFU/V5p9snWWeieQPtc9SV8qvJ8XdJUBPNou+H0GPVqLu+7lC4w+0ZePop6EshVkk4iXW25YgAyCt6aUNLRpC6y3Uh34/oOqXumDh8GvkaaI2kpcDHpe1fSB0jdWWfn5SsZn8o8oD+6ZU4HvhQRd/QoXi2j8XkQENKFJTD07CAi4h9W3WuNY/WkiiGXPX4d2BG4nVSe9o7S9dQd4tZRYbQ1K/tXHyXN5f3eUgP3lTi1VuVU4tReySTpk6QEOC8i6viUU431+vbuy07rxhij1sq8UR9PHyT3+aQE1bqtV+veiaVH/Wsfjc9xOpX1Fbv6tdJm7VUMSvOsH00a6HqS1M//9Sg8z3qHuHVUGL0jdzFNiLm8bahOfyOl/27qrswbrX7oljmgR3F6MRoPKTetOOPI/1SKXozWwyqGM0j94J/Ly+8hfSJ5Z8kgPaow+jTpasRaaqhb6q7KqcTpVSVTrST9CenT1Is0dL71aUDpaQFqrcwbrcYn917+14yIJW0DanVc1XkE8J1c9yxSF8AHCse4hdQPuhOpD/wxSXVUMezUdvZ8mdJkS6VVJ7x6FviP0hVGwP/kbob2PurSNejfJlfl5PZvkfRDoGhyB75LrmTKy3eS3tukSu7A2sAGpFxXnRr5CeAdhWOdoDQvU62Ved1qfHLvoV6MxhPpcuadc3Jv1T+XjtGrKoYbJO0ZEdfkeK+jhnuo9qjC6N3560cq6wLYunCcXlXl9KSSqW6RbnN5uaTv9uBEr1eVeV1xci+nF6PxrSsVTyBPx5vrqz9TMsn3sIphN+CXku7NyzOAha3KoLGOi/SywihqvhK5oldVOb2qZOqVUyW9M/KNbiRtTJqOt9jEYfSuMq8rTu7lbBdt83tIej3p4oySvkMasHlXXn4f6cz6LwvGWJc0XWndVQx1j4e8JX/tWGFUIoCkN0bEpZI6fv9r+Ej+EVJVzvaSlpKrcgrHAPgEafxoa0lXkSuZaojTK3Xewazll5J26FVl3kgaXy3TK70Yjc9tdppadpV1tlKdFUaSZkfE7Fxy25pjaMXXiCg2HtLLqpzxqmSqi1adX2gm6QrsktUyPanM65bP3Meox6PxAE9J2isiWjfmfj3pdmU2vDorjJ7MP/fbWJnUodxt/FboZVUOPapk6qFe3MGsV5V5XXFyH7tejsZD6ts/ozWgSqqWOXw121u9FUYb5K/bka4LOC/HeCswt1CMql5V5fSqkqknIuLCfJHRh4AbgXMpfFI0XvXsw3G3TCGStqz7h9v2sXwaQEQ8McJultVZYSTpCuAvYuVsjRsCP4uIotc6KE0x3S4iomhVjqQzgZPaKpk+EhGHlYzTK0rz0h8DbA7cRJpE7uqSV9xONP1wJ6ZeOVU13+0nIp4DPp2fP+HE3h1JL5D0ZVL98SWS/rXyyaeUlwDLKsvL8rqiImKrDo/S5ZawspLpHkn3kPrcXyvpVkm1Tg9Rk2NIn6wWR8S+wC7AY6vfZXJzt0w5vRiNh959LG+SXlQYnQHMVbpnL6S5/L9bqvFxqMqZUP3HBTwdEU9LQtI6EbFA0oQpW6yDk3s5z0ma0TYaX0ef17tzu+019HWcvTXFNm2ThJ0o6aaSASLis0rz0++dV70/Im4sGOINwKWkvvxVqnIofKHMROs/LuC+/Mn6XODnkh4FmvYeh3ByL6cXo/EAO5AS+16kP+orgW/WEKdJelJhFGk63GJz1rTpWVVOE0XE2/PT2UozXr4AuHAcD6l2HlAtKHfDtEbjnw88FGVvCIGkH5EqcX6QV70HeEFEvGv4vfqb0gydZ5D+oCFXGEXNUwuXJOmE/LRjVU5EHDpex2YTk5N7Ib0ajZd0R/tUtZ3WWdK0CqNeVeXY5OdqmXJ6NRp/Q57nA6hvsq2maGCFUU+qcmzyc597Ob0aja91sq2GalKFUa1VOdYc7pYpJP+xvR/4GOkWaI8Cz4uINxeOs+XqXm9glcOY5Qt/VvlFr6k+vHZKtydsVeVcUbgqxxrCyb0GkvYhj8ZHxLKRtrd6SXo+HSqMarj5iNmE4eRujecKI+tHTu7WeK4wsn7kahnrB64wsr7jM3drvHwThe2AIRVGpHuPusLIGsnJ3RrPFUbWj5zczcwayH3uZmYN5ORuZtZATu5mZg3k5G5m1kBO7mZmDfT/AeacRzne/39WAAAAAElFTkSuQmCC\n", "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1006,7 +1005,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 10, From 9f7eeab5e0ca5f971f4722985b1fd8a9c0359c53 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Wed, 14 Mar 2018 15:21:42 +0100 Subject: [PATCH 30/35] Add rnn regression notebook. --- .../pl-1.0-rnn_regression_open-day.ipynb | 831 ++++++++++++++++++ 1 file changed, 831 insertions(+) create mode 100644 notebooks/pl-1.0-rnn_regression_open-day.ipynb diff --git a/notebooks/pl-1.0-rnn_regression_open-day.ipynb b/notebooks/pl-1.0-rnn_regression_open-day.ipynb new file mode 100644 index 0000000..7ec29ba --- /dev/null +++ b/notebooks/pl-1.0-rnn_regression_open-day.ipynb @@ -0,0 +1,831 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "from keras.models import Sequential\n", + "from keras.layers import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "## Help Python find our packages\n", + "import sys\n", + "sys.path.append('..')\n", + "\n", + "import json\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import domain_scoring.domain_scoring as domain_scoring\n", + "\n", + "# Randomness\n", + "import random as rn\n", + "import tensorflow as tf\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "potato = json.load(open(path, \"r\", encoding=\"utf8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['PRODUCED', 'DIRECTED', 'WROTE', 'ACTED_IN', 'Person', 'Movie']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type_selection = potato[\"edge_type_selection\"] + potato[\"node_type_selection\"]\n", + "types = []\n", + "for pair in type_selection:\n", + " types.append(pair[0])\n", + "types" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_mps(data):\n", + " count = 0\n", + " first = True\n", + " batches = []\n", + " batch = []\n", + " for probably_path in data[\"meta_paths\"]:\n", + " if count % 6 == 0:\n", + " # Don't add empty batches\n", + " if len(batch) > 0:\n", + " batches.append(batch)\n", + " batch = []\n", + " else:\n", + " if 'time_to_rate' not in probably_path.keys():\n", + " batch.append(probably_path)\n", + " count += 1\n", + " # append last batch\n", + " if len(batch) > 0:\n", + " batches.append(batch)\n", + " print('#meta-paths:', count - len(batches) - 1)\n", + " return batches" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def construct_graph(batches):\n", + " ## Construct rating graph\n", + " from util.datastructures import MetaPathRatingGraph, MetaPath\n", + " graph = MetaPathRatingGraph()\n", + "\n", + " for batch in batches:\n", + " #ordered = sorted(batch, key=lambda x: float(x['rating']))\n", + " for metapath in batch:\n", + " for another_metapath in batch:\n", + " if metapath is another_metapath:\n", + " continue\n", + " if float(metapath['rating']) <= float(another_metapath['rating']):\n", + " graph.add_user_rating(MetaPath.from_list(another_metapath['metapath']), MetaPath.from_list(metapath['metapath']), \n", + " distance=float(another_metapath['rating']) - float(metapath['rating']))\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "## Clean up data // remove time_to_rate from array of ratings.\n", + "def clean_up(data):\n", + " batches = extract_mps(data)\n", + " return batches, construct_graph(batches)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#meta-paths: 51\n" + ] + } + ], + "source": [ + "p_batches, p_graph = clean_up(potato)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "domain_score = domain_scoring.DomainScoringRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Preprocess meta-paths\n", + "domain_score.vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), token_pattern='\\\\b\\\\w+\\\\b')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "domain_score.fit(p_graph)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "x, y = domain_score._extract_data_labels(p_graph)\n", + "x_raw = x\n", + "x = domain_score._preprocess(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import preprocessing\n", + "from keras.preprocessing.sequence import pad_sequences" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LabelEncoder()" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labeler = preprocessing.LabelEncoder()\n", + "labeler.fit(types)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "def to_sequence(in_list):\n", + " return in_list.reshape((-1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "def one_hot(in_sequence, distinct_values):\n", + " sequence = np.zeros((len(in_sequence), distinct_values))\n", + " i = 0\n", + " for point in in_sequence: \n", + " sequence[i][point] = 1\n", + " i += 1\n", + " return sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess_raw(raw, distinct_values, max_len=9):\n", + " data = []\n", + " for a,b in raw:\n", + " # encode labels to integers (0 is reserved for padding)\n", + " a = np.array(labeler.transform(a.as_list())) + 1\n", + " b = np.array(labeler.transform(b.as_list())) + 1\n", + " # pad to same length\n", + " a, b = pad_sequences([a, b], max_len, padding='post', value=0)\n", + " # merge a and b\n", + " sequence = np.append(a, b)\n", + " # to sequence\n", + " sequence = to_sequence(sequence)\n", + " # one-hot encode because we don't have distances/embeddings\n", + " sequence = one_hot(sequence, distinct_values + 1)\n", + " data.append(sequence)\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "x_preprocess = preprocess_raw(x_raw, len(types))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "test_size = 0.3\n", + "random_state = 42" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y,\n", + " test_size=test_size,\n", + " random_state=random_state,\n", + " shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "135" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(x_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(135, 44)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(x_train).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.15,\n", + " -0.15,\n", + " 0.04999999999999999,\n", + " -0.04999999999999999,\n", + " 0.04999999999999999,\n", + " -0.04999999999999999,\n", + " 0.1,\n", + " -0.1,\n", + " 0.1,\n", + " -0.1,\n", + " 0.05000000000000002,\n", + " -0.05000000000000002,\n", + " 0.1,\n", + " -0.1,\n", + " 0.05000000000000002,\n", + " -0.05000000000000002,\n", + " 0.04999999999999999,\n", + " -0.04999999999999999,\n", + " 0.1,\n", + " -0.1,\n", + " 0.16,\n", + " -0.16,\n", + " 0.17,\n", + " -0.17,\n", + " 0.03,\n", + " -0.03,\n", + " 0.06,\n", + " -0.06,\n", + " 0.07,\n", + " -0.07,\n", + " 0.010000000000000009,\n", + " -0.010000000000000009,\n", + " 0.07,\n", + " -0.07,\n", + " 0.13,\n", + " -0.13,\n", + " 0.14,\n", + " -0.14,\n", + " 0.13,\n", + " -0.13,\n", + " 0.34,\n", + " -0.34,\n", + " 0.21000000000000002,\n", + " -0.21000000000000002,\n", + " 0.15000000000000002,\n", + " -0.15000000000000002,\n", + " 0.08000000000000002,\n", + " -0.08000000000000002,\n", + " 0.19,\n", + " -0.19,\n", + " 0.06,\n", + " -0.06,\n", + " 0.26,\n", + " -0.26,\n", + " 0.13,\n", + " -0.13,\n", + " 0.07,\n", + " -0.07,\n", + " 0.43,\n", + " -0.43,\n", + " 0.21000000000000002,\n", + " -0.21000000000000002,\n", + " 0.020000000000000018,\n", + " -0.020000000000000018,\n", + " 0.23000000000000004,\n", + " -0.23000000000000004,\n", + " 0.21999999999999997,\n", + " -0.21999999999999997,\n", + " 0.020000000000000018,\n", + " -0.020000000000000018,\n", + " 0.41,\n", + " -0.41,\n", + " 0.19,\n", + " -0.19,\n", + " 0.21000000000000002,\n", + " -0.21000000000000002,\n", + " 0.19999999999999996,\n", + " -0.19999999999999996,\n", + " 0.06,\n", + " -0.06,\n", + " 0.21999999999999997,\n", + " -0.21999999999999997,\n", + " 0.17000000000000004,\n", + " -0.17000000000000004,\n", + " 0.010000000000000009,\n", + " -0.010000000000000009,\n", + " 0.15999999999999998,\n", + " -0.15999999999999998,\n", + " 0.11000000000000004,\n", + " -0.11000000000000004,\n", + " 0.04999999999999993,\n", + " -0.04999999999999993,\n", + " 0.04999999999999999,\n", + " -0.04999999999999999,\n", + " 0.20999999999999996,\n", + " -0.20999999999999996,\n", + " 0.16000000000000003,\n", + " -0.16000000000000003,\n", + " 0.030000000000000027,\n", + " -0.030000000000000027,\n", + " 0.10000000000000003,\n", + " -0.10000000000000003,\n", + " 0.13000000000000006,\n", + " -0.13000000000000006,\n", + " 0.06,\n", + " -0.06,\n", + " 0.10000000000000003,\n", + " -0.10000000000000003,\n", + " 0.040000000000000036,\n", + " -0.040000000000000036,\n", + " 0.07000000000000006,\n", + " -0.07000000000000006,\n", + " 0.040000000000000036,\n", + " -0.040000000000000036,\n", + " 0.030000000000000027,\n", + " -0.030000000000000027,\n", + " 0.36999999999999994,\n", + " -0.36999999999999994,\n", + " 0.21000000000000002,\n", + " -0.21000000000000002,\n", + " 0.11000000000000001,\n", + " -0.11000000000000001,\n", + " 0.48,\n", + " -0.48,\n", + " 0.04000000000000001,\n", + " -0.04000000000000001,\n", + " 0.32000000000000006,\n", + " -0.32000000000000006,\n", + " 0.07,\n", + " -0.07,\n", + " 0.43999999999999995,\n", + " -0.43999999999999995,\n", + " 0.28,\n", + " -0.28,\n", + " 0.15999999999999992,\n", + " -0.15999999999999992,\n", + " 0.2,\n", + " -0.2,\n", + " 0.2,\n", + " -0.2,\n", + " 0.019999999999999962,\n", + " -0.019999999999999962,\n", + " 0.18000000000000005,\n", + " -0.18000000000000005,\n", + " 0.18000000000000005,\n", + " -0.18000000000000005,\n", + " 0.26,\n", + " -0.26,\n", + " 0.06,\n", + " -0.06,\n", + " 0.26,\n", + " -0.26,\n", + " 0.07999999999999996,\n", + " -0.07999999999999996,\n", + " 0.010000000000000009,\n", + " -0.010000000000000009,\n", + " 0.21000000000000002,\n", + " -0.21000000000000002,\n", + " 0.18,\n", + " -0.18,\n", + " 0.07999999999999996,\n", + " -0.07999999999999996,\n", + " 0.22000000000000003,\n", + " -0.22000000000000003,\n", + " 0.030000000000000027,\n", + " -0.030000000000000027,\n", + " 0.040000000000000036,\n", + " -0.040000000000000036,\n", + " 0.13000000000000006,\n", + " -0.13000000000000006,\n", + " 0.10000000000000003,\n", + " -0.10000000000000003,\n", + " 0.14000000000000007,\n", + " -0.14000000000000007,\n", + " 0.06000000000000005,\n", + " -0.06000000000000005,\n", + " 0.12,\n", + " -0.12,\n", + " 0.18000000000000005,\n", + " -0.18000000000000005,\n", + " 0.10000000000000009,\n", + " -0.10000000000000009,\n", + " 0.26999999999999996,\n", + " -0.26999999999999996,\n", + " 0.14999999999999997,\n", + " -0.14999999999999997,\n", + " 0.33,\n", + " -0.33,\n", + " 0.25000000000000006,\n", + " -0.25000000000000006,\n", + " 0.019999999999999907,\n", + " -0.019999999999999907,\n", + " 0.07999999999999996,\n", + " -0.07999999999999996]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ['PYTHONHASHSEED'] = '0'\n", + "def reset_seed():\n", + " np.random.seed(random_state)\n", + " rn.seed(random_state)\n", + " tf.set_random_seed(random_state)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "135/135 [==============================] - 0s 778us/step - loss: 0.1938 - acc: 0.0000e+00\n", + "Epoch 2/10\n", + "135/135 [==============================] - 0s 137us/step - loss: 0.1129 - acc: 0.0000e+00\n", + "Epoch 3/10\n", + "135/135 [==============================] - 0s 194us/step - loss: 0.0664 - acc: 0.0000e+00\n", + "Epoch 4/10\n", + "135/135 [==============================] - 0s 185us/step - loss: 0.0462 - acc: 0.0000e+00\n", + "Epoch 5/10\n", + "135/135 [==============================] - 0s 171us/step - loss: 0.0378 - acc: 0.0000e+00\n", + "Epoch 6/10\n", + "135/135 [==============================] - 0s 163us/step - loss: 0.0346 - acc: 0.0000e+00\n", + "Epoch 7/10\n", + "135/135 [==============================] - 0s 149us/step - loss: 0.0334 - acc: 0.0000e+00\n", + "Epoch 8/10\n", + "135/135 [==============================] - 0s 130us/step - loss: 0.0327 - acc: 0.0000e+00\n", + "Epoch 9/10\n", + "135/135 [==============================] - 0s 171us/step - loss: 0.0323 - acc: 0.0000e+00\n", + "Epoch 10/10\n", + "135/135 [==============================] - 0s 178us/step - loss: 0.0320 - acc: 0.0000e+00\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reproducible results\n", + "reset_seed()\n", + "# Build model\n", + "model = Sequential()\n", + "model.add(Dense(128, activation='relu', input_dim=44))\n", + "model.add(Dense(1, activation='sigmoid'))\n", + "model.compile(optimizer='adam',\n", + " loss='mean_squared_error',\n", + " metrics=['accuracy'])\n", + "\n", + "# Train model\n", + "model.fit(np.array(x_train), np.array(y_train), epochs=10, batch_size=16)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.03216452232974655" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_squared_error(model.predict(np.array(x_test), batch_size=16), np.array(y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "x_p_train, x_p_test, y_p_train, y_p_test = train_test_split(x_preprocess, y,\n", + " test_size=test_size,\n", + " random_state=random_state,\n", + " shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(135, 18, 7)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(x_p_train).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "135/135 [==============================] - 0s 2ms/step - loss: 0.1998 - acc: 0.0000e+00\n", + "Epoch 2/10\n", + "135/135 [==============================] - 0s 525us/step - loss: 0.0778 - acc: 0.0000e+00\n", + "Epoch 3/10\n", + "135/135 [==============================] - 0s 523us/step - loss: 0.0589 - acc: 0.0000e+00\n", + "Epoch 4/10\n", + "135/135 [==============================] - 0s 622us/step - loss: 0.0493 - acc: 0.0000e+00\n", + "Epoch 5/10\n", + "135/135 [==============================] - 0s 576us/step - loss: 0.0434 - acc: 0.0000e+00\n", + "Epoch 6/10\n", + "135/135 [==============================] - 0s 618us/step - loss: 0.0406 - acc: 0.0000e+00\n", + "Epoch 7/10\n", + "135/135 [==============================] - 0s 512us/step - loss: 0.0397 - acc: 0.0000e+00\n", + "Epoch 8/10\n", + "135/135 [==============================] - 0s 608us/step - loss: 0.0378 - acc: 0.0000e+00\n", + "Epoch 9/10\n", + "135/135 [==============================] - 0s 499us/step - loss: 0.0376 - acc: 0.0000e+00\n", + "Epoch 10/10\n", + "135/135 [==============================] - 0s 534us/step - loss: 0.0367 - acc: 0.0000e+00\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reproducible results\n", + "reset_seed()\n", + "# Use sequence classification (RNN/LSTM)\n", + "model_rnn = Sequential()\n", + "model_rnn.add(SimpleRNN(256, input_shape=(18, 7)))\n", + "model_rnn.add(Dropout(0.5))\n", + "model_rnn.add(Dense(1, activation='sigmoid'))\n", + "\n", + "model_rnn.compile(loss='mean_squared_error',\n", + " optimizer='sgd',\n", + " metrics=['accuracy'])\n", + "\n", + "model_rnn.fit(np.array(x_p_train), np.array(y_p_train), batch_size=35, epochs=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.035975770946391046" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_predicted = model_rnn.predict(np.array(x_p_test), batch_size=16)\n", + "y_p_test = np.array(y_p_test)\n", + "mean_squared_error(y_predicted, Y_p_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy is 0.423728813559322\n" + ] + } + ], + "source": [ + "score = np.mean(np.logical_and((y_p_test) > 0, (y_predicted) > 0))\n", + "print('Test accuracy is {}'.format(score))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 93a08b67f561aae015b16582d9ad35c6cdabd233 Mon Sep 17 00:00:00 2001 From: Sebastian Bischoff Date: Wed, 14 Mar 2018 21:01:38 +0100 Subject: [PATCH 31/35] Add n-gram range parameter --- domain_scoring/domain_scoring.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/domain_scoring/domain_scoring.py b/domain_scoring/domain_scoring.py index a66621c..0bd1835 100644 --- a/domain_scoring/domain_scoring.py +++ b/domain_scoring/domain_scoring.py @@ -12,12 +12,12 @@ class DomainScoring(): - def __init__(self): + def __init__(self, ngram_range=(1,1)): """ Classifies the ordering and extracts the domain value of meta-paths. """ # The token_pattern also allows single character strings which the default doesn't allow - self.vectorizer = TfidfVectorizer(analyzer='word', token_pattern='\\b\\w+\\b') + self.vectorizer = TfidfVectorizer(analyzer='word', token_pattern='\\b\\w+\\b', ngram_range=ngram_range) self.classifier = DecisionTreeClassifier(random_state=42) self.domain_value_transformer = NaiveTransformer() From f794c3b3b153a748647a53fb4617aef2de746864 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Thu, 15 Mar 2018 14:08:29 +0100 Subject: [PATCH 32/35] Add rnn notebook with high score. --- notebooks/pl-1.0-rnn_open-day.ipynb | 221 ++++++++++++++++++---------- 1 file changed, 140 insertions(+), 81 deletions(-) diff --git a/notebooks/pl-1.0-rnn_open-day.ipynb b/notebooks/pl-1.0-rnn_open-day.ipynb index 2895da9..c0f5c0d 100644 --- a/notebooks/pl-1.0-rnn_open-day.ipynb +++ b/notebooks/pl-1.0-rnn_open-day.ipynb @@ -2,17 +2,26 @@ "cells": [ { "cell_type": "code", - "execution_count": 165, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], "source": [ "from keras.models import Sequential\n", - "from keras.layers import *" + "from keras.layers import *\n", + "from keras.callbacks import EarlyStopping" ] }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -37,12 +46,12 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 191, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -78,7 +87,7 @@ "['ACTED_IN', 'PRODUCED', 'DIRECTED', 'WROTE', 'Person', 'Movie']" ] }, - "execution_count": 191, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -93,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -121,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -144,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -156,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -175,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -185,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -194,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -204,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -213,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 175, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -224,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 202, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -234,7 +243,7 @@ }, { "cell_type": "code", - "execution_count": 199, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -243,7 +252,7 @@ "LabelEncoder()" ] }, - "execution_count": 199, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -255,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 271, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -265,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 272, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -280,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": 299, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -304,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 300, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -313,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -323,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -335,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -344,7 +353,7 @@ "131" ] }, - "execution_count": 99, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -355,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -364,7 +373,7 @@ "(131, 44)" ] }, - "execution_count": 105, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -375,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 173, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -388,7 +397,18 @@ }, { "cell_type": "code", - "execution_count": 319, + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "callbacks = [\n", + " EarlyStopping(monitor='loss', patience=3)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -396,34 +416,34 @@ "output_type": "stream", "text": [ "Epoch 1/10\n", - "131/131 [==============================] - 1s 4ms/step - loss: 0.6956 - acc: 0.5191\n", + "131/131 [==============================] - 0s 737us/step - loss: 0.6986 - acc: 0.5038\n", "Epoch 2/10\n", - "131/131 [==============================] - 0s 106us/step - loss: 0.6822 - acc: 0.5878\n", + "131/131 [==============================] - 0s 99us/step - loss: 0.6854 - acc: 0.5573\n", "Epoch 3/10\n", - "131/131 [==============================] - 0s 163us/step - loss: 0.6758 - acc: 0.5954\n", + "131/131 [==============================] - 0s 121us/step - loss: 0.6785 - acc: 0.5649\n", "Epoch 4/10\n", - "131/131 [==============================] - 0s 141us/step - loss: 0.6717 - acc: 0.6183\n", + "131/131 [==============================] - 0s 187us/step - loss: 0.6763 - acc: 0.5878\n", "Epoch 5/10\n", - "131/131 [==============================] - 0s 137us/step - loss: 0.6690 - acc: 0.6718\n", + "131/131 [==============================] - 0s 156us/step - loss: 0.6735 - acc: 0.6336\n", "Epoch 6/10\n", - "131/131 [==============================] - 0s 149us/step - loss: 0.6659 - acc: 0.6794\n", + "131/131 [==============================] - 0s 110us/step - loss: 0.6719 - acc: 0.6260\n", "Epoch 7/10\n", - "131/131 [==============================] - 0s 171us/step - loss: 0.6623 - acc: 0.6794\n", + "131/131 [==============================] - 0s 88us/step - loss: 0.6695 - acc: 0.6870\n", "Epoch 8/10\n", - "131/131 [==============================] - 0s 145us/step - loss: 0.6610 - acc: 0.6565\n", + "131/131 [==============================] - 0s 132us/step - loss: 0.6685 - acc: 0.6565\n", "Epoch 9/10\n", - "131/131 [==============================] - 0s 228us/step - loss: 0.6575 - acc: 0.6870\n", + "131/131 [==============================] - 0s 180us/step - loss: 0.6657 - acc: 0.6947\n", "Epoch 10/10\n", - "131/131 [==============================] - 0s 204us/step - loss: 0.6559 - acc: 0.6794\n" + "131/131 [==============================] - 0s 100us/step - loss: 0.6651 - acc: 0.7023\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 319, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -440,28 +460,28 @@ " metrics=['accuracy'])\n", "\n", "# Train model\n", - "model.fit(np.array(x_train), np.array(y_train), epochs=10, batch_size=16)" + "model.fit(np.array(x_train), np.array(y_train), epochs=10, batch_size=32, callbacks=callbacks)" ] }, { "cell_type": "code", - "execution_count": 320, + "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "57/57 [==============================] - 0s 5ms/step\n" + "57/57 [==============================] - 0s 604us/step\n" ] }, { "data": { "text/plain": [ - "[0.6638993683614229, 0.6491228101546305]" + "[0.669717368326689, 0.6491228101546305]" ] }, - "execution_count": 320, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -472,7 +492,7 @@ }, { "cell_type": "code", - "execution_count": 321, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -484,7 +504,7 @@ }, { "cell_type": "code", - "execution_count": 322, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -493,7 +513,7 @@ "(131, 18, 7)" ] }, - "execution_count": 322, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -504,42 +524,74 @@ }, { "cell_type": "code", - "execution_count": 323, - "metadata": {}, + "execution_count": 33, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Epoch 1/10\n", - "131/131 [==============================] - 1s 5ms/step - loss: 0.7035 - acc: 0.5725\n", - "Epoch 2/10\n", - "131/131 [==============================] - 0s 390us/step - loss: 0.5776 - acc: 0.6641\n", - "Epoch 3/10\n", - "131/131 [==============================] - 0s 354us/step - loss: 0.5706 - acc: 0.6641\n", - "Epoch 4/10\n", - "131/131 [==============================] - 0s 340us/step - loss: 0.4936 - acc: 0.7252\n", - "Epoch 5/10\n", - "131/131 [==============================] - 0s 275us/step - loss: 0.5130 - acc: 0.7176\n", - "Epoch 6/10\n", - "131/131 [==============================] - 0s 256us/step - loss: 0.5067 - acc: 0.7328\n", - "Epoch 7/10\n", - "131/131 [==============================] - 0s 247us/step - loss: 0.5142 - acc: 0.7176\n", - "Epoch 8/10\n", - "131/131 [==============================] - 0s 272us/step - loss: 0.4981 - acc: 0.7481\n", - "Epoch 9/10\n", - "131/131 [==============================] - 0s 275us/step - loss: 0.4619 - acc: 0.7939\n", - "Epoch 10/10\n", - "131/131 [==============================] - 0s 257us/step - loss: 0.4563 - acc: 0.7634\n" + "Epoch 1/25\n", + "131/131 [==============================] - 0s 3ms/step - loss: 0.7035 - acc: 0.5725\n", + "Epoch 2/25\n", + "131/131 [==============================] - 0s 218us/step - loss: 0.5775 - acc: 0.6641\n", + "Epoch 3/25\n", + "131/131 [==============================] - 0s 400us/step - loss: 0.5705 - acc: 0.6641\n", + "Epoch 4/25\n", + "131/131 [==============================] - 0s 304us/step - loss: 0.4936 - acc: 0.7252\n", + "Epoch 5/25\n", + "131/131 [==============================] - 0s 231us/step - loss: 0.5131 - acc: 0.7176\n", + "Epoch 6/25\n", + "131/131 [==============================] - 0s 326us/step - loss: 0.5067 - acc: 0.7328\n", + "Epoch 7/25\n", + "131/131 [==============================] - 0s 237us/step - loss: 0.5142 - acc: 0.7176\n", + "Epoch 8/25\n", + "131/131 [==============================] - 0s 309us/step - loss: 0.4982 - acc: 0.7481\n", + "Epoch 9/25\n", + "131/131 [==============================] - 0s 502us/step - loss: 0.4619 - acc: 0.7939\n", + "Epoch 10/25\n", + "131/131 [==============================] - 0s 353us/step - loss: 0.4562 - acc: 0.7634\n", + "Epoch 11/25\n", + "131/131 [==============================] - 0s 376us/step - loss: 0.4776 - acc: 0.7786\n", + "Epoch 12/25\n", + "131/131 [==============================] - 0s 239us/step - loss: 0.4423 - acc: 0.7863\n", + "Epoch 13/25\n", + "131/131 [==============================] - 0s 310us/step - loss: 0.4711 - acc: 0.7863\n", + "Epoch 14/25\n", + "131/131 [==============================] - 0s 313us/step - loss: 0.4540 - acc: 0.7557\n", + "Epoch 15/25\n", + "131/131 [==============================] - 0s 333us/step - loss: 0.4397 - acc: 0.7634\n", + "Epoch 16/25\n", + "131/131 [==============================] - 0s 335us/step - loss: 0.4429 - acc: 0.7863\n", + "Epoch 17/25\n", + "131/131 [==============================] - 0s 284us/step - loss: 0.4154 - acc: 0.7863\n", + "Epoch 18/25\n", + "131/131 [==============================] - 0s 339us/step - loss: 0.3990 - acc: 0.8015\n", + "Epoch 19/25\n", + "131/131 [==============================] - 0s 340us/step - loss: 0.3913 - acc: 0.8168\n", + "Epoch 20/25\n", + "131/131 [==============================] - 0s 362us/step - loss: 0.3775 - acc: 0.8168\n", + "Epoch 21/25\n", + "131/131 [==============================] - 0s 280us/step - loss: 0.4267 - acc: 0.8015\n", + "Epoch 22/25\n", + "131/131 [==============================] - 0s 352us/step - loss: 0.3972 - acc: 0.8015\n", + "Epoch 23/25\n", + "131/131 [==============================] - 0s 289us/step - loss: 0.3596 - acc: 0.8092\n", + "Epoch 24/25\n", + "131/131 [==============================] - 0s 393us/step - loss: 0.4010 - acc: 0.8321\n", + "Epoch 25/25\n", + "131/131 [==============================] - 0s 365us/step - loss: 0.3830 - acc: 0.8626\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 323, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -557,28 +609,28 @@ " optimizer='rmsprop',\n", " metrics=['accuracy'])\n", "\n", - "model_rnn.fit(np.array(x_p_train), np.array(y_p_train), batch_size=35, epochs=10)" + "model_rnn.fit(np.array(x_p_train), np.array(y_p_train), batch_size=35, epochs=25, callbacks=[])" ] }, { "cell_type": "code", - "execution_count": 324, + "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "57/57 [==============================] - 0s 5ms/step\n" + "57/57 [==============================] - 0s 2ms/step\n" ] }, { "data": { "text/plain": [ - "[0.5831738752231264, 0.7017544121073004]" + "[0.5181714421824405, 0.8245614223312914]" ] }, - "execution_count": 324, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -586,6 +638,13 @@ "source": [ "model_rnn.evaluate(np.array(x_p_test), np.array(y_p_test), batch_size=35)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 733c5a0e52a30850cfa0f02f97d0e23db7d228c7 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Thu, 15 Mar 2018 14:26:48 +0100 Subject: [PATCH 33/35] Add new structured rnn notebook. --- notebooks/pl-1.0-rnn_open-day.ipynb | 180 ++++++---- notebooks/pl-2.0-rnn_open-day.ipynb | 507 ++++++++++++++++++++++++++++ 2 files changed, 613 insertions(+), 74 deletions(-) create mode 100644 notebooks/pl-2.0-rnn_open-day.ipynb diff --git a/notebooks/pl-1.0-rnn_open-day.ipynb b/notebooks/pl-1.0-rnn_open-day.ipynb index c0f5c0d..7cec3b0 100644 --- a/notebooks/pl-1.0-rnn_open-day.ipynb +++ b/notebooks/pl-1.0-rnn_open-day.ipynb @@ -313,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -322,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 62, "metadata": {}, "outputs": [], "source": [ @@ -332,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 63, "metadata": {}, "outputs": [], "source": [ @@ -344,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -353,7 +353,7 @@ "131" ] }, - "execution_count": 25, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -364,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 65, "metadata": {}, "outputs": [ { @@ -373,7 +373,7 @@ "(131, 44)" ] }, - "execution_count": 26, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -384,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 66, "metadata": {}, "outputs": [], "source": [ @@ -397,53 +397,85 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 82, "metadata": {}, "outputs": [], "source": [ "callbacks = [\n", - " EarlyStopping(monitor='loss', patience=3)\n", + " EarlyStopping(monitor='loss', patience=2)\n", "]" ] }, { "cell_type": "code", - "execution_count": 29, - "metadata": {}, + "execution_count": 83, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Epoch 1/10\n", - "131/131 [==============================] - 0s 737us/step - loss: 0.6986 - acc: 0.5038\n", - "Epoch 2/10\n", - "131/131 [==============================] - 0s 99us/step - loss: 0.6854 - acc: 0.5573\n", - "Epoch 3/10\n", - "131/131 [==============================] - 0s 121us/step - loss: 0.6785 - acc: 0.5649\n", - "Epoch 4/10\n", - "131/131 [==============================] - 0s 187us/step - loss: 0.6763 - acc: 0.5878\n", - "Epoch 5/10\n", - "131/131 [==============================] - 0s 156us/step - loss: 0.6735 - acc: 0.6336\n", - "Epoch 6/10\n", - "131/131 [==============================] - 0s 110us/step - loss: 0.6719 - acc: 0.6260\n", - "Epoch 7/10\n", - "131/131 [==============================] - 0s 88us/step - loss: 0.6695 - acc: 0.6870\n", - "Epoch 8/10\n", - "131/131 [==============================] - 0s 132us/step - loss: 0.6685 - acc: 0.6565\n", - "Epoch 9/10\n", - "131/131 [==============================] - 0s 180us/step - loss: 0.6657 - acc: 0.6947\n", - "Epoch 10/10\n", - "131/131 [==============================] - 0s 100us/step - loss: 0.6651 - acc: 0.7023\n" + "Epoch 1/25\n", + "131/131 [==============================] - 1s 5ms/step - loss: 0.6881 - acc: 0.5649\n", + "Epoch 2/25\n", + "131/131 [==============================] - 0s 71us/step - loss: 0.6791 - acc: 0.5802\n", + "Epoch 3/25\n", + "131/131 [==============================] - 0s 99us/step - loss: 0.6766 - acc: 0.6183\n", + "Epoch 4/25\n", + "131/131 [==============================] - 0s 162us/step - loss: 0.6733 - acc: 0.5954\n", + "Epoch 5/25\n", + "131/131 [==============================] - 0s 146us/step - loss: 0.6723 - acc: 0.6031\n", + "Epoch 6/25\n", + "131/131 [==============================] - 0s 136us/step - loss: 0.6678 - acc: 0.6336\n", + "Epoch 7/25\n", + "131/131 [==============================] - 0s 80us/step - loss: 0.6653 - acc: 0.6641\n", + "Epoch 8/25\n", + "131/131 [==============================] - 0s 154us/step - loss: 0.6637 - acc: 0.6336\n", + "Epoch 9/25\n", + "131/131 [==============================] - 0s 154us/step - loss: 0.6607 - acc: 0.6565\n", + "Epoch 10/25\n", + "131/131 [==============================] - 0s 84us/step - loss: 0.6601 - acc: 0.6412\n", + "Epoch 11/25\n", + "131/131 [==============================] - 0s 170us/step - loss: 0.6575 - acc: 0.6794\n", + "Epoch 12/25\n", + "131/131 [==============================] - 0s 162us/step - loss: 0.6561 - acc: 0.6641\n", + "Epoch 13/25\n", + "131/131 [==============================] - 0s 186us/step - loss: 0.6552 - acc: 0.6489\n", + "Epoch 14/25\n", + "131/131 [==============================] - 0s 224us/step - loss: 0.6523 - acc: 0.6565\n", + "Epoch 15/25\n", + "131/131 [==============================] - 0s 354us/step - loss: 0.6504 - acc: 0.6565\n", + "Epoch 16/25\n", + "131/131 [==============================] - 0s 340us/step - loss: 0.6488 - acc: 0.6641\n", + "Epoch 17/25\n", + "131/131 [==============================] - 0s 97us/step - loss: 0.6474 - acc: 0.6718\n", + "Epoch 18/25\n", + "131/131 [==============================] - 0s 221us/step - loss: 0.6458 - acc: 0.6794\n", + "Epoch 19/25\n", + "131/131 [==============================] - 0s 283us/step - loss: 0.6438 - acc: 0.6565\n", + "Epoch 20/25\n", + "131/131 [==============================] - 0s 85us/step - loss: 0.6432 - acc: 0.6794\n", + "Epoch 21/25\n", + "131/131 [==============================] - 0s 123us/step - loss: 0.6411 - acc: 0.6794\n", + "Epoch 22/25\n", + "131/131 [==============================] - 0s 224us/step - loss: 0.6389 - acc: 0.6794\n", + "Epoch 23/25\n", + "131/131 [==============================] - 0s 247us/step - loss: 0.6391 - acc: 0.6641\n", + "Epoch 24/25\n", + "131/131 [==============================] - 0s 283us/step - loss: 0.6355 - acc: 0.6870\n", + "Epoch 25/25\n", + "131/131 [==============================] - 0s 173us/step - loss: 0.6356 - acc: 0.6641\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 29, + "execution_count": 83, "metadata": {}, "output_type": "execute_result" } @@ -460,28 +492,28 @@ " metrics=['accuracy'])\n", "\n", "# Train model\n", - "model.fit(np.array(x_train), np.array(y_train), epochs=10, batch_size=32, callbacks=callbacks)" + "model.fit(np.array(x_train), np.array(y_train), epochs=25, batch_size=32, callbacks=callbacks)" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 84, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "57/57 [==============================] - 0s 604us/step\n" + "57/57 [==============================] - 0s 7ms/step\n" ] }, { "data": { "text/plain": [ - "[0.669717368326689, 0.6491228101546305]" + "[0.6689000746660065, 0.6491228101546305]" ] }, - "execution_count": 30, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } @@ -492,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 70, "metadata": {}, "outputs": [], "source": [ @@ -504,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -513,7 +545,7 @@ "(131, 18, 7)" ] }, - "execution_count": 32, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } @@ -524,7 +556,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 72, "metadata": { "scrolled": true }, @@ -534,64 +566,64 @@ "output_type": "stream", "text": [ "Epoch 1/25\n", - "131/131 [==============================] - 0s 3ms/step - loss: 0.7035 - acc: 0.5725\n", + "131/131 [==============================] - 1s 5ms/step - loss: 0.8841 - acc: 0.5115\n", "Epoch 2/25\n", - "131/131 [==============================] - 0s 218us/step - loss: 0.5775 - acc: 0.6641\n", + "131/131 [==============================] - 0s 181us/step - loss: 0.5931 - acc: 0.6641\n", "Epoch 3/25\n", - "131/131 [==============================] - 0s 400us/step - loss: 0.5705 - acc: 0.6641\n", + "131/131 [==============================] - 0s 290us/step - loss: 0.5322 - acc: 0.7023\n", "Epoch 4/25\n", - "131/131 [==============================] - 0s 304us/step - loss: 0.4936 - acc: 0.7252\n", + "131/131 [==============================] - 0s 356us/step - loss: 0.5153 - acc: 0.7328\n", "Epoch 5/25\n", - "131/131 [==============================] - 0s 231us/step - loss: 0.5131 - acc: 0.7176\n", + "131/131 [==============================] - 0s 408us/step - loss: 0.5106 - acc: 0.7252\n", "Epoch 6/25\n", - "131/131 [==============================] - 0s 326us/step - loss: 0.5067 - acc: 0.7328\n", + "131/131 [==============================] - 0s 344us/step - loss: 0.5026 - acc: 0.7481\n", "Epoch 7/25\n", - "131/131 [==============================] - 0s 237us/step - loss: 0.5142 - acc: 0.7176\n", + "131/131 [==============================] - 0s 248us/step - loss: 0.4430 - acc: 0.7710\n", "Epoch 8/25\n", - "131/131 [==============================] - 0s 309us/step - loss: 0.4982 - acc: 0.7481\n", + "131/131 [==============================] - 0s 380us/step - loss: 0.4903 - acc: 0.7786\n", "Epoch 9/25\n", - "131/131 [==============================] - 0s 502us/step - loss: 0.4619 - acc: 0.7939\n", + "131/131 [==============================] - 0s 333us/step - loss: 0.4717 - acc: 0.7710\n", "Epoch 10/25\n", - "131/131 [==============================] - 0s 353us/step - loss: 0.4562 - acc: 0.7634\n", + "131/131 [==============================] - 0s 336us/step - loss: 0.4846 - acc: 0.7481\n", "Epoch 11/25\n", - "131/131 [==============================] - 0s 376us/step - loss: 0.4776 - acc: 0.7786\n", + "131/131 [==============================] - 0s 335us/step - loss: 0.4962 - acc: 0.7557\n", "Epoch 12/25\n", - "131/131 [==============================] - 0s 239us/step - loss: 0.4423 - acc: 0.7863\n", + "131/131 [==============================] - 0s 301us/step - loss: 0.4769 - acc: 0.7481\n", "Epoch 13/25\n", - "131/131 [==============================] - 0s 310us/step - loss: 0.4711 - acc: 0.7863\n", + "131/131 [==============================] - 0s 399us/step - loss: 0.4354 - acc: 0.7939\n", "Epoch 14/25\n", - "131/131 [==============================] - 0s 313us/step - loss: 0.4540 - acc: 0.7557\n", + "131/131 [==============================] - 0s 557us/step - loss: 0.4423 - acc: 0.7863\n", "Epoch 15/25\n", - "131/131 [==============================] - 0s 333us/step - loss: 0.4397 - acc: 0.7634\n", + "131/131 [==============================] - 0s 288us/step - loss: 0.4116 - acc: 0.8244\n", "Epoch 16/25\n", - "131/131 [==============================] - 0s 335us/step - loss: 0.4429 - acc: 0.7863\n", + "131/131 [==============================] - 0s 497us/step - loss: 0.4543 - acc: 0.7634\n", "Epoch 17/25\n", - "131/131 [==============================] - 0s 284us/step - loss: 0.4154 - acc: 0.7863\n", + "131/131 [==============================] - 0s 554us/step - loss: 0.4123 - acc: 0.8015\n", "Epoch 18/25\n", - "131/131 [==============================] - 0s 339us/step - loss: 0.3990 - acc: 0.8015\n", + "131/131 [==============================] - 0s 457us/step - loss: 0.4154 - acc: 0.8015\n", "Epoch 19/25\n", - "131/131 [==============================] - 0s 340us/step - loss: 0.3913 - acc: 0.8168\n", + "131/131 [==============================] - 0s 273us/step - loss: 0.4245 - acc: 0.8015\n", "Epoch 20/25\n", - "131/131 [==============================] - 0s 362us/step - loss: 0.3775 - acc: 0.8168\n", + "131/131 [==============================] - 0s 408us/step - loss: 0.3828 - acc: 0.7786\n", "Epoch 21/25\n", - "131/131 [==============================] - 0s 280us/step - loss: 0.4267 - acc: 0.8015\n", + "131/131 [==============================] - 0s 494us/step - loss: 0.3780 - acc: 0.8702\n", "Epoch 22/25\n", - "131/131 [==============================] - 0s 352us/step - loss: 0.3972 - acc: 0.8015\n", + "131/131 [==============================] - 0s 452us/step - loss: 0.3750 - acc: 0.8321\n", "Epoch 23/25\n", - "131/131 [==============================] - 0s 289us/step - loss: 0.3596 - acc: 0.8092\n", + "131/131 [==============================] - 0s 338us/step - loss: 0.3655 - acc: 0.8321\n", "Epoch 24/25\n", - "131/131 [==============================] - 0s 393us/step - loss: 0.4010 - acc: 0.8321\n", + "131/131 [==============================] - 0s 269us/step - loss: 0.3872 - acc: 0.8244\n", "Epoch 25/25\n", - "131/131 [==============================] - 0s 365us/step - loss: 0.3830 - acc: 0.8626\n" + "131/131 [==============================] - 0s 402us/step - loss: 0.3812 - acc: 0.7939\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 33, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } @@ -614,23 +646,23 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 73, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "57/57 [==============================] - 0s 2ms/step\n" + "57/57 [==============================] - 0s 5ms/step\n" ] }, { "data": { "text/plain": [ - "[0.5181714421824405, 0.8245614223312914]" + "[0.5605788722372892, 0.7719298402468363]" ] }, - "execution_count": 34, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } diff --git a/notebooks/pl-2.0-rnn_open-day.ipynb b/notebooks/pl-2.0-rnn_open-day.ipynb new file mode 100644 index 0000000..c22396a --- /dev/null +++ b/notebooks/pl-2.0-rnn_open-day.ipynb @@ -0,0 +1,507 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "from keras.models import Sequential\n", + "from keras.layers import *\n", + "from keras.callbacks import EarlyStopping" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "## Help Python find our packages\n", + "import sys\n", + "sys.path.append('..')\n", + "\n", + "import json\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import domain_scoring.domain_scoring as domain_scoring\n", + "\n", + "# Randomness\n", + "import random as rn\n", + "import tensorflow as tf\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import preprocessing\n", + "from keras.preprocessing.sequence import pad_sequences" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the data\n", + "Load the data we want to work with" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "data = json.load(open(path, \"r\", encoding=\"utf8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ACTED_IN', 'PRODUCED', 'DIRECTED', 'WROTE', 'Person', 'Movie']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type_selection = data[\"edge_type_selection\"] + data[\"node_type_selection\"]\n", + "types = []\n", + "for pair in type_selection:\n", + " types.append(pair[0])\n", + "types" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data extraction\n", + "Define functions for extraction and extract the data we need" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_mps(data):\n", + " count = 0\n", + " first = True\n", + " batches = []\n", + " batch = []\n", + " for probably_path in data[\"meta_paths\"]:\n", + " if count % 6 == 0:\n", + " # Don't add empty batches\n", + " if len(batch) > 0:\n", + " batches.append(batch)\n", + " batch = []\n", + " else:\n", + " if 'time_to_rate' not in probably_path.keys():\n", + " batch.append(probably_path)\n", + " count += 1\n", + " # append last batch\n", + " if len(batch) > 0:\n", + " batches.append(batch)\n", + " print('#meta-paths:', count - len(batches) - 1)\n", + " return batches" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def construct_graph(batches):\n", + " ## Construct rating graph\n", + " from util.datastructures import MetaPathRatingGraph, MetaPath\n", + " graph = MetaPathRatingGraph()\n", + "\n", + " for batch in batches:\n", + " #ordered = sorted(batch, key=lambda x: float(x['rating']))\n", + " for metapath in batch:\n", + " for another_metapath in batch:\n", + " if metapath is another_metapath:\n", + " continue\n", + " if float(metapath['rating']) <= float(another_metapath['rating']):\n", + " graph.add_user_rating(MetaPath.from_list(another_metapath['metapath']), MetaPath.from_list(metapath['metapath']), \n", + " distance=float(another_metapath['rating']) - float(metapath['rating']))\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "## Clean up data // remove time_to_rate from array of ratings.\n", + "def clean_up(data):\n", + " batches = extract_mps(data)\n", + " return batches, construct_graph(batches)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#meta-paths: 51\n" + ] + } + ], + "source": [ + "batches, graph = clean_up(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preprocessing\n", + "Define functions for preprocessing and preprocess the data for training" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def to_sequence(in_list):\n", + " return in_list.reshape((-1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "def one_hot(in_sequence, distinct_values):\n", + " sequence = np.zeros((len(in_sequence), distinct_values))\n", + " i = 0\n", + " for point in in_sequence: \n", + " sequence[i][point] = 1\n", + " i += 1\n", + " return sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess_raw(raw, types, max_len=9):\n", + " labeler = preprocessing.LabelEncoder()\n", + " labeler.fit(types)\n", + " distinct_values = len(types)\n", + " \n", + " data = []\n", + " for a,b in raw:\n", + " # encode labels to integers (0 is reserved for padding)\n", + " a = np.array(labeler.transform(a.as_list())) + 1\n", + " b = np.array(labeler.transform(b.as_list())) + 1\n", + " # pad to same length\n", + " a, b = pad_sequences([a, b], max_len, padding='post', value=0)\n", + " # merge a and b\n", + " sequence = np.append(a, b)\n", + " # to sequence\n", + " sequence = to_sequence(sequence)\n", + " # one-hot encode because we don't have distances/embeddings\n", + " sequence = one_hot(sequence, distinct_values + 1)\n", + " data.append(sequence)\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "# Use methods from our own domain scoring module as this will finally implement our model.\n", + "domain_score = domain_scoring.DomainScoring()\n", + "# Extract data and labels\n", + "x, y = domain_score._extract_data_labels(graph)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: Add further features:\n", + "# 1. Neighbor node types in graph schema\n", + "# 2. Length of mp\n", + "# 3. Number of instances?" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "x_preprocess = preprocess_raw(x, types)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "Setup the training process" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "test_size = 0.3\n", + "random_state = 42" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "# Make sure we have reproducible results\n", + "os.environ['PYTHONHASHSEED'] = '0'\n", + "def reset_seed():\n", + " np.random.seed(random_state)\n", + " rn.seed(random_state)\n", + " tf.set_random_seed(random_state)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# Split in test and train data\n", + "x_p_train, x_p_test, y_p_train, y_p_test = train_test_split(x_preprocess, y,\n", + " test_size=test_size,\n", + " random_state=random_state,\n", + " shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(131, 18, 7)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(x_p_train).shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train\n", + "Do the actual training and validation" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/25\n", + "131/131 [==============================] - 0s 2ms/step - loss: 0.7043 - acc: 0.5725\n", + "Epoch 2/25\n", + "131/131 [==============================] - 0s 336us/step - loss: 0.5798 - acc: 0.6641\n", + "Epoch 3/25\n", + "131/131 [==============================] - 0s 274us/step - loss: 0.5719 - acc: 0.6641\n", + "Epoch 4/25\n", + "131/131 [==============================] - 0s 336us/step - loss: 0.4939 - acc: 0.7252\n", + "Epoch 5/25\n", + "131/131 [==============================] - 0s 256us/step - loss: 0.5131 - acc: 0.7176\n", + "Epoch 6/25\n", + "131/131 [==============================] - 0s 256us/step - loss: 0.5070 - acc: 0.7328\n", + "Epoch 7/25\n", + "131/131 [==============================] - 0s 341us/step - loss: 0.5143 - acc: 0.7252\n", + "Epoch 8/25\n", + "131/131 [==============================] - 0s 349us/step - loss: 0.4986 - acc: 0.7481\n", + "Epoch 9/25\n", + "131/131 [==============================] - 0s 301us/step - loss: 0.4626 - acc: 0.7939\n", + "Epoch 10/25\n", + "131/131 [==============================] - 0s 305us/step - loss: 0.4559 - acc: 0.7634\n", + "Epoch 11/25\n", + "131/131 [==============================] - 0s 217us/step - loss: 0.4785 - acc: 0.7710\n", + "Epoch 12/25\n", + "131/131 [==============================] - 0s 330us/step - loss: 0.4430 - acc: 0.7939\n", + "Epoch 13/25\n", + "131/131 [==============================] - 0s 437us/step - loss: 0.4717 - acc: 0.7863\n", + "Epoch 14/25\n", + "131/131 [==============================] - 0s 371us/step - loss: 0.4550 - acc: 0.7557\n", + "Epoch 15/25\n", + "131/131 [==============================] - 0s 483us/step - loss: 0.4401 - acc: 0.7634\n", + "Epoch 16/25\n", + "131/131 [==============================] - 0s 399us/step - loss: 0.4434 - acc: 0.7863\n", + "Epoch 17/25\n", + "131/131 [==============================] - 0s 275us/step - loss: 0.4154 - acc: 0.7863\n", + "Epoch 18/25\n", + "131/131 [==============================] - 0s 649us/step - loss: 0.3993 - acc: 0.8015\n", + "Epoch 19/25\n", + "131/131 [==============================] - 0s 482us/step - loss: 0.3918 - acc: 0.8244\n", + "Epoch 20/25\n", + "131/131 [==============================] - 0s 503us/step - loss: 0.3787 - acc: 0.8168\n", + "Epoch 21/25\n", + "131/131 [==============================] - 0s 334us/step - loss: 0.4270 - acc: 0.7939\n", + "Epoch 22/25\n", + "131/131 [==============================] - 0s 458us/step - loss: 0.3979 - acc: 0.8015\n", + "Epoch 23/25\n", + "131/131 [==============================] - 0s 422us/step - loss: 0.3603 - acc: 0.8092\n", + "Epoch 24/25\n", + "131/131 [==============================] - 0s 266us/step - loss: 0.4018 - acc: 0.8321\n", + "Epoch 25/25\n", + "131/131 [==============================] - 0s 368us/step - loss: 0.3843 - acc: 0.8626\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reproducible results\n", + "reset_seed()\n", + "# Use sequence classification (RNN/LSTM)\n", + "model_rnn = Sequential()\n", + "model_rnn.add(SimpleRNN(128, input_shape=(18, 7)))\n", + "model_rnn.add(Dropout(0.5))\n", + "model_rnn.add(Dense(1, activation='sigmoid'))\n", + "\n", + "model_rnn.compile(loss='binary_crossentropy',\n", + " optimizer='rmsprop',\n", + " metrics=['accuracy'])\n", + "\n", + "model_rnn.fit(np.array(x_p_train), np.array(y_p_train), batch_size=35, epochs=25, callbacks=[])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "57/57 [==============================] - 0s 2ms/step\n" + ] + }, + { + "data": { + "text/plain": [ + "[0.5171718952948587, 0.8245614223312914]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_rnn.evaluate(np.array(x_p_test), np.array(y_p_test), batch_size=35)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From f00d77981bc94642402f824f2452d6f2ed0d94dc Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Mon, 19 Mar 2018 11:28:46 +0100 Subject: [PATCH 34/35] Restructured rated datasets --- .../length_3-Axel_1519139472.9673014.json} | 0 .../length_3-Marius Marc_1519142042.4640055.json} | 0 .../Constantin Lange_1519139724.4022887.json} | 0 .../Constantin_1519140574.3827724.json} | 0 .../Jan_1519144878.0546994.json} | 0 .../Juliane_1519146371.039609.json} | 0 .../Merlin_1519148528.2417703.json} | 0 .../Potato_1519142479.127663.json} | 0 .../mcfelix_1519142949.904623.json} | 0 tests/active_learning/experiments_test.py | 4 ++-- 10 files changed, 2 insertions(+), 2 deletions(-) rename rated_datasets/{Programming Languages OOvsWeb [Freebase] - length 3_Axel_1519139472.9673014.json => programming_languages/length_3-Axel_1519139472.9673014.json} (100%) rename rated_datasets/{Programming Languages OOvsWeb [Freebase] - length 3_Marius Marc_1519142042.4640055.json => programming_languages/length_3-Marius Marc_1519142042.4640055.json} (100%) rename rated_datasets/{Rotten Tomato_Constantin Lange_1519139724.4022887.json => rotten_tomatoes/Constantin Lange_1519139724.4022887.json} (100%) rename rated_datasets/{Rotten Tomato_Constantin_1519140574.3827724.json => rotten_tomatoes/Constantin_1519140574.3827724.json} (100%) rename rated_datasets/{Rotten Tomato_Jan_1519144878.0546994.json => rotten_tomatoes/Jan_1519144878.0546994.json} (100%) rename rated_datasets/{Rotten Tomato_Juliane_1519146371.039609.json => rotten_tomatoes/Juliane_1519146371.039609.json} (100%) rename rated_datasets/{Rotten_Tomato_Merlin_1519148528.2417703.json => rotten_tomatoes/Merlin_1519148528.2417703.json} (100%) rename rated_datasets/{Rotten Tomato_Potato_1519142479.127663.json => rotten_tomatoes/Potato_1519142479.127663.json} (100%) rename rated_datasets/{Rotten Tomato_mcfelix_1519142949.904623.json => rotten_tomatoes/mcfelix_1519142949.904623.json} (100%) diff --git a/rated_datasets/Programming Languages OOvsWeb [Freebase] - length 3_Axel_1519139472.9673014.json b/rated_datasets/programming_languages/length_3-Axel_1519139472.9673014.json similarity index 100% rename from rated_datasets/Programming Languages OOvsWeb [Freebase] - length 3_Axel_1519139472.9673014.json rename to rated_datasets/programming_languages/length_3-Axel_1519139472.9673014.json diff --git a/rated_datasets/Programming Languages OOvsWeb [Freebase] - length 3_Marius Marc_1519142042.4640055.json b/rated_datasets/programming_languages/length_3-Marius Marc_1519142042.4640055.json similarity index 100% rename from rated_datasets/Programming Languages OOvsWeb [Freebase] - length 3_Marius Marc_1519142042.4640055.json rename to rated_datasets/programming_languages/length_3-Marius Marc_1519142042.4640055.json diff --git a/rated_datasets/Rotten Tomato_Constantin Lange_1519139724.4022887.json b/rated_datasets/rotten_tomatoes/Constantin Lange_1519139724.4022887.json similarity index 100% rename from rated_datasets/Rotten Tomato_Constantin Lange_1519139724.4022887.json rename to rated_datasets/rotten_tomatoes/Constantin Lange_1519139724.4022887.json diff --git a/rated_datasets/Rotten Tomato_Constantin_1519140574.3827724.json b/rated_datasets/rotten_tomatoes/Constantin_1519140574.3827724.json similarity index 100% rename from rated_datasets/Rotten Tomato_Constantin_1519140574.3827724.json rename to rated_datasets/rotten_tomatoes/Constantin_1519140574.3827724.json diff --git a/rated_datasets/Rotten Tomato_Jan_1519144878.0546994.json b/rated_datasets/rotten_tomatoes/Jan_1519144878.0546994.json similarity index 100% rename from rated_datasets/Rotten Tomato_Jan_1519144878.0546994.json rename to rated_datasets/rotten_tomatoes/Jan_1519144878.0546994.json diff --git a/rated_datasets/Rotten Tomato_Juliane_1519146371.039609.json b/rated_datasets/rotten_tomatoes/Juliane_1519146371.039609.json similarity index 100% rename from rated_datasets/Rotten Tomato_Juliane_1519146371.039609.json rename to rated_datasets/rotten_tomatoes/Juliane_1519146371.039609.json diff --git a/rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json b/rated_datasets/rotten_tomatoes/Merlin_1519148528.2417703.json similarity index 100% rename from rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json rename to rated_datasets/rotten_tomatoes/Merlin_1519148528.2417703.json diff --git a/rated_datasets/Rotten Tomato_Potato_1519142479.127663.json b/rated_datasets/rotten_tomatoes/Potato_1519142479.127663.json similarity index 100% rename from rated_datasets/Rotten Tomato_Potato_1519142479.127663.json rename to rated_datasets/rotten_tomatoes/Potato_1519142479.127663.json diff --git a/rated_datasets/Rotten Tomato_mcfelix_1519142949.904623.json b/rated_datasets/rotten_tomatoes/mcfelix_1519142949.904623.json similarity index 100% rename from rated_datasets/Rotten Tomato_mcfelix_1519142949.904623.json rename to rated_datasets/rotten_tomatoes/mcfelix_1519142949.904623.json diff --git a/tests/active_learning/experiments_test.py b/tests/active_learning/experiments_test.py index 76f255c..3120f7e 100644 --- a/tests/active_learning/experiments_test.py +++ b/tests/active_learning/experiments_test.py @@ -12,11 +12,11 @@ def test_UserOracle(self): "The Oracle of Merlin and Felix ... ") merlin = UserOracle(dataset_name='Rotten Tomato', - ground_truth_path='rated_datasets/Rotten Tomato_Merlin_1519148528.2417703.json', + ground_truth_path='rated_datasets/rotten_tomatoes/Merlin_1519148528.2417703.json', is_zero_indexed=False, batch_size=5) felix = UserOracle(dataset_name='Rotten Tomato', - ground_truth_path='rated_datasets/Rotten Tomato_mcfelix_1519142949.904623.json', + ground_truth_path='rated_datasets/rotten_tomatoes/mcfelix_1519142949.904623.json', is_zero_indexed=False, batch_size=5, default_rating=0.5) From 3a622f0061e1983c0772100cf6433d32b0666489 Mon Sep 17 00:00:00 2001 From: Pius Ladenburger Date: Mon, 19 Mar 2018 11:32:47 +0100 Subject: [PATCH 35/35] Updated dataset paths in notebooks. --- notebooks/pl-1.0-exploration_open-day.ipynb | 4 ++-- notebooks/pl-1.0-regression_open-day.ipynb | 4 ++-- notebooks/pl-1.0-rnn_open-day.ipynb | 4 ++-- notebooks/pl-1.0-rnn_regression_open-day.ipynb | 2 +- notebooks/pl-2.0-rnn_open-day.ipynb | 2 +- notebooks/sb-1.0-classification_open-day.ipynb | 2 +- notebooks/sb-1.0-load_data_open-day.ipynb | 4 ++-- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/notebooks/pl-1.0-exploration_open-day.ipynb b/notebooks/pl-1.0-exploration_open-day.ipynb index fe2e350..277dcaa 100644 --- a/notebooks/pl-1.0-exploration_open-day.ipynb +++ b/notebooks/pl-1.0-exploration_open-day.ipynb @@ -31,7 +31,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" + "path = '../rated_datasets/rotten_tomatoes/Potato_1519142479.127663.json'" ] }, { @@ -49,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + "path = '../rated_datasets/rotten_tomatoes/Merlin_1519148528.2417703.json'" ] }, { diff --git a/notebooks/pl-1.0-regression_open-day.ipynb b/notebooks/pl-1.0-regression_open-day.ipynb index bc75241..b19bae3 100644 --- a/notebooks/pl-1.0-regression_open-day.ipynb +++ b/notebooks/pl-1.0-regression_open-day.ipynb @@ -41,7 +41,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + "path = '../rated_datasets/rotten_tomatoes/Merlin_1519148528.2417703.json'" ] }, { @@ -59,7 +59,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" + "path = '../rated_datasets/rotten_tomatoes/Potato_1519142479.127663.json'" ] }, { diff --git a/notebooks/pl-1.0-rnn_open-day.ipynb b/notebooks/pl-1.0-rnn_open-day.ipynb index 7cec3b0..cd24f3a 100644 --- a/notebooks/pl-1.0-rnn_open-day.ipynb +++ b/notebooks/pl-1.0-rnn_open-day.ipynb @@ -46,7 +46,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" + "path = '../rated_datasets/rotten_tomatoes/Potato_1519142479.127663.json'" ] }, { @@ -64,7 +64,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" + "path = '../rated_datasets/rotten_tomatoes/Potato_1519142479.127663.json'" ] }, { diff --git a/notebooks/pl-1.0-rnn_regression_open-day.ipynb b/notebooks/pl-1.0-rnn_regression_open-day.ipynb index 7ec29ba..71238b2 100644 --- a/notebooks/pl-1.0-rnn_regression_open-day.ipynb +++ b/notebooks/pl-1.0-rnn_regression_open-day.ipynb @@ -47,7 +47,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + "path = '../rated_datasets/rotten_tomatoes/Merlin_1519148528.2417703.json'" ] }, { diff --git a/notebooks/pl-2.0-rnn_open-day.ipynb b/notebooks/pl-2.0-rnn_open-day.ipynb index c22396a..7ce1806 100644 --- a/notebooks/pl-2.0-rnn_open-day.ipynb +++ b/notebooks/pl-2.0-rnn_open-day.ipynb @@ -74,7 +74,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten Tomato_Potato_1519142479.127663.json'" + "path = '../rated_datasets/rotten_tomatoes/Potato_1519142479.127663.json'" ] }, { diff --git a/notebooks/sb-1.0-classification_open-day.ipynb b/notebooks/sb-1.0-classification_open-day.ipynb index 1635890..f766f2d 100644 --- a/notebooks/sb-1.0-classification_open-day.ipynb +++ b/notebooks/sb-1.0-classification_open-day.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + "path = '../rated_datasets/rotten_tomatoes/Merlin_1519148528.2417703.json'" ] }, { diff --git a/notebooks/sb-1.0-load_data_open-day.ipynb b/notebooks/sb-1.0-load_data_open-day.ipynb index ac25dd2..bb2c59b 100644 --- a/notebooks/sb-1.0-load_data_open-day.ipynb +++ b/notebooks/sb-1.0-load_data_open-day.ipynb @@ -19,7 +19,7 @@ }, "outputs": [], "source": [ - "path = 'rated_datasets/Rotten_Tomato_Merlin_1519148528.2417703.json'" + "path = 'rated_datasets/rotten_tomatoes/Merlin_1519148528.2417703.json'" ] }, { @@ -130,7 +130,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.5.2" } }, "nbformat": 4,