Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
068510a
Add notebook to load data from open day
Baschdl Mar 2, 2018
b0674ee
Add fitting of graph
Baschdl Mar 2, 2018
e84ae04
Merge branch 'add-testing-preference-prediction' into userpreference-…
Baschdl Mar 2, 2018
0705252
Correct creation of MetaPathRatingGraph
Baschdl Mar 2, 2018
f336836
Add exporting of decision tree graph and add feature names
Baschdl Mar 2, 2018
192bb85
Revert "Add exporting of decision tree graph and add feature names"
Baschdl Mar 2, 2018
bd18e05
Add exporting of decision tree graph and add feature names
Baschdl Mar 2, 2018
0b48d1d
Tidy up notebook
Baschdl Mar 2, 2018
f2c8358
Move notebook
Baschdl Mar 2, 2018
086de3d
Rename notebook
Baschdl Mar 2, 2018
fdbc5cf
Add pruned notebook
Baschdl Mar 2, 2018
7690ee4
Add dynamic rendering of decision tree
Baschdl Mar 2, 2018
b821fc3
Allow enabling development mode for server.
GittiHab Mar 8, 2018
c3b0d82
Add exploration notebook.
GittiHab Mar 12, 2018
ed7a369
Update exploration notebook.
GittiHab Mar 12, 2018
df4d490
Add Dockerfile and jupyter notebook config
Baschdl Mar 12, 2018
3a9a539
Rename notebook
GittiHab Mar 12, 2018
6781556
Add regression notebook.
GittiHab Mar 12, 2018
c42fcc3
Add accuracy score to regressor.
GittiHab Mar 12, 2018
0b8f930
Add tini to Docker image
Baschdl Mar 13, 2018
619c5a1
Introduce script to create Jupyter Notebook Docker image.
GittiHab Mar 9, 2018
d6defb4
Correct script for other Dockerfile
Baschdl Mar 13, 2018
9a05d92
Merge pull request #51 from KDD-OpenSource/jupyter-notebook-extra-doc…
Baschdl Mar 13, 2018
ad32a41
Merge branch 'master' into userpreference-prediction-openday
Baschdl Mar 13, 2018
2388005
Fix not initializing with super.
GittiHab Mar 13, 2018
f11c646
Updated notebook.
GittiHab Mar 13, 2018
35208f0
Add script to copy notebooks from container to local directory.
GittiHab Mar 13, 2018
a12c260
Add missing random state.
GittiHab Mar 13, 2018
48b83b3
Add rnn notebook
GittiHab Mar 14, 2018
d91bec1
Refactor domain scoring class.
GittiHab Mar 14, 2018
9512986
Add simple neural networks to notebook.
GittiHab Mar 14, 2018
2bc0905
Add newly rendered image.
GittiHab Mar 14, 2018
9f7eeab
Add rnn regression notebook.
GittiHab Mar 14, 2018
93a08b6
Add n-gram range parameter
Baschdl Mar 14, 2018
a5ed39b
Merge branch 'userpreference-prediction-openday' of https://github.co…
Baschdl Mar 14, 2018
f794c3b
Add rnn notebook with high score.
GittiHab Mar 15, 2018
733c5a0
Add new structured rnn notebook.
GittiHab Mar 15, 2018
e131272
Merge branch 'master' into userpreference-prediction-openday
Baschdl Mar 15, 2018
f00d779
Restructured rated datasets
GittiHab Mar 19, 2018
3a622f0
Updated dataset paths in notebooks.
GittiHab Mar 19, 2018
1de630a
Merge branch 'master' into userpreference-prediction-openday
GittiHab Mar 19, 2018
e328645
Merge branch 'userpreference-prediction-openday' of https://github.co…
Baschdl Apr 4, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
FROM ubuntu:16.04
EXPOSE 8000
# TODO: Do we really need python3-dev?
RUN apt-get update && apt-get install -y python3-pip python3 dirmngr
RUN apt-get update && apt-get install -y python3-pip python3 dirmngr graphviz

RUN apt-key adv --keyserver pgp.skewed.de --recv-key 612DEFB798507F25
RUN echo "deb http://downloads.skewed.de/apt/xenial xenial universe" | tee -a /etc/apt/sources.list
RUN echo "deb-src http://downloads.skewed.de/apt/xenial xenial universe" | tee -a /etc/apt/sources.list
RUN apt-get update && apt-get install -y libboost-all-dev
RUN apt-get update -qq && apt-get install -y python3-graph-tool

RUN pip3 install jupyter

COPY . /32de-python/

WORKDIR /32de-python
Expand Down
16 changes: 16 additions & 0 deletions Dockerfile-Notebook
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Notebook image: layers a Jupyter Notebook server on top of the `server` image.
# NOTE(review): `server:latest` is presumably the image built from the main
# Dockerfile and must exist locally before this one is built — confirm.
FROM server:latest

# NOTE(review): graphviz is presumably needed to render the exported decision
# tree graphs from the notebooks — confirm.
RUN apt-get update && apt-get install -y graphviz
RUN pip3 install jupyter

# Add Tini. Tini operates as a process subreaper for jupyter. This prevents
# kernel crashes.
ENV TINI_VERSION v0.6.0
# Fetched at build time from the GitHub release that matches TINI_VERSION.
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/bin/tini
RUN chmod +x /usr/bin/tini
# Every container command (the CMD below) is started through tini.
ENTRYPOINT ["/usr/bin/tini", "--"]

# Copy the whole build context into the image.
COPY . /32de-python/

# Port the notebook server listens on; keep in sync with c.NotebookApp.port in
# deployment/jupyter_notebook_config.py.
EXPOSE 8888
# The config path is relative, so it is resolved against the working directory.
# NOTE(review): this relies on the base image setting WORKDIR to /32de-python,
# since this file sets no WORKDIR of its own — confirm.
CMD ["jupyter", "notebook", "--config", "deployment/jupyter_notebook_config.py"]
2 changes: 2 additions & 0 deletions deployment/build-notebook.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Build the `notebook` Docker image from Dockerfile-Notebook.
#
# Usage: build-notebook.sh [BUILD_CONTEXT]
#   BUILD_CONTEXT  directory used as the Docker build context
#                  (default: the current directory).
#
# The expansion is quoted so that a context path containing spaces or glob
# characters is passed to docker as a single argument.
docker build -t notebook -f Dockerfile-Notebook "${1:-.}"
2 changes: 2 additions & 0 deletions deployment/copy-notebooks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Copy the notebooks out of the `notebook-container` container into a local
# directory.
#
# Usage: copy-notebooks.sh [TARGET_DIR]
#   TARGET_DIR  local directory whose `notebooks/` sub-directory receives the
#               files (default: the current directory).
#
# The trailing `/.` on both sides makes docker copy the directory's contents
# rather than the directory itself. The destination is quoted so that a target
# path containing spaces or glob characters stays a single argument.
docker cp notebook-container:/32de-python/notebooks/. "${1:-.}/notebooks/."
7 changes: 7 additions & 0 deletions deployment/jupyter_notebook_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Jupyter Notebook configuration for the notebook Docker image; it is passed to
# `jupyter notebook --config` by the CMD in Dockerfile-Notebook.
# NOTE(review): `c` is not defined in this file — presumably it is the config
# object Jupyter provides when it loads a config file; confirm.

# Set ip to '*' to bind on all interfaces (ips) for the public server
c.NotebookApp.ip = '*'
# Hashed login password (looks like '<algorithm>:<salt>:<digest>').
# NOTE(review): the hash is committed to the repository and therefore shared by
# every image built from it — consider supplying it at deploy time instead.
c.NotebookApp.password = u'sha1:ba8ffcde0b68:95fa25d7225a3915db1db76799f1695a0483afb4'
# Do not try to open a browser when the server starts.
c.NotebookApp.open_browser = False

# Keep in sync with EXPOSE 8888 in Dockerfile-Notebook and with the
# container-side port published by deployment/run-notebook.sh.
c.NotebookApp.port = 8888
# Allow the server to run as root.
# NOTE(review): presumably needed because the container process runs as root
# (no USER instruction in Dockerfile-Notebook) — confirm.
c.NotebookApp.allow_root = True
7 changes: 7 additions & 0 deletions deployment/run-notebook.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# (Re)start the notebook container from the `notebook` image.
#
# Usage: run-notebook.sh [HOST_PORT]
#   HOST_PORT  host port mapped to the container's port 8888 (default: 8888).

# Stop and remove a container left over from a previous run so that the fixed
# name `notebook-container` can be reused. Both commands report an error when
# no such container exists; the script continues regardless.
docker stop notebook-container
docker rm notebook-container
# Start detached (-d), publishing HOST_PORT -> 8888 inside the container.
docker run --name notebook-container \
--publish=${1:-8888}:8888 \
-d \
notebook
1 change: 1 addition & 0 deletions deployment/run-server.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ docker rm server-container
docker run --name server-container \
--publish=${1:-8000}:8000 \
-d \
-e METAEXP_DEV=${2:-'false'}\
server
2 changes: 2 additions & 0 deletions deployment/stop-notebook.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Stop the running notebook container. The container is not removed here;
# run-notebook.sh removes it before starting a new one.
docker stop notebook-container
79 changes: 70 additions & 9 deletions domain_scoring/domain_scoring.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Tuple
import numpy
from sklearn.ensemble import RandomForestRegressor

from util.datastructures import MetaPathRatingGraph
from util.datastructures import MetaPath
from util.lists import all_pairs
Expand All @@ -13,12 +15,12 @@


class DomainScoring():
def __init__(self):
def __init__(self, ngram_range=(1,1)):
"""
Classifies the ordering and extracts the domain value of meta-paths.
"""
# The token_pattern also allows single character strings which the default doesn't allow
self.vectorizer = TfidfVectorizer(analyzer='word', token_pattern='\\b\\w+\\b')
self.vectorizer = TfidfVectorizer(analyzer='word', token_pattern='\\b\\w+\\b', ngram_range=ngram_range)
self.random_state = RANDOM_STATE
self.classifier = DecisionTreeClassifier(random_state=self.random_state)
self.domain_value_transformer = NaiveTransformer()
Expand All @@ -33,6 +35,7 @@ def fit(self, metapath_graph: MetaPathRatingGraph, test_size: float = None) -> N
"""
self._fit_vectorizer(metapath_graph)
x, y = self._extract_data_labels(metapath_graph)
x = self._preprocess(x)

if test_size is not None:
x_train, x_test, y_train, y_test = train_test_split(x, y,
Expand All @@ -43,10 +46,18 @@ def fit(self, metapath_graph: MetaPathRatingGraph, test_size: float = None) -> N
x_train = x
y_train = y

self.classifier = self.classifier.fit(self._preprocess(x_train), y_train)
self._fit(x_train, y_train)

if test_size is not None:
print('Test accuracy is {}'.format(self.classifier.score(X=self._preprocess(x_test), y=y_test)))
if test_size:
self._test_score(x_test, y_test)

def _fit(self, x, y) -> None:
"""
Executes the actual fitting of the classifier. Overwrite in subclasses if necessary.
:param x: The preprocessed features.
:param y: The labels.
"""
self.classifier.fit(x, y)

def predict(self, metapath_unrated: List[MetaPath]) -> List[Tuple[MetaPath, int]]:
"""
Expand Down Expand Up @@ -78,8 +89,8 @@ def _transform_to_domain_values(self,
"""
Transforms the classified ordering of all meta-paths pairs to the domain values.

:param inferred_ratings: user-defined and inferred rating for all meta-paths
:return: Total order of all meta-paths with values in [0,1]
:param inferred_ratings: user-defined and inferred rating for all meta-paths.
:return: Total order of all meta-paths with values in [0,1].
"""

return self.domain_value_transformer.transform(metapaths_pairs, classification)
Expand All @@ -94,10 +105,10 @@ def _fit_vectorizer(self, metapath_graph: MetaPathRatingGraph) -> None:

def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tuple[MetaPath]], List[int]):
"""
Computes all pairwise tuples (a, b) of the meta-paths with their feature vector. If a is ranked higher than b
Computes all pairwise tuples (a, b) of the meta-paths. If a is ranked higher than b
a > b then the label is 1, 0 otherwise.

:param metapath_graph: The meta-path graph representing the ordering of all meta-path
:param metapath_graph: The meta-path graph representing the ordering of all meta-path.
:return: (x, y) The feature vector and class labels.
"""

Expand All @@ -112,3 +123,53 @@ def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tup
metapath_labels.append(LARGER) # >

return metapath_pairs, metapath_labels

def _test_score(self, x_test, y_test):
print('Test accuracy is {}'.format(self.classifier.score(X=x_test, y=y_test)))

class DomainScoringRegressor(DomainScoring):
    """
    Variant of DomainScoring that fits a RandomForestRegressor on the signed distance of
    meta-path pairs instead of classifying their ordering.
    """

    def __init__(self, ngram_range=(1, 1)):
        """
        Extracts the domain value of meta-paths via regression.

        :param ngram_range: Passed on unchanged to DomainScoring.__init__.
        """
        super().__init__(ngram_range=ngram_range)
        # Replace the model created by the base class. The attribute keeps its name because the
        # inherited fitting code accesses the model through self.classifier.
        self.classifier = RandomForestRegressor(random_state=self.random_state)

    def _extract_data_labels(self, metapath_graph: MetaPathRatingGraph) -> (List[Tuple[MetaPath]], List[int]):
        """
        Computes all pairwise distances (a, b) of the meta-paths.

        Every rated pair is emitted twice, once per orientation, with the sign of the distance
        flipped for the mirrored pair.

        :param metapath_graph: The meta-path graph representing the ordering of all meta-path.
        :return: (x, y) The meta-paths pairs and their respective distance.
        """

        metapath_pairs = []
        metapath_labels = []

        for superior, inferior, distance in metapath_graph.stream_meta_path_distances():
            metapath_pairs.append((inferior, superior))
            metapath_labels.append(distance)  # <

            metapath_pairs.append((superior, inferior))
            metapath_labels.append(-distance)  # >

        return metapath_pairs, metapath_labels

    def _test_score(self, x_test, y_test):
        """
        Converts regression result into a binary classification and uses mean accuracy.

        A prediction counts as correct when it lies on the same side of zero as its label.
        The R^2 value reported by the regressor itself is printed as well.

        :param x_test: The preprocessed test features.
        :param y_test: The signed distances belonging to x_test.
        """
        test_predict = self.classifier.predict(x_test)
        expected_positive = numpy.asarray(y_test) > 0
        predicted_positive = numpy.asarray(test_predict) > 0
        # Compare for equality: a logical AND would only credit pairs where both are positive
        # and would count every correctly predicted negative pair as a miss.
        score = numpy.mean(expected_positive == predicted_positive)
        print('Test accuracy is {}'.format(score))
        print('R^2 is {}'.format(self.classifier.score(X=x_test, y=y_test)))

# TODO: WIP
class DomainScoringNeuralNet(DomainScoring):
    """
    Placeholder for a neural-network based domain scoring.

    Nothing is overridden yet besides the constructor, which only delegates to DomainScoring.
    """

    def __init__(self):
        """
        Intended to extract the domain value of meta-paths by training a neural network.

        Not implemented yet: only the base-class initialisation is executed.
        """
        super().__init__()
Loading