From 3f5eb641eb154c17dd8b1863b7714e0fa8a07973 Mon Sep 17 00:00:00 2001
From: mojtaba
Date: Thu, 24 Jan 2019 17:24:26 -0500
Subject: [PATCH 1/2] added .gitignore

---
 .gitignore | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8d5e598
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,120 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+*.iws
+
+# C extensions
+*.so
+
+# Pycharm files
+../.idea
+**/../.idea
+**/.idea/
+.idea/
+.idea/*
+
+# documentation
+_build/
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+/conda_recipe/
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+*.ipynb
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# mac os
+.DS_Store

From 21a5c35205834c329d388baefe539d3706014208 Mon Sep 17 00:00:00 2001
From: mojtaba
Date: Thu, 24 Jan 2019 17:24:54 -0500
Subject: [PATCH 2/2] compatible with tensorflow

---
 NGF/layers.py | 97 ++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 61 insertions(+), 36 deletions(-)

diff --git a/NGF/layers.py b/NGF/layers.py
index 4e127f8..285ae9e 100644
--- a/NGF/layers.py
+++ b/NGF/layers.py
@@ -8,34 +8,35 @@
 
 from keras import layers
 from keras.utils.layer_utils import layer_from_config
-import theano.tensor as T
+import tensorflow as tf
+# import theano.tensor as T
 import keras.backend as K
 
 from .utils import filter_func_args, mol_shapes_to_dims
 
-def temporal_padding(x, paddings=(1, 0), padvalue=0):
-    '''Pad the middle dimension of a 3D tensor
-    with `padding[0]` values left and `padding[1]` values right.
-
-    Modified from keras.backend.temporal_padding
-    https://github.com/fchollet/keras/blob/3bf913d/keras/backend/theano_backend.py#L590
-
-    TODO: Implement for tensorflow (supposebly more easy)
-    '''
-    if not isinstance(paddings, (tuple, list, ndarray)):
-        paddings = (paddings, paddings)
-
-    input_shape = x.shape
-    output_shape = (input_shape[0],
-                    input_shape[1] + sum(paddings),
-                    input_shape[2])
-    output = T.zeros(output_shape)
-
-    # Set pad value and set subtensor of actual tensor
-    output = T.set_subtensor(output[:, :paddings[0], :], padvalue)
-    output = T.set_subtensor(output[:, paddings[1]:, :], padvalue)
-    output = T.set_subtensor(output[:, paddings[0]:x.shape[1] + paddings[0], :], x)
-    return output
+# def temporal_padding(x, paddings=(1, 0), padvalue=0):
+#     '''Pad the middle dimension of a 3D tensor
+#     with `padding[0]` values left and `padding[1]` values right.
+#
+#     Modified from keras.backend.temporal_padding
+#     https://github.com/fchollet/keras/blob/3bf913d/keras/backend/theano_backend.py#L590
+#
+#     TODO: Implement for tensorflow (supposedly easier)
+#     '''
+#     if not isinstance(paddings, (tuple, list, ndarray)):
+#         paddings = (paddings, paddings)
+#
+#     input_shape = x.shape
+#     output_shape = (input_shape[0],
+#                     input_shape[1] + sum(paddings),
+#                     input_shape[2])
+#     output = T.zeros(output_shape)
+#
+#     # Set pad value and set subtensor of actual tensor
+#     output = T.set_subtensor(output[:, :paddings[0], :], padvalue)
+#     output = T.set_subtensor(output[:, paddings[1]:, :], padvalue)
+#     output = T.set_subtensor(output[:, paddings[0]:x.shape[1] + paddings[0], :], x)
+#     return output
 
 def neighbour_lookup(atoms, edges, maskvalue=0, include_self=False):
     ''' Looks up the features of an all atoms neighbours, for a batch of molecules.
@@ -64,8 +65,11 @@
     masked_edges = edges + 1
     # We then add a padding vector at index 0 by padding to the left of the
     # lookup matrix with the value that the new mask should get
-    masked_atoms = temporal_padding(atoms, (1,0), padvalue=maskvalue)
-
+    # Theano padding:
+    # masked_atoms = temporal_padding(atoms, (1,0), padvalue=maskvalue)
+    # Tensorflow padding:
+    paddings = tf.constant([[0, 0], [1, 0], [0, 0]])
+    masked_atoms = tf.pad(atoms, paddings, "CONSTANT")
 
     # Import dimensions
     atoms_shape = K.shape(masked_atoms)
@@ -79,8 +83,13 @@
 
     # create broadcastable offset
     offset_shape = (batch_n, 1, 1)
-    offset = K.reshape(T.arange(batch_n, dtype=K.dtype(masked_edges)), offset_shape)
-    offset *= lookup_size
+    # Theano arange and cast:
+    # offset = K.reshape(T.arange(batch_n, dtype=K.dtype(masked_edges)), offset_shape)
+    # offset *= lookup_size
+    # Tensorflow range and backend cast:
+    offset = K.reshape(K.cast(tf.range(batch_n, dtype='int32'), dtype=K.dtype(masked_edges)), offset_shape)
+    offset *= K.cast(lookup_size, dtype=K.dtype(offset))
+
 
     # apply offset to account for the fact that after reshape, all individual
     # batch_n indices will be combined into a single big index
@@ -88,14 +97,20 @@
     flattened_edges = K.reshape(masked_edges + offset, (batch_n, -1))
 
     # Gather flattened
-    flattened_result = K.gather(flattened_atoms, flattened_edges)
+    # flattened_result = K.gather(flattened_atoms, flattened_edges)
+    # Tensorflow/backend cast:
+    flattened_result = K.gather(flattened_atoms, K.cast(flattened_edges, dtype='int32'))
 
     # Unflatten result
     output_shape = (batch_n, max_atoms, max_degree, num_atom_features)
-    output = T.reshape(flattened_result, output_shape)
+    #output = T.reshape(flattened_result, output_shape)
+    # Tensorflow/backend reshape:
+    output = K.reshape(flattened_result, output_shape)
 
     if include_self:
-        return K.concatenate([K.expand_dims(atoms, dim=2), output], axis=2)
+        # return K.concatenate([K.expand_dims(atoms, dim=2), output], axis=2)
+        # Tensorflow concat:
+        return tf.concat([K.expand_dims(atoms, dim=2), output], axis=2)
     return output
 
 class NeuralGraphHidden(layers.Layer):
@@ -233,7 +248,9 @@ def call(self, inputs, mask=None):
         num_bond_features = bonds._keras_shape[-1]
 
         # Create a matrix that stores for each atom, the degree it is
-        atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
+        # atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
+        # backend cast to floatx:
+        atom_degrees = K.sum(K.cast(K.not_equal(edges, -1), dtype=K.floatx()), axis=-1, keepdims=True)
 
         # For each atom, look up the features of it's neighbour
         neighbour_atom_features = neighbour_lookup(atoms, edges, include_self=True)
@@ -245,7 +262,9 @@ def call(self, inputs, mask=None):
         summed_bond_features = K.sum(bonds, axis=-2)
 
         # Concatenate the summed atom and bond features
-        summed_features = K.concatenate([summed_atom_features, summed_bond_features], axis=-1)
+        # summed_features = K.concatenate([summed_atom_features, summed_bond_features], axis=-1)
+        # Tensorflow concat:
+        summed_features = tf.concat([summed_atom_features, summed_bond_features], axis=-1)
 
         # For each degree we convolve with a different weight matrix
         new_features_by_degree = []
@@ -430,14 +449,18 @@ def call(self, inputs, mask=None):
         # to create a general atom mask (unused atoms are 0 padded)
        # We have to use the edge vector for this, because in theory, a convolution
         # could lead to a zero vector for an atom that is present in the molecule
-        atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
+        # atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
+        # backend cast to floatx:
+        atom_degrees = K.sum(K.cast(K.not_equal(edges, -1), K.floatx()), axis=-1, keepdims=True)
         general_atom_mask = K.cast(K.not_equal(atom_degrees, 0), K.floatx())
 
         # Sum the edge features for each atom
         summed_bond_features = K.sum(bonds, axis=-2)
 
         # Concatenate the summed atom and bond features
-        atoms_bonds_features = K.concatenate([atoms, summed_bond_features], axis=-1)
+        # atoms_bonds_features = K.concatenate([atoms, summed_bond_features], axis=-1)
+        # Tensorflow concat:
+        atoms_bonds_features = tf.concat([atoms, summed_bond_features], axis=-1)
 
         # Compute fingerprint
         atoms_bonds_features._keras_shape = (None, max_atoms, num_atom_features+num_bond_features)
@@ -504,7 +527,9 @@ def call(self, inputs, mask=None):
         # Take max along `degree` axis (2) to get max of neighbours and self
         max_features = K.max(neighbour_atom_features, axis=2)
 
-        atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
+        # atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
+        # backend cast to floatx:
+        atom_degrees = K.sum(K.cast(K.not_equal(edges, -1), K.floatx()), axis=-1, keepdims=True)
         general_atom_mask = K.cast(K.not_equal(atom_degrees, 0), K.floatx())
 
         return max_features * general_atom_mask
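
Note on the tf.pad replacement in neighbour_lookup: tf.pad(atoms, paddings, "CONSTANT")
always pads with zeros, so it reproduces the old
temporal_padding(atoms, (1,0), padvalue=maskvalue) call only when maskvalue is 0 (the
default). If a caller ever needs a non-zero maskvalue, a TensorFlow version of the
commented-out helper could look roughly like the sketch below. This is an untested
sketch, not part of the patches above; the name temporal_padding_tf is made up here,
and it relies on tf.pad's constant_values keyword argument.

import tensorflow as tf

def temporal_padding_tf(x, paddings=(1, 0), padvalue=0):
    '''Pad the middle (atom) axis of a 3D tensor with `padvalue`.

    TensorFlow counterpart of the commented-out Theano helper: adds
    paddings[0] rows on the left and paddings[1] rows on the right of
    axis 1, filling the new entries with padvalue.
    '''
    if not isinstance(paddings, (tuple, list)):
        paddings = (paddings, paddings)
    # [[before, after], ...] per axis: pad only the middle (atom) axis
    pad_spec = [[0, 0], [int(paddings[0]), int(paddings[1])], [0, 0]]
    return tf.pad(x, pad_spec, mode="CONSTANT", constant_values=padvalue)

With such a helper, the padding line in neighbour_lookup could stay parameterised on
maskvalue, e.g. masked_atoms = temporal_padding_tf(atoms, (1, 0), padvalue=maskvalue).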