Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.iws

# C extensions
*.so

# Pycharm files
../.idea
**/../.idea
**/.idea/
.idea/
.idea/*

# documentation
_build/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
/conda_recipe/

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints
*.ipynb

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# mac os
.DS_Store
97 changes: 61 additions & 36 deletions NGF/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,35 @@

from keras import layers
from keras.utils.layer_utils import layer_from_config
import theano.tensor as T
import tensorflow as tf
# import theano.tensor as T
import keras.backend as K

from .utils import filter_func_args, mol_shapes_to_dims

def temporal_padding(x, paddings=(1, 0), padvalue=0):
'''Pad the middle dimension of a 3D tensor
with `padding[0]` values left and `padding[1]` values right.

Modified from keras.backend.temporal_padding
https://github.com/fchollet/keras/blob/3bf913d/keras/backend/theano_backend.py#L590

TODO: Implement for tensorflow (supposebly more easy)
'''
if not isinstance(paddings, (tuple, list, ndarray)):
paddings = (paddings, paddings)

input_shape = x.shape
output_shape = (input_shape[0],
input_shape[1] + sum(paddings),
input_shape[2])
output = T.zeros(output_shape)

# Set pad value and set subtensor of actual tensor
output = T.set_subtensor(output[:, :paddings[0], :], padvalue)
output = T.set_subtensor(output[:, paddings[1]:, :], padvalue)
output = T.set_subtensor(output[:, paddings[0]:x.shape[1] + paddings[0], :], x)
return output
# def temporal_padding(x, paddings=(1, 0), padvalue=0):
# '''Pad the middle dimension of a 3D tensor
# with `padding[0]` values left and `padding[1]` values right.
#
# Modified from keras.backend.temporal_padding
# https://github.com/fchollet/keras/blob/3bf913d/keras/backend/theano_backend.py#L590
#
# TODO: Implement for tensorflow (supposebly more easy)
# '''
# if not isinstance(paddings, (tuple, list, ndarray)):
# paddings = (paddings, paddings)
#
# input_shape = x.shape
# output_shape = (input_shape[0],
# input_shape[1] + sum(paddings),
# input_shape[2])
# output = T.zeros(output_shape)
#
# # Set pad value and set subtensor of actual tensor
# output = T.set_subtensor(output[:, :paddings[0], :], padvalue)
# output = T.set_subtensor(output[:, paddings[1]:, :], padvalue)
# output = T.set_subtensor(output[:, paddings[0]:x.shape[1] + paddings[0], :], x)
# return output

def neighbour_lookup(atoms, edges, maskvalue=0, include_self=False):
''' Looks up the features of an all atoms neighbours, for a batch of molecules.
Expand Down Expand Up @@ -64,8 +65,11 @@ def neighbour_lookup(atoms, edges, maskvalue=0, include_self=False):
masked_edges = edges + 1
# We then add a padding vector at index 0 by padding to the left of the
# lookup matrix with the value that the new mask should get
masked_atoms = temporal_padding(atoms, (1,0), padvalue=maskvalue)

# Theano padding:
# masked_atoms = temporal_padding(atoms, (1,0), padvalue=maskvalue)
# Tensorflow padding:
paddings = tf.constant([[0, 0], [1, 0], [0, 0]])
masked_atoms = tf.pad(atoms, paddings, "CONSTANT")

# Import dimensions
atoms_shape = K.shape(masked_atoms)
Expand All @@ -79,23 +83,34 @@ def neighbour_lookup(atoms, edges, maskvalue=0, include_self=False):

# create broadcastable offset
offset_shape = (batch_n, 1, 1)
offset = K.reshape(T.arange(batch_n, dtype=K.dtype(masked_edges)), offset_shape)
offset *= lookup_size
# Theano arange and cast:
# offset = K.reshape(T.arange(batch_n, dtype=K.dtype(masked_edges)), offset_shape)
# offset *= lookup_size
# Tensorflow range and backend cast:
offset = K.reshape(K.cast(tf.range(batch_n, dtype='int32'), dtype=K.dtype(masked_edges)), offset_shape)
offset *= K.cast(lookup_size, dtype=K.dtype(offset))


# apply offset to account for the fact that after reshape, all individual
# batch_n indices will be combined into a single big index
flattened_atoms = K.reshape(masked_atoms, (-1, num_atom_features))
flattened_edges = K.reshape(masked_edges + offset, (batch_n, -1))

# Gather flattened
flattened_result = K.gather(flattened_atoms, flattened_edges)
# flattened_result = K.gather(flattened_atoms, flattened_edges)
# Tensorflow/backend cast:
flattened_result = K.gather(flattened_atoms, K.cast(flattened_edges, dtype='int32'))

# Unflatten result
output_shape = (batch_n, max_atoms, max_degree, num_atom_features)
output = T.reshape(flattened_result, output_shape)
#output = T.reshape(flattened_result, output_shape)
# Tensorflow/backend reshape:
output = K.reshape(flattened_result, output_shape)

if include_self:
return K.concatenate([K.expand_dims(atoms, dim=2), output], axis=2)
# return K.concatenate([K.expand_dims(atoms, dim=2), output], axis=2)
# Tensorflow concat:
return tf.concat([K.expand_dims(atoms, dim=2), output], axis=2)
return output

class NeuralGraphHidden(layers.Layer):
Expand Down Expand Up @@ -233,7 +248,9 @@ def call(self, inputs, mask=None):
num_bond_features = bonds._keras_shape[-1]

# Create a matrix that stores for each atom, the degree it is
atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
# atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
# backend cast to floatx:
atom_degrees = K.sum(K.cast(K.not_equal(edges, -1), dtype=K.floatx()), axis=-1, keepdims=True)

# For each atom, look up the features of it's neighbour
neighbour_atom_features = neighbour_lookup(atoms, edges, include_self=True)
Expand All @@ -245,7 +262,9 @@ def call(self, inputs, mask=None):
summed_bond_features = K.sum(bonds, axis=-2)

# Concatenate the summed atom and bond features
summed_features = K.concatenate([summed_atom_features, summed_bond_features], axis=-1)
# summed_features = K.concatenate([summed_atom_features, summed_bond_features], axis=-1)
# Tensorflow concat:
summed_features = tf.concat([summed_atom_features, summed_bond_features], axis=-1)

# For each degree we convolve with a different weight matrix
new_features_by_degree = []
Expand Down Expand Up @@ -430,14 +449,18 @@ def call(self, inputs, mask=None):
# to create a general atom mask (unused atoms are 0 padded)
# We have to use the edge vector for this, because in theory, a convolution
# could lead to a zero vector for an atom that is present in the molecule
atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
# atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
# backend cast to floatx:
atom_degrees = K.sum(K.cast(K.not_equal(edges, -1), K.floatx()), axis=-1, keepdims=True)
general_atom_mask = K.cast(K.not_equal(atom_degrees, 0), K.floatx())

# Sum the edge features for each atom
summed_bond_features = K.sum(bonds, axis=-2)

# Concatenate the summed atom and bond features
atoms_bonds_features = K.concatenate([atoms, summed_bond_features], axis=-1)
# atoms_bonds_features = K.concatenate([atoms, summed_bond_features], axis=-1)
# Tensorflow concat:
atoms_bonds_features = tf.concat([atoms, summed_bond_features], axis=-1)

# Compute fingerprint
atoms_bonds_features._keras_shape = (None, max_atoms, num_atom_features+num_bond_features)
Expand Down Expand Up @@ -504,7 +527,9 @@ def call(self, inputs, mask=None):
# Take max along `degree` axis (2) to get max of neighbours and self
max_features = K.max(neighbour_atom_features, axis=2)

atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
# atom_degrees = K.sum(K.not_equal(edges, -1), axis=-1, keepdims=True)
# backend cast to floatx:
atom_degrees = K.sum(K.cast(K.not_equal(edges, -1), K.floatx()), axis=-1, keepdims=True)
general_atom_mask = K.cast(K.not_equal(atom_degrees, 0), K.floatx())

return max_features * general_atom_mask
Expand Down