diff --git a/Manual.html b/Manual.html index 41efd33..edcca99 100644 --- a/Manual.html +++ b/Manual.html @@ -4,7 +4,7 @@

PDBFixer

-
Copyright 2013-2023 by Peter Eastman and Stanford University
+
Copyright 2013-2024 by Peter Eastman and Stanford University

1. Introduction

@@ -207,6 +207,14 @@

Add Missing Hydrogens

The argument is the pH based on which to select protonation states. +

Optimize Hydrogen Bonds

+ +You can optimize the hydrogen bonding network by calling +

+fixer.optimizeHydrogenBonds() +

+This performs 180 degree rotations of HIS, ASN, and GLN sidechains, and moves hydrogens on neutral HIS residues, to maximize the total number of hydrogen bonds. +

Add Water

If you want to add a water box, call addSolvent(). This method has several optional arguments. Its full definition is diff --git a/pdbfixer/hbonds.py b/pdbfixer/hbonds.py new file mode 100644 index 0000000..acb13b1 --- /dev/null +++ b/pdbfixer/hbonds.py @@ -0,0 +1,293 @@ +import openmm as mm +import openmm.app as app +import openmm.unit as unit +from openmm.app.element import hydrogen, oxygen, nitrogen, fluorine +import numpy as np +import itertools +from collections import defaultdict +from collections.abc import Iterator +from typing import List + +class HydrogenBondOptimizer(object): + """This class rotates sidechains to optimize the hydrogen bond network.""" + + def __init__(self, fixer): + topology, positions = self.protonateHistidines(fixer) + + # Record the parent atom that every hydrogen is bonded to. + + parent = {} + for atom1, atom2 in topology.bonds(): + if atom1.element == hydrogen and atom2.element != hydrogen: + parent[atom1] = atom2 + elif atom2.element == hydrogen and atom1.element != hydrogen: + parent[atom2] = atom1 + + # Record the donors and acceptors in each residue. + + self.residueDonors = {} + self.residueAcceptors = {} + for residue in topology.residues(): + self.residueDonors[residue] = [] + self.residueAcceptors[residue] = [] + for atom in residue.atoms(): + if atom.element == hydrogen and atom in parent and parent[atom].element in (oxygen, nitrogen, fluorine): + self.residueDonors[residue].append((atom, parent[atom])) + elif atom.element in (oxygen, nitrogen, fluorine): + self.residueAcceptors[residue].append(atom) + + # Record the groups of atoms we will try to rotate. + + rotations = { + 'ASN': (('CB', 'CG'), ('OD1', 'ND2', 'HD21', 'HD22')), + 'GLN': (('CG', 'CD'), ('OE1', 'NE2', 'HE21', 'HE22')), + 'HIS': (('CB', 'CG'), ('ND1', 'CD2', 'CE1', 'NE2', 'HD1', 'HD2', 'HE1', 'HE2')) + } + residueRotations = {} + for residue in topology.residues(): + if residue.name in rotations: + axis, atoms = rotations[residue.name] + residueRotations[residue] = ([atom for atom in residue.atoms() if atom.name in axis], + [atom for atom in residue.atoms() if atom.name in atoms]) + + # For each residue we will be modifying, record all other residues that are close enough + # to potentially form hydrogen bonds. + + self.residueNeighbors = {} + residueBounds = dict((residue, BoundingSphere(residue.atoms(), positions)) for residue in topology.residues()) + for res1 in residueRotations: + self.residueNeighbors[res1] = [] + bounds = BoundingSphere(residueRotations[res1][1], positions) + for res2 in topology.residues(): + if res1 != res2 and bounds.distance(residueBounds[res2]) < 0.5: + close = False + for atom1 in residueRotations[res1][1]: + for atom2 in res2.atoms(): + delta = positions[atom1.index]-positions[atom2.index] + if unit.norm(delta) < 0.5: + close = True + break + if close: + break + if close: + self.residueNeighbors[res1].append(res2) + + # Divide them into clusters that need to be analyzed together. + + clusters = [[res] for res in self.residueNeighbors] + converged = False + while not converged: + converged = True + for i in range(len(clusters)): + if i >= len(clusters): + break + c1 = clusters[i] + for j in range(i+1, len(clusters)): + c2 = clusters[j] + if any(res2 in self.residueNeighbors[res1] for res1, res2 in itertools.product(c1, c2)): + c1 += c2 + del clusters[j] + converged = False + break + + # Create the rotated version of the coordinates. + + rotatedPositions = positions[:] + for residue in residueRotations: + axis, atoms = residueRotations[residue] + center = positions[axis[1].index] + e = center-positions[axis[0].index] + e /= unit.norm(e) + for atom in atoms: + d = positions[atom.index]-center + dot = np.dot(e, d) + rotatedPositions[atom.index] = center - d + 2*dot*e + + # For each variable residue, consider hydrogen bonds to other residues that are + # *not* variable. How does the number change? + + residueRotationHbondChange = defaultdict(int) + for residue in self.residueNeighbors: + for neighbor in self.residueNeighbors[residue]: + if neighbor not in self.residueNeighbors: + before = self.countHbonds(residue, neighbor, positions, positions) + after = self.countHbonds(residue, neighbor, rotatedPositions, positions) + residueRotationHbondChange[residue] += after-before + + # Now consider pairs of residues that are close enough to form hydrogen bonds to + # each other. + + pairRotationHbonds = {} + for residue in self.residueNeighbors: + for neighbor in self.residueNeighbors[residue]: + if neighbor.index > residue.index and neighbor in self.residueNeighbors: + pairRotationHbonds[(residue, neighbor)] = [ + [ + self.countHbonds(residue, neighbor, positions, positions), + self.countHbonds(residue, neighbor, positions, rotatedPositions) + ], + [ + self.countHbonds(residue, neighbor, rotatedPositions, positions), + self.countHbonds(residue, neighbor, rotatedPositions, rotatedPositions) + ] + ] + + # Optimize each cluster. + + newPositions = positions[:] + for cluster in clusters: + bestCombination = None + bestBonds = -1 + + # Loop over all possible combinations of which residues to rotate. + + for combination in itertools.product(*[[0,1]]*len(cluster)): + bonds = 0 + for rotated, residue in zip(combination, cluster): + if rotated == 1: + bonds += residueRotationHbondChange[residue] + for neighbor in self.residueNeighbors[residue]: + if (residue, neighbor) in pairRotationHbonds: + neighborRotated = combination[cluster.index(neighbor)] + bonds += pairRotationHbonds[(residue, neighbor)][rotated][neighborRotated] + if bonds > bestBonds: + bestCombination = combination + bestBonds = bonds + + # Set the positions to the optimal set of rotations. + + for rotated, residue in zip(bestCombination, cluster): + if rotated == 1: + for atom in residueRotations[residue][1]: + newPositions[atom.index] = rotatedPositions[atom.index] + + # Create the final topology and positions. + + fixer.topology, fixer.positions = self.deleteExtraHydrogens(fixer, topology, newPositions) + + def protonateHistidines(self, fixer): + """Add a second hydrogen to neutral histidines so we can identify bonds formed by both of them.""" + positions = fixer.positions.value_in_unit(unit.nanometer) + newTopology = app.Topology() + newTopology.setPeriodicBoxVectors(fixer.topology.getPeriodicBoxVectors()) + newPositions = [] + newAtoms = {} + for chain in fixer.topology.chains(): + newChain = newTopology.addChain(chain.id) + for residue in chain.residues(): + newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode) + for atom in residue.atoms(): + newAtoms[atom] = newTopology.addAtom(atom.name, atom.element, newResidue) + newPositions.append(positions[atom.index]) + if residue.name == 'HIS': + atomsByName = dict((atom.name, atom) for atom in newResidue.atoms()) + posByName = dict((atom.name, newPositions[atom.index]) for atom in newResidue.atoms()) + if all(name in atomsByName for name in ('CG', 'ND1', 'CD2', 'CE1', 'NE2')): + # If this residue has one of the two hydrogens, add the other. + if 'HD1' in atomsByName and 'HE2' not in atomsByName: + newAtom = newTopology.addAtom('HE2', hydrogen, newResidue) + newTopology.addBond(newAtom, atomsByName['NE2']) + axis = posByName['NE2'] - 0.5*(posByName['CD2']-posByName['CE1']) + axis /= unit.norm(axis) + newPositions.append(posByName['NE2'] + axis) + elif 'HD1' not in atomsByName and 'HE2' in atomsByName: + newAtom = newTopology.addAtom('HD1', hydrogen, newResidue) + newTopology.addBond(newAtom, atomsByName['ND1']) + axis = posByName['ND1'] - 0.5*(posByName['CG']-posByName['CE1']) + axis /= unit.norm(axis) + newPositions.append(posByName['ND1'] + axis) + for atom1, atom2 in fixer.topology.bonds(): + newTopology.addBond(newAtoms[atom1], newAtoms[atom2]) + return newTopology, newPositions + + def deleteExtraHydrogens(self, fixer, topology, positions): + """We may have added an extra hydrogen to some HIS residues. Figure out which one to keep + and delete the other one.""" + toDelete = [] + for oldRes, newRes in zip(fixer.topology.residues(), topology.residues()): + if oldRes.name == 'HIS' and len(oldRes) != len(newRes): + # See whether each of the hydrogens forms a hydrogen bond. + + atomsByName = dict((atom.name, atom) for atom in newRes.atoms()) + hd1 = atomsByName['HD1'] + nd1 = atomsByName['ND1'] + he2 = atomsByName['HE2'] + ne2 = atomsByName['NE2'] + hd1Acceptor = None + he2Acceptor = None + for neighbor in self.residueNeighbors[newRes]: + for a in self.residueAcceptors[neighbor]: + if self.isHbond(positions[nd1.index], positions[hd1.index], positions[a.index]): + hd1Acceptor = neighbor + if self.isHbond(positions[ne2.index], positions[he2.index], positions[a.index]): + he2Acceptor = neighbor + if hd1Acceptor is not None and he2Acceptor is None: + # HD1 forms a hydrogen bond. Delete HE2. + toDelete.append(he2) + elif hd1Acceptor is None and he2Acceptor is not None: + # HE2 forms a hydrogen bond. Delete HD1. + toDelete.append(hd1) + elif hd1Acceptor is None and he2Acceptor is None: + # Neither one forms a hydrogen bond. Delete the one that was not present + # in the original Topology. + if any(atom.name == 'HD1' for atom in oldRes.atoms()): + toDelete.append(he2) + else: + toDelete.append(hd1) + else: + # Both form hydrogen bonds. If one is bonded to a rotatable residue, keep that one. + # That residue was optimized based on the assumption it could form a hydrogen bond + # to this one. + if hd1Acceptor.name in ('ASN', 'GLN', 'HIS'): + toDelete.append(he2) + elif he2Acceptor.name in ('ASN', 'GLN', 'HIS'): + toDelete.append(hd1) + else: + # Delete the one that was not present in the original Topology. + if any(atom.name == 'HD1' for atom in oldRes.atoms()): + toDelete.append(he2) + else: + toDelete.append(hd1) + + # Create the new Topology and positions. + + modeller = app.Modeller(topology, positions) + modeller.delete(toDelete) + return modeller.topology, modeller.positions + + def isHbond(self, d: mm.Vec3, h: mm.Vec3, a: mm.Vec3): + """Decide whether three atoms could form a hydrogen bond.""" + if unit.norm(h-a) > 0.25: + return False + deltaDH = h-d + deltaHA = a-h + deltaDH /= unit.norm(deltaDH) + deltaHA /= unit.norm(deltaHA) + return np.arccos(np.dot(deltaDH, deltaHA)) < 60*np.pi/180 + + def countHbonds(self, res1: app.Residue, res2: app.Residue, pos1: List[mm.Vec3], pos2: List[mm.Vec3]): + """Count the number of hydrogen bonds between two residues.""" + count = 0 + for h, d in self.residueDonors[res1]: + for a in self.residueAcceptors[res2]: + if self.isHbond(pos1[d.index], pos1[h.index], pos2[a.index]): + count += 1 + for h, d in self.residueDonors[res2]: + for a in self.residueAcceptors[res1]: + if self.isHbond(pos2[d.index], pos2[h.index], pos1[a.index]): + count += 1 + return count + + +class BoundingSphere(object): + """Computes a bounding sphere for a set of atoms. This is used to accelerate searches for neighboring residues.""" + + def __init__(self, atoms: Iterator[app.Atom], positions: List[mm.Vec3]): + atoms = list(atoms) + minRange = mm.Vec3(*(min((positions[atom.index][i] for atom in atoms)) for i in range(3))) + maxRange = mm.Vec3(*(max((positions[atom.index][i] for atom in atoms)) for i in range(3))) + self.center = 0.5*(minRange+maxRange) + self.radius = max(unit.norm(self.center-positions[atom.index]) for atom in atoms) + + def distance(self, other: 'BoundingSphere'): + return max(0, unit.norm(self.center-other.center) - self.radius - other.radius) diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index de434eb..1d27e28 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -39,6 +39,7 @@ from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader from openmm.app.element import hydrogen, oxygen from openmm.app.forcefield import NonbondedGenerator +from .hbonds import HydrogenBondOptimizer # Support Cythonized functions in OpenMM 7.3 # and also implementations in older versions. @@ -1340,6 +1341,18 @@ def _describeVariant(self, residue, definitions): variant.append((h, parent)) return variant + def optimizeHydrogenBonds(self): + """Optimize the hydrogen bond network. + + This should be called after hydrogens have been added. It considers two types of changes: + + 1. 180 degree rotations of the terminal groups in HIS, ASN, and GLN sidechains. + 2. For neutral HIS residues, exchanging the hydrogen position between ND1 and NE2. + + It considers all possible combinations of them to find the one producing the largest number of hydrogen bonds. + """ + HydrogenBondOptimizer(self) + def addSolvent(self, boxSize=None, padding=None, boxVectors=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*unit.molar, boxShape='cube'): """Add a solvent box surrounding the structure.