From dc375e2b85c8d9557dd66e71b5ccdd69392ff154 Mon Sep 17 00:00:00 2001 From: Alejandro Date: Wed, 30 Nov 2022 09:17:55 +0100 Subject: [PATCH] sdf for non-cannonical residues --- moleculekit/tools/preparation.py | 55 +++++++++++++++++--------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/moleculekit/tools/preparation.py b/moleculekit/tools/preparation.py index 032f4357..d7189406 100644 --- a/moleculekit/tools/preparation.py +++ b/moleculekit/tools/preparation.py @@ -93,7 +93,7 @@ def _generate_nonstandard_residues_ff( _molkit_ff=True, outdir=None, ignore_ns_errors=False, - residue_smiles=None, + residue_sdf=None, ): import tempfile from moleculekit.tools.preparation_customres import _get_custom_ff @@ -104,6 +104,7 @@ def _generate_nonstandard_residues_ff( _mol_to_xml_def, _prepare_for_parameterize, ) + from moleculekit.tools.graphalignment import makeMolGraph, compareGraphs uqprot_resn = np.unique(mol.get("resname", sel="protein")) not_in_ff = [] @@ -127,29 +128,31 @@ def _generate_nonstandard_residues_ff( for res in not_in_ff: try: logger.info(f"Attempting to template non-canonical residue {res}...") - # This removes the non-canonical hydrogens from the original mol object - mol.remove((mol.resname == res) & (mol.element == "H"), _logger=False) + molc = mol.copy() + molc.filter(f"resname {res}", _logger=False) - # Hacky way of getting the first molecule, if there are copies - molresn = molc.resname == res - firstname = molc.name[molresn][0] - lastname = molc.name[molresn][-1] - start = np.where(molresn & (molc.name == firstname))[0][0] - end = np.where(molresn & (molc.name == lastname))[0][0] - # Remove all other stuff - molc.filter(f"index {start} to {end}", _logger=False) - - if len(np.unique(molc.name)) != molc.numAtoms: - raise RuntimeError( - f"Residue {res} contains duplicate atom names. Please rename the atoms to have unique names." - ) - - smiles = None - if residue_smiles is not None and res in residue_smiles: - smiles = residue_smiles[res] - - tmol = _template_residue_from_smiles(molc, res, smiles=smiles) + tmol = Molecule(residue_sdf[res]) + + fields = ("element",) + g1 = makeMolGraph(tmol, "all", fields) + g2 = makeMolGraph(molc, "all", fields) + _, _, matching = compareGraphs( + g1, g2, fields=fields, tolerance=0.5, returnmatching=True + ) + for pp in matching: # Rename atoms in reference molecule + tmol.name[pp[0]] = molc.name[pp[1]] + + # Rename non-matched hydrogens to X_H to rename later + matched = [pp[0] for pp in matching] + for i in np.where(tmol.element == "H")[0]: + if i in matched: + continue + tmol.name[i] = "X_H" + + #templating method + #tmol = _template_residue_from_smiles(molc, res, smiles=smiles) + cres = _process_custom_residue(tmol, res) _mol_to_xml_def(cres, os.path.join(tmpdir, f"{res}.xml")) @@ -531,7 +534,7 @@ def systemPrepare( _logger_level="ERROR", _molkit_ff=True, outdir=None, - residue_smiles=None, + residue_sdf=None, ): """Prepare molecular systems through protonation and h-bond optimization. @@ -629,7 +632,9 @@ def systemPrepare( If True it will ignore errors on non-canonical residues leaving them unprotonated. outdir : str A path where to save custom residue cif files used for building - + residue_sdf : dict or None + A dictionary mapping each non-cannonical residue name to an SDF with defined bond orders and hydrogens. + {'NAG':'/path/to/sdf.sdf', 'SUG':'/path/to/sdf2.sdf'} Returns ------- mol_out : moleculekit.molecule.Molecule @@ -713,7 +718,7 @@ def systemPrepare( _molkit_ff, outdir, ignore_ns_errors=ignore_ns_errors, - residue_smiles=residue_smiles, + residue_sdf=residue_sdf, ) nonpept = None