From 0c216218f5e7788d7dda60eab7d6e236e903c701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ccyz-32526=E2=80=9D?= <345609226@qq.com> Date: Sat, 4 Nov 2023 19:46:30 +0800 Subject: [PATCH 1/9] add molstm for momu kmolformer --- scripts/multimodal/moledit/edit.sh | 23 +++++++++++++++++++++++ scripts/multimodal/moledit/train.sh | 24 ++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 scripts/multimodal/moledit/edit.sh create mode 100755 scripts/multimodal/moledit/train.sh diff --git a/scripts/multimodal/moledit/edit.sh b/scripts/multimodal/moledit/edit.sh new file mode 100644 index 0000000..3eb3285 --- /dev/null +++ b/scripts/multimodal/moledit/edit.sh @@ -0,0 +1,23 @@ +#!/bin/bash molkformer--Graph momu--Graph molstm--SMILES/Graph ID---./models/MoleculeSTM/downstream_molecule_edit_utils.py +MODE="test" +MODEL="molstm" +DEVICE=$1 +EPOCHS=100 +MOL_TYPE="Graph" +ID=101 + +python open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py \ +--device ${DEVICE} \ +--config_path ./configs/moledit/${MODEL}-MegaMolBART.json \ +--MegaMolBART_generation_model_dir ./ckpts/mol_edit_ckpts/pretrained_MegaMolBART/checkpoints \ +--input_SMILES_file ./datasets/mol_edit/Editing_data/single_multi_property_SMILES.txt \ +--language_edit_model_dir_new ./ckpts/finetune_ckpts/moledit/${MODEL} \ +--language_edit_model_dir ./ckpts/mol_edit_ckpts/demo_checkpoints_${MOL_TYPE} \ +--vocab_path ./open_biomed/models/MoleculeSTM/bart_vocab.txt \ +--output_model_dir ./open_biomed/tasks/mol_edit \ +--text_mode ./ckpts/mol_edit_ckpts/pretrained_SciBERT \ +--epochs ${EPOCHS} \ +--input_description_id ${ID} \ +--MoleculeSTM_molecule_type ${MOL_TYPE} \ +--MoleculeSTM_model_dir ./ckpts/mol_edit_ckpts/demo_checkpoints_${MOL_TYPE} \ +--MASTER_PORT '6000' \ No newline at end of file diff --git a/scripts/multimodal/moledit/train.sh b/scripts/multimodal/moledit/train.sh new file mode 100755 index 0000000..39a9082 --- /dev/null +++ b/scripts/multimodal/moledit/train.sh @@ -0,0 
+1,24 @@ +#!/bin/bash molkformer--Graph momu--Graph molstm--SMILES/Graph +MODE="train" +MODEL="molkformer" +DEVICE=$1 +EPOCHS=100 +MOL_TYPE="Graph" + +mkdir ./ckpts/finetune_ckpts/moledit/${MODEL} + +python open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py \ +--device ${DEVICE} \ +--MoleculeSTM_molecule_type ${MOL_TYPE} \ +--config_path ./configs/moledit/${MODEL}-MegaMolBART.json \ +--dataset ZINC250K \ +--dataset_path ./datasets/mol_edit/ZINC250K_data \ +--output_path ./ckpts/finetune_ckpts/moledit/${MODEL}/ \ +--mode ${MODE} \ +--epochs ${EPOCHS} \ +--num_workers 8 \ +--batch_size 256 \ +--vocab_path ./open_biomed/models/MoleculeSTM/bart_vocab.txt \ +--MoleculeSTM_model_dir ./ckpts/mol_edit_ckpts/demo_checkpoints_${MOL_TYPE} \ +--MegaMolBART_generation_model_dir ./ckpts/mol_edit_ckpts/pretrained_MegaMolBART/checkpoints \ +--MASTER_PORT '6000' \ No newline at end of file From e9b7f2a864922400f70a5bbd8902bc687471bb51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ccyz-32526=E2=80=9D?= <345609226@qq.com> Date: Sat, 4 Nov 2023 19:58:45 +0800 Subject: [PATCH 2/9] add molstm for momu kmolformer --- configs/moledit/molkformer-MegaMolBART.json | 51 + configs/moledit/molstm-MegaMolBART.json | 24 + configs/moledit/momu-MegaMolBART.json | 43 + open_biomed/datasets/moledit_dataset.py | 84 ++ open_biomed/models/MoleculeSTM/__init__.py | 0 ...guage_edit_step_00_check_reconstruction.py | 106 ++ ...t_step_01_molecule_representation_align.py | 229 +++ ...nguage_edit_step_02_latent_optimization.py | 161 +++ .../backup/downstream_language_edit_utils.py | 133 ++ .../MoleculeSTM/cuchemcommon/__init__.py | 0 .../MoleculeSTM/cuchemcommon/context.py | 53 + .../MoleculeSTM/cuchemcommon/data/__init__.py | 45 + .../cuchemcommon/data/cluster_wf.py | 61 + .../cuchemcommon/data/generative_wf.py | 19 + .../cuchemcommon/data/helper/__init__.py | 0 .../cuchemcommon/data/helper/chembldata.py | 320 +++++ .../MoleculeSTM/cuchemcommon/fingerprint.py | 95 ++ 
.../models/MoleculeSTM/cuchemcommon/smiles.py | 38 + .../cuchemcommon/utils/__init__.py | 1 + .../MoleculeSTM/cuchemcommon/utils/logger.py | 106 ++ .../cuchemcommon/utils/singleton.py | 26 + .../MoleculeSTM/cuchemcommon/utils/sysinfo.py | 68 + .../MoleculeSTM/cuchemcommon/workflow.py | 201 +++ .../MoleculeSTM/datasets/DrugBankGraph.py | 235 ++++ .../MoleculeSTM/datasets/DrugBankSMILES.py | 94 ++ .../MoleculeSTM/datasets/MoleculeNetGraph.py | 584 ++++++++ .../MoleculeSTM/datasets/MoleculeNetSMILES.py | 36 + .../models/MoleculeSTM/datasets/PubChemSTM.py | 275 ++++ .../MoleculeSTM/datasets/PubChemSTM_raw.py | 172 +++ .../MoleculeSTM/datasets/ZINC250K_Graph.py | 67 + .../MoleculeSTM/datasets/ZINC250K_SMILES.py | 31 + .../models/MoleculeSTM/datasets/__init__.py | 8 + .../models/MoleculeSTM/datasets/utils.py | 182 +++ .../downstream_molecule_edit_utils.py | 503 +++++++ .../MoleculeSTM/models/GA/ZINC_first_1000.smi | 1000 +++++++++++++ .../models/MoleculeSTM/models/GA/__init__.py | 0 .../models/MoleculeSTM/models/GA/crossover.py | 194 +++ .../models/MoleculeSTM/models/GA/mutate.py | 132 ++ open_biomed/models/MoleculeSTM/models/MLP.py | 49 + .../models/MoleculeSTM/models/__init__.py | 2 + .../models/mega_molbart/__init__.py | 1 + .../models/mega_molbart/decoder.py | 426 ++++++ .../models/mega_molbart/mega_mol_bart.py | 471 +++++++ .../models/mega_molbart/megatron_bart.py | 800 +++++++++++ .../models/mega_molbart/tokenizer.py | 483 +++++++ .../MoleculeSTM/models/mega_molbart/util.py | 21 + .../MoleculeSTM/models/molecule_gnn_model.py | 197 +++ open_biomed/models/MoleculeSTM/splitters.py | 93 ++ open_biomed/models/MoleculeSTM/utils.py | 71 + open_biomed/models/__init__.py | 4 +- .../models/multimodal/molkformer/kformer.py | 1244 +++++++++++++++++ .../multimodal/molkformer/mol_kformer.py | 277 ++++ .../models/task_model/moledit_model.py | 52 + .../moledit_step_01_Space_Alignment.py | 311 +++++ .../moledit_step_02_Latent_Optimization.py | 247 ++++ 55 files changed, 10125 
insertions(+), 1 deletion(-) create mode 100644 configs/moledit/molkformer-MegaMolBART.json create mode 100644 configs/moledit/molstm-MegaMolBART.json create mode 100644 configs/moledit/momu-MegaMolBART.json create mode 100644 open_biomed/datasets/moledit_dataset.py create mode 100644 open_biomed/models/MoleculeSTM/__init__.py create mode 100644 open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_00_check_reconstruction.py create mode 100644 open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_01_molecule_representation_align.py create mode 100644 open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_02_latent_optimization.py create mode 100644 open_biomed/models/MoleculeSTM/backup/downstream_language_edit_utils.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/__init__.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/context.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/__init__.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/cluster_wf.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/generative_wf.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/__init__.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/chembldata.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/fingerprint.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/smiles.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/utils/__init__.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/utils/logger.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/utils/singleton.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/utils/sysinfo.py create mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/workflow.py create mode 100644 open_biomed/models/MoleculeSTM/datasets/DrugBankGraph.py create mode 100644 
open_biomed/models/MoleculeSTM/datasets/DrugBankSMILES.py create mode 100644 open_biomed/models/MoleculeSTM/datasets/MoleculeNetGraph.py create mode 100644 open_biomed/models/MoleculeSTM/datasets/MoleculeNetSMILES.py create mode 100644 open_biomed/models/MoleculeSTM/datasets/PubChemSTM.py create mode 100644 open_biomed/models/MoleculeSTM/datasets/PubChemSTM_raw.py create mode 100644 open_biomed/models/MoleculeSTM/datasets/ZINC250K_Graph.py create mode 100644 open_biomed/models/MoleculeSTM/datasets/ZINC250K_SMILES.py create mode 100644 open_biomed/models/MoleculeSTM/datasets/__init__.py create mode 100644 open_biomed/models/MoleculeSTM/datasets/utils.py create mode 100644 open_biomed/models/MoleculeSTM/downstream_molecule_edit_utils.py create mode 100644 open_biomed/models/MoleculeSTM/models/GA/ZINC_first_1000.smi create mode 100644 open_biomed/models/MoleculeSTM/models/GA/__init__.py create mode 100644 open_biomed/models/MoleculeSTM/models/GA/crossover.py create mode 100644 open_biomed/models/MoleculeSTM/models/GA/mutate.py create mode 100644 open_biomed/models/MoleculeSTM/models/MLP.py create mode 100644 open_biomed/models/MoleculeSTM/models/__init__.py create mode 100644 open_biomed/models/MoleculeSTM/models/mega_molbart/__init__.py create mode 100644 open_biomed/models/MoleculeSTM/models/mega_molbart/decoder.py create mode 100644 open_biomed/models/MoleculeSTM/models/mega_molbart/mega_mol_bart.py create mode 100644 open_biomed/models/MoleculeSTM/models/mega_molbart/megatron_bart.py create mode 100644 open_biomed/models/MoleculeSTM/models/mega_molbart/tokenizer.py create mode 100644 open_biomed/models/MoleculeSTM/models/mega_molbart/util.py create mode 100644 open_biomed/models/MoleculeSTM/models/molecule_gnn_model.py create mode 100644 open_biomed/models/MoleculeSTM/splitters.py create mode 100644 open_biomed/models/MoleculeSTM/utils.py create mode 100644 open_biomed/models/multimodal/molkformer/kformer.py create mode 100644 
open_biomed/models/multimodal/molkformer/mol_kformer.py create mode 100644 open_biomed/models/task_model/moledit_model.py create mode 100644 open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py create mode 100644 open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py diff --git a/configs/moledit/molkformer-MegaMolBART.json b/configs/moledit/molkformer-MegaMolBART.json new file mode 100644 index 0000000..9bf615a --- /dev/null +++ b/configs/moledit/molkformer-MegaMolBART.json @@ -0,0 +1,51 @@ +{ + "model": "molkformer-MegaMolBART", + "data": { + "mol": { + "modality": ["structure"], + "featurizer": { + "structure": { + "name": "MultiScale", + "scales": ["SMILES", "graph"], + "SMILES": { + "name": "moleculeSTM", + "transformer_type": "molbart", + "model_name_or_path": "./open_biomed/models/MoleculeSTM/bart_vocab.txt", + "max_length": 512 + }, + "graph": { + "name": "BaseGNN" + } + } + + } + }, + "text": { + "name": "TransformerTokenizer", + "transformer_type": "biot5", + "max_length": 512, + "model_name_or_path": "./ckpts/text_ckpts/t5-v1.1-base", + "path_selfies": "./assets/tokenizers/biot5/selfies_dict.txt" + } + }, + "network": { + "graph": { + "name": "molkformer", + "structure": { + "gin_hidden_dim": 300, + "gin_num_layers": 5, + "drop_ratio": 0.0 + }, + "decoder": { + "config_file": "./ckpts/text_ckpts/t5-v1.1-base/config.json" + }, + "kformer_config_file": "./configs/encoders/multimodal/kformer_config.json", + "encoder_tokenizer": "./ckpts/text_ckpts/scibert_scivocab_uncased", + "decoder_tokenizer": "./ckpts/text_ckpts/t5-v1.1-base", + "path_selfies": "./assets/tokenizers/biot5/selfies_dict.txt", + "max_n_atoms": 256, + "projection_dim": 256, + "init_checkpoint": "./ckpts/fusion_ckpts/mol_kformer_biot5.pth" + } + } +} \ No newline at end of file diff --git a/configs/moledit/molstm-MegaMolBART.json b/configs/moledit/molstm-MegaMolBART.json new file mode 100644 index 0000000..7532052 --- /dev/null +++ b/configs/moledit/molstm-MegaMolBART.json 
@@ -0,0 +1,24 @@ +{ + "model": "molstm-MegaMolBART", + "data": { + "mol": { + "modality": ["structure"], + "featurizer": { + "structure": { + "name": "MultiScale", + "scales": ["SMILES", "graph"], + "SMILES": { + "name": "moleculeSTM", + "transformer_type": "molbart", + "model_name_or_path": "./open_biomed/models/MoleculeSTM/bart_vocab.txt", + "max_length": 512 + }, + "graph": { + "name": "BaseGNN" + } + } + } + } + } + +} \ No newline at end of file diff --git a/configs/moledit/momu-MegaMolBART.json b/configs/moledit/momu-MegaMolBART.json new file mode 100644 index 0000000..f2dd4fa --- /dev/null +++ b/configs/moledit/momu-MegaMolBART.json @@ -0,0 +1,43 @@ +{ + "model": "momu-MegaMolBART", + "data": { + "mol": { + "modality": ["structure"], + "featurizer": { + "structure": { + "name": "MultiScale", + "scales": ["SMILES", "graph"], + "SMILES": { + "name": "moleculeSTM", + "transformer_type": "molbart", + "model_name_or_path": "./open_biomed/models/MoleculeSTM/bart_vocab.txt", + "max_length": 512 + }, + "graph": { + "name": "ogb" + } + } + + } + } + }, + "network": { + "graph": { + "name": "momu", + "gin_hidden_dim": 300, + "gin_num_layers": 5, + "drop_ratio": 0.0, + "graph_pooling": "sum", + "graph_self": false, + "max_n_nodes": -1, + "bert_dropout": 0.0, + "bert_hidden_dim": 768, + "output_dim": 300, + "projection_dim": 256, + "init_checkpoint": "./ckpts/fusion_ckpts/momu/MoMu-S.ckpt", + "param_key": "state_dict", + "stop_grad": false + } + + } +} \ No newline at end of file diff --git a/open_biomed/datasets/moledit_dataset.py b/open_biomed/datasets/moledit_dataset.py new file mode 100644 index 0000000..aafe085 --- /dev/null +++ b/open_biomed/datasets/moledit_dataset.py @@ -0,0 +1,84 @@ +import logging +logger = logging.getLogger(__name__) + +from abc import ABC, abstractmethod + +import os +import csv + +import torch +from torch.utils.data import Dataset +import pandas as pd + +from feature.mol_featurizer import MolMultiModalFeaturizer +from 
feature.text_featurizer import TextTransformerTokFeaturizer +from utils.mol_utils import valid_smiles +from models.MoleculeSTM.models.mega_molbart.tokenizer import MolEncTokenizer + +class MoleditDataset(Dataset, ABC): + def __init__(self, path, config): + super(MoleditDataset, self).__init__() + self.path = path + self.config = config + self._load_data() + self._featurize() + + @abstractmethod + def _load_data(self): + raise NotImplementedError + + def _featurize(self): + featurizer = MolMultiModalFeaturizer(self.config) + self.mols = [featurizer(smi) for smi in self.smiles] + + smiles_emb = [d['structure'] for d in self.mols] + smiles_emb = [d['SMILES'] for d in smiles_emb] + smiles_emb = [item for sublist in smiles_emb for item in sublist] + tokens, orig_pad_masks = self._pad_seqs(smiles_emb) + smiles = [{'input_ids': tokens, 'pad_masks': pad_masks} for tokens, pad_masks in zip(tokens, orig_pad_masks)] + for i, dictionary in enumerate(smiles): + self.mols[i]["structure"]["SMILES"] = dictionary + + + @staticmethod + def _pad_seqs(seqs, pad_token = 0): + pad_length = max([len(seq) for seq in seqs]) + padded = [seq + ([pad_token] * (pad_length - len(seq))) for seq in seqs] + masks = [([0] * len(seq)) + ([1] * (pad_length - len(seq))) for seq in seqs] + return padded, masks + + def __getitem__(self, index): + return self.mols[index] + + def __len__(self): + return len(self.mols) + + +class ZINC250K(MoleditDataset): + def __init__(self, path, config, split): + self.split = split + super(ZINC250K, self).__init__(path, config) + + def _load_data(self, subset_size=1000): + + SMILES_file = os.path.join(self.path, "raw/250k_rndm_zinc_drugs_clean_3.csv") + df = pd.read_csv(SMILES_file) + smiles = df['smiles'].tolist() # Already canonical SMILES + self.smiles = [x.strip() for x in smiles] + + new_SMILES_file = os.path.join(self.path, "raw/smiles.csv") + if not os.path.exists(new_SMILES_file): + data_smiles_series = pd.Series(self.smiles) + print("saving to 
{}".format(new_SMILES_file)) + data_smiles_series.to_csv(new_SMILES_file, index=False, header=False) + + if subset_size is not None: + self.smiles = self.smiles[:subset_size] + + + + +SUPPORTED_MOLEDIT_DATASET = { + "ZINC250K": ZINC250K +} + diff --git a/open_biomed/models/MoleculeSTM/__init__.py b/open_biomed/models/MoleculeSTM/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_00_check_reconstruction.py b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_00_check_reconstruction.py new file mode 100644 index 0000000..3f10668 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_00_check_reconstruction.py @@ -0,0 +1,106 @@ +import argparse +import os +import numpy as np +from rdkit import Chem +from rdkit.Chem import Descriptors + +import torch +from torch.utils.data import DataLoader as torch_DataLoader + +from MoleculeSTM.utils import freeze_network +from MoleculeSTM.datasets import ZINC15_Datasets_Only_SMILES, PubChem_Datasets_Only_SMILES +from MoleculeSTM.models.mega_molbart.mega_mol_bart import MegaMolBART + +props = [ + "qed", "MolWt", "MolLogP", "TPSA", + "HeavyAtomCount", "NumAromaticRings", "NumHAcceptors", "NumHDonors", "NumRotatableBonds" +] +props = [ + "MolWt", "MolLogP" +] +prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--device", type=int, default=0) + parser.add_argument("--verbose", type=int, default=1) + parser.add_argument("--dataspace_path", type=str, default="../../Datasets") + parser.add_argument("--dataset", type=str, default="ZINC15") + parser.add_argument("--molecule_type", type=str, default="MegaMolBART", choices=["MegaMolBART", "Graph"]) + + ########## for MoleculeSTM ########## + 
parser.add_argument("--CLIP_input_model_dir", type=str, default="../../pretrained_model") + parser.add_argument("--SSL_emb_dim", type=int, default=256) + + ########## for generation ########## + parser.add_argument("--generation_model_dir", type=str, default="../../Datasets/pretrained_MegaMolBART/checkpoints") + + ########## for optimization ########## + parser.add_argument("--batch_size", type=int, default=64) + parser.add_argument("--num_workers", type=int, default=8) + + args = parser.parse_args() + print(args) + + # This is loading from the pretarined_MegaMolBART + MegaMolBART_wrapper = MegaMolBART(input_dir=args.generation_model_dir, output_dir=None) + molecule_model_generation = MegaMolBART_wrapper.model + print("Loading from pretrained MegaMolBART ({}).".format(args.generation_model_dir)) + molecule_dim_generation = 256 + + device = torch.device("cuda:" + str(args.device)) \ + if torch.cuda.is_available() else torch.device("cpu") + molecule_model_generation = molecule_model_generation.to(device) + + np.random.seed(args.seed) + torch.random.manual_seed(args.seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(args.seed) + device = torch.device("cuda:" + str(args.device)) \ + if torch.cuda.is_available() else torch.device("cpu") + + freeze_network(molecule_model_generation) + molecule_model_generation.eval() + + if args.molecule_type == "MegaMolBART": + if args.dataset == "ZINC15": + dataset_root = os.path.join(args.dataspace_path, "ZINC15_data") + dataset = ZINC15_Datasets_Only_SMILES(dataset_root) + elif "PubChem" in args.dataset: + dataset_root = os.path.join(args.dataspace_path, "PubChem_data") + dataset = PubChem_Datasets_Only_SMILES(dataset_root) + else: + raise Exception + dataloader_class = torch_DataLoader + else: + raise Exception + + dataloader = dataloader_class(dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) + + for batch_idx, batch in enumerate(dataloader): + SMILES_list = batch + 
print("SMILES_list", SMILES_list) + + for original_SMILES in SMILES_list: + mol = Chem.MolFromSmiles(original_SMILES) + for name, func in prop_pred: + value = func(mol) + print("{}: {}".format(name, value)) + canon_original_SMILES = Chem.MolToSmiles(mol) + + latent_code_init, pad_mask_init = MegaMolBART_wrapper.smileslist2embedding_model_given(molecule_model_generation, [original_SMILES]) # [pad, B, d], [pad, B] + print("latent_code:\t", latent_code_init[0, :, :5]) + + latent_code_init, pad_mask_init = MegaMolBART_wrapper.smileslist2embedding_model_given(molecule_model_generation, [canon_original_SMILES]) # [pad, B, d], [pad, B] + print("latent_code:\t", latent_code_init[0, :, :5]) + + generated_SMILES = MegaMolBART_wrapper.inverse_transform([latent_code_init], pad_mask_init.bool().cuda(), k=1, sanitize=True) + print("original SMILES: \t", original_SMILES) + print("original SMILES (canon): \t", canon_original_SMILES) + print("reconstructured SMILES: \t", generated_SMILES[0]) + print() + + if batch_idx >= 9: + break diff --git a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_01_molecule_representation_align.py b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_01_molecule_representation_align.py new file mode 100644 index 0000000..bea3fc9 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_01_molecule_representation_align.py @@ -0,0 +1,229 @@ +import argparse +import os +import numpy as np +from tqdm import tqdm +import time + +import torch +import torch.nn as nn +from torch import optim +import torch.nn.functional as F +from torch.utils.data import DataLoader as torch_DataLoader + +from MoleculeSTM.utils import get_molecule_repr_MoleculeSTM +from MoleculeSTM.downstream_language_edit_utils import load_molecule_models +from MoleculeSTM.utils import freeze_network +from MoleculeSTM.datasets import PubChem_Datasets_Only_SMILES + + +def cycle_index(num, shift): + arr = torch.arange(num) + shift 
+ arr[-shift:] = torch.arange(shift) + return arr + + +def do_CL(X, Y, args): + if args.normalize: + X = F.normalize(X, dim=-1) + Y = F.normalize(Y, dim=-1) + + if args.SSL_loss == 'EBM_NCE': + criterion = nn.BCEWithLogitsLoss() + neg_Y = torch.cat([Y[cycle_index(len(Y), i + 1)] for i in range(args.CL_neg_samples)], dim=0) + neg_X = X.repeat((args.CL_neg_samples, 1)) + + pred_pos = torch.sum(X * Y, dim=1) / args.T + pred_neg = torch.sum(neg_X * neg_Y, dim=1) / args.T + + loss_pos = criterion(pred_pos, torch.ones(len(pred_pos)).to(pred_pos.device)) + loss_neg = criterion(pred_neg, torch.zeros(len(pred_neg)).to(pred_neg.device)) + CL_loss = (loss_pos + args.CL_neg_samples * loss_neg) / (1 + args.CL_neg_samples) + + CL_acc = (torch.sum(pred_pos > 0).float() + torch.sum(pred_neg < 0).float()) / \ + (len(pred_pos) + len(pred_neg)) + CL_acc = CL_acc.detach().cpu().item() + + elif args.SSL_loss == 'InfoNCE': + criterion = nn.CrossEntropyLoss() + B = X.size()[0] + logits = torch.mm(X, Y.transpose(1, 0)) # B*B + logits = torch.div(logits, args.T) + labels = torch.arange(B).long().to(logits.device) # B*1 + + CL_loss = criterion(logits, labels) + pred = logits.argmax(dim=1, keepdim=False) + CL_acc = pred.eq(labels).sum().detach().cpu().item() * 1. 
/ B + + else: + raise Exception + + return CL_loss, CL_acc + + +def mean_pooling(token_embeddings, attention_mask): + input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() # [pad, B, d] + sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 0) # [B, d] + sum_mask = torch.clamp(input_mask_expanded.sum(0), min=1e-9) # [B, d] + return sum_embeddings / sum_mask + + +def get_molecule_repr_generation(molecule_data, molecule_model, molecule_type="MegaMolBART", MegaMolBART_wrapper=None): + if molecule_type == "MegaMolBART": + embedding, pad_mask = MegaMolBART_wrapper.smileslist2embedding_model_given(molecule_model, molecule_data) # [pad, B, d], [pad, B] + # molecule_repr = embedding[0, :, :] # [B, d] + # next we will take the mean pooling instead of the CLS token. + molecule_repr = mean_pooling(embedding, pad_mask) + else: + molecule_repr, _ = molecule_model(molecule_data) + return molecule_repr + + +def save_model(save_best, epoch=None): + if args.output_model_dir is not None: + if save_best: + global optimal_loss + print("save model with loss: {:.5f}".format(optimal_loss)) + model_file = "model.pth" + + elif epoch is None: + model_file = "model_final.pth" + + else: + model_file = "model_{}.pth".format(epoch) + + saved_file_path = os.path.join(args.output_model_dir, "generation2MoleculeSTM_{}".format(model_file)) + torch.save(generation2MoleculeSTM.state_dict(), saved_file_path) + + saved_file_path = os.path.join(args.output_model_dir, "MoleculeSTM2generation_{}".format(model_file)) + torch.save(MoleculeSTM2generation.state_dict(), saved_file_path) + return + + +def train(epoch): + if args.verbose: + L = tqdm(dataloader) + else: + L = dataloader + + start_time = time.time() + accum_loss, accum_acc = 0, 0 + for batch in L: + SMILES_list = batch + + molecule_repr_generation = get_molecule_repr_generation( + SMILES_list, molecule_model=molecule_model_generation, + molecule_type="MegaMolBART", 
MegaMolBART_wrapper=MegaMolBART_wrapper + ) + molecule_repr_generation2MoleculeSTM = generation2MoleculeSTM(molecule_repr_generation) + + molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( + SMILES_list, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, + molecule_type="MegaMolBART", MegaMolBART_wrapper=MegaMolBART_wrapper + ) + molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) + + loss_01, acc_01 = do_CL(molecule_repr_generation, molecule_repr_MoleculeSTM2generation, args) + loss_02, acc_02 = do_CL(molecule_repr_MoleculeSTM, molecule_repr_generation2MoleculeSTM, args) + loss = (loss_01 + loss_02) / 2 + acc = (acc_01 + acc_02) / 2 + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + accum_loss += loss.item() + accum_acc += acc + + accum_loss /= len(L) + accum_acc /= len(L) + + global optimal_loss + temp_loss = accum_loss + if temp_loss < optimal_loss: + optimal_loss = temp_loss + save_model(save_best=True, epoch=epoch) + print("CL Loss: {:.5f}\tCL Acc: {:.5f}Time: {:.5f}".format(accum_loss, accum_acc, time.time() - start_time)) + return + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--device", type=int, default=0) + parser.add_argument("--verbose", type=int, default=1) + parser.add_argument("--dataspace_path", type=str, default="../../Datasets") + parser.add_argument("--dataset", type=str, default="PubChem") + parser.add_argument("--molecule_type", type=str, default="MegaMolBART", choices=["MegaMolBART", "Graph"]) + parser.add_argument("--output_model_dir", type=str, default=None) + + ########## for MoleculeSTM ########## + parser.add_argument("--MoleculeSTM_model_dir", type=str, default="../../pretrained_model_Raw") + parser.add_argument("--SSL_emb_dim", type=int, default=256) + + ########## for generation ########## + parser.add_argument("--generation_model_dir", type=str, 
default="../../Datasets/pretrained_MegaMolBART/checkpoints") + + ########## for optimization ########## + parser.add_argument("--batch_size", type=int, default=256) + parser.add_argument("--num_workers", type=int, default=8) + parser.add_argument("--epochs", type=int, default=100) + parser.add_argument("--decay", type=float, default=0) + parser.add_argument("--generation_lr", type=float, default=1e-4) + parser.add_argument("--MoleculeSTM_lr", type=float, default=1e-4) + parser.add_argument("--T", type=float, default=0.1) + parser.add_argument("--SSL_loss", type=str, default="EBM_NCE", choices=["EBM_NCE", "InfoNCE"]) + parser.add_argument("--CL_neg_samples", type=int, default=1) + parser.add_argument('--normalize', dest='normalize', action='store_true') + parser.add_argument('--no_normalize', dest='normalize', action='store_false') + parser.set_defaults(normalize=True) + + args = parser.parse_args() + print(args) + + MegaMolBART_wrapper, molecule_model_generation, molecule_dim_generation, \ + molecule_model_MoleculeSTM, mol2latent_MoleculeSTM, molecule_dim_MoleculeSTM = load_molecule_models(args) + device = torch.device("cuda:" + str(args.device)) \ + if torch.cuda.is_available() else torch.device("cpu") + molecule_model_generation = molecule_model_generation.to(device) + molecule_model_MoleculeSTM = molecule_model_MoleculeSTM.to(device) + mol2latent_MoleculeSTM = mol2latent_MoleculeSTM.to(device) + + np.random.seed(args.seed) + torch.random.manual_seed(args.seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(args.seed) + device = torch.device("cuda:" + str(args.device)) \ + if torch.cuda.is_available() else torch.device("cpu") + + freeze_network(molecule_model_generation) + freeze_network(mol2latent_MoleculeSTM) + freeze_network(molecule_model_MoleculeSTM) + molecule_model_generation.eval() + mol2latent_MoleculeSTM.eval() + molecule_model_MoleculeSTM.eval() + + if args.molecule_type == "MegaMolBART": + if "PubChem" in args.dataset: + dataset_root = 
os.path.join(args.dataspace_path, "PubChem_data") + else: + raise Exception + dataset = PubChem_Datasets_Only_SMILES(dataset_root) + dataloader_class = torch_DataLoader + else: + raise Exception + + dataloader = dataloader_class(dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) + + generation2MoleculeSTM = nn.Linear(molecule_dim_generation, molecule_dim_MoleculeSTM).to(device) + MoleculeSTM2generation = nn.Linear(molecule_dim_MoleculeSTM, molecule_dim_generation).to(device) + + model_param_group = [ + {"params": generation2MoleculeSTM.parameters(), "lr": args.generation_lr}, + {"params": MoleculeSTM2generation.parameters(), "lr": args.MoleculeSTM_lr}, + ] + optimizer = optim.Adam(model_param_group, weight_decay=args.decay) + optimal_loss = 1e10 + + for e in range(1, args.epochs+1): + print("Epoch {}".format(e)) + train(e) diff --git a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_02_latent_optimization.py b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_02_latent_optimization.py new file mode 100644 index 0000000..5bf5129 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_02_latent_optimization.py @@ -0,0 +1,161 @@ +import argparse +import math +import numpy as np +from rdkit import Chem, RDLogger + +import torch +from torch import optim +import torch.nn.functional as F +from tqdm import tqdm +from downstream_language_edit_utils import load_language_molecule_and_edit_models, clip_loss_for_edit, evaluate_SMILES_list +from MoleculeSTM.utils import prepare_text_tokens + + +def get_lr(t, initial_lr, rampdown=0.25, rampup=0.05): + lr_ramp = min(1, (1 - t) / rampdown) + lr_ramp = 0.5 - 0.5 * math.cos(lr_ramp * math.pi) + lr_ramp = lr_ramp * min(1, t / rampup) + return initial_lr * lr_ramp + + +def mean_pooling(token_embeddings, attention_mask): + input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() # [pad, B, d] + 
sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 0) # [B, d] + sum_mask = torch.clamp(input_mask_expanded.sum(0), min=1e-9) # [B, d] + return sum_embeddings / sum_mask + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--device", type=int, default=0) + parser.add_argument("--verbose", type=int, default=1) + + ########## for editing ########## + parser.add_argument("--description", type=str) + parser.add_argument("--input_model_dir", type=str) + parser.add_argument("--mode", type=str, default="edit", choices=["edit", "free_generation"]) + parser.add_argument("--input_SMILES", type=str, default=None) + parser.add_argument("--l2_lambda", type=float, default=0.008) + + ########## for ? ########## + parser.add_argument("--dataspace_path", type=str, default="../../Datasets") + parser.add_argument("--SSL_emb_dim", type=int, default=256) + parser.add_argument("--max_seq_len", type=int, default=512) + + ########## for MoleculeSTM ########## + parser.add_argument("--MoleculeSTM_model_dir", type=str, default="../../pretrained_model_Raw") + + ########## for generation ########## + parser.add_argument("--generation_model_dir", type=str, default="../../Datasets/pretrained_MegaMolBART/checkpoints") + + ########## for MoleculeSTM and generation projection ########## + parser.add_argument("--language_edit_model_dir", type=str, default="edit_temp/EBM_NCE") + + ########## for editing ########## + parser.add_argument("--lr_rampup", type=float, default=0.05) + parser.add_argument("--lr", type=float, default=0.1) + parser.add_argument("--epochs", type=int, default=100) + args = parser.parse_args() + + print(args) + + text_model, text_tokenizer, text_dim, molecule_model, MegaMolBART_wrapper, molecule_dim, \ + text2latent, mol2latent, generation2MoleculeSTM, MoleculeSTM2generation = load_language_molecule_and_edit_models(args) + device = torch.device("cuda:" + str(args.device)) 
\ + if torch.cuda.is_available() else torch.device("cpu") + text_model = text_model.to(device) + molecule_model = molecule_model.to(device) + text2latent = text2latent.to(device) + mol2latent = mol2latent.to(device) + generation2MoleculeSTM.to(device) + MoleculeSTM2generation.to(device) + text_model.eval() + molecule_model.eval() + text2latent.eval() + mol2latent.eval() + generation2MoleculeSTM.eval() + MoleculeSTM2generation.eval() + + np.random.seed(args.seed) + torch.random.manual_seed(args.seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(args.seed) + device = torch.device("cuda:" + str(args.device)) \ + if torch.cuda.is_available() else torch.device("cpu") + + description_list = [args.description] + text_tokens_ids, text_masks = prepare_text_tokens( + device=device, description=description_list, tokenizer=text_tokenizer, max_seq_len=args.max_seq_len) + text_output = text_model(input_ids=text_tokens_ids, attention_mask=text_masks) + text_repr = text_output["pooler_output"] + text_repr = text2latent(text_repr) + + record_SMILES_list = [] + + if args.mode == "edit": + SMILES_list = [args.input_SMILES] + latent_code_init, pad_mask_init = MegaMolBART_wrapper.smileslist2embedding(SMILES_list) # [pad, B, d], [pad, B] + molecule_repr_generation_init = mean_pooling(latent_code_init, pad_mask_init) # [B, d] + # record_SMILES_list.append(args.input_SMILES) + else: + padding_dim = 10 + latent_code_init = torch.randn(padding_dim, 1, molecule_dim).to(device) + pad_mask_init = torch.zeros(padding_dim, 1).bool().to(device) + print("latent_code_init", latent_code_init.size()) + print("pad_mask_init", pad_mask_init.size()) + + generated_mols = MegaMolBART_wrapper.inverse_transform( + [latent_code_init], pad_mask_init.bool().cuda(), k=1, sanitize=True) + print("initial SMILES", generated_mols[0]) + record_SMILES_list.append(generated_mols[0]) + + l2_lambda_list = [ + 1, 0.1, 0.01, 0.001, 0.0001, + 3, 0.3, 0.03, 0.003, 0.0003, + 5, 0.5, 0.05, 0.005, 0.0005, + 8, 
0.8, 0.08, 0.008, 0.0008, + ] + l2_lambda_list = [ + 0.1, + ] + + for l2_lambda in l2_lambda_list: + result_SMILES_list = [record_SMILES_list[0]] + print("with lambda {} ......".format(l2_lambda)) + latent = latent_code_init.detach().clone() + latent.requires_grad = True + optimizer = optim.Adam([latent], lr=args.lr) + + if args.verbose: + L = tqdm(range(args.epochs)) + else: + L = range(args.epochs) + for i in L: + t = i / args.epochs + lr = get_lr(t, args.lr) + optimizer.param_groups[0]["lr"] = lr + + molecule_repr_generation = mean_pooling(latent, pad_mask_init) # [B, d] + # molecule_repr_MoleculeSTM = generation2MoleculeSTM(molecule_repr_generation) + + clip_loss_ = clip_loss_for_edit(molecule_repr_generation, mol2latent, text_repr) + l2_loss_ = args.l2_lambda * ((latent_code_init - latent) ** 2).sum() + + loss = clip_loss_ + l2_loss_ + print(clip_loss_.item(), l2_loss_.item()) + + optimizer.zero_grad() + loss.backward(retain_graph=True) + optimizer.step() + print("clip loss: {:.5f}\tL2 loss: {:.5f}".format(clip_loss_.item(), args.l2_lambda * l2_loss_)) + + generated_mols = MegaMolBART_wrapper.inverse_transform( + [latent], pad_mask_init.bool().cuda(), k=1, sanitize=True) + # print("generated_mols",generated_mols[0]) + # Chem.SanitizeMol(generated_mols[0]) + print("final SMILES", generated_mols[0]) + result_SMILES_list.append(generated_mols[0]) + + evaluate_SMILES_list(result_SMILES_list) + print() diff --git a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_utils.py b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_utils.py new file mode 100644 index 0000000..40e7e2b --- /dev/null +++ b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_utils.py @@ -0,0 +1,133 @@ +import os +import copy +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import AutoModel, AutoTokenizer +from MoleculeSTM.models.mega_molbart.mega_mol_bart import MegaMolBART +from rdkit import Chem, RDLogger +from 
rdkit.Chem import AllChem, Descriptors +lg = RDLogger.logger() +lg.setLevel(RDLogger.CRITICAL) + + +def load_molecule_models(args): + """ + This function returns the two encoders, one for molecule generative model and one for CLIP. + TODO: now we adopt MegaMolBART for both. Will make this more flexible in the future. + """ + # This is loading from the pretarined_MegaMolBART + MegaMolBART_wrapper = MegaMolBART(input_dir=args.generation_model_dir, output_dir=None) + molecule_model_generation = copy.deepcopy(MegaMolBART_wrapper.model) + print("Loading from pretrained MegaMolBART ({}).".format(args.generation_model_dir)) + molecule_dim_generation = 256 + + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "molecule_model.pth") + molecule_model_MoleculeSTM = MegaMolBART_wrapper.model + state_dict = torch.load(input_model_path, map_location='cpu') + print("Loading from {}...".format(input_model_path)) + molecule_model_MoleculeSTM.load_state_dict(state_dict) + molecule_dim_MoleculeSTM = args.SSL_emb_dim + + mol2latent_MoleculeSTM = nn.Linear(256, molecule_dim_MoleculeSTM) + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "mol2latent_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + mol2latent_MoleculeSTM.load_state_dict(state_dict) + return MegaMolBART_wrapper, molecule_model_generation, molecule_dim_generation, \ + molecule_model_MoleculeSTM, mol2latent_MoleculeSTM, molecule_dim_MoleculeSTM + + +def load_language_molecule_and_edit_models(args): + pretrained_SciBERT_folder = os.path.join(args.dataspace_path, 'pretrained_SciBERT') + # text_tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased', cache_dir=pretrained_SciBERT_folder) + # TODO: check https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py#L1501 + # text_model = AutoModel.from_pretrained('allenai/scibert_scivocab_uncased', 
cache_dir=pretrained_SciBERT_folder) + text_tokenizer = AutoTokenizer.from_pretrained('/mnt/cyz_dair/projects/MoleculeSTM-main/MoleculeSTM-main/data/pretrained_SciBERT', cache_dir=pretrained_SciBERT_folder) + text_model = AutoModel.from_pretrained('/mnt/cyz_dair/projects/MoleculeSTM-main/MoleculeSTM-main/data/pretrained_SciBERT', cache_dir=pretrained_SciBERT_folder) + + text_dim = 768 + + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "text_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + text_model.load_state_dict(state_dict) + + """ + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "molecule_model.pth") + print("Loading from {}...".format(input_model_path)) + MegaMolBART_wrapper = MegaMolBART(input_dir=None, output_dir=None) + molecule_model = MegaMolBART_wrapper.model + state_dict = torch.load(input_model_path, map_location='cpu') + molecule_model.load_state_dict(state_dict) + """ + # This is loading from the pretarined_MegaMolBART + MegaMolBART_wrapper = MegaMolBART(input_dir=args.generation_model_dir, output_dir=None) + molecule_model = MegaMolBART_wrapper.model + print("Loading from pretrained MegaMolBART ({}).".format(args.generation_model_dir)) + molecule_dim_generation = 256 + molecule_dim_MoleculeSTM = 256 + + text2latent = nn.Linear(text_dim, args.SSL_emb_dim) + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "text2latent_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + text2latent.load_state_dict(state_dict) + + mol2latent = nn.Linear(molecule_dim_generation, args.SSL_emb_dim) + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "mol2latent_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + mol2latent.load_state_dict(state_dict) + + generation2MoleculeSTM = 
nn.Linear(molecule_dim_generation, molecule_dim_MoleculeSTM) + input_model_path = os.path.join(args.language_edit_model_dir, "generation2MoleculeSTM_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + generation2MoleculeSTM.load_state_dict(state_dict) + + MoleculeSTM2generation = nn.Linear(molecule_dim_MoleculeSTM, molecule_dim_generation) + input_model_path = os.path.join(args.language_edit_model_dir, "MoleculeSTM2generation_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + MoleculeSTM2generation.load_state_dict(state_dict) + + return text_model, text_tokenizer, text_dim, molecule_model, MegaMolBART_wrapper, molecule_dim_generation, text2latent, mol2latent, generation2MoleculeSTM, MoleculeSTM2generation + + +def clip_loss_for_edit(molecule_repr, mol2latent, text_repr): + # molecule_repr = F.normalize(molecule_repr, dim=-1) + # molecule_repr = mol2latent(molecule_repr) + molecule_repr = F.normalize(molecule_repr, dim=-1) + + text_repr = F.normalize(text_repr, dim=-1) + + similarity = -torch.mm(molecule_repr, text_repr.transpose(0, 1))[0] + return similarity + + +def evaluate_SMILES_list(SMILES_list): + print("SMILES_list:") + print(SMILES_list) + mol_list = [] + for SMILES in SMILES_list: + mol = Chem.MolFromSmiles(SMILES) + # Chem.SanitizeMol(mol) + # print(SMILES, mol) + if mol is None: + continue + mol_list.append(mol) + print("mol_list", len(mol_list)) + + print() + props = ["MolWt", "MolLogP", "TPSA", "qed"] + props = ["MolLogP"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + for name, func in prop_pred: + print("evaluating with {}".format(name)) + for SMILES, mol in zip(SMILES_list, mol_list): + value = func(mol) + print("====={} & {:.5f}".format(SMILES, value)) + print() + + return \ No newline at end of file diff --git 
a/open_biomed/models/MoleculeSTM/cuchemcommon/__init__.py b/open_biomed/models/MoleculeSTM/cuchemcommon/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/context.py b/open_biomed/models/MoleculeSTM/cuchemcommon/context.py new file mode 100644 index 0000000..74e2793 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/context.py @@ -0,0 +1,53 @@ +import logging +import os +from configparser import RawConfigParser +from io import StringIO + +from models.MoleculeSTM.cuchemcommon.utils.singleton import Singleton + +logger = logging.getLogger(__name__) + +CONFIG_FILE = '.env' + + +class Context(metaclass=Singleton): + + def __init__(self): + + self.dask_client = None + self.compute_type = 'gpu' + self.is_benchmark = False + self.benchmark_file = None + self.cache_directory = None + self.n_molecule = None + self.batch_size = 10000 + + self.config = {} + if os.path.exists(CONFIG_FILE): + logger.info('Reading properties from %s...', CONFIG_FILE) + self.config = self._load_properties_file(CONFIG_FILE) + else: + logger.warn('Could not locate %s', CONFIG_FILE) + + def _load_properties_file(self, properties_file): + """ + Reads a properties file using ConfigParser. + + :param propertiesFile/configFile: + """ + config_file = open(properties_file, 'r') + config_content = StringIO('[root]\n' + config_file.read()) + config = RawConfigParser() + config.read_file(config_content) + + return config._sections['root'] + + def get_config(self, config_name, default=None): + """ + Returns values from local configuration. 
+ """ + try: + return self.config[config_name] + except KeyError: + logger.warn('%s not found, returing default.', config_name) + return default diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/__init__.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/__init__.py new file mode 100644 index 0000000..3a07d30 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/data/__init__.py @@ -0,0 +1,45 @@ +from typing import List + + +class ClusterWfDAO(object): + """ + Base class for all DAO for fetching data for Clustering Workflows + """ + + def meta_df(self): + """ + Returns df with dtype set for structure without any column filter. + """ + return NotImplemented + + def fetch_molecular_embedding(self, n_molecules: int, cache_directory: str = None): + """ + Fetch molecular properties from database/cache into a dask array. + """ + return NotImplemented + + def fetch_molecular_embedding_by_id(self, molecule_id: List): + """ + Fetch molecular properties from database for the given id. Id depends on + the backend databse. For chemble DB it should be molregid. + """ + return NotImplemented + + def fetch_id_from_smile(self, new_molecules: List): + """ + Fetch molecular details for a list of molecules. The values in the list + of molecules depends on database/service used. For e.g. it could be + ChemblId or molreg_id for Chemble database. + """ + return NotImplemented + + +class GenerativeWfDao(object): + + def fetch_id_from_chembl(self, id: List): + """ + Fetch molecular details for a list of molecules. The values in the list + of molecules depends on database/service used. For e.g. it could be + ChemblId or molreg_id for Chemble database. 
+ """ + return NotImplemented diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/cluster_wf.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/cluster_wf.py new file mode 100644 index 0000000..6462d5f --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/data/cluster_wf.py @@ -0,0 +1,61 @@ +import logging +import math +import os +from typing import List + +import cudf +import dask +import dask_cudf +from cuchemcommon.context import Context +from cuchemcommon.data.helper.chembldata import BATCH_SIZE, ChEmblData +from cuchemcommon.utils.singleton import Singleton + +from . import ClusterWfDAO + +logger = logging.getLogger(__name__) + +FINGER_PRINT_FILES = 'filter_*.h5' + + +class ChemblClusterWfDao(ClusterWfDAO, metaclass=Singleton): + + def __init__(self, fp_type): + self.chem_data = ChEmblData(fp_type) + + def meta_df(self): + chem_data = ChEmblData() + return chem_data._meta_df() + + def fetch_molecular_embedding(self, + n_molecules: int, + cache_directory: str = None): + context = Context() + if cache_directory: + hdf_path = os.path.join(cache_directory, FINGER_PRINT_FILES) + logger.info('Reading %d rows from %s...', n_molecules, hdf_path) + mol_df = dask.dataframe.read_hdf(hdf_path, 'fingerprints') + + if n_molecules > 0: + npartitions = math.ceil(n_molecules / BATCH_SIZE) + mol_df = mol_df.head(n_molecules, compute=False, npartitions=npartitions) + else: + logger.info('Reading molecules from database...') + mol_df = self.chem_data.fetch_mol_embedding(num_recs=n_molecules, + batch_size=context.batch_size) + + return mol_df + + def fetch_molecular_embedding_by_id(self, molecule_id: List): + context = Context() + meta = self.chem_data._meta_df() + fp_df = self.chem_data._fetch_mol_embedding(molregnos=molecule_id, + batch_size=context.batch_size) \ + .astype(meta.dtypes) + + fp_df = cudf.from_pandas(fp_df) + fp_df = dask_cudf.from_cudf(fp_df, npartitions=1).reset_index() + return fp_df + + def fetch_id_from_chembl(self, new_molecules: List): 
+ logger.debug('Fetch ChEMBL ID using molregno...') + return self.chem_data.fetch_id_from_chembl(new_molecules) diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/generative_wf.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/generative_wf.py new file mode 100644 index 0000000..9e16a2d --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/data/generative_wf.py @@ -0,0 +1,19 @@ +import logging +from typing import List + +from cuchemcommon.data.helper.chembldata import ChEmblData +from cuchemcommon.utils.singleton import Singleton + +from . import GenerativeWfDao + +logger = logging.getLogger(__name__) + + +class ChemblGenerativeWfDao(GenerativeWfDao, metaclass=Singleton): + + def __init__(self, fp_type): + self.chem_data = ChEmblData(fp_type) + + def fetch_id_from_chembl(self, id: List): + logger.debug('Fetch ChEMBL ID using molregno...') + return self.chem_data.fetch_id_from_chembl(id) diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/__init__.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/chembldata.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/chembldata.py new file mode 100644 index 0000000..7b0d272 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/chembldata.py @@ -0,0 +1,320 @@ +import os +import warnings +import pandas +import sqlite3 +import logging + +from typing import List +from dask import delayed, dataframe + +from contextlib import closing +from cuchemcommon.utils.singleton import Singleton +from cuchemcommon.context import Context + +warnings.filterwarnings("ignore", message=r"deprecated", category=FutureWarning) +logger = logging.getLogger(__name__) + +BATCH_SIZE = 100000 +ADDITIONAL_FEILD = ['canonical_smiles', 'transformed_smiles'] +IMP_PROPS = [ + 'alogp', + 'aromatic_rings', + 'full_mwt', + 'psa', + 'rtb'] +IMP_PROPS_TYPE = 
[pandas.Series([], dtype='float64'), + pandas.Series([], dtype='int64'), + pandas.Series([], dtype='float64'), + pandas.Series([], dtype='float64'), + pandas.Series([], dtype='int64')] +ADDITIONAL_FEILD_TYPE = [pandas.Series([], dtype='object'), + pandas.Series([], dtype='object')] + +SQL_MOLECULAR_PROP = """ +SELECT md.molregno as molregno, md.chembl_id, cp.*, cs.* +FROM compound_properties cp, + compound_structures cs, + molecule_dictionary md +WHERE cp.molregno = md.molregno + AND md.molregno = cs.molregno + AND md.molregno in (%s) +""" + + +# DEPRECATED. Please add code to DAO classes. +class ChEmblData(object, metaclass=Singleton): + + def __init__(self, fp_type): + + context = Context() + db_file = context.get_config('data_mount_path', default='/data') + db_file = os.path.join(db_file, 'db/chembl_27.db') + + if not os.path.exists(db_file): + logger.error('%s not found', db_file) + raise Exception('{} not found'.format(db_file)) + + self.fp_type = fp_type + self.chembl_db = 'file:%s?mode=ro' % db_file + + logger.info('ChEMBL database: %s...' % self.chembl_db) + + def fetch_props_by_molregno(self, molregnos): + """ + Returns compound properties and structure filtered by ChEMBL IDs along + with a list of columns. + """ + with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ + closing(con.cursor()) as cur: + select_stmt = SQL_MOLECULAR_PROP % " ,".join(list(map(str, molregnos))) + cur.execute(select_stmt) + + cols = list(map(lambda x: x[0], cur.description)) + return cols, cur.fetchall() + + def fetch_props_by_chemble(self, chemble_ids): + """ + Returns compound properties and structure filtered by ChEMBL IDs along + with a list of columns. 
+ """ + sql_stml = """ + SELECT md.molregno as molregno, md.chembl_id, cp.*, cs.* + FROM compound_properties cp, + compound_structures cs, + molecule_dictionary md + WHERE cp.molregno = md.molregno + AND md.molregno = cs.molregno + AND md.chembl_id in (%s) + """ + with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ + closing(con.cursor()) as cur: + select_stmt = sql_stml % "'%s'" % "','".join([x.strip().upper() for x in chemble_ids]) + cur.execute(select_stmt) + + cols = list(map(lambda x: x[0], cur.description)) + return cols, cur.fetchall() + + def fetch_molregno_by_chemblId(self, chemblIds): + logger.debug('Fetch ChEMBL ID using molregno...') + with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ + closing(con.cursor()) as cur: + select_stmt = ''' + SELECT md.molregno as molregno + FROM compound_properties cp, + compound_structures cs, + molecule_dictionary md + WHERE cp.molregno = md.molregno + AND md.molregno = cs.molregno + AND md.chembl_id in (%s) + ''' % "'%s'" % "','".join(chemblIds) + cur.execute(select_stmt) + return cur.fetchall() + + def fetch_id_from_chembl(self, new_molecules: List): + logger.debug('Fetch ChEMBL ID using molregno...') + + with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ + closing(con.cursor()) as cur: + select_stmt = ''' + SELECT cs.molregno as molregno, md.chembl_id as chembl_id, + cs.canonical_smiles as smiles + FROM compound_structures cs, + molecule_dictionary md + WHERE md.molregno = cs.molregno + AND md.chembl_id in (%s) + ''' % "'%s'" % "','".join([x.strip().upper() for x in new_molecules]) + cur.execute(select_stmt) + + return cur.fetchall() + + def fetch_chemblId_by_molregno(self, molregnos): + logger.debug('Fetch ChEMBL ID using molregno...') + with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ + closing(con.cursor()) as cur: + select_stmt = ''' + SELECT md.chembl_id as chembl_id + FROM molecule_dictionary md + WHERE md.molregno in (%s) + ''' % 
", ".join(list(map(str, molregnos))) + cur.execute(select_stmt) + return cur.fetchall() + + def fetch_approved_drugs(self): + """Fetch approved drugs with phase >=3 as dataframe + + Args: + chembl_db_path (string): path to chembl sqlite database + Returns: + pd.DataFrame: dataframe containing SMILES strings and molecule index + """ + logger.debug('Fetching ChEMBL approved drugs...') + with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ + closing(con.cursor()) as cur: + select_stmt = """SELECT + di.molregno, + cs.canonical_smiles, + di.max_phase_for_ind + FROM + drug_indication AS di + LEFT JOIN compound_structures AS cs ON di.molregno = cs.molregno + WHERE + di.max_phase_for_ind >= 3 + AND cs.canonical_smiles IS NOT NULL;""" + cur.execute(select_stmt) + return cur.fetchall() + + def fetch_random_samples(self, num_samples, max_len): + """Fetch random samples from ChEMBL as dataframe + + Args: + num_samples (int): number of samples to select + chembl_db_path (string): path to chembl sqlite database + Returns: + pd.DataFrame: dataframe containing SMILES strings and molecule index + """ + logger.debug('Fetching ChEMBL random samples...') + with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ + closing(con.cursor()) as cur: + select_stmt = """SELECT + cs.molregno, + cs.canonical_smiles, + LENGTH(cs.canonical_smiles) as len + FROM + compound_structures AS cs + WHERE + cs.canonical_smiles IS NOT NULL + AND + len <= """ + f'{max_len}' + """ + ORDER BY RANDOM() + LIMIT """ + f'{num_samples};' + + cur.execute(select_stmt) + return cur.fetchall() + + def fetch_molecule_cnt(self): + logger.debug('Finding number of molecules...') + with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ + closing(con.cursor()) as cur: + select_stmt = ''' + SELECT count(*) + FROM compound_properties cp, + molecule_dictionary md, + compound_structures cs + WHERE cp.molregno = md.molregno + AND md.molregno = cs.molregno + ''' + 
cur.execute(select_stmt) + + return cur.fetchone()[0] + + def _meta_df(self, **transformation_kwargs): + transformation = self.fp_type(**transformation_kwargs) + + prop_meta = {'id': pandas.Series([], dtype='int64')} + prop_meta.update(dict(zip(IMP_PROPS + ADDITIONAL_FEILD, + IMP_PROPS_TYPE + ADDITIONAL_FEILD_TYPE))) + prop_meta.update({i: pandas.Series([], dtype='float32') for i in range(len(transformation))}) + + return pandas.DataFrame(prop_meta) + + def _fetch_mol_embedding(self, + start=0, + batch_size=BATCH_SIZE, + molregnos=None, + **transformation_kwargs): + """ + Returns compound properties and structure for the first N number of + records in a dataframe. + """ + + logger.info('Fetching %d records starting %d...' % (batch_size, start)) + + imp_cols = ['cp.' + col for col in IMP_PROPS] + + if molregnos is None: + select_stmt = ''' + SELECT md.molregno, %s, cs.canonical_smiles + FROM compound_properties cp, + molecule_dictionary md, + compound_structures cs + WHERE cp.molregno = md.molregno + AND md.molregno = cs.molregno + LIMIT %d, %d + ''' % (', '.join(imp_cols), start, batch_size) + else: + select_stmt = ''' + SELECT md.molregno, %s, cs.canonical_smiles + FROM compound_properties cp, + molecule_dictionary md, + compound_structures cs + WHERE cp.molregno = md.molregno + AND md.molregno = cs.molregno + AND md.molregno in (%s) + LIMIT %d, %d + ''' % (', '.join(imp_cols), " ,".join(list(map(str, molregnos))), start, batch_size) + + df = pandas.read_sql(select_stmt, + sqlite3.connect(self.chembl_db, uri=True)) + + # Smiles -> Smiles transformation and filtering + # TODO: Discuss internally to find use or refactor this code to remove + # model specific filtering + df['transformed_smiles'] = df['canonical_smiles'] + # if smiles_transforms is not None: + # if len(smiles_transforms) > 0: + # for xf in smiles_transforms: + # df['transformed_smiles'] = df['transformed_smiles'].map(xf.transform) + # df.dropna(subset=['transformed_smiles'], axis=0, inplace=True) + + 
# Conversion to fingerprints or embeddings + # transformed_smiles = df['transformed_smiles'] + transformation = self.fp_type(**transformation_kwargs) + cache_data = transformation.transform(df) + return_df = pandas.DataFrame(cache_data) + + return_df = pandas.DataFrame( + return_df, + columns=pandas.RangeIndex(start=0, + stop=len(transformation))).astype('float32') + + return_df = df.merge(return_df, left_index=True, right_index=True) + return_df.rename(columns={'molregno': 'id'}, inplace=True) + return return_df + + def fetch_mol_embedding(self, + num_recs=None, + batch_size=BATCH_SIZE, + molregnos=None, + **transformation_kwargs): + """ + Returns compound properties and structure for the first N number of + records in a dataframe. + """ + logger.debug('Fetching properties for all molecules...') + + if num_recs is None or num_recs < 0: + num_recs = self.fetch_molecule_cnt() + + logger.info('num_recs %d', num_recs) + logger.info('batch_size %d', batch_size) + meta_df = self._meta_df(**transformation_kwargs) + + dls = [] + for start in range(0, num_recs, batch_size): + bsize = min(num_recs - start, batch_size) + dl_data = delayed(self._fetch_mol_embedding)(start=start, + batch_size=bsize, + molregnos=molregnos, + **transformation_kwargs) + dls.append(dl_data) + + return dataframe.from_delayed(dls, meta=meta_df) + + def save_fingerprints(self, hdf_path='data/filter_*.h5', num_recs=None, batch_size=5000): + """ + Generates fingerprints for all ChEMBL ID's in the database + """ + logger.debug('Fetching molecules from database for fingerprints...') + + mol_df = self.fetch_mol_embedding(num_recs=num_recs, batch_size=batch_size) + mol_df.to_hdf(hdf_path, 'fingerprints') diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/fingerprint.py b/open_biomed/models/MoleculeSTM/cuchemcommon/fingerprint.py new file mode 100644 index 0000000..55f2471 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/fingerprint.py @@ -0,0 +1,95 @@ +import logging +import os +from 
abc import ABC +from enum import Enum + +import numpy as np +import pandas as pd +from cddd.inference import InferenceModel +from cuchem.utils.data_peddler import download_cddd_models +from rdkit import Chem +from rdkit.Chem import AllChem + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +logger = logging.getLogger(__name__) + + +def calc_morgan_fingerprints(dataframe, smiles_col='canonical_smiles'): + """Calculate Morgan fingerprints on SMILES strings + + Args: + dataframe (pd.DataFrame): dataframe containing a SMILES column for calculation + + Returns: + pd.DataFrame: new dataframe containing fingerprints + """ + mf = MorganFingerprint() + fp = mf.transform(dataframe, col_name=smiles_col) + fp = pd.DataFrame(fp) + fp.index = dataframe.index + return fp + + +class TransformationDefaults(Enum): + MorganFingerprint = {'radius': 2, 'nBits': 512} + Embeddings = {} + + +class BaseTransformation(ABC): + def __init__(self, **kwargs): + self.name = None + self.kwargs = None + self.func = None + + def transform(self, data): + return NotImplemented + + def transform_many(self, data): + return list(map(self.transform, data)) + + def __len__(self): + return NotImplemented + + +class MorganFingerprint(BaseTransformation): + + def __init__(self, **kwargs): + self.name = __class__.__name__.split('.')[-1] + self.kwargs = TransformationDefaults[self.name].value + self.kwargs.update(kwargs) + self.func = AllChem.GetMorganFingerprintAsBitVect + + def transform(self, data, col_name='transformed_smiles'): + data = data[col_name] + fp_array = [] + for mol in data: + m = Chem.MolFromSmiles(mol) + fp = self.func(m, **self.kwargs) + fp_array.append(list(fp.ToBitString())) + fp_array = np.asarray(fp_array) + return fp_array + + def __len__(self): + return self.kwargs['nBits'] + + +class Embeddings(BaseTransformation): + + def __init__(self, use_gpu=True, cpu_threads=5, model_dir=None, **kwargs): + self.name = __class__.__name__.split('.')[-1] + self.kwargs = 
TransformationDefaults[self.name].value + self.kwargs.update(kwargs) + model_dir = download_cddd_models() + self.func = InferenceModel(model_dir, use_gpu=use_gpu, cpu_threads=cpu_threads) + + def transform(self, data): + data = data['transformed_smiles'] + return self.func.seq_to_emb(data).squeeze() + + def inverse_transform(self, embeddings): + "Embedding array -- individual compound embeddings are in rows" + embeddings = np.asarray(embeddings) + return self.func.emb_to_seq(embeddings) + + def __len__(self): + return self.func.hparams.emb_size diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/smiles.py b/open_biomed/models/MoleculeSTM/cuchemcommon/smiles.py new file mode 100644 index 0000000..5034fa1 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/smiles.py @@ -0,0 +1,38 @@ +# import os +# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + +# import logging +# from abc import ABC +# from rdkit.Chem.SaltRemover import SaltRemover +# from cddd.preprocessing import remove_salt_stereo, filter_smiles + +# logger = logging.getLogger(__name__) + + +# class BaseTransformation(ABC): +# def __init__(self): +# pass + +# def transform(self, data): +# return NotImplemented + +# def transform_many(self, data): +# return list(map(self.transform, data)) +# #return [self.filter(x) for x in data] + + +# class RemoveSalt(BaseTransformation): +# def __init__(self, remover=SaltRemover()): +# self.name = __class__.__name__.split('.')[-1] +# self.remover = remover + +# def transform(self, data): +# return remove_salt_stereo(data, self.remover) + + +# class PreprocessSmiles(BaseTransformation): +# def __init__(self): +# self.name = __class__.__name__.split('.')[-1] + +# def transform(self, data): +# return filter_smiles(data) diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/__init__.py b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/__init__.py new file mode 100644 index 0000000..0de2d94 --- /dev/null +++ 
b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/__init__.py @@ -0,0 +1 @@ +from cuchemcommon.utils.singleton import Singleton \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/logger.py b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/logger.py new file mode 100644 index 0000000..7f9e669 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/logger.py @@ -0,0 +1,106 @@ +#!/opt/conda/envs/rapids/bin/python3 +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import os +from datetime import datetime + +from cuchemcommon.context import Context + +from .sysinfo import get_machine_config, print_machine_config + +BENCHMARK_FILE = '/data/benchmark.csv' + +logger = logging.getLogger(__name__) + + +def initialize_logfile(benchmark_file=BENCHMARK_FILE): + """Initialize benchmark file with header if needed""" + + config = get_machine_config() + config_message = print_machine_config(config) + + if not os.path.exists(benchmark_file): + with open(benchmark_file, 'w') as fh: + fh.write(f'# {config_message}\n') + fh.write('date,benchmark_type,step,time(hh:mm:ss.ms),n_molecules,n_workers,metric_name,metric_value\n') + return benchmark_file + + +class MetricsLogger(object): + + def __init__(self, + task_name, + n_molecules): + + self.task_name = task_name + self.n_molecules = n_molecules + self.start_time = None + self.metric_name = None + self.metric_value = None + + self.metric_func = None + self.metric_func_args = None + self.metric_func_kwargs = {} + + def __enter__(self): + self.start_time = datetime.now() + + return self + + def __exit__(self, type, value, traceback): + context = Context() + + runtime = datetime.now() - self.start_time + logger.info('### Runtime {} time (hh:mm:ss.ms) {}'.format(self.task_name, runtime)) + n_workers = len(context.dask_client.cluster.workers) + + if self.metric_func and context.is_benchmark: + self.metric_value = self.metric_func(*self.metric_func_args, + **self.metric_func_kwargs) + + if self.metric_value is None: + self.metric_name = '' + self.metric_value = '' + else: + logger.info('Calculated {} is {}'.format(self.metric_name, self.metric_value)) + + log_results(self.start_time, context.compute_type, self.task_name, + runtime, + n_molecules=self.n_molecules, + n_workers=n_workers, + metric_name=self.metric_name, + metric_value=self.metric_value, + benchmark_file=context.benchmark_file) + + +def log_results(date, + benchmark_type, + step, + time, + n_molecules, + n_workers, + 
metric_name='', + metric_value='', + benchmark_file=BENCHMARK_FILE): + """Log benchmark results to a file""" + + out_list = [date, benchmark_type, step, time, n_molecules, n_workers, metric_name, metric_value] + out_fmt = ','.join(['{}'] * len(out_list)) + '\n' + + with open(benchmark_file, 'a') as fh: + out_string = out_fmt.format(*out_list) + fh.write(out_string) diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/singleton.py b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/singleton.py new file mode 100644 index 0000000..fc28938 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/singleton.py @@ -0,0 +1,26 @@ +# singleton.py + +import logging + +""" +Metaclass for singletons. +""" + +logger = logging.getLogger(__name__) + + +class Singleton(type): + """ + Ensures single instance of a class. + + Example Usage: + class MySingleton(metaclass=Singleton) + pass + """ + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super(Singleton, cls).__call__( + *args, **kwargs) + return cls._instances[cls] diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/sysinfo.py b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/sysinfo.py new file mode 100644 index 0000000..1077c5b --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/sysinfo.py @@ -0,0 +1,68 @@ +#!/opt/conda/envs/rapids/bin/python3 +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import Counter + +import psutil +import pynvml as nv + + +def get_machine_config(): + """Get machine config for CPU and GPU(s)""" + + # CPU config + physical_cores = psutil.cpu_count(logical=False) + logical_cores = psutil.cpu_count(logical=True) + + cpufreq = psutil.cpu_freq() + cpufreq_max = cpufreq.max # Mhz + cpufreq_min = cpufreq.min + cpufreq_cur = cpufreq.current + + svmem = psutil.virtual_memory() + mem_total = svmem.total / (1024.0 ** 3) # GB + mem_avail = svmem.available / (1024.0 ** 3) + + # GPU config + nv.nvmlInit() + driver_version = nv.nvmlSystemGetDriverVersion() + deviceCount = nv.nvmlDeviceGetCount() + gpu_devices, gpu_mems = [], [] + for i in range(deviceCount): + handle = nv.nvmlDeviceGetHandleByIndex(i) + gpu_devices.append(nv.nvmlDeviceGetName(handle).decode("utf-8")) + gpu_mem = nv.nvmlDeviceGetMemoryInfo(handle).total / (1024.0 ** 3) + gpu_mems.append(gpu_mem) + + return {'cpu': {'physical_cores': physical_cores, 'logical_cores': logical_cores, + 'min_freq_MHz': cpufreq_min, 'max_freq_MHz': cpufreq_max, 'cur_freq_MHz': cpufreq_cur, + 'total_mem_GB': mem_total, 'avail_mem_GB': mem_avail}, + 'gpu': {'devices': gpu_devices, 'mem_GB': gpu_mems}} + + +def print_machine_config(config): + """Printable version of config""" + cpu_cores = config['cpu']['physical_cores'] + cpu_freq = int(round(config['cpu']['max_freq_MHz'], 0)) + ram = int(round(config['cpu']['total_mem_GB'], 0)) + cpu_config_message = f'{cpu_freq} MHz CPU with {cpu_cores} cores, {ram} GB RAM' + + gpu_devices = Counter([(x, int(round(y, 0))) for x, y in zip(config['gpu']['devices'], config['gpu']['mem_GB'])]) + gpu_config_message = '' + for (handle, mem), count in gpu_devices.items(): + gpu_config_message += f'{count} x {handle} GPU(s)' + + return ', '.join([cpu_config_message, gpu_config_message]) diff --git 
a/open_biomed/models/MoleculeSTM/cuchemcommon/workflow.py b/open_biomed/models/MoleculeSTM/cuchemcommon/workflow.py new file mode 100644 index 0000000..0872750 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/cuchemcommon/workflow.py @@ -0,0 +1,201 @@ +import logging +# import torch +from functools import singledispatch +from typing import List + +import numpy as np +from models.MoleculeSTM.cuchemcommon.data import GenerativeWfDao +from rdkit.Chem import PandasTools, CanonSmiles + +logger = logging.getLogger(__name__) + + +@singledispatch +def add_jitter(embedding, radius, cnt, shape): + return NotImplemented + + +@add_jitter.register(np.ndarray) +def _(embedding, radius, cnt, shape): + + distorteds = [] + for i in range(cnt): + noise = np.random.normal(0, radius, embedding.shape) + distorted = noise + embedding + distorteds.append(distorted) + + return distorteds + + +class BaseGenerativeWorkflow: + + def __init__(self, dao: GenerativeWfDao = None) -> None: + self.dao = dao + self.min_jitter_radius = None + + def get_iteration(self): + NotImplemented + + def smiles_to_embedding(self, + smiles: str, + padding: int): + NotImplemented + + def embedding_to_smiles(self, + embedding: float, + dim: int, + pad_mask): + NotImplemented + + def interpolate_smiles(self, + smiles: List, + num_points: int = 10, + scaled_radius=None, + force_unique=False): + NotImplemented + + def find_similars_smiles_list(self, + smiles: str, + num_requested: int = 10, + scaled_radius=None, + force_unique=False): + NotImplemented + + def find_similars_smiles(self, + smiles: str, + num_requested: int = 10, + scaled_radius=None, + force_unique=False): + NotImplemented + + def _compute_radius(self, scaled_radius): + if scaled_radius: + return float(scaled_radius * self.min_jitter_radius) + else: + return self.min_jitter_radius + + def addjitter(self, + embedding, + radius=None, + cnt=1, + shape=None): + radius = radius if radius else self.radius_scale + return add_jitter(embedding, radius, cnt, 
shape) + + def compute_unique_smiles(self, + interp_df, + embedding_funct, + scaled_radius=None): + """ + Identify duplicate SMILES and distorts the embedding. The input df + must have columns 'SMILES' and 'Generated' at 0th and 1st position. + 'Generated' colunm must contain boolean to classify SMILES into input + SMILES(False) and generated SMILES(True). + + This function does not make any assumptions about order of embeddings. + Instead it simply orders the df by SMILES to identify the duplicates. + """ + + distance = self._compute_radius(scaled_radius) + embeddings = interp_df['embeddings'] + embeddings_dim = interp_df['embeddings_dim'] + for index, row in interp_df.iterrows(): + smile_string = row['SMILES'] + try: + canonical_smile = CanonSmiles(smile_string) + except: + # If a SMILES cannot be canonicalized, just use the original + canonical_smile = smile_string + + row['SMILES'] = canonical_smile + + for i in range(5): + smiles = interp_df['SMILES'].sort_values() + duplicates = set() + for idx in range(0, smiles.shape[0] - 1): + if smiles.iat[idx] == smiles.iat[idx + 1]: + duplicates.add(smiles.index[idx]) + duplicates.add(smiles.index[idx + 1]) + + if len(duplicates) > 0: + for dup_idx in duplicates: + if interp_df.iat[dup_idx, 3]: + # add jitter to generated molecules only + distored = self.addjitter(embeddings[dup_idx], + distance, + cnt=1, + shape=embeddings_dim[dup_idx]) + embeddings[dup_idx] = distored[0] + interp_df['SMILES'] = embedding_funct(embeddings.to_list()) + interp_df['embeddings'] = embeddings + else: + break + + # Ensure all generated molecules are valid. 
+ for i in range(5): + PandasTools.AddMoleculeColumnToFrame(interp_df, 'SMILES') + invalid_mol_df = interp_df[interp_df['ROMol'].isnull()] + + if not invalid_mol_df.empty: + invalid_index = invalid_mol_df.index.to_list() + for idx in invalid_index: + embeddings[idx] = self.addjitter(embeddings[idx], + distance, + cnt=1, + shape=embeddings_dim[idx])[0] + interp_df['SMILES'] = embedding_funct(embeddings.to_list()) + interp_df['embeddings'] = embeddings + else: + break + + # Cleanup + if 'ROMol' in interp_df.columns: + interp_df = interp_df.drop('ROMol', axis=1) + + return interp_df + + def interpolate_by_id(self, + ids: List, + id_type: str = 'chembleid', + num_points=10, + force_unique=False, + scaled_radius: int = 1): + smiles = None + + if not self.min_jitter_radius: + raise Exception('Property `radius_scale` must be defined in model class.') + + if id_type.lower() == 'chembleid': + smiles = [row[2] for row in self.dao.fetch_id_from_chembl(ids)] + if len(smiles) != len(ids): + raise Exception('One of the ids is invalid %s', ids) + else: + raise Exception('id type %s not supported' % id_type) + + return self.interpolate_smiles(smiles, + num_points=num_points, + scaled_radius=scaled_radius, + force_unique=force_unique) + + def find_similars_smiles_by_id(self, + chemble_id: str, + id_type: str = 'chembleid', + num_requested=10, + force_unique=False, + scaled_radius: int = 1): + smiles = None + + if not self.min_jitter_radius: + raise Exception('Property `radius_scale` must be defined in model class.') + + if id_type.lower() == 'chembleid': + smiles = [row[2] for row in self.dao.fetch_id_from_chembl(chemble_id)] + if len(smiles) != len(chemble_id): + raise Exception('One of the ids is invalid %s' + chemble_id) + else: + raise Exception('id type %s not supported' % id_type) + + return self.find_similars_smiles(smiles[0], + num_requested=num_requested, + scaled_radius=scaled_radius, + force_unique=force_unique) diff --git 
a/open_biomed/models/MoleculeSTM/datasets/DrugBankGraph.py b/open_biomed/models/MoleculeSTM/datasets/DrugBankGraph.py new file mode 100644 index 0000000..1b9ea61 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/datasets/DrugBankGraph.py @@ -0,0 +1,235 @@ +import os +from itertools import chain, repeat +import pandas as pd +import torch +from torch_geometric.data import InMemoryDataset, Data +from models.MoleculeSTM.datasets.utils import mol_to_graph_data_obj_simple +from rdkit.Chem import AllChem + + +class DrugBank_Datasets_Graph_retrieval(InMemoryDataset): + def __init__( + self, root, train_mode, neg_sample_size, processed_dir_prefix, template="raw/SMILES_description_{}.txt", + transform=None, pre_transform=None, pre_filter=None, empty=False + ): + self.root = root + self.transform = transform + self.pre_filter = pre_filter + self.pre_transform = pre_transform + self.processed_dir_prefix = processed_dir_prefix + self.template = template + self.train_mode = train_mode + self.smiles_text_file_name = "SMILES.csv" + + super(DrugBank_Datasets_Graph_retrieval, self).__init__(root, transform, pre_transform, pre_filter) + + if not empty: + self.data, self.slices = torch.load(self.processed_paths[0]) + print('Data: {}'.format(self.data)) + + df = pd.read_csv(os.path.join(self.processed_dir, self.smiles_text_file_name)) + print(df.columns) + self.text_list = df["text"].tolist() + + # sampling + self.neg_sample_size = neg_sample_size + negative_sampled_index_file = os.path.join(self.root, "index", template.format(train_mode)) + print("Loading negative samples from {}".format(negative_sampled_index_file)) + f = open(negative_sampled_index_file, 'r') + neg_index_list = [] + for line in f.readlines(): + line = line.strip().split(",") + line = [int(x) for x in line] + neg_index_list.append(line) + self.neg_index_list = neg_index_list + + return + + def get_graph(self, index): + data = Data() + for key in self.data.keys: + item, slices = self.data[key], self.slices[key] + s = 
list(repeat(slice(None), item.dim())) + s[data.__cat_dim__(key, item)] = slice(slices[index], slices[index + 1]) + data[key] = item[s] + return data + + def get(self, index): + text = self.text_list[index] + data = self.get_graph(index) + neg_index_list = self.neg_index_list[index][:self.neg_sample_size] + neg_text = [self.text_list[idx] for idx in neg_index_list] + neg_index_list = self.neg_index_list[index][:self.neg_sample_size] + neg_data = [self.get_graph(idx) for idx in neg_index_list] + return text, data, neg_text, neg_data + + @property + def raw_file_names(self): + file_name_list = os.listdir(self.raw_dir) + return file_name_list + + @property + def processed_dir(self): + return os.path.join(self.root, 'processed', '{}_{}'.format(self.processed_dir_prefix, self.train_mode)) + + @property + def processed_file_names(self): + return 'geometric_data_processed.pt' + + def download(self): + return + + def process(self): + data_list, SMILES_list, text_list = [], [], [] + SMILES2description_file = os.path.join(self.root, 'raw', self.template.format(self.train_mode)) + f = open(SMILES2description_file, 'r') + + for line_id, line in enumerate(f.readlines()): + line = line.strip().split("\t", 1) + SMILES = line[0] + text = line[1] + + rdkit_mol = AllChem.MolFromSmiles(SMILES) + data = mol_to_graph_data_obj_simple(rdkit_mol) + data.id = torch.tensor([line_id]) + + data_list.append(data) + SMILES_list.append(SMILES) + text_list.append(text) + + if self.pre_filter is not None: + data_list = [data for data in data_list if self.pre_filter(data)] + + if self.pre_transform is not None: + data_list = [self.pre_transform(data) for data in data_list] + + df = pd.DataFrame( + {"text": text_list, "smiles": SMILES_list}, + ) + saver_path = os.path.join(self.processed_dir, self.smiles_text_file_name) + print("saving to {}".format(saver_path)) + df.to_csv(saver_path, index=False) + + data, slices = self.collate(data_list) + torch.save((data, slices), self.processed_paths[0]) + 
print("saving to {}".format(self.processed_paths[0])) + print() + return + + def __len__(self): + return len(self.text_list) + + +class DrugBank_Datasets_Graph_ATC(InMemoryDataset): + def __init__( + self, root, file_name, processed_dir_prefix, neg_sample_size, prompt_template="{}.", + transform=None, pre_transform=None, pre_filter=None, empty=False + ): + self.root = root + self.transform = transform + self.pre_filter = pre_filter + self.pre_transform = pre_transform + self.file_name = file_name + self.processed_dir_prefix = processed_dir_prefix + self.smiles_text_file_name = "SMILES.csv" + self.prompt_template = prompt_template + + super(DrugBank_Datasets_Graph_ATC, self).__init__(root, transform, pre_transform, pre_filter) + + if not empty: + self.data, self.slices = torch.load(self.processed_paths[0]) + print('Data: {}'.format(self.data)) + + df = pd.read_csv(os.path.join(self.processed_dir, self.smiles_text_file_name)) + self.SMILES_list = df["smiles"].tolist() + self.ATC_code_list = df["ATC_code"].tolist() + ATC_label_list = df["ATC_label"].tolist() # This is for raw TAC label + self.ATC_label_list = [self.prompt_template.format(x) for x in ATC_label_list] + + self.neg_sample_size = neg_sample_size + negative_sampled_index_file = os.path.join(self.root, "index", file_name) + print("Loading negative samples from {}".format(negative_sampled_index_file)) + f = open(negative_sampled_index_file, 'r') + neg_index_list = [] + for line in f.readlines(): + line = line.strip().split(",") + line = [int(x) for x in line] + neg_index_list.append(line) + self.neg_index_list = neg_index_list + + assert len(self.SMILES_list) == len(self.neg_index_list) == len(self.ATC_code_list) == len(self.ATC_label_list) + return + + def get_graph(self, index): + data = Data() + for key in self.data.keys: + item, slices = self.data[key], self.slices[key] + s = list(repeat(slice(None), item.dim())) + s[data.__cat_dim__(key, item)] = slice(slices[index], slices[index + 1]) + data[key] = 
item[s] + return data + + def get(self, index): + text = self.ATC_label_list[index] + data = self.get_graph(index) + neg_index_list = self.neg_index_list[index][:self.neg_sample_size] + neg_text = [self.ATC_label_list[idx] for idx in neg_index_list] + neg_index_list = self.neg_index_list[index][:self.neg_sample_size] + neg_data = [self.get_graph(idx) for idx in neg_index_list] + return text, data, neg_text, neg_data + + @property + def raw_file_names(self): + file_name_list = os.listdir(self.raw_dir) + return file_name_list + + @property + def processed_dir(self): + return os.path.join(self.root, "processed", "molecule_{}".format(self.processed_dir_prefix)) + + @property + def processed_file_names(self): + return 'geometric_data_processed.pt' + + def download(self): + return + + def process(self): + SMILES2ATC_txt_file = os.path.join(self.root, "raw", self.file_name) + + f = open(SMILES2ATC_txt_file, 'r') + data_list, SMILES_list, ATC_code_list, ATC_label_list = [], [], [], [] + for line_idx, line in enumerate(f.readlines()): + line = line.strip().split("\t") + SMILES = line[0] + ATC_code = line[1] + ATC_label = line[2] + rdkit_mol = AllChem.MolFromSmiles(SMILES) + data = mol_to_graph_data_obj_simple(rdkit_mol) + data.id = torch.tensor([line_idx]) + + data_list.append(data) + SMILES_list.append(SMILES) + ATC_code_list.append(ATC_code) + ATC_label_list.append(ATC_label) + + if self.pre_filter is not None: + data_list = [data for data in data_list if self.pre_filter(data)] + + if self.pre_transform is not None: + data_list = [self.pre_transform(data) for data in data_list] + + df = pd.DataFrame( + {"smiles": SMILES_list, "ATC_code": ATC_code_list, "ATC_label": ATC_label_list}, + ) + saver_path = os.path.join(self.processed_dir, self.smiles_text_file_name) + print("saving to {}".format(saver_path)) + df.to_csv(saver_path, index=False) + + data, slices = self.collate(data_list) + torch.save((data, slices), self.processed_paths[0]) + print("saving to 
{}".format(self.processed_paths[0])) + return + + def __len__(self): + return len(self.SMILES_list) diff --git a/open_biomed/models/MoleculeSTM/datasets/DrugBankSMILES.py b/open_biomed/models/MoleculeSTM/datasets/DrugBankSMILES.py new file mode 100644 index 0000000..1307920 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/datasets/DrugBankSMILES.py @@ -0,0 +1,94 @@ +import os +from torch.utils.data import Dataset + + +class DrugBank_Datasets_SMILES_retrieval(Dataset): + def __init__(self, root, train_mode, neg_sample_size, template="SMILES_description_{}.txt"): + self.root = root + + self.SMILES_list, self.text_list = [], [] + SMILES2description_file = os.path.join(self.root, "raw", template.format(train_mode)) + f = open(SMILES2description_file, 'r') + for line in f.readlines(): + line = line.strip().split("\t", 1) + SMILES = line[0] + text = line[1] + self.SMILES_list.append(SMILES) + self.text_list.append(text) + + self.neg_sample_size = neg_sample_size + negative_sampled_index_file = os.path.join(self.root, "index", template.format(train_mode)) + print("Loading negative samples from {}".format(negative_sampled_index_file)) + f = open(negative_sampled_index_file, 'r') + neg_index_list = [] + for line in f.readlines(): + line = line.strip().split(",") + line = [int(x) for x in line] + neg_index_list.append(line) + self.neg_index_list = neg_index_list + return + + def __getitem__(self, index): + description = self.text_list[index] + SMILES = self.SMILES_list[index] + + neg_index_list = self.neg_index_list[index][:self.neg_sample_size] + neg_description = [self.text_list[idx] for idx in neg_index_list] + + neg_index_list = self.neg_index_list[index][:self.neg_sample_size] + neg_SMILES = [self.SMILES_list[idx] for idx in neg_index_list] + + return description, SMILES, neg_description, neg_SMILES + + def __len__(self): + return len(self.SMILES_list) + + +class DrugBank_Datasets_SMILES_ATC(Dataset): + def __init__(self, root, file_name, neg_sample_size, 
prompt_template="{}."): + self.root = root + self.neg_sample_size = neg_sample_size + self.prompt_template = prompt_template + + SMILES2ATC_txt_file = os.path.join(self.root, 'raw', file_name) + + f = open(SMILES2ATC_txt_file, 'r') + SMILES_list, ATC_code_list, ATC_label_list = [], [], [] + for line in f.readlines(): + line = line.strip().split("\t") + SMILES_list.append(line[0]) + ATC_code_list.append(line[1]) + ATC_label_list.append(prompt_template.format(line[2])) + + self.SMILES_list = SMILES_list + self.ATC_code_list = ATC_code_list + self.ATC_label_list = ATC_label_list + + self.neg_sample_size = neg_sample_size + negative_sampled_index_file = os.path.join(self.root, "index", file_name) + print("Loading negative samples from {}".format(negative_sampled_index_file)) + f = open(negative_sampled_index_file, 'r') + neg_index_list = [] + for line in f.readlines(): + line = line.strip().split(",") + line = [int(x) for x in line] + neg_index_list.append(line) + self.neg_index_list = neg_index_list + + assert len(self.SMILES_list) == len(self.neg_index_list) == len(ATC_code_list) == len(ATC_label_list) + return + + def __getitem__(self, index): + text = self.ATC_label_list[index] + SMILES = self.SMILES_list[index] + + neg_index_list = self.neg_index_list[index][:self.neg_sample_size] + neg_text = [self.ATC_label_list[idx] for idx in neg_index_list] + + neg_index_list = self.neg_index_list[index][:self.neg_sample_size] + neg_SMILES = [self.SMILES_list[idx] for idx in neg_index_list] + + return text, SMILES, neg_text, neg_SMILES + + def __len__(self): + return len(self.SMILES_list) \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/datasets/MoleculeNetGraph.py b/open_biomed/models/MoleculeSTM/datasets/MoleculeNetGraph.py new file mode 100644 index 0000000..4392598 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/datasets/MoleculeNetGraph.py @@ -0,0 +1,584 @@ +import os +import pickle +from itertools import chain, repeat + +import networkx as nx 
+import numpy as np +import pandas as pd +import torch +from ogb.utils.features import atom_to_feature_vector, bond_to_feature_vector +from rdkit import Chem +from rdkit.Chem import AllChem, Descriptors +from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect +from torch.utils import data +from torch_geometric.data import (Data, InMemoryDataset, download_url, extract_zip) + + +def mol_to_graph_data_obj_simple(mol): + """ used in MoleculeNetGraphDataset() class + Converts rdkit mol objects to graph data object in pytorch geometric + NB: Uses simplified atom and bond features, and represent as indices + :param mol: rdkit mol object + :return: graph data object with the attributes: x, edge_index, edge_attr """ + + # atoms + # num_atom_features = 2 # atom type, chirality tag + atom_features_list = [] + for atom in mol.GetAtoms(): + atom_feature = atom_to_feature_vector(atom) + atom_features_list.append(atom_feature) + x = torch.tensor(np.array(atom_features_list), dtype=torch.long) + + # bonds + if len(mol.GetBonds()) <= 0: # mol has no bonds + num_bond_features = 3 # bond type & direction + edge_index = torch.empty((2, 0), dtype=torch.long) + edge_attr = torch.empty((0, num_bond_features), dtype=torch.long) + else: # mol has bonds + edges_list = [] + edge_features_list = [] + for bond in mol.GetBonds(): + i = bond.GetBeginAtomIdx() + j = bond.GetEndAtomIdx() + edge_feature = bond_to_feature_vector(bond) + + edges_list.append((i, j)) + edge_features_list.append(edge_feature) + edges_list.append((j, i)) + edge_features_list.append(edge_feature) + + # data.edge_index: Graph connectivity in COO format with shape [2, num_edges] + edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long) + + # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features] + edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.long) + + data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr) + + return data + + +def 
graph_data_obj_to_nx_simple(data): + """ torch geometric -> networkx + NB: possible issues with recapitulating relative + stereochemistry since the edges in the nx object are unordered. + :param data: pytorch geometric Data object + :return: networkx object """ + G = nx.Graph() + + # atoms + atom_features = data.x.cpu().numpy() + num_atoms = atom_features.shape[0] + for i in range(num_atoms): + temp_feature = atom_features[i] + G.add_node( + i, + x0=temp_feature[0], + x1=temp_feature[1], + x2=temp_feature[2], + x3=temp_feature[3], + x4=temp_feature[4], + x5=temp_feature[5], + x6=temp_feature[6], + x7=temp_feature[7], + x8=temp_feature[8]) + pass + + # bonds + edge_index = data.edge_index.cpu().numpy() + edge_attr = data.edge_attr.cpu().numpy() + num_bonds = edge_index.shape[1] + for j in range(0, num_bonds, 2): + begin_idx = int(edge_index[0, j]) + end_idx = int(edge_index[1, j]) + temp_feature= edge_attr[j] + if not G.has_edge(begin_idx, end_idx): + G.add_edge(begin_idx, end_idx, + e0=temp_feature[0], + e1=temp_feature[1], + e2=temp_feature[2]) + + return G + + +def nx_to_graph_data_obj_simple(G): + """ vice versa of graph_data_obj_to_nx_simple() + Assume node indices are numbered from 0 to num_nodes - 1. + NB: Uses simplified atom and bond features, and represent as indices. + NB: possible issues with recapitulating relative stereochemistry + since the edges in the nx object are unordered. 
""" + + # atoms + # num_atom_features = 2 # atom type, chirality tag + atom_features_list = [] + for _, node in G.nodes(data=True): + atom_feature = [node['x0'], node['x1'], node['x2'], node['x3'], node['x4'], node['x5'], node['x6'], node['x7'], node['x8']] + atom_features_list.append(atom_feature) + x = torch.tensor(np.array(atom_features_list), dtype=torch.long) + + # bonds + num_bond_features = 3 # bond type, bond direction + if len(G.edges()) > 0: # mol has bonds + edges_list = [] + edge_features_list = [] + for i, j, edge in G.edges(data=True): + edge_feature = [edge['e0'], edge['e1'], edge['e2']] + edges_list.append((i, j)) + edge_features_list.append(edge_feature) + edges_list.append((j, i)) + edge_features_list.append(edge_feature) + + # data.edge_index: Graph connectivity in COO format with shape [2, num_edges] + edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long) + + # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features] + edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.long) + else: # mol has no bonds + edge_index = torch.empty((2, 0), dtype=torch.long) + edge_attr = torch.empty((0, num_bond_features), dtype=torch.long) + + data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr) + + return data + + +def create_standardized_mol_id(smiles): + """ smiles -> inchi """ + + if check_smiles_validity(smiles): + # remove stereochemistry + smiles = AllChem.MolToSmiles(AllChem.MolFromSmiles(smiles), + isomericSmiles=False) + mol = AllChem.MolFromSmiles(smiles) + if mol is not None: + # to catch weird issue with O=C1O[al]2oc(=O)c3ccc(cn3)c3ccccc3c3cccc(c3)\ + # c3ccccc3c3cc(C(F)(F)F)c(cc3o2)-c2ccccc2-c2cccc(c2)-c2ccccc2-c2cccnc21 + if '.' 
in smiles: # if multiple species, pick largest molecule + mol_species_list = split_rdkit_mol_obj(mol) + largest_mol = get_largest_mol(mol_species_list) + inchi = AllChem.MolToInchi(largest_mol) + else: + inchi = AllChem.MolToInchi(mol) + return inchi + return + + +class MoleculeNetGraphDataset(InMemoryDataset): + def __init__(self, root, dataset='zinc250k', transform=None, + pre_transform=None, pre_filter=None, empty=False): + + self.root = root + self.dataset = dataset + self.transform = transform + self.pre_filter = pre_filter + self.pre_transform = pre_transform + + super(MoleculeNetGraphDataset, self).__init__(root, transform, pre_transform, pre_filter) + + if not empty: + self.data, self.slices = torch.load(self.processed_paths[0]) + print('Dataset: {}\nData: {}'.format(self.dataset, self.data)) + + def get(self, idx): + data = Data() + for key in self.data.keys: + item, slices = self.data[key], self.slices[key] + s = list(repeat(slice(None), item.dim())) + s[data.__cat_dim__(key, item)] = slice(slices[idx], slices[idx + 1]) + data[key] = item[s] + return data + + @property + def raw_file_names(self): + if self.dataset == 'davis': + file_name_list = ['davis'] + elif self.dataset == 'kiba': + file_name_list = ['kiba'] + else: + file_name_list = os.listdir(self.raw_dir) + return file_name_list + + @property + def processed_file_names(self): + return 'geometric_data_processed.pt' + + def download(self): + return + + def process(self): + + def shared_extractor(smiles_list, rdkit_mol_objs, labels): + data_list, data_smiles_list, data_label_list = [], [], [] + if labels.ndim == 1: + labels = np.expand_dims(labels, axis=1) + for i in range(len(smiles_list)): + print(i) + rdkit_mol = rdkit_mol_objs[i] + if rdkit_mol is None: + continue + data = mol_to_graph_data_obj_simple(rdkit_mol) + data.id = torch.tensor([i]) + data.y = torch.tensor(labels[i]) + data_list.append(data) + data_smiles_list.append(smiles_list[i]) + data_label_list.append(labels[i]) + return data_list, 
data_smiles_list, data_label_list + + if self.dataset == 'tox21': + smiles_list, rdkit_mol_objs, labels = \ + _load_tox21_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'hiv': + smiles_list, rdkit_mol_objs, labels = \ + _load_hiv_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'bace': + smiles_list, rdkit_mol_objs, folds, labels = \ + _load_bace_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'bbbp': + smiles_list, rdkit_mol_objs, labels = \ + _load_bbbp_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'clintox': + smiles_list, rdkit_mol_objs, labels = \ + _load_clintox_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'esol': + smiles_list, rdkit_mol_objs, labels = \ + _load_esol_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'freesolv': + smiles_list, rdkit_mol_objs, labels = \ + _load_freesolv_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'lipophilicity': + smiles_list, rdkit_mol_objs, labels = \ + _load_lipophilicity_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'malaria': + smiles_list, rdkit_mol_objs, labels = \ + _load_malaria_dataset(self.raw_paths[0]) + data_list, data_smiles_list, 
data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'cep': + smiles_list, rdkit_mol_objs, labels = \ + _load_cep_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'muv': + smiles_list, rdkit_mol_objs, labels = \ + _load_muv_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'pcba': + smiles_list, rdkit_mol_objs, labels = \ + _load_pcba_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'sider': + smiles_list, rdkit_mol_objs, labels = \ + _load_sider_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + elif self.dataset == 'toxcast': + smiles_list, rdkit_mol_objs, labels = \ + _load_toxcast_dataset(self.raw_paths[0]) + data_list, data_smiles_list, data_label_list = shared_extractor( + smiles_list, rdkit_mol_objs, labels) + + else: + raise ValueError('Dataset {} not included.'.format(self.dataset)) + + if self.pre_filter is not None: + data_list = [data for data in data_list if self.pre_filter(data)] + + if self.pre_transform is not None: + data_list = [self.pre_transform(data) for data in data_list] + + data_smiles_series = pd.Series(data_smiles_list) + saver_path = os.path.join(self.processed_dir, 'smiles.csv') + data_smiles_series.to_csv(saver_path, index=False, header=False) + + data_label_array = np.array(data_label_list) + saver_path = os.path.join(self.processed_dir, 'labels') + np.savez_compressed(saver_path, labels=data_label_array) + + data, slices = self.collate(data_list) + torch.save((data, slices), self.processed_paths[0]) + + return + + +def _load_tox21_dataset(input_path): + input_df = 
def _load_hiv_dataset(input_path):
    """Load the HIV csv as (SMILES, RDKit molecules, labels).

    The ``HIV_active`` column is the single task; entries equal to 0 are
    re-encoded as -1 before the values are returned.

    :param input_path: path to the raw csv file
    :return: tuple of (``smiles`` column as a pandas Series, list with one
        ``AllChem.MolFromSmiles`` result per SMILES, numpy array of labels)
    """
    frame = pd.read_csv(input_path, sep=',')
    smiles_list = frame['smiles']
    mols = [AllChem.MolFromSmiles(smi) for smi in smiles_list]
    # 0 -> -1; the original author notes this column has no NaNs
    targets = frame['HIV_active'].replace(0, -1)
    assert len(smiles_list) == len(mols)
    assert len(smiles_list) == len(targets)
    return smiles_list, mols, targets.values


def _load_bace_dataset(input_path):
    """Load the BACE csv as (SMILES, RDKit molecules, folds, labels).

    SMILES come from the ``mol`` column and labels from ``Class`` (0 is
    re-encoded as -1). The ``Model`` column is turned into integer fold ids:
    'Train' -> 0, 'Valid' -> 1, 'Test' -> 2.

    :param input_path: path to the raw csv file
    :return: tuple of (``mol`` column as a pandas Series, list of
        ``AllChem.MolFromSmiles`` results, numpy array of fold ids, numpy
        array of labels)
    """
    frame = pd.read_csv(input_path, sep=',')
    smiles_list = frame['mol']
    mols = [AllChem.MolFromSmiles(smi) for smi in smiles_list]
    # 0 -> -1; the original author notes this column has no NaNs
    targets = frame['Class'].replace(0, -1)

    split_ids = frame['Model']
    for split_name, split_code in (('Train', 0), ('Valid', 1), ('Test', 2)):
        split_ids = split_ids.replace(split_name, split_code)

    assert len(smiles_list) == len(mols)
    assert len(smiles_list) == len(targets)
    assert len(smiles_list) == len(split_ids)
    return smiles_list, mols, split_ids.values, targets.values
def _load_clintox_dataset(input_path):
    """Load the ClinTox csv as (canonical SMILES, RDKit molecules, labels).

    Each SMILES is parsed with ``AllChem.MolFromSmiles`` and written back with
    ``AllChem.MolToSmiles``. Entries whose molecule is ``None`` keep ``None``
    in both returned lists so that positions stay aligned with the labels.

    :param input_path: path to the raw csv file
    :return: tuple of (list of re-generated SMILES or ``None``, list of
        molecules or ``None``, numpy array of the ``FDA_APPROVED`` and
        ``CT_TOX`` columns with 0 re-encoded as -1)
    """
    input_df = pd.read_csv(input_path, sep=',')
    smiles_list = input_df['smiles']
    rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list]
    # The former "m if m is not None else None" pass was an identity copy of
    # this list, so the parsed molecules are used directly.
    preprocessed_smiles_list = [
        AllChem.MolToSmiles(m) if m is not None else None
        for m in rdkit_mol_objs_list
    ]
    tasks = ['FDA_APPROVED', 'CT_TOX']
    # convert 0 to -1; the original author notes there are no NaNs
    labels = input_df[tasks].replace(0, -1)
    assert len(smiles_list) == len(rdkit_mol_objs_list)
    assert len(smiles_list) == len(preprocessed_smiles_list)
    assert len(smiles_list) == len(labels)
    return preprocessed_smiles_list, rdkit_mol_objs_list, labels.values


def _load_esol_dataset(input_path):
    """Load the ESOL csv as (SMILES, RDKit molecules, labels).

    Labels are the raw values of the
    ``measured log solubility in mols per litre`` column.

    :param input_path: path to the raw csv file
    :return: tuple of (``smiles`` column as a pandas Series, list of
        ``AllChem.MolFromSmiles`` results, numpy array of labels)
    """
    # NB: some examples have multiple species
    input_df = pd.read_csv(input_path, sep=',')
    smiles_list = input_df['smiles']
    rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list]
    labels = input_df['measured log solubility in mols per litre']
    assert len(smiles_list) == len(rdkit_mol_objs_list)
    assert len(smiles_list) == len(labels)
    return smiles_list, rdkit_mol_objs_list, labels.values
def _read_smiles_table(input_path):
    """Read a csv and parse its ``smiles`` column with ``AllChem.MolFromSmiles``."""
    frame = pd.read_csv(input_path, sep=',')
    smiles_list = frame['smiles']
    mols = [AllChem.MolFromSmiles(smi) for smi in smiles_list]
    return frame, smiles_list, mols


def _pack_loader_result(smiles_list, mols, labels):
    """Check the three sequences have equal length and return them as a tuple."""
    assert len(smiles_list) == len(mols)
    assert len(smiles_list) == len(labels)
    return smiles_list, mols, labels.values


def _load_lipophilicity_dataset(input_path):
    """Load the lipophilicity csv; labels are the raw ``exp`` column.

    :param input_path: path to the raw csv file
    :return: (``smiles`` Series, list of parsed molecules, numpy label array)
    """
    frame, smiles_list, mols = _read_smiles_table(input_path)
    return _pack_loader_result(smiles_list, mols, frame['exp'])


def _load_malaria_dataset(input_path):
    """Load the malaria csv; labels are the raw ``activity`` column.

    :param input_path: path to the raw csv file
    :return: (``smiles`` Series, list of parsed molecules, numpy label array)
    """
    frame, smiles_list, mols = _read_smiles_table(input_path)
    return _pack_loader_result(smiles_list, mols, frame['activity'])


def _load_cep_dataset(input_path):
    """Load the CEP csv; labels are the raw ``PCE`` column.

    :param input_path: path to the raw csv file
    :return: (``smiles`` Series, list of parsed molecules, numpy label array)
    """
    frame, smiles_list, mols = _read_smiles_table(input_path)
    return _pack_loader_result(smiles_list, mols, frame['PCE'])


def _load_muv_dataset(input_path):
    """Load the MUV csv with its 17 ``MUV-*`` task columns.

    Label encoding: 0 becomes -1 first, then missing values become 0.

    :param input_path: path to the raw csv file
    :return: (``smiles`` Series, list of parsed molecules, 2-D numpy label array)
    """
    frame, smiles_list, mols = _read_smiles_table(input_path)
    tasks = ['MUV-466', 'MUV-548', 'MUV-600', 'MUV-644', 'MUV-652', 'MUV-689',
             'MUV-692', 'MUV-712', 'MUV-713', 'MUV-733', 'MUV-737', 'MUV-810',
             'MUV-832', 'MUV-846', 'MUV-852', 'MUV-858', 'MUV-859']
    # order matters: real zeros must become -1 before NaNs are filled with 0
    labels = frame[tasks].replace(0, -1).fillna(0)
    return _pack_loader_result(smiles_list, mols, labels)


def _load_sider_dataset(input_path):
    """Load the SIDER csv with its 27 side-effect task columns.

    Label encoding: 0 becomes -1; no NaN filling is applied.

    :param input_path: path to the raw csv file
    :return: (``smiles`` Series, list of parsed molecules, 2-D numpy label array)
    """
    frame, smiles_list, mols = _read_smiles_table(input_path)
    tasks = ['Hepatobiliary disorders',
             'Metabolism and nutrition disorders', 'Product issues', 'Eye disorders',
             'Investigations', 'Musculoskeletal and connective tissue disorders',
             'Gastrointestinal disorders', 'Social circumstances',
             'Immune system disorders', 'Reproductive system and breast disorders',
             'Neoplasms benign, malignant and unspecified (incl cysts and polyps)',
             'General disorders and administration site conditions',
             'Endocrine disorders', 'Surgical and medical procedures',
             'Vascular disorders', 'Blood and lymphatic system disorders',
             'Skin and subcutaneous tissue disorders',
             'Congenital, familial and genetic disorders',
             'Infections and infestations',
             'Respiratory, thoracic and mediastinal disorders',
             'Psychiatric disorders', 'Renal and urinary disorders',
             'Pregnancy, puerperium and perinatal conditions',
             'Ear and labyrinth disorders', 'Cardiac disorders',
             'Nervous system disorders',
             'Injury, poisoning and procedural complications']
    labels = frame[tasks].replace(0, -1)
    return _pack_loader_result(smiles_list, mols, labels)
def check_smiles_validity(smiles):
    """Return True when ``Chem.MolFromSmiles`` yields a truthy molecule.

    Any ordinary exception raised while parsing is treated as "invalid".
    Only ``Exception`` is caught, so KeyboardInterrupt and SystemExit are no
    longer swallowed as they were by the previous bare ``except``.

    :param smiles: SMILES string to test
    :return: bool
    """
    try:
        return bool(Chem.MolFromSmiles(smiles))
    except Exception:
        return False


def split_rdkit_mol_obj(mol):
    """Split a molecule into one RDKit mol object per '.'-separated species.

    The molecule is written out as isomeric SMILES, split on '.', and every
    fragment that parses to a molecule is returned. A single-species molecule
    therefore gives a one-element list.

    :param mol: RDKit mol object
    :return: list of RDKit mol objects
    """
    smiles = AllChem.MolToSmiles(mol, isomericSmiles=True)
    mol_species_list = []
    for fragment in smiles.split('.'):
        # Parse once and keep the result; previously each fragment was parsed
        # for the validity check and then parsed again to build the object.
        species = AllChem.MolFromSmiles(fragment)
        if species:
            mol_species_list.append(species)
    return mol_species_list


def get_largest_mol(mol_list):
    """Return the mol object with the most atoms.

    Ties are resolved in favour of the earliest entry in ``mol_list``.

    :param mol_list: non-empty list of objects exposing ``GetAtoms()``
    :return: the element of ``mol_list`` with the largest atom count
    :raises ValueError: if ``mol_list`` is empty
    """
    # max() returns the first maximal element, matching index(max(...))
    return max(mol_list, key=lambda m: len(m.GetAtoms()))
class PubChemSTM_Datasets_Only_SMILES(Dataset):
    """Dataset of the distinct SMILES strings found in ``raw/CID2SMILES.csv``.

    The ``SMILES`` column is de-duplicated and sorted; ``subset_size``, when
    given, keeps only the first ``subset_size`` entries of that sorted list.
    """

    def __init__(self, root, subset_size=None):
        self.root = root

        csv_path = os.path.join(self.root, "raw/CID2SMILES.csv")
        distinct_smiles = set(pd.read_csv(csv_path)["SMILES"].tolist())
        ordered = sorted(distinct_smiles)

        self.SMILES_list = ordered if subset_size is None else ordered[:subset_size]

    def __getitem__(self, index):
        """Return the SMILES string stored at ``index``."""
        return self.SMILES_list[index]

    def __len__(self):
        """Number of SMILES strings kept."""
        return len(self.SMILES_list)
class PubChemSTM_SubDatasets_SMILES(PubChemSTM_Datasets_SMILES):
    """Size-capped variant of ``PubChemSTM_Datasets_SMILES``.

    Reads the same ``raw/CID2text.json`` and ``raw/CID2SMILES.csv`` files but
    stops collecting (CID, text) pairs once ``size`` of them are stored.
    """

    def __init__(self, root, size):
        """
        :param root: dataset root containing the ``raw`` folder
        :param size: maximum number of (CID, text) pairs to keep
        """
        self.root = root

        CID2text_file = os.path.join(self.root, "raw/CID2text.json")
        CID2SMILES_file = os.path.join(self.root, "raw/CID2SMILES.csv")
        self.load_CID2SMILES(CID2text_file, CID2SMILES_file)

        self.text_list = []
        for CID, value_list in self.CID2text_data.items():
            # Enforce the cap exactly: previously the list could end up with
            # more than `size` entries.
            remaining = size - len(self.text_list)
            if remaining <= 0:
                break
            if CID not in self.CID2SMILES:
                print("CID {} missing".format(CID))
                continue
            self.text_list.extend([CID, value] for value in value_list[:remaining])
        print("len of text_list: {}".format(len(self.text_list)))
        return
pre_filter=None): + self.root = root + self.transform = transform + self.pre_transform = pre_transform + self.pre_filter = pre_filter + # only for `process` function + self.SDF_file_path = os.path.join(self.root, "raw/molecules.sdf") + self.CID2text_file = os.path.join(self.root, "raw/CID2text.json") + # `process` result file + self.CID_text_file_path = os.path.join(self.root, "processed/CID_text_list.csv") + + super(PubChemSTM_Datasets_Graph, self).__init__(root, transform, pre_transform, pre_filter) + + self.load_Graph_CID_and_text() + return + + @property + def processed_file_names(self): + return 'geometric_data_processed.pt' + + def process(self): + suppl = Chem.SDMolSupplier(self.SDF_file_path) + + CID2graph = {} + for mol in tqdm(suppl): + CID = mol.GetProp("PUBCHEM_COMPOUND_CID") + CID = int(CID) + graph = mol_to_graph_data_obj_simple(mol) + CID2graph[CID] = graph + print("CID2graph", len(CID2graph)) + + with open(self.CID2text_file, "r") as f: + CID2text_data = json.load(f) + print("CID2data", len(CID2text_data)) + + CID_list, graph_list, text_list = [], [], [] + for CID, value_list in CID2text_data.items(): + CID = int(CID) + if CID not in CID2graph: + print("CID {} missing".format(CID)) + continue + graph = CID2graph[CID] + for value in value_list: + text_list.append(value) + CID_list.append(CID) + graph_list.append(graph) + + CID_text_df = pd.DataFrame({"CID": CID_list, "text": text_list}) + CID_text_df.to_csv(self.CID_text_file_path, index=None) + + if self.pre_filter is not None: + graph_list = [graph for graph in graph_list if self.pre_filter(graph)] + + if self.pre_transform is not None: + graph_list = [self.pre_transform(graph) for graph in graph_list] + + graphs, slices = self.collate(graph_list) + torch.save((graphs, slices), self.processed_paths[0]) + return + + def load_Graph_CID_and_text(self): + self.graphs, self.slices = torch.load(self.processed_paths[0]) + + CID_text_df = pd.read_csv(self.CID_text_file_path) + self.CID_list = 
class PubChemSTM_SubDatasets_Graph(PubChemSTM_Datasets_Graph):
    """``PubChemSTM_Datasets_Graph`` restricted to its first ``size`` items.

    Only the reported length changes; files, processing and ``get`` are
    inherited from the parent class.
    """

    def __init__(self, root, size, transform=None, pre_transform=None, pre_filter=None):
        """
        :param root: dataset root directory
        :param size: number of leading (text, graph) pairs to expose
        :param transform: forwarded to the parent dataset
        :param pre_transform: forwarded to the parent dataset
        :param pre_filter: forwarded to the parent dataset
        """
        # Set before the parent initialiser runs, as the original code did.
        self.size = size
        # The parent __init__ performs the attribute setup, processing and
        # loading that used to be copy-pasted here.
        super().__init__(root, transform, pre_transform, pre_filter)
        return

    def __len__(self):
        """Return ``size``, clamped to the number of loaded texts."""
        texts = getattr(self, "text_list", None)
        if texts is None:
            # texts are not loaded yet (still inside initialisation)
            return self.size
        return min(self.size, len(texts))
+ def processed_dir(self): + return os.path.join(self.root, 'processed_molecule_only') + + @property + def processed_file_names(self): + return 'geometric_data_processed.pt' + + def process(self): + suppl = Chem.SDMolSupplier(self.SDF_file_path) + + SMILES_list, graph_list = [], [] + for mol in tqdm(suppl): + SMILES = Chem.MolToSmiles(mol) + SMILES_list.append(SMILES) + graph = mol_to_graph_data_obj_simple(mol) + graph_list.append(graph) + + SMILES_df = pd.DataFrame({"smiles": SMILES_list}) + SMILES_df.to_csv(self.SMILES_file_path, index=None) + + if self.pre_filter is not None: + graph_list = [graph for graph in graph_list if self.pre_filter(graph)] + + if self.pre_transform is not None: + graph_list = [self.pre_transform(graph) for graph in graph_list] + + graphs, slices = self.collate(graph_list) + torch.save((graphs, slices), self.processed_paths[0]) + return + + def get(self, idx): + SMILES = self.SMILES_list[idx] + + data = Data() + for key in self.graphs.keys: + item, slices = self.graphs[key], self.slices[key] + s = list(repeat(slice(None), item.dim())) + s[data.__cat_dim__(key, item)] = slice(slices[idx], slices[idx + 1]) + data[key] = item[s] + return SMILES, data + + def __len__(self): + return len(self.SMILES_list) diff --git a/open_biomed/models/MoleculeSTM/datasets/PubChemSTM_raw.py b/open_biomed/models/MoleculeSTM/datasets/PubChemSTM_raw.py new file mode 100644 index 0000000..a484f6b --- /dev/null +++ b/open_biomed/models/MoleculeSTM/datasets/PubChemSTM_raw.py @@ -0,0 +1,172 @@ +import os +from itertools import repeat +import pandas as pd +import json +from tqdm import tqdm + +import torch +from torch_geometric.data import Data, InMemoryDataset + +from rdkit import Chem +from rdkit import RDLogger +RDLogger.DisableLog('rdApp.*') + +from models.MoleculeSTM.datasets.utils import mol_to_graph_data_obj_simple + +from models.MoleculeSTM.datasets import PubChemSTM_Datasets_SMILES + + +class PubChemSTM_Datasets_Raw_SMILES(PubChemSTM_Datasets_SMILES): + def 
class PubChemSTM_SubDatasets_Raw_SMILES(PubChemSTM_Datasets_Raw_SMILES):
    """Size-capped variant of ``PubChemSTM_Datasets_Raw_SMILES``.

    Reads ``raw/CID2text_raw.json`` and ``raw/CID2SMILES.csv`` and stops
    collecting (CID, text) pairs once ``size`` of them are stored.
    """

    def __init__(self, root, size):
        """
        :param root: dataset root containing the ``raw`` folder
        :param size: maximum number of (CID, text) pairs to keep
        """
        self.root = root

        CID2text_file = os.path.join(self.root, "raw/CID2text_raw.json")
        CID2SMILES_file = os.path.join(self.root, "raw/CID2SMILES.csv")
        self.load_CID2SMILES(CID2text_file, CID2SMILES_file)

        self.text_list = []
        for CID, value_list in self.CID2text_data.items():
            # Enforce the cap exactly: previously the list could end up with
            # more than `size` entries.
            remaining = size - len(self.text_list)
            if remaining <= 0:
                break
            if CID not in self.CID2SMILES:
                print("CID {} missing".format(CID))
                continue
            self.text_list.extend([CID, value] for value in value_list[:remaining])
        print("len of text_list: {}".format(len(self.text_list)))
        return
self.load_Graph_CID_and_text() + return + + @property + def processed_dir(self) -> str: + return os.path.join(self.root, 'processed_raw') + + @property + def processed_file_names(self): + return 'geometric_data_processed.pt' + + def process(self): + suppl = Chem.SDMolSupplier(self.SDF_file_path) + + CID2graph = {} + for mol in tqdm(suppl): + CID = mol.GetProp("PUBCHEM_COMPOUND_CID") + CID = int(CID) + graph = mol_to_graph_data_obj_simple(mol) + CID2graph[CID] = graph + print("CID2graph", len(CID2graph)) + + with open(self.CID2text_file, "r") as f: + CID2text_data = json.load(f) + print("CID2data", len(CID2text_data)) + + CID_list, graph_list, text_list = [], [], [] + for CID, value_list in CID2text_data.items(): + CID = int(CID) + if CID not in CID2graph: + print("CID {} missing".format(CID)) + continue + graph = CID2graph[CID] + for value in value_list: + text_list.append(value) + CID_list.append(CID) + graph_list.append(graph) + + CID_text_df = pd.DataFrame({"CID": CID_list, "text": text_list}) + CID_text_df.to_csv(self.CID_text_file_path, index=None) + + if self.pre_filter is not None: + graph_list = [graph for graph in graph_list if self.pre_filter(graph)] + + if self.pre_transform is not None: + graph_list = [self.pre_transform(graph) for graph in graph_list] + + graphs, slices = self.collate(graph_list) + torch.save((graphs, slices), self.processed_paths[0]) + return + + def load_Graph_CID_and_text(self): + self.graphs, self.slices = torch.load(self.processed_paths[0]) + + CID_text_df = pd.read_csv(self.CID_text_file_path) + self.CID_list = CID_text_df["CID"].tolist() + self.text_list = CID_text_df["text"].tolist() + return + + def get(self, idx): + text = self.text_list[idx] + + data = Data() + for key in self.graphs.keys: + item, slices = self.graphs[key], self.slices[key] + s = list(repeat(slice(None), item.dim())) + s[data.__cat_dim__(key, item)] = slice(slices[idx], slices[idx + 1]) + data[key] = item[s] + return text, data + + def __len__(self): + 
class PubChemSTM_SubDatasets_Raw_Graph(PubChemSTM_Datasets_Raw_Graph):
    """``PubChemSTM_Datasets_Raw_Graph`` restricted to its first ``size`` items.

    Only the reported length changes; files, processing and ``get`` are
    inherited from the parent class.
    """

    def __init__(self, root, size, transform=None, pre_transform=None, pre_filter=None):
        """
        :param root: dataset root directory
        :param size: number of leading (text, graph) pairs to expose
        :param transform: forwarded to the parent dataset
        :param pre_transform: forwarded to the parent dataset
        :param pre_filter: forwarded to the parent dataset
        """
        # Set before the parent initialiser runs, as the original code did.
        self.size = size
        # The parent __init__ already sets the paths and calls
        # load_Graph_CID_and_text(); repeating both here loaded the processed
        # file from disk twice.
        super().__init__(root, transform, pre_transform, pre_filter)
        return

    def __len__(self):
        """Return ``size``, clamped to the number of loaded texts."""
        texts = getattr(self, "text_list", None)
        if texts is None:
            # texts are not loaded yet (still inside initialisation)
            return self.size
        return min(self.size, len(texts))
class ZINC250K_Dataset_SMILES(Dataset):
    """SMILES strings from ``raw/250k_rndm_zinc_drugs_clean_3.csv``.

    On construction the whitespace-stripped ``smiles`` column is also exported
    to ``raw/smiles.csv`` (one SMILES per line, no header) unless that file
    already exists. The export always contains the full list; ``subset_size``
    only limits what the dataset itself serves.
    """

    def __init__(self, root, subset_size=512):
        """
        :param root: dataset root containing the ``raw`` folder
        :param subset_size: keep only this many leading SMILES; ``None`` keeps all
        """
        self.root = root

        source_csv = os.path.join(self.root, "raw/250k_rndm_zinc_drugs_clean_3.csv")
        raw_column = pd.read_csv(source_csv)['smiles'].tolist()  # Already canonical SMILES
        self.SMILES_list = [entry.strip() for entry in raw_column]

        export_csv = os.path.join(self.root, "raw/smiles.csv")
        already_exported = os.path.exists(export_csv)
        if not already_exported:
            print("saving to {}".format(export_csv))
            pd.Series(self.SMILES_list).to_csv(export_csv, index=False, header=False)

        if subset_size is None:
            return
        self.SMILES_list = self.SMILES_list[:subset_size]

    def __getitem__(self, index):
        """Return the SMILES string stored at ``index``."""
        return self.SMILES_list[index]

    def __len__(self):
        """Number of SMILES strings served by the dataset."""
        return len(self.SMILES_list)
def mol_to_graph_data_obj_simple(mol):
    """Convert an RDKit molecule into a torch_geometric ``Data`` graph.

    Node features (``x``) hold two integers per atom: the index of its atomic
    number and of its chirality tag in ``allowable_features``. Edge features
    (``edge_attr``) hold two integers per directed edge: the index of the bond
    type and of the bond direction. Every bond is stored twice, once per
    direction, as consecutive columns of ``edge_index``.

    :param mol: RDKit mol object
    :return: ``Data`` with ``x``, ``edge_index`` and ``edge_attr`` as long tensors
    """
    atomic_num_choices = allowable_features['possible_atomic_num_list']
    chirality_choices = allowable_features['possible_chirality_list']
    bond_type_choices = allowable_features['possible_bonds']
    bond_dir_choices = allowable_features['possible_bond_dirs']

    # ---- atoms: [atomic number index, chirality tag index] ----
    node_rows = []
    for atom in mol.GetAtoms():
        atomic_num = atom.GetAtomicNum()
        chiral_tag = atom.GetChiralTag()
        if atomic_num == 0:
            atomic_num = 118  # Only for one extreme case
        node_rows.append([
            atomic_num_choices.index(atomic_num),
            chirality_choices.index(chiral_tag),
        ])
    x = torch.tensor(np.array(node_rows), dtype=torch.long)

    # ---- bonds: [bond type index, bond direction index] ----
    if len(mol.GetBonds()) > 0:
        directed_pairs = []
        directed_feats = []
        for bond in mol.GetBonds():
            begin = bond.GetBeginAtomIdx()
            end = bond.GetEndAtomIdx()
            bond_type = bond.GetBondType()
            bond_dir = bond.GetBondDir()
            if bond_dir not in bond_dir_choices:
                bond_dir = 0
            feat = [bond_type_choices.index(bond_type),
                    bond_dir_choices.index(bond_dir)]
            # both directions share the same feature row
            directed_pairs.extend([(begin, end), (end, begin)])
            directed_feats.extend([feat, feat])

        # connectivity in COO format, shape [2, num_edges]
        edge_index = torch.tensor(np.array(directed_pairs).T, dtype=torch.long)
        # shape [num_edges, num_edge_features]
        edge_attr = torch.tensor(np.array(directed_feats), dtype=torch.long)
    else:
        # molecule without bonds: empty tensors with the expected widths
        num_bond_features = 2  # bond type & direction
        edge_index = torch.empty((2, 0), dtype=torch.long)
        edge_attr = torch.empty((0, num_bond_features), dtype=torch.long)

    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
mol.GetBondBetweenAtoms(begin_idx, end_idx) + new_bond.SetBondDir(bond_dir) + return mol + + +def graph_data_obj_to_nx_simple(data): + G = nx.Graph() + + # atoms + atom_features = data.x.cpu().numpy() + num_atoms = atom_features.shape[0] + for i in range(num_atoms): + atomic_num_idx, chirality_tag_idx = atom_features[i] + G.add_node(i, atom_num_idx=atomic_num_idx, + chirality_tag_idx=chirality_tag_idx) + pass + + # bonds + edge_index = data.edge_index.cpu().numpy() + edge_attr = data.edge_attr.cpu().numpy() + num_bonds = edge_index.shape[1] + for j in range(0, num_bonds, 2): + begin_idx = int(edge_index[0, j]) + end_idx = int(edge_index[1, j]) + bond_type_idx, bond_dir_idx = edge_attr[j] + if not G.has_edge(begin_idx, end_idx): + G.add_edge(begin_idx, end_idx, + bond_type_idx=bond_type_idx, + bond_dir_idx=bond_dir_idx) + + return G + + +def nx_to_graph_data_obj_simple(G): + # atoms + # num_atom_features = 2 # atom type, chirality tag + atom_features_list = [] + for _, node in G.nodes(data=True): + atom_feature = [node['atom_num_idx'], node['chirality_tag_idx']] + atom_features_list.append(atom_feature) + x = torch.tensor(np.array(atom_features_list), dtype=torch.long) + + # bonds + num_bond_features = 2 # bond type, bond direction + if len(G.edges()) > 0: # mol has bonds + edges_list = [] + edge_features_list = [] + for i, j, edge in G.edges(data=True): + edge_feature = [edge['bond_type_idx'], edge['bond_dir_idx']] + edges_list.append((i, j)) + edge_features_list.append(edge_feature) + edges_list.append((j, i)) + edge_features_list.append(edge_feature) + + # data.edge_index: Graph connectivity in COO format with shape [2, num_edges] + edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long) + + # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features] + edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.long) + else: # mol has no bonds + edge_index = torch.empty((2, 0), dtype=torch.long) + edge_attr = torch.empty((0, 
num_bond_features), dtype=torch.long) + + data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr) + + return data diff --git a/open_biomed/models/MoleculeSTM/downstream_molecule_edit_utils.py b/open_biomed/models/MoleculeSTM/downstream_molecule_edit_utils.py new file mode 100644 index 0000000..9dd6d61 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/downstream_molecule_edit_utils.py @@ -0,0 +1,503 @@ +import os +import copy +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import AutoModel, AutoTokenizer +from models.MoleculeSTM.models.mega_molbart.mega_mol_bart import MegaMolBART +from models.MoleculeSTM.models import GNN, GNN_graphpred, MLP +from rdkit import Chem, RDLogger +from rdkit.Chem import AllChem, Descriptors +from rdkit import DataStructs +lg = RDLogger.logger() +lg.setLevel(RDLogger.CRITICAL) + + +def get_SMILES_list(args): + if args.input_SMILES is not None: + SMILES_list = [args.input_SMILES] + else: + SMILES_list = [] + f = open(args.input_SMILES_file, 'r') + lines = f.readlines() + for line in lines: + SMILES = line.strip() + if len(SMILES) > 0: + SMILES_list.append(SMILES) + return SMILES_list + + +description_dict = { + 101: "This molecule is soluble in water.", + 102: "This molecule is insoluble in water.", + 103: "This molecule is like a drug.", + 104: "This molecule is not like a drug.", + 105: "This molecule has high permeability.", + 106: "This molecule has low permeability.", + 107: "This molecule has more hydrogen bond acceptors.", + 108: "This molecule has more hydrogen bond donors.", + 109: "This molecule has high bioavailability.", + 110: "This molecule has low toxicity.", + 111: "This molecule is metabolically stable.", + + 201: "This molecule is soluble in water and has more hydrogen bond acceptors.", + 202: "This molecule is insoluble in water and has more hydrogen bond acceptors.", + 203: "This molecule is soluble in water and has more hydrogen bond donors.", + 204: "This molecule is 
insoluble in water and has more hydrogen bond donors.", + 205: "This molecule is soluble in water and has high permeability.", + 206: "This molecule is soluble in water and has low permeability.", + + 301: "This molecule looks like Penicillin.", + 302: "This molecule looks like Aspirin.", + 303: "This molecule looks like Caffeine.", + 304: "This molecule looks like Cholesterol.", + 305: "This molecule looks like Dopamine.", + 306: "This molecule looks like Cysteine.", + 307: "This molecule looks like Glutathione.", + + 401: "This molecule is tested positive in an assay that are inhibitors and substrates of an enzyme protein. It uses molecular oxygen inserting one oxygen atom into a substrate, and reducing the second into a water molecule.", + 402: "This molecule is tested positive in an assay for Anthrax Lethal, which acts as a protease that cleaves the N-terminal of most dual specificity mitogen-activated protein kinase kinases.", + 403: "This molecule is tested positive in an assay for Activators of ClpP, which cleaves peptides in various proteins in a process that requires ATP hydrolysis and has a limited peptidase activity in the absence of ATP-binding subunits.", + 404: "This molecule is tested positive in an assay for activators involved in the transport of proteins between the endosomes and the trans Golgi network.", + 405: "This molecule is an inhibitor of a protein that prevents the establishment of the cellular antiviral state by inhibiting ubiquitination that triggers antiviral transduction signal and inhibits post-transcriptional processing of cellular pre-mRNA.", + 406: "This molecule is tested positive in the high throughput screening assay to identify inhibitors of the SARS coronavirus 3C-like Protease, which cleaves the C-terminus of replicase polyprotein at 11 sites.", +} + + +def get_description_list(args): + if args.input_description is not None: + description_list = [args.input_description] + elif args.input_description_id is None: + raise 
ValueError + else: + print("Use {} descrition.".format(args.input_description_id)) + description_list = [description_dict[args.input_description_id]] + print("description_list", description_list) + return description_list + + +# https://pubchem.ncbi.nlm.nih.gov/compound/5904 +# Penicillin_SMILES = "CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C" +Penicillin_SMILES = "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O" + +# https://pubchem.ncbi.nlm.nih.gov/compound/2244 +# Aspirin_SMILES = "CC(=O)OC1=CC=CC=C1C(=O)O" +Aspirin_SMILES = "CC(=O)Oc1ccccc1C(=O)O" + +# https://pubchem.ncbi.nlm.nih.gov/compound/2519 +# Caffeine_SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" +Caffeine_SMILES = "Cn1c(=O)c2c(ncn2C)n(C)c1=O" + +# https://pubchem.ncbi.nlm.nih.gov/compound/5997 +# Cholesterol_SMILES = "CC(C)CCCC(C)C1CCC2C1(CCC3C2CC=C4C3(CCC(C4)O)C)C" +Cholesterol_SMILES = "CC(C)CCCC(C)C1CCC2C3CC=C4CC(O)CCC4(C)C3CCC12C" + +# https://pubchem.ncbi.nlm.nih.gov/compound/681 +# Dopamine_SMILES = "C1=CC(=C(C=C1CCN)O)O" +Dopamine_SMILES = "NCCc1ccc(O)c(O)c1" + +# https://pubchem.ncbi.nlm.nih.gov/compound/5862 +# Cysteine_SMILES = "C(C(C(=O)O)N)S" +Cysteine_SMILES = "NC(CS)C(=O)O" + +# https://pubchem.ncbi.nlm.nih.gov/compound/124886 +# Glutathione_SMILES = "C(CC(=O)NC(CS)C(=O)NCC(=O)O)C(C(=O)O)N" +Glutathione_SMILES = "NC(CCC(=O)NC(CS)C(=O)NCC(=O)O)C(=O)O" + + +def load_molecule_models(args): + """ + This function returns the two encoders, one for molecule generative model and one for CLIP. 
+ """ + if args.MoleculeSTM_molecule_type == "SMILES": + # This is loading from the pretarined_MegaMolBART + MegaMolBART_wrapper = MegaMolBART(vocab_path=args.vocab_path, input_dir=args.MegaMolBART_generation_model_dir, output_dir=None) + molecule_model_generation = copy.deepcopy(MegaMolBART_wrapper.model) + print("Loading from pretrained MegaMolBART ({}).".format(args.MegaMolBART_generation_model_dir)) + molecule_dim_generation = 256 + + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "molecule_model.pth") + molecule_model_MoleculeSTM = MegaMolBART_wrapper.model + state_dict = torch.load(input_model_path, map_location='cpu') + print("Loading from {}...".format(input_model_path)) + molecule_model_MoleculeSTM.load_state_dict(state_dict) + molecule_dim_MoleculeSTM = args.SSL_emb_dim + + mol2latent_MoleculeSTM = nn.Linear(256, molecule_dim_MoleculeSTM) + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "mol2latent_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + mol2latent_MoleculeSTM.load_state_dict(state_dict) + + else: + # This is loading from the pretarined_MegaMolBART + MegaMolBART_wrapper = MegaMolBART(vocab_path=args.vocab_path, input_dir=args.MegaMolBART_generation_model_dir, output_dir=None) + molecule_model_generation = copy.deepcopy(MegaMolBART_wrapper.model) + print("Loading from pretrained MegaMolBART ({}).".format(args.MegaMolBART_generation_model_dir)) + molecule_dim_generation = 256 + + # This is loading GNN from the pretrained_GNN + molecule_node_model = GNN(num_layer=args.num_layer, emb_dim=args.gnn_emb_dim, JK=args.JK, drop_ratio=args.dropout_ratio, gnn_type=args.gnn_type) + molecule_model_MoleculeSTM = GNN_graphpred(num_layer=args.num_layer, emb_dim=args.gnn_emb_dim, JK=args.JK, graph_pooling=args.graph_pooling, num_tasks=1, molecule_node_model=molecule_node_model) + print("Start from pretrained model (MoleculeSTM) in 
{}.".format(args.MoleculeSTM_model_dir)) + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "molecule_model.pth") + state_dict = torch.load(input_model_path, map_location='cpu') + molecule_model_MoleculeSTM.load_state_dict(state_dict) + molecule_dim_MoleculeSTM = args.SSL_emb_dim + + mol2latent_MoleculeSTM = nn.Linear(300, molecule_dim_MoleculeSTM) + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "mol2latent_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + mol2latent_MoleculeSTM.load_state_dict(state_dict) + + return MegaMolBART_wrapper, molecule_model_generation, molecule_dim_generation, \ + molecule_model_MoleculeSTM, mol2latent_MoleculeSTM, molecule_dim_MoleculeSTM + + +def load_language_molecule_and_edit_models(args): + pretrained_SciBERT_folder = os.path.join(args.dataset_path, 'pretrained_SciBERT') + text_tokenizer = AutoTokenizer.from_pretrained(args.text_mode, cache_dir=pretrained_SciBERT_folder) + text_model = AutoModel.from_pretrained(args.text_mode, cache_dir=pretrained_SciBERT_folder) + + text_dim = 768 + + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "text_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + text_model.load_state_dict(state_dict) + + """ + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "molecule_model.pth") + print("Loading from {}...".format(input_model_path)) + MegaMolBART_wrapper = MegaMolBART(input_dir=None, output_dir=None) + molecule_model = MegaMolBART_wrapper.model + state_dict = torch.load(input_model_path, map_location='cpu') + molecule_model.load_state_dict(state_dict) + """ + # This is loading from the pretarined_MegaMolBART + MegaMolBART_wrapper = MegaMolBART(vocab_path=args.vocab_path, input_dir=args.MegaMolBART_generation_model_dir, output_dir=None) + molecule_model = MegaMolBART_wrapper.model + 
print("Loading from pretrained MegaMolBART ({}).".format(args.MegaMolBART_generation_model_dir)) + molecule_dim_generation = 256 + if args.MoleculeSTM_molecule_type == "SMILES": # For MegaMolBART + molecule_dim_MoleculeSTM = 256 + else: # For GIN + molecule_dim_MoleculeSTM = 300 + + text2latent = nn.Linear(text_dim, args.SSL_emb_dim) + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "text2latent_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + text2latent.load_state_dict(state_dict) + + mol2latent = nn.Linear(molecule_dim_MoleculeSTM, args.SSL_emb_dim) + input_model_path = os.path.join(args.MoleculeSTM_model_dir, "mol2latent_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + mol2latent.load_state_dict(state_dict) + + # generation2MoleculeSTM = nn.Linear(molecule_dim_generation, args.SSL_emb_dim) + generation2MoleculeSTM = MLP(molecule_dim_generation, [args.SSL_emb_dim, args.SSL_emb_dim]) + input_model_path = os.path.join(args.language_edit_model_dir, "generation2foundation_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + generation2MoleculeSTM.load_state_dict(state_dict) + + # MoleculeSTM2generation = nn.Linear(args.SSL_emb_dim, molecule_dim_generation) + MoleculeSTM2generation = MLP(args.SSL_emb_dim, [molecule_dim_generation, molecule_dim_generation]) + input_model_path = os.path.join(args.language_edit_model_dir, "foundation2generation_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + MoleculeSTM2generation.load_state_dict(state_dict) + + return text_model, text_tokenizer, text_dim, molecule_model, MegaMolBART_wrapper, molecule_dim_generation, text2latent, mol2latent, generation2MoleculeSTM, MoleculeSTM2generation + + +def 
clip_loss_for_edit(molecule_repr, text_repr): + molecule_repr = F.normalize(molecule_repr, dim=-1) + text_repr = F.normalize(text_repr, dim=-1) + + similarity = -torch.mm(molecule_repr, text_repr.transpose(0, 1))[0] + return similarity + + +def get_molecule_similarity(mol_a, mol_b): + fp_a = AllChem.GetMorganFingerprintAsBitVect(mol_a, 2, nBits=1024) + fp_b = AllChem.GetMorganFingerprintAsBitVect(mol_b, 2, nBits=1024) + sim = DataStructs.TanimotoSimilarity(fp_a, fp_b) + return sim + + +def evaluate_SMILES_list(SMILES_list, description): + print("SMILES_list:", SMILES_list) + mol_list = [] + for SMILES in SMILES_list: + mol = Chem.MolFromSmiles(SMILES) + # Chem.SanitizeMol(mol) + # print(SMILES, mol) + if mol is None: + continue + mol_list.append(mol) + print("valid mol list:", len(mol_list)) + + if len(mol_list) < 3: + return [False] + + if "soluble" in description and "insoluble" not in description: + props = ["MolLogP"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in zip(SMILES_list, mol_list): + value = func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] > value_list[2]: + answer = [True] + else: + answer = [False] + + elif "insoluble" in description: + props = ["MolLogP"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in zip(SMILES_list, mol_list): + value = func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] < value_list[2]: + answer = [True] + else: + answer = [False] + + elif description in ["This molecule is more like a drug.", "This molecule is like a drug."]: + props = ["qed"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in 
zip(SMILES_list, mol_list): + value = func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] < value_list[2]: + answer = [True] + else: + answer = [False] + + elif description in ["This molecule is less like a drug.", "This molecule is not like a drug."]: + props = ["qed"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in zip(SMILES_list, mol_list): + value = func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] > value_list[2]: + answer = [True] + else: + answer = [False] + + elif description in ["This molecule has higher permeability.", "This molecule has high permeability."]: + props = ["TPSA"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in zip(SMILES_list, mol_list): + value = func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] > value_list[2]: + answer = [True] + else: + answer = [False] + + elif description in ["This molecule has lower permeability.", "This molecule has low permeability."]: + props = ["TPSA"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in zip(SMILES_list, mol_list): + value = func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] < value_list[2]: + answer = [True] + else: + answer = [False] + + elif description in ["This molecule has higher molecular weight.", "This molecule has high molecular weight."]: + props = ["MolWt"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in zip(SMILES_list, mol_list): + value = 
func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] < value_list[2]: + answer = [True] + else: + answer = [False] + + elif description in ["This molecule has lower molecular weight.", "This molecule has low molecular weight."]: + props = ["MolWt"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in zip(SMILES_list, mol_list): + value = func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] > value_list[2]: + answer = [True] + else: + answer = [False] + + elif description in ["This molecule has more hydrogen bond acceptors."]: + props = ["NumHAcceptors"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in zip(SMILES_list, mol_list): + value = func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] < value_list[2]: + answer = [True] + else: + answer = [False] + + elif description in ["This molecule has more hydrogen bond donors."]: + props = ["NumHDonors"] + prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] + value_list = [] + for name, func in prop_pred: + for SMILES, mol in zip(SMILES_list, mol_list): + value = func(mol) + value_list.append(value) + print("{} & {:.5f}".format(SMILES, value)) + if value_list[0] < value_list[2]: + answer = [True] + else: + answer = [False] + + elif "penicillin" in description or "Penicillin" in description: + target_mol = Chem.MolFromSmiles(Penicillin_SMILES) + original_SMILES = SMILES_list[0] + original_mol = mol_list[0] + original_similarity = get_molecule_similarity(target_mol, original_mol) + print("similarity between penicillin and original molecules\n{} & {:.5f}".format(original_SMILES, original_similarity)) + + edited_SMILES = 
SMILES_list[2] + edited_mol = mol_list[2] + edited_similarity = get_molecule_similarity(target_mol, edited_mol) + print("similarity between penicillin and edited molecules\n{} & {:.5f}".format(edited_SMILES, edited_similarity)) + if edited_similarity > original_similarity: + answer = [True] + else: + answer = [False] + + elif "aspirin" in description or "Aspirin" in description: + target_mol = Chem.MolFromSmiles(Aspirin_SMILES) + original_SMILES = SMILES_list[0] + original_mol = mol_list[0] + original_similarity = get_molecule_similarity(target_mol, original_mol) + print("similarity between aspirin and original molecules\n{} & {:.5f}".format(original_SMILES, original_similarity)) + + edited_SMILES = SMILES_list[2] + edited_mol = mol_list[2] + edited_similarity = get_molecule_similarity(target_mol, edited_mol) + print("similarity between aspirin and edited molecules\n{} & {:.5f}".format(edited_SMILES, edited_similarity)) + if edited_similarity > original_similarity: # check original_similarity >< 0.8 + answer = [True] + else: + answer = [False] + + elif "caffeine" in description or "Caffeine" in description: + target_mol = Chem.MolFromSmiles(Caffeine_SMILES) + original_SMILES = SMILES_list[0] + original_mol = mol_list[0] + original_similarity = get_molecule_similarity(target_mol, original_mol) + print("similarity between caffeine and original molecules\n{} & {:.5f}".format(original_SMILES, original_similarity)) + + edited_SMILES = SMILES_list[2] + edited_mol = mol_list[2] + edited_similarity = get_molecule_similarity(target_mol, edited_mol) + print("similarity between caffeine and edited molecules\n{} & {:.5f}".format(edited_SMILES, edited_similarity)) + if edited_similarity > original_similarity: + answer = [True] + else: + answer = [False] + + elif "cholesterol" in description or "Cholesterol" in description: + target_mol = Chem.MolFromSmiles(Cholesterol_SMILES) + original_SMILES = SMILES_list[0] + original_mol = mol_list[0] + original_similarity = 
get_molecule_similarity(target_mol, original_mol) + print("similarity between cholesterol and original molecules\n{} & {:.5f}".format(original_SMILES, original_similarity)) + + edited_SMILES = SMILES_list[2] + edited_mol = mol_list[2] + edited_similarity = get_molecule_similarity(target_mol, edited_mol) + print("similarity between cholesterol and edited molecules\n{} & {:.5f}".format(edited_SMILES, edited_similarity)) + if edited_similarity > original_similarity: # check original_similarity >< 0.8 + answer = [True] + else: + answer = [False] + + elif "dopamine" in description or "Dopamine" in description: + target_mol = Chem.MolFromSmiles(Dopamine_SMILES) + original_SMILES = SMILES_list[0] + original_mol = mol_list[0] + original_similarity = get_molecule_similarity(target_mol, original_mol) + print("similarity between dopamine and original molecules\n{} & {:.5f}".format(original_SMILES, original_similarity)) + + edited_SMILES = SMILES_list[2] + edited_mol = mol_list[2] + edited_similarity = get_molecule_similarity(target_mol, edited_mol) + print("similarity between dopamine and edited molecules\n{} & {:.5f}".format(edited_SMILES, edited_similarity)) + if edited_similarity > original_similarity: + answer = [True] + else: + answer = [False] + + elif "cysteine" in description or "Cysteine" in description: + target_mol = Chem.MolFromSmiles(Cysteine_SMILES) + original_SMILES = SMILES_list[0] + original_mol = mol_list[0] + original_similarity = get_molecule_similarity(target_mol, original_mol) + print("similarity between cysteine and original molecules\n{} & {:.5f}".format(original_SMILES, original_similarity)) + + edited_SMILES = SMILES_list[2] + edited_mol = mol_list[2] + edited_similarity = get_molecule_similarity(target_mol, edited_mol) + print("similarity between cysteine and edited molecules\n{} & {:.5f}".format(edited_SMILES, edited_similarity)) + if edited_similarity > original_similarity: # check original_similarity >< 0.8 + answer = [True] + else: + answer = 
[False] + + elif "glutathione" in description or "Glutathione" in description: + target_mol = Chem.MolFromSmiles(Glutathione_SMILES) + original_SMILES = SMILES_list[0] + original_mol = mol_list[0] + original_similarity = get_molecule_similarity(target_mol, original_mol) + print("similarity between glutathione and original molecules\n{} & {:.5f}".format(original_SMILES, original_similarity)) + + edited_SMILES = SMILES_list[2] + edited_mol = mol_list[2] + edited_similarity = get_molecule_similarity(target_mol, edited_mol) + print("similarity between glutathione and edited molecules\n{} & {:.5f}".format(edited_SMILES, edited_similarity)) + if edited_similarity > original_similarity: # check original_similarity >< 0.8 + answer = [True] + else: + answer = [False] + + else: + print("Not implemented.") + answer = [False] + + return answer \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/GA/ZINC_first_1000.smi b/open_biomed/models/MoleculeSTM/models/GA/ZINC_first_1000.smi new file mode 100644 index 0000000..3d4698d --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/GA/ZINC_first_1000.smi @@ -0,0 +1,1000 @@ +CC(C)(C)c1ccc2occ(CC(=O)Nc3ccccc3F)c2c1 +C[C@@H]1CC(Nc2cncc(-c3nncn3C)c2)C[C@@H](C)C1 +N#Cc1ccc(-c2ccc(O[C@@H](C(=O)N3CCCC3)c3ccccc3)cc2)cc1 +CCOC(=O)[C@@H]1CCCN(C(=O)c2nc(-c3ccc(C)cc3)n3c2CCCCC3)C1 +N#CC1=C(SCC(=O)Nc2cccc(Cl)c2)N=C([O-])[C@H](C#N)C12CCCCC2 +CC[NH+](CC)[C@](C)(CC)[C@H](O)c1cscc1Br +COc1ccc(C(=O)N(C)[C@@H](C)C/C(N)=N/O)cc1O +O=C(Nc1nc[nH]n1)c1cccnc1Nc1cccc(F)c1 +Cc1c(/C=N/c2cc(Br)ccn2)c(O)n2c(nc3ccccc32)c1C#N +C[C@@H]1CN(C(=O)c2cc(Br)cn2C)CC[C@H]1[NH3+] +CCOc1ccc(OCC)c([C@H]2C(C#N)=C(N)N(c3ccccc3C(F)(F)F)C3=C2C(=O)CCC3)c1 +Cc1ccc2nc(S[C@H](C)C(=O)NC3CCC(C)CC3)n(C)c(=O)c2c1 +O=C(N1CCc2c(F)ccc(F)c2C1)C1(O)Cc2ccccc2C1 +Cc1ccccc1C(=O)N1CCC2(CC1)C[C@H](c1ccccc1)C(=O)N2C +CCCc1cc(NC(=O)CN2C(=O)NC3(CCC(C)CC3)C2=O)n(C)n1 +CC(C)Cc1nc(SCC(=O)NC[C@@H]2CCCO2)c2c(=O)n(C)c(=O)n(C)c2n1 
+Cc1ccc(CNC(=O)c2ccccc2NC(=O)[C@@H]2CC(=O)N(c3ccc(C)cc3)C2)cc1 +CCCCC(=O)NC(=S)Nc1ccccc1C(=O)N1CCOCC1 +Cc1c(NC(=O)CSc2nc3sc4c(c3c(=O)[nH]2)CCCC4)c(=O)n(-c2ccccc2)n1C +CC(C)[C@@H](Oc1cccc(Cl)c1)C(=O)N1CCC(n2cccn2)CC1 +CCN(CC)C(=O)C[C@@H](C)[NH2+][C@H](C)c1cccc(F)c1F +Cc1nc2c(c(Nc3ncc(C)s3)n1)CCN(C(=O)CCc1ccccc1)C2 +O=C(NCCNC(=O)N1C[C@H]2CC=CC[C@@H]2C1)c1cccnc1 +O=c1n(CCO)c2ccccc2n1CCO +COC(=O)Cc1csc(NC(=O)Cc2coc3cc(C)ccc23)n1 +Cc1ccc(N2CC[C@@H](NS(=O)(=O)c3ccccc3C)C2=O)cc1C +CC[C@H](C)C[C@@H](C)NC(=O)N1CCN(CC(=O)NC2CC2)CC1 +CC(=O)Nc1c2n(c3ccccc13)C[C@](C)(C(=O)NC1CCCCC1)N(C1CCCCC1)C2=O +N#Cc1ccncc1NC[C@@H]1C[C@@]12CCc1ccccc12 +Cc1cccn2c(=O)c(C(=O)NC[C@H]3CCO[C@@H]3C(C)C)cnc12 +CNC(=O)c1ccc(/C=C/C(=O)Nc2c(C)cc(C)nc2Cl)cc1 +CC1=C(CNC(=O)c2cc(-c3ccccc3)nc3c2CNN3C(C)C)CN=N1 +C[C@@H](NC(=O)COC(=O)/C=C/c1ccc(Cl)cc1)c1ccccc1 +CCc1ccc(N(Cc2ccc(C)s2)C(=O)c2ccc(=O)n(C)n2)cc1 +CCOC(=O)c1nnc2ccccc2c1N1CC[C@@H]([NH+](CC)CC)C1 +Cc1ccc(C#N)cc1S(=O)(=O)NCc1ccnc(OC(C)(C)C)c1 +O=C(O[C@H]1CCOC1)C1(c2ccc(Cl)c(Cl)c2)CCC1 +CCC[NH2+][C@@H]1COC[C@H]1C(=O)NCc1cscc1C +O=C(NCc1nccc2ccccc12)c1ccc[nH]c1=O +CC(=O)c1ccc(S(=O)(=O)N2CCCC[C@H]2C)cc1 +O=[N+]([O-])c1c(Nc2cccc3ncccc23)ncnc1N1CCN(c2cccc(Cl)c2)CC1 +O=C(CCCO)Nc1ccc(F)cc1F +NC(=O)CCOc1ccc(NC(=O)C[C@H]2CCc3ccccc32)cc1 +COc1cc(C)ccc1OCC(=O)Nc1nnc(C)s1 +CC(=O)c1c(O)cccc1COc1ccccc1 +CCn1cc(S(=O)(=O)N2CCCCC[C@@H]2c2cc(-c3ccc(F)cc3)no2)cn1 +COC(=O)[C@](NC(=O)c1cccc(Cl)c1)(Nc1ccc(Br)c[nH+]1)C(F)(F)F +Cc1[nH]c2ccc(C(=O)Nc3cc(C(C)(C)C)nn3-c3ncccn3)cc2c1C +Cc1noc(C)c1C[C@H](C)C(=O)N[C@@H](C)C1CCCCC1 +CCn1cc(C(=O)N[C@H]2CC(=O)N(C)C2)c(C(C)C)n1 +COc1cccc(-c2cncc3ccccc23)c1C(=O)N(C(C)C)C(C)C +COc1ccc([C@@H](C)NC(=O)Cc2cccc3ccccc23)cc1 +O=C1C[C@H](c2nc(-c3cccnc3)no2)CN1c1cccc(Cl)c1 +C[C@H]1CCCN(c2ccc(C(=O)Nc3ccc(N4CCOCC4)cc3)cc2[N+](=O)[O-])C1 +C=CCN(C(=O)C/C=C/c1ccc(C)cc1)[C@@H]1CCS(=O)(=O)C1 +O=C(CSc1nnc(-c2cccc([N+](=O)[O-])c2)o1)Nc1nncs1 +CN(CCc1ccc(F)cc1)c1cc(Br)cc(F)c1C(N)=O +COc1ccccc1NC(=O)CSc1ccc(-c2ccccc2OC)nn1 
+Cc1occc1C(=O)/C(C#N)=C\c1cccc(C(F)(F)F)c1 +COc1ccc2c(c1)N(C(=O)CCSc1ccccn1)C[C@@H](C)O2 +CC[C@@H](NC(=O)[C@H](C)n1cccn1)c1ccc(C)c(F)c1 +CCC[C@]1(C(=O)N[C@@H]2CONC2=O)CC[NH2+]C1 +O=C(c1cc2cc([N+](=O)[O-])ccc2oc1=O)N1CCN(Cc2ccccc2)CC1 +CCn1c(CC2CC[NH2+]CC2)nn(CCO)c1=O +C=CCn1c(S[C@H](C)C(=O)N2CCC(C)CC2)nnc1-c1ccc(Cl)cc1 +CCO[C@H]1C(=O)O[C@H]([C@@H](O)CO)C1=O +Cc1ccc(-c2nnc(C[NH+](CCO)[C@H]3CCc4ccccc43)o2)cc1 +Cc1cc(-n2c(C)cc(C[NH2+][C@H](C)c3ccc(F)c(F)c3)c2C)no1 +C[C@@H](NC(=O)Nc1ccn(-c2ncccc2Cl)n1)[C@@H]1CCCO1 +COc1cc(S(=O)(=O)N2CCN=C2Cc2ccccc2)ccc1Cl +COc1ccc(OC)c(/C=C2\Oc3cc(OC(=O)c4ccncc4)cc(C)c3C2=O)c1 +COc1ccc([C@@H](NC(=O)Nc2cc(C)ccc2Cl)C2CCOCC2)cc1 +C[C@H](Cc1cccs1)N(C)C[C@@H]1CCCC[C@@H]1[NH3+] +C[C@H]([NH3+])c1nc2cc(C(F)(F)F)ccc2n1C +COc1cccc(CN2CCc3nnc(CCc4ccccc4)n3CC2)c1 +O=C(N[C@H]1CCS(=O)(=O)C1)C1CC[NH2+]CC1 +COCC[C@H](C)C(=O)N[C@@H](C)COC +Cc1cc(N(C)C)ccc1NC(=O)c1ccc(CN2CC[NH+](C)CC2)cc1 +C[C@H](CNC(=O)[C@H]1CC[NH2+][C@@H]1C)C[NH+]1CCCC1 +CN(C)c1ccc([C@H](CNC(=O)C(=O)Nc2ccccc2C#N)N2CC[NH+](C)CC2)cc1 +CCOc1ncnc(S(=O)(=O)CC)c1N +CC[C@@H](NC(=O)N(C)Cc1ccc(-c2ccccc2)cc1)c1ccncc1 +O=C(Nc1ccc(-c2nc3ccccc3o2)cc1)[C@H]1CCCN1S(=O)(=O)c1ccc(F)cc1 +CC[C@@H](C)CNc1nc2ccc(Cl)cc2s1 +Cc1cc(C)c2nc(N3CCN(C(=O)[C@@H]4CCCCN4S(C)(=O)=O)CC3)sc2c1 +CCc1nnc(-c2cc3ccccc3n2CC(=O)NC(C)(C)C)o1 +CCc1ccc(NC(=O)c2nn(-c3ccc(CC)cc3)ccc2=O)cc1 +Cc1ccc(C(=O)N[C@H]2CCC[NH2+][C@H]2C)cc1F +C[C@H](OC(=O)c1nc(C2CC2)n2ccccc12)c1cnc2ccccc2c1 +CCCCOc1ccccc1C[C@@H]([NH3+])C(=O)[O-] +CCC[C@@H]1CN(C(=O)C(=O)Nc2ccc(C)nc2Cl)CCO1 +C[C@H]1C(=O)N(c2ccc3c(c2)CCC3)CCN1C(=O)c1ccc(Cl)c(Cl)c1 +COC(=O)C1(NC(=O)[C@H]2C[C@H]2c2c(F)cccc2F)CCSCC1 +N#CC1(NC(=O)COc2cccc(Cl)c2)CCCC1 +COC1CC[NH+](CCNc2nccn(C)c2=O)CC1 +C=CCN(Cc1cccc([N+](=O)[O-])c1)C(=O)Nc1cc(OC)ccc1Cl +Cc1cc(Cl)ccc1OCC(=O)N/N=C/c1ccccn1 +O=C1NC(=S)NC(=O)C1=CNc1ccc([N+](=O)[O-])cc1O +Cc1c(C(=O)N2CCOCC2)oc2c1-c1nn(CC(=O)NCc3ccco3)cc1CC2 +CCc1ccc(CNC(=O)c2ccc(-c3nccnc3N3CCCCC3)cc2)cc1 +COc1ccc([C@H]2C[C@@H](C(F)(F)F)n3nc(C(=O)NC4CCCCC4)cc3N2)cc1OC 
+CCCc1cc(C(=O)NNC(=O)c2cccc(Br)c2)[nH]n1 +O=[N+]([O-])c1c(Nc2ccc(F)c(F)c2)ncnc1Oc1cccc2cccnc12 +CC(C)(C)n1ncc2c1CCC[C@H]2NC(=O)CSc1nc2ccccc2o1 +Cc1ccc([C@@H](C)NC(=O)N[C@@H](CCO)c2cccs2)cc1 +CCN(CC)S(=O)(=O)c1ccc2nc(-c3ccncc3)cc(C(=O)[O-])c2c1 +CCCN(CC)c1cc[nH+]c(C(=O)[O-])c1 +Cc1ccccc1N1C(=O)C[C@H]([NH+](C2CCCCC2)C2CCCCC2)C1=O +CS(=O)(=O)[C@H]1O[C@H]1c1ccc(Cl)cc1Cl +C[C@H](CSc1ccc(C(=O)N(C)C)cn1)C(=O)[O-] +CCOC(=O)[C@H]1C=C(C#N)O[C@@H](c2ccc(C)cc2)C1 +CCC[NH2+]C1CCC(O)(Cc2nc(C)cs2)CC1 +O=C1c2ccccc2N[C@H](CSC2=NC=NC3=NC=N[C@@H]32)N1c1ccc(Cl)cc1 +CCc1nc2ccccc2c(C(=O)NCc2ccc(OC)c(C(=O)OC)c2)c1C +Cc1nn(-c2nncc(-c3ccc(Cl)cc3)n2)c2c1[C@H](c1ccccc1)CC(=O)N2 +CC(C)NC(=O)Nc1cccc(C(=O)N(C)Cc2nnc(C3CC3)n2C)c1 +Cc1nnccc1C(=O)N[C@H](C)c1ccc(Cl)s1 +O=C(NCCCc1nc(-c2ccc(Br)o2)no1)C1CC1 +CCOc1cccc(NC(=O)CCc2ccc(N)cc2)c1 +COc1ccc(/C=C2/SC(=O)N(CC(=O)Nc3ccc(F)cc3)C2=O)cc1OC +CC(C)N(C)C(=O)[C@H]1CSCN1C(=O)/C=C/SCc1ccco1 +CCC1(CC)[C@@H](NC(=O)Nc2ccc(C(=O)NC)cc2)[C@H](C)[C@@H]1OC +Cc1ccc2ncc(C(=O)Nc3ncccc3OCc3ccncc3)n2c1 +N#Cc1ccnc(N2CCC([NH2+]C[C@@H]3CCCO3)CC2)c1 +O=C(Cn1nnn(-c2cccs2)c1=O)OC1CCCCC1 +COc1cccc(COc2ccc(OC)cc2CCl)c1 +CC[C@H](NC(=O)NCc1c(C)noc1C)c1ccc(OC)cc1 +CCc1c(C(=O)Nc2ccc3c(c2)NC(=O)CS3)[nH]c(C)c1C(C)=O +CSc1ccc(/C=c2\sc3ncnn3c2=O)cc1 +COc1ccc(C(=O)/C=C(\C)Nc2ccc(F)cc2F)cc1 +O=C(C=C1CCSCC1)N[C@@H]1CCC[C@H]1Cc1ccccc1 +C[C@@H](Sc1nc(/C=C/c2cccs2)n[nH]1)C(=O)N1CCOCC1 +Cc1cccc(C(=O)N[C@@H](C(=O)N2CCC[C@@H](C)C2)C(C)C)c1 +COC(=O)[C@@H](C)Sc1nnc(Nc2cccc(Br)c2)s1 +Cc1cnc(C(=O)Nc2ccc(N(C)C3CC[NH+](C)CC3)cc2)cn1 +CC(=O)Nc1nc2ccc(NC(=O)NCc3ccccc3)cc2s1 +C=CCN1C(=O)/C(=C/c2ccccc2F)S/C1=N\S(=O)(=O)c1cccs1 +CC[C@H]1CC(=O)N(Cc2ccccc2C#CCCO)C1 +COc1ccc(F)cc1C(=O)Nc1nccs1 +CC[C@@H](CC(=O)NC1(C(=O)OC)CCSCC1)c1ccccc1 +Cc1ccc(C)c(-n2c(SCCCCCO)nc3ccccc3c2=O)c1 +CC[C@@H](C)[C@@H]([NH3+])c1ccc(Cl)s1 +COc1cccc([C@@H](C)[NH2+]CCOc2ncccc2Cl)c1 +CC[C@@H]1CCCCN1C(=O)NC1CCN(C(=O)OC(C)(C)C)CC1 +CCOc1cccc(NC(=O)NCc2ccc(N3CCSCC3)cc2)c1 +Cc1cccc(NC(=O)CN2CCN(c3ccc4c(c3)OCCO4)C2=O)n1 
+C=C(C)C(=O)N[C@H](C)c1nc2ccccc2n1CCC(=O)N1CCCCCC1 +CCOC[C@H]1CC[NH+](Cc2ccc(-c3nc4ccccc4s3)o2)C1 +CCOC(=O)[C@]1(Cc2cccc(Cl)c2)CCCN(C(=O)c2ccnn2C)C1 +Cc1ccc([N+](=O)[O-])cc1NC(=O)C(=O)N1CC[C@H]([NH+]2CCCC2)C1 +CCOCCCNC(=O)N[C@@H]1CCC[C@@H](CC)C1 +O=C(Cc1cccc(F)c1F)Nc1cccc(Br)n1 +COc1ccccc1NC(=O)[C@@H]1CCCN(C(=O)Nc2cccs2)C1 +C[C@H]1CCC[C@](C#N)([C@]2(O)CCCCC2(C)C)C1 +O=C(NCc1ccc([N+]2=CCCC2)cc1)NC1(c2ccc(Cl)cc2)CC1 +CCCC(=O)N[C@@H]1CCC[NH+](Cc2ncccc2C)C1 +O=C(NCc1cccs1)C1(c2cccc(Cl)c2)CCC1 +C[C@H]1CCC[C@H](NC(=O)[C@@H](C)Sc2ncn[nH]2)[C@@H]1C +COc1ccc([C@@H]([NH2+]Cc2ccc(Cl)nc2)c2ccc(F)cc2)cc1 +COc1cc(NC(=O)[C@H](C)Sc2ccccc2Cl)cc(OC)c1 +CCN1CCC(=NNC(=O)c2ccccc2)CC1 +CCCOc1ccc(Br)cc1C[NH+]1CCC([C@@H](C)O)CC1 +Cc1cc2n(C[C@H](O)CO[C@H](c3ccccc3)c3ccccc3C)c(=O)c3ccccc3n2n1 +CCc1ncc(CN(C)C(=O)Nc2c(C)ccc([N+](=O)[O-])c2C)s1 +c1ccc2nc(NCCCc3nc4ccccc4[nH]3)cnc2c1 +Cc1c([C@H](C)[NH2+]Cc2cccn2C)cnn1C +CC(=O)N[C@@H](C(=O)NC1COC1)C(C)C +O=C(Nc1ccccc1F)c1cc2ccccc2c2cccnc12 +Cc1ccccc1N1C(=O)/C(=C/c2cccn2-c2cccc([N+](=O)[O-])c2)C([O-])=NC1=S +COCCN1C[C@H](C(=O)N(Cc2cccc(Cl)c2)C(C)C)CC1=O +COc1ccc(NC(=O)N2CCN(C(=O)Cc3csc4ccccc34)CC2)cc1OC +C#CCN(C[C@H]1CCCO1)C(=O)N[C@@H](C)c1cccc([N+](=O)[O-])c1 +Cc1cccc(C2=CCN(C(=O)Nc3ccc(C(N)=O)c(Cl)c3)CC2)c1 +COCCCN1C(=O)c2ccc(C(=O)Nc3nc(-c4ccc(C)cc4)cs3)cc2C1=O +O=S(=O)(Nc1ccc(N2CCCS2(=O)=O)cc1)c1ccc(F)c(Cl)c1 +O=C1/C(=C/c2ccccc2)Oc2c1ccc1c2CN(Cc2cccs2)CO1 +CC[C@@H](C)[C@@H](NC(=O)c1cccc(F)c1)C(=O)N=c1[nH]c2ccccc2[nH]1 +Cc1c(F)cc(N)cc1S(=O)(=O)N[C@@H](C)C1CC1 +Cc1ccc(Cn2ncc3c(N)ncnc32)cc1 +CCOC(=O)C(C)(C)c1nc(-c2ccccc2)no1 +CCOC(=O)c1sc(/C=C/c2nc3c(s2)CCC3)nc1C +C[C@@H]1CC[C@@H]([NH2+]C2CCC(NS(C)(=O)=O)CC2)c2ccccc21 +CN(C)S(=O)(=O)c1ccc(C(=O)N(C(=O)N2CCCCC2)c2ccccc2)cc1 +CC[NH2+][C@@H](CC)c1ccccc1OCc1cccc(F)c1 +C=CCOC(=O)C1=C(C)N=C2S[C@H](C)C(=O)N2[C@H]1c1ccc(F)cc1 +CCC[NH2+][C@]1(C(=O)OCC)CC[C@H](n2cc(Cl)c(C)n2)C1 +Cc1cc(NC(=O)CSc2nnc3c4ccccc4n(C)c3n2)ccc1Br +CCOC(=O)c1sc(NC(=O)c2ccc(-n3c(C)nc4ccccc4c3=O)cc2)cc1C +CC#CCC(=O)C1([NH+](CC)CC)CCCC1 
+O=C(NCCCc1ccccc1)C1CCN(C(=O)[C@@H]2CC(=O)N(c3ccccc3)C2)CC1 +O=Cc1ccc(OCc2ccn(-c3cccc(F)c3)n2)cc1 +Clc1ccccc1Cn1ccnc1 +Cc1[nH]c(=O)c(C(=O)N2CCN(c3ccccc3)C(=O)C2)c(C)c1C +C[C@@H](NC(=O)NC[C@H]1CCCN(c2ncccn2)C1)C1CCOCC1 +O=C(NCCS(=O)(=O)c1ccccc1)N1CCC[C@@H]2CCC[C@@H]21 +Cc1ccc(-c2cc(NC(=O)C(C)C)c(=O)n(CC(=O)Nc3cccc(C)c3)n2)cc1 +Cc1ccc(S(=O)(=O)N2CCN(C(=O)[C@H]3CCCC[C@@H]3C(=O)[O-])CC2)cc1C +CCc1ccc(-c2nc(C(=O)N3CCO[C@H](CC)C3)cs2)cc1 +Cc1cc(C)cc(OCC(C)(C)C[NH2+]C2CC2)c1 +CCOC(=O)[C@H](F)[C@@]1(O)CCC[NH+](C(C)C)CC1 +CCn1cc(/C=C/C(=O)c2ccc3ccccc3c2)cn1 +CCC(CC)C(=O)Nc1cnn(-c2ccccc2F)c1 +O=C1O[C@H](C(=O)Nc2ccnc3ccnn23)Cc2ccccc21 +COCCn1nnnc1[C@@H](C(C)C)N1CCSCC1 +O=C(NCCNS(=O)(=O)c1cccc(Cl)c1F)c1cccnc1 +Cc1ccc(CNC(=O)NCc2nnc3n2CCC3)s1 +C/C=C/C[C@]1(C(=O)[O-])CCN(C(=O)OC(C)(C)C)C1 +O=C(N[C@@H](CO)c1ccco1)c1cc(Cl)ccc1OC1CCCC1 +Cc1cc(N(C)C)ccc1NC(=O)c1c[nH]c2nccc(Cl)c12 +CCOC(=O)c1cccc(NC(=O)c2cn[nH]c2C)c1 +CCC(CC)[S@](=O)CCC(=O)[O-] +COCc1ccc(C(=O)N(C)Cc2ccc(Cl)s2)cc1 +O=C(CCc1nc2ccccc2c(=O)[nH]1)Nc1cc(Cl)c(Cl)cc1Cl +CC[C@@H](C)NC(=O)c1ccc2c(c1)CCCN2S(C)(=O)=O +COc1ccc(C)cc1-n1nnnc1SCC(=O)Nc1cc(C)cc(C)c1 +C[C@H](NC(=O)Cc1ccc[nH]1)C(=O)N1CCCC[C@H]1C +CCOC(=O)[C@H]1CCCN(C(=O)c2cc(C(C)C)n(C)n2)C1 +C[C@H]([NH2+]Cc1nc(Cc2ccccc2)no1)[C@@H](C)n1cccn1 +c1cc(CN2CC[NH+](Cc3ccc4c(c3)OCCO4)CC2)no1 +CCCCNC(=O)CCc1c(C)nc2c3ccccc3nn2c1C +O=C(C[C@@H]1C[NH2+]CCO1)N[C@H]1C=CS(=O)(=O)C1 +C[C@@H]1CCO[C@@H]1C(=O)N1CC[C@H](C(N)=O)c2ccccc21 +CCC[NH2+][C@H](Cc1nn(C)c2ccccc12)c1ncc[nH]1 +CC(C)c1noc(-c2cc[nH+]c(N3CCN(C(=O)[C@H]4C[C@H]4C)CC3)c2)n1 +O=C(NCc1cccnc1)NCc1ccnc(OCC(F)F)c1 +C[C@@H](NC(=O)N1CCCCCCC1)[C@@H]1CCCO1 +COc1ccc([C@H]2C(C(=O)NCc3ccccc3)=C(C)Nc3ncnn32)cc1OC +Cc1cc(=O)n2c(n1)SC[C@@H]2CC(=O)NCCC(C)C +CC(C)C1=CN=N[C@H]1[C@H]1CCC[NH+](C[C@@H](C)Cc2ccc3c(c2)OCO3)C1 +COc1cc(CNC(=O)c2occc2Br)ccn1 +Cc1cccc(COc2ccc(Br)cc2/C=C2\SC(=S)NC2=O)c1 +O=C(N[C@@H]1CCO[C@@H]1c1ccc(Cl)c(F)c1)[C@H]1Cc2ccccc2O1 +Cc1ccc([C@]2([NH3+])CC[C@@H]2C)cc1 +O=C(CNC(=O)c1ccco1)OCC(=O)c1ccc2ccccc2c1 
+O=C(c1cc2c(F)cccc2[nH]1)N(C[C@@H]1CCCO1)c1ccncc1 +Cc1cc(C)c(NC(=O)c2cc3ccccc3n2C)c(C)c1 +CSc1ccccc1NC(=O)N[C@@H](CO)c1ccc(Cl)cc1 +Cc1cc(Br)ccc1CNC(=O)C1CC=CC1 +COc1ccc(CN/C(C)=C2/C(=O)N(c3ccc(OC)cc3)N=C2C)cc1 +O=C(COC(=O)C1(c2ccccc2F)CCCC1)N1CCOCC1 +CC1(C)C(=O)NCC[NH+]1Cc1ccc(OCC(F)F)cc1 +Cc1ccc([N+](=O)[O-])cc1C(=O)Nc1ccc(C(=O)NC(C)C)cc1 +C[C@@H]([C@@H](O)c1ccc2ncnn2c1)[N+](=O)[O-] +C[C@H](Oc1ccc(Cl)c(Cl)c1)C(=O)NC[C@H]1CCC[C@@H]1O +COC[C@H]1CCC[NH+](Cc2cc(C)n(Cc3ccco3)c2C)C1 +Cc1ccc([C@H](C)NC(=O)CN(C)C(=O)OC(C)(C)C)cc1F +CC(=O)NCCC(=O)N1CCC[C@@H](C)C1 +COc1ccc(N2CCn3c2nn(CC(N)=O)c(=O)c3=O)cc1 +ClC(Cl)(Cl)c1nonc1C(Cl)(Cl)Cl +CCc1sc(C(=O)N2CCN([C@@H](C(N)=O)c3ccccc3)CC2)cc1C +C[NH+](C)Cc1cc(NC(=O)CCCC(=O)N2CCCCCC2)[nH]n1 +C[C@@H](Nc1ccc(COC(C)(C)C)cc1)c1ccc(C#N)cc1 +NC(=O)c1ccc(NC(=O)c2cccn(Cc3ccc(F)cc3)c2=O)cc1 +Cc1cc(C(=O)N2CC[C@H](C)C[C@H]2C)c2c(C)nn(C)c2n1 +Cc1c(C(=O)N2CCCC2)oc2c1-c1nn(CC(=O)N3C[C@@H](C)C[C@@H](C)C3)cc1CC2 +Cc1cc(C)cc(NC(=O)CC(C)C)c1 +COc1cc(OC)cc([C@H]2CC[NH+](CCC(F)(F)F)C2)c1 +CCn1ccnc(N2CCCC[C@@H](N3CC[NH+](C)CC3)C2)c1=O +C[C@@H]1CCC[C@](O)(c2ccc(Cl)s2)CC1 +CCOC(=O)C[C@H](C)CNC(=O)C(=O)N1CCc2ccc(C)cc21 +COc1ccccc1C[NH+]1CCC[C@H](N2CCCC2=O)C1 +CCNc1ccc2c(OC)ccc(F)c2n1 +Oc1ccc(C2[NH+](Cc3ccccc3)CC[NH+]2Cc2ccccc2)c(O)c1 +Fc1ccc([C@@H]2C[C@@H](c3ccc(Br)cc3)Nc3ncnn32)cc1Br +COc1ccc(O)c(CNC2CC[NH+](Cc3ccccc3Cl)CC2)c1 +Cc1ccc(NCc2cccc(C(=O)NCc3ccco3)c2)c(F)c1 +Cc1c(C(=O)N(C)[C@@H]2CCN(c3ccccc3Cl)C2=O)cnn1C +Cc1nc(C(C)(C)C)[nH]c(=O)c1C(=O)Nc1cccc(Cl)c1C +CCOC(=O)[C@H]1CCCN(C(=O)c2cn(CCc3ccccc3)nn2)C1 +C[C@@H]1[C@H](C(=O)[O-])CCN1S(=O)(=O)c1ccc(F)c(Cl)c1 +CC1CCC(OC(=O)C2=NC3=C(C(=O)C[C@@H](c4ccccc4)C3)[C@H]2C)CC1 +CN(Cc1ccno1)Cc1c(C(=O)N2CC[NH+](C3CCCCC3)CC2)nc2ccccn12 +Cc1cc(F)c([C@@H]([NH3+])[C@H]2Cc3ccccc3O2)cc1F +COc1ccc(OCC(=O)N/N=C2\CCCc3ccccc32)cc1 +CC(C)[NH+]1CCC(N2CC[NH+](Cc3c(F)ccc(F)c3F)C[C@@H]2CCO)CC1 +CC[NH2+][C@@H](C)c1cc(F)c(C)cc1N1C[C@H](C)S[C@H](C)C1 +CC(C)c1nsc(NC[C@H](C2CC2)[NH+](C)C)n1 
+Cc1ccc(-c2nc(C[NH+]3CCCC[C@H]3c3cccnc3)c(C)o2)s1 +COc1cccc(C2=C[C@H](C(=O)N3CCCCC3)N=N2)c1 +O=C(Nc1ccc2[nH]c(=O)[nH]c2c1)c1cc(S(=O)(=O)NC2CC2)ccc1Br +c1ccc2c(c1)CC[C@H]([C@H]1CCCc3cccnc31)N2 +Cc1ccc(NC(=S)NC(C)C)cc1C +C[C@@H](Nc1ccc2c(c1)CCC2)C(=O)N1CCCC1 +CCOc1ccc(Nc2ccc(C#N)c([N+](=O)[O-])c2)cc1 +CC(C)OC(=O)CCNC(=O)c1cnn(-c2ccc(F)cc2)n1 +CCCCOc1ccccc1NC(=O)c1scnc1C1CC1 +O=C1N=C(N2CCCCC2)S/C1=C1/C(=O)Nc2ccccc21 +Cc1c([C@H](C)NC(=O)c2[nH]c3ccccc3c2Cl)cnn1C +CSc1cccc2sc(N3CCN(C(=O)c4ccn(C(C)C)n4)CC3)nc12 +Cc1ccc(C)c2nc3sc(C(=O)Nc4ccc5c(c4)OCO5)c(N)c3cc12 +COC(=O)CCN(Cc1cnc2ncccn12)C1CCOCC1 +O=C(c1c[nH]c2ccc(F)cc12)N(CC1CC1)CC(F)(F)F +O=C(Cc1ccc(Cl)cc1)/N=C1/S[C@@H]2CS(=O)(=O)C[C@H]2N1c1cc(Cl)ccc1Cl +Cc1cccc(C)c1NC(=O)C[NH+]1CCC(OCc2ccc(F)cc2)CC1 +C[C@H]1CCC[C@@H](C)N1C(=O)[C@@H]1COCCO1 +CC(C)[C@@H](C)CC(=O)NNC(=S)NC1CCCCC1 +Cc1cccc(NC(=O)CN2C(=O)/C(=C3\SC(=S)N(Cc4ccco4)C3=O)c3ccccc32)c1 +CC(=O)N[C@@H]1C(=O)C[C@@H]2[C@H]3CCC4=CC(=O)CC[C@@]4(C)[C@@H]3CC[C@]12C +CCCNC(=O)[C@H]1CS[C@H](c2ccccc2O)N1C(C)=O +Cc1ccc(N2CC[C@H](C(=O)NC[C@@H](CC(C)C)N3CCOCC3)C2=O)cc1 +COc1ccc(NC(=O)CCc2ccc3c(c2)OCCO3)cc1OC +Cc1ccc(C(F)(F)F)cc1/C=C/C(=O)[O-] +Cc1ccccc1C(=O)Nc1ccc(N2CC[NH+](Cc3ccccc3)CC2)cc1 +c1cc(C[NH2+]Cc2ccco2)cc(OC2CCCC2)c1 +O=C(c1cccs1)N(Cc1ccc(F)cc1)Cc1cc(-c2ccccc2)cn2nnnc12 +COc1ccccc1CC(=O)N[C@@H]1CS(=O)(=O)C[C@H]1Cl +c1cc(-c2nc3c4cn[nH]c4ncn3n2)ccc1COc1ccc2c(c1)CCC2 +CN(C(=O)c1ccc(Cl)cc1O)C1CCC(=O)CC1 +CNC(=O)CNS(=O)(=O)c1cccc(C(F)(F)F)c1 +Cc1ccc(S(=O)(=O)N2C[C@@H](CC(=O)[O-])c3ccccc32)c(C)c1 +Cc1cc(N)nc(SCC(=O)NC[C@@H](c2ccccc2)C(C)C)n1 +Cn1cc(C(N)=O)c(NC(=O)c2ccc3sccc3c2)n1 +COc1cc2c(cc1O)[C@H](c1cnc(-c3cccc(C)c3)nc1)CC(=O)N2 +CC(=O)Nc1ccc(O[C@H](C)c2nc(C(C)(C)C)no2)cc1 +O=C(c1c(-c2ccccc2)nc2sc3c(n12)CCCC3)C(F)(F)F +CC(C)CNC(=O)NC(=O)[C@@H](C)Nc1ccc(OC(C)C)cc1 +C=C[C@](C)(O)CC[C@H]1C(C)=CC(=O)[C@H]2C(C)(C)CCC[C@@]21C +CC[C@H](C)Sc1nncn1-c1ccccc1C +COCC(=O)N1CCCc2ccc(NC(=O)c3cccc(Br)c3)cc21 +C[C@H](Oc1cccc(Cl)c1)C(=O)Nc1ccc2ccccc2c1 
+Cc1cc(Br)ccc1SCC(=O)N1CCC(C(=O)c2ccc3c(c2)OCCO3)CC1 +Nc1ccc2c(c1)CN(C(=O)c1ccc(Cl)cn1)CC2 +C[C@H](CN(C)C(=O)c1ccc(F)c(F)c1F)C(=O)[O-] +NC(=O)c1ccc(SCC(=O)Nc2ccc3c(c2)Cc2ccccc2-3)c([N+](=O)[O-])c1 +CN(C(=O)CSCC(F)(F)F)c1cccc([N+](=O)[O-])c1 +C=CCn1c(SCC(=O)Nc2cc(C)on2)nnc1[C@H]1COc2ccccc2O1 +CC(=O)N[C@@H](CC(=O)Nc1ccnn1Cc1ccc(C)o1)c1ccccc1 +CNC(=O)Cc1nc(C[NH+](C)C2CCC(c3ccccc3)CC2)cs1 +COCCOc1c(Cl)cccc1NC(=O)Cc1c[nH]c2ccccc12 +COCCN1[C@@H](C)CN(C(=O)C[NH+](C)C2CC2)C[C@H]1C +CSc1nncn1/N=C\c1cc(Cl)ccc1F +CC(C)(C)OC(=O)N1CC[C@H]2CC(=O)[C@H]2C1 +C[C@@H](CCO)SCc1ccccc1OC(F)F +C[C@@H]1C[C@@H]1C(=O)Nc1ccc(F)cc1C(=O)NC1CCC(O)CC1 +COc1ccccc1CNC(=O)COc1ncnc2oc(C)c(C)c12 +Fc1ccc(F)c(C[NH+]2CCC(n3cc(-c4cccnc4)nn3)CC2)c1F +O=C(C[NH+]1CCC(C(=O)c2ccc(Cl)cc2)CC1)NC[C@H]1COCCO1 +CCC[NH+](C)C[C@H]1CCN(C(=O)Nc2cc(NC(C)=O)ccc2C)C1 +Cc1nc2ncnn2c(NCCOC2CCCCCC2)c1C +C[C@H]1CN(Cc2cnn(-c3ccccc3)n2)C[C@H](C)S1 +COc1cc(OC)cc([C@@H](N[C@@H](C)c2ccc(F)cn2)c2[nH+]ccn2C)c1 +CC(C)CCc1noc(C[NH+](C)[C@H]2CCC[C@@H]2S(C)(=O)=O)n1 +CCc1nc2n(n1)CCC[C@H]2NC(=O)c1ccc(-n2cc(C)cn2)cc1 +C[C@@H](NC(=O)c1ccccc1CSc1nc2ccccc2[nH]1)C1CC1 +O=C([C@H]1CCCN1S(=O)(=O)N1CCCCC1)N1CCSCC1 +Cn1c(=O)c(=O)n(CC(=O)N2CCC3(CC2)OCCO3)c2cccnc21 +COc1ccc(C(=O)N(CC2=CC=C[C@@H]3N=CC=C23)C2CC2)cc1 +CCc1ccc(-c2nc(N)ccc2[N+](=O)[O-])cc1 +c1csc([C@@H]2CN(Cc3cnc(C4CCC4)s3)CCO2)c1 +O=S(=O)(/N=C(\[O-])c1ccsc1)N1CCCC1 +Cc1nccn1CC(=O)N1CCCC[C@@H]1CCNC(=O)c1ccccc1 +C[C@H](Sc1nnc(-c2ccc(Cl)cc2)n1C[C@H]1CCCO1)C(=O)Nc1ccc2c(c1)OCO2 +COc1ccc(-n2nnc(-c3nc(-c4ccc5c(c4)OCO5)cs3)c2C)cc1OC +CN(c1ccccc1)S(=O)(=O)c1ccc2c(c1)C(C)(C)C(=O)N2 +Cc1cc(S(=O)(=O)N2CCN(C(=O)[C@H]3C[C@H]3c3ccc(Cl)cc3)CC2)c(C)s1 +O=C(Nc1ccc(Oc2ccc(Cl)nn2)cc1)[C@@H](O)c1ccccc1 +CCCc1cc(=O)n2c(n1)SC[C@@H]2CC(=O)Nc1cccc(Cl)c1Cl +Cc1ccc(NC(=O)CSc2nnc([C@@H]3CCCN3C(=O)c3cccc(C)c3)n2C)cc1 +Cc1noc(C)c1CCCNC(=O)N[C@H]1CC(=O)N(C2CC2)C1 +CC(C)(C)[C@@H]1CCC(=O)[C@@H](CN2CCOCC2)C1 +COc1cc(/C=C2\SC([N-]c3cccc(C(=O)[O-])c3)=NC2=O)cc(OC)c1O +C=CCO[C@H](C)C(=O)Nc1ccc(F)cc1Br 
+O=C(CN1CCN(Cc2ccc(F)c(F)c2)CC1)c1cccs1 +CC[NH+]1C[C@H](c2ccccc2)CC2(CCN(C(=O)c3ccon3)CC2)C1 +C[C@H]1C[C@H]([NH+]2CC[C@H](S(=O)(=O)NC3CC3)C2)CC(C)(C)C1 +CC[C@H](Sc1cc(C)c2cccc(C)c2n1)C(=O)Nc1nc2ccc(S(N)(=O)=O)cc2s1 +COC(=O)[C@]1(NC2CC2)CC[C@H](Sc2ncc(C)cn2)C1 +COc1ccccc1Nc1nn(CN(C)OC)c(=S)s1 +Cc1cc(C)c(C)c(S(=O)(=O)/N=C(\[O-])c2cc(C3CC3)n(C(C)(C)C)n2)c1C +CC(C)(O)C#Cc1ccc(C[NH2+][C@H]2CCCN(c3nc4ccccc4s3)C2)s1 +COc1ccc(OC)c(S(=O)(=O)n2cc3c(=O)n(C)c(=O)n(C)c3n2)c1 +C/[NH+]=C(/NCc1noc(C(C)(C)C)n1)N[C@@H](C)c1ccc(F)cc1F +Cc1nnsc1C(=O)Nc1nnc(-c2ccc(Br)cc2)o1 +CCN(Cc1ccc(OC)c(OC)c1)C(=O)C[NH+]1CC[C@@H](C)[C@H](O)C1 +O=C([O-])[C@H]1CCCCN1C(=O)CSCc1ccccc1 +Cc1cccc([C@H](O)C[C@@H]2CCCCC[NH2+]2)c1 +O=C(C1CCCCC1)N1CCN(Cn2cc(Br)cn2)CC1 +Cc1ccccc1COc1ccc([C@@H]2C3=C(CCCC3=O)Nc3nnnn32)cc1 +CCO[C@@H]1C[C@@H]([NH+](C)C[C@@H]2CCCN(S(C)(=O)=O)C2)C12CCCCC2 +CCn1c(=O)c(=O)[nH]c2cc(C(=O)NN3C(=O)N[C@](C)(c4ccccc4)C3=O)ccc21 +COc1cc(OC)cc([C@@H](NC(=O)N(C)C2CCCCC2)c2nccn2C)c1 +O=C1[C@H]2[C@@H]3C=C[C@@H](C3)[C@H]2C(=O)N1CN(C(=O)C(F)(F)F)c1cccc(C(F)(F)F)c1 +CCNc1ncc(COc2cccc3ccccc23)s1 +Cc1noc(C)c1CCCNC(=O)c1c(C)nn(Cc2ccccc2)c1C +C[C@H]1CC([NH2+][C@@H](C)c2c[nH]c3cc(F)ccc23)C[C@H](C)O1 +C/C(=C1/SC(=O)N(c2ccc(Cl)cc2)C1=O)c1ccc(Br)cc1 +Cc1ccc(NC(=O)C[C@H]2SC([N-]c3ccc(N(C)C)cc3)=NC2=O)c(C)c1 +CO[C@H]1CCCC[C@H]1NC(=O)NC[C@H](c1cccc(F)c1)[NH+](C)C +Cc1nn(C)c(C)c1CN[C@H]1CCC[NH2+]C1 +Cc1cc(=O)[nH]c(SCC(=O)N2C[C@]3(C)C[C@H]2CC(C)(C)C3)n1 +COC1CC[NH+](Cn2nc(-c3ccc(C)cc3)n(C)c2=S)CC1 +C[NH2+][C@]1(C(=O)[O-])CCC[C@@H](OCC2CCCCC2)C1 +Cc1nnc(SCC(=O)c2cc(C)n(CC(F)(F)F)c2C)s1 +CC(C)CN(CCC#N)C(=O)NC[C@@H]1CC[C@H](C(=O)[O-])O1 +Cc1cc(CN2CCN3C(=O)NC[C@@H]3C2)cc(C)c1OC(F)F +CCNC(=O)c1cccc(NC(=O)NCCCSC)c1 +Cn1c(-c2cccc3ccccc23)nn(CN2CCOCC2)c1=S +CCCCCN1C(=O)/C(=C/c2ccc(O)c(OCC)c2)SC1=S +N#CCCN(Cc1ccccc1)C(=S)NC(=O)c1cccc(Cl)c1 +CCCS(=O)(=O)c1ccccc1C(=O)Nc1nnc(CC)s1 +CNC(=O)[C@H](C)CN(C)Cc1cc(=O)n2cccc(C)c2[nH+]1 +COCCNC(=O)/C(C#N)=C/c1cccc(O)c1 +CNC(=O)CN1c2ccccc2C(=O)N(C)[C@H]1c1ccccc1O 
+CC(=O)N[C@H](C)C(=O)Nc1ccc(Sc2nncs2)c(Cl)c1 +CC[n+]1c(N)n(CCOc2ccc(Cl)cc2Cl)c2ccccc21 +COC(=O)CCCc1nnc(NC(=O)N2CCC[C@@H]3CCC[C@@H]32)s1 +O=C(N[C@H]1CCCC[C@H]1OC1CCCC1)c1ccc([N+](=O)[O-])cc1 +O=C(CCOc1ccccc1)NNC(=O)CC1(O)CCCC1 +C=CCN(CC(=O)[O-])C(=O)[C@@H](C[NH3+])C(C)C +O=C(CCn1ccccc1=O)NCC1(c2ccccc2)CC1 +COc1cc(C)c([C@@H](C)NC(=O)CSC2CCCC2)cc1OC +C=CCNC(=O)Nc1ccc(F)c(NC(=O)OC)c1 +Oc1ccccc1/C=[NH+]/CCC/[NH+]=C/c1ccccc1O +Cn1cccc1Cc1nnc(SCC(=O)Nc2ccc3c(c2)OCCO3)n1C +C[C@@H](C#N)CNC(=O)c1cccc(Oc2cccc(C(F)(F)F)c2)c1 +Cc1oc(-c2ccccc2)nc1CCNC(=O)c1ccc([S@@](C)=O)cc1 +CCc1noc(C)c1C[NH+](C[C@@H]1CCCCO1)C(C)C +Cc1ccccc1Oc1cc(Br)ccc1C[NH3+] +CCc1cccc(CC)c1NC(=O)NC1CC1 +CC[NH+]1CCN(C2(CNC(=O)c3ccccc3Br)CCCCC2)CC1 +CCCCn1nc(C)c(C[NH2+]C[C@@H](C)O)c1Cl +C/C=C/C=C/C(=O)N1C[C@@H](C(=O)OC)[C@@H](C)C1 +COc1cccc(OC)c1OC1CC[NH+](Cc2ccc([C@H]3C[C@@H]3C)o2)CC1 +COC(=O)c1cc(CSc2nnc(-c3cccnc3)n2-c2ccccc2F)oc1C +CCOc1ccc2cc(C(=O)NCc3ccccc3)c(=[NH2+])oc2c1 +O[C@H]1CCN(c2ccnc(N3CCc4[nH]c5ccc(Cl)cc5c4C3)n2)C1 +O=S(=O)(NC[C@H](O)c1ccc(C(F)(F)F)cc1)c1cc(F)ccc1F +O=C(CSc1ccc2c(c1)OCCCO2)NC(=O)c1cccs1 +C[NH+](C)CCSc1ccc(NC(=O)C2CC2)nn1 +Cc1ccc(C)c(NC(=S)NCCc2cccs2)c1 +CNc1ncc(F)c(-c2cccc(Cl)c2)n1 +Cc1ccc(-c2nnc(SCC(=O)Nc3ccc(CC#N)cc3)n2N)cc1 +CCN(CC)C(=O)c1ccccc1OC(C)=O +Cc1ccc(-c2nc(-c3ccc(OCC(F)(F)F)nc3)no2)cc1 +COc1ccc([C@@H](CNc2nc3ccccc3o2)N2CCOCC2)cc1 +CNC(=O)[C@H]1CCCC[C@H]1[NH2+][C@H](C)c1cc(C)cc(C)c1 +CCN[C@H](c1cccnc1)C1([NH+]2CCCCC2)CCCC1 +Cn1ncc2c(NCc3ccco3)nc(CCc3ccccc3)nc21 +Cn1cc[nH+]c1C[C@H]1CCC[NH+](Cc2ncc(-c3ccccc3Cl)o2)C1 +Cc1cc(N2CC[C@H](C)[C@H](O)C2)nc(C)[nH+]1 +Cc1nnc(CCC[NH+]2CCC(CC[NH+]3CCCC[C@@H]3C)CC2)o1 +CCc1nn(C)cc1CNC(=O)C1(CC)CCC1 +COc1ccc(N2/C(=N/C(=O)CCCC(=O)[O-])S[C@@H]3CS(=O)(=O)C[C@H]32)cc1Cl +CC(=O)C1=C([O-])C(=O)N(CCC2=c3ccccc3=[NH+][C@H]2C)[C@H]1c1ccccc1F +COc1cc(C)c(C(=O)N[C@H]2C[C@H](C)N(c3ccccc3)C2)cc1OC +Cc1ccc([C@H]2C[C@@H]2NC(=O)N2CCC(C(N)=O)CC2)cc1C +Cc1nc2n(n1)CCC[C@@H]2N[C@@H]1CCc2c(Cl)cc(Cl)cc21 
+CC(C)Oc1ccc(-c2nc(C(=O)O[C@@H](C)[C@@H]3CCOC3)cs2)cc1 +CN(C[C@@H]1CCCN(C(=O)NCCc2ccc(F)cc2)C1)C(=O)OC(C)(C)C +COc1cc(Cl)c(C)cc1NC(=O)[C@H](C)N1CCN(S(=O)(=O)c2c(C)noc2C)CC1 +Cc1nc(-c2ccc(Cl)s2)sc1C(=O)N[C@H]1C[C@H]1C +COc1cc(F)c([C@H]([NH3+])c2ccc(SC)cc2)cc1OC +Cc1ccc(-n2ccnc2SCC(=O)N(CC(N)=O)C(C)C)cc1C +Cc1cccc(/C=C2\SC(=S)N(c3c(C)cccc3C)C2=O)c1 +CC1=C(C(=O)OC(C)C)[C@H](C)N=C1C(=O)Nc1ncc(C)s1 +COC(=O)c1cccc(C(=O)N2C[C@@H](c3ccc(F)cc3)C[C@H]2C)c1 +O=C(NCCCn1ncccc1=O)[C@@H]1CC(=O)N(c2ccccc2)C1 +CCC(=O)N1CCCN(C(=O)N[C@@H]2CCc3ccccc32)CC1 +Cc1cccc([C@@H](C)[NH2+]CCS(=O)(=O)C(C)(C)C)c1C +Cc1ccc(-c2cnc(CCC(=O)NCC(C)(C)c3ccncc3)o2)cc1 +Cc1cc(NN)c2cccc(OC(F)(F)F)c2[nH+]1 +C[C@H]1CCC[C@@H](C(=O)Nc2cccc(OCCc3ccccc3)c2)[NH2+]1 +Cn1ncnc1CCNC(=O)[C@H]1C[C@@H]1c1cc(Cl)cc(Cl)c1 +CC[C@H](Sc1nnc2cc(C)c3cc(C)cc(C)c3n12)C(=O)Nc1nnc(COC)s1 +CCOc1ccc(S(=O)(=O)N2CCC(c3nnc(C4CC4)o3)CC2)cc1 +Cc1cc2nc(C)c(CCC(=O)NC[C@@H](c3ccccc3)N3CCOCC3)c(C)n2n1 +COc1ccc(N2C(=O)CS[C@H]2c2ccc(Cl)cc2)cc1Cl +C/C=C(\C)[C@@H]1C=C[C@@H]2C[C@H](C)C[C@H](C)[C@@H]2[C@@H]1C(=O)C1=C([O-])[C@H](C[C@](C)(O)C(=O)[O-])NC1=O +CS(=O)(=O)c1ccc(NC(=O)N2CCC[C@H]2CC2CCCCC2)cc1 +COc1ccc(C(=O)OCc2nc3ccccc3s2)cn1 +COc1ccc(S(=O)(=O)Nc2ccccc2-n2nc(C)cc2C)cc1NC(C)=O +Cn1c(=O)n(CC(=O)N[C@@H]2CCCc3ccc(F)cc32)c2ccccc21 +N#C/C(C(=O)NC1CCCC1)=C(/[O-])Cc1cnn(-c2ccccc2)c1 +NC(=O)[C@H](Nc1cccc(Oc2ccccc2)c1)c1ccc(F)cc1 +CCN(C[C@@H]1CCOC1)C(=O)Nc1cc2c(cc1Cl)OCCO2 +CCCCc1nnc(NC(=O)C2CCN(S(=O)(=O)c3ccc(C)cc3)CC2)s1 +CC[C@@H](NC(=O)c1ccc(Br)o1)C(=O)N1CCOCC1 +CC[C@]1(c2ccccc2)NC(=O)N(CCOc2ccc(Cl)cc2Cl)C1=O +COc1ccc(OC)c(NC(=O)c2ccc3c(c2)C(=O)N(c2cc(C)on2)C3=O)c1 +Cc1ccc(OCC(=O)NC(=S)NC[C@H]2CCCO2)cc1 +CN(C(=O)CCOc1cccc(C(N)=O)c1)[C@H]1CCC[NH+](C)C1 +COC(=O)c1ccc(NC(=O)c2c(C)sc3ncnc(N4CCC[C@H](C)C4)c23)cc1 +Cn1ncc2c1CC/C(=C\c1ccc(-n3cncn3)c(F)c1)C2=O +O=C(Cc1ccc([N+](=O)[O-])cc1)NCC1(O)CCOCC1 +C=CCOc1ccc(CNc2ccc(OC)cc2)cc1 +C[C@H]1N=C(CCNC(=O)CCC2=c3ccccc3=[NH+]C2)CS1 +COC(=O)c1cc(NC(=O)[C@@H]2CCO[C@H]2C)ccc1C 
+COc1ccccc1[C@@H](C)NC(=O)c1cnc2c(C)cccn2c1=O +CC[C@@H](C)NS(=O)(=O)c1cc(N2C(=O)[C@@H](C)CS2(=O)=O)ccc1OC +CC(C)(C)NS(=O)(=O)c1ccc(OCC(=O)N2CCOCC2)cc1 +COCCCNC(=O)[C@H]1CN(C(=O)c2cccs2)CC12CCCCC2 +Cc1nc(-n2cccc2)sc1C(=O)Nc1cccc(-c2cn3ccsc3n2)c1 +CC[S@](=O)[C@H]1CCCC[C@@H]1NC(=O)NC[C@H](O)c1ccco1 +Cc1cc2ncn(C[C@H]3CC3(Cl)Cl)c2cc1C +Cc1ccccc1OCC(=O)O[C@@H](C)c1nccs1 +C[C@H](C(=O)N1CCOCC1)[NH+]1CCN(Cc2ccc3c(c2)OCO3)CC1 +CCN(C(=O)[C@@H]1Cc2ccccc2S1)[C@H]1CCC[C@@H]1C[NH3+] +Cc1cc(Br)ccc1NC(=O)[C@@H](C)[NH+](C)Cc1cccs1 +COC(=O)[C@H]1CCC[C@H]1NC(=O)Nc1ccc(C)cc1C +CC(C)NS(=O)(=O)c1ccc(C(=O)N[C@H](C)c2ccccc2Br)cc1 +CC(C)OCCN1CCN(C(=O)Nc2ccccc2C(F)(F)F)CC1 +CCn1cc(-c2nc(-c3cccc(Cl)c3)no2)c(=O)c2ccccc21 +COC[C@@H](C)NC(=O)C(=O)Nc1cc(-c2ccccc2)nn1C(C)C +C#CCOc1ccccc1CN1CCN(C2=[NH+]C[C@@H](C)S2)CC1 +COCCNC(=O)[C@H]1CCCN1S(=O)(=O)c1ccc(Br)cc1 +Cc1ccc(C[C@H](O)c2c(F)cc(Br)cc2F)cc1 +COC(=O)[C@@]1(NC2CCCC2)CCCS[C@H]1C +CCC[NH2+][C@H](Cc1ccccc1)[C@@H]1CN(CC)CCO1 +C=CCn1c(=O)c2c(nc3n2[C@H](C)C(C)=NN3C)n(C)c1=O +C[C@H]1CCCC[C@H]1NC(=O)c1cc(S(=O)(=O)N2CCOCC2)ccc1Cl +COCCn1ccc2ccc(NC(=O)NCCc3ccccn3)cc21 +Cc1nc(CNC(=O)C(=O)Nc2cc(Cl)ccc2Cl)no1 +CS[C@@H]1CC[C@H](NC(=O)CCC(=O)c2ccc(C)s2)C1 +Cc1nc(/C=N/Nc2ccc(Cl)nn2)c[nH]1 +O=C(CSc1nnnn1C1CC1)N[C@H](CO)C(=O)[O-] +O=C(COc1ccc(Br)cc1)N[C@H]1CCS(=O)(=O)C1 +CCc1nnc(NC(=O)c2ccccc2N)s1 +O=C([O-])c1ccccc1-c1ccc(/C=C2\C(=O)N(c3cccc(Br)c3)C(=O)N=C2[O-])o1 +COc1ccc(-c2csc(NC(=O)c3ccc(S(C)(=O)=O)cc3)n2)cc1OC +COc1ccc(Cn2ccc3nc(N4CCN(c5ccccc5)CC4)ncc3c2=O)cc1 +Cc1nn(C)cc1/C=N/NC(=O)c1ccncc1 +N#Cc1ccc(OC2CCC(NC(=O)c3ccc[nH]3)CC2)nc1 +CC(C)c1ccc2oc(-c3ccc(C[NH3+])cc3)nc2c1 +COc1ccc([C@@H]2NC(=O)N[C@@](O)(C(F)(F)F)[C@H]2C(=O)c2ccc(F)cc2)cc1OC +Cc1cc(NC(=O)c2cccc([N+](=O)[O-])c2C)n(-c2ccccc2F)n1 +CCC(=O)N1CCCC[C@@H]1C(=O)NCCc1ccc(F)cc1C +Cc1ccc(S(=O)(=O)N2C(N)=C(C#N)[C@H](c3ccc(Cl)cc3)[C@H]2C(=O)c2ccccc2)cc1 +C[NH+](C)[C@@H]1CC[C@H](NC(=O)[C@@H]2CCCc3[nH]ncc32)C1 +CCOCCS(=O)(=O)[N-]c1cc(Br)ccc1O +NC1=NC(=O)[C@H](CC(=O)N2CC[C@H](c3ccccc3)C2)S1 
+Cc1noc(-c2cccnc2N2CC[C@H](NC(=O)COc3ccc(F)cc3)C2)n1 +Cc1ccc(OC(=O)c2cccc(C(=O)Oc3ccc(C)cn3)n2)nc1 +CN(CC1CCCC1)C(=O)C(=O)Nc1cccc(SC(F)F)c1 +COc1cccnc1N(C)C(=O)C[C@H](C)Cc1ccc(Cl)cc1 +O=C(c1cc(=O)[nH]c2ccccc12)N1CCC([C@H](O)c2ccccc2)CC1 +CCO[C@@H]1C[C@@H]([NH3+])[C@@H]1Nc1ncc(Cl)cc1F +CC(=O)Nc1ccc(NC(=O)c2ccc3c(c2)Cc2ccccc2-3)cc1 +COCCOC[C@H]1CC[NH+](C2C[C@@H](C)O[C@H](C)C2)C1 +O=C1OC(c2ccccc2OC(F)F)=N/C1=C\c1cccc(F)c1 +COc1ccccc1CNC(=O)c1cc2sccc2n1Cc1cccc(F)c1 +CCOC(=O)C1(NCc2nnc(-c3cc(C)oc3C)o2)CCCC1 +N#Cc1cccc(NC(=O)N2CCC(NC(=O)CC3CCCC3)CC2)c1 +CCOC(=O)C1=C(c2ccccc2)Nc2ncnn2[C@H]1c1ccc(SC)cc1 +Cc1cc(NC(=O)N[C@@H](Cc2ccccc2)c2ccccc2F)n(C)n1 +C[C@H](Sc1cccc[n+]1[O-])C(=O)NC[C@H]1COc2ccccc2O1 +O=C(NCCOc1ccc2c(c1)OCO2)c1cc(C2CC2)on1 +N#CCC[NH2+]C1(C(=O)[O-])CC1 +CC[C@@H](Oc1ccccc1OC)C(=O)NCc1ccccn1 +Cc1nc(C[C@@H](N)[C@]2([NH+](C)C)CCC[C@H](C)C2)cs1 +CCOc1ccccc1/C=C1\Oc2c(ccc([O-])c2C[NH+]2CCN(C)CC2)C1=O +CC(C)Cn1cc[nH+]c1CN[C@@H](c1ccccc1)C(C)C +Cc1c(C[NH+]2CCC[C@H]2c2ccc3c(c2)OCO3)cc(C#N)n1C +O=C(CBr)c1cnc2ccc(Cl)cn12 +COc1ccc(F)cc1NC(=O)c1sccc1S(=O)(=O)N(C)C +COc1ccc(Br)cc1/C=C1/C(=O)NN(c2ccc(C)c(C)c2)C1=O +CC(C)(C)c1ccc(C(=O)N[C@H]2CCN3CCCc4cccc2c43)cc1 +CC[NH+]1CCC[C@@]2(CC1)C[NH+]=C(N)N2c1ccc(C)cc1 +CN(C)c1ccc(/C=C(/C#N)C(=O)c2cccc(C#N)c2)cc1 +CCCC[C@@H](NC(N)=O)C(=O)Nc1cc(OC)ccc1F +COC1=CC2=NC(SCc3cc(-c4ccccc4)on3)=NC2=CC1 +Cc1ccc([C@@](C)(O)CNC(=O)NC[C@@H](c2ccco2)[NH+]2CCCCC2)o1 +C[C@@H](NC(=O)CSc1ccc2c(c1)OCCCO2)c1ccc2ccccc2c1 +CCN1CCN(S(=O)(=O)c2cc(-c3csc(C)n3)ccc2C)CC1 +Cc1nccn1Cc1ccc(NC(=O)c2cccc(-n3cccc3)c2)cc1 +Cc1cccc([S@@](=O)Cc2ccc(N)c(F)c2)c1 +CC(C)C[C@@H](C[NH3+])c1nc(C2CCOCC2)no1 +CC1=NC(SCC(=O)Nc2ccccc2C(F)(F)F)=NC(=O)[C@H]1Cc1ccccc1 +Cc1ccc(F)cc1NC(=O)C(=O)NCCCn1cc[nH+]c1 +COc1cccc(C[NH2+]Cc2cccc(Br)c2OC)c1OC +Nc1cc(=O)[nH]c(SCC(=O)Nc2nc(-c3ccc(Br)cc3)cs2)n1 +CCc1noc(CN(CC)C(=O)C(C)(C)NC(=O)c2cccs2)n1 +CC(C)C[C@](C)(O)CNC(=O)CC[C@H](C)O +C=C[C@@H](C)NC(=O)c1c(C)cc(C)c([N+](=O)[O-])c1C 
+O=C(Nc1ccc(S(=O)(=O)NC[C@@H]2CCCO2)cc1)[C@H]1CC(=O)N(c2ccc(F)c(Cl)c2)C1 +CC1(C)CCC[C@H]1N1C(=O)c2cccc(N)c2C1=O +CCCNC(=O)NC(=O)CN1C(=O)N[C@](Cc2ccccc2)(c2ccccc2)C1=O +O=C(COc1cc(Cl)c(Cl)cc1Cl)N1CCN(C(=O)Nc2ccccc2)CC1 +CCc1nc2n(n1)C[C@H]([NH2+]CCNS(=O)(=O)c1ccccc1)CC2 +CCCCN(C(=O)c1oc2ccccc2c1C)c1c(N)n(CCC)c(=O)[nH]c1=O +CCCn1c(S[C@H](C(N)=O)c2ccccc2)nc2sc(CC)cc2c1=O +COc1cccc(CCNC(=O)CN2CCN(c3ccccc3O)CC2)c1 +c1ccc2c(NCc3nnc(C4CCC4)o3)cccc2c1 +CC1=C(C(=O)C2=C([O-])C(=O)N(CC[NH+](C)C)[C@H]2c2cccc(Cl)c2)[C@H](C)N=N1 +CCOc1ccc([C@@H]2Nc3ccc(C(=O)N(C)C)cc3[C@H]3C=CC[C@H]32)cc1 +COc1ccc(Cc2sc(NC(=O)[C@H]3COc4ccccc4O3)nc2C)cc1 +Cc1cnc2nc(C[C@H](O)C(F)(F)F)[nH]c2c1 +CCN(CC(C)(C)O)C(=O)COCc1ccccc1Cl +O=C(Nc1cccc2ccccc12)NC1CC[NH+](CC(F)F)CC1 +O=C1C(=O)N(CC[NH+]2CCOCC2)[C@@H](c2cccc([N+](=O)[O-])c2)/C1=C(\O)c1cccs1 +CC[C@@H](C)N(CC)C(=O)c1ccccc1N +Cc1cc(C)c(C(=O)Cn2nc(N)n(Nc3cccs3)c2=S)c(C)c1 +C[C@H]1CN(C(=O)CC[C@H](C)c2ccccc2)C(C)(C)CO1 +CN(C)c1nc2c(c(-c3ccc(S(C)(=O)=O)cc3)n1)CCCC2 +CC(C)c1ccc(CNC(N)=[NH2+])cc1 +CN(C)N1C(N)=C(C#N)[C@@H](c2cccs2)C2=C1CCCC2=O +O=C(N[C@H]1C=C[C@H](C(=O)[O-])C1)c1cc(F)c(Cl)cc1Cl +CC(C)n1nnnc1COc1cccc(C(=O)NC2CCCCCC2)c1 +O=S(=O)(NCc1ccc(Cl)cc1Cl)c1ccccc1Br +COC[C@H](NC(=O)c1cc(-c2ccccc2)c(C)[nH]c1=O)C(N)=O +O[C@H]1CCCCC[C@H]1n1cc(-c2ccccc2Cl)cn1 +CC1=C[C@@H](C)[C@H]2C(=O)N([C@H](Cc3ccccc3)C(=O)[O-])C(=O)[C@H]2C1 +CCCCNS(=O)(=O)Cc1ccc([N+](=O)[O-])cc1 +COc1ccc(CC[C@H]2C[C@@H](C(C)(C)C)CCC2=O)cc1 +C[C@H]1CN(CC(=O)Nc2nc(-c3ccccc3Cl)cs2)CCO1 +Cc1cc(Cl)cc(Cl)c1CNC(=O)c1cccs1 +CC(C)(C)OC(=O)NC1CCN(CC(=O)c2nccs2)CC1 +COc1ccc(C)cc1[C@@H](C)NC[C@@H]1CN(C2CC2)CCO1 +CCOC(=O)c1ccn(-c2cccc(NC(=O)C3CCCC3)c2)n1 +O=C1N(C[NH+]2CCN(c3ccccc3)CC2)c2ccccc2C12O[C@@H]1CCCC[C@H]1O2 +CC(C)c1nc(C(=O)[O-])nn1-c1ccccc1F +C1=C[C@H]2C[C@@H]1C[C@H]2CN1CC[NH+](C2CCCCCC2)CC1 +CC(C)C[C@H](C[NH+](C)C)Nc1ncncc1N +O=C1C=C(c2cccs2)C[C@H](c2cccs2)[C@@H]1n1cnc([N+](=O)[O-])n1 +C[NH2+]C1CCC([NH+](C)CC(=O)N[C@H](C)c2ccco2)CC1 +CC(C)c1nc2n(n1)CCC[C@H]2[NH2+]C[C@@H]1CCC[C@H](C)C1 
+CCCOC(=O)c1ccc(NC(=O)c2ccc[n+]([O-])c2)cc1 +CCOc1cc(CO)cc(Br)c1OCc1ccccc1F +O=C(Cn1nnn(-c2cccs2)c1=O)NC[C@@H]1CN(Cc2ccccc2)CCO1 +CC(C)[C@H](O)CCNC(=O)C(=O)Nc1ccn(-c2ncccc2Cl)n1 +Cc1cccc(C(=O)NCCS(=O)(=O)NCC2CCC2)c1C +CCCC(=O)N1CCC(C(=O)NN=C(c2ccccc2)c2ccccc2)CC1 +O=C(N[C@H](NC(=S)Nc1ccccc1)C(Cl)(Cl)Cl)c1cccc(Br)c1 +CCN(Cc1ccccc1)C(=O)c1cc(NC(=O)Cc2ccccc2)n(C)n1 +CC(C)CC(=O)N1CCN(C(=O)c2cnc3c(c2)NC(=O)CO3)CC1 +O=C(Cc1csc(NC(=O)Nc2ccc(Cl)cc2)n1)NCCc1ccc(Cl)cc1 +Cc1ccc2c(c1)C[C@@H](C[C@@H](C[NH3+])c1ccc(F)cc1)O2 +CC1=C(C(=O)OC(C)C)[C@@H](c2ccc(C)s2)NC(=O)N1C +COc1cc(F)c([N+](=O)[O-])c(NC[C@@H](O)c2cnn(C)c2)c1 +COc1ccc(C(=O)N2CCC([C@@]3(C)NC(=O)N(C4Cc5ccccc5C4)C3=O)CC2)cc1 +CCc1cnc(NC(=O)c2cc(C)n(C(C)C)c2C)s1 +COc1ccccc1-c1nc(C[NH+](C)Cc2ccc(C#N)cc2)cs1 +COc1ccc(-c2noc(-c3cc(-c4ccc(Cl)cc4)n[nH]3)n2)cc1OC +CC[C@H](NC(=O)CN1C(=O)c2ccccc2N2C(=O)CC[C@]12C)c1ccc(C)cc1 +Cc1ccc(CNC(=O)NCc2ccnc(OC(C)(C)C)c2)cn1 +CCN(CCO)C(=O)Nc1cccc(C(=O)Nc2cccc(C#N)c2)c1 +Cc1cccc(-c2nn(C[NH+]3CCCCC3)c(=S)n2-c2ccccc2)c1 +C[NH2+][C@@H](C1CCCC1)[C@@H]1CCc2cccnc21 +Cc1ccc(-c2cccc(F)c2C(=O)[O-])c(C)c1 +CN(C)C(=O)[C@@H](Sc1nnc2n(C)c3ccccc3n12)c1ccccc1 +Cc1ccc(C(=O)NNC(=O)c2ccc(SC[C@H]3CCCO3)c([N+](=O)[O-])c2)cc1 +CCCn1/c(=N/C(=O)[C@@H](CCSC)NC(N)=O)[nH]c2ccccc21 +Cn1/c(=N/C(=O)c2sccc2S(=O)(=O)N2CCOCC2)sc2ccccc21 +COc1ccccc1N1C[C@@H](C(=O)NN2C(=O)NC3(CCCCC3)C2=O)CC1=O +COc1cc(OC)cc(C(=O)Nc2ccccc2C(=O)NC(C)(C)C)c1 +C[C@H](Nc1nc(-c2ccncc2)nc2ccccc12)c1ccccn1 +CCOc1ccc(C[NH+]2CCC[C@H]([C@H](O)c3nccn3C)C2)cc1OC +O=C([O-])c1ccc([S@@](=O)Cc2ccc(O)cc2)cc1 +CCc1nn(C)cc1CNC(=O)NCC(C)(C)Cc1ccccc1 +COC(=C(C#N)C#N)c1cccs1 +O=C(NCc1ccnc(OCC(F)F)c1)NCc1cscn1 +C[C@@H](C(=O)N1CCCC1)N1CCN(C(=O)NCc2ccco2)CC1 +CC(C)c1ccc(CN(C)C(=O)NCCCn2cccn2)cc1 +CNC(=O)[C@@H]1CCCN(C(=O)Nc2nn(-c3ccccc3Cl)cc2C)C1 +COc1ccccc1N1CC[NH+]([C@@H](C)C(=O)Nc2ccc(F)cc2)CC1 +CN1C[NH+](C)CC2=C1NCNS2(=O)=O +CNC(=O)c1ccc(O[C@@H]2CCC[C@H]([NH3+])C2)nn1 +COc1ccc(C(=O)O[C@@H](C)[C@@H]2CCCO2)cc1OC(F)F 
+Cc1ccc(/C=C2/SC(=S)N(CCC(=O)N3CCCc4ccccc43)C2=O)cc1 +CCc1cccc(S(=O)(=O)Nc2cccc(-c3nnnn3C)c2)c1 +Cc1ccc2c(c1)CCN2C(=O)c1ccc(C)nc1C +CCCNC(=O)CN1CCN(C(=O)Cc2c(C)nn(-c3ccccc3)c2C)CC1 +Cc1ccc(C(=O)NC2CC[NH+](Cc3nc(-c4ccccc4)cs3)CC2)s1 +CCOc1ccc(F)c(C(=O)OC[C@H]2CCCCO2)c1F +C[NH+]1CCC(NC(=O)c2ncoc2-c2ccccc2)CC1 +O=C(NC[C@@H](O)CN1CCCC1=O)Nc1cccc(F)c1 +CC[NH+]1CCC[C@H](NC(=O)c2ccc(OC)c(O)c2)C1 +O=C(CSCC(F)(F)F)N1CCN(c2ccc(Cl)cn2)CC1 +COc1ccc(Cl)cc1S(=O)(=O)N[C@H](C)C(=O)NCc1ccc2c(c1)OCO2 +Cn1cc(C(=O)Nc2ccc(-n3ccnn3)cc2)c(C(C)(C)C)n1 +Cc1cccc(NC(=O)C[C@@H]2CCCCO2)c1C(=O)[O-] +Clc1ccc([C@H](NCCc2nnc3ccccn23)C2CC2)cc1Cl +Clc1ccc(OCCCCSc2ncccn2)cc1Cl +Cn1nnc2cc(C(=O)N[C@@H](C#N)c3ccc(Cl)c(Cl)c3)ccc21 +Cc1cc(C)cc(-c2nnc(Sc3nc(C(C)C)ns3)o2)c1 +C/C(=C/c1ccc(F)cc1)C(=O)NCc1cccc(OCC(F)F)n1 +CC(C)Nc1cccc(CNC(=O)N[C@@H]2CC[NH+](CC3CC3)C2)c1 +O=C(COc1ccc(F)cc1F)NC[C@H](O)c1ccccc1Cl +Cc1ccc(-n2nc3c(c2NC(=O)C(C)C)C[S@@](=O)C3)cc1 +COc1nc(Oc2ccc3ccccc3c2)ccc1N +O=C(c1ccccc1)c1ccc2nc(Nc3ccccc3)c3nncn3c2c1 +O=C(C[C@@H](O)c1cccc(F)c1)Nc1cc(F)ccc1O +CC(C)c1nc(CSCc2ccnn2C)no1 +O=C(C1CCC1)N1CCC[C@H]1c1nc2cc(-c3ccccc3)ccc2o1 +O=C(CC[C@@H]1NC(=O)NC1=O)NC1CCN(c2ccccc2F)CC1 +Cc1ccc(-n2nc3c(c2NC(=O)c2ccc(Br)o2)CSC3)cc1C +C=CCn1c(SCc2nnc([S-])n2-c2ccccc2)nnc1-c1ccccc1 +c1nnn(C23C[C@H]4C[C@H](CC(c5nc6c7cn[nH]c7ncn6n5)(C4)C2)C3)n1 +CCCn1ncnc1COc1ccc(C)nc1C[C@H](C)[NH3+] +CCSc1ccc(C(=O)N2CC[C@H](C)[C@H](O)C2)cn1 +C[C@H]1[C@H](C(=O)[O-])CCN1S(=O)(=O)[C@@H](C)C#N +COc1ccccc1NC(=O)CSc1nnc(C)c(=O)n1N +Cc1ccc(S(=O)(=O)OCCc2coc3ccccc23)cc1 +COc1ccccc1[C@@H](C)NC(=O)[C@@H](C)Oc1cccc(F)c1 +CC[C@@H](Oc1ccccc1/C=C1\S/C(=N\c2cccc(O)c2)N(CC)C1=O)C(=O)[O-] +C[C@@H]([NH2+]C[C@H]1CC[C@H](C(N)=O)O1)c1ccc2c(c1)OCCCO2 +Cc1ccccc1Nc1nc(N)nc(COc2ccc(F)c(Cl)c2)n1 +CC(=O)Nc1ccc(OC(=O)/C=C/c2ccc(C(N)=O)cc2)cc1 +O=C([O-])c1ccc(-c2ccncc2)cn1 +O[C@H](c1c(F)c(F)c(F)c(F)c1F)C(Cl)(Cl)Cl +COc1ccc(/C=N/NC(=O)CNc2ccc3ccccc3c2)cc1OC +Cc1ccc(NC(=O)/C(C#N)=C/c2cc(C)n(-c3ccc(O)cc3)c2C)cc1Cl 
+Cc1cccn2c(=O)c(C(=O)N[C@H]3CCN(C(=O)C(C)C)C3)cnc12 +O=C(NCCCS(=O)(=O)c1ccccc1)c1n[nH]c2ccccc12 +C1=C(CC[NH2+]Cc2ccco2)CCCC1 +CCN(CC(=O)NCc1ccc(F)cc1)C(=O)c1cnc(-c2cccnc2)s1 +FC(F)(F)c1cccc2c1CCCC2 +CN(C)C(=O)CCCNC(=O)c1ccnc(OC(C)(C)C)c1 +Cc1ccc(NC(=O)[C@H](C)[NH+](C)Cc2nnc(C3CC3)n2C)c(C)c1 +CN(C)c1cccc(C(=O)OCC(=O)C(C)(C)C)c1 +Cc1nsnc1Cn1nnc(C(=O)NC(C)C)c1C +CC(=O)Cc1nsc(N[C@@H](C)c2ccccc2)n1 +COc1ccc(CNC(=O)[C@H]2Oc3ccccc3O[C@@H]2C)cn1 +[NH3+]CC1CCC(c2nc3ccc(Cl)cc3s2)CC1 +CCN(C(=O)NC1CC[NH+](C[C@@H](O)COC)CC1)C1CCCC1 +CCSc1nc2ccccc2c(=O)n1CCc1ccccc1 +Cc1cc(C(=O)COC(=O)c2cc(Cl)c3c(c2)OCCCO3)c(C)n1C1CC1 +COc1ccc([C@@H](CNC(=O)c2ccc([N+](=O)[O-])o2)[NH+](C)C)cc1 +CC[C@H](NC(=O)c1ccc(C#N)cn1)C(=O)N1CCOCC1 +CCOC(=O)NC(=O)c1c(NC(=O)Cc2ccc(F)cc2)sc2c1CC[C@H](C)C2 +CC(C)n1cnnc1SCC(=O)Nc1ccc2c(c1)nc(C1CC1)n2C +CCc1ccc([C@H](O)C2(C[NH3+])CCCC2)cc1 +COc1ccc(CNC(=O)N2CCc3c([nH]c4ccccc34)[C@H]2C)cc1OC +COc1ccc([C@@H]2C(C#N)=C(N)Oc3cc(C)n(CCN4CCOCC4)c(=O)c32)cc1OC +O=c1[nH]nc([O-])n1/N=C/c1ccco1 +C[C@H](Oc1cccc(Cl)c1)C(=O)N1CCC(Cc2ccccc2)CC1 +COc1cc([C@@H]2C(C(=O)Nc3ccc(F)cc3)=C(C)Nc3nc(C)nn32)cc(OC)c1OC +CC[C@@H](C)[C@@H](O)C[NH2+][C@@H](c1cccs1)C1CC1 +CSc1cc(-c2cccs2)oc(=O)c1C#N +CC(C)[C@@H](NC(=O)c1ccc(NS(C)(=O)=O)cc1)C(=O)[O-] +[NH3+][C@H](CO)c1ccc(N2CCOCC2)c(Cl)c1Cl +CCCn1cc(NC(=O)c2cc3nc(-c4ccccc4)cc(-c4ccccc4)n3n2)cn1 +Cc1ccc(S(=O)(=O)N2CCC(C(=O)N3CCCc4ccccc43)CC2)cc1C +C=CCN(CC=C)C(=O)C1CCN(C(=O)C(C)(C)C)CC1 +Cc1nc(CSc2nncc3ccccc23)nc2ccccc12 +C[C@H]1CCCC[NH+]1C[C@@H]1CCC(C)(C)[C@@H]1[NH3+] +COc1cc(C(=O)Nc2ccccc2Oc2ccccc2)on1 +COc1ccc(S(=O)(=O)N2CCOCC2)cc1NC(=O)/C=C/c1ccc(F)c(Cl)c1 +Cc1ccc(F)c(C[NH+]2CCC(C(=O)NC(C)C)CC2)c1 +CCn1nc(C)c(CNC(=O)[C@H]2[NH+]=c3ccccc3=C2NC(=O)c2cccc(C)c2)c1C +COc1ccc(C(=O)N2CCC[C@H](C(=O)Nc3cc(Cl)ccc3F)C2)c2ccccc12 +CCc1ccc(/C=C(\C#N)C(N)=O)s1 +COc1cccc(N2C(=O)Nc3ccccc3[C@]2(O)C(=O)NCc2ccccc2)c1 +COC(=O)c1sccc1NC(=O)[C@@H]1CC[NH2+][C@@H]1C +C/[NH+]=C(/NCc1ccc([N+]2=CCCC2)cc1)N[C@H]1CC[C@@H](SC)C1 +N#Cc1csc(C(=O)N2CC[C@H]3CCCC[C@@H]32)c1 
+Cc1cccc(NC(=O)[C@H](C)[S@@](=O)Cc2ccc(F)c(F)c2)c1C +CNS(=O)(=O)c1cccc([C@H](C)NC(=O)c2ccc(Cn3cccn3)cc2)c1 +CC[NH2+][C@@]1(C(=O)OC)CCC[C@@H](Oc2ccccc2)C1 +COCCCn1c(C)c(C)c(C#N)c1NC(=O)C[NH+]1CC(C)(C)C1(C)C +C[C@H]1CCC[C@@H](C)N1C(=O)[C@H]1C[C@H]1c1ccccc1Cl +COCc1ccc(C[NH+](C)Cc2ccccc2O)o1 +Cc1c(F)cc(N)cc1S(=O)(=O)NCC(N)=O +CCNS(=O)(=O)[C@@H]1CC[NH+](C[C@@H]2CCCc3ccccc32)C1 +CC1(C)[C@@H]2CC[C@@]1(CS(=O)(=O)NCCCO)C(=O)C2 +COc1ccc(-n2ccc(CNC(=O)c3cc(Cl)ccc3[N+](=O)[O-])n2)cc1 +CC[C@@H](NC(=O)NC1CCC(C(=O)OC(C)(C)C)CC1)[C@H]1CCCO1 +CN(CC[NH+](C)C)C(=O)C[C@H]1COCCN1C(=O)c1ccc2[nH]nnc2c1 +CC[C@H](c1ccc(F)cc1)N(C)C(=O)Cn1nnc(-c2ccccc2)n1 +O=C(Cn1cccc1-c1nc(-c2ccc(OC(F)(F)F)cc2)no1)Nc1nccs1 +CCc1nsc(Nc2ccc(CC(=O)N3CC[NH+](CC)CC3)cc2)n1 +CS[C@@H]1CC[C@H](NC(=O)/C=C(/C)c2ccccc2)C1 +Cc1ccc([N+](=O)[O-])cc1NCC(=O)N[C@](C)(C#N)C1CC1 +CC1(C)[C@H]2OCC[C@@H]2[C@H]1NC(=O)CCNC(=O)C12CC3CC(CC(C3)C1)C2 +c1ccc(COC2CC[NH+](Cc3cccnc3)CC2)cc1 +O=C(C1CCCC1)N1CCC[C@@H]([NH+]2CCC(CO)CC2)C1 +COC(=O)CNC(=O)c1sc2ncn(CC(=O)N3CCCCC3)c(=O)c2c1C +CC(C)CCNC(=O)[C@@H](C)Oc1ccc(N)cc1C(=O)[O-] +CCN(CC)C(=O)[C@@H]1C[C@@H]([NH3+])CN1C(=O)Cc1cccc(O)c1 +COc1ccc(F)cc1NC(=O)N1CCO[C@H](c2ccc(C)o2)C1 +COc1ccc(-c2nnc(SCC(=O)c3ccc(Br)cc3)o2)cc1OC +CC(C)=CC(=O)NCCC1CCN(c2cc[nH+]cc2)CC1 +C[C@]1(O)[C@](C)(O)[C@@H](CO)O[C@](C)(Oc2c[nH]c3ccc(Br)c(Cl)c23)[C@]1(C)O +O=C(CCCc1nc(-c2cccnc2)no1)N1CCC[C@@H](Cc2ccccc2)C1 +Cn1nc(NC(=O)c2cccc(F)c2)c2c1NC(=O)C[C@@H]2c1ccccc1 +O=C(CSc1ccncc1)NCCN1Cc2ccccc2O[C@@H](c2ccccc2)C1 +N#Cc1ccc(OCCn2cc(Cl)cn2)cc1 +C[C@@H]1CCN(C(=O)Nc2ccc(O[C@@H]3CCOC3)cc2)[C@H](C)C1 +CC[C@H](C)[C@H](C)[NH2+]Cc1ncccc1F +C#CC(C)(C)NC(=O)c1ccc(OC)c(O)c1 +COc1cc([N+](=O)[O-])ccc1OCc1nc(-c2cccs2)no1 +CC1CCN(C(=O)C[NH+]2CCC[C@@H](c3nc4ccccc4o3)C2)CC1 +C[C@H]1CCC[C@H](NC(=O)Cc2c[nH]c3ccccc23)[C@@H]1C +Cn1nc(CNC(=O)Nc2ccccc2C(F)(F)F)cc1-c1ccncc1 +CC[NH2+]C[C@H](Cc1cscn1)c1cccc(F)c1 +CCCCS(=O)(=O)N1CCN(c2ccc(-n3ccnc3C)nn2)CC1 +O=C(c1cc2ccccc2o1)N(C[C@H]1CCCO1)c1nc2c(F)cccc2s1 
+C[C@H](CC#N)Sc1ccccc1NC(=O)c1ccc(Cl)nc1Cl +O=C(CCCc1nc2ccccc2s1)N[C@H]1CCOC1=O +COC(=O)c1cc(S(=O)(=O)N[C@H](C)c2ccccc2C)cn1C +Cc1cnn(CC(=O)[C@@H](C#N)c2nc([O-])c3ccc(Cl)cc3n2)c1 +COc1ccc(C)cc1NC(=O)[C@H]1CCCN1c1cc(C)ccc1[N+](=O)[O-] +CC[NH+]1CCC2(CC1)OC[C@H](C(=O)[O-])N2C(=O)c1ccc(F)cc1 +Cc1ccc2c(c1)N(C(=O)C[C@H](O)c1ccc(Cl)cc1)CC2 +O=c1[nH]cnc2c1[nH]c(=S)n2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O +Cc1cn2c([nH+]1)CC[C@H](NC(=O)C[C@@H]1CCCc3ccccc31)C2 +CC(=O)c1cc(CN2CCC3=NN=C(c4ccccc4F)[C@@H]3C2)cs1 +C[C@H](Nc1ccc(S(=O)(=O)N2CCCCC2)cn1)[C@@H](C)CO +COCCn1nc(C)c(NC(=O)N2CCC[C@H]2c2cccc(C)c2)c1C +CCCCS(=O)(=O)[N-]c1ccc(NC(=O)[C@H]2CCC[NH+](C)C2)cc1 +Fc1ccc(Oc2ccnc(Sc3nnc(-c4cccs4)o3)n2)cc1 +N#Cc1ccc(NC(=O)[C@@H]2CSCN2C(=O)c2cn(Cc3ccccc3)c3ccccc23)cc1 +CC[C@@H](C)n1nccc1NC(=O)C(=O)N1CCc2cc(F)ccc2C1 +Cc1ccc(-c2cnc(CCC(=O)N(C)C3CCOCC3)o2)cc1 +C[C@H](CNC(=O)c1ccc(-c2ccccc2)[nH]c1=O)Oc1ccc(F)cc1 +CCOC(=O)c1c(NC(=O)[C@H]2CCCN2S(C)(=O)=O)sc2ccccc12 +Cc1ccc(C(=O)Cc2cccc(O)c2)cc1 +Cc1ccccc1-c1nn(CN2CCCc3ccc(S(C)(=O)=O)cc32)c(=S)o1 +CCOc1cccc([C@H](C)NC[C@@](C)(O)c2ccc(F)cc2F)c1 +CC[C@@H]1CCCCCN1C(=O)c1cnc2sc(C)cn2c1=O +N#Cc1ccc(OCC(=O)NCc2cccc(CO)c2)cc1 +O=C(Nc1ccccc1)NC1CCN(C(=O)[C@H]2CCCC[C@H]2C(F)(F)F)CC1 +CC1(C)CCC(O)(C[NH2+][C@@H]2CCOC3(CCC3)C2)CC1 +[NH3+][C@H]1CCC[C@H]1CCN1C(=O)c2cccc3cccc1c23 +C#CCN(Cc1cc(Br)cc(OC)c1O)[C@@H]1CCS(=O)(=O)C1 +Cc1ccc(-c2nc3nc(CN4CC[NH+](C)CC4)cc([O-])n3n2)cc1 +C[C@@](O)(CNC(=O)C1CCCC1)c1cccs1 +O[C@H](CSc1nnc(-c2c[nH]c3ccccc23)n1C1CC1)CN1CCOCC1 +C[C@@H]1CCC/C(=N/[NH+]=C(/[S-])NCc2ccccc2)C1 +O=C(c1cc2ccc(Cl)cc2[nH]1)N1CCC[C@@H]1Cn1nnc(-c2cccs2)n1 +COc1ccc(S(=O)(=O)Oc2ccc(C(C)=O)cc2OC)cc1 +CC(C)c1ccccc1NC(=O)C[NH+](C(C)C)[C@@H]1CCCC[C@@H]1O +O=Cc1ccn(-c2ccc(Br)cc2)c1 +O=C(C1CC1)N1CCC[C@H](Cn2cc[nH+]c2-c2cc3n(n2)CC[NH2+]C3)C1 +O=C(CCNc1ccccc1[N+](=O)[O-])N1CCC[C@@H]([NH+]2CCCC2)C1 +Cc1ccc2c(c1)-c1onc(C(=O)N3C[C@@H](C)C[C@H](C)C3)c1CO2 +O=C(COC(=O)c1ccc(Cl)nc1)NC(=O)Nc1ccc2c(c1)OCCO2 +CCC[C@@H]1C[C@H]1NC(=O)C1(c2ccc(F)cc2F)CCOCC1 
+CCOC(=O)C1CCC(NC(=O)[C@@](C)([NH3+])CC)CC1 +CC[C@@H](O)C(=O)NCc1cccnc1Oc1ccccc1OC +C[C@@H](Sc1nnc(-c2cccs2)n1-c1ccccc1)C(=O)N1CC(=O)Nc2ccccc21 +Cc1cc(F)ccc1NC(=O)COc1ccc2c(c1)CCC2 +C[C@H](NC(=O)NCCC[S@](C)=O)c1ccc(Cl)s1 +O=C(Cn1c(=O)c(=O)n(Cc2ccncc2)c2ncccc21)NCCc1ccccc1 +CS(=O)(=O)c1ccc(C(=O)Nc2ccc(F)c(F)c2F)cc1 +CCCCOc1ccccc1/C=C1\SC(N2CCC(C)CC2)=NC1=O +CC(C)[C@@H](CNC(=O)N1CCc2ccc(Cl)cc2C1)c1cccnc1 +CCCOc1ncnc(Nc2cc(Cl)cc(Cl)c2)c1N +CC(=O)Nc1ccc(NC(=O)c2nnn(-c3ccc(C)c(C)c3)c2C)cc1 +CCOC1CC[NH+](CC[C@@H](O)c2ccc(C)c(F)c2)CC1 +IC[C@@H]1Cn2c(nnc2-c2ccncc2)S1 +CCOc1ccc([C@H]2CCCN2C(=O)c2[nH]c(C)c(C(C)=O)c2C)cc1 +CC(C)CONc1ncnc2sc3c(c12)CCC3 +CC(C)[C@@H](ON1C(=O)c2ccccc2C1=O)C(=O)[O-] +COC[C@H](O)C[NH+]1CCC(C)(C)C1 +COC(=O)[C@@H](c1ccccc1Cl)N1CCCSCC1 +O=C(Nc1nc2ccc(F)cc2s1)c1cc(-c2ccccc2O)[nH]n1 +C/C(=N\Nc1ncnc2sc(C)c(C)c12)c1cccc(OC(F)F)c1 +CC(C)CN1CCO[C@@H](CNC(=O)/C=C/c2ccnc(Cl)c2)C1 +Cc1cc(F)ccc1CCNC(=O)Cc1c[nH]c2c(C)cccc12 +NC(=O)[C@@H]1CCCN(C(=O)Cn2nc(-c3cccs3)oc2=O)C1 +CCCCCn1c(SCC(=O)[O-])nc2ccccc2c1=O +Cc1ccc2nc(NC(=O)c3ccc(OCc4nc(-c5ccco5)cs4)cc3)sc2c1 +COc1ccccc1N1CCN(c2ccc(=O)n(CC(=O)NC3CC3)n2)CC1 +C/C(=C/C(=O)N[C@@H](C)c1c(C)noc1C)c1ccccc1OC(F)F +COCCn1nc(C)c(NC(=O)N2CC[C@H](Cc3ccccc3)C2)c1C +Cc1ccsc1C[NH+](Cc1nc2ccccc2n1C(C)C)C[C@H](C)O +Cc1cccc(CNC(=O)C[C@H]2Oc3ccc(C)cc3NC2=O)c1 +C[C@@H](c1ccc([S@](C)=O)cc1)N(C)C(=O)c1cc2cccc(F)c2o1 +COc1cc(OC)c(C(C)=O)cc1CSc1nnnn1-c1ccccc1 +Cc1sc(=O)n(CCC(=O)NC2CC(C)(C)[NH2+]C(C)(C)C2)c1C +CC[C@@H]1CCCCN1C(=S)NC(=O)c1ccc(C)cc1 +CCC[C@H](C)C(=O)N[C@H](C)c1cccc(Br)c1 +COc1cccc([C@H]2CCCN2C(=O)c2ccccc2I)c1 +C[C@H](NC(=O)N1CCCC[C@@H]1C1OCCO1)c1cccc(-n2ccnc2)c1 +CS(=O)(=O)N1CCC[C@@H](C[NH+]2CCC[C@H](CO)C2)C1 +CCN(C(=O)Cn1nc2n(c1=O)CCCCC2)[C@H]1CCS(=O)(=O)C1 +COc1ccccc1NC[C@H]1CCCN(S(C)(=O)=O)C1 +COc1cc([C@@H]2CC(=O)Nc3c2cnn3Cc2cccnc2)cc2c1OCO2 +O=C(CNc1ccc(Cl)cc1NC(=O)c1ccco1)Nc1ccc(F)c(Cl)c1 +Cc1nn(C)cc1[C@@H](C)NC(=O)C(=O)Nc1ccc(OCC2CCCCC2)cc1 +CC(C)(C)[S@](=O)CCNC(=O)c1cccc(F)c1Cl 
+C[C@@H](O)c1ccc(F)cc1OCc1nc(C(C)(C)C)cs1 +COC[C@H](NC(=O)Nc1cn[nH]c1)c1ccc(F)c(F)c1 +C[C@@H]1CS(=O)(=O)N(c2ccc(S(=O)(=O)Nc3ccccc3C(F)(F)F)cc2)C1=O +CC(C)c1ccc2c(c1)[C@]1(CC(O)=Nc3c1cnn3Cc1ccccc1Cl)C(=O)N2C +CC[C@@H](C)C(=O)NCC(=O)N(C)[C@@H](C)c1cc(F)ccc1F +CO/N=C\C(C#N)=C/c1cccnc1 +CO[C@H](c1ccc(Cl)cc1)[C@@H](C)NC(=O)C(=O)Nc1ccccc1C +CC(C)CNC(=O)[C@](C)(N)C(F)(F)F +C[C@@H](C(=O)C1=c2ccccc2=[NH+]C1)[NH+]1CCC[C@@H]1[C@@H]1CC=CS1 +Cc1nc(Br)ccc1NC(=O)NCc1cnn(C)c1 +COc1c(C)cnc(CNC(=O)Nc2ccc(N(C)C)cc2)c1C +COc1ccc2cc(COC(=O)COc3ccccc3C#N)ccc2c1 +CCS(=O)(=O)CCN(C)Cc1c[nH]nc1-c1ccc(C)cc1 +COc1ccc([C@@](C)([NH3+])Cc2[nH+]ccn2C)cc1 +C[C@H]1CCN(C(=O)NCCc2nnc3n2CCCCC3)[C@@H](C)C1 +O=C(NC[C@@H]1CCC[NH+](Cc2ccccc2F)C1)c1nc[nH]n1 +O=C1CC[C@@H](NC(=O)COc2ccc(Cl)c(Cl)c2)CN1 +Cc1noc(C)c1COc1ccc(C[NH2+]C[C@H]2CCCO2)cc1 +N#Cc1cnn2c1N[C@@H](c1ccccc1)C[C@@H]2C(F)F +C[C@@H]1Cc2ccccc2N1C(=O)[C@H]1CCCN(C(=O)NC2CC2)C1 +COCc1nc(C(=O)OCC2=CC[C@H]3C[C@@H]2C3(C)C)cs1 +CCN(Cc1ccc(Br)s1)C(=O)C[NH+](C)CC(=O)[O-] +O=C([O-])[C@H]1CCCN(c2ccc([O-])nn2)C1 +COc1ccc(CCCC(=O)Nc2cccc(S(N)(=O)=O)c2)cc1F +Fc1ccccc1[C@@H](c1nnnn1C1CCCCC1)[NH+]1CCN(c2ccccc2)CC1 +O=C(C/C(=N\Nc1nc(-c2ccccc2)cs1)c1ccccc1)C(F)(F)F +C[C@H]([NH2+]CC(=O)N(C)C)c1ccc(Cl)s1 +CCOC(=O)COc1ccccc1/C=C1/C(=O)NC(=O)N(c2ccc3c(c2)OCO3)C1=O +Cc1cc(C(=O)NNC(=O)c2cccc3ccccc23)c(C)o1 +COc1ccc(-c2csc(NC(=O)Nc3ccc(F)cc3)n2)cc1OC +Cc1ccc(C)c(S(=O)(=O)N2CCN([C@H](C)c3nc(N)nc(Nc4ccccc4)n3)CC2)c1 +Cc1cccc(C)c1-n1nnnc1CSCc1nnc(C)n1C +COc1cccc(C(=O)N[C@@](C)(C(N)=O)c2cccc(Cl)c2)c1 +C[C@H](CCO)[NH2+][C@H]1CCc2c(Br)cccc21 +CCc1ccsc1-c1cnc(C[NH3+])o1 +NC(=O)C1(N2CCCC2)CC[NH2+]CC1 +CC(C)C[C@@H]([NH3+])C(=O)N1CC[C@H](C(=O)[O-])[C@@H]1C +CC[C@H](C)Cn1c(CCCl)nc2c(C)nn(C)c21 +Cc1csc([C@H](C)NC(=O)CCC[NH+]2CCCCC2)n1 +CCC(=O)NN/C(C)=C/C(=O)NCC(C)(C)C +CCC(=O)N1CCC([NH+](C)Cc2ccc(SC)c(OC)c2)CC1 +Clc1ccc(CNc2ncccc2Cl)cn1 +C[C@H](c1nc(C(C)(C)C)no1)[S@](=O)Cc1ncn(-c2ccccc2)n1 +Cn1cnn(C[NH+](Cc2ccc(F)cc2)C2CC2)c1=S +C#CC(C)(C)NC[C@H]1CN(C)CCO1 
+C[C@H](NC(=O)[C@H]1CCCN1S(C)(=O)=O)c1ccc2c(c1)OCO2 +O=C(Nc1ccc2ncccc2c1)C(=O)NC1CCC(O)CC1 +CC(C)[C@H]([NH2+]CC1CCN(C(=O)OC(C)(C)C)CC1)c1cccnc1 +CCC[C@H](C)NC(=O)C[NH2+]Cc1cscc1C +CS(=O)(=O)N1CCC(C(=O)Nc2sc3c(c2C#N)CCCC3)CC1 +O=C(CN(C(=O)Cn1nnc2ccccc21)c1ccccc1)NC[C@H]1CCCO1 +O=C(Nc1cccc(N2CCCNC2=O)c1)C(=O)N1CCc2cc(F)ccc2C1 +C[C@@H]1CC[NH+](CCCN2C(=O)CNC2=O)C[C@@H]1O +O=C(COc1ccc(Br)cc1)NOCc1ccccc1 +CC(=O)C[C@]1(O)C(=O)N(Cc2ccc(C)cc2)c2c(C)cccc21 +C[C@H](OC(=O)c1ccc2ccccc2n1)C(=O)NCC1CCCCC1 +C[C@@H](Sc1cc(Cl)ccc1Cl)C(=O)N1CCC[C@H](CCC(N)=O)C1 +COc1cc(-c2ccno2)ccc1S(=O)(=O)NCc1ccco1 +CCC[C@@H](CC)Nc1c(F)c(F)nc(F)c1F +Oc1cccc([C@@H]2CN(c3nccc(Oc4ccc(F)cc4)n3)CCO2)c1 +COc1ccc([C@H](CNC(=O)c2cccc3ccccc23)[NH+]2CCCC2)cc1 +CC1(C)[C@@H]2CC[C@@]1(C)[C@H](NC(=O)COc1ccc(C3SCCCS3)cc1)C2 +Cc1c(C)n(-c2ccccc2)c2nc(C(=O)Nc3ccc(F)cc3)nc(N3CCCCC3)c12 +CC(C)C(=O)Nc1cccc(NC(=O)C(=O)NCC[C@H]2C[C@H]3CC[C@@H]2C3)c1 +COc1ccc(CNC(=O)c2cc(N3C(=O)C(C)(C)CS3(=O)=O)ccc2Cl)cc1OC +CC(=O)Nc1cccc(NC(=O)CCc2c(C)[nH]c(=S)[nH]c2=O)c1 +COc1ccccc1[C@H]1CCCN1C(=O)[C@@H](C)CCOc1ccccc1 +CCNc1ncc(COCC2CCCCC2)s1 +CC[C@H](C)C(=O)Nc1ccccn1 +O=C([O-])CC1=C(C(=O)[O-])CCCC1 +Fc1ccc(C[NH2+]C[C@@H]([C@H]2CCOC2)N2CCOCC2)c(F)c1 +CCc1cc(Cn2cc(N)nn2)n(C)n1 +N#Cc1cccnc1Oc1ccccc1NCc1cccc2ccccc12 +O=C(NCc1ccc([N+](=O)[O-])cc1)N[C@@H]1CCCC[C@H]1CO +C[C@H](NC(=O)NC[C@H](C)C[C@@H](C)O)c1ccc(S(C)(=O)=O)cc1 +CC(C)Oc1ccc(NC(=O)NC[C@H](C)N2CCOCC2)c(F)c1 +COc1ccccc1COC1CCN(C(=O)[C@H]2CCC[C@@H](C)C2)CC1 +CC(=O)O[C@H]1CC[C@H]2[C@H]3C[C@H](OC(C)=O)[C@]45C[C@H]4CC[C@]5(C)[C@@H]3CC[C@]12C +CCCn1nnnc1CN1CC[C@]2(C1)NC(=O)N(C(C)C)C2=O +CC1=C(C(=O)OCC(C)C)[C@H](c2cccc(F)c2)c2c(n(C)c(=O)n(C)c2=O)N1 +Cc1cc(C)c2c(-c3ccccc3)nc(SCC(=O)NC3CC3)n2n1 +COc1cc2c(cc1OC)[C@H](C(=O)[O-])[C@H](c1cccc(Cl)c1)N(C)C2=O +COCCN1C(=O)CC[C@@H]2C[NH+](Cc3cc(C)ccc3C)CC[C@@H]21 +Cc1cnc([C@H](C)NC(=O)NNC(=O)Nc2ccccc2)s1 +CNC(=O)c1ccc(NC(=O)c2csc(-c3ccccc3)n2)cc1 +CNc1nc(C2CCN(C(=O)Cc3ccccn3)CC2)[nH+]c2c1CN(C(C)=O)CC2 +Cc1cc(C)n(C[C@@H](C)CNC(=O)NCc2cc3ccccc3o2)n1 
+Cc1nc2n(n1)C[C@H]([NH2+]C[C@@H](O)CN(C)Cc1ccccc1)CC2 +c1ccc(Cn2c(SCc3ncon3)nnc2-c2cccs2)cc1 +COc1ccc(-n2nc(C)c3c2C[C@H](c2cc(OC)c(OC)c(OC)c2)CC3=O)cc1 +COc1ccc(OC)c(/C=C/C(=O)OCC(C)C)c1 +C[C@H](NC(=O)[C@H]1CC[C@H](C[NH3+])O1)C(=O)N(C)C +Cc1cccc(C(C)C)c1NC(=O)[C@@H](C)Sc1nnc(-c2cccs2)n1N +Cn1cc(C(=O)Nc2ccccc2C(=O)NCCc2ccccc2)c(=O)c2cccn21 +Cc1ccccc1[C@@H]1C[C@H](C)N(C(=O)[C@@H](C)Sc2ccccn2)C1 +Cc1occc1C(=O)/C(C#N)=C/c1ccc([C@@H]2C[C@H]2C)o1 +CC[C@](C)(C[NH3+])[C@H](O)c1ccc2c(c1)OCO2 +CCc1nn(CC)c(C[C@@]2(C3CC3)CCC[NH2+]2)c1Br +CC(=O)Nc1ccc(CN2CC[NH+](C3CCCC3)[C@H](CCO)C2)cc1 +COc1ccc([C@H](O)[C@@H](C)NC(=O)[C@@H](C)SC)cc1 +CCC(=O)N1CCC[C@@H]1c1cc(C(F)(F)F)c2c(=O)n(C)c(=O)n(C)c2n1 +C[C@H]([NH3+])[C@@H](CC(=O)[O-])c1ccccc1 +Cc1ccc([C@H]2C3=C(NC(=O)N2C)c2ccccc2C3=O)cc1 +C[C@@]1(Cc2ccc3c(c2)OCO3)CCC(=O)N(CCc2ccc(O)cc2)C1 +Cc1cc(C)cc(O[C@H]2CCCC(C)(C)[C@@H]2O)c1 +Cc1c(-c2nc(-c3cccs3)no2)sc2nc[nH]c(=O)c12 +COc1cc([C@H]2C(C(=O)Nc3ccccn3)=C(C)NC3=C2C(=O)CCC3)ccc1OCc1ccccc1 +COc1cc(C)c([C@@H](C)NC2CC[NH+]([C@H]3CCCC[C@@H]3O)CC2)cc1OC +C[C@@H]1CCC[C@@H](NS(=O)(=O)Cc2cccc(N)c2)C1 +CCCN1C(N)=[NH+]C[C@@H]1c1cc(Cl)c2c(c1)OCO2 +CC(C)C[C@@H](NC(=O)[C@@H]1C[C@@H]1c1cccc(Cl)c1Cl)C(=O)Nc1cc[nH]n1 +Cc1nc(CCC[NH+]2CCC[C@H]2C(N)=O)cs1 +NC(=O)COc1cccc(CNC(=O)c2cc3cc(Cl)ccc3[nH]2)c1 +COc1ccc(CNC(=O)c2cc(=O)c3ccc(Br)cc3o2)cc1 +CC[C@@H](C)c1ccccc1N1C[C@H](C(=O)N2CCN(C)CC2)CC1=O +CC[NH2+][C@H](Cc1ccccc1Cl)[C@H]1C[NH+](C)CCN1C +C[NH+]1CCC(N[C@@H]2CC(=O)N(CCc3cccc(Cl)c3)C2)CC1 +CCN1CC(=O)Nc2cc(C(=O)NC3CC[NH+](C4CCCC4)CC3)ccc21 +COc1ccc(Br)cc1/C=C/C(=O)N1CCN(C(=O)c2ccccc2)CC1 +NC(=O)COc1ccc(C(=O)N[C@H]2CCCc3ccccc32)cc1 +O=C(COc1ncnc2ccc(Br)cc12)Nc1ccccc1Cl +C[C@@H](c1ccc(Cl)cc1Cl)N(C)C(=O)c1ccc(NC(N)=O)cc1 +Cc1ccc(N2C(=O)[C@@H](Cc3cccc(C)c3)S/C2=C(/C#N)C(N)=O)cc1 +CC(C)CN(C(=O)NCc1ccc(C(F)(F)F)cc1)C1CC1 +ClCCc1nc2cccnc2n1CCn1cccn1 +CC[C@@](C)([C@@H]([NH3+])c1cc(Br)ccc1F)N1CCOCC1 +Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 +CC1(C)OC[C@H]([C@H]2O[C@@H]3OC(C)(C)O[C@@H]3[C@H]2OS(C)(=O)=O)O1 
+Cc1cccc([C@H](CCl)CCC[C@@H]2CCCO2)c1 diff --git a/open_biomed/models/MoleculeSTM/models/GA/__init__.py b/open_biomed/models/MoleculeSTM/models/GA/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/open_biomed/models/MoleculeSTM/models/GA/crossover.py b/open_biomed/models/MoleculeSTM/models/GA/crossover.py new file mode 100644 index 0000000..ec6d6b1 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/GA/crossover.py @@ -0,0 +1,194 @@ +''' +Written by Jan H. Jensen 2018 +''' +from rdkit import Chem +from rdkit.Chem import AllChem + +import random +import numpy as np + +from rdkit import rdBase +rdBase.DisableLog('rdApp.error') + +average_size = 39.15 +size_stdev = 3.50 + + +def cut(mol): + if not mol.HasSubstructMatch(Chem.MolFromSmarts('[*]-;!@[*]')): + return None + bis = random.choice(mol.GetSubstructMatches(Chem.MolFromSmarts('[*]-;!@[*]'))) #single bond not in ring + #print bis,bis[0],bis[1] + bs = [mol.GetBondBetweenAtoms(bis[0],bis[1]).GetIdx()] + + fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1)]) + + try: + fragments = Chem.GetMolFrags(fragments_mol,asMols=True) + return fragments + except: + return None + + +def cut_ring(mol): + for i in range(10): + if random.random() < 0.5: + if not mol.HasSubstructMatch(Chem.MolFromSmarts('[R]@[R]@[R]@[R]')): + return None + bis = random.choice(mol.GetSubstructMatches(Chem.MolFromSmarts('[R]@[R]@[R]@[R]'))) + bis = ((bis[0],bis[1]),(bis[2],bis[3]),) + else: + if not mol.HasSubstructMatch(Chem.MolFromSmarts('[R]@[R;!D2]@[R]')): + return None + bis = random.choice(mol.GetSubstructMatches(Chem.MolFromSmarts('[R]@[R;!D2]@[R]'))) + bis = ((bis[0],bis[1]),(bis[1],bis[2]),) + + #print bis + bs = [mol.GetBondBetweenAtoms(x,y).GetIdx() for x,y in bis] + + fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1),(1,1)]) + + try: + fragments = Chem.GetMolFrags(fragments_mol,asMols=True) + except: + return None + + if len(fragments) == 2: + return 
fragments + + return None + +def ring_OK(mol): + if not mol.HasSubstructMatch(Chem.MolFromSmarts('[R]')): + return True + + ring_allene = mol.HasSubstructMatch(Chem.MolFromSmarts('[R]=[R]=[R]')) + + cycle_list = mol.GetRingInfo().AtomRings() + max_cycle_length = max([ len(j) for j in cycle_list ]) + macro_cycle = max_cycle_length > 6 + + double_bond_in_small_ring = mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4]=[r3,r4]')) + + return not ring_allene and not macro_cycle and not double_bond_in_small_ring + +def mol_OK(mol): + try: + Chem.SanitizeMol(mol) + test_mol = Chem.MolFromSmiles(Chem.MolToSmiles(mol)) + if test_mol == None: + return None + target_size = size_stdev*np.random.randn() + average_size #parameters set in GA_mol + if mol.GetNumAtoms() > 5 and mol.GetNumAtoms() < target_size: + return True + else: + return False + except: + return False + + +def crossover_ring(parent_A,parent_B): + ring_smarts = Chem.MolFromSmarts('[R]') + if not parent_A.HasSubstructMatch(ring_smarts) and not parent_B.HasSubstructMatch(ring_smarts): + return None + + rxn_smarts1 = ['[*:1]~[1*].[1*]~[*:2]>>[*:1]-[*:2]','[*:1]~[1*].[1*]~[*:2]>>[*:1]=[*:2]'] + rxn_smarts2 = ['([*:1]~[1*].[1*]~[*:2])>>[*:1]-[*:2]','([*:1]~[1*].[1*]~[*:2])>>[*:1]=[*:2]'] + for i in range(10): + fragments_A = cut_ring(parent_A) + fragments_B = cut_ring(parent_B) + #print [Chem.MolToSmiles(x) for x in list(fragments_A)+list(fragments_B)] + if fragments_A == None or fragments_B == None: + return None + + new_mol_trial = [] + for rs in rxn_smarts1: + rxn1 = AllChem.ReactionFromSmarts(rs) + new_mol_trial = [] + for fa in fragments_A: + for fb in fragments_B: + new_mol_trial.append(rxn1.RunReactants((fa,fb))[0]) + + new_mols = [] + for rs in rxn_smarts2: + rxn2 = AllChem.ReactionFromSmarts(rs) + for m in new_mol_trial: + m = m[0] + if mol_OK(m): + new_mols += list(rxn2.RunReactants((m,))) + + new_mols2 = [] + for m in new_mols: + m = m[0] + if mol_OK(m) and ring_OK(m): + new_mols2.append(m) + + if 
len(new_mols2) > 0: + return random.choice(new_mols2) + + return None + +def crossover_non_ring(parent_A,parent_B): + for i in range(10): + fragments_A = cut(parent_A) + fragments_B = cut(parent_B) + if fragments_A == None or fragments_B == None: + return None + rxn = AllChem.ReactionFromSmarts('[*:1]-[1*].[1*]-[*:2]>>[*:1]-[*:2]') + new_mol_trial = [] + for fa in fragments_A: + for fb in fragments_B: + new_mol_trial.append(rxn.RunReactants((fa,fb))[0]) + + new_mols = [] + for mol in new_mol_trial: + mol = mol[0] + if mol_OK(mol): + new_mols.append(mol) + + if len(new_mols) > 0: + return random.choice(new_mols) + + return None + +def crossover(parent_A,parent_B): + parent_smiles = [Chem.MolToSmiles(parent_A),Chem.MolToSmiles(parent_B)] + try: + Chem.Kekulize(parent_A,clearAromaticFlags=True) + Chem.Kekulize(parent_B,clearAromaticFlags=True) + except: + pass + for i in range(10): + if random.random() <= 0.5: + #print 'non-ring crossover' + new_mol = crossover_non_ring(parent_A,parent_B) + if new_mol != None: + new_smiles = Chem.MolToSmiles(new_mol) + if new_mol != None and new_smiles not in parent_smiles: + return new_mol + else: + #print 'ring crossover' + new_mol = crossover_ring(parent_A,parent_B) + if new_mol != None: + new_smiles = Chem.MolToSmiles(new_mol) + if new_mol != None and new_smiles not in parent_smiles: + return new_mol + + return None + +if __name__ == "__main__": + smiles1 = 'CC(C)(C)c1ccc2occ(CC(=O)Nc3ccccc3F)c2c1' + smiles2 = 'C[C@@H]1CC(Nc2cncc(-c3nncn3C)c2)C[C@@H](C)C1' + + smiles1 = 'Cc1ccc(S(=O)(=O)N2C(N)=C(C#N)C(c3ccc(Cl)cc3)C2C(=O)c2ccccc2)cc1' + smiles2 = 'CC(C#N)CNC(=O)c1cccc(Oc2cccc(C(F)(F)F)c2)c1' + + mol1 = Chem.MolFromSmiles(smiles1) + mol2 = Chem.MolFromSmiles(smiles2) + + child = crossover(mol1,mol2) + mutation_rate = 1.0 + #mutated_child = mutate(child,mutation_rate) + + for i in range(100): + child = crossover(mol1,mol2) diff --git a/open_biomed/models/MoleculeSTM/models/GA/mutate.py 
b/open_biomed/models/MoleculeSTM/models/GA/mutate.py new file mode 100644 index 0000000..f52905d --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/GA/mutate.py @@ -0,0 +1,132 @@ +''' +Written by Jan H. Jensen 2018 +''' +from rdkit import Chem +from rdkit.Chem import AllChem + +import random +import numpy as np +import MoleculeSTM.models.GA.crossover as co + +from rdkit import rdBase +rdBase.DisableLog('rdApp.error') + +def delete_atom(): + choices = ['[*:1]~[D1]>>[*:1]', '[*:1]~[D2]~[*:2]>>[*:1]-[*:2]', + '[*:1]~[D3](~[*;!H0:2])~[*:3]>>[*:1]-[*:2]-[*:3]', + '[*:1]~[D4](~[*;!H0:2])(~[*;!H0:3])~[*:4]>>[*:1]-[*:2]-[*:3]-[*:4]', + '[*:1]~[D4](~[*;!H0;!H1:2])(~[*:3])~[*:4]>>[*:1]-[*:2](-[*:3])-[*:4]'] + p = [0.25,0.25,0.25,0.1875,0.0625] + + return np.random.choice(choices, p=p) + +def append_atom(): + choices = [['single',['C','N','O','F','S','Cl','Br'],7*[1.0/7.0]], + ['double',['C','N','O'],3*[1.0/3.0]], + ['triple',['C','N'],2*[1.0/2.0]] ] + p_BO = [0.60,0.35,0.05] + + index = np.random.choice(list(range(3)), p=p_BO) + + BO, atom_list, p = choices[index] + new_atom = np.random.choice(atom_list, p=p) + + if BO == 'single': + rxn_smarts = '[*;!H0:1]>>[*:1]X'.replace('X','-'+new_atom) + if BO == 'double': + rxn_smarts = '[*;!H0;!H1:1]>>[*:1]X'.replace('X','='+new_atom) + if BO == 'triple': + rxn_smarts = '[*;H3:1]>>[*:1]X'.replace('X','#'+new_atom) + + return rxn_smarts + +def insert_atom(): + choices = [['single',['C','N','O','S'],4*[1.0/4.0]], + ['double',['C','N'],2*[1.0/2.0]], + ['triple',['C'],[1.0]] ] + p_BO = [0.60,0.35,0.05] + + index = np.random.choice(list(range(3)), p=p_BO) + + BO, atom_list, p = choices[index] + new_atom = np.random.choice(atom_list, p=p) + + if BO == 'single': + rxn_smarts = '[*:1]~[*:2]>>[*:1]X[*:2]'.replace('X',new_atom) + if BO == 'double': + rxn_smarts = '[*;!H0:1]~[*:2]>>[*:1]=X-[*:2]'.replace('X',new_atom) + if BO == 'triple': + rxn_smarts = '[*;!R;!H1;!H0:1]~[*:2]>>[*:1]#X-[*:2]'.replace('X',new_atom) + + return rxn_smarts + 
+def change_bond_order(): + choices = ['[*:1]!-[*:2]>>[*:1]-[*:2]','[*;!H0:1]-[*;!H0:2]>>[*:1]=[*:2]', + '[*:1]#[*:2]>>[*:1]=[*:2]','[*;!R;!H1;!H0:1]~[*:2]>>[*:1]#[*:2]'] + p = [0.45,0.45,0.05,0.05] + + return np.random.choice(choices, p=p) + +def delete_cyclic_bond(): + return '[*:1]@[*:2]>>([*:1].[*:2])' + +def add_ring(): + choices = ['[*;!r;!H0:1]~[*;!r:2]~[*;!r;!H0:3]>>[*:1]1~[*:2]~[*:3]1', + '[*;!r;!H0:1]~[*!r:2]~[*!r:3]~[*;!r;!H0:4]>>[*:1]1~[*:2]~[*:3]~[*:4]1', + '[*;!r;!H0:1]~[*!r:2]~[*:3]~[*:4]~[*;!r;!H0:5]>>[*:1]1~[*:2]~[*:3]~[*:4]~[*:5]1', + '[*;!r;!H0:1]~[*!r:2]~[*:3]~[*:4]~[*!r:5]~[*;!r;!H0:6]>>[*:1]1~[*:2]~[*:3]~[*:4]~[*:5]~[*:6]1'] + p = [0.05,0.05,0.45,0.45] + + return np.random.choice(choices, p=p) + +def change_atom(mol): + choices = ['#6','#7','#8','#9','#16','#17','#35'] + p = [0.15,0.15,0.14,0.14,0.14,0.14,0.14] + + X = np.random.choice(choices, p=p) + while not mol.HasSubstructMatch(Chem.MolFromSmarts('['+X+']')): + X = np.random.choice(choices, p=p) + Y = np.random.choice(choices, p=p) + while Y == X: + Y = np.random.choice(choices, p=p) + + return '[X:1]>>[Y:1]'.replace('X',X).replace('Y',Y) + +def mutate(mol,mutation_rate): + + if random.random() > mutation_rate: + return mol + + Chem.Kekulize(mol,clearAromaticFlags=True) + p = [0.15,0.14,0.14,0.14,0.14,0.14,0.15] + for i in range(10): + rxn_smarts_list = 7*[''] + rxn_smarts_list[0] = insert_atom() + rxn_smarts_list[1] = change_bond_order() + rxn_smarts_list[2] = delete_cyclic_bond() + rxn_smarts_list[3] = add_ring() + rxn_smarts_list[4] = delete_atom() + rxn_smarts_list[5] = change_atom(mol) + rxn_smarts_list[6] = append_atom() + rxn_smarts = np.random.choice(rxn_smarts_list, p=p) + + #print('mutation',rxn_smarts) + + rxn = AllChem.ReactionFromSmarts(rxn_smarts) + + new_mol_trial = rxn.RunReactants((mol,)) + + new_mols = [] + for m in new_mol_trial: + m = m[0] + #print Chem.MolToSmiles(mol),mol_OK(mol) + if co.mol_OK(m) and co.ring_OK(m): + new_mols.append(m) + + if len(new_mols) > 0: + 
return random.choice(new_mols) + + return None + +if __name__ == "__main__": + pass diff --git a/open_biomed/models/MoleculeSTM/models/MLP.py b/open_biomed/models/MoleculeSTM/models/MLP.py new file mode 100644 index 0000000..b5175c2 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/MLP.py @@ -0,0 +1,49 @@ +from torch import nn +from torch.nn import functional as F +from collections.abc import Sequence + + +class MLP(nn.Module): + def __init__(self, input_dim, hidden_dims, batch_norm=False, activation="relu", dropout=0): + super(MLP, self).__init__() + + if not isinstance(hidden_dims, Sequence): + hidden_dims = [hidden_dims] + self.dims = [input_dim] + hidden_dims + + if isinstance(activation, str): + self.activation = getattr(F, activation) + else: + self.activation = activation + if dropout: + self.dropout = nn.Dropout(dropout) + else: + self.dropout = None + + self.layers = nn.ModuleList() + for i in range(len(self.dims) - 1): + self.layers.append(nn.Linear(self.dims[i], self.dims[i + 1])) + if batch_norm: + self.batch_norms = nn.ModuleList() + for i in range(len(self.dims) - 2): + self.batch_norms.append(nn.BatchNorm1d(self.dims[i + 1])) + else: + self.batch_norms = None + + def forward(self, input): + layer_input = input + + for i, layer in enumerate(self.layers): + hidden = layer(layer_input) + if i < len(self.layers) - 1: + if self.batch_norms: + x = hidden.flatten(0, -2) + hidden = self.batch_norms[i](x).view_as(hidden) + hidden = self.activation(hidden) + if self.dropout: + hidden = self.dropout(hidden) + if hidden.shape == layer_input.shape: + hidden = hidden + layer_input + layer_input = hidden + + return hidden \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/__init__.py b/open_biomed/models/MoleculeSTM/models/__init__.py new file mode 100644 index 0000000..7e38623 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/__init__.py @@ -0,0 +1,2 @@ +from models.MoleculeSTM.models.molecule_gnn_model import GNN, 
GNN_graphpred +from models.MoleculeSTM.models.MLP import MLP \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/__init__.py b/open_biomed/models/MoleculeSTM/models/mega_molbart/__init__.py new file mode 100644 index 0000000..27eca55 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/mega_molbart/__init__.py @@ -0,0 +1 @@ +from models.MoleculeSTM.models.mega_molbart.megatron_bart import MegatronBART \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/decoder.py b/open_biomed/models/MoleculeSTM/models/mega_molbart/decoder.py new file mode 100644 index 0000000..b7aaad1 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/mega_molbart/decoder.py @@ -0,0 +1,426 @@ +# coding=utf-8 + +import torch +from rdkit import Chem, RDLogger +from .util import DEFAULT_MAX_SEQ_LEN + +class DecodeSampler: + def __init__( + self, + tokenizer, + max_seq_len=DEFAULT_MAX_SEQ_LEN + ): + self.tokenizer = tokenizer + self.max_seq_len = max_seq_len + + assert max_seq_len > 1, f"Max sequence must be at least 2, got {max_seq_len}" + + self.begin_token_id = self.tokenizer.vocab[self.tokenizer.begin_token] + self.pad_token_id = self.tokenizer.vocab[self.tokenizer.pad_token] + self.end_token_id = self.tokenizer.vocab[self.tokenizer.end_token] + + self.bad_token_ll = -1e5 + + RDLogger.DisableLog("rdApp.*") + + + def decode(self, decode_fn, batch_size, sampling_alg="greedy", device="cpu", **kwargs): + """ Sample a molecule from a model by calling the decode function argument + + Args: + decode_fn: A function mapping a batched sequence of token identifiers and their associated pad masks + to a log probability distribution over possible next tokens + batch_size: The number of elements to pass into the decode function in one batch + sampling_alg: Algorithm to use for sampling from the model + + Returns: + (SMILES of sampled molecules (List[str]), log likelihoods (List[float])) + """ + + if sampling_alg == 
"greedy": + output = self.greedy_decode(decode_fn, batch_size, device) + + elif sampling_alg == "beam": + output = self.beam_decode(decode_fn, batch_size, device, kwargs) + + else: + raise ValueError(f"Unknown sampling algorithm {sampling_alg}") + + return output + + + def greedy_decode(self, decode_fn, batch_size, device="cpu"): + """ Sample molecules from the model using greedy search + + Args: + decode_fn (fn): Function used to apply tokens to model and produce log probability distribution + batch_size (int): Number of molecules to sample + device: Torch device to create tensors on + + Returns: + (List[str], List[float]): Tuple of (molecules, their log likelihoods) + """ + + # Create tensors which will be reused + token_ids = [self.begin_token_id] + ([self.pad_token_id] * (self.max_seq_len - 1)) + token_ids = [token_ids] * batch_size + token_ids = torch.tensor(token_ids, device=device).transpose(0, 1) + pad_mask = torch.zeros((self.max_seq_len, batch_size), device=device, dtype=torch.bool) + log_lhs = torch.zeros((batch_size)) + + # Iteratively apply the tokens to the model and build up the sequence + for i in range(1, self.max_seq_len): + token_ids_seq = token_ids[:i, :] + pad_mask_seq = pad_mask[:i, :] + + # Sample next id for each element in the batch + output_dist = decode_fn(token_ids_seq, pad_mask_seq) + probs, output_ids = output_dist.max(dim=2) + new_ids = output_ids[-1, :] + new_probs = probs[-1, :] + + # Generate next elements in the pad mask. An element is padded if: + # 1. The previous token is an end token + # 2. 
The previous token is a pad token + is_end_token = token_ids[i-1, :] == self.end_token_id + is_pad_token = token_ids[i-1, :] == self.pad_token_id + new_pad_mask = torch.logical_or(is_end_token, is_pad_token) + + # Break if sampling is complete + if new_pad_mask.sum().item() == new_pad_mask.numel(): + break + + # Ensure all sequences contain an end token + if i == self.max_seq_len - 1: + new_ids[~new_pad_mask] = self.end_token_id + + # Set the token to pad where required, update the token ids and update lls + new_ids[new_pad_mask] = self.pad_token_id + token_ids[i, :] = new_ids + pad_mask[i, :] = new_pad_mask + log_lhs += new_probs.cpu() + + tokens = token_ids.transpose(0, 1).tolist() + tokens = self.tokenizer.convert_ids_to_tokens(tokens) + mol_strs = self.tokenizer.detokenize(tokens) + log_lhs = log_lhs.tolist() + + return mol_strs, log_lhs + + + def beam_decode(self, decode_fn, batch_size, device="cpu", k=5): + """ Sample molecules from the model using beam search + + Samples molecules by iteratively building up the sequence of SMILES characters using beam search. + Molecules are returned in a 2D list where batch_size is the outer dimension and k is the inner dimension. 
+ + Args: + decode_fn (fn): Function used to apply tokens to model and produce log probability distribution + batch_size (int): Number of molecules to sample + device: Torch device to create tensors on + k (int): Number of beams + + Returns: + (List[List[str]], List[List[float]]): Tuple of (molecules, their log likelihoods) + """ + + # Create tensors which will be reused + token_ids = [self.begin_token_id] + ([self.pad_token_id] * (self.max_seq_len - 1)) + token_ids = [token_ids] * batch_size + token_ids = torch.tensor(token_ids, device=device).transpose(0, 1) + pad_mask = torch.zeros((self.max_seq_len, batch_size), device=device, dtype=torch.bool) + + ts = token_ids[:1, :] + ms = pad_mask[:1, :] + ll = torch.zeros((batch_size)) + + # Apply starting token to model to get a distribution over next tokens + first_lls = self._beam_step(decode_fn, ts, ms, ll) + top_lls, top_idxs = torch.topk(first_lls, k, dim=1) + top_ids = list(top_idxs.T) + + # Setup tensors for each beam which will be reused + token_ids_list = [token_ids.clone() for _ in range(k)] + pad_mask_list = [pad_mask.clone() for _ in range(k)] + lls_list = list(top_lls.cpu().T) + + for beam_idx, ids in enumerate(top_ids): + token_ids_list[beam_idx][1, :] = ids + pad_mask_list[beam_idx][1, :] = 0 + + for i in range(2, self.max_seq_len): + complete = self._update_beams_(i, decode_fn, token_ids_list, pad_mask_list, lls_list) + if complete: + break + + tokens_list = [token_ids.transpose(0, 1).tolist() for token_ids in token_ids_list] + tokens_list = [self.tokenizer.convert_ids_to_tokens(tokens) for tokens in tokens_list] + mol_strs_list = [self.tokenizer.detokenize(tokens) for tokens in tokens_list] + log_lhs_list = [log_lhs.tolist() for log_lhs in lls_list] + + # Transpose and sort list of molecules based on ll + new_mol_strs = self._transpose_list(mol_strs_list) + new_log_lhs = self._transpose_list(log_lhs_list) + sorted_mols, sorted_lls = self._sort_beams(new_mol_strs, new_log_lhs) + + return sorted_mols, 
sorted_lls + + + def _update_beams_(self, i, decode_fn, token_ids_list, pad_mask_list, lls_list): + """ Update beam tokens and pad mask in-place using a single decode step + + Updates token ids and pad mask in-place by producing the probability distribution over next tokens + and choosing the top k (number of beams) log likelihoods to choose the next tokens. + Sampling is complete if every batch element in every beam has produced an end token. + + Args: + i (int): The current iteration counter + decode_fn (fn): Function used to apply tokens to model and produce log probability distribution + token_ids_list (List[torch.Tensor]): List of token_ids, each of shape [seq_len, batch_size] + pad_mask_list (List[torch.Tensor]): List of pad_masks, each of shape [seq_len, batch_size] + lls_list (List[torch.Tensor]): List of log likelihoods, each of shape [batch_size] + + Returns: + (bool): Specifies whether all of the beams are complete + """ + + assert len(token_ids_list) == len(pad_mask_list) == len(lls_list) + + num_beams = len(token_ids_list) + + ts = [token_ids[:i, :] for token_ids in token_ids_list] + ms = [pad_mask[:i, :] for pad_mask in pad_mask_list] + + # Apply current seqs to model to get a distribution over next tokens + # new_lls is a tensor of shape [batch_size, vocab_size * num_beams] + new_lls = [self._beam_step(decode_fn, t, m, lls) for t, m, lls in zip(ts, ms, lls_list)] + _, vocab_size = new_lls[0].shape + new_lls = torch.cat(new_lls, dim=1) + + # Keep lists (of length num_beams) of tensors of shape [batch_size] + top_lls, top_idxs = torch.topk(new_lls, num_beams, dim=1) + new_ids_list = list((top_idxs % vocab_size).T) + beam_idxs_list = list((top_idxs // vocab_size).T) + top_lls = list(top_lls.T) + + beam_complete = [] + new_ts_list = [] + new_pm_list = [] + new_lls_list = [] + + # Set the sampled tokens, pad masks and log likelihoods for each of the new beams + for new_beam_idx, (new_ids, beam_idxs, lls) in enumerate(zip(new_ids_list, beam_idxs_list, 
top_lls)): + # Get the previous sequences corresponding to the new beams + token_ids = [token_ids_list[beam_idx][:, b_idx] for b_idx, beam_idx in enumerate(beam_idxs)] + token_ids = torch.stack(token_ids).transpose(0, 1) + + # Generate next elements in the pad mask. An element is padded if: + # 1. The previous token is an end token + # 2. The previous token is a pad token + is_end_token = token_ids[i-1, :] == self.end_token_id + is_pad_token = token_ids[i-1, :] == self.pad_token_id + new_pad_mask = torch.logical_or(is_end_token, is_pad_token) + beam_complete.append(new_pad_mask.sum().item() == new_pad_mask.numel()) + + # Ensure all sequences contain an end token + if i == self.max_seq_len - 1: + new_ids[~new_pad_mask] = self.end_token_id + + # Set the tokens to pad if an end token as already been produced + new_ids[new_pad_mask] = self.pad_token_id + token_ids[i, :] = new_ids + + # Generate full pad mask sequence for new token sequence + pad_mask = [pad_mask_list[beam_idx][:, b_idx] for b_idx, beam_idx in enumerate(beam_idxs)] + pad_mask = torch.stack(pad_mask).transpose(0, 1) + pad_mask[i, :] = new_pad_mask + + # Add tokens, pad mask and lls to list to be updated after all beams have been processed + new_ts_list.append(token_ids) + new_pm_list.append(pad_mask) + new_lls_list.append(lls) + + complete = sum(beam_complete) == len(beam_complete) + + # Update all tokens, pad masks and lls + if not complete: + for beam_idx, (ts, pm, lls) in enumerate(zip(new_ts_list, new_pm_list, new_lls_list)): + token_ids_list[beam_idx] = ts + pad_mask_list[beam_idx] = pm + lls_list[beam_idx] = lls + + return complete + + def _beam_step(self, decode_fn, tokens, mask, lls): + """ Apply tokens to model to produce the log likelihoods for the full sequence + + A single iteration of decode is applied to the model to produce the next tokens in the sequences + and the log likelihoods for the entire sequences (including the next token) + The lls are returned as a distribution over all 
possible next tokens + + Args: + decode_fn (fn): Function used to apply tokens to model and produce log probability distribution + tokens (torch.Tensor): Tensor of shape [seq_len, batch_size] containing the current token ids + mask (torch.Tensor): BoolTensor of shape [seq_len, batch_size] containing the padding mask + lls (torch.Tensor): Tensor of shape [batch_size] containing log likelihoods for seqs so far + + Returns: + seq_lls (torch.Tensor): Tensor of shape [batch_size, vocab_size] + """ + + output_dist = decode_fn(tokens, mask) + next_token_lls = output_dist[-1, :, :].cpu() + + # Create a vector from which only a pad token can be sampled + # And use this vector in the output for sequences which are complete + _, vocab_size = tuple(next_token_lls.shape) + complete_seq_ll = torch.ones((1, vocab_size)) * self.bad_token_ll + complete_seq_ll[:, self.pad_token_id] = 0.0 + + is_end_token = tokens[-1, :] == self.end_token_id + is_pad_token = tokens[-1, :] == self.pad_token_id + ll_mask = torch.logical_or(is_end_token, is_pad_token).cpu().unsqueeze(1) + masked_lls = (ll_mask * complete_seq_ll) + (~ll_mask * next_token_lls) + + seq_lls = (lls + masked_lls.T).T + return seq_lls + + @staticmethod + def _transpose_list(l): + """ Transpose 2D list so that inner dimension is first + + Args: + l (List[Any]): List to be transposed + + Returns: + (List[Any]): Transposed list + """ + + outer_dim = len(l) + inner_dim = len(l[0]) + + transposed = [[[]] * outer_dim for _ in range(inner_dim)] + for outer_idx, inner in enumerate(l): + for inner_idx, item in enumerate(inner): + transposed[inner_idx][outer_idx] = item + + return transposed + + @staticmethod + def _sort_beams(mol_strs, log_lhs): + """ Return mols sorted by their log likelihood + + Args: + mol_strs (List[List[str]]): SMILES encoding of molecules + log_lhs (List[List[float]]): Log likelihood for each molecule + + Returns: + (List[str], List[float]): Tuple of sorted molecules and sorted log lhs + """ + + assert 
len(mol_strs) == len(log_lhs) + + sorted_mols = [] + sorted_lls = [] + + for mols, lls in zip(mol_strs, log_lhs): + mol_lls = sorted(zip(mols, lls), reverse=True, key=lambda mol_ll: mol_ll[1]) + mols, lls = tuple(zip(*mol_lls)) + sorted_mols.append(list(mols)) + sorted_lls.append(list(lls)) + + return sorted_mols, sorted_lls + + @staticmethod + def calc_sampling_metrics(sampled_smiles, target_smiles): + """ Calculate sampling metrics for the model + + If sampled_smiles is a List[List[str]] then the following metrics for beam search are calculated (up to the + maximum given by the number of elements in the inner lists): + - "top_1_accuracy" + - "top_5_accuracy" + - "top_10_accuracy" + - "top_20_accuracy" + - "top_50_accuracy" + The SMILES strings must be sorted in decreasing order of their predicted likelihood + + If the sampled_smiles is a List[str] then "accuracy" is calculated + + The the number of invalid SMILES "invalid" is also returned (for beam search this is just from the top_1) + + Args: + sampled_smiles: SMILES strings produced by decode function, + target_smiles: target molecules as canonicalised SMILES strings + + Returns: + dict containing results + """ + + num_sampled = len(sampled_smiles) + num_target = len(target_smiles) + err_msg = f"The number of sampled and target molecules must be the same, got {num_sampled} and {num_target}" + assert num_sampled == num_target, err_msg + + data_type = type(sampled_smiles[0]) + if data_type == str: + results = DecodeSampler._calc_greedy_metrics(sampled_smiles, target_smiles) + elif data_type == list: + results = DecodeSampler._calc_beam_metrics(sampled_smiles, target_smiles) + else: + raise TypeError(f"Elements of sampled_smiles must be either a str or a list, got {data_type}") + + return results + + @staticmethod + def _calc_greedy_metrics(sampled_smiles, target_smiles): + sampled_mols = [Chem.MolFromSmiles(smi) for smi in sampled_smiles] + invalid = [mol is None for mol in sampled_mols] + + canon_smiles = 
["Unknown" if mol is None else Chem.MolToSmiles(mol) for mol in sampled_mols] + target_mols = [Chem.MolFromSmiles(smi) for smi in target_smiles] + canon_target_smiles = [Chem.MolToSmiles(mol) for mol in target_mols] + correct_smiles = [canon_target_smiles[idx] == smi for idx, smi in enumerate(canon_smiles)] + + num_correct = sum(correct_smiles) + total = len(correct_smiles) + num_invalid = sum(invalid) + perc_invalid = num_invalid / total + accuracy = num_correct / total + + # Todo: need to move accuracy and perc_invalid to cuda for reducing later + metrics = { + "accuracy": accuracy, + "invalid": perc_invalid + } + + return metrics + + @staticmethod + def _calc_beam_metrics(sampled_smiles, target_smiles): + top_1_samples = [mols[0] for mols in sampled_smiles] + top_1_results = DecodeSampler._calc_greedy_metrics(top_1_samples, target_smiles) + + metrics = { + "top_1_accuracy": top_1_results["accuracy"], + "invalid": top_1_results["invalid"] + } + + ks = [2, 3, 5, 10, 20, 50] + num_samples_list = [k for k in ks if k <= len(sampled_smiles[0])] + + for num_samples in num_samples_list: + top_k_correct = [] + num_mols = len(sampled_smiles) + + for batch_idx, mols in enumerate(sampled_smiles): + samples = mols[:num_samples] + samples_mols = [Chem.MolFromSmiles(smi) for smi in samples] + samples_smiles = ["Unknown" if mol is None else Chem.MolToSmiles(mol) for mol in samples_mols] + correct_smiles = [smi == target_smiles[batch_idx] for smi in samples_smiles] + is_correct = sum(correct_smiles) >= 1 + top_k_correct.append(is_correct) + + accuracy = sum(top_k_correct) / num_mols + metrics[f"top_{str(num_samples)}_accuracy"] = accuracy + + return metrics \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/mega_mol_bart.py b/open_biomed/models/MoleculeSTM/models/mega_molbart/mega_mol_bart.py new file mode 100644 index 0000000..3108e53 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/mega_molbart/mega_mol_bart.py @@ -0,0 +1,471 
def use_model_module(model):
    """Return True when ``model`` is a wrapper that exposes the real network as ``model.module``.

    Credit to https://github.com/MolecularAI/MolBART/blob/megatron-molbart-with-zinc/megatron_molbart/checkpointing.py#L20

    Recognised wrappers are torch ``DistributedDataParallel`` and, when the
    optional ``deepspeed`` package is importable, ``DeepSpeedEngine``.

    Args:
        model: any object, typically a (possibly wrapped) ``nn.Module``.

    Returns:
        bool
    """
    if isinstance(model, torchDDP):
        return True

    try:
        from deepspeed.runtime.engine import DeepSpeedEngine
    except ImportError:
        # DeepSpeed is optional; without it the model cannot be an engine.
        # Catching only ImportError keeps KeyboardInterrupt / SystemExit
        # (which a bare ``except`` would swallow) propagating normally.
        return False

    return isinstance(model, DeepSpeedEngine)
def _compute_radius(self, scaled_radius):  # TODO REMOVE
    """Convert a relative jitter radius into an absolute one.

    Params
        scaled_radius: multiplier applied to ``self.min_jitter_radius``.
            Any falsy value (``None``, ``0``) selects the unscaled minimum.

    Returns
        ``float(scaled_radius * self.min_jitter_radius)`` for a truthy
        ``scaled_radius``, otherwise ``self.min_jitter_radius`` unchanged.
    """
    base_radius = self.min_jitter_radius
    if not scaled_radius:
        return base_radius
    return float(scaled_radius * base_radius)
def save_model(self, iteration, model, optimizer=None, lr_scheduler=None):
    ''' Save a checkpoint of ``model`` for the given iteration.

    Credit to https://github.com/MolecularAI/MolBART/blob/megatron-molbart-with-zinc/megatron_molbart/checkpointing.py#L46

    The destination is derived from the Megatron global args (``args.save``)
    through ``megatron_checkpointing.get_checkpoint_name``. After the
    checkpoint is written, the tracker file returned by
    ``get_checkpoint_tracker_filename`` is overwritten with ``iteration``.

    Params
        iteration: iteration number stored in the checkpoint and in the tracker file
        model: model to save; a DDP / DeepSpeed wrapper is unwrapped via ``model.module``
        optimizer: optional, its ``state_dict()`` is stored unless ``args.no_save_optim`` is set
        lr_scheduler: optional, its ``state_dict()`` is stored unless ``args.no_save_optim`` is set

    Returns
        None

    NOTE: ``torch.distributed.barrier()`` is called twice, so this requires an
    initialised process group and must be invoked by every rank.
    '''

    """Save a model checkpoint."""
    args = get_args()

    # Checkpoint the underlying network, not its DDP / DeepSpeed wrapper.
    if use_model_module(model):
        model = model.module

    # Only rank zero of the data parallel writes to the disk.
    if mpu.get_data_parallel_rank() == 0:

        # Arguments, iteration, and model.
        state_dict = {}
        state_dict['args'] = args
        state_dict['checkpoint_version'] = 2.0
        state_dict['iteration'] = iteration
        state_dict['model'] = model.state_dict_for_save_checkpoint()

        # Optimizer stuff.
        if not args.no_save_optim:
            if optimizer is not None:
                state_dict['optimizer'] = optimizer.state_dict()
            if lr_scheduler is not None:
                state_dict['lr_scheduler'] = lr_scheduler.state_dict()

        # RNG states (Python, NumPy, torch CPU, torch CUDA and the Megatron
        # CUDA RNG tracker) are stored alongside the weights.
        if not args.no_save_rng:
            state_dict['random_rng_state'] = random.getstate()
            state_dict['np_rng_state'] = np.random.get_state()
            state_dict['torch_rng_state'] = torch.get_rng_state()
            state_dict['cuda_rng_state'] = torch.cuda.get_rng_state()
            state_dict['rng_tracker_states'] = mpu.get_cuda_rng_tracker().get_states()

        # Save.
        checkpoint_name = megatron_checkpointing.get_checkpoint_name(args.save, iteration)
        print('global rank {} is saving checkpoint at iteration {:7d} to {}'.
              format(torch.distributed.get_rank(), iteration,
                     checkpoint_name))
        megatron_checkpointing.ensure_directory_exists(checkpoint_name)
        torch.save(state_dict, checkpoint_name)
        print(' successfully saved {}'.format(checkpoint_name))

    # Wait so everyone is done (necessary)
    torch.distributed.barrier()
    # And update the latest iteration
    if torch.distributed.get_rank() == 0:
        tracker_filename = megatron_checkpointing.get_checkpoint_tracker_filename(args.save)
        with open(tracker_filename, 'w') as f:
            f.write(str(iteration))
    # Wait so everyone is done (not necessary)
    torch.distributed.barrier()
    return
def smileslist2embedding(self, smiles_list):
    """Encode a batch of SMILES with this workflow's own encoder (``self.model``).

    Params
        smiles_list: either a list of SMILES strings, which are tokenized
            with ``self.tokenizer``, or an already-tokenized dict holding
            ``'input_ids'`` and ``'pad_masks'`` (iterables of tensors).

    Returns
        ``(embedding, pad_mask)``: the output of ``self.model.encode`` and
        the boolean padding mask that was fed to it.
    """
    # The body used to be a line-for-line copy of
    # smileslist2embedding_model_given with ``model`` fixed to ``self.model``;
    # delegating keeps the two entry points from drifting apart.
    return self.smileslist2embedding_model_given(self.model, smiles_list)
def inverse_transform(self, embeddings, mem_pad_mask, k=1, sanitize=True):
    """Decode encoder memories back into SMILES strings with beam search.

    Params
        embeddings: iterable of encoder memories; an entry given as a nested
            Python list is converted to a CUDA float tensor first.
        mem_pad_mask: padding mask shared by every memory (cloned, not modified).
        k: beam width passed to the sampler.
        sanitize: when True, each memory contributes exactly one SMILES: the
            RDKit-canonical form of the first beam that parses, or the raw
            top beam if none parses. When False, every decoded beam is
            returned unchanged.

    Returns
        list of SMILES strings. With ``sanitize=True`` it holds one entry per
        memory that produced at least one beam, in input order, so it can be
        zipped with ``embeddings``.
    """
    mem_pad_mask = mem_pad_mask.clone()
    smiles_interp_list = []

    if not sanitize:
        # Previously logged (via the deprecated ``logger.warn``) for every
        # memory, even when sanitisation was in fact enabled.
        logger.warning('WARNING: MOLECULE VALIDATION AND SANITIZATION CURRENTLY DISABLED')

    batch_size = 1  # TODO: parallelize this loop as a batch
    with torch.no_grad():
        for memory in embeddings:

            if isinstance(memory, list):
                memory = torch.FloatTensor(memory).cuda()

            decode_fn = partial(self.model._decode_fn,
                                mem_pad_mask=mem_pad_mask.type(torch.LongTensor).cuda(),
                                memory=memory)

            mol_strs, _ = self.model.sampler.beam_decode(decode_fn,
                                                         batch_size=batch_size,
                                                         device='cuda',
                                                         k=k)
            mol_strs = sum(mol_strs, [])  # flatten list

            if not sanitize:
                smiles_interp_list.extend(mol_strs)
                continue

            # Pick a single representative per memory. Appending the invalid
            # beams that precede the first valid one (old behaviour for k > 1)
            # made the output longer than ``embeddings`` and broke callers
            # that pair the two up.
            chosen = mol_strs[0] if mol_strs else None
            for smiles in mol_strs:
                mol = Chem.MolFromSmiles(smiles, sanitize=True)
                if mol:
                    chosen = Chem.MolToSmiles(mol)
                    logger.debug(f'Sanitized SMILES {chosen} added...')
                    break
            if chosen is not None:
                smiles_interp_list.append(chosen)

    return smiles_interp_list
+ + Params + smiles1: str, input SMILES molecule + smiles2: str, input SMILES molecule + num_interp: int, number of molecules to interpolate + tokenizer: MolEncTokenizer tokenizer object + k: number of molecules for beam search, default 1. Can increase if there are issues with validity + + Returns + list of interpolated smiles molecules + """ + + pad_length = max(len(smiles1), len(smiles2)) + 2 # add 2 for start / stop + embedding1, pad_mask1 = self.smiles2embedding(smiles1, + pad_length=pad_length) + + embedding2, pad_mask2 = self.smiles2embedding(smiles2, + pad_length=pad_length) + + scale = torch.linspace(0.0, 1.0, num_interp + 2)[ + 1:-1] # skip first and last because they're the selected molecules + scale = scale.unsqueeze(0).unsqueeze(-1).cuda() + + interpolated_emb = torch.lerp(embedding1, embedding2, scale).cuda() # dims: batch, tokens, embedding + combined_mask = (pad_mask1 & pad_mask2).bool().cuda() + + embeddings = [] + dims = [] + for emb in interpolated_emb.permute(1, 0, 2): + dims.append(emb.shape) + embeddings.append(emb) + + generated_mols = self.inverse_transform(embeddings, + combined_mask, + k=k, + sanitize=True) + generated_mols = [smiles1] + generated_mols + [smiles2] + embeddings = [embedding1] + embeddings + [embedding2] + dims = [embedding1.shape] + dims + [embedding2.shape] + return generated_mols, embeddings, combined_mask, dims + + def find_similars_smiles_list(self, + smiles: str, + num_requested: int = 10, + scaled_radius=None, + force_unique=False): + distance = self._compute_radius(scaled_radius) + logger.info(f'Computing with distance {distance}...') + + embedding, pad_mask = self.smiles2embedding(smiles) + + neighboring_embeddings = self.addjitter(embedding, distance, cnt=num_requested) + + generated_mols = self.inverse_transform(neighboring_embeddings, + pad_mask.bool().cuda(), + k=1, sanitize=True) + if force_unique: + generated_mols = list(set(generated_mols)) + + generated_mols = [smiles] + generated_mols + 
def find_similars_smiles(self,
                         smiles: str,
                         num_requested: int = 10,
                         scaled_radius=None,
                         force_unique=False):
    """Generate molecules near ``smiles`` in embedding space as a DataFrame.

    Params
        smiles: query SMILES string.
        num_requested: number of jittered neighbours to request.
        scaled_radius: forwarded to ``find_similars_smiles_list`` and, when
            ``force_unique`` is set, to ``compute_unique_smiles``.
        force_unique: when True the frame is post-processed by
            ``self.compute_unique_smiles``.

    Returns
        pandas.DataFrame with columns ``SMILES``, ``embeddings`` (flattened
        to Python lists), ``embeddings_dim`` (original tensor shapes) and
        ``Generated``. Row 0 is the query molecule and has ``Generated=False``.
    """
    # Always request the raw neighbour list. The helper's own force_unique
    # path rebuilds the SMILES list through set(), which shortens and
    # reorders it relative to the embeddings; the columns below would then
    # be misaligned (or of unequal length). Uniqueness is enforced afterwards
    # on the aligned frame by compute_unique_smiles.
    generated_mols, neighboring_embeddings, pad_mask = \
        self.find_similars_smiles_list(smiles,
                                       num_requested=num_requested,
                                       scaled_radius=scaled_radius,
                                       force_unique=False)

    # Downstream consumers expect plain Python containers rather than
    # tensors, so flatten each embedding and keep its shape separately.
    embeddings = [emb.flatten().tolist() for emb in neighboring_embeddings]
    dims = [emb.shape for emb in neighboring_embeddings]

    generated_df = pd.DataFrame({'SMILES': generated_mols,
                                 'embeddings': embeddings,
                                 'embeddings_dim': dims,
                                 'Generated': [True] * len(generated_mols)})
    generated_df.iat[0, 3] = False  # the first row is the input molecule

    if force_unique:
        inv_transform_funct = partial(self.inverse_transform,
                                      mem_pad_mask=pad_mask)
        generated_df = self.compute_unique_smiles(generated_df,
                                                  inv_transform_funct,
                                                  scaled_radius=scaled_radius)
    return generated_df
+ embeddings = [] + for interpolated_embedding in interpolated_embeddings: + embeddings.append(interpolated_embedding.cpu()) + + interp_df = pd.DataFrame({'SMILES': interpolated_mol, + 'embeddings': embeddings, + 'embeddings_dim': dims, + 'Generated': [True for i in range(len(interpolated_mol))]}) + + inv_transform_funct = partial(self.inverse_transform, mem_pad_mask=combined_mask) + + # Mark the source and desinations as not generated + interp_df.iat[0, 3] = False + interp_df.iat[-1, 3] = False + + if force_unique: + interp_df = self.compute_unique_smiles(interp_df, + inv_transform_funct, + scaled_radius=scaled_radius) + + result_df.append(interp_df) + + result_df = pd.concat(result_df) + smile_list = list(result_df['SMILES']) + + return result_df, smile_list \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/megatron_bart.py b/open_biomed/models/MoleculeSTM/models/mega_molbart/megatron_bart.py new file mode 100644 index 0000000..e780307 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/mega_molbart/megatron_bart.py @@ -0,0 +1,800 @@ +from megatron.module import MegatronModule +from apex.normalization import FusedLayerNorm +from megatron import mpu +from torch.nn import init +import torch.nn as nn +import torch.nn.functional as F +import torch +import math +from functools import partial +from .tokenizer import load_tokenizer +from .util import DEFAULT_CHEM_TOKEN_START, DEFAULT_VOCAB_PATH, REGEX + + +class MultiheadAttention(MegatronModule): + + def __init__( + self, + embed_dim, + num_heads, + dropout=0.0, + bias=True, + cross_attention=False, + init_method=init.xavier_uniform_, + ): + + super(MultiheadAttention, self).__init__() + + self.embed_dim = embed_dim + self.num_heads = num_heads + self.attn_dropout = nn.Dropout(p=dropout) + self.bias = bias + self.cross_attention = cross_attention + self.head_dim = self.embed_dim // self.num_heads + self.scaling = self.head_dim ** -0.5 + self.init_method = init_method + 
self.skip_bias = not bias + + # Self-Attention is Column Parallelized + self.query_key_value = mpu.ColumnParallelLinear(self.embed_dim, + 3 * self.embed_dim, gather_output=True, + init_method=self.init_method, + skip_bias_add=self.skip_bias) + + # Cross-Attention is Row and Column Parallelized + self.q_proj = mpu.RowParallelLinear(self.embed_dim, + self.embed_dim, input_is_parallel=False, + init_method=self.init_method, bias=bias, + skip_bias_add=self.skip_bias) + self.key_value = mpu.ColumnParallelLinear(self.embed_dim, 2 + * self.embed_dim, gather_output=True, + init_method=self.init_method, + skip_bias_add=self.skip_bias) + + # Final projection is Row Parallelized + self.out_proj = mpu.RowParallelLinear(self.embed_dim, + self.embed_dim, input_is_parallel=False, + init_method=self.init_method, bias=bias) + + def forward( + self, + query, + key=None, + value=None, + key_padding_mask=None, + attn_mask=None, + ): + """Input shape: Time x Batch x Channel + + Args: + query - tokens/states of shape [Time x Batch x Channel] + key - tokens/states of shape [Time x Batch x Channel] + value - tokens/states of shape [Time x Batch x Channel] + key_padding_mask - keys that are pads where padding + elements are indicated by 1s. Shape: [batch, src_len]. + attn_mask - typically used to implement causal attention, where + the mask prevents the attention from looking forward in time. + Shape: [tgt_len, src_len]. + Returns: + outputs - attention probability scores of shape (Time x Batch x Channel) + """ + + (tgt_len, bsz, embed_dim) = query.size() + + # Compute attention projections + if not self.cross_attention: + (q_k_v, bias) = self.query_key_value(query) + (q, k, v) = mpu.split_tensor_along_last_dim(q_k_v, 3) + else: + q, _ = self.q_proj(query) + if key is None: + assert value is None, \ + 'Cross attention mode: since key is None, value must also be None.' 
class EncoderLayer(MegatronModule):
    """Single transformer encoder layer.

    Layer norm is applied before each sub-layer (self-attention, then a
    feed-forward block of width ``4 * embed_dim`` with GELU); each sub-layer
    output goes through dropout and is added back to its input.
    """

    def __init__(
        self,
        embed_dim,
        num_heads,
        dropout=0.0,
        bias=True,
        init_method=init.xavier_uniform_,
    ):
        super().__init__()
        ffn_dim = 4 * embed_dim

        # Sub-modules are created in their original order so parameter
        # registration and initialisation order stay the same.
        self.self_attn = MultiheadAttention(
            embed_dim,
            num_heads,
            dropout=dropout,
            bias=bias,
            cross_attention=False,
            init_method=init_method,
        )
        self.self_attn_layer_norm = FusedLayerNorm(embed_dim)
        self.attn_dropout = nn.Dropout(p=dropout)
        self.activation_fn = F.gelu
        self.activation_dropout = nn.Dropout(p=dropout)
        self.fc1 = mpu.ColumnParallelLinear(
            embed_dim,
            ffn_dim,
            gather_output=False,
            init_method=init_method,
            skip_bias_add=False,
        )
        self.fc2 = mpu.RowParallelLinear(
            ffn_dim,
            embed_dim,
            input_is_parallel=True,
            init_method=init_method,
            skip_bias_add=False,
        )
        self.final_layer_norm = FusedLayerNorm(embed_dim)

    def forward(
        self,
        x,
        encoder_padding_mask=None,
        attn_mask=None,
    ):
        """
        Args:
            x: input to the layer of shape (seq_len, batch, embed_dim)
            encoder_padding_mask: binary ByteTensor of shape
                (batch, seq_len) where padding elements are indicated by 1.
            attn_mask: binary tensor of shape (tgt_len, src_len); non-zero
                entries are replaced by -1e8 before being handed to the
                attention module.
        Returns:
            encoded output of shape (seq_len, batch, embed_dim)
        """
        if attn_mask is not None:
            attn_mask = attn_mask.masked_fill(attn_mask.to(torch.bool),
                                              -1e8)

        # Self-attention sub-layer
        normed = self.self_attn_layer_norm(x)
        attn_out, _ = self.self_attn(query=normed, key=normed, value=normed,
                                     key_padding_mask=encoder_padding_mask,
                                     attn_mask=attn_mask)
        x = self.attn_dropout(attn_out) + x

        # Feed-forward sub-layer
        ffn_hidden, _ = self.fc1(self.final_layer_norm(x))
        ffn_hidden = self.activation_dropout(self.activation_fn(ffn_hidden))
        ffn_out, _ = self.fc2(ffn_hidden)
        return self.attn_dropout(ffn_out) + x
class ParallelTransformerEncoder(MegatronModule):
    """Stack of ``num_layers`` EncoderLayer modules followed by a final layer norm."""

    def __init__(
        self,
        num_layers,
        embed_dim,
        num_heads,
        dropout=0.0,
        bias=True,
        init_method=init.xavier_uniform_,
    ):
        super().__init__()
        # Register the (still empty) container first so the module order
        # stays ``layers`` then ``norm``.
        self.layers = nn.ModuleList([])
        self.num_layers = num_layers
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.attn_dropout = dropout
        self.bias = bias
        self.init_method = init_method
        for _ in range(self.num_layers):
            self.layers.append(self.build_encoder_layer())
        self.norm = FusedLayerNorm(self.embed_dim)

    def build_encoder_layer(self):
        """Create one EncoderLayer configured from this stack's settings."""
        return EncoderLayer(self.embed_dim, self.num_heads,
                            dropout=self.attn_dropout, bias=self.bias,
                            init_method=self.init_method)

    def forward(
        self,
        src,
        mask=None,
        src_key_padding_mask=None,
    ):
        """Pass the input through the encoder layers in turn.
        Args:
            src: the sequence to the encoder (required).
            mask: the mask for the src sequence (optional).
            src_key_padding_mask: the mask for the src keys per batch (optional).
        Returns:
            encoded output of shape (src_len, batch, embed_dim)
        """
        hidden = src
        for layer in self.layers:
            hidden = layer(hidden, attn_mask=mask,
                           encoder_padding_mask=src_key_padding_mask)
        return self.norm(hidden)
def __init__(
    self,
    decode_sampler,
    pad_token_idx,
    vocab_size,
    d_model,
    num_layers,
    num_heads,
    d_feedforward,
    max_seq_len,
    dropout=0.0,
):
    """Build the BART-style encoder/decoder model.

    Args:
        decode_sampler: sampler object stored as ``self.sampler``.
        pad_token_idx: id of the padding token; ignored by the loss.
        vocab_size: number of tokens in the vocabulary.
        d_model: embedding / hidden width.
        num_layers: number of layers in both the encoder and the decoder.
        num_heads: attention heads per layer.
        d_feedforward: stored on the instance only; it is not passed to the
            encoder or decoder stacks built here.
        max_seq_len: stored as ``self.max_seq_len``.
        dropout: dropout probability shared by embeddings and all layers.
    """
    super().__init__()

    # Plain configuration attributes
    self.sampler = decode_sampler
    self.pad_token_idx = pad_token_idx
    self.val_sampling_alg = 'greedy'
    self.num_beams = 5
    self.vocab_size = vocab_size
    self.d_model = d_model
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.d_feedforward = d_feedforward
    self.max_seq_len = max_seq_len
    self.dropout = dropout

    init_method = init.xavier_uniform_
    stack_kwargs = {'bias': True, 'init_method': init_method}

    # Sub-modules, created in their original order
    self.emb_dropout = nn.Dropout(p=dropout)
    self.emb = nn.Embedding(vocab_size, d_model)
    self.encoder = ParallelTransformerEncoder(
        num_layers,
        d_model,
        num_heads,
        dropout,
        **stack_kwargs,
    )
    self.decoder = ParallelTransformerDecoder(
        num_layers,
        d_model,
        num_heads,
        dropout,
        **stack_kwargs,
    )
    self.token_fc = mpu.RowParallelLinear(d_model, vocab_size,
                                          input_is_parallel=False,
                                          init_method=init_method,
                                          skip_bias_add=False)
    self.loss_fn = nn.CrossEntropyLoss(reduction='none',
                                       ignore_index=pad_token_idx)
    self.log_softmax = nn.LogSoftmax(dim=2)

    self._init_params(init_method)
    self.register_buffer('pos_emb', self._positional_embs())
def encode(self, batch):
    """Run only the encoder and return its memory.

    Args:
        batch (dict {
            "encoder_input": tensor of token_ids of shape (src_len, batch_size),
            "encoder_pad_mask": bool tensor of padded elems of shape (src_len, batch_size),
        })

    Returns:
        encoder memory (Tensor of shape (seq_len, batch_size, d_model))
    """
    token_ids = batch['encoder_input']
    # The encoder takes the padding mask with its first two axes swapped
    # relative to the batch layout.
    pad_mask = batch['encoder_pad_mask'].transpose(0, 1)
    return self.encoder(self._construct_input(token_ids),
                        src_key_padding_mask=pad_mask)
decoder input + + Args: + batch (dict { + "decoder_input": tensor of decoder token_ids of shape (tgt_len, batch_size) + "decoder_pad_mask": bool tensor of decoder padding mask of shape (tgt_len, batch_size) + "memory_input": tensor from encoded input of shape (src_len, batch_size, d_model) + "memory_pad_mask": bool tensor of memory padding mask of shape (src_len, batch_size) + }) + """ + + decoder_input = batch['decoder_input'] + decoder_pad_mask = batch['decoder_pad_mask'].transpose(0, 1) + memory_input = batch['memory_input'] + memory_pad_mask = batch['memory_pad_mask'].transpose(0, 1) + + decoder_embs = self._construct_input(decoder_input) + + (seq_len, _, _) = tuple(decoder_embs.size()) + tgt_mask = \ + self._generate_square_subsequent_mask(seq_len).to(decoder_embs.device) + + model_output = self.decoder(decoder_embs, memory_input, + tgt_key_padding_mask=decoder_pad_mask, + memory_key_padding_mask=memory_pad_mask, + tgt_mask=tgt_mask) + token_output, _ = self.token_fc(model_output) + token_probs = self.log_softmax(token_output) + return token_probs + + def validation_step(self, batch, batch_idx=None): + self.eval() + # TODO: This can be further optimized + tokenizer = load_tokenizer(vocab_path=DEFAULT_VOCAB_PATH, chem_token_start=DEFAULT_CHEM_TOKEN_START, regex=REGEX) + + with torch.no_grad(): + model_output = self.forward(batch) + #target_smiles = batch['target_smiles'] + token_ids = batch['target'] + tokens = token_ids.transpose(0, 1).tolist() + tokens = tokenizer.convert_ids_to_tokens(tokens) + target_smiles = tokenizer.detokenize(tokens) + + loss = self._calc_loss(batch, model_output) + token_acc = self._calc_char_acc(batch, model_output) + perplexity = self._calc_perplexity(batch, model_output) + (mol_strs, log_lhs) = self.sample_molecules(batch, + sampling_alg=self.val_sampling_alg) + metrics = self.sampler.calc_sampling_metrics(mol_strs, + target_smiles) + + self.train() + + val_outputs = { + 'val_loss': loss.item(), + 'val_token_acc': token_acc, + 
'val_perplexity': perplexity, + 'val_molecular_accuracy': metrics['accuracy'], + 'val_invalid_smiles': metrics['invalid'], + } + return val_outputs + + def _calc_loss(self, batch_input, model_output): + """ Calculate the loss for the model + + Args: + batch_input (dict): Input given to model, + model_output (dict): Output from model + + Returns: + loss (singleton tensor), + """ + + tokens = batch_input['target'] + pad_mask = batch_input['target_pad_mask'] + token_output = model_output['token_output'] + token_mask_loss = self._calc_mask_loss(token_output, tokens, + pad_mask) + return token_mask_loss + + def _calc_mask_loss( + self, + token_output, + target, + target_mask, + ): + """ Calculate the loss for the token prediction task + + Args: + token_output (Tensor of shape (seq_len, batch_size, vocab_size)): token output from transformer + target (Tensor of shape (seq_len, batch_size)): Original (unmasked) SMILES token ids from the tokenizer + target_mask (Tensor of shape (seq_len, batch_size)): Pad mask for target tokens + + Output: + loss (singleton Tensor): Loss computed using cross-entropy, + """ + + (seq_len, batch_size) = tuple(target.size()) + token_pred = token_output.reshape((seq_len * batch_size, + -1)).float() + loss = self.loss_fn(token_pred, + target.reshape(-1)).reshape((seq_len, + batch_size)) + inv_target_mask = ~(target_mask > 0) + num_tokens = inv_target_mask.sum() + loss = loss.sum() / num_tokens + return loss + + def _calc_perplexity(self, batch_input, model_output): + target_ids = batch_input['target'] + target_mask = batch_input['target_pad_mask'] + vocab_dist_output = model_output['token_output'] + inv_target_mask = ~(target_mask > 0) + log_probs = vocab_dist_output.gather(2, + target_ids.unsqueeze(2)).squeeze(2) + log_probs = log_probs * inv_target_mask + log_probs = log_probs.sum(dim=0) + seq_lengths = inv_target_mask.sum(dim=0) + exp = -(1 / seq_lengths) + perp = torch.pow(log_probs.exp(), exp) + return perp.mean().item() + + def 
def sample_molecules(self, batch_input, sampling_alg='greedy'):
    """ Sample molecules from the model

    Args:
        batch_input (dict): Input given to model; must provide the keys
            "encoder_input" and "encoder_pad_mask"
        sampling_alg (str): Algorithm to use to sample SMILES strings from model,
            either 'greedy' or 'beam'

    Returns:
        ([[str]], [[float]]): Tuple of molecule SMILES strings and log lhs (outer dimension is batch)

    Raises:
        ValueError: If sampling_alg is neither 'greedy' nor 'beam'
    """

    # Reject unknown algorithms before doing any work; previously this fell through
    # both branches and failed with an UnboundLocalError on the return statement.
    if sampling_alg not in ('greedy', 'beam'):
        raise ValueError(f"Unrecognised sampling algorithm: {sampling_alg}")

    enc_input = batch_input['encoder_input']
    enc_mask = batch_input['encoder_pad_mask']

    with torch.no_grad():
        encode_input = {'encoder_input': enc_input,
                        'encoder_pad_mask': enc_mask}
        memory = self.encode(encode_input)

        # The decoder gets its own copy of the encoder padding mask
        mem_mask = enc_mask.clone()
        (_, batch_size, _) = tuple(memory.size())

        # Bind the encoder memory so the sampler only has to supply decoder tokens and mask
        decode_fn = partial(self._decode_fn, memory=memory,
                            mem_pad_mask=mem_mask)

        if sampling_alg == 'greedy':
            (mol_strs, log_lhs) = self.sampler.greedy_decode(
                decode_fn, batch_size, device=memory.device)
        else:
            (mol_strs, log_lhs) = self.sampler.beam_decode(
                decode_fn, batch_size, self.num_beams, device=memory.device)

    return (mol_strs, log_lhs)
+ + Args: + sz (int): Size of mask to generate + + Returns: + torch.Tensor: Square autoregressive mask for decode + """ + + mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) + mask = mask.float().masked_fill(mask == 0, float('-inf' + )).masked_fill(mask == 1, float(0.0)) + return mask + + def _init_params(self, method): + """ + Apply initialisation of learnable weights + """ + + for p in self.parameters(): + if p.dim() > 1: + method(p) diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/tokenizer.py b/open_biomed/models/MoleculeSTM/models/mega_molbart/tokenizer.py new file mode 100644 index 0000000..f62c319 --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/mega_molbart/tokenizer.py @@ -0,0 +1,483 @@ +# coding=utf-8 + +import re +import torch +import random +from pathlib import Path +from .util import (DEFAULT_BEGIN_TOKEN, DEFAULT_END_TOKEN, DEFAULT_PAD_TOKEN, \ + DEFAULT_UNK_TOKEN, DEFAULT_MASK_TOKEN, DEFAULT_SEP_TOKEN, \ + DEFAULT_MASK_PROB, DEFAULT_SHOW_MASK_TOKEN_PROB, DEFAULT_MASK_SCHEME, \ + DEFAULT_SPAN_LAMBDA, DEFAULT_VOCAB_PATH, DEFAULT_CHEM_TOKEN_START, REGEX) + + +class MolEncTokenizer(): + def __init__( + self, + vocab, + chem_token_idxs, + prog, + begin_token=DEFAULT_BEGIN_TOKEN, + end_token=DEFAULT_END_TOKEN, + pad_token=DEFAULT_PAD_TOKEN, + unk_token=DEFAULT_UNK_TOKEN, + mask_token=DEFAULT_MASK_TOKEN, + sep_token=DEFAULT_SEP_TOKEN, + mask_prob=DEFAULT_MASK_PROB, + show_mask_token_prob=DEFAULT_SHOW_MASK_TOKEN_PROB, + mask_scheme=DEFAULT_MASK_SCHEME, + span_lambda=DEFAULT_SPAN_LAMBDA + ): + """ Initialise the tokenizer + + Args: + vocab (List[str]): Vocabulary for tokenizer + chem_token_idxs (List[int]): List of idxs of chemical tokens + prog (re.Pattern): Regex object for tokenizing + begin_token (str): Token to use at start of each sequence + end_token (str): Token to use at end of each sequence + pad_token (str): Token to use when padding batches of sequences + unk_token (str): Token to use for tokens which are not in 
the vocabulary + mask_token (str): Token to use when masking pieces of the sequence + sep_token (str): Token to use when sepatating two sentences + mask_prob (float): Probability of token being masked when masking is enabled + show_mask_token_prob (float): Probability of a masked token being replaced with mask token + mask_scheme (str): Masking scheme used by the tokenizer when masking + span_lambda (float): Mean for poisson distribution when sampling a span of tokens + """ + + self.vocab = {t: i for i, t in enumerate(vocab)} + self.decode_vocab = {i: t for t, i in self.vocab.items()} + self.chem_token_idxs = chem_token_idxs + self.prog = prog + + self.begin_token = begin_token + self.end_token = end_token + self.pad_token = pad_token + self.unk_token = unk_token + self.mask_token = mask_token + self.sep_token = sep_token + + self.mask_prob = mask_prob + self.show_mask_token_prob = show_mask_token_prob + self.mask_scheme = mask_scheme + self.span_lambda = span_lambda + + self.unk_id = self.vocab[unk_token] + self.unk_token_cnt = {} + + @staticmethod + def from_vocab_file( + vocab_path, + regex, + chem_tokens_start_idx, + pad_token_idx=0, + unk_token_idx=1, + begin_token_idx=2, + end_token_idx=3, + mask_token_idx=4, + sep_token_idx=5, + mask_prob=DEFAULT_MASK_PROB, + show_mask_token_prob=DEFAULT_SHOW_MASK_TOKEN_PROB, + mask_scheme=DEFAULT_MASK_SCHEME, + span_lambda=DEFAULT_SPAN_LAMBDA + ): + """ Load the tokenizer object from a vocab file and regex + + Reads a newline separated list of tokens from a file to use as the vocabulary + Note: Assumes that the chemical tokens run from chem_tokens_start_idx to the end of the tokens list + Anything after the defined tokens and before chem_tokens_start_idx is assumed to be an extra token + and is added to the regex for tokenizing + + Args: + vocab_path (str): Path to vocab file + regex (str): Regex to use for tokenizing + chem_tokens_start_idx (int): Index of the start of the chemical tokens in the tokens list + + Returns: + 
@staticmethod
def from_pretrained(
    vocab_path,
    regex=REGEX,
    chem_tokens_start_idx=DEFAULT_CHEM_TOKEN_START,
    pad_token_idx=0,
    unk_token_idx=1,
    begin_token_idx=2,
    end_token_idx=3,
    mask_token_idx=4,
    sep_token_idx=5,
    mask_prob=DEFAULT_MASK_PROB,
    show_mask_token_prob=DEFAULT_SHOW_MASK_TOKEN_PROB,
    mask_scheme=DEFAULT_MASK_SCHEME,
    span_lambda=DEFAULT_SPAN_LAMBDA
):
    """ Load the tokenizer object from a vocab file, using the default regex and chem token start

    Convenience wrapper around from_vocab_file: the behaviour is identical, but regex and
    chem_tokens_start_idx have defaults so only the vocab path is required.

    Args:
        vocab_path (str): Path to vocab file
        regex (str): Regex to use for tokenizing
        chem_tokens_start_idx (int): Index of the start of the chemical tokens in the tokens list

    Returns:
        MolEncTokenizer object
    """

    # Delegate instead of duplicating the loading logic so the two constructors cannot drift apart
    return MolEncTokenizer.from_vocab_file(
        vocab_path,
        regex,
        chem_tokens_start_idx,
        pad_token_idx=pad_token_idx,
        unk_token_idx=unk_token_idx,
        begin_token_idx=begin_token_idx,
        end_token_idx=end_token_idx,
        mask_token_idx=mask_token_idx,
        sep_token_idx=sep_token_idx,
        mask_prob=mask_prob,
        show_mask_token_prob=show_mask_token_prob,
        mask_scheme=mask_scheme,
        span_lambda=span_lambda
    )
def save_vocab(self, vocab_path):
    """ Write the vocabulary to a file, one token per line, ordered by token id

    Args:
        vocab_path (str): Path of the file to write
    """

    ordered = sorted(self.vocab.items(), key=lambda k_v: k_v[1])

    # Build the file contents in one join rather than repeated string concatenation
    tokens_str = "".join(f"{token}\n" for token, _ in ordered)
    Path(vocab_path).write_text(tokens_str)
self._concat_sentences(token_masks, sents2_masks, False) + + + tokens = [[self.begin_token] + ts + [self.end_token] for ts in tokens] + m_tokens = [[self.begin_token] + ts + [self.end_token] for ts in m_tokens] + token_masks = [[False] + ts + [False] for ts in token_masks] + sent_masks = [[0] + mask + [1] for mask in sent_masks] if sent_masks is not None else None + + output = {} + + if pad: + tokens, orig_pad_masks = self._pad_seqs(tokens, self.pad_token) + m_tokens, masked_pad_masks = self._pad_seqs(m_tokens, self.pad_token) + token_masks, _ = self._pad_seqs(token_masks, False) + sent_masks, _ = self._pad_seqs(sent_masks, False) if sent_masks is not None else (None, None) + output["original_pad_masks"] = orig_pad_masks + output["masked_pad_masks"] = masked_pad_masks + + output["original_tokens"] = tokens + + if mask: + output["masked_tokens"] = m_tokens + output["token_masks"] = token_masks + + if sent_masks is not None: + output["sentence_masks"] = sent_masks + + return output + + def _regex_match(self, smiles): + tokenized = [] + data_type = type(smiles) + if data_type == str: + smiles = smiles.split() + # tokenized = self.prog.findall(smiles) + for smi in smiles: + tokens = self.prog.findall(smi) + tokenized.append(tokens) + + return tokenized + + @staticmethod + def _get_compiled_regex(regex, extra_tokens): + regex_string = r"(" + for token in extra_tokens: + processed_token = token + for special_character in "()[].|": + processed_token = processed_token.replace(special_character, f"\\{special_character}") + regex_string += processed_token + r"|" + + regex_string += regex + r"|" + regex_string += r".)" + return re.compile(regex_string) + + def _concat_sentences(self, tokens1, tokens2, sep): + tokens = [ts1 + [sep] + ts2 for ts1, ts2 in zip(tokens1, tokens2)] + sent_masks = [([0] * len(ts1)) + [0] + ([1] * len(ts2)) for ts1, ts2 in zip(tokens1, tokens2)] + return tokens, sent_masks + + def detokenize(self, tokens_list): + new_tokens_list = [] + for tokens in 
def convert_ids_to_tokens(self, token_ids):
    """ Map batches of token ids back to their token strings

    Args:
        token_ids (List[List[int]]): One list of token ids per sequence

    Returns:
        List[List[str]]: One list of tokens per sequence

    Raises:
        ValueError: If an id is not present in the decoding vocabulary
    """

    tokens_list = []
    for ids in token_ids:
        tokens = []
        for token_id in ids:
            # Validate and collect in the same pass so each id is looked up only once
            token = self.decode_vocab.get(token_id)
            if token is None:
                raise ValueError(f"Token id {token_id} is not recognised")
            tokens.append(token)

        tokens_list.append(tokens)

    return tokens_list
random.choices(mask_bools, weights=weights, k=len(ts)) + masked = [self._mask_token(ts[i]) if m else ts[i] for i, m in enumerate(token_mask)] + return masked, token_mask + + def _mask_span(self, ts): + curr_token = 0 + masked = [] + token_mask = [] + + mask_bools = [True, False] + weights = [self.mask_prob, 1 - self.mask_prob] + sampled_mask = random.choices(mask_bools, weights=weights, k=len(ts)) + + while curr_token < len(ts): + # If mask, sample from a poisson dist to get length of mask + if sampled_mask[curr_token]: + mask_len = torch.poisson(torch.tensor(self.span_lambda)).long().item() + masked.append(self.mask_token) + token_mask.append(True) + curr_token += mask_len + + # Otherwise don't mask + else: + masked.append(ts[curr_token]) + token_mask.append(False) + curr_token += 1 + + return masked, token_mask + + def _mask_token(self, token): + rand = random.random() + if rand < self.show_mask_token_prob: + return self.mask_token + + elif rand < self.show_mask_token_prob + ((1 - self.show_mask_token_prob) / 2): + token_idx = random.choice(self.chem_token_idxs) + return self.decode_vocab[token_idx] + + else: + return token + + @staticmethod + def _pad_seqs(seqs, pad_token): + pad_length = max([len(seq) for seq in seqs]) + padded = [seq + ([pad_token] * (pad_length - len(seq))) for seq in seqs] + masks = [([0] * len(seq)) + ([1] * (pad_length - len(seq))) for seq in seqs] + return padded, masks + + +def load_tokenizer(vocab_path=DEFAULT_VOCAB_PATH, chem_token_start=DEFAULT_CHEM_TOKEN_START, regex=REGEX): + tokenizer = MolEncTokenizer.from_vocab_file(vocab_path, regex, chem_token_start) + return tokenizer \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/util.py b/open_biomed/models/MoleculeSTM/models/mega_molbart/util.py new file mode 100644 index 0000000..37807ad --- /dev/null +++ b/open_biomed/models/MoleculeSTM/models/mega_molbart/util.py @@ -0,0 +1,21 @@ +DEFAULT_VOCAB_PATH = "bart_vocab.txt" + +# Tokenization and 
# Tokenization and vocabulary
DEFAULT_MAX_SEQ_LEN = 512
DEFAULT_CHEM_TOKEN_START = 272
DEFAULT_BEGIN_TOKEN = "^"
DEFAULT_END_TOKEN = "&"
# NOTE(review): the pad, mask and sep defaults below are all the empty string, so they
# cannot be told apart from one another. Presumably their text was lost somewhere
# upstream -- TODO confirm the intended values against the vocab file.
DEFAULT_PAD_TOKEN = ""
DEFAULT_UNK_TOKEN = "?"
DEFAULT_MASK_TOKEN = ""
DEFAULT_SEP_TOKEN = ""
DEFAULT_MASK_PROB = 0.15
DEFAULT_SHOW_MASK_TOKEN_PROB = 1.0
DEFAULT_MASK_SCHEME = "span"
DEFAULT_SPAN_LAMBDA = 3.0
# Raw string: same pattern value as before, without relying on invalid escape
# sequences such as "\[" or "\(" being passed through by the string parser.
REGEX = r"\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/|:|~|@|\?|>|\*|\$|\%[0-9]{2}|[0-9]"

# Model parameters
DEFAULT_D_MODEL = 256
DEFAULT_NUM_LAYERS = 4
DEFAULT_NUM_HEADS = 8
def forward(self, *argv):
    """
    Compute node representations with the stacked GNN layers.

    Accepts either three positional arguments (x, edge_index, edge_attr) or a single
    object exposing them as the attributes .x, .edge_index and .edge_attr.

    Returns:
        The node representation selected by self.JK:
            "last":   output of the final layer
            "concat": every layer output (including the input embedding) joined along dim 1
            "max":    element-wise maximum over all layer outputs
            "sum":    element-wise sum over all layer outputs

    Raises:
        ValueError: on an unsupported number of arguments or an unknown JK mode
    """
    if len(argv) == 3:
        x, edge_index, edge_attr = argv
    elif len(argv) == 1:
        data = argv[0]
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
    else:
        raise ValueError("unmatched number of arguments.")

    x = self.atom_encoder(x)

    h_list = [x]
    for layer in range(self.num_layer):
        h = self.gnns[layer](h_list[layer], edge_index, edge_attr)
        h = self.batch_norms[layer](h)
        # ReLU is applied on every layer except the last one
        if layer != self.num_layer - 1:
            h = F.relu(h)
        h = F.dropout(h, self.drop_ratio, training=self.training)
        h_list.append(h)

    ### Different implementations of Jk-concat
    if self.JK == "concat":
        node_representation = torch.cat(h_list, dim=1)
    elif self.JK == "last":
        node_representation = h_list[-1]
    elif self.JK == "max":
        # torch.max over a dim returns (values, indices); keep the values
        node_representation = torch.stack(h_list, dim=0).max(dim=0)[0]
    elif self.JK == "sum":
        # torch.sum returns the tensor itself, so it must not be indexed with [0]
        # (doing so kept only the first node's row)
        node_representation = torch.stack(h_list, dim=0).sum(dim=0)
    else:
        raise ValueError("not implemented.")
    return node_representation
def from_pretrained(self, model_file):
    """
    Load weights for the node-level model from a saved state dict.

    Args:
        model_file (str): path to a file written with torch.save holding the state dict
            for self.molecule_node_model
    """
    print("Loading from {} ...".format(model_file))
    # Deserialize onto CPU so the checkpoint loads even when the device it was saved
    # from is unavailable; load_state_dict then copies into the existing parameters.
    state_dict = torch.load(model_file, map_location="cpu")
    self.molecule_node_model.load_state_dict(state_dict)
    return
b/open_biomed/models/MoleculeSTM/splitters.py @@ -0,0 +1,93 @@ +import random +from collections import defaultdict +from itertools import compress + +import numpy as np +import torch +from rdkit.Chem.Scaffolds import MurckoScaffold +from sklearn.model_selection import StratifiedKFold + +from torch.utils.data import Subset + + +def generate_scaffold(smiles, include_chirality=False): + """ Obtain Bemis-Murcko scaffold from smiles + :return: smiles of scaffold """ + scaffold = MurckoScaffold.MurckoScaffoldSmiles( + smiles=smiles, includeChirality=include_chirality) + return scaffold + + +def scaffold_split(dataset, smiles_list, task_idx=None, null_value=0, + frac_train=0.8, frac_valid=0.1, frac_test=0.1, + pyg_dataset=True): + """ + Adapted from https://github.com/deepchem/deepchem/blob/master/deepchem/splits/splitters.py + Split dataset by Bemis-Murcko scaffolds + This function can also ignore examples containing null values for a + selected task when splitting. Deterministic split + :param dataset: pytorch geometric dataset obj + :param smiles_list: list of smiles corresponding to the dataset obj + :param task_idx: column idx of the data.y tensor. Will filter out + examples with null value in specified task column of the data.y tensor + prior to splitting. If None, then no filtering + :param null_value: float that specifies null value in data.y to filter if + task_idx is provided + :param frac_train, frac_valid, frac_test: fractions + :param pyg_dataset: if this is pytorch or pytorch-gemetric dataset + :return: train, valid, test slices of the input dataset obj. 
""" + np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.0) + + if task_idx is not None: + # filter based on null values in task_idx + # get task array + y_task = np.array([data.y[task_idx].item() for data in dataset]) + # boolean array that correspond to non null values + non_null = y_task != null_value + smiles_list = list(compress(enumerate(smiles_list), non_null)) + else: + non_null = np.ones(len(dataset)) == 1 + smiles_list = list(compress(enumerate(smiles_list), non_null)) + + # create dict of the form {scaffold_i: [idx1, idx....]} + all_scaffolds = {} + for i, smiles in smiles_list: + scaffold = generate_scaffold(smiles, include_chirality=True) + if scaffold not in all_scaffolds: + all_scaffolds[scaffold] = [i] + else: + all_scaffolds[scaffold].append(i) + + # sort from largest to smallest sets + all_scaffolds = {key: sorted(value) for key, value in all_scaffolds.items()} + all_scaffold_sets = [ + scaffold_set for (scaffold, scaffold_set) in sorted( + all_scaffolds.items(), key=lambda x: (len(x[1]), x[1][0]), reverse=True) + ] + + # get train, valid test indices + train_cutoff = frac_train * len(smiles_list) + valid_cutoff = (frac_train + frac_valid) * len(smiles_list) + train_idx, valid_idx, test_idx = [], [], [] + for scaffold_set in all_scaffold_sets: + if len(train_idx) + len(scaffold_set) > train_cutoff: + if len(train_idx) + len(valid_idx) + len(scaffold_set) > valid_cutoff: + test_idx.extend(scaffold_set) + else: + valid_idx.extend(scaffold_set) + else: + train_idx.extend(scaffold_set) + + assert len(set(train_idx).intersection(set(valid_idx))) == 0 + assert len(set(test_idx).intersection(set(valid_idx))) == 0 + + if pyg_dataset: + train_dataset = dataset[torch.tensor(train_idx)] + valid_dataset = dataset[torch.tensor(valid_idx)] + test_dataset = dataset[torch.tensor(test_idx)] + return train_dataset, valid_dataset, test_dataset + else: + train_dataset = Subset(dataset, train_idx) + valid_dataset = Subset(dataset, valid_idx) + 
import numpy as np
import torch


# This is for BERT
def padarray(A, size, value=0):
    """Right-pad the 1-D sequence ``A`` with ``value`` up to length ``size``.

    Args:
        A: 1-D array-like to pad.
        size: target length of the result.
        value: constant used for the padded positions.

    Returns:
        A NumPy array of length ``size``.

    Raises:
        ValueError: raised by ``np.pad`` when ``A`` is longer than ``size``
            (the pad width would be negative).
    """
    t = size - len(A)
    return np.pad(A, pad_width=(0, t), mode='constant', constant_values=value)


# This is for BERT
def preprocess_each_sentence(sentence, tokenizer, max_seq_len):
    """Tokenize one sentence into fixed-length token ids and attention mask.

    Args:
        sentence: the text to tokenize.
        tokenizer: callable invoked with ``truncation``, ``max_length``,
            ``padding`` and ``return_tensors='np'``; its result must expose
            ``'input_ids'`` and ``'attention_mask'``.
        max_seq_len: length both returned arrays are padded/truncated to.

    Returns:
        ``[token_ids, attention_mask]`` as two 1-D NumPy arrays of length
        ``max_seq_len``.
    """
    text_input = tokenizer(
        sentence, truncation=True, max_length=max_seq_len,
        padding='max_length', return_tensors='np')

    # reshape(-1) rather than squeeze(): squeeze() turns a (1, 1) result into
    # a 0-d array when max_seq_len == 1, and len() in padarray then fails.
    input_ids = np.asarray(text_input['input_ids']).reshape(-1)
    attention_mask = np.asarray(text_input['attention_mask']).reshape(-1)

    sentence_tokens_ids = padarray(input_ids, max_seq_len)
    sentence_masks = padarray(attention_mask, max_seq_len)
    return [sentence_tokens_ids, sentence_masks]


# This is for BERT
def prepare_text_tokens(device, description, tokenizer, max_seq_len):
    """Tokenize a batch of sentences into tensors on ``device``.

    Args:
        device: target device passed to ``Tensor.to``.
        description: iterable of sentences.
        tokenizer: tokenizer forwarded to ``preprocess_each_sentence``.
        max_seq_len: fixed sequence length of every row.

    Returns:
        ``(tokens_ids, masks)``: a ``torch.long`` tensor and a ``torch.bool``
        tensor, each with one row of length ``max_seq_len`` per sentence.
        An empty ``description`` yields two ``(0, max_seq_len)`` tensors.
    """
    encoded = [
        preprocess_each_sentence(sentence, tokenizer, max_seq_len)
        for sentence in description
    ]
    if not encoded:
        # np.stack rejects an empty list; keep the result rank-2 regardless.
        tokens_ids = torch.zeros((0, max_seq_len), dtype=torch.long)
        masks = torch.zeros((0, max_seq_len), dtype=torch.bool)
    else:
        # Stack in NumPy first: building a tensor from a Python list of
        # ndarrays is slow, and torch.Tensor(...) would round-trip the integer
        # ids through float32.
        tokens_ids = torch.as_tensor(np.stack([ids for ids, _ in encoded])).long()
        masks = torch.as_tensor(np.stack([mask for _, mask in encoded])).bool()
    return tokens_ids.to(device), masks.to(device)


def get_molecule_repr_MoleculeSTM(molecule_data, mol2latent=None, molecule_type="SMILES", MegaMolBART_wrapper=None, molecule_model=None):
    """Encode molecules and optionally project them with ``mol2latent``.

    Args:
        molecule_data: input handed to the encoder selected by
            ``molecule_type``.
        mol2latent: optional callable applied to the encoder output.
        molecule_type: ``"SMILES"`` uses ``MegaMolBART_wrapper``; any other
            value uses ``molecule_model``.
        MegaMolBART_wrapper: object providing ``smileslist2embedding``.
        molecule_model: callable returning ``(representation, _)``.

    Returns:
        The molecule representation, after ``mol2latent`` when it is given.

    Raises:
        ValueError: if the encoder required by ``molecule_type`` is missing.
    """
    if molecule_type == "SMILES":
        if MegaMolBART_wrapper is None:
            raise ValueError("MegaMolBART_wrapper is required when molecule_type is 'SMILES'.")
        embedding, _ = MegaMolBART_wrapper.smileslist2embedding(molecule_data)  # [pad, B, d], [pad, B]
        molecule_repr = embedding[0, :, :]  # [B, d]
    else:
        if molecule_model is None:
            raise ValueError("molecule_model is required when molecule_type is not 'SMILES'.")
        molecule_repr, _ = molecule_model(molecule_data)

    if mol2latent is not None:
        molecule_repr = mol2latent(molecule_repr)
    return molecule_repr


def freeze_network(model):
    """Set ``requires_grad = False`` on every parameter of ``model``."""
    for param in model.parameters():
        param.requires_grad = False
    return


# Dataset name -> (number of prediction tasks, task type).
_NUM_TASK_AND_TYPE = {
    "esol": (1, "regression"),
    "freesolv": (1, "regression"),
    "lipophilicity": (1, "regression"),
    "hiv": (1, "classification"),
    "bace": (1, "classification"),
    "bbbp": (1, "classification"),
    "tox21": (12, "classification"),
    "pcba": (92, "classification"),
    "muv": (17, "classification"),
    "toxcast": (617, "classification"),
    "sider": (27, "classification"),
    "clintox": (2, "classification"),
}


def get_num_task_and_type(dataset):
    """Return ``(num_tasks, task_type)`` for a supported dataset name.

    Raises:
        ValueError: if ``dataset`` is not one of the supported names.
    """
    try:
        return _NUM_TASK_AND_TYPE[dataset]
    except (KeyError, TypeError):
        # TypeError covers unhashable inputs, which the previous if/elif chain
        # also reported as an invalid name.
        raise ValueError("Invalid dataset name.") from None
+ * SPDX-License-Identifier: BSD-3-Clause + * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause + * By Junnan Li + * Based on huggingface code base + * https://github.com/huggingface/transformers/blob/v4.15.0/src/transformers/models/bert +""" + +import math +import os +import warnings +from dataclasses import dataclass +from typing import Optional, Tuple, Dict, Any + +import torch +from torch import Tensor, device, dtype, nn +import torch.utils.checkpoint +from torch import nn +from torch.nn import CrossEntropyLoss +import torch.nn.functional as F + +from transformers.activations import ACT2FN +from transformers.file_utils import ( + ModelOutput, +) +from transformers.modeling_outputs import ( + BaseModelOutputWithPastAndCrossAttentions, + BaseModelOutputWithPoolingAndCrossAttentions, + CausalLMOutputWithCrossAttentions, + MaskedLMOutput, + MultipleChoiceModelOutput, + NextSentencePredictorOutput, + QuestionAnsweringModelOutput, + SequenceClassifierOutput, + TokenClassifierOutput, +) +from transformers.modeling_utils import ( + PreTrainedModel, + apply_chunking_to_forward, + find_pruneable_heads_and_indices, + prune_linear_layer, +) +from transformers.utils import logging +from transformers.models.bert.configuration_bert import BertConfig + +logger = logging.get_logger(__name__) + + +class BertEmbeddings(nn.Module): + """Construct the embeddings from word and position embeddings.""" + + def __init__(self, config): + super().__init__() + self.word_embeddings = nn.Embedding( + config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id + ) + self.position_embeddings = nn.Embedding( + config.max_position_embeddings, config.hidden_size + ) + self.token_type_embeddings = nn.Embedding( + config.type_vocab_size, config.hidden_size + ) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = 
class BertSelfAttention(nn.Module):
    """Multi-head scaled dot-product attention for self- or cross-attention.

    When built with ``is_cross_attention=True`` the key/value projections read
    inputs of width ``config.encoder_width``; otherwise they read
    ``config.hidden_size`` like the query projection.
    """

    def __init__(self, config, is_cross_attention):
        super().__init__()
        self.config = config
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(
            config, "embedding_size"
        ):
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        if is_cross_attention:
            self.key = nn.Linear(config.encoder_width, self.all_head_size)
            self.value = nn.Linear(config.encoder_width, self.all_head_size)
        else:
            self.key = nn.Linear(config.hidden_size, self.all_head_size)
            self.value = nn.Linear(config.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
        self.position_embedding_type = getattr(
            config, "position_embedding_type", "absolute"
        )
        if self.position_embedding_type in ("relative_key", "relative_key_query"):
            self.max_position_embeddings = config.max_position_embeddings
            self.distance_embedding = nn.Embedding(
                2 * config.max_position_embeddings - 1, self.attention_head_size
            )
        # When True, cross-attention maps (and their gradients) are stored on
        # the module during forward().
        self.save_attention = False

    def save_attn_gradients(self, attn_gradients):
        self.attn_gradients = attn_gradients

    def get_attn_gradients(self):
        return self.attn_gradients

    def save_attention_map(self, attention_map):
        self.attention_map = attention_map

    def get_attention_map(self):
        return self.attention_map

    def transpose_for_scores(self, x):
        """Reshape ``(batch, seq, all_head_size)`` to ``(batch, heads, seq, head_size)``."""
        new_x_shape = x.size()[:-1] + (
            self.num_attention_heads,
            self.attention_head_size,
        )
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        past_key_value=None,
        output_attentions=False,
    ):
        """Attend from ``hidden_states`` to itself or to ``encoder_hidden_states``.

        Args:
            hidden_states: query-side input, ``(batch, seq, hidden_size)``.
            attention_mask: additive mask added to the raw scores
                (ignored when ``encoder_hidden_states`` is given).
            head_mask: optional multiplicative mask applied to the
                post-dropout attention probabilities.
            encoder_hidden_states: if given, keys/values are computed from it
                and ``encoder_attention_mask`` is used as the mask.
            encoder_attention_mask: additive mask for cross-attention.
            past_key_value: optional ``(key, value)`` cache that is prepended
                to the freshly computed keys/values in self-attention.
            output_attentions: also return the (pre-dropout) probabilities.

        Returns:
            ``(context,)`` or ``(context, attention_probs)``, followed by the
            ``(key_layer, value_layer)`` pair used for this call.
        """
        # If this is instantiated as a cross-attention module, the keys
        # and values come from an encoder; the attention mask needs to be
        # such that the encoder's padding tokens are not attended to.
        is_cross_attention = encoder_hidden_states is not None

        if is_cross_attention:
            key_layer = self.transpose_for_scores(self.key(encoder_hidden_states))
            value_layer = self.transpose_for_scores(self.value(encoder_hidden_states))
            attention_mask = encoder_attention_mask
        elif past_key_value is not None:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))
            key_layer = torch.cat([past_key_value[0], key_layer], dim=2)
            value_layer = torch.cat([past_key_value[1], value_layer], dim=2)
        else:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))

        query_layer = self.transpose_for_scores(self.query(hidden_states))

        past_key_value = (key_layer, value_layer)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))

        if self.position_embedding_type in ("relative_key", "relative_key_query"):
            # The scores are (query_len, key_len); the two lengths differ for
            # cross-attention and for cached self-attention, so the distance
            # matrix must be built from both rather than from hidden_states
            # alone.
            query_len = query_layer.size(2)
            key_len = key_layer.size(2)
            # With a self-attention cache the queries are the trailing
            # positions of the key sequence; otherwise they start at 0.
            offset = 0 if is_cross_attention else key_len - query_len
            position_ids_l = torch.arange(
                offset, offset + query_len, dtype=torch.long, device=hidden_states.device
            ).view(-1, 1)
            position_ids_r = torch.arange(
                key_len, dtype=torch.long, device=hidden_states.device
            ).view(1, -1)
            distance = position_ids_l - position_ids_r
            positional_embedding = self.distance_embedding(
                distance + self.max_position_embeddings - 1
            )
            positional_embedding = positional_embedding.to(
                dtype=query_layer.dtype
            )  # fp16 compatibility

            if self.position_embedding_type == "relative_key":
                relative_position_scores = torch.einsum(
                    "bhld,lrd->bhlr", query_layer, positional_embedding
                )
                attention_scores = attention_scores + relative_position_scores
            else:  # "relative_key_query"
                relative_position_scores_query = torch.einsum(
                    "bhld,lrd->bhlr", query_layer, positional_embedding
                )
                relative_position_scores_key = torch.einsum(
                    "bhrd,lrd->bhlr", key_layer, positional_embedding
                )
                attention_scores = (
                    attention_scores
                    + relative_position_scores_query
                    + relative_position_scores_key
                )

        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            # The mask is additive (precomputed for all layers in BertModel forward()).
            attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = nn.Softmax(dim=-1)(attention_scores)

        if is_cross_attention and self.save_attention:
            self.save_attention_map(attention_probs)
            attention_probs.register_hook(self.save_attn_gradients)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs_dropped = self.dropout(attention_probs)

        # Mask heads if we want to
        if head_mask is not None:
            attention_probs_dropped = attention_probs_dropped * head_mask

        context_layer = torch.matmul(attention_probs_dropped, value_layer)

        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)

        outputs = (
            (context_layer, attention_probs) if output_attentions else (context_layer,)
        )

        outputs = outputs + (past_key_value,)
        return outputs
hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Module): + def __init__(self, config, is_cross_attention=False): + super().__init__() + self.self = BertSelfAttention(config, is_cross_attention) + self.output = BertSelfOutput(config) + self.pruned_heads = set() + + def prune_heads(self, heads): + if len(heads) == 0: + return + heads, index = find_pruneable_heads_and_indices( + heads, + self.self.num_attention_heads, + self.self.attention_head_size, + self.pruned_heads, + ) + + # Prune linear layers + self.self.query = prune_linear_layer(self.self.query, index) + self.self.key = prune_linear_layer(self.self.key, index) + self.self.value = prune_linear_layer(self.self.value, index) + self.output.dense = prune_linear_layer(self.output.dense, index, dim=1) + + # Update hyper params and store pruned heads + self.self.num_attention_heads = self.self.num_attention_heads - len(heads) + self.self.all_head_size = ( + self.self.attention_head_size * self.self.num_attention_heads + ) + self.pruned_heads = self.pruned_heads.union(heads) + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + past_key_value=None, + output_attentions=False, + ): + self_outputs = self.self( + hidden_states, + attention_mask, + head_mask, + encoder_hidden_states, + encoder_attention_mask, + past_key_value, + output_attentions, + ) + attention_output = self.output(self_outputs[0], hidden_states) + + outputs = (attention_output,) + self_outputs[ + 1: + ] # add attentions if we output them + return outputs + + +class BertIntermediate(nn.Module): + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + if isinstance(config.hidden_act, str): + self.intermediate_act_fn = ACT2FN[config.hidden_act] + else: + self.intermediate_act_fn = config.hidden_act + + def forward(self, hidden_states): 
+ hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config, layer_num): + super().__init__() + self.config = config + self.chunk_size_feed_forward = config.chunk_size_feed_forward + self.seq_len_dim = 1 + self.attention = BertAttention(config) + self.layer_num = layer_num + if ( + self.config.add_cross_attention + and layer_num % self.config.cross_attention_freq == 0 + ): + self.crossattention = BertAttention( + config, is_cross_attention=self.config.add_cross_attention + ) + self.has_cross_attention = True + else: + self.has_cross_attention = False + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + self.intermediate_query = BertIntermediate(config) + self.output_query = BertOutput(config) + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + past_key_value=None, + output_attentions=False, + query_length=0, + ): + # decoder uni-directional self-attention cached key/values tuple is at positions 1,2 + self_attn_past_key_value = ( + past_key_value[:2] if past_key_value is not None else None + ) + self_attention_outputs = self.attention( + hidden_states, + attention_mask, + head_mask, + output_attentions=output_attentions, + past_key_value=self_attn_past_key_value, + ) + attention_output = 
class BertEncoder(nn.Module):
    """Stack of ``BertLayer`` modules; ``mode`` selects which layers are run."""

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList(
            [BertLayer(config, i) for i in range(config.num_hidden_layers)]
        )

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        past_key_values=None,
        use_cache=None,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=True,
        query_length=0,
        mode='text'
    ):
        """Run the selected range of layers over ``hidden_states``.

        Args:
            hidden_states: input to the first selected layer.
            attention_mask: mask forwarded unchanged to every layer.
            head_mask: optional per-layer head masks, indexed by layer number.
            encoder_hidden_states: forwarded to every layer.
            encoder_attention_mask: forwarded to every layer.
            past_key_values: optional per-layer caches, indexed by layer number.
            use_cache: if truthy, collect each layer's last output as its cache.
            output_attentions: collect attention maps from each layer.
            output_hidden_states: collect the input of every selected layer
                plus the final output.
            return_dict: return a ``BaseModelOutputWithPastAndCrossAttentions``
                instead of a tuple of the non-``None`` results.
            query_length: forwarded to every layer.
            mode: ``'text'`` runs layers ``[0, config.contrastive_layer)``,
                ``'fusion'`` runs ``[config.contrastive_layer,
                config.num_hidden_layers)``; any other value runs all layers.

        Returns:
            The model-output object, or a tuple when ``return_dict`` is false.
            ``cross_attentions`` holds one entry per layer that actually ran
            cross-attention.
        """
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None
        all_cross_attentions = (
            () if output_attentions and self.config.add_cross_attention else None
        )

        next_decoder_cache = () if use_cache else None

        if mode == 'text':
            start_layer = 0
            end_layer = self.config.contrastive_layer
        elif mode == 'fusion':
            start_layer = self.config.contrastive_layer
            end_layer = self.config.num_hidden_layers
        else:
            start_layer = 0
            end_layer = self.config.num_hidden_layers

        for i in range(start_layer, end_layer):
            layer_module = self.layer[i]
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None
            past_key_value = past_key_values[i] if past_key_values is not None else None

            if getattr(self.config, "gradient_checkpointing", False) and self.training:

                if use_cache:
                    # logger.warn is a deprecated alias of logger.warning.
                    logger.warning(
                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                    )
                    use_cache = False

                def create_custom_forward(module):
                    # checkpoint() only forwards tensors positionally, so the
                    # non-tensor arguments are bound through the closure.
                    def custom_forward(*inputs):
                        return module(
                            *inputs, past_key_value, output_attentions, query_length
                        )

                    return custom_forward

                layer_outputs = torch.utils.checkpoint.checkpoint(
                    create_custom_forward(layer_module),
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                )
            else:
                layer_outputs = layer_module(
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    past_key_value,
                    output_attentions,
                    query_length,
                )

            hidden_states = layer_outputs[0]
            if use_cache:
                next_decoder_cache += (layer_outputs[-1],)
            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)
                # A layer emits a cross-attention map only when it owns a
                # cross-attention block and query tokens are present. For any
                # other layer, index 2 is the key/value cache, so it must not
                # be recorded as attention.
                ran_cross_attention = (
                    getattr(layer_module, "has_cross_attention", False)
                    and query_length > 0
                )
                if all_cross_attentions is not None and ran_cross_attention:
                    all_cross_attentions = all_cross_attentions + (layer_outputs[2],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    next_decoder_cache,
                    all_hidden_states,
                    all_self_attentions,
                    all_cross_attentions,
                ]
                if v is not None
            )
        return BaseModelOutputWithPastAndCrossAttentions(
            last_hidden_state=hidden_states,
            past_key_values=next_decoder_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
            cross_attentions=all_cross_attentions,
        )
+ first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + if isinstance(config.hidden_act, str): + self.transform_act_fn = ACT2FN[config.hidden_act] + else: + self.transform_act_fn = config.hidden_act + self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config): + super().__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. + self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + self.bias = nn.Parameter(torch.zeros(config.vocab_size)) + + # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings` + self.decoder.bias = self.bias + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config): + super().__init__() + self.predictions = BertLMPredictionHead(config) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertPreTrainedModel(PreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. 
+ """ + + config_class = BertConfig + base_model_prefix = "bert" + _keys_to_ignore_on_load_missing = [r"position_ids"] + + def _init_weights(self, module): + """Initialize the weights""" + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + +class BertModel(BertPreTrainedModel): + """ + The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of + cross-attention is added between the self-attention layers, following the architecture described in `Attention is + all you need `__ by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, + Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin. + argument and :obj:`add_cross_attention` set to :obj:`True`; an :obj:`encoder_hidden_states` is then expected as an + input to the forward pass. + """ + + def __init__(self, config, add_pooling_layer=False): + super().__init__(config) + self.config = config + + self.embeddings = BertEmbeddings(config) + + self.encoder = BertEncoder(config) + + self.pooler = BertPooler(config) if add_pooling_layer else None + + self.init_weights() + + def get_input_embeddings(self): + return self.embeddings.word_embeddings + + def set_input_embeddings(self, value): + self.embeddings.word_embeddings = value + + def _prune_heads(self, heads_to_prune): + """ + Prunes heads of the model. 
heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base + class PreTrainedModel + """ + for layer, heads in heads_to_prune.items(): + self.encoder.layer[layer].attention.prune_heads(heads) + + def get_extended_attention_mask( + self, + attention_mask: Tensor, + input_shape: Tuple[int], + device: device, + is_decoder: bool, + has_query: bool = False, + ) -> Tensor: + """ + Makes broadcastable attention and causal masks so that future and masked tokens are ignored. + + Arguments: + attention_mask (:obj:`torch.Tensor`): + Mask with ones indicating tokens to attend to, zeros for tokens to ignore. + input_shape (:obj:`Tuple[int]`): + The shape of the input to the model. + device: (:obj:`torch.device`): + The device of the input to the model. + + Returns: + :obj:`torch.Tensor` The extended attention mask, with a the same dtype as :obj:`attention_mask.dtype`. + """ + # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length] + # ourselves in which case we just need to make it broadcastable to all heads. 
+ if attention_mask.dim() == 3: + extended_attention_mask = attention_mask[:, None, :, :] + elif attention_mask.dim() == 2: + # Provided a padding mask of dimensions [batch_size, seq_length] + # - if the model is a decoder, apply a causal mask in addition to the padding mask + # - if the model is an encoder, make the mask broadcastable to [batch_size, num_heads, seq_length, seq_length] + if is_decoder: + batch_size, seq_length = input_shape + + seq_ids = torch.arange(seq_length, device=device) + causal_mask = ( + seq_ids[None, None, :].repeat(batch_size, seq_length, 1) + <= seq_ids[None, :, None] + ) + + # add a prefix ones mask to the causal mask + # causal and attention masks must have same type with pytorch version < 1.3 + causal_mask = causal_mask.to(attention_mask.dtype) + + if causal_mask.shape[1] < attention_mask.shape[1]: + prefix_seq_len = attention_mask.shape[1] - causal_mask.shape[1] + if has_query: # UniLM style attention mask + causal_mask = torch.cat( + [ + torch.zeros( + (batch_size, prefix_seq_len, seq_length), + device=device, + dtype=causal_mask.dtype, + ), + causal_mask, + ], + axis=1, + ) + causal_mask = torch.cat( + [ + torch.ones( + (batch_size, causal_mask.shape[1], prefix_seq_len), + device=device, + dtype=causal_mask.dtype, + ), + causal_mask, + ], + axis=-1, + ) + extended_attention_mask = ( + causal_mask[:, None, :, :] * attention_mask[:, None, None, :] + ) + else: + extended_attention_mask = attention_mask[:, None, None, :] + else: + raise ValueError( + "Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format( + input_shape, attention_mask.shape + ) + ) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely. 
def forward(
    self,
    input_ids=None,
    attention_mask=None,
    position_ids=None,
    head_mask=None,
    query_embeds=None,
    encoder_embeds=None,
    encoder_hidden_states=None,
    encoder_attention_mask=None,
    past_key_values=None,
    use_cache=None,
    output_attentions=None,
    output_hidden_states=None,
    return_dict=None,
    is_decoder=False,
    mode=None,
):
    r"""
    Embed the inputs, build the attention masks and run the encoder stack.

    At least one of ``input_ids`` / ``query_embeds`` must be given. When
    ``encoder_embeds`` is provided it replaces the embedding layer's output,
    with ``query_embeds`` (if any) concatenated in front along dimension 1.

    encoder_hidden_states (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
        Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
        the model is configured as a decoder. A list of such tensors is also accepted.
    encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
        Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
        the cross-attention if the model is configured as a decoder. Mask values selected in ``[0, 1]``:
        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.
        A list of masks is also accepted; defaults to all ones when omitted.
    past_key_values (:obj:`tuple(tuple(torch.FloatTensor))` of length :obj:`config.n_layers` with each tuple having 4 tensors of shape :obj:`(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`):
        Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
        If :obj:`past_key_values` are used, the user can optionally input only the last :obj:`decoder_input_ids`
        (those that don't have their past key value states given to this model) of shape :obj:`(batch_size, 1)`
        instead of all :obj:`decoder_input_ids` of shape :obj:`(batch_size, sequence_length)`.
    use_cache (:obj:`bool`, `optional`):
        If set to :obj:`True`, :obj:`past_key_values` key value states are returned and can be used to speed up
        decoding (see :obj:`past_key_values`).

    Returns:
        ``BaseModelOutputWithPoolingAndCrossAttentions`` when ``return_dict`` is truthy, otherwise the tuple
        ``(sequence_output, pooled_output) + encoder_outputs[1:]``.

    Raises:
        ValueError: if both ``input_ids`` and ``query_embeds`` are ``None``.
    """
    config = self.config
    if output_attentions is None:
        output_attentions = config.output_attentions
    if output_hidden_states is None:
        output_hidden_states = config.output_hidden_states
    if return_dict is None:
        return_dict = config.use_return_dict

    # use_cache is forwarded to the encoder exactly as given; no fallback to
    # config.use_cache is applied here.

    # Explicit raise instead of `assert`, so the check survives `python -O`.
    if input_ids is None and query_embeds is None:
        raise ValueError("You have to specify query_embeds when input_ids is None")

    # Length of the cached sequence, not counting the query tokens.
    past_key_values_length = 0
    if past_key_values is not None:
        past_key_values_length = (
            past_key_values[0][0].shape[2] - config.num_query_tokens
        )

    query_length = query_embeds.shape[1] if query_embeds is not None else 0

    if encoder_embeds is None:
        embedding_output = self.embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            query_embeds=query_embeds,
            past_key_values_length=past_key_values_length,
        )
    elif query_embeds is not None:
        # Pre-computed embeddings: prepend the queries ourselves.
        embedding_output = torch.cat([query_embeds, encoder_embeds], dim=1)
    else:
        embedding_output = encoder_embeds

    input_shape = embedding_output.size()[:-1]
    batch_size, seq_length = input_shape
    device = embedding_output.device

    if attention_mask is None:
        attention_mask = torch.ones(
            (batch_size, seq_length + past_key_values_length), device=device
        )

    # We can provide a self-attention mask of dimensions
    # [batch_size, from_seq_length, to_seq_length] ourselves, in which case we
    # just need to make it broadcastable to all heads.
    if is_decoder:
        # NOTE(review): the decoder path reads `input_ids.shape`, so it needs
        # `input_ids` even when `encoder_embeds` is supplied -- confirm intended.
        extended_attention_mask = self.get_extended_attention_mask(
            attention_mask,
            input_ids.shape,
            device,
            is_decoder,
            has_query=(query_embeds is not None),
        )
    else:
        extended_attention_mask = self.get_extended_attention_mask(
            attention_mask, input_shape, device, is_decoder
        )

    # If a 2D or 3D attention mask is provided for the cross-attention we need
    # to make it broadcastable to [batch_size, num_heads, seq_length, seq_length].
    encoder_extended_attention_mask = None
    if encoder_hidden_states is not None:
        if isinstance(encoder_hidden_states, list):
            reference_states = encoder_hidden_states[0]
        else:
            reference_states = encoder_hidden_states
        encoder_batch_size, encoder_sequence_length, _ = reference_states.size()
        encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)

        if isinstance(encoder_attention_mask, list):
            encoder_extended_attention_mask = [
                self.invert_attention_mask(mask) for mask in encoder_attention_mask
            ]
        else:
            if encoder_attention_mask is None:
                encoder_attention_mask = torch.ones(
                    encoder_hidden_shape, device=device
                )
            encoder_extended_attention_mask = self.invert_attention_mask(
                encoder_attention_mask
            )

    # Prepare head mask if needed
    # 1.0 in head_mask indicate we keep the head
    # attention_probs has shape bsz x n_heads x N x N
    # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
    # and head_mask is converted to shape
    # [num_hidden_layers x batch x num_heads x seq_length x seq_length]
    head_mask = self.get_head_mask(head_mask, config.num_hidden_layers)

    encoder_outputs = self.encoder(
        embedding_output,
        attention_mask=extended_attention_mask,
        head_mask=head_mask,
        encoder_hidden_states=encoder_hidden_states,
        encoder_attention_mask=encoder_extended_attention_mask,
        past_key_values=past_key_values,
        use_cache=use_cache,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
        query_length=query_length,
        mode=mode,
    )
    sequence_output = encoder_outputs[0]
    pooled_output = None
    if self.pooler is not None:
        pooled_output = self.pooler(sequence_output)

    if not return_dict:
        return (sequence_output, pooled_output) + encoder_outputs[1:]

    return BaseModelOutputWithPoolingAndCrossAttentions(
        last_hidden_state=sequence_output,
        pooler_output=pooled_output,
        past_key_values=encoder_outputs.past_key_values,
        hidden_states=encoder_outputs.hidden_states,
        attentions=encoder_outputs.attentions,
        cross_attentions=encoder_outputs.cross_attentions,
    )
+ encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in + the cross-attention if the model is configured as a decoder. Mask values selected in ``[0, 1]``: + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in + ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are + ignored (masked), the loss is only computed for the tokens with labels n ``[0, ..., config.vocab_size]`` + past_key_values (:obj:`tuple(tuple(torch.FloatTensor))` of length :obj:`config.n_layers` with each tuple having 4 tensors of shape :obj:`(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`): + Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding. + If :obj:`past_key_values` are used, the user can optionally input only the last :obj:`decoder_input_ids` + (those that don't have their past key value states given to this model) of shape :obj:`(batch_size, 1)` + instead of all :obj:`decoder_input_ids` of shape :obj:`(batch_size, sequence_length)`. + use_cache (:obj:`bool`, `optional`): + If set to :obj:`True`, :obj:`past_key_values` key value states are returned and can be used to speed up + decoding (see :obj:`past_key_values`). 
+ Returns: + Example:: + >>> from transformers import BertTokenizer, BertLMHeadModel, BertConfig + >>> import torch + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased') + >>> config = BertConfig.from_pretrained("bert-base-cased") + >>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config) + >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") + >>> outputs = model(**inputs) + >>> prediction_logits = outputs.logits + """ + return_dict = ( + return_dict if return_dict is not None else self.config.use_return_dict + ) + if labels is not None: + use_cache = False + if past_key_values is not None: + query_embeds = None + + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + head_mask=head_mask, + query_embeds=query_embeds, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + past_key_values=past_key_values, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + is_decoder=is_decoder, + ) + + sequence_output = outputs[0] + if query_embeds is not None: + sequence_output = outputs[0][:, query_embeds.shape[1] :, :] + + prediction_scores = self.cls(sequence_output) + + if return_logits: + return prediction_scores[:, :-1, :].contiguous() + + lm_loss = None + if labels is not None: + # we are doing next-token prediction; shift prediction scores and input ids by one + shifted_prediction_scores = prediction_scores[:, :-1, :].contiguous() + labels = labels[:, 1:].contiguous() + loss_fct = CrossEntropyLoss(reduction=reduction, label_smoothing=0.1) + lm_loss = loss_fct( + shifted_prediction_scores.view(-1, self.config.vocab_size), + labels.view(-1), + ) + if reduction == "none": + lm_loss = lm_loss.view(prediction_scores.size(0), -1).sum(1) + + if not return_dict: + output = (prediction_scores,) + outputs[2:] + return ((lm_loss,) + output) if lm_loss is not None 
class BertForMaskedLM(BertPreTrainedModel):
    """BERT encoder (no pooling layer) topped with a masked-language-modeling head."""

    _keys_to_ignore_on_load_unexpected = [r"pooler"]
    _keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"]

    def __init__(self, config):
        super().__init__(config)

        self.bert = BertModel(config, add_pooling_layer=False)
        self.cls = BertOnlyMLMHead(config)

        self.init_weights()

    def get_output_embeddings(self):
        """Return the decoder layer of the MLM prediction head."""
        return self.cls.predictions.decoder

    def set_output_embeddings(self, new_embeddings):
        """Replace the decoder layer of the MLM prediction head."""
        self.cls.predictions.decoder = new_embeddings

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        position_ids=None,
        head_mask=None,
        query_embeds=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        return_logits=False,
        is_decoder=False,
    ):
        r"""
        Run the encoder and score every text position against the vocabulary.

        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
            config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
            (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
        return_logits (:obj:`bool`):
            When true, return the prediction scores directly and skip the loss.

        Returns:
            The raw prediction scores if ``return_logits``; otherwise a ``MaskedLMOutput`` (``return_dict``
            truthy) or a tuple ``(loss?, prediction_scores, *extra_encoder_outputs)``.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            head_mask=head_mask,
            query_embeds=query_embeds,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            is_decoder=is_decoder,
        )

        # Always bind sequence_output: previously it was assigned only when
        # query_embeds was given, so the plain (no-query) MLM path raised
        # UnboundLocalError on the next line.
        sequence_output = outputs[0]
        if query_embeds is not None:
            # Drop the leading query positions; only text tokens are scored.
            sequence_output = sequence_output[:, query_embeds.shape[1]:, :]
        prediction_scores = self.cls(sequence_output)

        if return_logits:
            return prediction_scores

        masked_lm_loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()  # -100 index = padding token
            masked_lm_loss = loss_fct(
                prediction_scores.view(-1, self.config.vocab_size), labels.view(-1)
            )

        if not return_dict:
            output = (prediction_scores,) + outputs[2:]
            if masked_lm_loss is not None:
                return (masked_lm_loss,) + output
            return output

        return MaskedLMOutput(
            loss=masked_lm_loss,
            logits=prediction_scores,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
class MolKFormer(MolEncoder, TextEncoder):
    """Graph/text model built from a GNN structure encoder, a query-token
    BERT ("kformer") and a T5 text decoder.

    The GNN node features are cross-attended by a fixed set of learnable
    query tokens inside the kformer; the resulting query states are used for
    molecule-text matching and, after ``enc2dec``, as encoder outputs for the
    T5 decoder.
    """

    def __init__(self, config):
        """Build all sub-modules from ``config``.

        Keys read from ``config``: ``structure`` (``gin_num_layers``,
        ``gin_hidden_dim``, ``drop_ratio`` and optional ``ckpt``),
        ``kformer_config_file``, ``projection_dim``, ``max_n_atoms``,
        ``encoder_tokenizer``, ``decoder_tokenizer``, ``path_selfies`` and
        ``decoder.config_file``.
        """
        super().__init__()
        self.pooling = global_mean_pool
        self.structure_config = config["structure"]
        self.kformer_config = BertConfig.from_json_file(config["kformer_config_file"])
        self.projection_dim = config["projection_dim"]
        self.max_n_atoms = config["max_n_atoms"]
        self.num_query_tokens = self.kformer_config.num_query_tokens
        self.encoder_tokenizer = BertTokenizer.from_pretrained(config["encoder_tokenizer"])
        self.decoder_tokenizer = get_biot5_tokenizer(
            config["decoder_tokenizer"], config["path_selfies"]
        )

        self.structure_encoder = GNNGraphMVP(
            num_layer=self.structure_config["gin_num_layers"],
            emb_dim=self.structure_config["gin_hidden_dim"],
            gnn_type="gin",
            drop_ratio=self.structure_config["drop_ratio"],
            JK="last",
        )
        if "ckpt" in self.structure_config:
            # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
            self.structure_encoder.load_state_dict(
                torch.load(self.structure_config["ckpt"], map_location="cpu"),
                strict=False,
            )
        hidden_size = self.kformer_config.hidden_size
        self.structure_linear = nn.Linear(self.structure_encoder.output_dim, hidden_size)
        self.structure_proj_head = nn.Linear(hidden_size, self.projection_dim)

        self.kformer = BertLMHeadModel(self.kformer_config)
        self.query_tokens = nn.Parameter(
            torch.zeros(1, self.num_query_tokens, hidden_size)
        )
        self.query_tokens.data.normal_(mean=0.0, std=self.kformer_config.initializer_range)
        self.text_proj_head = nn.Linear(hidden_size, self.projection_dim)
        self.mtm_head = nn.Linear(hidden_size, 2)

        decoder_config = T5Config.from_json_file(config["decoder"]["config_file"])
        self.text_decoder = T5ForConditionalGeneration(decoder_config)
        # NOTE(review): hard-coded vocabulary size; presumably the size of the
        # extended decoder tokenizer -- confirm against get_biot5_tokenizer.
        self.text_decoder.resize_token_embeddings(35073)
        self.enc2dec = nn.Linear(hidden_size, self.text_decoder.config.hidden_size)

        self.norm = False

    @staticmethod
    def _ones_mask(embeds):
        """Return an all-ones long mask covering every dim of ``embeds`` but the last."""
        return torch.ones(embeds.shape[:-1], dtype=torch.long, device=embeds.device)

    def forward(self, mol, text=None, prompt=None, cal_loss=False):
        """Encode a batch of molecule graphs, optionally with text.

        Args:
            mol: batched graph exposing a ``batch`` index vector.
            text: tokenized text (``input_ids`` / ``attention_mask``), optional
                unless ``cal_loss`` is true.
            prompt: tokenized prompt wrapped around ``text`` via ``seq_wrap``, optional.
            cal_loss: when false, return the query-token hidden states; when
                true, return the molecule-text matching loss.

        Returns:
            ``last_hidden_state[:, :num_query_tokens, :]`` of the kformer, or
            the scalar matching loss when ``cal_loss`` is true.
        """
        batch_size = torch.max(mol.batch).item() + 1
        _, node_embeds, node_attention_mask = self.get_graph_feats(mol, batch_size)
        query_embeds = self.query_tokens.expand(batch_size, -1, -1)
        query_mask = self._ones_mask(query_embeds)

        if not cal_loss:
            input_ids, attention_mask = self.seq_wrap(prompt, text)
            attention_mask = torch.cat([query_mask, attention_mask], dim=1)
            return self.kformer.bert(
                input_ids,
                query_embeds=query_embeds,
                attention_mask=attention_mask,
                encoder_hidden_states=node_embeds,
                encoder_attention_mask=node_attention_mask,
                return_dict=True,
            ).last_hidden_state[:, :self.num_query_tokens, :]

        # Masks are placed on query_embeds.device (as in the branch above)
        # rather than self.device, which this class does not define itself.
        if prompt is not None:
            prompt_embeds = self.kformer.get_input_embeddings()(prompt["input_ids"])
            query_attention_mask = torch.cat(
                [query_mask, prompt["attention_mask"]], dim=1
            )
        else:
            prompt_embeds = None
            query_attention_mask = query_mask
        query_outputs = self.kformer.bert(
            encoder_embeds=prompt_embeds,
            query_embeds=query_embeds,
            attention_mask=query_attention_mask,
            encoder_hidden_states=node_embeds,
            encoder_attention_mask=node_attention_mask,
            return_dict=True,
        ).last_hidden_state[:, :self.num_query_tokens, :]
        mol_feats = F.normalize(self.structure_proj_head(query_outputs), dim=-1)

        text_embeds = self.kformer.bert(
            text["input_ids"],
            attention_mask=text["attention_mask"],
            return_dict=True,
        ).last_hidden_state
        text_feats = F.normalize(self.text_proj_head(text_embeds[:, 0, :]), dim=-1)

        # Pairwise similarities, maximised over the query tokens. Explicit
        # squeeze dims keep the result 3-D even when an axis has size 1
        # (a bare .squeeze() would also drop a single-query axis).
        sim_m2t = torch.matmul(
            mol_feats.unsqueeze(1), text_feats.unsqueeze(-1)
        ).squeeze(-1)
        sim_m2t, _ = sim_m2t.max(dim=-1)
        sim_t2m = torch.matmul(
            text_feats.unsqueeze(1).unsqueeze(1), mol_feats.transpose(1, 2)
        ).squeeze(-2)
        sim_t2m, _ = sim_t2m.max(dim=-1)

        # Sample one hard negative per row, never the matching (diagonal) pair.
        with torch.no_grad():
            weights_m2t = F.softmax(sim_m2t, dim=1) + 1e-4
            weights_m2t.fill_diagonal_(0.0)
            weights_t2m = F.softmax(sim_t2m, dim=1) + 1e-4
            weights_t2m.fill_diagonal_(0.0)
            # multinomial returns int64 indices on the weights' device. The
            # previous `.to(node_embeds)` cast them to a float dtype, which
            # cannot be used for indexing.
            idx_neg_m2t = torch.multinomial(weights_m2t, 1).squeeze(1)
            idx_neg_t2m = torch.multinomial(weights_t2m, 1).squeeze(1)

        # Three groups of batch_size pairs: positives, (mol, negative text),
        # (negative mol, text).
        node_embeds_mtm = torch.cat(
            [node_embeds, node_embeds, node_embeds[idx_neg_t2m]], dim=0
        )
        node_attention_mask_mtm = torch.cat(
            [node_attention_mask, node_attention_mask, node_attention_mask[idx_neg_t2m]],
            dim=0,
        )
        wrapped_input_ids, wrapped_attention_mask = self.seq_wrap(prompt, text)
        text_input_ids_mtm = torch.cat(
            [wrapped_input_ids, wrapped_input_ids[idx_neg_m2t], wrapped_input_ids], dim=0
        )
        text_attention_mask_mtm = torch.cat(
            [wrapped_attention_mask, wrapped_attention_mask[idx_neg_m2t], wrapped_attention_mask],
            dim=0,
        )
        query_embeds_mtm = self.query_tokens.expand(node_embeds_mtm.shape[0], -1, -1)
        text_attention_mask_mtm = torch.cat(
            [self._ones_mask(query_embeds_mtm), text_attention_mask_mtm], dim=1
        )
        mtm_labels = torch.cat(
            [
                torch.ones(batch_size, dtype=torch.long),
                torch.zeros(2 * batch_size, dtype=torch.long),
            ],
            dim=0,
        ).to(query_embeds_mtm.device)

        output = self.kformer.bert(
            input_ids=text_input_ids_mtm,
            query_embeds=query_embeds_mtm,
            attention_mask=text_attention_mask_mtm,
            encoder_hidden_states=node_embeds_mtm,
            encoder_attention_mask=node_attention_mask_mtm,
            return_dict=True,
        )
        mtm_output = self.mtm_head(
            output["last_hidden_state"][:, :self.num_query_tokens, :]
        ).mean(dim=1)
        return F.cross_entropy(mtm_output, mtm_labels)

    def seq_wrap(self, seq1, seq2):
        """Insert ``seq2`` after the attended part of ``seq1``, per sample.

        If either argument is ``None`` the other one is returned unchanged.
        Otherwise each row becomes ``seq1[:len] + seq2 + seq1[len:]`` where
        ``len`` is the sum of that row's ``seq1`` attention mask.

        Returns:
            Tuple ``(input_ids, attention_mask)``.
        """
        if seq1 is None:
            return seq2["input_ids"], seq2["attention_mask"]
        if seq2 is None:
            return seq1["input_ids"], seq1["attention_mask"]
        wrapped_inputs, wrapped_attention_mask = [], []
        for i in range(seq1["input_ids"].shape[0]):
            cur_len = seq1["attention_mask"][i].sum()
            for key, rows in (
                ("input_ids", wrapped_inputs),
                ("attention_mask", wrapped_attention_mask),
            ):
                rows.append(torch.cat(
                    [seq1[key][i, :cur_len], seq2[key][i], seq1[key][i, cur_len:]],
                    dim=0,
                ))
        return torch.stack(wrapped_inputs, dim=0), torch.stack(wrapped_attention_mask, dim=0)

    def get_graph_feats(self, graph, batch_size):
        """Encode ``graph`` and lay its node features out per molecule.

        Every molecule is truncated or zero-padded to ``max_n_atoms`` nodes.

        Returns:
            ``(graph_embeds, node_feats, node_attention_mask)`` where the last
            two are stacked over the batch and the mask is 1 for real nodes.
        """
        graph_embeds, node_embeds = self.structure_encoder(graph)
        all_node_feats = self.structure_linear(node_embeds)
        node_feats = []
        node_attention_mask = []
        for i in range(batch_size):
            feat = all_node_feats[graph.batch == i]
            n_atoms = min(feat.shape[0], self.max_n_atoms)
            n_pad = self.max_n_atoms - n_atoms
            node_feats.append(torch.cat((
                feat[:n_atoms],
                torch.zeros(n_pad, feat.shape[1], device=feat.device),
            ), dim=0))
            node_attention_mask.append(torch.cat((
                torch.ones(n_atoms, device=feat.device),
                torch.zeros(n_pad, device=feat.device),
            ), dim=0))
        node_feats = torch.stack(node_feats, dim=0)
        node_attention_mask = torch.stack(node_attention_mask, dim=0)
        return graph_embeds, node_feats, node_attention_mask

    def encode_mol(self, mol, proj=False):
        """Return query-token embeddings for ``mol``.

        When ``mol`` carries a ``"text"`` entry it is used as a prompt and the
        graph is taken from ``mol["structure"]`` (or its ``"graph"`` entry);
        otherwise ``mol["structure"]`` is encoded with the query tokens alone.
        With ``proj`` the result is projected and L2-normalised.
        """
        if "text" in mol:
            s = mol["structure"]
            if "graph" in mol["structure"]:
                s = s["graph"]
            mol_embeds = self.forward(s, prompt=mol["text"])
        else:
            mol = mol["structure"]
            batch_size = torch.max(mol.batch).item() + 1
            _, node_embeds, node_attention_mask = self.get_graph_feats(mol, batch_size)
            query_embeds = self.query_tokens.expand(batch_size, -1, -1)
            mol_embeds = self.kformer.bert(
                query_embeds=query_embeds,
                attention_mask=self._ones_mask(query_embeds),
                encoder_hidden_states=node_embeds,
                encoder_attention_mask=node_attention_mask,
                return_dict=True,
            ).last_hidden_state
        if proj:
            mol_embeds = F.normalize(self.structure_proj_head(mol_embeds), dim=-1)
        return mol_embeds

    def encode_text(self, text, return_cls=True, proj=False):
        """Encode tokenized ``text`` with the kformer's BERT.

        ``return_cls`` keeps only position 0; ``proj`` applies the text
        projection head followed by L2 normalisation.
        """
        text_embeds = self.kformer.bert(
            text["input_ids"],
            attention_mask=text["attention_mask"],
            return_dict=True,
        ).last_hidden_state
        if return_cls:
            text_embeds = text_embeds[:, 0, :]
        if proj:
            text_embeds = F.normalize(self.text_proj_head(text_embeds), dim=-1)
        return text_embeds

    def _build_decoder_inputs(self, mol):
        """Concatenate projected graph queries with the T5-encoded SMILES.

        Returns:
            ``(encoder_outputs, attention_mask)`` ready to be passed to the
            T5 decoder; shared by ``decode`` and ``causal_generation_loss``.
        """
        h_graph = self.enc2dec(self.encode_mol(mol))
        smiles = mol["structure"]["SMILES"]
        h_smi = self.text_decoder.encoder(**smiles).last_hidden_state
        h = torch.cat([h_graph, h_smi], dim=1)
        graph_mask = torch.ones(h_graph.shape[:-1], dtype=torch.long).to(h.device)
        attention_mask = torch.cat([graph_mask, smiles.attention_mask], dim=1)
        encoder_outputs = BaseModelOutput(
            last_hidden_state=h,
            hidden_states=None,
            attentions=None,
        )
        return encoder_outputs, attention_mask

    def decode(self, mol, num_beams, max_length):
        """Generate token ids for ``mol`` with beam search; decoding to text is left to the caller."""
        encoder_outputs, attention_mask = self._build_decoder_inputs(mol)
        return self.text_decoder.generate(
            encoder_outputs=encoder_outputs,
            attention_mask=attention_mask,
            num_beams=num_beams,
            max_length=max_length,
        )

    def predict_similarity_score(self, mol, text):
        """Return the matching-head probability of class 1 for each (mol, text) pair."""
        if "text" in mol:
            prompt = mol["text"]
            mol = mol["structure"]
        else:
            prompt = None
        preds = self.forward(mol, text, prompt=prompt)
        return F.softmax(self.mtm_head(preds).mean(dim=1), dim=-1)[:, 1]

    def causal_generation_loss(self, mol, text):
        """Return the T5 decoder loss for generating ``text`` from ``mol``.

        Padding positions of ``text`` are replaced by -100 in the labels.
        """
        labels = text["input_ids"].masked_fill(~text["attention_mask"].bool(), -100)
        encoder_outputs, attention_mask = self._build_decoder_inputs(mol)
        return self.text_decoder(
            encoder_outputs=encoder_outputs,
            attention_mask=attention_mask,
            decoder_attention_mask=text["attention_mask"],
            return_dict=True,
            labels=labels,
        ).loss
class MoleditModel(nn.Module):
    """Thin wrapper that loads a pretrained molecule/text encoder for molecule editing."""

    def __init__(self, config):
        """Instantiate the encoder named by ``config["graph"]["name"]`` and load its checkpoint.

        For the ``molkformer`` encoder the state dict is read from the
        checkpoint's ``"model"`` entry. Loading is non-strict.
        """
        super().__init__()
        graph_config = config["graph"]
        encoder_name = graph_config["name"]
        self.model = SUPPORTED_MOL_ENCODER[encoder_name](graph_config)
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        self.ckpt = torch.load(graph_config["init_checkpoint"], map_location="cpu")
        if encoder_name == "molkformer":
            self.ckpt = self.ckpt["model"]
        self.model.load_state_dict(self.ckpt, strict=False)
        self.use_molkformer = encoder_name == "molkformer"
        self.use_momu = encoder_name == "momu"

    def forward(self, mol):
        """Alias of :meth:`encode`."""
        return self.encode(mol)

    def decode(self, mol, num_beams, max_length):
        """Encode ``mol`` and decode it with ``self.generate_model``.

        Raises:
            AttributeError: if no ``generate_model`` has been attached; this
                class never sets one itself.
        """
        generate_model = getattr(self, "generate_model", None)
        if generate_model is None:
            raise AttributeError(
                "MoleditModel.decode requires a `generate_model` attribute, "
                "which MoleditModel.__init__ does not create"
            )
        h, encoder_attention_mask = self.encode(mol)
        return generate_model.decode(
            encoder_outputs=h,
            encoder_attention_mask=encoder_attention_mask,
            num_beams=num_beams,
            max_length=max_length,
        )

    def encode(self, mol):
        """Return ``(embedding, 1)`` for tokenized text or a molecule.

        Inputs containing ``"input_ids"`` are encoded as text. Otherwise the
        input is encoded as a molecule: molkformer embeddings are averaged
        over dimension 1, momu embeddings are returned as-is.

        Raises:
            NotImplementedError: for a molecule input when the wrapped encoder
                is neither molkformer nor momu (previously this path failed
                with ``UnboundLocalError``).
        """
        if "input_ids" in mol:
            h = self.model.encode_text(mol, proj=True)
        elif self.use_molkformer:
            graph_feats = self.model.encode_mol({"structure": mol}, proj=True)
            h = graph_feats.mean(dim=1)
        elif self.use_momu:
            h = self.model.encode_mol(mol, proj=True)
        else:
            raise NotImplementedError(
                "MoleditModel.encode supports molecule inputs only for the "
                "'molkformer' and 'momu' encoders"
            )
        # The attention mask is a constant placeholder in every branch.
        return h, 1
def cycle_index(num, shift):
    """Return the indices ``0..num-1`` rotated left by ``shift``.

    ``cycle_index(5, 2)`` gives ``[2, 3, 4, 0, 1]``. Computed with a modulo so
    that ``shift == 0`` and ``shift > num`` are also valid; the previous
    slice-assignment form raised a shape-mismatch error for both.
    """
    return (torch.arange(num) + shift) % num
def mean_pooling(token_embeddings, attention_mask):
    """Average ``token_embeddings`` over dimension 0, ignoring masked positions.

    ``attention_mask`` is a boolean padding mask (``True`` = ignore); it is
    inverted, broadcast over the embedding dimension and used as weights.
    The denominator is clamped to ``1e-9`` so fully-masked columns yield 0.
    """
    keep = (~attention_mask).unsqueeze(-1).expand(token_embeddings.size()).float()  # [pad, B, d]
    totals = (token_embeddings * keep).sum(0)  # [B, d]
    counts = keep.sum(0).clamp(min=1e-9)  # [B, d]
    return totals / counts
def train(epoch):
    """Run one alignment epoch between the MoleculeSTM and generation spaces.

    Relies on module-level state expected to be set up by the script before
    the first call: ``args``, ``dataloader``, ``device``, ``optimizer``, the
    two molecule models, the two projection heads
    (``MoleculeSTM2generation`` / ``generation2MoleculeSTM``),
    ``MegaMolBART_wrapper`` and the running best ``optimal_loss``.

    For every batch both representations are computed, projected into the
    other space and trained with the symmetric ``do_CL`` loss. The best
    checkpoint (lowest mean epoch loss) is written through ``save_model``.
    """
    global optimal_loss

    batches = tqdm(dataloader) if args.verbose else dataloader
    use_smiles = args.MoleculeSTM_molecule_type == "SMILES"

    start_time = time.time()
    accum_loss, accum_acc = 0, 0
    num_batches = 0
    for batch in batches:
        SMILES_list = batch["structure"]["SMILES"]
        # MoleculeSTM consumes either the SMILES strings or the graph; the two
        # original branches differed only in this input.
        if use_smiles:
            stm_input = SMILES_list
        else:
            stm_input = batch["structure"]["graph"].to(device)

        molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM(
            stm_input, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM,
            molecule_type=args.MoleculeSTM_molecule_type, MegaMolBART_wrapper=MegaMolBART_wrapper
        )
        molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM)

        if args.generation_model == "MegaMolBART":
            molecule_repr_generation = get_molecule_repr_generation(
                SMILES_list, molecule_model=molecule_model_generation,
                molecule_type="MegaMolBART", MegaMolBART_wrapper=MegaMolBART_wrapper
            )
        else:  # for HierVAE
            # NOTE(review): MolGraph / vocab / avocab are not among the visible
            # imports of this script, and --generation_model only allows
            # "MegaMolBART", so this branch looks unreachable -- confirm.
            hiervae_data_list = MolGraph.tensorize(SMILES_list, vocab, avocab)
            molecule_repr_generation = molecule_model_generation.forward_MoleculeSTM(hiervae_data_list)
        molecule_repr_generation2MoleculeSTM = generation2MoleculeSTM(molecule_repr_generation)

        loss_01, acc_01 = do_CL(molecule_repr_generation, molecule_repr_MoleculeSTM2generation, args)
        loss_02, acc_02 = do_CL(molecule_repr_MoleculeSTM, molecule_repr_generation2MoleculeSTM, args)
        loss = (loss_01 + loss_02) / 2
        acc = (acc_01 + acc_02) / 2

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        accum_loss += loss.item()
        accum_acc += acc
        num_batches += 1

    if num_batches == 0:
        # An empty loader used to raise ZeroDivisionError below; there is
        # nothing to average or checkpoint, so just report and return.
        print("No batches in dataloader; skipping epoch {}".format(epoch))
        return

    accum_loss /= num_batches
    accum_acc /= num_batches

    if accum_loss < optimal_loss:
        optimal_loss = accum_loss
        save_model(save_best=True, epoch=epoch)
    print("SSL Loss: {:.5f}\tSSL Acc: {:.5f}\tTime: {:.5f}".format(accum_loss, accum_acc, time.time() - start_time))
    return
type=str, default=None) + + ########## for optimization ########## + parser.add_argument("--batch_size", type=int, default=256) + parser.add_argument("--num_workers", type=int, default=8) + parser.add_argument("--epochs", type=int, default=1) + parser.add_argument("--decay", type=float, default=0) + parser.add_argument("--generation_lr", type=float, default=1e-2) + parser.add_argument("--MoleculeSTM_lr", type=float, default=1e-2) + parser.add_argument("--T", type=float, default=0.1) + parser.add_argument("--SSL_loss", type=str, default="EBM_NCE", choices=["EBM_NCE", "InfoNCE", "RR"]) + parser.add_argument("--CL_neg_samples", type=int, default=1) + parser.add_argument('--use_normalize', dest='normalize', action='store_true') + parser.add_argument('--no_normalize', dest='normalize', action='store_false') + parser.set_defaults(normalize=True) + parser.add_argument("--MASTER_PORT", type=str, default='6001') + + args = parser.parse_args() + print(args) + + config = json.load(open(args.config_path)) + os.environ['MASTER_PORT'] = args.MASTER_PORT + # load dataset + if args.generation_model == "MegaMolBART": + if args.MoleculeSTM_molecule_type == "SMILES": + if args.dataset == "ZINC250K": + dataset = SUPPORTED_MOLEDIT_DATASET[args.dataset](args.dataset_path, config["data"]["mol"], split="train") + # dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") + # dataset = ZINC250K_Dataset_SMILES(dataset_root) + elif args.dataset == "ZINC250K1K": + dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") + dataset = ZINC250K_Dataset_SMILES(dataset_root, 1000) + elif args.dataset == "ZINC250K10K": + dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") + dataset = ZINC250K_Dataset_SMILES(dataset_root, 10000) + else: + raise Exception + dataloader_class = pyg_DataLoader + else: + if args.dataset == "ZINC250K": + dataset = SUPPORTED_MOLEDIT_DATASET[args.dataset](args.dataset_path, config["data"]["mol"], split="train") + # dataset_root = 
os.path.join(args.dataset_path, "ZINC250K_data") + # dataset = ZINC250K_Dataset_Graph(dataset_root) + elif args.dataset == "ZINC250K1K": + dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") + dataset = ZINC250K_Dataset_Graph(dataset_root, 1000) + elif args.dataset == "ZINC250K10K": + dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") + dataset = ZINC250K_Dataset_Graph(dataset_root, 10000) + else: + raise Exception + dataloader_class = pyg_DataLoader + else: + raise NotImplementedError + + + device = torch.device(args.device) \ + if torch.cuda.is_available() else torch.device("cpu") + + np.random.seed(args.seed) + torch.random.manual_seed(args.seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(args.seed) + + # load model + if config["model"]=="molstm-MegaMolBART": + MegaMolBART_wrapper, molecule_model_generation, molecule_dim_generation, \ + molecule_model_MoleculeSTM, mol2latent_MoleculeSTM, molecule_dim_MoleculeSTM = load_molecule_models(args) + mol2latent_MoleculeSTM = mol2latent_MoleculeSTM.to(device) + freeze_network(mol2latent_MoleculeSTM) + mol2latent_MoleculeSTM.eval() + else: + MegaMolBART_wrapper = MegaMolBART(vocab_path=args.vocab_path, input_dir=args.MegaMolBART_generation_model_dir, output_dir=None) + molecule_model_generation = copy.deepcopy(MegaMolBART_wrapper.model) + molecule_dim_generation = 256 + molecule_dim_MoleculeSTM = args.SSL_emb_dim + molecule_model_MoleculeSTM = MoleditModel(config["network"]) + mol2latent_MoleculeSTM = None + + torch.cuda.set_device(int(re.search(r'\d+', args.device).group())) + + molecule_model_generation = molecule_model_generation.to(device) + molecule_model_MoleculeSTM = molecule_model_MoleculeSTM.to(device) + freeze_network(molecule_model_generation) + freeze_network(molecule_model_MoleculeSTM) + molecule_model_generation.eval() + molecule_model_MoleculeSTM.eval() + + + dataloader = dataloader_class(dataset, batch_size=args.batch_size, shuffle=True, 
num_workers=args.num_workers) + + generation2MoleculeSTM = MLP(molecule_dim_generation, [molecule_dim_MoleculeSTM, molecule_dim_MoleculeSTM]).to(device) + MoleculeSTM2generation = MLP(molecule_dim_MoleculeSTM, [molecule_dim_generation, molecule_dim_generation]).to(device) + + model_param_group = [ + {"params": generation2MoleculeSTM.parameters(), "lr": args.generation_lr}, + {"params": MoleculeSTM2generation.parameters(), "lr": args.MoleculeSTM_lr}, + ] + optimizer = optim.Adam(model_param_group, weight_decay=args.decay) + optimal_loss = 1e10 + + for e in range(1, args.epochs+1): + print("Epoch {}".format(e)) + train(e) diff --git a/open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py b/open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py new file mode 100644 index 0000000..6b039cb --- /dev/null +++ b/open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py @@ -0,0 +1,247 @@ +import argparse +import math +import numpy as np +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +import json +import torch +from torch import optim +import torch.nn.functional as F +from tqdm import tqdm +import re +from models.MoleculeSTM.downstream_molecule_edit_utils import get_SMILES_list, get_description_list, load_language_molecule_and_edit_models, clip_loss_for_edit, evaluate_SMILES_list +from models.MoleculeSTM.utils import prepare_text_tokens +from models.MoleculeSTM.models import GNN, GNN_graphpred, MLP +from transformers import BertTokenizer +from models.multimodal import * +from models.task_model.moledit_model import MoleditModel +from models.MoleculeSTM.models.mega_molbart.mega_mol_bart import MegaMolBART + +def get_lr(t, initial_lr, rampdown=0.25, rampup=0.05): + lr_ramp = min(1, (1 - t) / rampdown) + lr_ramp = 0.5 - 0.5 * math.cos(lr_ramp * math.pi) + lr_ramp = lr_ramp * min(1, t / rampup) + return initial_lr * lr_ramp + + +def mean_pooling(token_embeddings, 
attention_mask): + attention_mask = ~attention_mask + input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() # [pad, B, d] + sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 0) # [B, d] + sum_mask = torch.clamp(input_mask_expanded.sum(0), min=1e-9) # [B, d] + return sum_embeddings / sum_mask + + +def check_edit(SMILES, text): + if config["model"]=="molstm-MegaMolBART": + text_list = [text] + text_tokens_ids, text_masks = prepare_text_tokens( + device=device, description=text_list, tokenizer=text_tokenizer, max_seq_len=args.max_seq_len) + text_output = text_model(input_ids=text_tokens_ids, attention_mask=text_masks) + text_repr = text_output["pooler_output"] + text_repr = text2latent(text_repr) + else: + text_list = text_tokenizer(text, truncation=True, padding=True, return_tensors='pt').to(device) + del text_list["token_type_ids"] + text_output = text_model(text_list) + text_repr = text_output[0] + + + first_and_second_SMILES_list = [] + + latent_code_init, pad_mask_init = MegaMolBART_wrapper.smileslist2embedding([SMILES]) # [pad, B, d], [pad, B] + first_and_second_SMILES_list.append(SMILES) + + regenerated_mols = MegaMolBART_wrapper.inverse_transform([latent_code_init], pad_mask_init.bool().cuda(), k=1, sanitize=True) + first_and_second_SMILES_list.append(regenerated_mols[0]) + + l2_lambda_list = [ + 1e1, 1e0, 1e-1, 1e-2, 1e-3 + ] + result_SMILES_list_one_pair, result_eval_list_one_pair = [], [] + + if args.use_noise_for_init: + print("Use random noise for init") + random_noise = torch.randn(latent_code_init.size()).to(device) + + for l2_lambda in l2_lambda_list: + print("l2 lambda: {}".format(l2_lambda)) + current_SMILES_list = [first_and_second_SMILES_list[0]] + [first_and_second_SMILES_list[1]] + if args.use_noise_for_init: + print("Use random noise for init") + latent = latent_code_init.detach().clone() + random_noise + else: + print("No random noise for init") + latent = latent_code_init.detach().clone() + 
pad_mask = pad_mask_init.detach().clone() + latent.requires_grad = True + optimizer = optim.Adam([latent], lr=args.lr) + + if args.verbose: + L = tqdm(range(args.epochs)) + else: + L = range(args.epochs) + + for i in L: + t = i / args.epochs + lr = get_lr(t, args.lr) + optimizer.param_groups[0]["lr"] = lr + + molecule_repr_generation = mean_pooling(latent, pad_mask) # [B, d] + if args.normalize: + molecule_repr_generation = F.normalize(molecule_repr_generation, dim=-1) + molecule_repr_MoleculeSTM = generation2MoleculeSTM(molecule_repr_generation) + + clip_loss_ = clip_loss_for_edit(molecule_repr_MoleculeSTM, text_repr) + l2_loss_ = l2_lambda * ((latent_code_init - latent) ** 2).mean() + + loss = clip_loss_ + l2_loss_ + + optimizer.zero_grad() + loss.backward(retain_graph=True) + optimizer.step() + print("clip loss: {:.5f}\tL2 loss: {:.5f}".format(clip_loss_.item(), l2_loss_.item())) + + generated_mols = MegaMolBART_wrapper.inverse_transform([latent], pad_mask.bool().cuda(), k=1, sanitize=True) + current_SMILES_list.append(generated_mols[0]) + result_SMILES_list_one_pair.append([text] + current_SMILES_list + ['{}'.format(l2_lambda)]) + + current_result_list = evaluate_SMILES_list(current_SMILES_list, text) + result_eval_list_one_pair.append(current_result_list) + print() + + result_eval_list_one_pair = np.array(result_eval_list_one_pair) + result_eval_list_one_pair = np.any(result_eval_list_one_pair, axis=0, keepdims=True) + print("result_eval_list_one_pair\n", result_eval_list_one_pair) + return result_SMILES_list_one_pair, result_eval_list_one_pair + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--device", type=str, default="cuda:0") + parser.add_argument("--verbose", type=int, default=1) + + ########## for editing ########## + parser.add_argument("--input_description", type=str, default=None) + parser.add_argument("--input_description_id", type=int, default=101) + 
parser.add_argument("--input_SMILES", type=str, default=None) + parser.add_argument("--input_SMILES_file", type=str, default=None) + parser.add_argument("--output_model_dir", type=str, default=None) + parser.add_argument("--use_noise_for_init", dest="use_noise_for_init", action="store_true") + parser.add_argument("--no_noise_for_init", dest="use_noise_for_init", action="store_false") + parser.set_defaults(use_noise_for_init=False) + parser.add_argument('--normalize', dest='normalize', action='store_true') + parser.add_argument('--no_normalize', dest='normalize', action='store_false') + parser.set_defaults(normalize=True) + + parser.add_argument("--dataset_path", type=str, default="./datasets/mol_edit") + parser.add_argument("--SSL_emb_dim", type=int, default=256) + parser.add_argument("--max_seq_len", type=int, default=512) + parser.add_argument("--config_path", type=str, default=None) + ########## for MoleculeSTM ########## + parser.add_argument("--MoleculeSTM_model_dir", type=str, default=None) + parser.add_argument("--MoleculeSTM_molecule_type", type=str, default=None, choices=["SMILES", "Graph"]) + + ########## for MegaMolBART ########## + parser.add_argument("--MegaMolBART_generation_model_dir", type=str, default=None) + parser.add_argument("--vocab_path", type=str, default=None) + parser.add_argument("--text_mode", type=str, default=None) + + ########## for MoleculeSTM and generation projection ########## + parser.add_argument("--language_edit_model_dir_new", type=str, default=None) + + parser.add_argument("--language_edit_model_dir", type=str, default=None) + ########## for editing ########## + parser.add_argument("--lr_rampup", type=float, default=0.05) + parser.add_argument("--lr", type=float, default=0.1) + parser.add_argument("--epochs", type=int, default=100) + parser.add_argument("--MASTER_PORT", type=str, default='6001') + args = parser.parse_args() + + print(args) + + config = json.load(open(args.config_path)) + os.environ['MASTER_PORT'] = 
args.MASTER_PORT + device = torch.device(args.device) \ + if torch.cuda.is_available() else torch.device("cpu") + if config["model"]=="molstm-MegaMolBART": + text_model, text_tokenizer, text_dim, molecule_model, MegaMolBART_wrapper, molecule_dim, \ + text2latent, mol2latent, generation2MoleculeSTM, MoleculeSTM2generation = load_language_molecule_and_edit_models(args) + text2latent = text2latent.to(device) + mol2latent = mol2latent.to(device) + text2latent.eval() + mol2latent.eval() + + else: + text_model = MoleditModel(config["network"]) + text_tokenizer = BertTokenizer.from_pretrained(args.text_mode, model_max_length=512, cache_dir=args.text_mode) + # This is loading from the pretarined_MegaMolBART + MegaMolBART_wrapper = MegaMolBART(vocab_path=args.vocab_path, input_dir=args.MegaMolBART_generation_model_dir, output_dir=None) + molecule_model = MegaMolBART_wrapper.model + print("Loading from pretrained MegaMolBART ({}).".format(args.MegaMolBART_generation_model_dir)) + + # generation2MoleculeSTM = nn.Linear(molecule_dim_generation, args.SSL_emb_dim) + generation2MoleculeSTM = MLP(256, [args.SSL_emb_dim, args.SSL_emb_dim]) + input_model_path = os.path.join(args.language_edit_model_dir_new, "generation2MoleculeSTM_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + generation2MoleculeSTM.load_state_dict(state_dict) + + # MoleculeSTM2generation = nn.Linear(args.SSL_emb_dim, molecule_dim_generation) + MoleculeSTM2generation = MLP(args.SSL_emb_dim, [256, 256]) + input_model_path = os.path.join(args.language_edit_model_dir_new, "MoleculeSTM2generation_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + MoleculeSTM2generation.load_state_dict(state_dict) + + text_model = text_model.to(device) + molecule_model = molecule_model.to(device) + generation2MoleculeSTM.to(device) + MoleculeSTM2generation.to(device) + 
text_model.eval() + molecule_model.eval() + generation2MoleculeSTM.eval() + MoleculeSTM2generation.eval() + + np.random.seed(args.seed) + torch.random.manual_seed(args.seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(args.seed) + device = torch.device(args.device) \ + if torch.cuda.is_available() else torch.device("cpu") + + print("\n\n\nstart editing\n\n\n") + + source_SMILES_list = get_SMILES_list(args) + description_list = get_description_list(args) + + torch.cuda.set_device(int(re.search(r'\d+', args.device).group())) + + for description in description_list: + print("===== for description {} =====".format(description)) + result_SMILES_list, result_acc_list = [], [] + for SMILES in source_SMILES_list: + print("===== for SMILES {} =====".format(SMILES)) + result_SMILES_list_, result_acc_list_ = check_edit(SMILES, description) + result_SMILES_list.extend(result_SMILES_list_) + result_acc_list.append(result_acc_list_) + print("\n\n\n") + + result_acc_list = np.concatenate(result_acc_list, axis=0) + result_acc_list = np.sum(result_acc_list, axis=0) + result_acc_list = 100. 
* result_acc_list / len(source_SMILES_list) + result_acc_row = '\t'.join(['{}'.format(x) for x in result_acc_list]) + print("===== Accuracy =====\t{}".format(result_acc_row)) + + if args.output_model_dir is not None: + saver_file = os.path.join(args.output_model_dir, "edited_SMILES.tsv") + with open(saver_file, 'a') as f: + for row in result_SMILES_list: + row = "\t".join(row) + print(row, file=f) + + saver_file = os.path.join(args.output_model_dir, "accuracy") + np.savez(saver_file, result_acc_list) From 864384bfc90fd9fea3ecd5c5aea28f7120bde92d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ccyz-32526=E2=80=9D?= <345609226@qq.com> Date: Sat, 4 Nov 2023 21:06:23 +0800 Subject: [PATCH 3/9] add molstm for momu&molkformer --- .../encoders/multimodal/kformer_config.json | 25 +++++++++++++++++ open_biomed/datasets/moledit_dataset.py | 2 +- open_biomed/feature/mol_featurizer.py | 17 ++++++++++++ open_biomed/models/multimodal/__init__.py | 3 ++- open_biomed/utils/mol_utils.py | 27 ++++++++++++++++++- 5 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 configs/encoders/multimodal/kformer_config.json diff --git a/configs/encoders/multimodal/kformer_config.json b/configs/encoders/multimodal/kformer_config.json new file mode 100644 index 0000000..47920e6 --- /dev/null +++ b/configs/encoders/multimodal/kformer_config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertForMaskedLM" + ], + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "type_vocab_size": 2, + "vocab_size": 31090, + "encoder_width": 768, + "add_cross_attention": true, + "cross_attention_freq": 2, + "num_query_tokens": 32, + "contrastive_layer": 6 + } + \ No newline at end of file diff --git 
a/open_biomed/datasets/moledit_dataset.py b/open_biomed/datasets/moledit_dataset.py index aafe085..96ba1fd 100644 --- a/open_biomed/datasets/moledit_dataset.py +++ b/open_biomed/datasets/moledit_dataset.py @@ -59,7 +59,7 @@ def __init__(self, path, config, split): self.split = split super(ZINC250K, self).__init__(path, config) - def _load_data(self, subset_size=1000): + def _load_data(self, subset_size=None): SMILES_file = os.path.join(self.path, "raw/250k_rndm_zinc_drugs_clean_3.csv") df = pd.read_csv(SMILES_file) diff --git a/open_biomed/feature/mol_featurizer.py b/open_biomed/feature/mol_featurizer.py index 9e92415..00d2079 100644 --- a/open_biomed/feature/mol_featurizer.py +++ b/open_biomed/feature/mol_featurizer.py @@ -23,6 +23,7 @@ from sklearn.preprocessing import OneHotEncoder from torch_geometric.data import Data from transformers import BertTokenizer, T5Tokenizer +from models.MoleculeSTM.models.mega_molbart.tokenizer import MolEncTokenizer from feature.base_featurizer import BaseFeaturizer from feature.kg_featurizer import SUPPORTED_KG_FEATURIZER @@ -169,6 +170,21 @@ def __call__(self, data): result = self.tokenizer(data, max_length=self.max_length, padding=True, truncation=True) return result +class MolSTMFeaturizer(BaseFeaturizer): + name2tokenizer = { + "molbart": MolEncTokenizer, + } + + def __init__(self, config): + super(MolSTMFeaturizer, self).__init__() + self.tokenizer = self.name2tokenizer[config["transformer_type"]].from_pretrained(config["model_name_or_path"]) + + def __call__(self, data): + result = self.tokenizer.tokenize(data, pad=False) + result = self.tokenizer.convert_tokens_to_ids(result['original_tokens']) + + return result + class MolBPEFeaturizer(BaseFeaturizer): def __init__(self, config): super(MolBPEFeaturizer, self).__init__() @@ -783,6 +799,7 @@ def __getitem__(self, index): "OneHot": MolOneHotFeaturizer, "KV-PLM*": MolBPEFeaturizer, "transformer": MolTransformerTokFeaturizer, + "moleculeSTM": MolSTMFeaturizer, "fp": 
MolFPFeaturizer, "TGSA": MolTGSAFeaturizer, "ogb": MolGraphFeaturizer, diff --git a/open_biomed/models/multimodal/__init__.py b/open_biomed/models/multimodal/__init__.py index c628cc0..7b61dbf 100644 --- a/open_biomed/models/multimodal/__init__.py +++ b/open_biomed/models/multimodal/__init__.py @@ -5,4 +5,5 @@ from models.multimodal.molfm.molfm import MolFM from models.multimodal.molfm.drugfm import DrugFM from models.multimodal.molt5 import MolT5 -from models.multimodal.text2mol import Text2MolMLP \ No newline at end of file +from models.multimodal.text2mol import Text2MolMLP +from models.multimodal.molkformer.mol_kformer import MolKFormer \ No newline at end of file diff --git a/open_biomed/utils/mol_utils.py b/open_biomed/utils/mol_utils.py index 55dad0b..0738625 100644 --- a/open_biomed/utils/mol_utils.py +++ b/open_biomed/utils/mol_utils.py @@ -432,4 +432,29 @@ def save_vocabulary( index = token_index writer.write(token + "\n") index += 1 - return (vocab_file,) \ No newline at end of file + return (vocab_file,) + +def get_biot5_tokenizer(path_t5, path_selfies): + from transformers import T5Tokenizer + tokenizer = T5Tokenizer.from_pretrained(path_t5) + tokenizer.model_max_length = int(1e9) + + amino_acids = [ + "A", "C", "D", "E", "F", + "G", "H", "I", "K", "L", + "M", "N", "P", "Q", "R", + "S", "T", "V", "W", "Y" + ] + prefixed_amino_acids = [f"

{aa}" for aa in amino_acids] + tokenizer.add_tokens(prefixed_amino_acids) + + selfies_dict_list = [line.strip() for line in open(path_selfies, "r")] + tokenizer.add_tokens(selfies_dict_list) + + special_tokens_dict = {'additional_special_tokens': + ['', '', + '', '', + 'MOLECULE NAME', 'DESCRIPTION', + 'PROTEIN NAME', 'FUNCTION', 'SUBCELLULAR LOCATION', 'PROTEIN FAMILIES']} + tokenizer.add_special_tokens(special_tokens_dict, replace_additional_special_tokens=False) + return tokenizer \ No newline at end of file From 678cf3d224cc2752febd6cfdc43943e9f14becbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ccyz-32526=E2=80=9D?= <345609226@qq.com> Date: Wed, 8 Nov 2023 16:20:32 +0800 Subject: [PATCH 4/9] add moledit --- ...json => molkformer-Graph-MegaMolBART.json} | 2 +- configs/moledit/molstm-Graph-MegaMolBART.json | 47 + configs/moledit/molstm-MegaMolBART.json | 24 - .../moledit/molstm-SMILES-MegaMolBART.json | 40 + ...lBART.json => momu-Graph-MegaMolBART.json} | 2 +- open_biomed/datasets/moledit_dataset.py | 2 +- open_biomed/feature/mol_featurizer.py | 2 +- open_biomed/models/MoleculeSTM/__init__.py | 0 ...guage_edit_step_00_check_reconstruction.py | 106 -- ...t_step_01_molecule_representation_align.py | 229 ---- ...nguage_edit_step_02_latent_optimization.py | 161 --- .../backup/downstream_language_edit_utils.py | 133 -- .../MoleculeSTM/cuchemcommon/__init__.py | 0 .../MoleculeSTM/cuchemcommon/context.py | 53 - .../MoleculeSTM/cuchemcommon/data/__init__.py | 45 - .../cuchemcommon/data/cluster_wf.py | 61 - .../cuchemcommon/data/generative_wf.py | 19 - .../cuchemcommon/data/helper/__init__.py | 0 .../cuchemcommon/data/helper/chembldata.py | 320 ----- .../MoleculeSTM/cuchemcommon/fingerprint.py | 95 -- .../models/MoleculeSTM/cuchemcommon/smiles.py | 38 - .../cuchemcommon/utils/__init__.py | 1 - .../MoleculeSTM/cuchemcommon/utils/logger.py | 106 -- .../cuchemcommon/utils/singleton.py | 26 - .../MoleculeSTM/cuchemcommon/utils/sysinfo.py | 68 - 
.../MoleculeSTM/datasets/DrugBankGraph.py | 235 ---- .../MoleculeSTM/datasets/DrugBankSMILES.py | 94 -- .../MoleculeSTM/datasets/MoleculeNetGraph.py | 584 -------- .../MoleculeSTM/datasets/MoleculeNetSMILES.py | 36 - .../models/MoleculeSTM/datasets/PubChemSTM.py | 275 ---- .../MoleculeSTM/datasets/PubChemSTM_raw.py | 172 --- .../MoleculeSTM/datasets/ZINC250K_Graph.py | 67 - .../MoleculeSTM/datasets/ZINC250K_SMILES.py | 31 - .../models/MoleculeSTM/datasets/__init__.py | 8 - .../models/MoleculeSTM/datasets/utils.py | 182 --- .../MoleculeSTM/models/GA/ZINC_first_1000.smi | 1000 -------------- .../models/MoleculeSTM/models/GA/__init__.py | 0 .../models/MoleculeSTM/models/GA/crossover.py | 194 --- .../models/MoleculeSTM/models/GA/mutate.py | 132 -- open_biomed/models/MoleculeSTM/models/MLP.py | 49 - .../models/MoleculeSTM/models/__init__.py | 2 - .../models/mega_molbart/__init__.py | 1 - open_biomed/models/MoleculeSTM/splitters.py | 93 -- open_biomed/models/MoleculeSTM/utils.py | 71 - open_biomed/models/__init__.py | 4 +- open_biomed/models/multimodal/__init__.py | 4 +- .../multimodal/mega_molbart/__init__.py | 1 + .../mega_molbart/decoder.py | 0 .../mega_molbart/mega_mol_bart.py | 9 +- .../mega_molbart/megatron_bart.py | 0 .../mega_molbart/tokenizer.py | 0 .../mega_molbart/util.py | 0 .../mega_molbart}/workflow.py | 11 +- .../moleculestm.py} | 176 ++- .../models/task_model/moledit_model.py | 53 +- open_biomed/tasks/mol_edit/accuracy.npz | Bin 0 -> 272 bytes open_biomed/tasks/mol_edit/edited_SMILES.tsv | 1195 +++++++++++++++++ .../moledit_step_01_Space_Alignment.py | 74 +- .../moledit_step_02_Latent_Optimization.py | 89 +- .../molstm_utils.py} | 57 +- scripts/multimodal/moledit/edit.sh | 18 +- scripts/multimodal/moledit/train.sh | 13 +- 62 files changed, 1628 insertions(+), 4882 deletions(-) rename configs/moledit/{molkformer-MegaMolBART.json => molkformer-Graph-MegaMolBART.json} (93%) create mode 100644 configs/moledit/molstm-Graph-MegaMolBART.json delete mode 100644 
configs/moledit/molstm-MegaMolBART.json create mode 100644 configs/moledit/molstm-SMILES-MegaMolBART.json rename configs/moledit/{momu-MegaMolBART.json => momu-Graph-MegaMolBART.json} (91%) delete mode 100644 open_biomed/models/MoleculeSTM/__init__.py delete mode 100644 open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_00_check_reconstruction.py delete mode 100644 open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_01_molecule_representation_align.py delete mode 100644 open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_02_latent_optimization.py delete mode 100644 open_biomed/models/MoleculeSTM/backup/downstream_language_edit_utils.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/__init__.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/context.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/__init__.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/cluster_wf.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/generative_wf.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/__init__.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/chembldata.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/fingerprint.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/smiles.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/utils/__init__.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/utils/logger.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/utils/singleton.py delete mode 100644 open_biomed/models/MoleculeSTM/cuchemcommon/utils/sysinfo.py delete mode 100644 open_biomed/models/MoleculeSTM/datasets/DrugBankGraph.py delete mode 100644 open_biomed/models/MoleculeSTM/datasets/DrugBankSMILES.py delete mode 100644 open_biomed/models/MoleculeSTM/datasets/MoleculeNetGraph.py delete mode 100644 
open_biomed/models/MoleculeSTM/datasets/MoleculeNetSMILES.py delete mode 100644 open_biomed/models/MoleculeSTM/datasets/PubChemSTM.py delete mode 100644 open_biomed/models/MoleculeSTM/datasets/PubChemSTM_raw.py delete mode 100644 open_biomed/models/MoleculeSTM/datasets/ZINC250K_Graph.py delete mode 100644 open_biomed/models/MoleculeSTM/datasets/ZINC250K_SMILES.py delete mode 100644 open_biomed/models/MoleculeSTM/datasets/__init__.py delete mode 100644 open_biomed/models/MoleculeSTM/datasets/utils.py delete mode 100644 open_biomed/models/MoleculeSTM/models/GA/ZINC_first_1000.smi delete mode 100644 open_biomed/models/MoleculeSTM/models/GA/__init__.py delete mode 100644 open_biomed/models/MoleculeSTM/models/GA/crossover.py delete mode 100644 open_biomed/models/MoleculeSTM/models/GA/mutate.py delete mode 100644 open_biomed/models/MoleculeSTM/models/MLP.py delete mode 100644 open_biomed/models/MoleculeSTM/models/__init__.py delete mode 100644 open_biomed/models/MoleculeSTM/models/mega_molbart/__init__.py delete mode 100644 open_biomed/models/MoleculeSTM/splitters.py delete mode 100644 open_biomed/models/MoleculeSTM/utils.py create mode 100644 open_biomed/models/multimodal/mega_molbart/__init__.py rename open_biomed/models/{MoleculeSTM/models => multimodal}/mega_molbart/decoder.py (100%) rename open_biomed/models/{MoleculeSTM/models => multimodal}/mega_molbart/mega_mol_bart.py (97%) rename open_biomed/models/{MoleculeSTM/models => multimodal}/mega_molbart/megatron_bart.py (100%) rename open_biomed/models/{MoleculeSTM/models => multimodal}/mega_molbart/tokenizer.py (100%) rename open_biomed/models/{MoleculeSTM/models => multimodal}/mega_molbart/util.py (100%) rename open_biomed/models/{MoleculeSTM/cuchemcommon => multimodal/mega_molbart}/workflow.py (95%) rename open_biomed/models/{MoleculeSTM/models/molecule_gnn_model.py => multimodal/moleculestm.py} (51%) create mode 100644 open_biomed/tasks/mol_edit/accuracy.npz create mode 100644 
open_biomed/tasks/mol_edit/edited_SMILES.tsv rename open_biomed/{models/MoleculeSTM/downstream_molecule_edit_utils.py => utils/molstm_utils.py} (92%) diff --git a/configs/moledit/molkformer-MegaMolBART.json b/configs/moledit/molkformer-Graph-MegaMolBART.json similarity index 93% rename from configs/moledit/molkformer-MegaMolBART.json rename to configs/moledit/molkformer-Graph-MegaMolBART.json index 9bf615a..f5333fd 100644 --- a/configs/moledit/molkformer-MegaMolBART.json +++ b/configs/moledit/molkformer-Graph-MegaMolBART.json @@ -10,7 +10,7 @@ "SMILES": { "name": "moleculeSTM", "transformer_type": "molbart", - "model_name_or_path": "./open_biomed/models/MoleculeSTM/bart_vocab.txt", + "model_name_or_path": "./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt", "max_length": 512 }, "graph": { diff --git a/configs/moledit/molstm-Graph-MegaMolBART.json b/configs/moledit/molstm-Graph-MegaMolBART.json new file mode 100644 index 0000000..f55c7e7 --- /dev/null +++ b/configs/moledit/molstm-Graph-MegaMolBART.json @@ -0,0 +1,47 @@ +{ + "model": "molstm-MegaMolBART", + "data": { + "mol": { + "modality": ["structure"], + "featurizer": { + "structure": { + "name": "MultiScale", + "scales": ["SMILES", "graph"], + "SMILES": { + "name": "moleculeSTM", + "transformer_type": "molbart", + "model_name_or_path": "./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt", + "max_length": 512 + }, + "graph": { + "name": "BaseGNN" + } + } + } + } + }, + "network": { + "graph": { + "name": "molstm", + "structure": { + "name": "gnn", + "gin_hidden_dim": 300, + "gin_num_layers": 5, + "drop_ratio": 0.5, + "output_dim": 300, + "ckpt" : "./ckpts/fusion_ckpts/moleculestm/demo_checkpoints_Graph/molecule_model.pth", + "MegaMolBART_generation_model_dir" : "./ckpts/fusion_ckpts/pretrained_MegaMolBART/checkpoints", + "vocab_path": "./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt" + }, + "text": { + "output_dim": 768, + "ckpt": 
"./ckpts/fusion_ckpts/moleculestm/demo_checkpoints_Graph/text_model.pth", + "bert_path": "./ckpts/text_ckpts/scibert_scivocab_uncased" + }, + "structure_proj_ckpt": "./ckpts/fusion_ckpts/moleculestm/demo_checkpoints_Graph/mol2latent_model.pth", + "text_proj_ckpt": "./ckpts/fusion_ckpts/moleculestm/demo_checkpoints_Graph/text2latent_model.pth", + "projection_dim": 256 + } + + } +} \ No newline at end of file diff --git a/configs/moledit/molstm-MegaMolBART.json b/configs/moledit/molstm-MegaMolBART.json deleted file mode 100644 index 7532052..0000000 --- a/configs/moledit/molstm-MegaMolBART.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "model": "molstm-MegaMolBART", - "data": { - "mol": { - "modality": ["structure"], - "featurizer": { - "structure": { - "name": "MultiScale", - "scales": ["SMILES", "graph"], - "SMILES": { - "name": "moleculeSTM", - "transformer_type": "molbart", - "model_name_or_path": "./open_biomed/models/MoleculeSTM/bart_vocab.txt", - "max_length": 512 - }, - "graph": { - "name": "BaseGNN" - } - } - } - } - } - -} \ No newline at end of file diff --git a/configs/moledit/molstm-SMILES-MegaMolBART.json b/configs/moledit/molstm-SMILES-MegaMolBART.json new file mode 100644 index 0000000..aa3b841 --- /dev/null +++ b/configs/moledit/molstm-SMILES-MegaMolBART.json @@ -0,0 +1,40 @@ +{ + "model": "molstm-MegaMolBART", + "data": { + "mol": { + "modality": ["structure"], + "featurizer": { + "structure": { + "name": "MultiScale", + "scales": ["SMILES"], + "SMILES": { + "name": "moleculeSTM", + "transformer_type": "molbart", + "model_name_or_path": "./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt", + "max_length": 512 + } + } + } + } + }, + "network": { + "smiles": { + "name": "molstm", + "structure": { + "name": "magamolbart", + "output_dim": 256, + "MegaMolBART_generation_model_dir" : "./ckpts/fusion_ckpts/pretrained_MegaMolBART/checkpoints", + "vocab_path": "./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt" + }, + "text": { + "output_dim": 
768, + "ckpt": "./ckpts/fusion_ckpts/moleculestm/demo_checkpoints_SMILES/text_model.pth", + "bert_path": "./ckpts/text_ckpts/scibert_scivocab_uncased" + }, + "structure_proj_ckpt": "./ckpts/fusion_ckpts/moleculestm/demo_checkpoints_SMILES/mol2latent_model.pth", + "text_proj_ckpt": "./ckpts/fusion_ckpts/moleculestm/demo_checkpoints_SMILES/text2latent_model.pth", + "projection_dim": 256 + } + + } +} \ No newline at end of file diff --git a/configs/moledit/momu-MegaMolBART.json b/configs/moledit/momu-Graph-MegaMolBART.json similarity index 91% rename from configs/moledit/momu-MegaMolBART.json rename to configs/moledit/momu-Graph-MegaMolBART.json index f2dd4fa..6989b95 100644 --- a/configs/moledit/momu-MegaMolBART.json +++ b/configs/moledit/momu-Graph-MegaMolBART.json @@ -10,7 +10,7 @@ "SMILES": { "name": "moleculeSTM", "transformer_type": "molbart", - "model_name_or_path": "./open_biomed/models/MoleculeSTM/bart_vocab.txt", + "model_name_or_path": "./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt", "max_length": 512 }, "graph": { diff --git a/open_biomed/datasets/moledit_dataset.py b/open_biomed/datasets/moledit_dataset.py index 96ba1fd..f7fa9db 100644 --- a/open_biomed/datasets/moledit_dataset.py +++ b/open_biomed/datasets/moledit_dataset.py @@ -13,7 +13,7 @@ from feature.mol_featurizer import MolMultiModalFeaturizer from feature.text_featurizer import TextTransformerTokFeaturizer from utils.mol_utils import valid_smiles -from models.MoleculeSTM.models.mega_molbart.tokenizer import MolEncTokenizer +from models.multimodal.mega_molbart.tokenizer import MolEncTokenizer class MoleditDataset(Dataset, ABC): def __init__(self, path, config): diff --git a/open_biomed/feature/mol_featurizer.py b/open_biomed/feature/mol_featurizer.py index 00d2079..4189ef1 100644 --- a/open_biomed/feature/mol_featurizer.py +++ b/open_biomed/feature/mol_featurizer.py @@ -23,7 +23,7 @@ from sklearn.preprocessing import OneHotEncoder from torch_geometric.data import Data from 
transformers import BertTokenizer, T5Tokenizer -from models.MoleculeSTM.models.mega_molbart.tokenizer import MolEncTokenizer +from models.multimodal.mega_molbart.tokenizer import MolEncTokenizer from feature.base_featurizer import BaseFeaturizer from feature.kg_featurizer import SUPPORTED_KG_FEATURIZER diff --git a/open_biomed/models/MoleculeSTM/__init__.py b/open_biomed/models/MoleculeSTM/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_00_check_reconstruction.py b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_00_check_reconstruction.py deleted file mode 100644 index 3f10668..0000000 --- a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_00_check_reconstruction.py +++ /dev/null @@ -1,106 +0,0 @@ -import argparse -import os -import numpy as np -from rdkit import Chem -from rdkit.Chem import Descriptors - -import torch -from torch.utils.data import DataLoader as torch_DataLoader - -from MoleculeSTM.utils import freeze_network -from MoleculeSTM.datasets import ZINC15_Datasets_Only_SMILES, PubChem_Datasets_Only_SMILES -from MoleculeSTM.models.mega_molbart.mega_mol_bart import MegaMolBART - -props = [ - "qed", "MolWt", "MolLogP", "TPSA", - "HeavyAtomCount", "NumAromaticRings", "NumHAcceptors", "NumHDonors", "NumRotatableBonds" -] -props = [ - "MolWt", "MolLogP" -] -prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--device", type=int, default=0) - parser.add_argument("--verbose", type=int, default=1) - parser.add_argument("--dataspace_path", type=str, default="../../Datasets") - parser.add_argument("--dataset", type=str, default="ZINC15") - parser.add_argument("--molecule_type", type=str, default="MegaMolBART", choices=["MegaMolBART", "Graph"]) - - 
########## for MoleculeSTM ########## - parser.add_argument("--CLIP_input_model_dir", type=str, default="../../pretrained_model") - parser.add_argument("--SSL_emb_dim", type=int, default=256) - - ########## for generation ########## - parser.add_argument("--generation_model_dir", type=str, default="../../Datasets/pretrained_MegaMolBART/checkpoints") - - ########## for optimization ########## - parser.add_argument("--batch_size", type=int, default=64) - parser.add_argument("--num_workers", type=int, default=8) - - args = parser.parse_args() - print(args) - - # This is loading from the pretarined_MegaMolBART - MegaMolBART_wrapper = MegaMolBART(input_dir=args.generation_model_dir, output_dir=None) - molecule_model_generation = MegaMolBART_wrapper.model - print("Loading from pretrained MegaMolBART ({}).".format(args.generation_model_dir)) - molecule_dim_generation = 256 - - device = torch.device("cuda:" + str(args.device)) \ - if torch.cuda.is_available() else torch.device("cpu") - molecule_model_generation = molecule_model_generation.to(device) - - np.random.seed(args.seed) - torch.random.manual_seed(args.seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(args.seed) - device = torch.device("cuda:" + str(args.device)) \ - if torch.cuda.is_available() else torch.device("cpu") - - freeze_network(molecule_model_generation) - molecule_model_generation.eval() - - if args.molecule_type == "MegaMolBART": - if args.dataset == "ZINC15": - dataset_root = os.path.join(args.dataspace_path, "ZINC15_data") - dataset = ZINC15_Datasets_Only_SMILES(dataset_root) - elif "PubChem" in args.dataset: - dataset_root = os.path.join(args.dataspace_path, "PubChem_data") - dataset = PubChem_Datasets_Only_SMILES(dataset_root) - else: - raise Exception - dataloader_class = torch_DataLoader - else: - raise Exception - - dataloader = dataloader_class(dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) - - for batch_idx, batch in enumerate(dataloader): - 
SMILES_list = batch - print("SMILES_list", SMILES_list) - - for original_SMILES in SMILES_list: - mol = Chem.MolFromSmiles(original_SMILES) - for name, func in prop_pred: - value = func(mol) - print("{}: {}".format(name, value)) - canon_original_SMILES = Chem.MolToSmiles(mol) - - latent_code_init, pad_mask_init = MegaMolBART_wrapper.smileslist2embedding_model_given(molecule_model_generation, [original_SMILES]) # [pad, B, d], [pad, B] - print("latent_code:\t", latent_code_init[0, :, :5]) - - latent_code_init, pad_mask_init = MegaMolBART_wrapper.smileslist2embedding_model_given(molecule_model_generation, [canon_original_SMILES]) # [pad, B, d], [pad, B] - print("latent_code:\t", latent_code_init[0, :, :5]) - - generated_SMILES = MegaMolBART_wrapper.inverse_transform([latent_code_init], pad_mask_init.bool().cuda(), k=1, sanitize=True) - print("original SMILES: \t", original_SMILES) - print("original SMILES (canon): \t", canon_original_SMILES) - print("reconstructured SMILES: \t", generated_SMILES[0]) - print() - - if batch_idx >= 9: - break diff --git a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_01_molecule_representation_align.py b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_01_molecule_representation_align.py deleted file mode 100644 index bea3fc9..0000000 --- a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_01_molecule_representation_align.py +++ /dev/null @@ -1,229 +0,0 @@ -import argparse -import os -import numpy as np -from tqdm import tqdm -import time - -import torch -import torch.nn as nn -from torch import optim -import torch.nn.functional as F -from torch.utils.data import DataLoader as torch_DataLoader - -from MoleculeSTM.utils import get_molecule_repr_MoleculeSTM -from MoleculeSTM.downstream_language_edit_utils import load_molecule_models -from MoleculeSTM.utils import freeze_network -from MoleculeSTM.datasets import PubChem_Datasets_Only_SMILES - - -def cycle_index(num, shift): - arr = 
torch.arange(num) + shift - arr[-shift:] = torch.arange(shift) - return arr - - -def do_CL(X, Y, args): - if args.normalize: - X = F.normalize(X, dim=-1) - Y = F.normalize(Y, dim=-1) - - if args.SSL_loss == 'EBM_NCE': - criterion = nn.BCEWithLogitsLoss() - neg_Y = torch.cat([Y[cycle_index(len(Y), i + 1)] for i in range(args.CL_neg_samples)], dim=0) - neg_X = X.repeat((args.CL_neg_samples, 1)) - - pred_pos = torch.sum(X * Y, dim=1) / args.T - pred_neg = torch.sum(neg_X * neg_Y, dim=1) / args.T - - loss_pos = criterion(pred_pos, torch.ones(len(pred_pos)).to(pred_pos.device)) - loss_neg = criterion(pred_neg, torch.zeros(len(pred_neg)).to(pred_neg.device)) - CL_loss = (loss_pos + args.CL_neg_samples * loss_neg) / (1 + args.CL_neg_samples) - - CL_acc = (torch.sum(pred_pos > 0).float() + torch.sum(pred_neg < 0).float()) / \ - (len(pred_pos) + len(pred_neg)) - CL_acc = CL_acc.detach().cpu().item() - - elif args.SSL_loss == 'InfoNCE': - criterion = nn.CrossEntropyLoss() - B = X.size()[0] - logits = torch.mm(X, Y.transpose(1, 0)) # B*B - logits = torch.div(logits, args.T) - labels = torch.arange(B).long().to(logits.device) # B*1 - - CL_loss = criterion(logits, labels) - pred = logits.argmax(dim=1, keepdim=False) - CL_acc = pred.eq(labels).sum().detach().cpu().item() * 1. 
/ B - - else: - raise Exception - - return CL_loss, CL_acc - - -def mean_pooling(token_embeddings, attention_mask): - input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() # [pad, B, d] - sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 0) # [B, d] - sum_mask = torch.clamp(input_mask_expanded.sum(0), min=1e-9) # [B, d] - return sum_embeddings / sum_mask - - -def get_molecule_repr_generation(molecule_data, molecule_model, molecule_type="MegaMolBART", MegaMolBART_wrapper=None): - if molecule_type == "MegaMolBART": - embedding, pad_mask = MegaMolBART_wrapper.smileslist2embedding_model_given(molecule_model, molecule_data) # [pad, B, d], [pad, B] - # molecule_repr = embedding[0, :, :] # [B, d] - # next we will take the mean pooling instead of the CLS token. - molecule_repr = mean_pooling(embedding, pad_mask) - else: - molecule_repr, _ = molecule_model(molecule_data) - return molecule_repr - - -def save_model(save_best, epoch=None): - if args.output_model_dir is not None: - if save_best: - global optimal_loss - print("save model with loss: {:.5f}".format(optimal_loss)) - model_file = "model.pth" - - elif epoch is None: - model_file = "model_final.pth" - - else: - model_file = "model_{}.pth".format(epoch) - - saved_file_path = os.path.join(args.output_model_dir, "generation2MoleculeSTM_{}".format(model_file)) - torch.save(generation2MoleculeSTM.state_dict(), saved_file_path) - - saved_file_path = os.path.join(args.output_model_dir, "MoleculeSTM2generation_{}".format(model_file)) - torch.save(MoleculeSTM2generation.state_dict(), saved_file_path) - return - - -def train(epoch): - if args.verbose: - L = tqdm(dataloader) - else: - L = dataloader - - start_time = time.time() - accum_loss, accum_acc = 0, 0 - for batch in L: - SMILES_list = batch - - molecule_repr_generation = get_molecule_repr_generation( - SMILES_list, molecule_model=molecule_model_generation, - molecule_type="MegaMolBART", 
MegaMolBART_wrapper=MegaMolBART_wrapper - ) - molecule_repr_generation2MoleculeSTM = generation2MoleculeSTM(molecule_repr_generation) - - molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( - SMILES_list, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, - molecule_type="MegaMolBART", MegaMolBART_wrapper=MegaMolBART_wrapper - ) - molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) - - loss_01, acc_01 = do_CL(molecule_repr_generation, molecule_repr_MoleculeSTM2generation, args) - loss_02, acc_02 = do_CL(molecule_repr_MoleculeSTM, molecule_repr_generation2MoleculeSTM, args) - loss = (loss_01 + loss_02) / 2 - acc = (acc_01 + acc_02) / 2 - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - accum_loss += loss.item() - accum_acc += acc - - accum_loss /= len(L) - accum_acc /= len(L) - - global optimal_loss - temp_loss = accum_loss - if temp_loss < optimal_loss: - optimal_loss = temp_loss - save_model(save_best=True, epoch=epoch) - print("CL Loss: {:.5f}\tCL Acc: {:.5f}Time: {:.5f}".format(accum_loss, accum_acc, time.time() - start_time)) - return - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--device", type=int, default=0) - parser.add_argument("--verbose", type=int, default=1) - parser.add_argument("--dataspace_path", type=str, default="../../Datasets") - parser.add_argument("--dataset", type=str, default="PubChem") - parser.add_argument("--molecule_type", type=str, default="MegaMolBART", choices=["MegaMolBART", "Graph"]) - parser.add_argument("--output_model_dir", type=str, default=None) - - ########## for MoleculeSTM ########## - parser.add_argument("--MoleculeSTM_model_dir", type=str, default="../../pretrained_model_Raw") - parser.add_argument("--SSL_emb_dim", type=int, default=256) - - ########## for generation ########## - parser.add_argument("--generation_model_dir", type=str, 
default="../../Datasets/pretrained_MegaMolBART/checkpoints") - - ########## for optimization ########## - parser.add_argument("--batch_size", type=int, default=256) - parser.add_argument("--num_workers", type=int, default=8) - parser.add_argument("--epochs", type=int, default=100) - parser.add_argument("--decay", type=float, default=0) - parser.add_argument("--generation_lr", type=float, default=1e-4) - parser.add_argument("--MoleculeSTM_lr", type=float, default=1e-4) - parser.add_argument("--T", type=float, default=0.1) - parser.add_argument("--SSL_loss", type=str, default="EBM_NCE", choices=["EBM_NCE", "InfoNCE"]) - parser.add_argument("--CL_neg_samples", type=int, default=1) - parser.add_argument('--normalize', dest='normalize', action='store_true') - parser.add_argument('--no_normalize', dest='normalize', action='store_false') - parser.set_defaults(normalize=True) - - args = parser.parse_args() - print(args) - - MegaMolBART_wrapper, molecule_model_generation, molecule_dim_generation, \ - molecule_model_MoleculeSTM, mol2latent_MoleculeSTM, molecule_dim_MoleculeSTM = load_molecule_models(args) - device = torch.device("cuda:" + str(args.device)) \ - if torch.cuda.is_available() else torch.device("cpu") - molecule_model_generation = molecule_model_generation.to(device) - molecule_model_MoleculeSTM = molecule_model_MoleculeSTM.to(device) - mol2latent_MoleculeSTM = mol2latent_MoleculeSTM.to(device) - - np.random.seed(args.seed) - torch.random.manual_seed(args.seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(args.seed) - device = torch.device("cuda:" + str(args.device)) \ - if torch.cuda.is_available() else torch.device("cpu") - - freeze_network(molecule_model_generation) - freeze_network(mol2latent_MoleculeSTM) - freeze_network(molecule_model_MoleculeSTM) - molecule_model_generation.eval() - mol2latent_MoleculeSTM.eval() - molecule_model_MoleculeSTM.eval() - - if args.molecule_type == "MegaMolBART": - if "PubChem" in args.dataset: - dataset_root = 
os.path.join(args.dataspace_path, "PubChem_data") - else: - raise Exception - dataset = PubChem_Datasets_Only_SMILES(dataset_root) - dataloader_class = torch_DataLoader - else: - raise Exception - - dataloader = dataloader_class(dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) - - generation2MoleculeSTM = nn.Linear(molecule_dim_generation, molecule_dim_MoleculeSTM).to(device) - MoleculeSTM2generation = nn.Linear(molecule_dim_MoleculeSTM, molecule_dim_generation).to(device) - - model_param_group = [ - {"params": generation2MoleculeSTM.parameters(), "lr": args.generation_lr}, - {"params": MoleculeSTM2generation.parameters(), "lr": args.MoleculeSTM_lr}, - ] - optimizer = optim.Adam(model_param_group, weight_decay=args.decay) - optimal_loss = 1e10 - - for e in range(1, args.epochs+1): - print("Epoch {}".format(e)) - train(e) diff --git a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_02_latent_optimization.py b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_02_latent_optimization.py deleted file mode 100644 index 5bf5129..0000000 --- a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_step_02_latent_optimization.py +++ /dev/null @@ -1,161 +0,0 @@ -import argparse -import math -import numpy as np -from rdkit import Chem, RDLogger - -import torch -from torch import optim -import torch.nn.functional as F -from tqdm import tqdm -from downstream_language_edit_utils import load_language_molecule_and_edit_models, clip_loss_for_edit, evaluate_SMILES_list -from MoleculeSTM.utils import prepare_text_tokens - - -def get_lr(t, initial_lr, rampdown=0.25, rampup=0.05): - lr_ramp = min(1, (1 - t) / rampdown) - lr_ramp = 0.5 - 0.5 * math.cos(lr_ramp * math.pi) - lr_ramp = lr_ramp * min(1, t / rampup) - return initial_lr * lr_ramp - - -def mean_pooling(token_embeddings, attention_mask): - input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() # [pad, B, d] - 
sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 0) # [B, d] - sum_mask = torch.clamp(input_mask_expanded.sum(0), min=1e-9) # [B, d] - return sum_embeddings / sum_mask - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--device", type=int, default=0) - parser.add_argument("--verbose", type=int, default=1) - - ########## for editing ########## - parser.add_argument("--description", type=str) - parser.add_argument("--input_model_dir", type=str) - parser.add_argument("--mode", type=str, default="edit", choices=["edit", "free_generation"]) - parser.add_argument("--input_SMILES", type=str, default=None) - parser.add_argument("--l2_lambda", type=float, default=0.008) - - ########## for ? ########## - parser.add_argument("--dataspace_path", type=str, default="../../Datasets") - parser.add_argument("--SSL_emb_dim", type=int, default=256) - parser.add_argument("--max_seq_len", type=int, default=512) - - ########## for MoleculeSTM ########## - parser.add_argument("--MoleculeSTM_model_dir", type=str, default="../../pretrained_model_Raw") - - ########## for generation ########## - parser.add_argument("--generation_model_dir", type=str, default="../../Datasets/pretrained_MegaMolBART/checkpoints") - - ########## for MoleculeSTM and generation projection ########## - parser.add_argument("--language_edit_model_dir", type=str, default="edit_temp/EBM_NCE") - - ########## for editing ########## - parser.add_argument("--lr_rampup", type=float, default=0.05) - parser.add_argument("--lr", type=float, default=0.1) - parser.add_argument("--epochs", type=int, default=100) - args = parser.parse_args() - - print(args) - - text_model, text_tokenizer, text_dim, molecule_model, MegaMolBART_wrapper, molecule_dim, \ - text2latent, mol2latent, generation2MoleculeSTM, MoleculeSTM2generation = load_language_molecule_and_edit_models(args) - device = torch.device("cuda:" + str(args.device)) 
\ - if torch.cuda.is_available() else torch.device("cpu") - text_model = text_model.to(device) - molecule_model = molecule_model.to(device) - text2latent = text2latent.to(device) - mol2latent = mol2latent.to(device) - generation2MoleculeSTM.to(device) - MoleculeSTM2generation.to(device) - text_model.eval() - molecule_model.eval() - text2latent.eval() - mol2latent.eval() - generation2MoleculeSTM.eval() - MoleculeSTM2generation.eval() - - np.random.seed(args.seed) - torch.random.manual_seed(args.seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(args.seed) - device = torch.device("cuda:" + str(args.device)) \ - if torch.cuda.is_available() else torch.device("cpu") - - description_list = [args.description] - text_tokens_ids, text_masks = prepare_text_tokens( - device=device, description=description_list, tokenizer=text_tokenizer, max_seq_len=args.max_seq_len) - text_output = text_model(input_ids=text_tokens_ids, attention_mask=text_masks) - text_repr = text_output["pooler_output"] - text_repr = text2latent(text_repr) - - record_SMILES_list = [] - - if args.mode == "edit": - SMILES_list = [args.input_SMILES] - latent_code_init, pad_mask_init = MegaMolBART_wrapper.smileslist2embedding(SMILES_list) # [pad, B, d], [pad, B] - molecule_repr_generation_init = mean_pooling(latent_code_init, pad_mask_init) # [B, d] - # record_SMILES_list.append(args.input_SMILES) - else: - padding_dim = 10 - latent_code_init = torch.randn(padding_dim, 1, molecule_dim).to(device) - pad_mask_init = torch.zeros(padding_dim, 1).bool().to(device) - print("latent_code_init", latent_code_init.size()) - print("pad_mask_init", pad_mask_init.size()) - - generated_mols = MegaMolBART_wrapper.inverse_transform( - [latent_code_init], pad_mask_init.bool().cuda(), k=1, sanitize=True) - print("initial SMILES", generated_mols[0]) - record_SMILES_list.append(generated_mols[0]) - - l2_lambda_list = [ - 1, 0.1, 0.01, 0.001, 0.0001, - 3, 0.3, 0.03, 0.003, 0.0003, - 5, 0.5, 0.05, 0.005, 0.0005, - 8, 
0.8, 0.08, 0.008, 0.0008, - ] - l2_lambda_list = [ - 0.1, - ] - - for l2_lambda in l2_lambda_list: - result_SMILES_list = [record_SMILES_list[0]] - print("with lambda {} ......".format(l2_lambda)) - latent = latent_code_init.detach().clone() - latent.requires_grad = True - optimizer = optim.Adam([latent], lr=args.lr) - - if args.verbose: - L = tqdm(range(args.epochs)) - else: - L = range(args.epochs) - for i in L: - t = i / args.epochs - lr = get_lr(t, args.lr) - optimizer.param_groups[0]["lr"] = lr - - molecule_repr_generation = mean_pooling(latent, pad_mask_init) # [B, d] - # molecule_repr_MoleculeSTM = generation2MoleculeSTM(molecule_repr_generation) - - clip_loss_ = clip_loss_for_edit(molecule_repr_generation, mol2latent, text_repr) - l2_loss_ = args.l2_lambda * ((latent_code_init - latent) ** 2).sum() - - loss = clip_loss_ + l2_loss_ - print(clip_loss_.item(), l2_loss_.item()) - - optimizer.zero_grad() - loss.backward(retain_graph=True) - optimizer.step() - print("clip loss: {:.5f}\tL2 loss: {:.5f}".format(clip_loss_.item(), args.l2_lambda * l2_loss_)) - - generated_mols = MegaMolBART_wrapper.inverse_transform( - [latent], pad_mask_init.bool().cuda(), k=1, sanitize=True) - # print("generated_mols",generated_mols[0]) - # Chem.SanitizeMol(generated_mols[0]) - print("final SMILES", generated_mols[0]) - result_SMILES_list.append(generated_mols[0]) - - evaluate_SMILES_list(result_SMILES_list) - print() diff --git a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_utils.py b/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_utils.py deleted file mode 100644 index 40e7e2b..0000000 --- a/open_biomed/models/MoleculeSTM/backup/downstream_language_edit_utils.py +++ /dev/null @@ -1,133 +0,0 @@ -import os -import copy -import torch -import torch.nn as nn -import torch.nn.functional as F -from transformers import AutoModel, AutoTokenizer -from MoleculeSTM.models.mega_molbart.mega_mol_bart import MegaMolBART -from rdkit import Chem, RDLogger -from 
rdkit.Chem import AllChem, Descriptors -lg = RDLogger.logger() -lg.setLevel(RDLogger.CRITICAL) - - -def load_molecule_models(args): - """ - This function returns the two encoders, one for molecule generative model and one for CLIP. - TODO: now we adopt MegaMolBART for both. Will make this more flexible in the future. - """ - # This is loading from the pretarined_MegaMolBART - MegaMolBART_wrapper = MegaMolBART(input_dir=args.generation_model_dir, output_dir=None) - molecule_model_generation = copy.deepcopy(MegaMolBART_wrapper.model) - print("Loading from pretrained MegaMolBART ({}).".format(args.generation_model_dir)) - molecule_dim_generation = 256 - - input_model_path = os.path.join(args.MoleculeSTM_model_dir, "molecule_model.pth") - molecule_model_MoleculeSTM = MegaMolBART_wrapper.model - state_dict = torch.load(input_model_path, map_location='cpu') - print("Loading from {}...".format(input_model_path)) - molecule_model_MoleculeSTM.load_state_dict(state_dict) - molecule_dim_MoleculeSTM = args.SSL_emb_dim - - mol2latent_MoleculeSTM = nn.Linear(256, molecule_dim_MoleculeSTM) - input_model_path = os.path.join(args.MoleculeSTM_model_dir, "mol2latent_model.pth") - print("Loading from {}...".format(input_model_path)) - state_dict = torch.load(input_model_path, map_location='cpu') - mol2latent_MoleculeSTM.load_state_dict(state_dict) - return MegaMolBART_wrapper, molecule_model_generation, molecule_dim_generation, \ - molecule_model_MoleculeSTM, mol2latent_MoleculeSTM, molecule_dim_MoleculeSTM - - -def load_language_molecule_and_edit_models(args): - pretrained_SciBERT_folder = os.path.join(args.dataspace_path, 'pretrained_SciBERT') - # text_tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased', cache_dir=pretrained_SciBERT_folder) - # TODO: check https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py#L1501 - # text_model = AutoModel.from_pretrained('allenai/scibert_scivocab_uncased', 
cache_dir=pretrained_SciBERT_folder) - text_tokenizer = AutoTokenizer.from_pretrained('/mnt/cyz_dair/projects/MoleculeSTM-main/MoleculeSTM-main/data/pretrained_SciBERT', cache_dir=pretrained_SciBERT_folder) - text_model = AutoModel.from_pretrained('/mnt/cyz_dair/projects/MoleculeSTM-main/MoleculeSTM-main/data/pretrained_SciBERT', cache_dir=pretrained_SciBERT_folder) - - text_dim = 768 - - input_model_path = os.path.join(args.MoleculeSTM_model_dir, "text_model.pth") - print("Loading from {}...".format(input_model_path)) - state_dict = torch.load(input_model_path, map_location='cpu') - text_model.load_state_dict(state_dict) - - """ - input_model_path = os.path.join(args.MoleculeSTM_model_dir, "molecule_model.pth") - print("Loading from {}...".format(input_model_path)) - MegaMolBART_wrapper = MegaMolBART(input_dir=None, output_dir=None) - molecule_model = MegaMolBART_wrapper.model - state_dict = torch.load(input_model_path, map_location='cpu') - molecule_model.load_state_dict(state_dict) - """ - # This is loading from the pretarined_MegaMolBART - MegaMolBART_wrapper = MegaMolBART(input_dir=args.generation_model_dir, output_dir=None) - molecule_model = MegaMolBART_wrapper.model - print("Loading from pretrained MegaMolBART ({}).".format(args.generation_model_dir)) - molecule_dim_generation = 256 - molecule_dim_MoleculeSTM = 256 - - text2latent = nn.Linear(text_dim, args.SSL_emb_dim) - input_model_path = os.path.join(args.MoleculeSTM_model_dir, "text2latent_model.pth") - print("Loading from {}...".format(input_model_path)) - state_dict = torch.load(input_model_path, map_location='cpu') - text2latent.load_state_dict(state_dict) - - mol2latent = nn.Linear(molecule_dim_generation, args.SSL_emb_dim) - input_model_path = os.path.join(args.MoleculeSTM_model_dir, "mol2latent_model.pth") - print("Loading from {}...".format(input_model_path)) - state_dict = torch.load(input_model_path, map_location='cpu') - mol2latent.load_state_dict(state_dict) - - generation2MoleculeSTM = 
nn.Linear(molecule_dim_generation, molecule_dim_MoleculeSTM) - input_model_path = os.path.join(args.language_edit_model_dir, "generation2MoleculeSTM_model.pth") - print("Loading from {}...".format(input_model_path)) - state_dict = torch.load(input_model_path, map_location='cpu') - generation2MoleculeSTM.load_state_dict(state_dict) - - MoleculeSTM2generation = nn.Linear(molecule_dim_MoleculeSTM, molecule_dim_generation) - input_model_path = os.path.join(args.language_edit_model_dir, "MoleculeSTM2generation_model.pth") - print("Loading from {}...".format(input_model_path)) - state_dict = torch.load(input_model_path, map_location='cpu') - MoleculeSTM2generation.load_state_dict(state_dict) - - return text_model, text_tokenizer, text_dim, molecule_model, MegaMolBART_wrapper, molecule_dim_generation, text2latent, mol2latent, generation2MoleculeSTM, MoleculeSTM2generation - - -def clip_loss_for_edit(molecule_repr, mol2latent, text_repr): - # molecule_repr = F.normalize(molecule_repr, dim=-1) - # molecule_repr = mol2latent(molecule_repr) - molecule_repr = F.normalize(molecule_repr, dim=-1) - - text_repr = F.normalize(text_repr, dim=-1) - - similarity = -torch.mm(molecule_repr, text_repr.transpose(0, 1))[0] - return similarity - - -def evaluate_SMILES_list(SMILES_list): - print("SMILES_list:") - print(SMILES_list) - mol_list = [] - for SMILES in SMILES_list: - mol = Chem.MolFromSmiles(SMILES) - # Chem.SanitizeMol(mol) - # print(SMILES, mol) - if mol is None: - continue - mol_list.append(mol) - print("mol_list", len(mol_list)) - - print() - props = ["MolWt", "MolLogP", "TPSA", "qed"] - props = ["MolLogP"] - prop_pred = [(n, func) for n, func in Descriptors.descList if n.split("_")[-1] in props] - for name, func in prop_pred: - print("evaluating with {}".format(name)) - for SMILES, mol in zip(SMILES_list, mol_list): - value = func(mol) - print("====={} & {:.5f}".format(SMILES, value)) - print() - - return \ No newline at end of file diff --git 
a/open_biomed/models/MoleculeSTM/cuchemcommon/__init__.py b/open_biomed/models/MoleculeSTM/cuchemcommon/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/context.py b/open_biomed/models/MoleculeSTM/cuchemcommon/context.py deleted file mode 100644 index 74e2793..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/context.py +++ /dev/null @@ -1,53 +0,0 @@ -import logging -import os -from configparser import RawConfigParser -from io import StringIO - -from models.MoleculeSTM.cuchemcommon.utils.singleton import Singleton - -logger = logging.getLogger(__name__) - -CONFIG_FILE = '.env' - - -class Context(metaclass=Singleton): - - def __init__(self): - - self.dask_client = None - self.compute_type = 'gpu' - self.is_benchmark = False - self.benchmark_file = None - self.cache_directory = None - self.n_molecule = None - self.batch_size = 10000 - - self.config = {} - if os.path.exists(CONFIG_FILE): - logger.info('Reading properties from %s...', CONFIG_FILE) - self.config = self._load_properties_file(CONFIG_FILE) - else: - logger.warn('Could not locate %s', CONFIG_FILE) - - def _load_properties_file(self, properties_file): - """ - Reads a properties file using ConfigParser. - - :param propertiesFile/configFile: - """ - config_file = open(properties_file, 'r') - config_content = StringIO('[root]\n' + config_file.read()) - config = RawConfigParser() - config.read_file(config_content) - - return config._sections['root'] - - def get_config(self, config_name, default=None): - """ - Returns values from local configuration. 
- """ - try: - return self.config[config_name] - except KeyError: - logger.warn('%s not found, returing default.', config_name) - return default diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/__init__.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/__init__.py deleted file mode 100644 index 3a07d30..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/data/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -from typing import List - - -class ClusterWfDAO(object): - """ - Base class for all DAO for fetching data for Clustering Workflows - """ - - def meta_df(self): - """ - Returns df with dtype set for structure without any column filter. - """ - return NotImplemented - - def fetch_molecular_embedding(self, n_molecules: int, cache_directory: str = None): - """ - Fetch molecular properties from database/cache into a dask array. - """ - return NotImplemented - - def fetch_molecular_embedding_by_id(self, molecule_id: List): - """ - Fetch molecular properties from database for the given id. Id depends on - the backend databse. For chemble DB it should be molregid. - """ - return NotImplemented - - def fetch_id_from_smile(self, new_molecules: List): - """ - Fetch molecular details for a list of molecules. The values in the list - of molecules depends on database/service used. For e.g. it could be - ChemblId or molreg_id for Chemble database. - """ - return NotImplemented - - -class GenerativeWfDao(object): - - def fetch_id_from_chembl(self, id: List): - """ - Fetch molecular details for a list of molecules. The values in the list - of molecules depends on database/service used. For e.g. it could be - ChemblId or molreg_id for Chemble database. 
- """ - return NotImplemented diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/cluster_wf.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/cluster_wf.py deleted file mode 100644 index 6462d5f..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/data/cluster_wf.py +++ /dev/null @@ -1,61 +0,0 @@ -import logging -import math -import os -from typing import List - -import cudf -import dask -import dask_cudf -from cuchemcommon.context import Context -from cuchemcommon.data.helper.chembldata import BATCH_SIZE, ChEmblData -from cuchemcommon.utils.singleton import Singleton - -from . import ClusterWfDAO - -logger = logging.getLogger(__name__) - -FINGER_PRINT_FILES = 'filter_*.h5' - - -class ChemblClusterWfDao(ClusterWfDAO, metaclass=Singleton): - - def __init__(self, fp_type): - self.chem_data = ChEmblData(fp_type) - - def meta_df(self): - chem_data = ChEmblData() - return chem_data._meta_df() - - def fetch_molecular_embedding(self, - n_molecules: int, - cache_directory: str = None): - context = Context() - if cache_directory: - hdf_path = os.path.join(cache_directory, FINGER_PRINT_FILES) - logger.info('Reading %d rows from %s...', n_molecules, hdf_path) - mol_df = dask.dataframe.read_hdf(hdf_path, 'fingerprints') - - if n_molecules > 0: - npartitions = math.ceil(n_molecules / BATCH_SIZE) - mol_df = mol_df.head(n_molecules, compute=False, npartitions=npartitions) - else: - logger.info('Reading molecules from database...') - mol_df = self.chem_data.fetch_mol_embedding(num_recs=n_molecules, - batch_size=context.batch_size) - - return mol_df - - def fetch_molecular_embedding_by_id(self, molecule_id: List): - context = Context() - meta = self.chem_data._meta_df() - fp_df = self.chem_data._fetch_mol_embedding(molregnos=molecule_id, - batch_size=context.batch_size) \ - .astype(meta.dtypes) - - fp_df = cudf.from_pandas(fp_df) - fp_df = dask_cudf.from_cudf(fp_df, npartitions=1).reset_index() - return fp_df - - def fetch_id_from_chembl(self, new_molecules: 
List): - logger.debug('Fetch ChEMBL ID using molregno...') - return self.chem_data.fetch_id_from_chembl(new_molecules) diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/generative_wf.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/generative_wf.py deleted file mode 100644 index 9e16a2d..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/data/generative_wf.py +++ /dev/null @@ -1,19 +0,0 @@ -import logging -from typing import List - -from cuchemcommon.data.helper.chembldata import ChEmblData -from cuchemcommon.utils.singleton import Singleton - -from . import GenerativeWfDao - -logger = logging.getLogger(__name__) - - -class ChemblGenerativeWfDao(GenerativeWfDao, metaclass=Singleton): - - def __init__(self, fp_type): - self.chem_data = ChEmblData(fp_type) - - def fetch_id_from_chembl(self, id: List): - logger.debug('Fetch ChEMBL ID using molregno...') - return self.chem_data.fetch_id_from_chembl(id) diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/__init__.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/chembldata.py b/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/chembldata.py deleted file mode 100644 index 7b0d272..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/data/helper/chembldata.py +++ /dev/null @@ -1,320 +0,0 @@ -import os -import warnings -import pandas -import sqlite3 -import logging - -from typing import List -from dask import delayed, dataframe - -from contextlib import closing -from cuchemcommon.utils.singleton import Singleton -from cuchemcommon.context import Context - -warnings.filterwarnings("ignore", message=r"deprecated", category=FutureWarning) -logger = logging.getLogger(__name__) - -BATCH_SIZE = 100000 -ADDITIONAL_FEILD = ['canonical_smiles', 'transformed_smiles'] -IMP_PROPS = [ - 'alogp', - 'aromatic_rings', - 'full_mwt', - 'psa', - 'rtb'] 
-IMP_PROPS_TYPE = [pandas.Series([], dtype='float64'), - pandas.Series([], dtype='int64'), - pandas.Series([], dtype='float64'), - pandas.Series([], dtype='float64'), - pandas.Series([], dtype='int64')] -ADDITIONAL_FEILD_TYPE = [pandas.Series([], dtype='object'), - pandas.Series([], dtype='object')] - -SQL_MOLECULAR_PROP = """ -SELECT md.molregno as molregno, md.chembl_id, cp.*, cs.* -FROM compound_properties cp, - compound_structures cs, - molecule_dictionary md -WHERE cp.molregno = md.molregno - AND md.molregno = cs.molregno - AND md.molregno in (%s) -""" - - -# DEPRECATED. Please add code to DAO classes. -class ChEmblData(object, metaclass=Singleton): - - def __init__(self, fp_type): - - context = Context() - db_file = context.get_config('data_mount_path', default='/data') - db_file = os.path.join(db_file, 'db/chembl_27.db') - - if not os.path.exists(db_file): - logger.error('%s not found', db_file) - raise Exception('{} not found'.format(db_file)) - - self.fp_type = fp_type - self.chembl_db = 'file:%s?mode=ro' % db_file - - logger.info('ChEMBL database: %s...' % self.chembl_db) - - def fetch_props_by_molregno(self, molregnos): - """ - Returns compound properties and structure filtered by ChEMBL IDs along - with a list of columns. - """ - with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ - closing(con.cursor()) as cur: - select_stmt = SQL_MOLECULAR_PROP % " ,".join(list(map(str, molregnos))) - cur.execute(select_stmt) - - cols = list(map(lambda x: x[0], cur.description)) - return cols, cur.fetchall() - - def fetch_props_by_chemble(self, chemble_ids): - """ - Returns compound properties and structure filtered by ChEMBL IDs along - with a list of columns. 
- """ - sql_stml = """ - SELECT md.molregno as molregno, md.chembl_id, cp.*, cs.* - FROM compound_properties cp, - compound_structures cs, - molecule_dictionary md - WHERE cp.molregno = md.molregno - AND md.molregno = cs.molregno - AND md.chembl_id in (%s) - """ - with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ - closing(con.cursor()) as cur: - select_stmt = sql_stml % "'%s'" % "','".join([x.strip().upper() for x in chemble_ids]) - cur.execute(select_stmt) - - cols = list(map(lambda x: x[0], cur.description)) - return cols, cur.fetchall() - - def fetch_molregno_by_chemblId(self, chemblIds): - logger.debug('Fetch ChEMBL ID using molregno...') - with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ - closing(con.cursor()) as cur: - select_stmt = ''' - SELECT md.molregno as molregno - FROM compound_properties cp, - compound_structures cs, - molecule_dictionary md - WHERE cp.molregno = md.molregno - AND md.molregno = cs.molregno - AND md.chembl_id in (%s) - ''' % "'%s'" % "','".join(chemblIds) - cur.execute(select_stmt) - return cur.fetchall() - - def fetch_id_from_chembl(self, new_molecules: List): - logger.debug('Fetch ChEMBL ID using molregno...') - - with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ - closing(con.cursor()) as cur: - select_stmt = ''' - SELECT cs.molregno as molregno, md.chembl_id as chembl_id, - cs.canonical_smiles as smiles - FROM compound_structures cs, - molecule_dictionary md - WHERE md.molregno = cs.molregno - AND md.chembl_id in (%s) - ''' % "'%s'" % "','".join([x.strip().upper() for x in new_molecules]) - cur.execute(select_stmt) - - return cur.fetchall() - - def fetch_chemblId_by_molregno(self, molregnos): - logger.debug('Fetch ChEMBL ID using molregno...') - with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ - closing(con.cursor()) as cur: - select_stmt = ''' - SELECT md.chembl_id as chembl_id - FROM molecule_dictionary md - WHERE md.molregno in (%s) - ''' % 
", ".join(list(map(str, molregnos))) - cur.execute(select_stmt) - return cur.fetchall() - - def fetch_approved_drugs(self): - """Fetch approved drugs with phase >=3 as dataframe - - Args: - chembl_db_path (string): path to chembl sqlite database - Returns: - pd.DataFrame: dataframe containing SMILES strings and molecule index - """ - logger.debug('Fetching ChEMBL approved drugs...') - with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ - closing(con.cursor()) as cur: - select_stmt = """SELECT - di.molregno, - cs.canonical_smiles, - di.max_phase_for_ind - FROM - drug_indication AS di - LEFT JOIN compound_structures AS cs ON di.molregno = cs.molregno - WHERE - di.max_phase_for_ind >= 3 - AND cs.canonical_smiles IS NOT NULL;""" - cur.execute(select_stmt) - return cur.fetchall() - - def fetch_random_samples(self, num_samples, max_len): - """Fetch random samples from ChEMBL as dataframe - - Args: - num_samples (int): number of samples to select - chembl_db_path (string): path to chembl sqlite database - Returns: - pd.DataFrame: dataframe containing SMILES strings and molecule index - """ - logger.debug('Fetching ChEMBL random samples...') - with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ - closing(con.cursor()) as cur: - select_stmt = """SELECT - cs.molregno, - cs.canonical_smiles, - LENGTH(cs.canonical_smiles) as len - FROM - compound_structures AS cs - WHERE - cs.canonical_smiles IS NOT NULL - AND - len <= """ + f'{max_len}' + """ - ORDER BY RANDOM() - LIMIT """ + f'{num_samples};' - - cur.execute(select_stmt) - return cur.fetchall() - - def fetch_molecule_cnt(self): - logger.debug('Finding number of molecules...') - with closing(sqlite3.connect(self.chembl_db, uri=True)) as con, con, \ - closing(con.cursor()) as cur: - select_stmt = ''' - SELECT count(*) - FROM compound_properties cp, - molecule_dictionary md, - compound_structures cs - WHERE cp.molregno = md.molregno - AND md.molregno = cs.molregno - ''' - 
cur.execute(select_stmt) - - return cur.fetchone()[0] - - def _meta_df(self, **transformation_kwargs): - transformation = self.fp_type(**transformation_kwargs) - - prop_meta = {'id': pandas.Series([], dtype='int64')} - prop_meta.update(dict(zip(IMP_PROPS + ADDITIONAL_FEILD, - IMP_PROPS_TYPE + ADDITIONAL_FEILD_TYPE))) - prop_meta.update({i: pandas.Series([], dtype='float32') for i in range(len(transformation))}) - - return pandas.DataFrame(prop_meta) - - def _fetch_mol_embedding(self, - start=0, - batch_size=BATCH_SIZE, - molregnos=None, - **transformation_kwargs): - """ - Returns compound properties and structure for the first N number of - records in a dataframe. - """ - - logger.info('Fetching %d records starting %d...' % (batch_size, start)) - - imp_cols = ['cp.' + col for col in IMP_PROPS] - - if molregnos is None: - select_stmt = ''' - SELECT md.molregno, %s, cs.canonical_smiles - FROM compound_properties cp, - molecule_dictionary md, - compound_structures cs - WHERE cp.molregno = md.molregno - AND md.molregno = cs.molregno - LIMIT %d, %d - ''' % (', '.join(imp_cols), start, batch_size) - else: - select_stmt = ''' - SELECT md.molregno, %s, cs.canonical_smiles - FROM compound_properties cp, - molecule_dictionary md, - compound_structures cs - WHERE cp.molregno = md.molregno - AND md.molregno = cs.molregno - AND md.molregno in (%s) - LIMIT %d, %d - ''' % (', '.join(imp_cols), " ,".join(list(map(str, molregnos))), start, batch_size) - - df = pandas.read_sql(select_stmt, - sqlite3.connect(self.chembl_db, uri=True)) - - # Smiles -> Smiles transformation and filtering - # TODO: Discuss internally to find use or refactor this code to remove - # model specific filtering - df['transformed_smiles'] = df['canonical_smiles'] - # if smiles_transforms is not None: - # if len(smiles_transforms) > 0: - # for xf in smiles_transforms: - # df['transformed_smiles'] = df['transformed_smiles'].map(xf.transform) - # df.dropna(subset=['transformed_smiles'], axis=0, inplace=True) - - 
# Conversion to fingerprints or embeddings - # transformed_smiles = df['transformed_smiles'] - transformation = self.fp_type(**transformation_kwargs) - cache_data = transformation.transform(df) - return_df = pandas.DataFrame(cache_data) - - return_df = pandas.DataFrame( - return_df, - columns=pandas.RangeIndex(start=0, - stop=len(transformation))).astype('float32') - - return_df = df.merge(return_df, left_index=True, right_index=True) - return_df.rename(columns={'molregno': 'id'}, inplace=True) - return return_df - - def fetch_mol_embedding(self, - num_recs=None, - batch_size=BATCH_SIZE, - molregnos=None, - **transformation_kwargs): - """ - Returns compound properties and structure for the first N number of - records in a dataframe. - """ - logger.debug('Fetching properties for all molecules...') - - if num_recs is None or num_recs < 0: - num_recs = self.fetch_molecule_cnt() - - logger.info('num_recs %d', num_recs) - logger.info('batch_size %d', batch_size) - meta_df = self._meta_df(**transformation_kwargs) - - dls = [] - for start in range(0, num_recs, batch_size): - bsize = min(num_recs - start, batch_size) - dl_data = delayed(self._fetch_mol_embedding)(start=start, - batch_size=bsize, - molregnos=molregnos, - **transformation_kwargs) - dls.append(dl_data) - - return dataframe.from_delayed(dls, meta=meta_df) - - def save_fingerprints(self, hdf_path='data/filter_*.h5', num_recs=None, batch_size=5000): - """ - Generates fingerprints for all ChEMBL ID's in the database - """ - logger.debug('Fetching molecules from database for fingerprints...') - - mol_df = self.fetch_mol_embedding(num_recs=num_recs, batch_size=batch_size) - mol_df.to_hdf(hdf_path, 'fingerprints') diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/fingerprint.py b/open_biomed/models/MoleculeSTM/cuchemcommon/fingerprint.py deleted file mode 100644 index 55f2471..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/fingerprint.py +++ /dev/null @@ -1,95 +0,0 @@ -import logging -import os 
-from abc import ABC -from enum import Enum - -import numpy as np -import pandas as pd -from cddd.inference import InferenceModel -from cuchem.utils.data_peddler import download_cddd_models -from rdkit import Chem -from rdkit.Chem import AllChem - -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' -logger = logging.getLogger(__name__) - - -def calc_morgan_fingerprints(dataframe, smiles_col='canonical_smiles'): - """Calculate Morgan fingerprints on SMILES strings - - Args: - dataframe (pd.DataFrame): dataframe containing a SMILES column for calculation - - Returns: - pd.DataFrame: new dataframe containing fingerprints - """ - mf = MorganFingerprint() - fp = mf.transform(dataframe, col_name=smiles_col) - fp = pd.DataFrame(fp) - fp.index = dataframe.index - return fp - - -class TransformationDefaults(Enum): - MorganFingerprint = {'radius': 2, 'nBits': 512} - Embeddings = {} - - -class BaseTransformation(ABC): - def __init__(self, **kwargs): - self.name = None - self.kwargs = None - self.func = None - - def transform(self, data): - return NotImplemented - - def transform_many(self, data): - return list(map(self.transform, data)) - - def __len__(self): - return NotImplemented - - -class MorganFingerprint(BaseTransformation): - - def __init__(self, **kwargs): - self.name = __class__.__name__.split('.')[-1] - self.kwargs = TransformationDefaults[self.name].value - self.kwargs.update(kwargs) - self.func = AllChem.GetMorganFingerprintAsBitVect - - def transform(self, data, col_name='transformed_smiles'): - data = data[col_name] - fp_array = [] - for mol in data: - m = Chem.MolFromSmiles(mol) - fp = self.func(m, **self.kwargs) - fp_array.append(list(fp.ToBitString())) - fp_array = np.asarray(fp_array) - return fp_array - - def __len__(self): - return self.kwargs['nBits'] - - -class Embeddings(BaseTransformation): - - def __init__(self, use_gpu=True, cpu_threads=5, model_dir=None, **kwargs): - self.name = __class__.__name__.split('.')[-1] - self.kwargs = 
TransformationDefaults[self.name].value - self.kwargs.update(kwargs) - model_dir = download_cddd_models() - self.func = InferenceModel(model_dir, use_gpu=use_gpu, cpu_threads=cpu_threads) - - def transform(self, data): - data = data['transformed_smiles'] - return self.func.seq_to_emb(data).squeeze() - - def inverse_transform(self, embeddings): - "Embedding array -- individual compound embeddings are in rows" - embeddings = np.asarray(embeddings) - return self.func.emb_to_seq(embeddings) - - def __len__(self): - return self.func.hparams.emb_size diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/smiles.py b/open_biomed/models/MoleculeSTM/cuchemcommon/smiles.py deleted file mode 100644 index 5034fa1..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/smiles.py +++ /dev/null @@ -1,38 +0,0 @@ -# import os -# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - -# import logging -# from abc import ABC -# from rdkit.Chem.SaltRemover import SaltRemover -# from cddd.preprocessing import remove_salt_stereo, filter_smiles - -# logger = logging.getLogger(__name__) - - -# class BaseTransformation(ABC): -# def __init__(self): -# pass - -# def transform(self, data): -# return NotImplemented - -# def transform_many(self, data): -# return list(map(self.transform, data)) -# #return [self.filter(x) for x in data] - - -# class RemoveSalt(BaseTransformation): -# def __init__(self, remover=SaltRemover()): -# self.name = __class__.__name__.split('.')[-1] -# self.remover = remover - -# def transform(self, data): -# return remove_salt_stereo(data, self.remover) - - -# class PreprocessSmiles(BaseTransformation): -# def __init__(self): -# self.name = __class__.__name__.split('.')[-1] - -# def transform(self, data): -# return filter_smiles(data) diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/__init__.py b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/__init__.py deleted file mode 100644 index 0de2d94..0000000 --- 
a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from cuchemcommon.utils.singleton import Singleton \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/logger.py b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/logger.py deleted file mode 100644 index 7f9e669..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/logger.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/opt/conda/envs/rapids/bin/python3 -# -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -import os -from datetime import datetime - -from cuchemcommon.context import Context - -from .sysinfo import get_machine_config, print_machine_config - -BENCHMARK_FILE = '/data/benchmark.csv' - -logger = logging.getLogger(__name__) - - -def initialize_logfile(benchmark_file=BENCHMARK_FILE): - """Initialize benchmark file with header if needed""" - - config = get_machine_config() - config_message = print_machine_config(config) - - if not os.path.exists(benchmark_file): - with open(benchmark_file, 'w') as fh: - fh.write(f'# {config_message}\n') - fh.write('date,benchmark_type,step,time(hh:mm:ss.ms),n_molecules,n_workers,metric_name,metric_value\n') - return benchmark_file - - -class MetricsLogger(object): - - def __init__(self, - task_name, - n_molecules): - - self.task_name = task_name - self.n_molecules = n_molecules - self.start_time = None - self.metric_name = None - self.metric_value = None - - self.metric_func = None - self.metric_func_args = None - self.metric_func_kwargs = {} - - def __enter__(self): - self.start_time = datetime.now() - - return self - - def __exit__(self, type, value, traceback): - context = Context() - - runtime = datetime.now() - self.start_time - logger.info('### Runtime {} time (hh:mm:ss.ms) {}'.format(self.task_name, runtime)) - n_workers = len(context.dask_client.cluster.workers) - - if self.metric_func and context.is_benchmark: - self.metric_value = self.metric_func(*self.metric_func_args, - **self.metric_func_kwargs) - - if self.metric_value is None: - self.metric_name = '' - self.metric_value = '' - else: - logger.info('Calculated {} is {}'.format(self.metric_name, self.metric_value)) - - log_results(self.start_time, context.compute_type, self.task_name, - runtime, - n_molecules=self.n_molecules, - n_workers=n_workers, - metric_name=self.metric_name, - metric_value=self.metric_value, - benchmark_file=context.benchmark_file) - - -def log_results(date, - benchmark_type, - step, - time, - n_molecules, - n_workers, - 
metric_name='', - metric_value='', - benchmark_file=BENCHMARK_FILE): - """Log benchmark results to a file""" - - out_list = [date, benchmark_type, step, time, n_molecules, n_workers, metric_name, metric_value] - out_fmt = ','.join(['{}'] * len(out_list)) + '\n' - - with open(benchmark_file, 'a') as fh: - out_string = out_fmt.format(*out_list) - fh.write(out_string) diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/singleton.py b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/singleton.py deleted file mode 100644 index fc28938..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/singleton.py +++ /dev/null @@ -1,26 +0,0 @@ -# singleton.py - -import logging - -""" -Metaclass for singletons. -""" - -logger = logging.getLogger(__name__) - - -class Singleton(type): - """ - Ensures single instance of a class. - - Example Usage: - class MySingleton(metaclass=Singleton) - pass - """ - _instances = {} - - def __call__(cls, *args, **kwargs): - if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__( - *args, **kwargs) - return cls._instances[cls] diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/sysinfo.py b/open_biomed/models/MoleculeSTM/cuchemcommon/utils/sysinfo.py deleted file mode 100644 index 1077c5b..0000000 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/utils/sysinfo.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/opt/conda/envs/rapids/bin/python3 -# -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from collections import Counter - -import psutil -import pynvml as nv - - -def get_machine_config(): - """Get machine config for CPU and GPU(s)""" - - # CPU config - physical_cores = psutil.cpu_count(logical=False) - logical_cores = psutil.cpu_count(logical=True) - - cpufreq = psutil.cpu_freq() - cpufreq_max = cpufreq.max # Mhz - cpufreq_min = cpufreq.min - cpufreq_cur = cpufreq.current - - svmem = psutil.virtual_memory() - mem_total = svmem.total / (1024.0 ** 3) # GB - mem_avail = svmem.available / (1024.0 ** 3) - - # GPU config - nv.nvmlInit() - driver_version = nv.nvmlSystemGetDriverVersion() - deviceCount = nv.nvmlDeviceGetCount() - gpu_devices, gpu_mems = [], [] - for i in range(deviceCount): - handle = nv.nvmlDeviceGetHandleByIndex(i) - gpu_devices.append(nv.nvmlDeviceGetName(handle).decode("utf-8")) - gpu_mem = nv.nvmlDeviceGetMemoryInfo(handle).total / (1024.0 ** 3) - gpu_mems.append(gpu_mem) - - return {'cpu': {'physical_cores': physical_cores, 'logical_cores': logical_cores, - 'min_freq_MHz': cpufreq_min, 'max_freq_MHz': cpufreq_max, 'cur_freq_MHz': cpufreq_cur, - 'total_mem_GB': mem_total, 'avail_mem_GB': mem_avail}, - 'gpu': {'devices': gpu_devices, 'mem_GB': gpu_mems}} - - -def print_machine_config(config): - """Printable version of config""" - cpu_cores = config['cpu']['physical_cores'] - cpu_freq = int(round(config['cpu']['max_freq_MHz'], 0)) - ram = int(round(config['cpu']['total_mem_GB'], 0)) - cpu_config_message = f'{cpu_freq} MHz CPU with {cpu_cores} cores, {ram} GB RAM' - - gpu_devices = Counter([(x, int(round(y, 0))) for x, y in zip(config['gpu']['devices'], config['gpu']['mem_GB'])]) - gpu_config_message = '' - for (handle, mem), count in gpu_devices.items(): - gpu_config_message += f'{count} x {handle} GPU(s)' - - return ', '.join([cpu_config_message, gpu_config_message]) diff --git 
a/open_biomed/models/MoleculeSTM/datasets/DrugBankGraph.py b/open_biomed/models/MoleculeSTM/datasets/DrugBankGraph.py deleted file mode 100644 index 1b9ea61..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/DrugBankGraph.py +++ /dev/null @@ -1,235 +0,0 @@ -import os -from itertools import chain, repeat -import pandas as pd -import torch -from torch_geometric.data import InMemoryDataset, Data -from models.MoleculeSTM.datasets.utils import mol_to_graph_data_obj_simple -from rdkit.Chem import AllChem - - -class DrugBank_Datasets_Graph_retrieval(InMemoryDataset): - def __init__( - self, root, train_mode, neg_sample_size, processed_dir_prefix, template="raw/SMILES_description_{}.txt", - transform=None, pre_transform=None, pre_filter=None, empty=False - ): - self.root = root - self.transform = transform - self.pre_filter = pre_filter - self.pre_transform = pre_transform - self.processed_dir_prefix = processed_dir_prefix - self.template = template - self.train_mode = train_mode - self.smiles_text_file_name = "SMILES.csv" - - super(DrugBank_Datasets_Graph_retrieval, self).__init__(root, transform, pre_transform, pre_filter) - - if not empty: - self.data, self.slices = torch.load(self.processed_paths[0]) - print('Data: {}'.format(self.data)) - - df = pd.read_csv(os.path.join(self.processed_dir, self.smiles_text_file_name)) - print(df.columns) - self.text_list = df["text"].tolist() - - # sampling - self.neg_sample_size = neg_sample_size - negative_sampled_index_file = os.path.join(self.root, "index", template.format(train_mode)) - print("Loading negative samples from {}".format(negative_sampled_index_file)) - f = open(negative_sampled_index_file, 'r') - neg_index_list = [] - for line in f.readlines(): - line = line.strip().split(",") - line = [int(x) for x in line] - neg_index_list.append(line) - self.neg_index_list = neg_index_list - - return - - def get_graph(self, index): - data = Data() - for key in self.data.keys: - item, slices = self.data[key], self.slices[key] - 
s = list(repeat(slice(None), item.dim())) - s[data.__cat_dim__(key, item)] = slice(slices[index], slices[index + 1]) - data[key] = item[s] - return data - - def get(self, index): - text = self.text_list[index] - data = self.get_graph(index) - neg_index_list = self.neg_index_list[index][:self.neg_sample_size] - neg_text = [self.text_list[idx] for idx in neg_index_list] - neg_index_list = self.neg_index_list[index][:self.neg_sample_size] - neg_data = [self.get_graph(idx) for idx in neg_index_list] - return text, data, neg_text, neg_data - - @property - def raw_file_names(self): - file_name_list = os.listdir(self.raw_dir) - return file_name_list - - @property - def processed_dir(self): - return os.path.join(self.root, 'processed', '{}_{}'.format(self.processed_dir_prefix, self.train_mode)) - - @property - def processed_file_names(self): - return 'geometric_data_processed.pt' - - def download(self): - return - - def process(self): - data_list, SMILES_list, text_list = [], [], [] - SMILES2description_file = os.path.join(self.root, 'raw', self.template.format(self.train_mode)) - f = open(SMILES2description_file, 'r') - - for line_id, line in enumerate(f.readlines()): - line = line.strip().split("\t", 1) - SMILES = line[0] - text = line[1] - - rdkit_mol = AllChem.MolFromSmiles(SMILES) - data = mol_to_graph_data_obj_simple(rdkit_mol) - data.id = torch.tensor([line_id]) - - data_list.append(data) - SMILES_list.append(SMILES) - text_list.append(text) - - if self.pre_filter is not None: - data_list = [data for data in data_list if self.pre_filter(data)] - - if self.pre_transform is not None: - data_list = [self.pre_transform(data) for data in data_list] - - df = pd.DataFrame( - {"text": text_list, "smiles": SMILES_list}, - ) - saver_path = os.path.join(self.processed_dir, self.smiles_text_file_name) - print("saving to {}".format(saver_path)) - df.to_csv(saver_path, index=False) - - data, slices = self.collate(data_list) - torch.save((data, slices), self.processed_paths[0]) - 
print("saving to {}".format(self.processed_paths[0])) - print() - return - - def __len__(self): - return len(self.text_list) - - -class DrugBank_Datasets_Graph_ATC(InMemoryDataset): - def __init__( - self, root, file_name, processed_dir_prefix, neg_sample_size, prompt_template="{}.", - transform=None, pre_transform=None, pre_filter=None, empty=False - ): - self.root = root - self.transform = transform - self.pre_filter = pre_filter - self.pre_transform = pre_transform - self.file_name = file_name - self.processed_dir_prefix = processed_dir_prefix - self.smiles_text_file_name = "SMILES.csv" - self.prompt_template = prompt_template - - super(DrugBank_Datasets_Graph_ATC, self).__init__(root, transform, pre_transform, pre_filter) - - if not empty: - self.data, self.slices = torch.load(self.processed_paths[0]) - print('Data: {}'.format(self.data)) - - df = pd.read_csv(os.path.join(self.processed_dir, self.smiles_text_file_name)) - self.SMILES_list = df["smiles"].tolist() - self.ATC_code_list = df["ATC_code"].tolist() - ATC_label_list = df["ATC_label"].tolist() # This is for raw TAC label - self.ATC_label_list = [self.prompt_template.format(x) for x in ATC_label_list] - - self.neg_sample_size = neg_sample_size - negative_sampled_index_file = os.path.join(self.root, "index", file_name) - print("Loading negative samples from {}".format(negative_sampled_index_file)) - f = open(negative_sampled_index_file, 'r') - neg_index_list = [] - for line in f.readlines(): - line = line.strip().split(",") - line = [int(x) for x in line] - neg_index_list.append(line) - self.neg_index_list = neg_index_list - - assert len(self.SMILES_list) == len(self.neg_index_list) == len(self.ATC_code_list) == len(self.ATC_label_list) - return - - def get_graph(self, index): - data = Data() - for key in self.data.keys: - item, slices = self.data[key], self.slices[key] - s = list(repeat(slice(None), item.dim())) - s[data.__cat_dim__(key, item)] = slice(slices[index], slices[index + 1]) - data[key] = 
item[s] - return data - - def get(self, index): - text = self.ATC_label_list[index] - data = self.get_graph(index) - neg_index_list = self.neg_index_list[index][:self.neg_sample_size] - neg_text = [self.ATC_label_list[idx] for idx in neg_index_list] - neg_index_list = self.neg_index_list[index][:self.neg_sample_size] - neg_data = [self.get_graph(idx) for idx in neg_index_list] - return text, data, neg_text, neg_data - - @property - def raw_file_names(self): - file_name_list = os.listdir(self.raw_dir) - return file_name_list - - @property - def processed_dir(self): - return os.path.join(self.root, "processed", "molecule_{}".format(self.processed_dir_prefix)) - - @property - def processed_file_names(self): - return 'geometric_data_processed.pt' - - def download(self): - return - - def process(self): - SMILES2ATC_txt_file = os.path.join(self.root, "raw", self.file_name) - - f = open(SMILES2ATC_txt_file, 'r') - data_list, SMILES_list, ATC_code_list, ATC_label_list = [], [], [], [] - for line_idx, line in enumerate(f.readlines()): - line = line.strip().split("\t") - SMILES = line[0] - ATC_code = line[1] - ATC_label = line[2] - rdkit_mol = AllChem.MolFromSmiles(SMILES) - data = mol_to_graph_data_obj_simple(rdkit_mol) - data.id = torch.tensor([line_idx]) - - data_list.append(data) - SMILES_list.append(SMILES) - ATC_code_list.append(ATC_code) - ATC_label_list.append(ATC_label) - - if self.pre_filter is not None: - data_list = [data for data in data_list if self.pre_filter(data)] - - if self.pre_transform is not None: - data_list = [self.pre_transform(data) for data in data_list] - - df = pd.DataFrame( - {"smiles": SMILES_list, "ATC_code": ATC_code_list, "ATC_label": ATC_label_list}, - ) - saver_path = os.path.join(self.processed_dir, self.smiles_text_file_name) - print("saving to {}".format(saver_path)) - df.to_csv(saver_path, index=False) - - data, slices = self.collate(data_list) - torch.save((data, slices), self.processed_paths[0]) - print("saving to 
{}".format(self.processed_paths[0])) - return - - def __len__(self): - return len(self.SMILES_list) diff --git a/open_biomed/models/MoleculeSTM/datasets/DrugBankSMILES.py b/open_biomed/models/MoleculeSTM/datasets/DrugBankSMILES.py deleted file mode 100644 index 1307920..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/DrugBankSMILES.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -from torch.utils.data import Dataset - - -class DrugBank_Datasets_SMILES_retrieval(Dataset): - def __init__(self, root, train_mode, neg_sample_size, template="SMILES_description_{}.txt"): - self.root = root - - self.SMILES_list, self.text_list = [], [] - SMILES2description_file = os.path.join(self.root, "raw", template.format(train_mode)) - f = open(SMILES2description_file, 'r') - for line in f.readlines(): - line = line.strip().split("\t", 1) - SMILES = line[0] - text = line[1] - self.SMILES_list.append(SMILES) - self.text_list.append(text) - - self.neg_sample_size = neg_sample_size - negative_sampled_index_file = os.path.join(self.root, "index", template.format(train_mode)) - print("Loading negative samples from {}".format(negative_sampled_index_file)) - f = open(negative_sampled_index_file, 'r') - neg_index_list = [] - for line in f.readlines(): - line = line.strip().split(",") - line = [int(x) for x in line] - neg_index_list.append(line) - self.neg_index_list = neg_index_list - return - - def __getitem__(self, index): - description = self.text_list[index] - SMILES = self.SMILES_list[index] - - neg_index_list = self.neg_index_list[index][:self.neg_sample_size] - neg_description = [self.text_list[idx] for idx in neg_index_list] - - neg_index_list = self.neg_index_list[index][:self.neg_sample_size] - neg_SMILES = [self.SMILES_list[idx] for idx in neg_index_list] - - return description, SMILES, neg_description, neg_SMILES - - def __len__(self): - return len(self.SMILES_list) - - -class DrugBank_Datasets_SMILES_ATC(Dataset): - def __init__(self, root, file_name, neg_sample_size, 
prompt_template="{}."): - self.root = root - self.neg_sample_size = neg_sample_size - self.prompt_template = prompt_template - - SMILES2ATC_txt_file = os.path.join(self.root, 'raw', file_name) - - f = open(SMILES2ATC_txt_file, 'r') - SMILES_list, ATC_code_list, ATC_label_list = [], [], [] - for line in f.readlines(): - line = line.strip().split("\t") - SMILES_list.append(line[0]) - ATC_code_list.append(line[1]) - ATC_label_list.append(prompt_template.format(line[2])) - - self.SMILES_list = SMILES_list - self.ATC_code_list = ATC_code_list - self.ATC_label_list = ATC_label_list - - self.neg_sample_size = neg_sample_size - negative_sampled_index_file = os.path.join(self.root, "index", file_name) - print("Loading negative samples from {}".format(negative_sampled_index_file)) - f = open(negative_sampled_index_file, 'r') - neg_index_list = [] - for line in f.readlines(): - line = line.strip().split(",") - line = [int(x) for x in line] - neg_index_list.append(line) - self.neg_index_list = neg_index_list - - assert len(self.SMILES_list) == len(self.neg_index_list) == len(ATC_code_list) == len(ATC_label_list) - return - - def __getitem__(self, index): - text = self.ATC_label_list[index] - SMILES = self.SMILES_list[index] - - neg_index_list = self.neg_index_list[index][:self.neg_sample_size] - neg_text = [self.ATC_label_list[idx] for idx in neg_index_list] - - neg_index_list = self.neg_index_list[index][:self.neg_sample_size] - neg_SMILES = [self.SMILES_list[idx] for idx in neg_index_list] - - return text, SMILES, neg_text, neg_SMILES - - def __len__(self): - return len(self.SMILES_list) \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/datasets/MoleculeNetGraph.py b/open_biomed/models/MoleculeSTM/datasets/MoleculeNetGraph.py deleted file mode 100644 index 4392598..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/MoleculeNetGraph.py +++ /dev/null @@ -1,584 +0,0 @@ -import os -import pickle -from itertools import chain, repeat - -import networkx as 
nx -import numpy as np -import pandas as pd -import torch -from ogb.utils.features import atom_to_feature_vector, bond_to_feature_vector -from rdkit import Chem -from rdkit.Chem import AllChem, Descriptors -from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect -from torch.utils import data -from torch_geometric.data import (Data, InMemoryDataset, download_url, extract_zip) - - -def mol_to_graph_data_obj_simple(mol): - """ used in MoleculeNetGraphDataset() class - Converts rdkit mol objects to graph data object in pytorch geometric - NB: Uses simplified atom and bond features, and represent as indices - :param mol: rdkit mol object - :return: graph data object with the attributes: x, edge_index, edge_attr """ - - # atoms - # num_atom_features = 2 # atom type, chirality tag - atom_features_list = [] - for atom in mol.GetAtoms(): - atom_feature = atom_to_feature_vector(atom) - atom_features_list.append(atom_feature) - x = torch.tensor(np.array(atom_features_list), dtype=torch.long) - - # bonds - if len(mol.GetBonds()) <= 0: # mol has no bonds - num_bond_features = 3 # bond type & direction - edge_index = torch.empty((2, 0), dtype=torch.long) - edge_attr = torch.empty((0, num_bond_features), dtype=torch.long) - else: # mol has bonds - edges_list = [] - edge_features_list = [] - for bond in mol.GetBonds(): - i = bond.GetBeginAtomIdx() - j = bond.GetEndAtomIdx() - edge_feature = bond_to_feature_vector(bond) - - edges_list.append((i, j)) - edge_features_list.append(edge_feature) - edges_list.append((j, i)) - edge_features_list.append(edge_feature) - - # data.edge_index: Graph connectivity in COO format with shape [2, num_edges] - edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long) - - # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features] - edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.long) - - data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr) - - return data - - -def 
graph_data_obj_to_nx_simple(data): - """ torch geometric -> networkx - NB: possible issues with recapitulating relative - stereochemistry since the edges in the nx object are unordered. - :param data: pytorch geometric Data object - :return: networkx object """ - G = nx.Graph() - - # atoms - atom_features = data.x.cpu().numpy() - num_atoms = atom_features.shape[0] - for i in range(num_atoms): - temp_feature = atom_features[i] - G.add_node( - i, - x0=temp_feature[0], - x1=temp_feature[1], - x2=temp_feature[2], - x3=temp_feature[3], - x4=temp_feature[4], - x5=temp_feature[5], - x6=temp_feature[6], - x7=temp_feature[7], - x8=temp_feature[8]) - pass - - # bonds - edge_index = data.edge_index.cpu().numpy() - edge_attr = data.edge_attr.cpu().numpy() - num_bonds = edge_index.shape[1] - for j in range(0, num_bonds, 2): - begin_idx = int(edge_index[0, j]) - end_idx = int(edge_index[1, j]) - temp_feature= edge_attr[j] - if not G.has_edge(begin_idx, end_idx): - G.add_edge(begin_idx, end_idx, - e0=temp_feature[0], - e1=temp_feature[1], - e2=temp_feature[2]) - - return G - - -def nx_to_graph_data_obj_simple(G): - """ vice versa of graph_data_obj_to_nx_simple() - Assume node indices are numbered from 0 to num_nodes - 1. - NB: Uses simplified atom and bond features, and represent as indices. - NB: possible issues with recapitulating relative stereochemistry - since the edges in the nx object are unordered. 
""" - - # atoms - # num_atom_features = 2 # atom type, chirality tag - atom_features_list = [] - for _, node in G.nodes(data=True): - atom_feature = [node['x0'], node['x1'], node['x2'], node['x3'], node['x4'], node['x5'], node['x6'], node['x7'], node['x8']] - atom_features_list.append(atom_feature) - x = torch.tensor(np.array(atom_features_list), dtype=torch.long) - - # bonds - num_bond_features = 3 # bond type, bond direction - if len(G.edges()) > 0: # mol has bonds - edges_list = [] - edge_features_list = [] - for i, j, edge in G.edges(data=True): - edge_feature = [edge['e0'], edge['e1'], edge['e2']] - edges_list.append((i, j)) - edge_features_list.append(edge_feature) - edges_list.append((j, i)) - edge_features_list.append(edge_feature) - - # data.edge_index: Graph connectivity in COO format with shape [2, num_edges] - edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long) - - # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features] - edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.long) - else: # mol has no bonds - edge_index = torch.empty((2, 0), dtype=torch.long) - edge_attr = torch.empty((0, num_bond_features), dtype=torch.long) - - data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr) - - return data - - -def create_standardized_mol_id(smiles): - """ smiles -> inchi """ - - if check_smiles_validity(smiles): - # remove stereochemistry - smiles = AllChem.MolToSmiles(AllChem.MolFromSmiles(smiles), - isomericSmiles=False) - mol = AllChem.MolFromSmiles(smiles) - if mol is not None: - # to catch weird issue with O=C1O[al]2oc(=O)c3ccc(cn3)c3ccccc3c3cccc(c3)\ - # c3ccccc3c3cc(C(F)(F)F)c(cc3o2)-c2ccccc2-c2cccc(c2)-c2ccccc2-c2cccnc21 - if '.' 
in smiles: # if multiple species, pick largest molecule - mol_species_list = split_rdkit_mol_obj(mol) - largest_mol = get_largest_mol(mol_species_list) - inchi = AllChem.MolToInchi(largest_mol) - else: - inchi = AllChem.MolToInchi(mol) - return inchi - return - - -class MoleculeNetGraphDataset(InMemoryDataset): - def __init__(self, root, dataset='zinc250k', transform=None, - pre_transform=None, pre_filter=None, empty=False): - - self.root = root - self.dataset = dataset - self.transform = transform - self.pre_filter = pre_filter - self.pre_transform = pre_transform - - super(MoleculeNetGraphDataset, self).__init__(root, transform, pre_transform, pre_filter) - - if not empty: - self.data, self.slices = torch.load(self.processed_paths[0]) - print('Dataset: {}\nData: {}'.format(self.dataset, self.data)) - - def get(self, idx): - data = Data() - for key in self.data.keys: - item, slices = self.data[key], self.slices[key] - s = list(repeat(slice(None), item.dim())) - s[data.__cat_dim__(key, item)] = slice(slices[idx], slices[idx + 1]) - data[key] = item[s] - return data - - @property - def raw_file_names(self): - if self.dataset == 'davis': - file_name_list = ['davis'] - elif self.dataset == 'kiba': - file_name_list = ['kiba'] - else: - file_name_list = os.listdir(self.raw_dir) - return file_name_list - - @property - def processed_file_names(self): - return 'geometric_data_processed.pt' - - def download(self): - return - - def process(self): - - def shared_extractor(smiles_list, rdkit_mol_objs, labels): - data_list, data_smiles_list, data_label_list = [], [], [] - if labels.ndim == 1: - labels = np.expand_dims(labels, axis=1) - for i in range(len(smiles_list)): - print(i) - rdkit_mol = rdkit_mol_objs[i] - if rdkit_mol is None: - continue - data = mol_to_graph_data_obj_simple(rdkit_mol) - data.id = torch.tensor([i]) - data.y = torch.tensor(labels[i]) - data_list.append(data) - data_smiles_list.append(smiles_list[i]) - data_label_list.append(labels[i]) - return data_list, 
data_smiles_list, data_label_list - - if self.dataset == 'tox21': - smiles_list, rdkit_mol_objs, labels = \ - _load_tox21_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'hiv': - smiles_list, rdkit_mol_objs, labels = \ - _load_hiv_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'bace': - smiles_list, rdkit_mol_objs, folds, labels = \ - _load_bace_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'bbbp': - smiles_list, rdkit_mol_objs, labels = \ - _load_bbbp_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'clintox': - smiles_list, rdkit_mol_objs, labels = \ - _load_clintox_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'esol': - smiles_list, rdkit_mol_objs, labels = \ - _load_esol_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'freesolv': - smiles_list, rdkit_mol_objs, labels = \ - _load_freesolv_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'lipophilicity': - smiles_list, rdkit_mol_objs, labels = \ - _load_lipophilicity_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'malaria': - smiles_list, rdkit_mol_objs, labels = \ - _load_malaria_dataset(self.raw_paths[0]) - data_list, data_smiles_list, 
data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'cep': - smiles_list, rdkit_mol_objs, labels = \ - _load_cep_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'muv': - smiles_list, rdkit_mol_objs, labels = \ - _load_muv_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'pcba': - smiles_list, rdkit_mol_objs, labels = \ - _load_pcba_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'sider': - smiles_list, rdkit_mol_objs, labels = \ - _load_sider_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - elif self.dataset == 'toxcast': - smiles_list, rdkit_mol_objs, labels = \ - _load_toxcast_dataset(self.raw_paths[0]) - data_list, data_smiles_list, data_label_list = shared_extractor( - smiles_list, rdkit_mol_objs, labels) - - else: - raise ValueError('Dataset {} not included.'.format(self.dataset)) - - if self.pre_filter is not None: - data_list = [data for data in data_list if self.pre_filter(data)] - - if self.pre_transform is not None: - data_list = [self.pre_transform(data) for data in data_list] - - data_smiles_series = pd.Series(data_smiles_list) - saver_path = os.path.join(self.processed_dir, 'smiles.csv') - data_smiles_series.to_csv(saver_path, index=False, header=False) - - data_label_array = np.array(data_label_list) - saver_path = os.path.join(self.processed_dir, 'labels') - np.savez_compressed(saver_path, labels=data_label_array) - - data, slices = self.collate(data_list) - torch.save((data, slices), self.processed_paths[0]) - - return - - -def _load_tox21_dataset(input_path): - input_df = 
pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - tasks = ['NR-AR', 'NR-AR-LBD', 'NR-AhR', 'NR-Aromatase', 'NR-ER', 'NR-ER-LBD', - 'NR-PPAR-gamma', 'SR-ARE', 'SR-ATAD5', 'SR-HSE', 'SR-MMP', 'SR-p53'] - labels = input_df[tasks] - # convert 0 to -1 - labels = labels.replace(0, -1) - # convert nan to 0 - labels = labels.fillna(0) - assert len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - return smiles_list, rdkit_mol_objs_list, labels.values - - -def _load_hiv_dataset(input_path): - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - labels = input_df['HIV_active'] - # convert 0 to -1 - labels = labels.replace(0, -1) - # there are no nans - assert len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - return smiles_list, rdkit_mol_objs_list, labels.values - - -def _load_bace_dataset(input_path): - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['mol'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - labels = input_df['Class'] - # convert 0 to -1 - labels = labels.replace(0, -1) - # there are no nans - folds = input_df['Model'] - folds = folds.replace('Train', 0) # 0 -> train - folds = folds.replace('Valid', 1) # 1 -> valid - folds = folds.replace('Test', 2) # 2 -> test - assert len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - assert len(smiles_list) == len(folds) - return smiles_list, rdkit_mol_objs_list, folds.values, labels.values - - -def _load_bbbp_dataset(input_path): - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - - preprocessed_rdkit_mol_objs_list = [m if m is not None else None - for m in rdkit_mol_objs_list] - 
preprocessed_smiles_list = [AllChem.MolToSmiles(m) if m is not None else None - for m in preprocessed_rdkit_mol_objs_list] - labels = input_df['p_np'] - # convert 0 to -1 - labels = labels.replace(0, -1) - # there are no nans - assert len(smiles_list) == len(preprocessed_rdkit_mol_objs_list) - assert len(smiles_list) == len(preprocessed_smiles_list) - assert len(smiles_list) == len(labels) - return preprocessed_smiles_list, \ - preprocessed_rdkit_mol_objs_list, labels.values - - -def _load_clintox_dataset(input_path): - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - - preprocessed_rdkit_mol_objs_list = [m if m is not None else None - for m in rdkit_mol_objs_list] - preprocessed_smiles_list = [AllChem.MolToSmiles(m) if m is not None else None - for m in preprocessed_rdkit_mol_objs_list] - tasks = ['FDA_APPROVED', 'CT_TOX'] - labels = input_df[tasks] - # convert 0 to -1 - labels = labels.replace(0, -1) - # there are no nans - assert len(smiles_list) == len(preprocessed_rdkit_mol_objs_list) - assert len(smiles_list) == len(preprocessed_smiles_list) - assert len(smiles_list) == len(labels) - return preprocessed_smiles_list, \ - preprocessed_rdkit_mol_objs_list, labels.values - - -def _load_esol_dataset(input_path): - # NB: some examples have multiple species - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - labels = input_df['measured log solubility in mols per litre'] - assert len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - return smiles_list, rdkit_mol_objs_list, labels.values - - -def _load_freesolv_dataset(input_path): - - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - labels = input_df['expt'] - assert 
len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - return smiles_list, rdkit_mol_objs_list, labels.values - - -def _load_lipophilicity_dataset(input_path): - - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - labels = input_df['exp'] - assert len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - return smiles_list, rdkit_mol_objs_list, labels.values - - -def _load_malaria_dataset(input_path): - - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - labels = input_df['activity'] - assert len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - return smiles_list, rdkit_mol_objs_list, labels.values - - -def _load_cep_dataset(input_path): - - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - labels = input_df['PCE'] - assert len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - return smiles_list, rdkit_mol_objs_list, labels.values - - -def _load_muv_dataset(input_path): - - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - tasks = ['MUV-466', 'MUV-548', 'MUV-600', 'MUV-644', 'MUV-652', 'MUV-689', - 'MUV-692', 'MUV-712', 'MUV-713', 'MUV-733', 'MUV-737', 'MUV-810', - 'MUV-832', 'MUV-846', 'MUV-852', 'MUV-858', 'MUV-859'] - labels = input_df[tasks] - # convert 0 to -1 - labels = labels.replace(0, -1) - # convert nan to 0 - labels = labels.fillna(0) - assert len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - return smiles_list, rdkit_mol_objs_list, labels.values - - -def 
_load_sider_dataset(input_path): - - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - tasks = ['Hepatobiliary disorders', - 'Metabolism and nutrition disorders', 'Product issues', 'Eye disorders', - 'Investigations', 'Musculoskeletal and connective tissue disorders', - 'Gastrointestinal disorders', 'Social circumstances', - 'Immune system disorders', 'Reproductive system and breast disorders', - 'Neoplasms benign, malignant and unspecified (incl cysts and polyps)', - 'General disorders and administration site conditions', - 'Endocrine disorders', 'Surgical and medical procedures', - 'Vascular disorders', 'Blood and lymphatic system disorders', - 'Skin and subcutaneous tissue disorders', - 'Congenital, familial and genetic disorders', - 'Infections and infestations', - 'Respiratory, thoracic and mediastinal disorders', - 'Psychiatric disorders', 'Renal and urinary disorders', - 'Pregnancy, puerperium and perinatal conditions', - 'Ear and labyrinth disorders', 'Cardiac disorders', - 'Nervous system disorders', - 'Injury, poisoning and procedural complications'] - labels = input_df[tasks] - # convert 0 to -1 - labels = labels.replace(0, -1) - assert len(smiles_list) == len(rdkit_mol_objs_list) - assert len(smiles_list) == len(labels) - return smiles_list, rdkit_mol_objs_list, labels.values - - -def _load_toxcast_dataset(input_path): - - # NB: some examples have multiple species, some example smiles are invalid - input_df = pd.read_csv(input_path, sep=',') - smiles_list = input_df['smiles'] - rdkit_mol_objs_list = [AllChem.MolFromSmiles(s) for s in smiles_list] - # Some smiles could not be successfully converted - # to rdkit mol object so them to None - preprocessed_rdkit_mol_objs_list = [m if m is not None else None - for m in rdkit_mol_objs_list] - preprocessed_smiles_list = [AllChem.MolToSmiles(m) if m is not None else None - for m in 
preprocessed_rdkit_mol_objs_list] - tasks = list(input_df.columns)[1:] - labels = input_df[tasks] - # convert 0 to -1 - labels = labels.replace(0, -1) - # convert nan to 0 - labels = labels.fillna(0) - assert len(smiles_list) == len(preprocessed_rdkit_mol_objs_list) - assert len(smiles_list) == len(preprocessed_smiles_list) - assert len(smiles_list) == len(labels) - return preprocessed_smiles_list, \ - preprocessed_rdkit_mol_objs_list, labels.values - - -def check_smiles_validity(smiles): - try: - m = Chem.MolFromSmiles(smiles) - if m: - return True - else: - return False - except: - return False - - -def split_rdkit_mol_obj(mol): - """ - Split rdkit mol object containing multiple species or one species into a - list of mol objects or a list containing a single object respectively """ - - smiles = AllChem.MolToSmiles(mol, isomericSmiles=True) - smiles_list = smiles.split('.') - mol_species_list = [] - for s in smiles_list: - if check_smiles_validity(s): - mol_species_list.append(AllChem.MolFromSmiles(s)) - return mol_species_list - - -def get_largest_mol(mol_list): - """ - Given a list of rdkit mol objects, returns mol object containing the - largest num of atoms. If multiple containing largest num of atoms, - picks the first one """ - - num_atoms_list = [len(m.GetAtoms()) for m in mol_list] - largest_mol_idx = num_atoms_list.index(max(num_atoms_list)) - return mol_list[largest_mol_idx] diff --git a/open_biomed/models/MoleculeSTM/datasets/MoleculeNetSMILES.py b/open_biomed/models/MoleculeSTM/datasets/MoleculeNetSMILES.py deleted file mode 100644 index 4bfe005..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/MoleculeNetSMILES.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -import numpy as np -from rdkit import Chem -from torch.utils.data import Dataset - - -class MoleculeNetSMILESDataset(Dataset): - def __init__(self, root): - ''' - This needs to be called after calling the MoleculeNetGraphDataset. 
- ''' - self.root = root - SMILES_file = os.path.join(root, "processed", "smiles.csv") - - self.SMILES_list = [] - with open(SMILES_file, 'r') as f: - lines = f.readlines() - for line in lines: - SMILES = line.strip() - mol = Chem.MolFromSmiles(SMILES) - canon_SMILES = Chem.MolToSmiles(mol) - self.SMILES_list.append(canon_SMILES) - - labels_file = os.path.join(root, "processed", "labels.npz") - self.labels_data = np.load(labels_file)['labels'] - - print(len(self.SMILES_list), '\t', self.labels_data.shape) - return - - def __getitem__(self, index): - SMILES = self.SMILES_list[index] - labels = self.labels_data[index] - return SMILES, labels - - def __len__(self): - return len(self.SMILES_list) diff --git a/open_biomed/models/MoleculeSTM/datasets/PubChemSTM.py b/open_biomed/models/MoleculeSTM/datasets/PubChemSTM.py deleted file mode 100644 index 6f4af30..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/PubChemSTM.py +++ /dev/null @@ -1,275 +0,0 @@ -import os -from itertools import repeat -import pandas as pd -import json -from tqdm import tqdm - -import torch -from torch.utils.data import Dataset -from torch_geometric.data import Data, InMemoryDataset - -from rdkit import Chem -from rdkit import RDLogger -RDLogger.DisableLog('rdApp.*') - -from models.MoleculeSTM.datasets.utils import mol_to_graph_data_obj_simple - - -class PubChemSTM_Datasets_Only_SMILES(Dataset): - def __init__(self, root, subset_size=None): - self.root = root - - CID2SMILES_file = os.path.join(self.root, "raw/CID2SMILES.csv") - - df = pd.read_csv(CID2SMILES_file) - SMILES_list = df["SMILES"].tolist() - SMILES_list = sorted(set(SMILES_list)) - - self.SMILES_list = SMILES_list - if subset_size is not None: - self.SMILES_list = self.SMILES_list[:subset_size] - return - - def __getitem__(self, index): - SMILES = self.SMILES_list[index] - return SMILES - - def __len__(self): - return len(self.SMILES_list) - - -class PubChemSTM_Datasets_SMILES(Dataset): - def __init__(self, root): - self.root = root 
- - CID2text_file = os.path.join(self.root, "raw/CID2text.json") - CID2SMILES_file = os.path.join(self.root, "raw/CID2SMILES.csv") - self.load_CID2SMILES(CID2text_file, CID2SMILES_file) - - self.text_list = [] - missing_count = 0 - for CID, value_list in self.CID2text_data.items(): - if CID not in self.CID2SMILES: - print("CID {} missing".format(CID)) - missing_count += 1 - continue - for value in value_list: - self.text_list.append([CID, value]) - print("missing", missing_count) - print("len of text_list: {}".format(len(self.text_list))) - return - - def load_CID2SMILES(self, CID2text_file, CID2SMILES_file): - with open(CID2text_file, "r") as f: - self.CID2text_data = json.load(f) - print("len of CID2text: {}".format(len(self.CID2text_data.keys()))) - - df = pd.read_csv(CID2SMILES_file) - CID_list, SMILES_list = df["CID"].tolist(), df["SMILES"].tolist() - self.CID2SMILES = {} - for CID, SMILES in zip(CID_list, SMILES_list): - CID = str(CID) - self.CID2SMILES[CID] = SMILES - print("len of CID2SMILES: {}".format(len(self.CID2SMILES.keys()))) - return - - def __getitem__(self, index): - CID, text = self.text_list[index] - SMILES = self.CID2SMILES[CID] - return text, SMILES - - def __len__(self): - return len(self.text_list) - - -class PubChemSTM_SubDatasets_SMILES(PubChemSTM_Datasets_SMILES): - def __init__(self, root, size): - self.root = root - - CID2text_file = os.path.join(self.root, "raw/CID2text.json") - CID2SMILES_file = os.path.join(self.root, "raw/CID2SMILES.csv") - self.load_CID2SMILES(CID2text_file, CID2SMILES_file) - - self.text_list = [] - for CID, value_list in self.CID2text_data.items(): - if CID not in self.CID2SMILES: - print("CID {} missing".format(CID)) - continue - for value in value_list: - self.text_list.append([CID, value]) - if len(self.text_list) >= size: - break - print("len of text_list: {}".format(len(self.text_list))) - return - - -class PubChemSTM_Datasets_Graph(InMemoryDataset): - def __init__(self, root, transform=None, 
pre_transform=None, pre_filter=None): - self.root = root - self.transform = transform - self.pre_transform = pre_transform - self.pre_filter = pre_filter - # only for `process` function - self.SDF_file_path = os.path.join(self.root, "raw/molecules.sdf") - self.CID2text_file = os.path.join(self.root, "raw/CID2text.json") - # `process` result file - self.CID_text_file_path = os.path.join(self.root, "processed/CID_text_list.csv") - - super(PubChemSTM_Datasets_Graph, self).__init__(root, transform, pre_transform, pre_filter) - - self.load_Graph_CID_and_text() - return - - @property - def processed_file_names(self): - return 'geometric_data_processed.pt' - - def process(self): - suppl = Chem.SDMolSupplier(self.SDF_file_path) - - CID2graph = {} - for mol in tqdm(suppl): - CID = mol.GetProp("PUBCHEM_COMPOUND_CID") - CID = int(CID) - graph = mol_to_graph_data_obj_simple(mol) - CID2graph[CID] = graph - print("CID2graph", len(CID2graph)) - - with open(self.CID2text_file, "r") as f: - CID2text_data = json.load(f) - print("CID2data", len(CID2text_data)) - - CID_list, graph_list, text_list = [], [], [] - for CID, value_list in CID2text_data.items(): - CID = int(CID) - if CID not in CID2graph: - print("CID {} missing".format(CID)) - continue - graph = CID2graph[CID] - for value in value_list: - text_list.append(value) - CID_list.append(CID) - graph_list.append(graph) - - CID_text_df = pd.DataFrame({"CID": CID_list, "text": text_list}) - CID_text_df.to_csv(self.CID_text_file_path, index=None) - - if self.pre_filter is not None: - graph_list = [graph for graph in graph_list if self.pre_filter(graph)] - - if self.pre_transform is not None: - graph_list = [self.pre_transform(graph) for graph in graph_list] - - graphs, slices = self.collate(graph_list) - torch.save((graphs, slices), self.processed_paths[0]) - return - - def load_Graph_CID_and_text(self): - self.graphs, self.slices = torch.load(self.processed_paths[0]) - - CID_text_df = pd.read_csv(self.CID_text_file_path) - 
self.CID_list = CID_text_df["CID"].tolist() - self.text_list = CID_text_df["text"].tolist() - return - - def get(self, idx): - text = self.text_list[idx] - - data = Data() - for key in self.graphs.keys: - item, slices = self.graphs[key], self.slices[key] - s = list(repeat(slice(None), item.dim())) - s[data.__cat_dim__(key, item)] = slice(slices[idx], slices[idx + 1]) - data[key] = item[s] - return text, data - - def __len__(self): - return len(self.text_list) - - -class PubChemSTM_SubDatasets_Graph(PubChemSTM_Datasets_Graph): - def __init__(self, root, size, transform=None, pre_transform=None, pre_filter=None): - self.root = root - self.size = size - self.transform = transform - self.pre_transform = pre_transform - self.pre_filter = pre_filter - self.size = size - # only for `process` function - self.SDF_file_path = os.path.join(self.root, "raw/molecules.sdf") - self.CID2text_file = os.path.join(self.root, "raw/CID2text.json") - # `process` result file - self.CID_text_file_path = os.path.join(self.root, "processed/CID_text_list.csv") - - super(PubChemSTM_Datasets_Graph, self).__init__(root, transform, pre_transform, pre_filter) - - self.load_Graph_CID_and_text() - return - - def __len__(self): - return self.size - - -class PubChemSTM_Datasets_SMILES_and_Graph(InMemoryDataset): - def __init__(self, root, subset_size=None, transform=None, pre_transform=None, pre_filter=None): - self.root = root - - # only for `process` function - self.SDF_file_path = os.path.join(self.root, "raw/molecules.sdf") - # `process` result file - self.SMILES_file_path = os.path.join(self.root, "processed_molecule_only/SMILES.csv") - - super(PubChemSTM_Datasets_SMILES_and_Graph, self).__init__(root, transform, pre_transform, pre_filter) - - self.graphs, self.slices = torch.load(self.processed_paths[0]) - - CID_text_df = pd.read_csv(self.SMILES_file_path) - self.SMILES_list = CID_text_df["smiles"].tolist() - if subset_size is not None: - self.SMILES_list = self.SMILES_list[:subset_size] - 
return - - @property - def processed_dir(self): - return os.path.join(self.root, 'processed_molecule_only') - - @property - def processed_file_names(self): - return 'geometric_data_processed.pt' - - def process(self): - suppl = Chem.SDMolSupplier(self.SDF_file_path) - - SMILES_list, graph_list = [], [] - for mol in tqdm(suppl): - SMILES = Chem.MolToSmiles(mol) - SMILES_list.append(SMILES) - graph = mol_to_graph_data_obj_simple(mol) - graph_list.append(graph) - - SMILES_df = pd.DataFrame({"smiles": SMILES_list}) - SMILES_df.to_csv(self.SMILES_file_path, index=None) - - if self.pre_filter is not None: - graph_list = [graph for graph in graph_list if self.pre_filter(graph)] - - if self.pre_transform is not None: - graph_list = [self.pre_transform(graph) for graph in graph_list] - - graphs, slices = self.collate(graph_list) - torch.save((graphs, slices), self.processed_paths[0]) - return - - def get(self, idx): - SMILES = self.SMILES_list[idx] - - data = Data() - for key in self.graphs.keys: - item, slices = self.graphs[key], self.slices[key] - s = list(repeat(slice(None), item.dim())) - s[data.__cat_dim__(key, item)] = slice(slices[idx], slices[idx + 1]) - data[key] = item[s] - return SMILES, data - - def __len__(self): - return len(self.SMILES_list) diff --git a/open_biomed/models/MoleculeSTM/datasets/PubChemSTM_raw.py b/open_biomed/models/MoleculeSTM/datasets/PubChemSTM_raw.py deleted file mode 100644 index a484f6b..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/PubChemSTM_raw.py +++ /dev/null @@ -1,172 +0,0 @@ -import os -from itertools import repeat -import pandas as pd -import json -from tqdm import tqdm - -import torch -from torch_geometric.data import Data, InMemoryDataset - -from rdkit import Chem -from rdkit import RDLogger -RDLogger.DisableLog('rdApp.*') - -from models.MoleculeSTM.datasets.utils import mol_to_graph_data_obj_simple - -from models.MoleculeSTM.datasets import PubChemSTM_Datasets_SMILES - - -class 
PubChemSTM_Datasets_Raw_SMILES(PubChemSTM_Datasets_SMILES): - def __init__(self, root): - self.root = root - - CID2text_file = os.path.join(self.root, "raw/CID2text_raw.json") - # Both PubChemSTM and PubChemSTM_Raw share the same CID2SMILES file. - CID2SMILES_file = os.path.join(self.root, "raw/CID2SMILES.csv") - self.load_CID2SMILES(CID2text_file, CID2SMILES_file) - - self.text_list = [] - missing_count = 0 - for CID, value_list in self.CID2text_data.items(): - if CID not in self.CID2SMILES: - print("CID {} missing".format(CID)) - missing_count += 1 - continue - for value in value_list: - self.text_list.append([CID, value]) - print("missing", missing_count) - print("len of text_list: {}".format(len(self.text_list))) - - return - - -class PubChemSTM_SubDatasets_Raw_SMILES(PubChemSTM_Datasets_Raw_SMILES): - def __init__(self, root, size): - self.root = root - - CID2text_file = os.path.join(self.root, "raw/CID2text_raw.json") - CID2SMILES_file = os.path.join(self.root, "raw/CID2SMILES.csv") - self.load_CID2SMILES(CID2text_file, CID2SMILES_file) - - self.text_list = [] - for CID, value_list in self.CID2text_data.items(): - if CID not in self.CID2SMILES: - print("CID {} missing".format(CID)) - continue - for value in value_list: - self.text_list.append([CID, value]) - if len(self.text_list) >= size: - break - print("len of text_list: {}".format(len(self.text_list))) - return - - -class PubChemSTM_Datasets_Raw_Graph(InMemoryDataset): - def __init__(self, root, transform=None, pre_transform=None, pre_filter=None): - self.root = root - self.transform = transform - self.pre_transform = pre_transform - self.pre_filter = pre_filter - # only for `process` function - self.SDF_file_path = os.path.join(self.root, "raw/molecules.sdf") - self.CID2text_file = os.path.join(self.root, "raw/CID2text_raw.json") - # `process` result file - self.CID_text_file_path = os.path.join(self.root, "processed_raw/CID_text_list.csv") - - super(PubChemSTM_Datasets_Raw_Graph, self).__init__(root, 
transform, pre_transform, pre_filter) - - self.load_Graph_CID_and_text() - return - - @property - def processed_dir(self) -> str: - return os.path.join(self.root, 'processed_raw') - - @property - def processed_file_names(self): - return 'geometric_data_processed.pt' - - def process(self): - suppl = Chem.SDMolSupplier(self.SDF_file_path) - - CID2graph = {} - for mol in tqdm(suppl): - CID = mol.GetProp("PUBCHEM_COMPOUND_CID") - CID = int(CID) - graph = mol_to_graph_data_obj_simple(mol) - CID2graph[CID] = graph - print("CID2graph", len(CID2graph)) - - with open(self.CID2text_file, "r") as f: - CID2text_data = json.load(f) - print("CID2data", len(CID2text_data)) - - CID_list, graph_list, text_list = [], [], [] - for CID, value_list in CID2text_data.items(): - CID = int(CID) - if CID not in CID2graph: - print("CID {} missing".format(CID)) - continue - graph = CID2graph[CID] - for value in value_list: - text_list.append(value) - CID_list.append(CID) - graph_list.append(graph) - - CID_text_df = pd.DataFrame({"CID": CID_list, "text": text_list}) - CID_text_df.to_csv(self.CID_text_file_path, index=None) - - if self.pre_filter is not None: - graph_list = [graph for graph in graph_list if self.pre_filter(graph)] - - if self.pre_transform is not None: - graph_list = [self.pre_transform(graph) for graph in graph_list] - - graphs, slices = self.collate(graph_list) - torch.save((graphs, slices), self.processed_paths[0]) - return - - def load_Graph_CID_and_text(self): - self.graphs, self.slices = torch.load(self.processed_paths[0]) - - CID_text_df = pd.read_csv(self.CID_text_file_path) - self.CID_list = CID_text_df["CID"].tolist() - self.text_list = CID_text_df["text"].tolist() - return - - def get(self, idx): - text = self.text_list[idx] - - data = Data() - for key in self.graphs.keys: - item, slices = self.graphs[key], self.slices[key] - s = list(repeat(slice(None), item.dim())) - s[data.__cat_dim__(key, item)] = slice(slices[idx], slices[idx + 1]) - data[key] = item[s] - return 
text, data - - def __len__(self): - return len(self.text_list) - - -class PubChemSTM_SubDatasets_Raw_Graph(PubChemSTM_Datasets_Raw_Graph): - def __init__(self, root, size, transform=None, pre_transform=None, pre_filter=None): - self.root = root - self.size = size - self.transform = transform - self.pre_transform = pre_transform - self.pre_filter = pre_filter - self.size = size - # only for `process` function - self.SDF_file_path = os.path.join(self.root, "raw/molecules.sdf") - self.CID2text_file = os.path.join(self.root, "raw/CID2text_raw.json") - # `process` result file - self.CID_text_file_path = os.path.join(self.root, "processed_raw/CID_text_list.csv") - - super(PubChemSTM_SubDatasets_Raw_Graph, self).__init__(root, transform, pre_transform, pre_filter) - - self.load_Graph_CID_and_text() - return - - def __len__(self): - return self.size diff --git a/open_biomed/models/MoleculeSTM/datasets/ZINC250K_Graph.py b/open_biomed/models/MoleculeSTM/datasets/ZINC250K_Graph.py deleted file mode 100644 index 0822062..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/ZINC250K_Graph.py +++ /dev/null @@ -1,67 +0,0 @@ -import os -import pandas as pd -from tqdm import tqdm -from rdkit import Chem -from itertools import repeat - -import torch -from torch_geometric.data import Data, InMemoryDataset - -from models.MoleculeSTM.datasets.utils import mol_to_graph_data_obj_simple - - -class ZINC250K_Dataset_Graph(InMemoryDataset): - def __init__(self, root, subset_size=257, transform=None, pre_transform=None, pre_filter=None): - self.root = root - - self.SMILES_file = os.path.join(self.root, "raw/250k_rndm_zinc_drugs_clean_3.csv") - df = pd.read_csv(self.SMILES_file) - SMILES_list = df['smiles'].tolist() - self.SMILES_list = [x.strip() for x in SMILES_list] - - super(ZINC250K_Dataset_Graph, self).__init__(root, transform, pre_transform, pre_filter) - - self.graphs, self.slices = torch.load(self.processed_paths[0]) - - if subset_size is not None: - self.SMILES_list = 
self.SMILES_list[:subset_size] - return - - @property - def processed_dir(self): - return os.path.join(self.root, 'processed_molecule_only') - - @property - def processed_file_names(self): - return 'geometric_data_processed.pt' - - def process(self): - graph_list = [] - for SMILES in tqdm(self.SMILES_list): - RDKit_mol = Chem.MolFromSmiles(SMILES) - graph = mol_to_graph_data_obj_simple(RDKit_mol) - graph_list.append(graph) - - if self.pre_filter is not None: - graph_list = [graph for graph in graph_list if self.pre_filter(graph)] - - if self.pre_transform is not None: - graph_list = [self.pre_transform(graph) for graph in graph_list] - - graphs, slices = self.collate(graph_list) - torch.save((graphs, slices), self.processed_paths[0]) - return - - def get(self, idx): - SMILES = self.SMILES_list[idx] - - data = Data() - for key in self.graphs.keys: - item, slices = self.graphs[key], self.slices[key] - s = list(repeat(slice(None), item.dim())) - s[data.__cat_dim__(key, item)] = slice(slices[idx], slices[idx + 1]) - data[key] = item[s] - return SMILES, data - - def __len__(self): - return len(self.SMILES_list) \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/datasets/ZINC250K_SMILES.py b/open_biomed/models/MoleculeSTM/datasets/ZINC250K_SMILES.py deleted file mode 100644 index 54709dc..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/ZINC250K_SMILES.py +++ /dev/null @@ -1,31 +0,0 @@ -from torch.utils.data import Dataset -import os -import pandas as pd - - -class ZINC250K_Dataset_SMILES(Dataset): - def __init__(self, root, subset_size=512): - self.root = root - - SMILES_file = os.path.join(self.root, "raw/250k_rndm_zinc_drugs_clean_3.csv") - df = pd.read_csv(SMILES_file) - SMILES_list = df['smiles'].tolist() # Already canonical SMILES - self.SMILES_list = [x.strip() for x in SMILES_list] - # self.SMILES_list = [{'original_tokens': d, 'masked_pad_masks': [1,2,3]} for d in self.SMILES_list] - - new_SMILES_file = os.path.join(self.root, 
"raw/smiles.csv") - if not os.path.exists(new_SMILES_file): - data_smiles_series = pd.Series(self.SMILES_list) - print("saving to {}".format(new_SMILES_file)) - data_smiles_series.to_csv(new_SMILES_file, index=False, header=False) - - if subset_size is not None: - self.SMILES_list = self.SMILES_list[:subset_size] - return - - def __getitem__(self, index): - SMILES = self.SMILES_list[index] - return SMILES - - def __len__(self): - return len(self.SMILES_list) diff --git a/open_biomed/models/MoleculeSTM/datasets/__init__.py b/open_biomed/models/MoleculeSTM/datasets/__init__.py deleted file mode 100644 index 199071b..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from models.MoleculeSTM.datasets.PubChemSTM import PubChemSTM_Datasets_SMILES, PubChemSTM_SubDatasets_SMILES, PubChemSTM_Datasets_Graph, PubChemSTM_SubDatasets_Graph, PubChemSTM_Datasets_Only_SMILES, PubChemSTM_Datasets_SMILES_and_Graph -from models.MoleculeSTM.datasets.PubChemSTM_raw import PubChemSTM_Datasets_Raw_SMILES, PubChemSTM_SubDatasets_Raw_SMILES, PubChemSTM_Datasets_Raw_Graph, PubChemSTM_SubDatasets_Raw_Graph -from models.MoleculeSTM.datasets.MoleculeNetGraph import MoleculeNetGraphDataset -from models.MoleculeSTM.datasets.MoleculeNetSMILES import MoleculeNetSMILESDataset -from models.MoleculeSTM.datasets.DrugBankSMILES import DrugBank_Datasets_SMILES_retrieval, DrugBank_Datasets_SMILES_ATC -from models.MoleculeSTM.datasets.DrugBankGraph import DrugBank_Datasets_Graph_retrieval, DrugBank_Datasets_Graph_ATC -from models.MoleculeSTM.datasets.ZINC250K_SMILES import ZINC250K_Dataset_SMILES -from models.MoleculeSTM.datasets.ZINC250K_Graph import ZINC250K_Dataset_Graph \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/datasets/utils.py b/open_biomed/models/MoleculeSTM/datasets/utils.py deleted file mode 100644 index 38446aa..0000000 --- a/open_biomed/models/MoleculeSTM/datasets/utils.py +++ /dev/null @@ -1,182 +0,0 @@ - -import 
networkx as nx -import numpy as np -import torch -from rdkit import Chem -from torch_geometric.data import Data - - -allowable_features = { - 'possible_atomic_num_list': list(range(1, 119)), - 'possible_formal_charge_list': [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5], - 'possible_chirality_list': [ - Chem.rdchem.ChiralType.CHI_UNSPECIFIED, - Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW, - Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW, - Chem.rdchem.ChiralType.CHI_OTHER - ], - 'possible_hybridization_list': [ - Chem.rdchem.HybridizationType.S, - Chem.rdchem.HybridizationType.SP, - Chem.rdchem.HybridizationType.SP2, - Chem.rdchem.HybridizationType.SP3, - Chem.rdchem.HybridizationType.SP3D, - Chem.rdchem.HybridizationType.SP3D2, - Chem.rdchem.HybridizationType.UNSPECIFIED - ], - 'possible_numH_list': [0, 1, 2, 3, 4, 5, 6, 7, 8], - 'possible_implicit_valence_list': [0, 1, 2, 3, 4, 5, 6], - 'possible_degree_list': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - 'possible_bonds': [ - Chem.rdchem.BondType.SINGLE, - Chem.rdchem.BondType.DOUBLE, - Chem.rdchem.BondType.TRIPLE, - Chem.rdchem.BondType.AROMATIC - ], - 'possible_bond_dirs': [ # only for double bond stereo information - Chem.rdchem.BondDir.NONE, - Chem.rdchem.BondDir.ENDUPRIGHT, - Chem.rdchem.BondDir.ENDDOWNRIGHT - ] -} - - -def mol_to_graph_data_obj_simple(mol): - # atoms - # num_atom_features = 2 # atom type, chirality tag - atom_features_list = [] - for atom in mol.GetAtoms(): - atomic_num = atom.GetAtomicNum() - chiral_tag = atom.GetChiralTag() - if atomic_num == 0: - atomic_num = 118 # Only for one extreme case - atom_feature = [allowable_features['possible_atomic_num_list'].index(atomic_num)] + \ - [allowable_features['possible_chirality_list'].index(chiral_tag)] - atom_features_list.append(atom_feature) - x = torch.tensor(np.array(atom_features_list), dtype=torch.long) - - # bonds - if len(mol.GetBonds()) <= 0: # mol has no bonds - num_bond_features = 2 # bond type & direction - edge_index = torch.empty((2, 0), 
dtype=torch.long) - edge_attr = torch.empty((0, num_bond_features), dtype=torch.long) - else: # mol has bonds - edges_list = [] - edge_features_list = [] - for bond in mol.GetBonds(): - i = bond.GetBeginAtomIdx() - j = bond.GetEndAtomIdx() - bond_type = bond.GetBondType() - bond_dir = bond.GetBondDir() - if bond_dir not in allowable_features['possible_bond_dirs']: - bond_dir = 0 - edge_feature = [allowable_features['possible_bonds'].index(bond_type)] + \ - [allowable_features['possible_bond_dirs'].index(bond_dir)] - edges_list.append((i, j)) - edge_features_list.append(edge_feature) - edges_list.append((j, i)) - edge_features_list.append(edge_feature) - - # data.edge_index: Graph connectivity in COO format with shape [2, num_edges] - edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long) - - # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features] - edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.long) - - data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr) - - return data - - -def graph_data_obj_to_mol_simple(data_x, data_edge_index, data_edge_attr): - mol = Chem.RWMol() - - # atoms - atom_features = data_x.cpu().numpy() - num_atoms = atom_features.shape[0] - for i in range(num_atoms): - atomic_num_idx, chirality_tag_idx = atom_features[i] - atomic_num = allowable_features['possible_atomic_num_list'][atomic_num_idx] - chirality_tag = allowable_features['possible_chirality_list'][chirality_tag_idx] - atom = Chem.Atom(atomic_num) - atom.SetChiralTag(chirality_tag) - mol.AddAtom(atom) - - # bonds - edge_index = data_edge_index.cpu().numpy() - edge_attr = data_edge_attr.cpu().numpy() - num_bonds = edge_index.shape[1] - for j in range(0, num_bonds, 2): - begin_idx = int(edge_index[0, j]) - end_idx = int(edge_index[1, j]) - bond_type_idx, bond_dir_idx = edge_attr[j] - bond_type = allowable_features['possible_bonds'][bond_type_idx] - bond_dir = allowable_features['possible_bond_dirs'][bond_dir_idx] - 
mol.AddBond(begin_idx, end_idx, bond_type) - # set bond direction - new_bond = mol.GetBondBetweenAtoms(begin_idx, end_idx) - new_bond.SetBondDir(bond_dir) - return mol - - -def graph_data_obj_to_nx_simple(data): - G = nx.Graph() - - # atoms - atom_features = data.x.cpu().numpy() - num_atoms = atom_features.shape[0] - for i in range(num_atoms): - atomic_num_idx, chirality_tag_idx = atom_features[i] - G.add_node(i, atom_num_idx=atomic_num_idx, - chirality_tag_idx=chirality_tag_idx) - pass - - # bonds - edge_index = data.edge_index.cpu().numpy() - edge_attr = data.edge_attr.cpu().numpy() - num_bonds = edge_index.shape[1] - for j in range(0, num_bonds, 2): - begin_idx = int(edge_index[0, j]) - end_idx = int(edge_index[1, j]) - bond_type_idx, bond_dir_idx = edge_attr[j] - if not G.has_edge(begin_idx, end_idx): - G.add_edge(begin_idx, end_idx, - bond_type_idx=bond_type_idx, - bond_dir_idx=bond_dir_idx) - - return G - - -def nx_to_graph_data_obj_simple(G): - # atoms - # num_atom_features = 2 # atom type, chirality tag - atom_features_list = [] - for _, node in G.nodes(data=True): - atom_feature = [node['atom_num_idx'], node['chirality_tag_idx']] - atom_features_list.append(atom_feature) - x = torch.tensor(np.array(atom_features_list), dtype=torch.long) - - # bonds - num_bond_features = 2 # bond type, bond direction - if len(G.edges()) > 0: # mol has bonds - edges_list = [] - edge_features_list = [] - for i, j, edge in G.edges(data=True): - edge_feature = [edge['bond_type_idx'], edge['bond_dir_idx']] - edges_list.append((i, j)) - edge_features_list.append(edge_feature) - edges_list.append((j, i)) - edge_features_list.append(edge_feature) - - # data.edge_index: Graph connectivity in COO format with shape [2, num_edges] - edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long) - - # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features] - edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.long) - else: # mol has no bonds - 
edge_index = torch.empty((2, 0), dtype=torch.long) - edge_attr = torch.empty((0, num_bond_features), dtype=torch.long) - - data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr) - - return data diff --git a/open_biomed/models/MoleculeSTM/models/GA/ZINC_first_1000.smi b/open_biomed/models/MoleculeSTM/models/GA/ZINC_first_1000.smi deleted file mode 100644 index 3d4698d..0000000 --- a/open_biomed/models/MoleculeSTM/models/GA/ZINC_first_1000.smi +++ /dev/null @@ -1,1000 +0,0 @@ -CC(C)(C)c1ccc2occ(CC(=O)Nc3ccccc3F)c2c1 -C[C@@H]1CC(Nc2cncc(-c3nncn3C)c2)C[C@@H](C)C1 -N#Cc1ccc(-c2ccc(O[C@@H](C(=O)N3CCCC3)c3ccccc3)cc2)cc1 -CCOC(=O)[C@@H]1CCCN(C(=O)c2nc(-c3ccc(C)cc3)n3c2CCCCC3)C1 -N#CC1=C(SCC(=O)Nc2cccc(Cl)c2)N=C([O-])[C@H](C#N)C12CCCCC2 -CC[NH+](CC)[C@](C)(CC)[C@H](O)c1cscc1Br -COc1ccc(C(=O)N(C)[C@@H](C)C/C(N)=N/O)cc1O -O=C(Nc1nc[nH]n1)c1cccnc1Nc1cccc(F)c1 -Cc1c(/C=N/c2cc(Br)ccn2)c(O)n2c(nc3ccccc32)c1C#N -C[C@@H]1CN(C(=O)c2cc(Br)cn2C)CC[C@H]1[NH3+] -CCOc1ccc(OCC)c([C@H]2C(C#N)=C(N)N(c3ccccc3C(F)(F)F)C3=C2C(=O)CCC3)c1 -Cc1ccc2nc(S[C@H](C)C(=O)NC3CCC(C)CC3)n(C)c(=O)c2c1 -O=C(N1CCc2c(F)ccc(F)c2C1)C1(O)Cc2ccccc2C1 -Cc1ccccc1C(=O)N1CCC2(CC1)C[C@H](c1ccccc1)C(=O)N2C -CCCc1cc(NC(=O)CN2C(=O)NC3(CCC(C)CC3)C2=O)n(C)n1 -CC(C)Cc1nc(SCC(=O)NC[C@@H]2CCCO2)c2c(=O)n(C)c(=O)n(C)c2n1 -Cc1ccc(CNC(=O)c2ccccc2NC(=O)[C@@H]2CC(=O)N(c3ccc(C)cc3)C2)cc1 -CCCCC(=O)NC(=S)Nc1ccccc1C(=O)N1CCOCC1 -Cc1c(NC(=O)CSc2nc3sc4c(c3c(=O)[nH]2)CCCC4)c(=O)n(-c2ccccc2)n1C -CC(C)[C@@H](Oc1cccc(Cl)c1)C(=O)N1CCC(n2cccn2)CC1 -CCN(CC)C(=O)C[C@@H](C)[NH2+][C@H](C)c1cccc(F)c1F -Cc1nc2c(c(Nc3ncc(C)s3)n1)CCN(C(=O)CCc1ccccc1)C2 -O=C(NCCNC(=O)N1C[C@H]2CC=CC[C@@H]2C1)c1cccnc1 -O=c1n(CCO)c2ccccc2n1CCO -COC(=O)Cc1csc(NC(=O)Cc2coc3cc(C)ccc23)n1 -Cc1ccc(N2CC[C@@H](NS(=O)(=O)c3ccccc3C)C2=O)cc1C -CC[C@H](C)C[C@@H](C)NC(=O)N1CCN(CC(=O)NC2CC2)CC1 -CC(=O)Nc1c2n(c3ccccc13)C[C@](C)(C(=O)NC1CCCCC1)N(C1CCCCC1)C2=O -N#Cc1ccncc1NC[C@@H]1C[C@@]12CCc1ccccc12 -Cc1cccn2c(=O)c(C(=O)NC[C@H]3CCO[C@@H]3C(C)C)cnc12 
-CNC(=O)c1ccc(/C=C/C(=O)Nc2c(C)cc(C)nc2Cl)cc1 -CC1=C(CNC(=O)c2cc(-c3ccccc3)nc3c2CNN3C(C)C)CN=N1 -C[C@@H](NC(=O)COC(=O)/C=C/c1ccc(Cl)cc1)c1ccccc1 -CCc1ccc(N(Cc2ccc(C)s2)C(=O)c2ccc(=O)n(C)n2)cc1 -CCOC(=O)c1nnc2ccccc2c1N1CC[C@@H]([NH+](CC)CC)C1 -Cc1ccc(C#N)cc1S(=O)(=O)NCc1ccnc(OC(C)(C)C)c1 -O=C(O[C@H]1CCOC1)C1(c2ccc(Cl)c(Cl)c2)CCC1 -CCC[NH2+][C@@H]1COC[C@H]1C(=O)NCc1cscc1C -O=C(NCc1nccc2ccccc12)c1ccc[nH]c1=O -CC(=O)c1ccc(S(=O)(=O)N2CCCC[C@H]2C)cc1 -O=[N+]([O-])c1c(Nc2cccc3ncccc23)ncnc1N1CCN(c2cccc(Cl)c2)CC1 -O=C(CCCO)Nc1ccc(F)cc1F -NC(=O)CCOc1ccc(NC(=O)C[C@H]2CCc3ccccc32)cc1 -COc1cc(C)ccc1OCC(=O)Nc1nnc(C)s1 -CC(=O)c1c(O)cccc1COc1ccccc1 -CCn1cc(S(=O)(=O)N2CCCCC[C@@H]2c2cc(-c3ccc(F)cc3)no2)cn1 -COC(=O)[C@](NC(=O)c1cccc(Cl)c1)(Nc1ccc(Br)c[nH+]1)C(F)(F)F -Cc1[nH]c2ccc(C(=O)Nc3cc(C(C)(C)C)nn3-c3ncccn3)cc2c1C -Cc1noc(C)c1C[C@H](C)C(=O)N[C@@H](C)C1CCCCC1 -CCn1cc(C(=O)N[C@H]2CC(=O)N(C)C2)c(C(C)C)n1 -COc1cccc(-c2cncc3ccccc23)c1C(=O)N(C(C)C)C(C)C -COc1ccc([C@@H](C)NC(=O)Cc2cccc3ccccc23)cc1 -O=C1C[C@H](c2nc(-c3cccnc3)no2)CN1c1cccc(Cl)c1 -C[C@H]1CCCN(c2ccc(C(=O)Nc3ccc(N4CCOCC4)cc3)cc2[N+](=O)[O-])C1 -C=CCN(C(=O)C/C=C/c1ccc(C)cc1)[C@@H]1CCS(=O)(=O)C1 -O=C(CSc1nnc(-c2cccc([N+](=O)[O-])c2)o1)Nc1nncs1 -CN(CCc1ccc(F)cc1)c1cc(Br)cc(F)c1C(N)=O -COc1ccccc1NC(=O)CSc1ccc(-c2ccccc2OC)nn1 -Cc1occc1C(=O)/C(C#N)=C\c1cccc(C(F)(F)F)c1 -COc1ccc2c(c1)N(C(=O)CCSc1ccccn1)C[C@@H](C)O2 -CC[C@@H](NC(=O)[C@H](C)n1cccn1)c1ccc(C)c(F)c1 -CCC[C@]1(C(=O)N[C@@H]2CONC2=O)CC[NH2+]C1 -O=C(c1cc2cc([N+](=O)[O-])ccc2oc1=O)N1CCN(Cc2ccccc2)CC1 -CCn1c(CC2CC[NH2+]CC2)nn(CCO)c1=O -C=CCn1c(S[C@H](C)C(=O)N2CCC(C)CC2)nnc1-c1ccc(Cl)cc1 -CCO[C@H]1C(=O)O[C@H]([C@@H](O)CO)C1=O -Cc1ccc(-c2nnc(C[NH+](CCO)[C@H]3CCc4ccccc43)o2)cc1 -Cc1cc(-n2c(C)cc(C[NH2+][C@H](C)c3ccc(F)c(F)c3)c2C)no1 -C[C@@H](NC(=O)Nc1ccn(-c2ncccc2Cl)n1)[C@@H]1CCCO1 -COc1cc(S(=O)(=O)N2CCN=C2Cc2ccccc2)ccc1Cl -COc1ccc(OC)c(/C=C2\Oc3cc(OC(=O)c4ccncc4)cc(C)c3C2=O)c1 -COc1ccc([C@@H](NC(=O)Nc2cc(C)ccc2Cl)C2CCOCC2)cc1 -C[C@H](Cc1cccs1)N(C)C[C@@H]1CCCC[C@@H]1[NH3+] 
-C[C@H]([NH3+])c1nc2cc(C(F)(F)F)ccc2n1C -COc1cccc(CN2CCc3nnc(CCc4ccccc4)n3CC2)c1 -O=C(N[C@H]1CCS(=O)(=O)C1)C1CC[NH2+]CC1 -COCC[C@H](C)C(=O)N[C@@H](C)COC -Cc1cc(N(C)C)ccc1NC(=O)c1ccc(CN2CC[NH+](C)CC2)cc1 -C[C@H](CNC(=O)[C@H]1CC[NH2+][C@@H]1C)C[NH+]1CCCC1 -CN(C)c1ccc([C@H](CNC(=O)C(=O)Nc2ccccc2C#N)N2CC[NH+](C)CC2)cc1 -CCOc1ncnc(S(=O)(=O)CC)c1N -CC[C@@H](NC(=O)N(C)Cc1ccc(-c2ccccc2)cc1)c1ccncc1 -O=C(Nc1ccc(-c2nc3ccccc3o2)cc1)[C@H]1CCCN1S(=O)(=O)c1ccc(F)cc1 -CC[C@@H](C)CNc1nc2ccc(Cl)cc2s1 -Cc1cc(C)c2nc(N3CCN(C(=O)[C@@H]4CCCCN4S(C)(=O)=O)CC3)sc2c1 -CCc1nnc(-c2cc3ccccc3n2CC(=O)NC(C)(C)C)o1 -CCc1ccc(NC(=O)c2nn(-c3ccc(CC)cc3)ccc2=O)cc1 -Cc1ccc(C(=O)N[C@H]2CCC[NH2+][C@H]2C)cc1F -C[C@H](OC(=O)c1nc(C2CC2)n2ccccc12)c1cnc2ccccc2c1 -CCCCOc1ccccc1C[C@@H]([NH3+])C(=O)[O-] -CCC[C@@H]1CN(C(=O)C(=O)Nc2ccc(C)nc2Cl)CCO1 -C[C@H]1C(=O)N(c2ccc3c(c2)CCC3)CCN1C(=O)c1ccc(Cl)c(Cl)c1 -COC(=O)C1(NC(=O)[C@H]2C[C@H]2c2c(F)cccc2F)CCSCC1 -N#CC1(NC(=O)COc2cccc(Cl)c2)CCCC1 -COC1CC[NH+](CCNc2nccn(C)c2=O)CC1 -C=CCN(Cc1cccc([N+](=O)[O-])c1)C(=O)Nc1cc(OC)ccc1Cl -Cc1cc(Cl)ccc1OCC(=O)N/N=C/c1ccccn1 -O=C1NC(=S)NC(=O)C1=CNc1ccc([N+](=O)[O-])cc1O -Cc1c(C(=O)N2CCOCC2)oc2c1-c1nn(CC(=O)NCc3ccco3)cc1CC2 -CCc1ccc(CNC(=O)c2ccc(-c3nccnc3N3CCCCC3)cc2)cc1 -COc1ccc([C@H]2C[C@@H](C(F)(F)F)n3nc(C(=O)NC4CCCCC4)cc3N2)cc1OC -CCCc1cc(C(=O)NNC(=O)c2cccc(Br)c2)[nH]n1 -O=[N+]([O-])c1c(Nc2ccc(F)c(F)c2)ncnc1Oc1cccc2cccnc12 -CC(C)(C)n1ncc2c1CCC[C@H]2NC(=O)CSc1nc2ccccc2o1 -Cc1ccc([C@@H](C)NC(=O)N[C@@H](CCO)c2cccs2)cc1 -CCN(CC)S(=O)(=O)c1ccc2nc(-c3ccncc3)cc(C(=O)[O-])c2c1 -CCCN(CC)c1cc[nH+]c(C(=O)[O-])c1 -Cc1ccccc1N1C(=O)C[C@H]([NH+](C2CCCCC2)C2CCCCC2)C1=O -CS(=O)(=O)[C@H]1O[C@H]1c1ccc(Cl)cc1Cl -C[C@H](CSc1ccc(C(=O)N(C)C)cn1)C(=O)[O-] -CCOC(=O)[C@H]1C=C(C#N)O[C@@H](c2ccc(C)cc2)C1 -CCC[NH2+]C1CCC(O)(Cc2nc(C)cs2)CC1 -O=C1c2ccccc2N[C@H](CSC2=NC=NC3=NC=N[C@@H]32)N1c1ccc(Cl)cc1 -CCc1nc2ccccc2c(C(=O)NCc2ccc(OC)c(C(=O)OC)c2)c1C -Cc1nn(-c2nncc(-c3ccc(Cl)cc3)n2)c2c1[C@H](c1ccccc1)CC(=O)N2 -CC(C)NC(=O)Nc1cccc(C(=O)N(C)Cc2nnc(C3CC3)n2C)c1 
-Cc1nnccc1C(=O)N[C@H](C)c1ccc(Cl)s1 -O=C(NCCCc1nc(-c2ccc(Br)o2)no1)C1CC1 -CCOc1cccc(NC(=O)CCc2ccc(N)cc2)c1 -COc1ccc(/C=C2/SC(=O)N(CC(=O)Nc3ccc(F)cc3)C2=O)cc1OC -CC(C)N(C)C(=O)[C@H]1CSCN1C(=O)/C=C/SCc1ccco1 -CCC1(CC)[C@@H](NC(=O)Nc2ccc(C(=O)NC)cc2)[C@H](C)[C@@H]1OC -Cc1ccc2ncc(C(=O)Nc3ncccc3OCc3ccncc3)n2c1 -N#Cc1ccnc(N2CCC([NH2+]C[C@@H]3CCCO3)CC2)c1 -O=C(Cn1nnn(-c2cccs2)c1=O)OC1CCCCC1 -COc1cccc(COc2ccc(OC)cc2CCl)c1 -CC[C@H](NC(=O)NCc1c(C)noc1C)c1ccc(OC)cc1 -CCc1c(C(=O)Nc2ccc3c(c2)NC(=O)CS3)[nH]c(C)c1C(C)=O -CSc1ccc(/C=c2\sc3ncnn3c2=O)cc1 -COc1ccc(C(=O)/C=C(\C)Nc2ccc(F)cc2F)cc1 -O=C(C=C1CCSCC1)N[C@@H]1CCC[C@H]1Cc1ccccc1 -C[C@@H](Sc1nc(/C=C/c2cccs2)n[nH]1)C(=O)N1CCOCC1 -Cc1cccc(C(=O)N[C@@H](C(=O)N2CCC[C@@H](C)C2)C(C)C)c1 -COC(=O)[C@@H](C)Sc1nnc(Nc2cccc(Br)c2)s1 -Cc1cnc(C(=O)Nc2ccc(N(C)C3CC[NH+](C)CC3)cc2)cn1 -CC(=O)Nc1nc2ccc(NC(=O)NCc3ccccc3)cc2s1 -C=CCN1C(=O)/C(=C/c2ccccc2F)S/C1=N\S(=O)(=O)c1cccs1 -CC[C@H]1CC(=O)N(Cc2ccccc2C#CCCO)C1 -COc1ccc(F)cc1C(=O)Nc1nccs1 -CC[C@@H](CC(=O)NC1(C(=O)OC)CCSCC1)c1ccccc1 -Cc1ccc(C)c(-n2c(SCCCCCO)nc3ccccc3c2=O)c1 -CC[C@@H](C)[C@@H]([NH3+])c1ccc(Cl)s1 -COc1cccc([C@@H](C)[NH2+]CCOc2ncccc2Cl)c1 -CC[C@@H]1CCCCN1C(=O)NC1CCN(C(=O)OC(C)(C)C)CC1 -CCOc1cccc(NC(=O)NCc2ccc(N3CCSCC3)cc2)c1 -Cc1cccc(NC(=O)CN2CCN(c3ccc4c(c3)OCCO4)C2=O)n1 -C=C(C)C(=O)N[C@H](C)c1nc2ccccc2n1CCC(=O)N1CCCCCC1 -CCOC[C@H]1CC[NH+](Cc2ccc(-c3nc4ccccc4s3)o2)C1 -CCOC(=O)[C@]1(Cc2cccc(Cl)c2)CCCN(C(=O)c2ccnn2C)C1 -Cc1ccc([N+](=O)[O-])cc1NC(=O)C(=O)N1CC[C@H]([NH+]2CCCC2)C1 -CCOCCCNC(=O)N[C@@H]1CCC[C@@H](CC)C1 -O=C(Cc1cccc(F)c1F)Nc1cccc(Br)n1 -COc1ccccc1NC(=O)[C@@H]1CCCN(C(=O)Nc2cccs2)C1 -C[C@H]1CCC[C@](C#N)([C@]2(O)CCCCC2(C)C)C1 -O=C(NCc1ccc([N+]2=CCCC2)cc1)NC1(c2ccc(Cl)cc2)CC1 -CCCC(=O)N[C@@H]1CCC[NH+](Cc2ncccc2C)C1 -O=C(NCc1cccs1)C1(c2cccc(Cl)c2)CCC1 -C[C@H]1CCC[C@H](NC(=O)[C@@H](C)Sc2ncn[nH]2)[C@@H]1C -COc1ccc([C@@H]([NH2+]Cc2ccc(Cl)nc2)c2ccc(F)cc2)cc1 -COc1cc(NC(=O)[C@H](C)Sc2ccccc2Cl)cc(OC)c1 -CCN1CCC(=NNC(=O)c2ccccc2)CC1 -CCCOc1ccc(Br)cc1C[NH+]1CCC([C@@H](C)O)CC1 
-Cc1cc2n(C[C@H](O)CO[C@H](c3ccccc3)c3ccccc3C)c(=O)c3ccccc3n2n1 -CCc1ncc(CN(C)C(=O)Nc2c(C)ccc([N+](=O)[O-])c2C)s1 -c1ccc2nc(NCCCc3nc4ccccc4[nH]3)cnc2c1 -Cc1c([C@H](C)[NH2+]Cc2cccn2C)cnn1C -CC(=O)N[C@@H](C(=O)NC1COC1)C(C)C -O=C(Nc1ccccc1F)c1cc2ccccc2c2cccnc12 -Cc1ccccc1N1C(=O)/C(=C/c2cccn2-c2cccc([N+](=O)[O-])c2)C([O-])=NC1=S -COCCN1C[C@H](C(=O)N(Cc2cccc(Cl)c2)C(C)C)CC1=O -COc1ccc(NC(=O)N2CCN(C(=O)Cc3csc4ccccc34)CC2)cc1OC -C#CCN(C[C@H]1CCCO1)C(=O)N[C@@H](C)c1cccc([N+](=O)[O-])c1 -Cc1cccc(C2=CCN(C(=O)Nc3ccc(C(N)=O)c(Cl)c3)CC2)c1 -COCCCN1C(=O)c2ccc(C(=O)Nc3nc(-c4ccc(C)cc4)cs3)cc2C1=O -O=S(=O)(Nc1ccc(N2CCCS2(=O)=O)cc1)c1ccc(F)c(Cl)c1 -O=C1/C(=C/c2ccccc2)Oc2c1ccc1c2CN(Cc2cccs2)CO1 -CC[C@@H](C)[C@@H](NC(=O)c1cccc(F)c1)C(=O)N=c1[nH]c2ccccc2[nH]1 -Cc1c(F)cc(N)cc1S(=O)(=O)N[C@@H](C)C1CC1 -Cc1ccc(Cn2ncc3c(N)ncnc32)cc1 -CCOC(=O)C(C)(C)c1nc(-c2ccccc2)no1 -CCOC(=O)c1sc(/C=C/c2nc3c(s2)CCC3)nc1C -C[C@@H]1CC[C@@H]([NH2+]C2CCC(NS(C)(=O)=O)CC2)c2ccccc21 -CN(C)S(=O)(=O)c1ccc(C(=O)N(C(=O)N2CCCCC2)c2ccccc2)cc1 -CC[NH2+][C@@H](CC)c1ccccc1OCc1cccc(F)c1 -C=CCOC(=O)C1=C(C)N=C2S[C@H](C)C(=O)N2[C@H]1c1ccc(F)cc1 -CCC[NH2+][C@]1(C(=O)OCC)CC[C@H](n2cc(Cl)c(C)n2)C1 -Cc1cc(NC(=O)CSc2nnc3c4ccccc4n(C)c3n2)ccc1Br -CCOC(=O)c1sc(NC(=O)c2ccc(-n3c(C)nc4ccccc4c3=O)cc2)cc1C -CC#CCC(=O)C1([NH+](CC)CC)CCCC1 -O=C(NCCCc1ccccc1)C1CCN(C(=O)[C@@H]2CC(=O)N(c3ccccc3)C2)CC1 -O=Cc1ccc(OCc2ccn(-c3cccc(F)c3)n2)cc1 -Clc1ccccc1Cn1ccnc1 -Cc1[nH]c(=O)c(C(=O)N2CCN(c3ccccc3)C(=O)C2)c(C)c1C -C[C@@H](NC(=O)NC[C@H]1CCCN(c2ncccn2)C1)C1CCOCC1 -O=C(NCCS(=O)(=O)c1ccccc1)N1CCC[C@@H]2CCC[C@@H]21 -Cc1ccc(-c2cc(NC(=O)C(C)C)c(=O)n(CC(=O)Nc3cccc(C)c3)n2)cc1 -Cc1ccc(S(=O)(=O)N2CCN(C(=O)[C@H]3CCCC[C@@H]3C(=O)[O-])CC2)cc1C -CCc1ccc(-c2nc(C(=O)N3CCO[C@H](CC)C3)cs2)cc1 -Cc1cc(C)cc(OCC(C)(C)C[NH2+]C2CC2)c1 -CCOC(=O)[C@H](F)[C@@]1(O)CCC[NH+](C(C)C)CC1 -CCn1cc(/C=C/C(=O)c2ccc3ccccc3c2)cn1 -CCC(CC)C(=O)Nc1cnn(-c2ccccc2F)c1 -O=C1O[C@H](C(=O)Nc2ccnc3ccnn23)Cc2ccccc21 -COCCn1nnnc1[C@@H](C(C)C)N1CCSCC1 -O=C(NCCNS(=O)(=O)c1cccc(Cl)c1F)c1cccnc1 
-Cc1ccc(CNC(=O)NCc2nnc3n2CCC3)s1 -C/C=C/C[C@]1(C(=O)[O-])CCN(C(=O)OC(C)(C)C)C1 -O=C(N[C@@H](CO)c1ccco1)c1cc(Cl)ccc1OC1CCCC1 -Cc1cc(N(C)C)ccc1NC(=O)c1c[nH]c2nccc(Cl)c12 -CCOC(=O)c1cccc(NC(=O)c2cn[nH]c2C)c1 -CCC(CC)[S@](=O)CCC(=O)[O-] -COCc1ccc(C(=O)N(C)Cc2ccc(Cl)s2)cc1 -O=C(CCc1nc2ccccc2c(=O)[nH]1)Nc1cc(Cl)c(Cl)cc1Cl -CC[C@@H](C)NC(=O)c1ccc2c(c1)CCCN2S(C)(=O)=O -COc1ccc(C)cc1-n1nnnc1SCC(=O)Nc1cc(C)cc(C)c1 -C[C@H](NC(=O)Cc1ccc[nH]1)C(=O)N1CCCC[C@H]1C -CCOC(=O)[C@H]1CCCN(C(=O)c2cc(C(C)C)n(C)n2)C1 -C[C@H]([NH2+]Cc1nc(Cc2ccccc2)no1)[C@@H](C)n1cccn1 -c1cc(CN2CC[NH+](Cc3ccc4c(c3)OCCO4)CC2)no1 -CCCCNC(=O)CCc1c(C)nc2c3ccccc3nn2c1C -O=C(C[C@@H]1C[NH2+]CCO1)N[C@H]1C=CS(=O)(=O)C1 -C[C@@H]1CCO[C@@H]1C(=O)N1CC[C@H](C(N)=O)c2ccccc21 -CCC[NH2+][C@H](Cc1nn(C)c2ccccc12)c1ncc[nH]1 -CC(C)c1noc(-c2cc[nH+]c(N3CCN(C(=O)[C@H]4C[C@H]4C)CC3)c2)n1 -O=C(NCc1cccnc1)NCc1ccnc(OCC(F)F)c1 -C[C@@H](NC(=O)N1CCCCCCC1)[C@@H]1CCCO1 -COc1ccc([C@H]2C(C(=O)NCc3ccccc3)=C(C)Nc3ncnn32)cc1OC -Cc1cc(=O)n2c(n1)SC[C@@H]2CC(=O)NCCC(C)C -CC(C)C1=CN=N[C@H]1[C@H]1CCC[NH+](C[C@@H](C)Cc2ccc3c(c2)OCO3)C1 -COc1cc(CNC(=O)c2occc2Br)ccn1 -Cc1cccc(COc2ccc(Br)cc2/C=C2\SC(=S)NC2=O)c1 -O=C(N[C@@H]1CCO[C@@H]1c1ccc(Cl)c(F)c1)[C@H]1Cc2ccccc2O1 -Cc1ccc([C@]2([NH3+])CC[C@@H]2C)cc1 -O=C(CNC(=O)c1ccco1)OCC(=O)c1ccc2ccccc2c1 -O=C(c1cc2c(F)cccc2[nH]1)N(C[C@@H]1CCCO1)c1ccncc1 -Cc1cc(C)c(NC(=O)c2cc3ccccc3n2C)c(C)c1 -CSc1ccccc1NC(=O)N[C@@H](CO)c1ccc(Cl)cc1 -Cc1cc(Br)ccc1CNC(=O)C1CC=CC1 -COc1ccc(CN/C(C)=C2/C(=O)N(c3ccc(OC)cc3)N=C2C)cc1 -O=C(COC(=O)C1(c2ccccc2F)CCCC1)N1CCOCC1 -CC1(C)C(=O)NCC[NH+]1Cc1ccc(OCC(F)F)cc1 -Cc1ccc([N+](=O)[O-])cc1C(=O)Nc1ccc(C(=O)NC(C)C)cc1 -C[C@@H]([C@@H](O)c1ccc2ncnn2c1)[N+](=O)[O-] -C[C@H](Oc1ccc(Cl)c(Cl)c1)C(=O)NC[C@H]1CCC[C@@H]1O -COC[C@H]1CCC[NH+](Cc2cc(C)n(Cc3ccco3)c2C)C1 -Cc1ccc([C@H](C)NC(=O)CN(C)C(=O)OC(C)(C)C)cc1F -CC(=O)NCCC(=O)N1CCC[C@@H](C)C1 -COc1ccc(N2CCn3c2nn(CC(N)=O)c(=O)c3=O)cc1 -ClC(Cl)(Cl)c1nonc1C(Cl)(Cl)Cl -CCc1sc(C(=O)N2CCN([C@@H](C(N)=O)c3ccccc3)CC2)cc1C 
-C[NH+](C)Cc1cc(NC(=O)CCCC(=O)N2CCCCCC2)[nH]n1 -C[C@@H](Nc1ccc(COC(C)(C)C)cc1)c1ccc(C#N)cc1 -NC(=O)c1ccc(NC(=O)c2cccn(Cc3ccc(F)cc3)c2=O)cc1 -Cc1cc(C(=O)N2CC[C@H](C)C[C@H]2C)c2c(C)nn(C)c2n1 -Cc1c(C(=O)N2CCCC2)oc2c1-c1nn(CC(=O)N3C[C@@H](C)C[C@@H](C)C3)cc1CC2 -Cc1cc(C)cc(NC(=O)CC(C)C)c1 -COc1cc(OC)cc([C@H]2CC[NH+](CCC(F)(F)F)C2)c1 -CCn1ccnc(N2CCCC[C@@H](N3CC[NH+](C)CC3)C2)c1=O -C[C@@H]1CCC[C@](O)(c2ccc(Cl)s2)CC1 -CCOC(=O)C[C@H](C)CNC(=O)C(=O)N1CCc2ccc(C)cc21 -COc1ccccc1C[NH+]1CCC[C@H](N2CCCC2=O)C1 -CCNc1ccc2c(OC)ccc(F)c2n1 -Oc1ccc(C2[NH+](Cc3ccccc3)CC[NH+]2Cc2ccccc2)c(O)c1 -Fc1ccc([C@@H]2C[C@@H](c3ccc(Br)cc3)Nc3ncnn32)cc1Br -COc1ccc(O)c(CNC2CC[NH+](Cc3ccccc3Cl)CC2)c1 -Cc1ccc(NCc2cccc(C(=O)NCc3ccco3)c2)c(F)c1 -Cc1c(C(=O)N(C)[C@@H]2CCN(c3ccccc3Cl)C2=O)cnn1C -Cc1nc(C(C)(C)C)[nH]c(=O)c1C(=O)Nc1cccc(Cl)c1C -CCOC(=O)[C@H]1CCCN(C(=O)c2cn(CCc3ccccc3)nn2)C1 -C[C@@H]1[C@H](C(=O)[O-])CCN1S(=O)(=O)c1ccc(F)c(Cl)c1 -CC1CCC(OC(=O)C2=NC3=C(C(=O)C[C@@H](c4ccccc4)C3)[C@H]2C)CC1 -CN(Cc1ccno1)Cc1c(C(=O)N2CC[NH+](C3CCCCC3)CC2)nc2ccccn12 -Cc1cc(F)c([C@@H]([NH3+])[C@H]2Cc3ccccc3O2)cc1F -COc1ccc(OCC(=O)N/N=C2\CCCc3ccccc32)cc1 -CC(C)[NH+]1CCC(N2CC[NH+](Cc3c(F)ccc(F)c3F)C[C@@H]2CCO)CC1 -CC[NH2+][C@@H](C)c1cc(F)c(C)cc1N1C[C@H](C)S[C@H](C)C1 -CC(C)c1nsc(NC[C@H](C2CC2)[NH+](C)C)n1 -Cc1ccc(-c2nc(C[NH+]3CCCC[C@H]3c3cccnc3)c(C)o2)s1 -COc1cccc(C2=C[C@H](C(=O)N3CCCCC3)N=N2)c1 -O=C(Nc1ccc2[nH]c(=O)[nH]c2c1)c1cc(S(=O)(=O)NC2CC2)ccc1Br -c1ccc2c(c1)CC[C@H]([C@H]1CCCc3cccnc31)N2 -Cc1ccc(NC(=S)NC(C)C)cc1C -C[C@@H](Nc1ccc2c(c1)CCC2)C(=O)N1CCCC1 -CCOc1ccc(Nc2ccc(C#N)c([N+](=O)[O-])c2)cc1 -CC(C)OC(=O)CCNC(=O)c1cnn(-c2ccc(F)cc2)n1 -CCCCOc1ccccc1NC(=O)c1scnc1C1CC1 -O=C1N=C(N2CCCCC2)S/C1=C1/C(=O)Nc2ccccc21 -Cc1c([C@H](C)NC(=O)c2[nH]c3ccccc3c2Cl)cnn1C -CSc1cccc2sc(N3CCN(C(=O)c4ccn(C(C)C)n4)CC3)nc12 -Cc1ccc(C)c2nc3sc(C(=O)Nc4ccc5c(c4)OCO5)c(N)c3cc12 -COC(=O)CCN(Cc1cnc2ncccn12)C1CCOCC1 -O=C(c1c[nH]c2ccc(F)cc12)N(CC1CC1)CC(F)(F)F -O=C(Cc1ccc(Cl)cc1)/N=C1/S[C@@H]2CS(=O)(=O)C[C@H]2N1c1cc(Cl)ccc1Cl 
-Cc1cccc(C)c1NC(=O)C[NH+]1CCC(OCc2ccc(F)cc2)CC1 -C[C@H]1CCC[C@@H](C)N1C(=O)[C@@H]1COCCO1 -CC(C)[C@@H](C)CC(=O)NNC(=S)NC1CCCCC1 -Cc1cccc(NC(=O)CN2C(=O)/C(=C3\SC(=S)N(Cc4ccco4)C3=O)c3ccccc32)c1 -CC(=O)N[C@@H]1C(=O)C[C@@H]2[C@H]3CCC4=CC(=O)CC[C@@]4(C)[C@@H]3CC[C@]12C -CCCNC(=O)[C@H]1CS[C@H](c2ccccc2O)N1C(C)=O -Cc1ccc(N2CC[C@H](C(=O)NC[C@@H](CC(C)C)N3CCOCC3)C2=O)cc1 -COc1ccc(NC(=O)CCc2ccc3c(c2)OCCO3)cc1OC -Cc1ccc(C(F)(F)F)cc1/C=C/C(=O)[O-] -Cc1ccccc1C(=O)Nc1ccc(N2CC[NH+](Cc3ccccc3)CC2)cc1 -c1cc(C[NH2+]Cc2ccco2)cc(OC2CCCC2)c1 -O=C(c1cccs1)N(Cc1ccc(F)cc1)Cc1cc(-c2ccccc2)cn2nnnc12 -COc1ccccc1CC(=O)N[C@@H]1CS(=O)(=O)C[C@H]1Cl -c1cc(-c2nc3c4cn[nH]c4ncn3n2)ccc1COc1ccc2c(c1)CCC2 -CN(C(=O)c1ccc(Cl)cc1O)C1CCC(=O)CC1 -CNC(=O)CNS(=O)(=O)c1cccc(C(F)(F)F)c1 -Cc1ccc(S(=O)(=O)N2C[C@@H](CC(=O)[O-])c3ccccc32)c(C)c1 -Cc1cc(N)nc(SCC(=O)NC[C@@H](c2ccccc2)C(C)C)n1 -Cn1cc(C(N)=O)c(NC(=O)c2ccc3sccc3c2)n1 -COc1cc2c(cc1O)[C@H](c1cnc(-c3cccc(C)c3)nc1)CC(=O)N2 -CC(=O)Nc1ccc(O[C@H](C)c2nc(C(C)(C)C)no2)cc1 -O=C(c1c(-c2ccccc2)nc2sc3c(n12)CCCC3)C(F)(F)F -CC(C)CNC(=O)NC(=O)[C@@H](C)Nc1ccc(OC(C)C)cc1 -C=C[C@](C)(O)CC[C@H]1C(C)=CC(=O)[C@H]2C(C)(C)CCC[C@@]21C -CC[C@H](C)Sc1nncn1-c1ccccc1C -COCC(=O)N1CCCc2ccc(NC(=O)c3cccc(Br)c3)cc21 -C[C@H](Oc1cccc(Cl)c1)C(=O)Nc1ccc2ccccc2c1 -Cc1cc(Br)ccc1SCC(=O)N1CCC(C(=O)c2ccc3c(c2)OCCO3)CC1 -Nc1ccc2c(c1)CN(C(=O)c1ccc(Cl)cn1)CC2 -C[C@H](CN(C)C(=O)c1ccc(F)c(F)c1F)C(=O)[O-] -NC(=O)c1ccc(SCC(=O)Nc2ccc3c(c2)Cc2ccccc2-3)c([N+](=O)[O-])c1 -CN(C(=O)CSCC(F)(F)F)c1cccc([N+](=O)[O-])c1 -C=CCn1c(SCC(=O)Nc2cc(C)on2)nnc1[C@H]1COc2ccccc2O1 -CC(=O)N[C@@H](CC(=O)Nc1ccnn1Cc1ccc(C)o1)c1ccccc1 -CNC(=O)Cc1nc(C[NH+](C)C2CCC(c3ccccc3)CC2)cs1 -COCCOc1c(Cl)cccc1NC(=O)Cc1c[nH]c2ccccc12 -COCCN1[C@@H](C)CN(C(=O)C[NH+](C)C2CC2)C[C@H]1C -CSc1nncn1/N=C\c1cc(Cl)ccc1F -CC(C)(C)OC(=O)N1CC[C@H]2CC(=O)[C@H]2C1 -C[C@@H](CCO)SCc1ccccc1OC(F)F -C[C@@H]1C[C@@H]1C(=O)Nc1ccc(F)cc1C(=O)NC1CCC(O)CC1 -COc1ccccc1CNC(=O)COc1ncnc2oc(C)c(C)c12 -Fc1ccc(F)c(C[NH+]2CCC(n3cc(-c4cccnc4)nn3)CC2)c1F 
-O=C(C[NH+]1CCC(C(=O)c2ccc(Cl)cc2)CC1)NC[C@H]1COCCO1 -CCC[NH+](C)C[C@H]1CCN(C(=O)Nc2cc(NC(C)=O)ccc2C)C1 -Cc1nc2ncnn2c(NCCOC2CCCCCC2)c1C -C[C@H]1CN(Cc2cnn(-c3ccccc3)n2)C[C@H](C)S1 -COc1cc(OC)cc([C@@H](N[C@@H](C)c2ccc(F)cn2)c2[nH+]ccn2C)c1 -CC(C)CCc1noc(C[NH+](C)[C@H]2CCC[C@@H]2S(C)(=O)=O)n1 -CCc1nc2n(n1)CCC[C@H]2NC(=O)c1ccc(-n2cc(C)cn2)cc1 -C[C@@H](NC(=O)c1ccccc1CSc1nc2ccccc2[nH]1)C1CC1 -O=C([C@H]1CCCN1S(=O)(=O)N1CCCCC1)N1CCSCC1 -Cn1c(=O)c(=O)n(CC(=O)N2CCC3(CC2)OCCO3)c2cccnc21 -COc1ccc(C(=O)N(CC2=CC=C[C@@H]3N=CC=C23)C2CC2)cc1 -CCc1ccc(-c2nc(N)ccc2[N+](=O)[O-])cc1 -c1csc([C@@H]2CN(Cc3cnc(C4CCC4)s3)CCO2)c1 -O=S(=O)(/N=C(\[O-])c1ccsc1)N1CCCC1 -Cc1nccn1CC(=O)N1CCCC[C@@H]1CCNC(=O)c1ccccc1 -C[C@H](Sc1nnc(-c2ccc(Cl)cc2)n1C[C@H]1CCCO1)C(=O)Nc1ccc2c(c1)OCO2 -COc1ccc(-n2nnc(-c3nc(-c4ccc5c(c4)OCO5)cs3)c2C)cc1OC -CN(c1ccccc1)S(=O)(=O)c1ccc2c(c1)C(C)(C)C(=O)N2 -Cc1cc(S(=O)(=O)N2CCN(C(=O)[C@H]3C[C@H]3c3ccc(Cl)cc3)CC2)c(C)s1 -O=C(Nc1ccc(Oc2ccc(Cl)nn2)cc1)[C@@H](O)c1ccccc1 -CCCc1cc(=O)n2c(n1)SC[C@@H]2CC(=O)Nc1cccc(Cl)c1Cl -Cc1ccc(NC(=O)CSc2nnc([C@@H]3CCCN3C(=O)c3cccc(C)c3)n2C)cc1 -Cc1noc(C)c1CCCNC(=O)N[C@H]1CC(=O)N(C2CC2)C1 -CC(C)(C)[C@@H]1CCC(=O)[C@@H](CN2CCOCC2)C1 -COc1cc(/C=C2\SC([N-]c3cccc(C(=O)[O-])c3)=NC2=O)cc(OC)c1O -C=CCO[C@H](C)C(=O)Nc1ccc(F)cc1Br -O=C(CN1CCN(Cc2ccc(F)c(F)c2)CC1)c1cccs1 -CC[NH+]1C[C@H](c2ccccc2)CC2(CCN(C(=O)c3ccon3)CC2)C1 -C[C@H]1C[C@H]([NH+]2CC[C@H](S(=O)(=O)NC3CC3)C2)CC(C)(C)C1 -CC[C@H](Sc1cc(C)c2cccc(C)c2n1)C(=O)Nc1nc2ccc(S(N)(=O)=O)cc2s1 -COC(=O)[C@]1(NC2CC2)CC[C@H](Sc2ncc(C)cn2)C1 -COc1ccccc1Nc1nn(CN(C)OC)c(=S)s1 -Cc1cc(C)c(C)c(S(=O)(=O)/N=C(\[O-])c2cc(C3CC3)n(C(C)(C)C)n2)c1C -CC(C)(O)C#Cc1ccc(C[NH2+][C@H]2CCCN(c3nc4ccccc4s3)C2)s1 -COc1ccc(OC)c(S(=O)(=O)n2cc3c(=O)n(C)c(=O)n(C)c3n2)c1 -C/[NH+]=C(/NCc1noc(C(C)(C)C)n1)N[C@@H](C)c1ccc(F)cc1F -Cc1nnsc1C(=O)Nc1nnc(-c2ccc(Br)cc2)o1 -CCN(Cc1ccc(OC)c(OC)c1)C(=O)C[NH+]1CC[C@@H](C)[C@H](O)C1 -O=C([O-])[C@H]1CCCCN1C(=O)CSCc1ccccc1 -Cc1cccc([C@H](O)C[C@@H]2CCCCC[NH2+]2)c1 -O=C(C1CCCCC1)N1CCN(Cn2cc(Br)cn2)CC1 
-Cc1ccccc1COc1ccc([C@@H]2C3=C(CCCC3=O)Nc3nnnn32)cc1 -CCO[C@@H]1C[C@@H]([NH+](C)C[C@@H]2CCCN(S(C)(=O)=O)C2)C12CCCCC2 -CCn1c(=O)c(=O)[nH]c2cc(C(=O)NN3C(=O)N[C@](C)(c4ccccc4)C3=O)ccc21 -COc1cc(OC)cc([C@@H](NC(=O)N(C)C2CCCCC2)c2nccn2C)c1 -O=C1[C@H]2[C@@H]3C=C[C@@H](C3)[C@H]2C(=O)N1CN(C(=O)C(F)(F)F)c1cccc(C(F)(F)F)c1 -CCNc1ncc(COc2cccc3ccccc23)s1 -Cc1noc(C)c1CCCNC(=O)c1c(C)nn(Cc2ccccc2)c1C -C[C@H]1CC([NH2+][C@@H](C)c2c[nH]c3cc(F)ccc23)C[C@H](C)O1 -C/C(=C1/SC(=O)N(c2ccc(Cl)cc2)C1=O)c1ccc(Br)cc1 -Cc1ccc(NC(=O)C[C@H]2SC([N-]c3ccc(N(C)C)cc3)=NC2=O)c(C)c1 -CO[C@H]1CCCC[C@H]1NC(=O)NC[C@H](c1cccc(F)c1)[NH+](C)C -Cc1nn(C)c(C)c1CN[C@H]1CCC[NH2+]C1 -Cc1cc(=O)[nH]c(SCC(=O)N2C[C@]3(C)C[C@H]2CC(C)(C)C3)n1 -COC1CC[NH+](Cn2nc(-c3ccc(C)cc3)n(C)c2=S)CC1 -C[NH2+][C@]1(C(=O)[O-])CCC[C@@H](OCC2CCCCC2)C1 -Cc1nnc(SCC(=O)c2cc(C)n(CC(F)(F)F)c2C)s1 -CC(C)CN(CCC#N)C(=O)NC[C@@H]1CC[C@H](C(=O)[O-])O1 -Cc1cc(CN2CCN3C(=O)NC[C@@H]3C2)cc(C)c1OC(F)F -CCNC(=O)c1cccc(NC(=O)NCCCSC)c1 -Cn1c(-c2cccc3ccccc23)nn(CN2CCOCC2)c1=S -CCCCCN1C(=O)/C(=C/c2ccc(O)c(OCC)c2)SC1=S -N#CCCN(Cc1ccccc1)C(=S)NC(=O)c1cccc(Cl)c1 -CCCS(=O)(=O)c1ccccc1C(=O)Nc1nnc(CC)s1 -CNC(=O)[C@H](C)CN(C)Cc1cc(=O)n2cccc(C)c2[nH+]1 -COCCNC(=O)/C(C#N)=C/c1cccc(O)c1 -CNC(=O)CN1c2ccccc2C(=O)N(C)[C@H]1c1ccccc1O -CC(=O)N[C@H](C)C(=O)Nc1ccc(Sc2nncs2)c(Cl)c1 -CC[n+]1c(N)n(CCOc2ccc(Cl)cc2Cl)c2ccccc21 -COC(=O)CCCc1nnc(NC(=O)N2CCC[C@@H]3CCC[C@@H]32)s1 -O=C(N[C@H]1CCCC[C@H]1OC1CCCC1)c1ccc([N+](=O)[O-])cc1 -O=C(CCOc1ccccc1)NNC(=O)CC1(O)CCCC1 -C=CCN(CC(=O)[O-])C(=O)[C@@H](C[NH3+])C(C)C -O=C(CCn1ccccc1=O)NCC1(c2ccccc2)CC1 -COc1cc(C)c([C@@H](C)NC(=O)CSC2CCCC2)cc1OC -C=CCNC(=O)Nc1ccc(F)c(NC(=O)OC)c1 -Oc1ccccc1/C=[NH+]/CCC/[NH+]=C/c1ccccc1O -Cn1cccc1Cc1nnc(SCC(=O)Nc2ccc3c(c2)OCCO3)n1C -C[C@@H](C#N)CNC(=O)c1cccc(Oc2cccc(C(F)(F)F)c2)c1 -Cc1oc(-c2ccccc2)nc1CCNC(=O)c1ccc([S@@](C)=O)cc1 -CCc1noc(C)c1C[NH+](C[C@@H]1CCCCO1)C(C)C -Cc1ccccc1Oc1cc(Br)ccc1C[NH3+] -CCc1cccc(CC)c1NC(=O)NC1CC1 -CC[NH+]1CCN(C2(CNC(=O)c3ccccc3Br)CCCCC2)CC1 -CCCCn1nc(C)c(C[NH2+]C[C@@H](C)O)c1Cl 
-C/C=C/C=C/C(=O)N1C[C@@H](C(=O)OC)[C@@H](C)C1 -COc1cccc(OC)c1OC1CC[NH+](Cc2ccc([C@H]3C[C@@H]3C)o2)CC1 -COC(=O)c1cc(CSc2nnc(-c3cccnc3)n2-c2ccccc2F)oc1C -CCOc1ccc2cc(C(=O)NCc3ccccc3)c(=[NH2+])oc2c1 -O[C@H]1CCN(c2ccnc(N3CCc4[nH]c5ccc(Cl)cc5c4C3)n2)C1 -O=S(=O)(NC[C@H](O)c1ccc(C(F)(F)F)cc1)c1cc(F)ccc1F -O=C(CSc1ccc2c(c1)OCCCO2)NC(=O)c1cccs1 -C[NH+](C)CCSc1ccc(NC(=O)C2CC2)nn1 -Cc1ccc(C)c(NC(=S)NCCc2cccs2)c1 -CNc1ncc(F)c(-c2cccc(Cl)c2)n1 -Cc1ccc(-c2nnc(SCC(=O)Nc3ccc(CC#N)cc3)n2N)cc1 -CCN(CC)C(=O)c1ccccc1OC(C)=O -Cc1ccc(-c2nc(-c3ccc(OCC(F)(F)F)nc3)no2)cc1 -COc1ccc([C@@H](CNc2nc3ccccc3o2)N2CCOCC2)cc1 -CNC(=O)[C@H]1CCCC[C@H]1[NH2+][C@H](C)c1cc(C)cc(C)c1 -CCN[C@H](c1cccnc1)C1([NH+]2CCCCC2)CCCC1 -Cn1ncc2c(NCc3ccco3)nc(CCc3ccccc3)nc21 -Cn1cc[nH+]c1C[C@H]1CCC[NH+](Cc2ncc(-c3ccccc3Cl)o2)C1 -Cc1cc(N2CC[C@H](C)[C@H](O)C2)nc(C)[nH+]1 -Cc1nnc(CCC[NH+]2CCC(CC[NH+]3CCCC[C@@H]3C)CC2)o1 -CCc1nn(C)cc1CNC(=O)C1(CC)CCC1 -COc1ccc(N2/C(=N/C(=O)CCCC(=O)[O-])S[C@@H]3CS(=O)(=O)C[C@H]32)cc1Cl -CC(=O)C1=C([O-])C(=O)N(CCC2=c3ccccc3=[NH+][C@H]2C)[C@H]1c1ccccc1F -COc1cc(C)c(C(=O)N[C@H]2C[C@H](C)N(c3ccccc3)C2)cc1OC -Cc1ccc([C@H]2C[C@@H]2NC(=O)N2CCC(C(N)=O)CC2)cc1C -Cc1nc2n(n1)CCC[C@@H]2N[C@@H]1CCc2c(Cl)cc(Cl)cc21 -CC(C)Oc1ccc(-c2nc(C(=O)O[C@@H](C)[C@@H]3CCOC3)cs2)cc1 -CN(C[C@@H]1CCCN(C(=O)NCCc2ccc(F)cc2)C1)C(=O)OC(C)(C)C -COc1cc(Cl)c(C)cc1NC(=O)[C@H](C)N1CCN(S(=O)(=O)c2c(C)noc2C)CC1 -Cc1nc(-c2ccc(Cl)s2)sc1C(=O)N[C@H]1C[C@H]1C -COc1cc(F)c([C@H]([NH3+])c2ccc(SC)cc2)cc1OC -Cc1ccc(-n2ccnc2SCC(=O)N(CC(N)=O)C(C)C)cc1C -Cc1cccc(/C=C2\SC(=S)N(c3c(C)cccc3C)C2=O)c1 -CC1=C(C(=O)OC(C)C)[C@H](C)N=C1C(=O)Nc1ncc(C)s1 -COC(=O)c1cccc(C(=O)N2C[C@@H](c3ccc(F)cc3)C[C@H]2C)c1 -O=C(NCCCn1ncccc1=O)[C@@H]1CC(=O)N(c2ccccc2)C1 -CCC(=O)N1CCCN(C(=O)N[C@@H]2CCc3ccccc32)CC1 -Cc1cccc([C@@H](C)[NH2+]CCS(=O)(=O)C(C)(C)C)c1C -Cc1ccc(-c2cnc(CCC(=O)NCC(C)(C)c3ccncc3)o2)cc1 -Cc1cc(NN)c2cccc(OC(F)(F)F)c2[nH+]1 -C[C@H]1CCC[C@@H](C(=O)Nc2cccc(OCCc3ccccc3)c2)[NH2+]1 -Cn1ncnc1CCNC(=O)[C@H]1C[C@@H]1c1cc(Cl)cc(Cl)c1 
-CC[C@H](Sc1nnc2cc(C)c3cc(C)cc(C)c3n12)C(=O)Nc1nnc(COC)s1 -CCOc1ccc(S(=O)(=O)N2CCC(c3nnc(C4CC4)o3)CC2)cc1 -Cc1cc2nc(C)c(CCC(=O)NC[C@@H](c3ccccc3)N3CCOCC3)c(C)n2n1 -COc1ccc(N2C(=O)CS[C@H]2c2ccc(Cl)cc2)cc1Cl -C/C=C(\C)[C@@H]1C=C[C@@H]2C[C@H](C)C[C@H](C)[C@@H]2[C@@H]1C(=O)C1=C([O-])[C@H](C[C@](C)(O)C(=O)[O-])NC1=O -CS(=O)(=O)c1ccc(NC(=O)N2CCC[C@H]2CC2CCCCC2)cc1 -COc1ccc(C(=O)OCc2nc3ccccc3s2)cn1 -COc1ccc(S(=O)(=O)Nc2ccccc2-n2nc(C)cc2C)cc1NC(C)=O -Cn1c(=O)n(CC(=O)N[C@@H]2CCCc3ccc(F)cc32)c2ccccc21 -N#C/C(C(=O)NC1CCCC1)=C(/[O-])Cc1cnn(-c2ccccc2)c1 -NC(=O)[C@H](Nc1cccc(Oc2ccccc2)c1)c1ccc(F)cc1 -CCN(C[C@@H]1CCOC1)C(=O)Nc1cc2c(cc1Cl)OCCO2 -CCCCc1nnc(NC(=O)C2CCN(S(=O)(=O)c3ccc(C)cc3)CC2)s1 -CC[C@@H](NC(=O)c1ccc(Br)o1)C(=O)N1CCOCC1 -CC[C@]1(c2ccccc2)NC(=O)N(CCOc2ccc(Cl)cc2Cl)C1=O -COc1ccc(OC)c(NC(=O)c2ccc3c(c2)C(=O)N(c2cc(C)on2)C3=O)c1 -Cc1ccc(OCC(=O)NC(=S)NC[C@H]2CCCO2)cc1 -CN(C(=O)CCOc1cccc(C(N)=O)c1)[C@H]1CCC[NH+](C)C1 -COC(=O)c1ccc(NC(=O)c2c(C)sc3ncnc(N4CCC[C@H](C)C4)c23)cc1 -Cn1ncc2c1CC/C(=C\c1ccc(-n3cncn3)c(F)c1)C2=O -O=C(Cc1ccc([N+](=O)[O-])cc1)NCC1(O)CCOCC1 -C=CCOc1ccc(CNc2ccc(OC)cc2)cc1 -C[C@H]1N=C(CCNC(=O)CCC2=c3ccccc3=[NH+]C2)CS1 -COC(=O)c1cc(NC(=O)[C@@H]2CCO[C@H]2C)ccc1C -COc1ccccc1[C@@H](C)NC(=O)c1cnc2c(C)cccn2c1=O -CC[C@@H](C)NS(=O)(=O)c1cc(N2C(=O)[C@@H](C)CS2(=O)=O)ccc1OC -CC(C)(C)NS(=O)(=O)c1ccc(OCC(=O)N2CCOCC2)cc1 -COCCCNC(=O)[C@H]1CN(C(=O)c2cccs2)CC12CCCCC2 -Cc1nc(-n2cccc2)sc1C(=O)Nc1cccc(-c2cn3ccsc3n2)c1 -CC[S@](=O)[C@H]1CCCC[C@@H]1NC(=O)NC[C@H](O)c1ccco1 -Cc1cc2ncn(C[C@H]3CC3(Cl)Cl)c2cc1C -Cc1ccccc1OCC(=O)O[C@@H](C)c1nccs1 -C[C@H](C(=O)N1CCOCC1)[NH+]1CCN(Cc2ccc3c(c2)OCO3)CC1 -CCN(C(=O)[C@@H]1Cc2ccccc2S1)[C@H]1CCC[C@@H]1C[NH3+] -Cc1cc(Br)ccc1NC(=O)[C@@H](C)[NH+](C)Cc1cccs1 -COC(=O)[C@H]1CCC[C@H]1NC(=O)Nc1ccc(C)cc1C -CC(C)NS(=O)(=O)c1ccc(C(=O)N[C@H](C)c2ccccc2Br)cc1 -CC(C)OCCN1CCN(C(=O)Nc2ccccc2C(F)(F)F)CC1 -CCn1cc(-c2nc(-c3cccc(Cl)c3)no2)c(=O)c2ccccc21 -COC[C@@H](C)NC(=O)C(=O)Nc1cc(-c2ccccc2)nn1C(C)C -C#CCOc1ccccc1CN1CCN(C2=[NH+]C[C@@H](C)S2)CC1 
-COCCNC(=O)[C@H]1CCCN1S(=O)(=O)c1ccc(Br)cc1 -Cc1ccc(C[C@H](O)c2c(F)cc(Br)cc2F)cc1 -COC(=O)[C@@]1(NC2CCCC2)CCCS[C@H]1C -CCC[NH2+][C@H](Cc1ccccc1)[C@@H]1CN(CC)CCO1 -C=CCn1c(=O)c2c(nc3n2[C@H](C)C(C)=NN3C)n(C)c1=O -C[C@H]1CCCC[C@H]1NC(=O)c1cc(S(=O)(=O)N2CCOCC2)ccc1Cl -COCCn1ccc2ccc(NC(=O)NCCc3ccccn3)cc21 -Cc1nc(CNC(=O)C(=O)Nc2cc(Cl)ccc2Cl)no1 -CS[C@@H]1CC[C@H](NC(=O)CCC(=O)c2ccc(C)s2)C1 -Cc1nc(/C=N/Nc2ccc(Cl)nn2)c[nH]1 -O=C(CSc1nnnn1C1CC1)N[C@H](CO)C(=O)[O-] -O=C(COc1ccc(Br)cc1)N[C@H]1CCS(=O)(=O)C1 -CCc1nnc(NC(=O)c2ccccc2N)s1 -O=C([O-])c1ccccc1-c1ccc(/C=C2\C(=O)N(c3cccc(Br)c3)C(=O)N=C2[O-])o1 -COc1ccc(-c2csc(NC(=O)c3ccc(S(C)(=O)=O)cc3)n2)cc1OC -COc1ccc(Cn2ccc3nc(N4CCN(c5ccccc5)CC4)ncc3c2=O)cc1 -Cc1nn(C)cc1/C=N/NC(=O)c1ccncc1 -N#Cc1ccc(OC2CCC(NC(=O)c3ccc[nH]3)CC2)nc1 -CC(C)c1ccc2oc(-c3ccc(C[NH3+])cc3)nc2c1 -COc1ccc([C@@H]2NC(=O)N[C@@](O)(C(F)(F)F)[C@H]2C(=O)c2ccc(F)cc2)cc1OC -Cc1cc(NC(=O)c2cccc([N+](=O)[O-])c2C)n(-c2ccccc2F)n1 -CCC(=O)N1CCCC[C@@H]1C(=O)NCCc1ccc(F)cc1C -Cc1ccc(S(=O)(=O)N2C(N)=C(C#N)[C@H](c3ccc(Cl)cc3)[C@H]2C(=O)c2ccccc2)cc1 -C[NH+](C)[C@@H]1CC[C@H](NC(=O)[C@@H]2CCCc3[nH]ncc32)C1 -CCOCCS(=O)(=O)[N-]c1cc(Br)ccc1O -NC1=NC(=O)[C@H](CC(=O)N2CC[C@H](c3ccccc3)C2)S1 -Cc1noc(-c2cccnc2N2CC[C@H](NC(=O)COc3ccc(F)cc3)C2)n1 -Cc1ccc(OC(=O)c2cccc(C(=O)Oc3ccc(C)cn3)n2)nc1 -CN(CC1CCCC1)C(=O)C(=O)Nc1cccc(SC(F)F)c1 -COc1cccnc1N(C)C(=O)C[C@H](C)Cc1ccc(Cl)cc1 -O=C(c1cc(=O)[nH]c2ccccc12)N1CCC([C@H](O)c2ccccc2)CC1 -CCO[C@@H]1C[C@@H]([NH3+])[C@@H]1Nc1ncc(Cl)cc1F -CC(=O)Nc1ccc(NC(=O)c2ccc3c(c2)Cc2ccccc2-3)cc1 -COCCOC[C@H]1CC[NH+](C2C[C@@H](C)O[C@H](C)C2)C1 -O=C1OC(c2ccccc2OC(F)F)=N/C1=C\c1cccc(F)c1 -COc1ccccc1CNC(=O)c1cc2sccc2n1Cc1cccc(F)c1 -CCOC(=O)C1(NCc2nnc(-c3cc(C)oc3C)o2)CCCC1 -N#Cc1cccc(NC(=O)N2CCC(NC(=O)CC3CCCC3)CC2)c1 -CCOC(=O)C1=C(c2ccccc2)Nc2ncnn2[C@H]1c1ccc(SC)cc1 -Cc1cc(NC(=O)N[C@@H](Cc2ccccc2)c2ccccc2F)n(C)n1 -C[C@H](Sc1cccc[n+]1[O-])C(=O)NC[C@H]1COc2ccccc2O1 -O=C(NCCOc1ccc2c(c1)OCO2)c1cc(C2CC2)on1 -N#CCC[NH2+]C1(C(=O)[O-])CC1 -CC[C@@H](Oc1ccccc1OC)C(=O)NCc1ccccn1 
-Cc1nc(C[C@@H](N)[C@]2([NH+](C)C)CCC[C@H](C)C2)cs1 -CCOc1ccccc1/C=C1\Oc2c(ccc([O-])c2C[NH+]2CCN(C)CC2)C1=O -CC(C)Cn1cc[nH+]c1CN[C@@H](c1ccccc1)C(C)C -Cc1c(C[NH+]2CCC[C@H]2c2ccc3c(c2)OCO3)cc(C#N)n1C -O=C(CBr)c1cnc2ccc(Cl)cn12 -COc1ccc(F)cc1NC(=O)c1sccc1S(=O)(=O)N(C)C -COc1ccc(Br)cc1/C=C1/C(=O)NN(c2ccc(C)c(C)c2)C1=O -CC(C)(C)c1ccc(C(=O)N[C@H]2CCN3CCCc4cccc2c43)cc1 -CC[NH+]1CCC[C@@]2(CC1)C[NH+]=C(N)N2c1ccc(C)cc1 -CN(C)c1ccc(/C=C(/C#N)C(=O)c2cccc(C#N)c2)cc1 -CCCC[C@@H](NC(N)=O)C(=O)Nc1cc(OC)ccc1F -COC1=CC2=NC(SCc3cc(-c4ccccc4)on3)=NC2=CC1 -Cc1ccc([C@@](C)(O)CNC(=O)NC[C@@H](c2ccco2)[NH+]2CCCCC2)o1 -C[C@@H](NC(=O)CSc1ccc2c(c1)OCCCO2)c1ccc2ccccc2c1 -CCN1CCN(S(=O)(=O)c2cc(-c3csc(C)n3)ccc2C)CC1 -Cc1nccn1Cc1ccc(NC(=O)c2cccc(-n3cccc3)c2)cc1 -Cc1cccc([S@@](=O)Cc2ccc(N)c(F)c2)c1 -CC(C)C[C@@H](C[NH3+])c1nc(C2CCOCC2)no1 -CC1=NC(SCC(=O)Nc2ccccc2C(F)(F)F)=NC(=O)[C@H]1Cc1ccccc1 -Cc1ccc(F)cc1NC(=O)C(=O)NCCCn1cc[nH+]c1 -COc1cccc(C[NH2+]Cc2cccc(Br)c2OC)c1OC -Nc1cc(=O)[nH]c(SCC(=O)Nc2nc(-c3ccc(Br)cc3)cs2)n1 -CCc1noc(CN(CC)C(=O)C(C)(C)NC(=O)c2cccs2)n1 -CC(C)C[C@](C)(O)CNC(=O)CC[C@H](C)O -C=C[C@@H](C)NC(=O)c1c(C)cc(C)c([N+](=O)[O-])c1C -O=C(Nc1ccc(S(=O)(=O)NC[C@@H]2CCCO2)cc1)[C@H]1CC(=O)N(c2ccc(F)c(Cl)c2)C1 -CC1(C)CCC[C@H]1N1C(=O)c2cccc(N)c2C1=O -CCCNC(=O)NC(=O)CN1C(=O)N[C@](Cc2ccccc2)(c2ccccc2)C1=O -O=C(COc1cc(Cl)c(Cl)cc1Cl)N1CCN(C(=O)Nc2ccccc2)CC1 -CCc1nc2n(n1)C[C@H]([NH2+]CCNS(=O)(=O)c1ccccc1)CC2 -CCCCN(C(=O)c1oc2ccccc2c1C)c1c(N)n(CCC)c(=O)[nH]c1=O -CCCn1c(S[C@H](C(N)=O)c2ccccc2)nc2sc(CC)cc2c1=O -COc1cccc(CCNC(=O)CN2CCN(c3ccccc3O)CC2)c1 -c1ccc2c(NCc3nnc(C4CCC4)o3)cccc2c1 -CC1=C(C(=O)C2=C([O-])C(=O)N(CC[NH+](C)C)[C@H]2c2cccc(Cl)c2)[C@H](C)N=N1 -CCOc1ccc([C@@H]2Nc3ccc(C(=O)N(C)C)cc3[C@H]3C=CC[C@H]32)cc1 -COc1ccc(Cc2sc(NC(=O)[C@H]3COc4ccccc4O3)nc2C)cc1 -Cc1cnc2nc(C[C@H](O)C(F)(F)F)[nH]c2c1 -CCN(CC(C)(C)O)C(=O)COCc1ccccc1Cl -O=C(Nc1cccc2ccccc12)NC1CC[NH+](CC(F)F)CC1 -O=C1C(=O)N(CC[NH+]2CCOCC2)[C@@H](c2cccc([N+](=O)[O-])c2)/C1=C(\O)c1cccs1 -CC[C@@H](C)N(CC)C(=O)c1ccccc1N 
-Cc1cc(C)c(C(=O)Cn2nc(N)n(Nc3cccs3)c2=S)c(C)c1 -C[C@H]1CN(C(=O)CC[C@H](C)c2ccccc2)C(C)(C)CO1 -CN(C)c1nc2c(c(-c3ccc(S(C)(=O)=O)cc3)n1)CCCC2 -CC(C)c1ccc(CNC(N)=[NH2+])cc1 -CN(C)N1C(N)=C(C#N)[C@@H](c2cccs2)C2=C1CCCC2=O -O=C(N[C@H]1C=C[C@H](C(=O)[O-])C1)c1cc(F)c(Cl)cc1Cl -CC(C)n1nnnc1COc1cccc(C(=O)NC2CCCCCC2)c1 -O=S(=O)(NCc1ccc(Cl)cc1Cl)c1ccccc1Br -COC[C@H](NC(=O)c1cc(-c2ccccc2)c(C)[nH]c1=O)C(N)=O -O[C@H]1CCCCC[C@H]1n1cc(-c2ccccc2Cl)cn1 -CC1=C[C@@H](C)[C@H]2C(=O)N([C@H](Cc3ccccc3)C(=O)[O-])C(=O)[C@H]2C1 -CCCCNS(=O)(=O)Cc1ccc([N+](=O)[O-])cc1 -COc1ccc(CC[C@H]2C[C@@H](C(C)(C)C)CCC2=O)cc1 -C[C@H]1CN(CC(=O)Nc2nc(-c3ccccc3Cl)cs2)CCO1 -Cc1cc(Cl)cc(Cl)c1CNC(=O)c1cccs1 -CC(C)(C)OC(=O)NC1CCN(CC(=O)c2nccs2)CC1 -COc1ccc(C)cc1[C@@H](C)NC[C@@H]1CN(C2CC2)CCO1 -CCOC(=O)c1ccn(-c2cccc(NC(=O)C3CCCC3)c2)n1 -O=C1N(C[NH+]2CCN(c3ccccc3)CC2)c2ccccc2C12O[C@@H]1CCCC[C@H]1O2 -CC(C)c1nc(C(=O)[O-])nn1-c1ccccc1F -C1=C[C@H]2C[C@@H]1C[C@H]2CN1CC[NH+](C2CCCCCC2)CC1 -CC(C)C[C@H](C[NH+](C)C)Nc1ncncc1N -O=C1C=C(c2cccs2)C[C@H](c2cccs2)[C@@H]1n1cnc([N+](=O)[O-])n1 -C[NH2+]C1CCC([NH+](C)CC(=O)N[C@H](C)c2ccco2)CC1 -CC(C)c1nc2n(n1)CCC[C@H]2[NH2+]C[C@@H]1CCC[C@H](C)C1 -CCCOC(=O)c1ccc(NC(=O)c2ccc[n+]([O-])c2)cc1 -CCOc1cc(CO)cc(Br)c1OCc1ccccc1F -O=C(Cn1nnn(-c2cccs2)c1=O)NC[C@@H]1CN(Cc2ccccc2)CCO1 -CC(C)[C@H](O)CCNC(=O)C(=O)Nc1ccn(-c2ncccc2Cl)n1 -Cc1cccc(C(=O)NCCS(=O)(=O)NCC2CCC2)c1C -CCCC(=O)N1CCC(C(=O)NN=C(c2ccccc2)c2ccccc2)CC1 -O=C(N[C@H](NC(=S)Nc1ccccc1)C(Cl)(Cl)Cl)c1cccc(Br)c1 -CCN(Cc1ccccc1)C(=O)c1cc(NC(=O)Cc2ccccc2)n(C)n1 -CC(C)CC(=O)N1CCN(C(=O)c2cnc3c(c2)NC(=O)CO3)CC1 -O=C(Cc1csc(NC(=O)Nc2ccc(Cl)cc2)n1)NCCc1ccc(Cl)cc1 -Cc1ccc2c(c1)C[C@@H](C[C@@H](C[NH3+])c1ccc(F)cc1)O2 -CC1=C(C(=O)OC(C)C)[C@@H](c2ccc(C)s2)NC(=O)N1C -COc1cc(F)c([N+](=O)[O-])c(NC[C@@H](O)c2cnn(C)c2)c1 -COc1ccc(C(=O)N2CCC([C@@]3(C)NC(=O)N(C4Cc5ccccc5C4)C3=O)CC2)cc1 -CCc1cnc(NC(=O)c2cc(C)n(C(C)C)c2C)s1 -COc1ccccc1-c1nc(C[NH+](C)Cc2ccc(C#N)cc2)cs1 -COc1ccc(-c2noc(-c3cc(-c4ccc(Cl)cc4)n[nH]3)n2)cc1OC 
-CC[C@H](NC(=O)CN1C(=O)c2ccccc2N2C(=O)CC[C@]12C)c1ccc(C)cc1 -Cc1ccc(CNC(=O)NCc2ccnc(OC(C)(C)C)c2)cn1 -CCN(CCO)C(=O)Nc1cccc(C(=O)Nc2cccc(C#N)c2)c1 -Cc1cccc(-c2nn(C[NH+]3CCCCC3)c(=S)n2-c2ccccc2)c1 -C[NH2+][C@@H](C1CCCC1)[C@@H]1CCc2cccnc21 -Cc1ccc(-c2cccc(F)c2C(=O)[O-])c(C)c1 -CN(C)C(=O)[C@@H](Sc1nnc2n(C)c3ccccc3n12)c1ccccc1 -Cc1ccc(C(=O)NNC(=O)c2ccc(SC[C@H]3CCCO3)c([N+](=O)[O-])c2)cc1 -CCCn1/c(=N/C(=O)[C@@H](CCSC)NC(N)=O)[nH]c2ccccc21 -Cn1/c(=N/C(=O)c2sccc2S(=O)(=O)N2CCOCC2)sc2ccccc21 -COc1ccccc1N1C[C@@H](C(=O)NN2C(=O)NC3(CCCCC3)C2=O)CC1=O -COc1cc(OC)cc(C(=O)Nc2ccccc2C(=O)NC(C)(C)C)c1 -C[C@H](Nc1nc(-c2ccncc2)nc2ccccc12)c1ccccn1 -CCOc1ccc(C[NH+]2CCC[C@H]([C@H](O)c3nccn3C)C2)cc1OC -O=C([O-])c1ccc([S@@](=O)Cc2ccc(O)cc2)cc1 -CCc1nn(C)cc1CNC(=O)NCC(C)(C)Cc1ccccc1 -COC(=C(C#N)C#N)c1cccs1 -O=C(NCc1ccnc(OCC(F)F)c1)NCc1cscn1 -C[C@@H](C(=O)N1CCCC1)N1CCN(C(=O)NCc2ccco2)CC1 -CC(C)c1ccc(CN(C)C(=O)NCCCn2cccn2)cc1 -CNC(=O)[C@@H]1CCCN(C(=O)Nc2nn(-c3ccccc3Cl)cc2C)C1 -COc1ccccc1N1CC[NH+]([C@@H](C)C(=O)Nc2ccc(F)cc2)CC1 -CN1C[NH+](C)CC2=C1NCNS2(=O)=O -CNC(=O)c1ccc(O[C@@H]2CCC[C@H]([NH3+])C2)nn1 -COc1ccc(C(=O)O[C@@H](C)[C@@H]2CCCO2)cc1OC(F)F -Cc1ccc(/C=C2/SC(=S)N(CCC(=O)N3CCCc4ccccc43)C2=O)cc1 -CCc1cccc(S(=O)(=O)Nc2cccc(-c3nnnn3C)c2)c1 -Cc1ccc2c(c1)CCN2C(=O)c1ccc(C)nc1C -CCCNC(=O)CN1CCN(C(=O)Cc2c(C)nn(-c3ccccc3)c2C)CC1 -Cc1ccc(C(=O)NC2CC[NH+](Cc3nc(-c4ccccc4)cs3)CC2)s1 -CCOc1ccc(F)c(C(=O)OC[C@H]2CCCCO2)c1F -C[NH+]1CCC(NC(=O)c2ncoc2-c2ccccc2)CC1 -O=C(NC[C@@H](O)CN1CCCC1=O)Nc1cccc(F)c1 -CC[NH+]1CCC[C@H](NC(=O)c2ccc(OC)c(O)c2)C1 -O=C(CSCC(F)(F)F)N1CCN(c2ccc(Cl)cn2)CC1 -COc1ccc(Cl)cc1S(=O)(=O)N[C@H](C)C(=O)NCc1ccc2c(c1)OCO2 -Cn1cc(C(=O)Nc2ccc(-n3ccnn3)cc2)c(C(C)(C)C)n1 -Cc1cccc(NC(=O)C[C@@H]2CCCCO2)c1C(=O)[O-] -Clc1ccc([C@H](NCCc2nnc3ccccn23)C2CC2)cc1Cl -Clc1ccc(OCCCCSc2ncccn2)cc1Cl -Cn1nnc2cc(C(=O)N[C@@H](C#N)c3ccc(Cl)c(Cl)c3)ccc21 -Cc1cc(C)cc(-c2nnc(Sc3nc(C(C)C)ns3)o2)c1 -C/C(=C/c1ccc(F)cc1)C(=O)NCc1cccc(OCC(F)F)n1 -CC(C)Nc1cccc(CNC(=O)N[C@@H]2CC[NH+](CC3CC3)C2)c1 
-O=C(COc1ccc(F)cc1F)NC[C@H](O)c1ccccc1Cl -Cc1ccc(-n2nc3c(c2NC(=O)C(C)C)C[S@@](=O)C3)cc1 -COc1nc(Oc2ccc3ccccc3c2)ccc1N -O=C(c1ccccc1)c1ccc2nc(Nc3ccccc3)c3nncn3c2c1 -O=C(C[C@@H](O)c1cccc(F)c1)Nc1cc(F)ccc1O -CC(C)c1nc(CSCc2ccnn2C)no1 -O=C(C1CCC1)N1CCC[C@H]1c1nc2cc(-c3ccccc3)ccc2o1 -O=C(CC[C@@H]1NC(=O)NC1=O)NC1CCN(c2ccccc2F)CC1 -Cc1ccc(-n2nc3c(c2NC(=O)c2ccc(Br)o2)CSC3)cc1C -C=CCn1c(SCc2nnc([S-])n2-c2ccccc2)nnc1-c1ccccc1 -c1nnn(C23C[C@H]4C[C@H](CC(c5nc6c7cn[nH]c7ncn6n5)(C4)C2)C3)n1 -CCCn1ncnc1COc1ccc(C)nc1C[C@H](C)[NH3+] -CCSc1ccc(C(=O)N2CC[C@H](C)[C@H](O)C2)cn1 -C[C@H]1[C@H](C(=O)[O-])CCN1S(=O)(=O)[C@@H](C)C#N -COc1ccccc1NC(=O)CSc1nnc(C)c(=O)n1N -Cc1ccc(S(=O)(=O)OCCc2coc3ccccc23)cc1 -COc1ccccc1[C@@H](C)NC(=O)[C@@H](C)Oc1cccc(F)c1 -CC[C@@H](Oc1ccccc1/C=C1\S/C(=N\c2cccc(O)c2)N(CC)C1=O)C(=O)[O-] -C[C@@H]([NH2+]C[C@H]1CC[C@H](C(N)=O)O1)c1ccc2c(c1)OCCCO2 -Cc1ccccc1Nc1nc(N)nc(COc2ccc(F)c(Cl)c2)n1 -CC(=O)Nc1ccc(OC(=O)/C=C/c2ccc(C(N)=O)cc2)cc1 -O=C([O-])c1ccc(-c2ccncc2)cn1 -O[C@H](c1c(F)c(F)c(F)c(F)c1F)C(Cl)(Cl)Cl -COc1ccc(/C=N/NC(=O)CNc2ccc3ccccc3c2)cc1OC -Cc1ccc(NC(=O)/C(C#N)=C/c2cc(C)n(-c3ccc(O)cc3)c2C)cc1Cl -Cc1cccn2c(=O)c(C(=O)N[C@H]3CCN(C(=O)C(C)C)C3)cnc12 -O=C(NCCCS(=O)(=O)c1ccccc1)c1n[nH]c2ccccc12 -C1=C(CC[NH2+]Cc2ccco2)CCCC1 -CCN(CC(=O)NCc1ccc(F)cc1)C(=O)c1cnc(-c2cccnc2)s1 -FC(F)(F)c1cccc2c1CCCC2 -CN(C)C(=O)CCCNC(=O)c1ccnc(OC(C)(C)C)c1 -Cc1ccc(NC(=O)[C@H](C)[NH+](C)Cc2nnc(C3CC3)n2C)c(C)c1 -CN(C)c1cccc(C(=O)OCC(=O)C(C)(C)C)c1 -Cc1nsnc1Cn1nnc(C(=O)NC(C)C)c1C -CC(=O)Cc1nsc(N[C@@H](C)c2ccccc2)n1 -COc1ccc(CNC(=O)[C@H]2Oc3ccccc3O[C@@H]2C)cn1 -[NH3+]CC1CCC(c2nc3ccc(Cl)cc3s2)CC1 -CCN(C(=O)NC1CC[NH+](C[C@@H](O)COC)CC1)C1CCCC1 -CCSc1nc2ccccc2c(=O)n1CCc1ccccc1 -Cc1cc(C(=O)COC(=O)c2cc(Cl)c3c(c2)OCCCO3)c(C)n1C1CC1 -COc1ccc([C@@H](CNC(=O)c2ccc([N+](=O)[O-])o2)[NH+](C)C)cc1 -CC[C@H](NC(=O)c1ccc(C#N)cn1)C(=O)N1CCOCC1 -CCOC(=O)NC(=O)c1c(NC(=O)Cc2ccc(F)cc2)sc2c1CC[C@H](C)C2 -CC(C)n1cnnc1SCC(=O)Nc1ccc2c(c1)nc(C1CC1)n2C -CCc1ccc([C@H](O)C2(C[NH3+])CCCC2)cc1 
-COc1ccc(CNC(=O)N2CCc3c([nH]c4ccccc34)[C@H]2C)cc1OC -COc1ccc([C@@H]2C(C#N)=C(N)Oc3cc(C)n(CCN4CCOCC4)c(=O)c32)cc1OC -O=c1[nH]nc([O-])n1/N=C/c1ccco1 -C[C@H](Oc1cccc(Cl)c1)C(=O)N1CCC(Cc2ccccc2)CC1 -COc1cc([C@@H]2C(C(=O)Nc3ccc(F)cc3)=C(C)Nc3nc(C)nn32)cc(OC)c1OC -CC[C@@H](C)[C@@H](O)C[NH2+][C@@H](c1cccs1)C1CC1 -CSc1cc(-c2cccs2)oc(=O)c1C#N -CC(C)[C@@H](NC(=O)c1ccc(NS(C)(=O)=O)cc1)C(=O)[O-] -[NH3+][C@H](CO)c1ccc(N2CCOCC2)c(Cl)c1Cl -CCCn1cc(NC(=O)c2cc3nc(-c4ccccc4)cc(-c4ccccc4)n3n2)cn1 -Cc1ccc(S(=O)(=O)N2CCC(C(=O)N3CCCc4ccccc43)CC2)cc1C -C=CCN(CC=C)C(=O)C1CCN(C(=O)C(C)(C)C)CC1 -Cc1nc(CSc2nncc3ccccc23)nc2ccccc12 -C[C@H]1CCCC[NH+]1C[C@@H]1CCC(C)(C)[C@@H]1[NH3+] -COc1cc(C(=O)Nc2ccccc2Oc2ccccc2)on1 -COc1ccc(S(=O)(=O)N2CCOCC2)cc1NC(=O)/C=C/c1ccc(F)c(Cl)c1 -Cc1ccc(F)c(C[NH+]2CCC(C(=O)NC(C)C)CC2)c1 -CCn1nc(C)c(CNC(=O)[C@H]2[NH+]=c3ccccc3=C2NC(=O)c2cccc(C)c2)c1C -COc1ccc(C(=O)N2CCC[C@H](C(=O)Nc3cc(Cl)ccc3F)C2)c2ccccc12 -CCc1ccc(/C=C(\C#N)C(N)=O)s1 -COc1cccc(N2C(=O)Nc3ccccc3[C@]2(O)C(=O)NCc2ccccc2)c1 -COC(=O)c1sccc1NC(=O)[C@@H]1CC[NH2+][C@@H]1C -C/[NH+]=C(/NCc1ccc([N+]2=CCCC2)cc1)N[C@H]1CC[C@@H](SC)C1 -N#Cc1csc(C(=O)N2CC[C@H]3CCCC[C@@H]32)c1 -Cc1cccc(NC(=O)[C@H](C)[S@@](=O)Cc2ccc(F)c(F)c2)c1C -CNS(=O)(=O)c1cccc([C@H](C)NC(=O)c2ccc(Cn3cccn3)cc2)c1 -CC[NH2+][C@@]1(C(=O)OC)CCC[C@@H](Oc2ccccc2)C1 -COCCCn1c(C)c(C)c(C#N)c1NC(=O)C[NH+]1CC(C)(C)C1(C)C -C[C@H]1CCC[C@@H](C)N1C(=O)[C@H]1C[C@H]1c1ccccc1Cl -COCc1ccc(C[NH+](C)Cc2ccccc2O)o1 -Cc1c(F)cc(N)cc1S(=O)(=O)NCC(N)=O -CCNS(=O)(=O)[C@@H]1CC[NH+](C[C@@H]2CCCc3ccccc32)C1 -CC1(C)[C@@H]2CC[C@@]1(CS(=O)(=O)NCCCO)C(=O)C2 -COc1ccc(-n2ccc(CNC(=O)c3cc(Cl)ccc3[N+](=O)[O-])n2)cc1 -CC[C@@H](NC(=O)NC1CCC(C(=O)OC(C)(C)C)CC1)[C@H]1CCCO1 -CN(CC[NH+](C)C)C(=O)C[C@H]1COCCN1C(=O)c1ccc2[nH]nnc2c1 -CC[C@H](c1ccc(F)cc1)N(C)C(=O)Cn1nnc(-c2ccccc2)n1 -O=C(Cn1cccc1-c1nc(-c2ccc(OC(F)(F)F)cc2)no1)Nc1nccs1 -CCc1nsc(Nc2ccc(CC(=O)N3CC[NH+](CC)CC3)cc2)n1 -CS[C@@H]1CC[C@H](NC(=O)/C=C(/C)c2ccccc2)C1 -Cc1ccc([N+](=O)[O-])cc1NCC(=O)N[C@](C)(C#N)C1CC1 
-CC1(C)[C@H]2OCC[C@@H]2[C@H]1NC(=O)CCNC(=O)C12CC3CC(CC(C3)C1)C2 -c1ccc(COC2CC[NH+](Cc3cccnc3)CC2)cc1 -O=C(C1CCCC1)N1CCC[C@@H]([NH+]2CCC(CO)CC2)C1 -COC(=O)CNC(=O)c1sc2ncn(CC(=O)N3CCCCC3)c(=O)c2c1C -CC(C)CCNC(=O)[C@@H](C)Oc1ccc(N)cc1C(=O)[O-] -CCN(CC)C(=O)[C@@H]1C[C@@H]([NH3+])CN1C(=O)Cc1cccc(O)c1 -COc1ccc(F)cc1NC(=O)N1CCO[C@H](c2ccc(C)o2)C1 -COc1ccc(-c2nnc(SCC(=O)c3ccc(Br)cc3)o2)cc1OC -CC(C)=CC(=O)NCCC1CCN(c2cc[nH+]cc2)CC1 -C[C@]1(O)[C@](C)(O)[C@@H](CO)O[C@](C)(Oc2c[nH]c3ccc(Br)c(Cl)c23)[C@]1(C)O -O=C(CCCc1nc(-c2cccnc2)no1)N1CCC[C@@H](Cc2ccccc2)C1 -Cn1nc(NC(=O)c2cccc(F)c2)c2c1NC(=O)C[C@@H]2c1ccccc1 -O=C(CSc1ccncc1)NCCN1Cc2ccccc2O[C@@H](c2ccccc2)C1 -N#Cc1ccc(OCCn2cc(Cl)cn2)cc1 -C[C@@H]1CCN(C(=O)Nc2ccc(O[C@@H]3CCOC3)cc2)[C@H](C)C1 -CC[C@H](C)[C@H](C)[NH2+]Cc1ncccc1F -C#CC(C)(C)NC(=O)c1ccc(OC)c(O)c1 -COc1cc([N+](=O)[O-])ccc1OCc1nc(-c2cccs2)no1 -CC1CCN(C(=O)C[NH+]2CCC[C@@H](c3nc4ccccc4o3)C2)CC1 -C[C@H]1CCC[C@H](NC(=O)Cc2c[nH]c3ccccc23)[C@@H]1C -Cn1nc(CNC(=O)Nc2ccccc2C(F)(F)F)cc1-c1ccncc1 -CC[NH2+]C[C@H](Cc1cscn1)c1cccc(F)c1 -CCCCS(=O)(=O)N1CCN(c2ccc(-n3ccnc3C)nn2)CC1 -O=C(c1cc2ccccc2o1)N(C[C@H]1CCCO1)c1nc2c(F)cccc2s1 -C[C@H](CC#N)Sc1ccccc1NC(=O)c1ccc(Cl)nc1Cl -O=C(CCCc1nc2ccccc2s1)N[C@H]1CCOC1=O -COC(=O)c1cc(S(=O)(=O)N[C@H](C)c2ccccc2C)cn1C -Cc1cnn(CC(=O)[C@@H](C#N)c2nc([O-])c3ccc(Cl)cc3n2)c1 -COc1ccc(C)cc1NC(=O)[C@H]1CCCN1c1cc(C)ccc1[N+](=O)[O-] -CC[NH+]1CCC2(CC1)OC[C@H](C(=O)[O-])N2C(=O)c1ccc(F)cc1 -Cc1ccc2c(c1)N(C(=O)C[C@H](O)c1ccc(Cl)cc1)CC2 -O=c1[nH]cnc2c1[nH]c(=S)n2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O -Cc1cn2c([nH+]1)CC[C@H](NC(=O)C[C@@H]1CCCc3ccccc31)C2 -CC(=O)c1cc(CN2CCC3=NN=C(c4ccccc4F)[C@@H]3C2)cs1 -C[C@H](Nc1ccc(S(=O)(=O)N2CCCCC2)cn1)[C@@H](C)CO -COCCn1nc(C)c(NC(=O)N2CCC[C@H]2c2cccc(C)c2)c1C -CCCCS(=O)(=O)[N-]c1ccc(NC(=O)[C@H]2CCC[NH+](C)C2)cc1 -Fc1ccc(Oc2ccnc(Sc3nnc(-c4cccs4)o3)n2)cc1 -N#Cc1ccc(NC(=O)[C@@H]2CSCN2C(=O)c2cn(Cc3ccccc3)c3ccccc23)cc1 -CC[C@@H](C)n1nccc1NC(=O)C(=O)N1CCc2cc(F)ccc2C1 -Cc1ccc(-c2cnc(CCC(=O)N(C)C3CCOCC3)o2)cc1 
-C[C@H](CNC(=O)c1ccc(-c2ccccc2)[nH]c1=O)Oc1ccc(F)cc1 -CCOC(=O)c1c(NC(=O)[C@H]2CCCN2S(C)(=O)=O)sc2ccccc12 -Cc1ccc(C(=O)Cc2cccc(O)c2)cc1 -Cc1ccccc1-c1nn(CN2CCCc3ccc(S(C)(=O)=O)cc32)c(=S)o1 -CCOc1cccc([C@H](C)NC[C@@](C)(O)c2ccc(F)cc2F)c1 -CC[C@@H]1CCCCCN1C(=O)c1cnc2sc(C)cn2c1=O -N#Cc1ccc(OCC(=O)NCc2cccc(CO)c2)cc1 -O=C(Nc1ccccc1)NC1CCN(C(=O)[C@H]2CCCC[C@H]2C(F)(F)F)CC1 -CC1(C)CCC(O)(C[NH2+][C@@H]2CCOC3(CCC3)C2)CC1 -[NH3+][C@H]1CCC[C@H]1CCN1C(=O)c2cccc3cccc1c23 -C#CCN(Cc1cc(Br)cc(OC)c1O)[C@@H]1CCS(=O)(=O)C1 -Cc1ccc(-c2nc3nc(CN4CC[NH+](C)CC4)cc([O-])n3n2)cc1 -C[C@@](O)(CNC(=O)C1CCCC1)c1cccs1 -O[C@H](CSc1nnc(-c2c[nH]c3ccccc23)n1C1CC1)CN1CCOCC1 -C[C@@H]1CCC/C(=N/[NH+]=C(/[S-])NCc2ccccc2)C1 -O=C(c1cc2ccc(Cl)cc2[nH]1)N1CCC[C@@H]1Cn1nnc(-c2cccs2)n1 -COc1ccc(S(=O)(=O)Oc2ccc(C(C)=O)cc2OC)cc1 -CC(C)c1ccccc1NC(=O)C[NH+](C(C)C)[C@@H]1CCCC[C@@H]1O -O=Cc1ccn(-c2ccc(Br)cc2)c1 -O=C(C1CC1)N1CCC[C@H](Cn2cc[nH+]c2-c2cc3n(n2)CC[NH2+]C3)C1 -O=C(CCNc1ccccc1[N+](=O)[O-])N1CCC[C@@H]([NH+]2CCCC2)C1 -Cc1ccc2c(c1)-c1onc(C(=O)N3C[C@@H](C)C[C@H](C)C3)c1CO2 -O=C(COC(=O)c1ccc(Cl)nc1)NC(=O)Nc1ccc2c(c1)OCCO2 -CCC[C@@H]1C[C@H]1NC(=O)C1(c2ccc(F)cc2F)CCOCC1 -CCOC(=O)C1CCC(NC(=O)[C@@](C)([NH3+])CC)CC1 -CC[C@@H](O)C(=O)NCc1cccnc1Oc1ccccc1OC -C[C@@H](Sc1nnc(-c2cccs2)n1-c1ccccc1)C(=O)N1CC(=O)Nc2ccccc21 -Cc1cc(F)ccc1NC(=O)COc1ccc2c(c1)CCC2 -C[C@H](NC(=O)NCCC[S@](C)=O)c1ccc(Cl)s1 -O=C(Cn1c(=O)c(=O)n(Cc2ccncc2)c2ncccc21)NCCc1ccccc1 -CS(=O)(=O)c1ccc(C(=O)Nc2ccc(F)c(F)c2F)cc1 -CCCCOc1ccccc1/C=C1\SC(N2CCC(C)CC2)=NC1=O -CC(C)[C@@H](CNC(=O)N1CCc2ccc(Cl)cc2C1)c1cccnc1 -CCCOc1ncnc(Nc2cc(Cl)cc(Cl)c2)c1N -CC(=O)Nc1ccc(NC(=O)c2nnn(-c3ccc(C)c(C)c3)c2C)cc1 -CCOC1CC[NH+](CC[C@@H](O)c2ccc(C)c(F)c2)CC1 -IC[C@@H]1Cn2c(nnc2-c2ccncc2)S1 -CCOc1ccc([C@H]2CCCN2C(=O)c2[nH]c(C)c(C(C)=O)c2C)cc1 -CC(C)CONc1ncnc2sc3c(c12)CCC3 -CC(C)[C@@H](ON1C(=O)c2ccccc2C1=O)C(=O)[O-] -COC[C@H](O)C[NH+]1CCC(C)(C)C1 -COC(=O)[C@@H](c1ccccc1Cl)N1CCCSCC1 -O=C(Nc1nc2ccc(F)cc2s1)c1cc(-c2ccccc2O)[nH]n1 -C/C(=N\Nc1ncnc2sc(C)c(C)c12)c1cccc(OC(F)F)c1 
-CC(C)CN1CCO[C@@H](CNC(=O)/C=C/c2ccnc(Cl)c2)C1 -Cc1cc(F)ccc1CCNC(=O)Cc1c[nH]c2c(C)cccc12 -NC(=O)[C@@H]1CCCN(C(=O)Cn2nc(-c3cccs3)oc2=O)C1 -CCCCCn1c(SCC(=O)[O-])nc2ccccc2c1=O -Cc1ccc2nc(NC(=O)c3ccc(OCc4nc(-c5ccco5)cs4)cc3)sc2c1 -COc1ccccc1N1CCN(c2ccc(=O)n(CC(=O)NC3CC3)n2)CC1 -C/C(=C/C(=O)N[C@@H](C)c1c(C)noc1C)c1ccccc1OC(F)F -COCCn1nc(C)c(NC(=O)N2CC[C@H](Cc3ccccc3)C2)c1C -Cc1ccsc1C[NH+](Cc1nc2ccccc2n1C(C)C)C[C@H](C)O -Cc1cccc(CNC(=O)C[C@H]2Oc3ccc(C)cc3NC2=O)c1 -C[C@@H](c1ccc([S@](C)=O)cc1)N(C)C(=O)c1cc2cccc(F)c2o1 -COc1cc(OC)c(C(C)=O)cc1CSc1nnnn1-c1ccccc1 -Cc1sc(=O)n(CCC(=O)NC2CC(C)(C)[NH2+]C(C)(C)C2)c1C -CC[C@@H]1CCCCN1C(=S)NC(=O)c1ccc(C)cc1 -CCC[C@H](C)C(=O)N[C@H](C)c1cccc(Br)c1 -COc1cccc([C@H]2CCCN2C(=O)c2ccccc2I)c1 -C[C@H](NC(=O)N1CCCC[C@@H]1C1OCCO1)c1cccc(-n2ccnc2)c1 -CS(=O)(=O)N1CCC[C@@H](C[NH+]2CCC[C@H](CO)C2)C1 -CCN(C(=O)Cn1nc2n(c1=O)CCCCC2)[C@H]1CCS(=O)(=O)C1 -COc1ccccc1NC[C@H]1CCCN(S(C)(=O)=O)C1 -COc1cc([C@@H]2CC(=O)Nc3c2cnn3Cc2cccnc2)cc2c1OCO2 -O=C(CNc1ccc(Cl)cc1NC(=O)c1ccco1)Nc1ccc(F)c(Cl)c1 -Cc1nn(C)cc1[C@@H](C)NC(=O)C(=O)Nc1ccc(OCC2CCCCC2)cc1 -CC(C)(C)[S@](=O)CCNC(=O)c1cccc(F)c1Cl -C[C@@H](O)c1ccc(F)cc1OCc1nc(C(C)(C)C)cs1 -COC[C@H](NC(=O)Nc1cn[nH]c1)c1ccc(F)c(F)c1 -C[C@@H]1CS(=O)(=O)N(c2ccc(S(=O)(=O)Nc3ccccc3C(F)(F)F)cc2)C1=O -CC(C)c1ccc2c(c1)[C@]1(CC(O)=Nc3c1cnn3Cc1ccccc1Cl)C(=O)N2C -CC[C@@H](C)C(=O)NCC(=O)N(C)[C@@H](C)c1cc(F)ccc1F -CO/N=C\C(C#N)=C/c1cccnc1 -CO[C@H](c1ccc(Cl)cc1)[C@@H](C)NC(=O)C(=O)Nc1ccccc1C -CC(C)CNC(=O)[C@](C)(N)C(F)(F)F -C[C@@H](C(=O)C1=c2ccccc2=[NH+]C1)[NH+]1CCC[C@@H]1[C@@H]1CC=CS1 -Cc1nc(Br)ccc1NC(=O)NCc1cnn(C)c1 -COc1c(C)cnc(CNC(=O)Nc2ccc(N(C)C)cc2)c1C -COc1ccc2cc(COC(=O)COc3ccccc3C#N)ccc2c1 -CCS(=O)(=O)CCN(C)Cc1c[nH]nc1-c1ccc(C)cc1 -COc1ccc([C@@](C)([NH3+])Cc2[nH+]ccn2C)cc1 -C[C@H]1CCN(C(=O)NCCc2nnc3n2CCCCC3)[C@@H](C)C1 -O=C(NC[C@@H]1CCC[NH+](Cc2ccccc2F)C1)c1nc[nH]n1 -O=C1CC[C@@H](NC(=O)COc2ccc(Cl)c(Cl)c2)CN1 -Cc1noc(C)c1COc1ccc(C[NH2+]C[C@H]2CCCO2)cc1 -N#Cc1cnn2c1N[C@@H](c1ccccc1)C[C@@H]2C(F)F 
-C[C@@H]1Cc2ccccc2N1C(=O)[C@H]1CCCN(C(=O)NC2CC2)C1 -COCc1nc(C(=O)OCC2=CC[C@H]3C[C@@H]2C3(C)C)cs1 -CCN(Cc1ccc(Br)s1)C(=O)C[NH+](C)CC(=O)[O-] -O=C([O-])[C@H]1CCCN(c2ccc([O-])nn2)C1 -COc1ccc(CCCC(=O)Nc2cccc(S(N)(=O)=O)c2)cc1F -Fc1ccccc1[C@@H](c1nnnn1C1CCCCC1)[NH+]1CCN(c2ccccc2)CC1 -O=C(C/C(=N\Nc1nc(-c2ccccc2)cs1)c1ccccc1)C(F)(F)F -C[C@H]([NH2+]CC(=O)N(C)C)c1ccc(Cl)s1 -CCOC(=O)COc1ccccc1/C=C1/C(=O)NC(=O)N(c2ccc3c(c2)OCO3)C1=O -Cc1cc(C(=O)NNC(=O)c2cccc3ccccc23)c(C)o1 -COc1ccc(-c2csc(NC(=O)Nc3ccc(F)cc3)n2)cc1OC -Cc1ccc(C)c(S(=O)(=O)N2CCN([C@H](C)c3nc(N)nc(Nc4ccccc4)n3)CC2)c1 -Cc1cccc(C)c1-n1nnnc1CSCc1nnc(C)n1C -COc1cccc(C(=O)N[C@@](C)(C(N)=O)c2cccc(Cl)c2)c1 -C[C@H](CCO)[NH2+][C@H]1CCc2c(Br)cccc21 -CCc1ccsc1-c1cnc(C[NH3+])o1 -NC(=O)C1(N2CCCC2)CC[NH2+]CC1 -CC(C)C[C@@H]([NH3+])C(=O)N1CC[C@H](C(=O)[O-])[C@@H]1C -CC[C@H](C)Cn1c(CCCl)nc2c(C)nn(C)c21 -Cc1csc([C@H](C)NC(=O)CCC[NH+]2CCCCC2)n1 -CCC(=O)NN/C(C)=C/C(=O)NCC(C)(C)C -CCC(=O)N1CCC([NH+](C)Cc2ccc(SC)c(OC)c2)CC1 -Clc1ccc(CNc2ncccc2Cl)cn1 -C[C@H](c1nc(C(C)(C)C)no1)[S@](=O)Cc1ncn(-c2ccccc2)n1 -Cn1cnn(C[NH+](Cc2ccc(F)cc2)C2CC2)c1=S -C#CC(C)(C)NC[C@H]1CN(C)CCO1 -C[C@H](NC(=O)[C@H]1CCCN1S(C)(=O)=O)c1ccc2c(c1)OCO2 -O=C(Nc1ccc2ncccc2c1)C(=O)NC1CCC(O)CC1 -CC(C)[C@H]([NH2+]CC1CCN(C(=O)OC(C)(C)C)CC1)c1cccnc1 -CCC[C@H](C)NC(=O)C[NH2+]Cc1cscc1C -CS(=O)(=O)N1CCC(C(=O)Nc2sc3c(c2C#N)CCCC3)CC1 -O=C(CN(C(=O)Cn1nnc2ccccc21)c1ccccc1)NC[C@H]1CCCO1 -O=C(Nc1cccc(N2CCCNC2=O)c1)C(=O)N1CCc2cc(F)ccc2C1 -C[C@@H]1CC[NH+](CCCN2C(=O)CNC2=O)C[C@@H]1O -O=C(COc1ccc(Br)cc1)NOCc1ccccc1 -CC(=O)C[C@]1(O)C(=O)N(Cc2ccc(C)cc2)c2c(C)cccc21 -C[C@H](OC(=O)c1ccc2ccccc2n1)C(=O)NCC1CCCCC1 -C[C@@H](Sc1cc(Cl)ccc1Cl)C(=O)N1CCC[C@H](CCC(N)=O)C1 -COc1cc(-c2ccno2)ccc1S(=O)(=O)NCc1ccco1 -CCC[C@@H](CC)Nc1c(F)c(F)nc(F)c1F -Oc1cccc([C@@H]2CN(c3nccc(Oc4ccc(F)cc4)n3)CCO2)c1 -COc1ccc([C@H](CNC(=O)c2cccc3ccccc23)[NH+]2CCCC2)cc1 -CC1(C)[C@@H]2CC[C@@]1(C)[C@H](NC(=O)COc1ccc(C3SCCCS3)cc1)C2 -Cc1c(C)n(-c2ccccc2)c2nc(C(=O)Nc3ccc(F)cc3)nc(N3CCCCC3)c12 
-CC(C)C(=O)Nc1cccc(NC(=O)C(=O)NCC[C@H]2C[C@H]3CC[C@@H]2C3)c1 -COc1ccc(CNC(=O)c2cc(N3C(=O)C(C)(C)CS3(=O)=O)ccc2Cl)cc1OC -CC(=O)Nc1cccc(NC(=O)CCc2c(C)[nH]c(=S)[nH]c2=O)c1 -COc1ccccc1[C@H]1CCCN1C(=O)[C@@H](C)CCOc1ccccc1 -CCNc1ncc(COCC2CCCCC2)s1 -CC[C@H](C)C(=O)Nc1ccccn1 -O=C([O-])CC1=C(C(=O)[O-])CCCC1 -Fc1ccc(C[NH2+]C[C@@H]([C@H]2CCOC2)N2CCOCC2)c(F)c1 -CCc1cc(Cn2cc(N)nn2)n(C)n1 -N#Cc1cccnc1Oc1ccccc1NCc1cccc2ccccc12 -O=C(NCc1ccc([N+](=O)[O-])cc1)N[C@@H]1CCCC[C@H]1CO -C[C@H](NC(=O)NC[C@H](C)C[C@@H](C)O)c1ccc(S(C)(=O)=O)cc1 -CC(C)Oc1ccc(NC(=O)NC[C@H](C)N2CCOCC2)c(F)c1 -COc1ccccc1COC1CCN(C(=O)[C@H]2CCC[C@@H](C)C2)CC1 -CC(=O)O[C@H]1CC[C@H]2[C@H]3C[C@H](OC(C)=O)[C@]45C[C@H]4CC[C@]5(C)[C@@H]3CC[C@]12C -CCCn1nnnc1CN1CC[C@]2(C1)NC(=O)N(C(C)C)C2=O -CC1=C(C(=O)OCC(C)C)[C@H](c2cccc(F)c2)c2c(n(C)c(=O)n(C)c2=O)N1 -Cc1cc(C)c2c(-c3ccccc3)nc(SCC(=O)NC3CC3)n2n1 -COc1cc2c(cc1OC)[C@H](C(=O)[O-])[C@H](c1cccc(Cl)c1)N(C)C2=O -COCCN1C(=O)CC[C@@H]2C[NH+](Cc3cc(C)ccc3C)CC[C@@H]21 -Cc1cnc([C@H](C)NC(=O)NNC(=O)Nc2ccccc2)s1 -CNC(=O)c1ccc(NC(=O)c2csc(-c3ccccc3)n2)cc1 -CNc1nc(C2CCN(C(=O)Cc3ccccn3)CC2)[nH+]c2c1CN(C(C)=O)CC2 -Cc1cc(C)n(C[C@@H](C)CNC(=O)NCc2cc3ccccc3o2)n1 -Cc1nc2n(n1)C[C@H]([NH2+]C[C@@H](O)CN(C)Cc1ccccc1)CC2 -c1ccc(Cn2c(SCc3ncon3)nnc2-c2cccs2)cc1 -COc1ccc(-n2nc(C)c3c2C[C@H](c2cc(OC)c(OC)c(OC)c2)CC3=O)cc1 -COc1ccc(OC)c(/C=C/C(=O)OCC(C)C)c1 -C[C@H](NC(=O)[C@H]1CC[C@H](C[NH3+])O1)C(=O)N(C)C -Cc1cccc(C(C)C)c1NC(=O)[C@@H](C)Sc1nnc(-c2cccs2)n1N -Cn1cc(C(=O)Nc2ccccc2C(=O)NCCc2ccccc2)c(=O)c2cccn21 -Cc1ccccc1[C@@H]1C[C@H](C)N(C(=O)[C@@H](C)Sc2ccccn2)C1 -Cc1occc1C(=O)/C(C#N)=C/c1ccc([C@@H]2C[C@H]2C)o1 -CC[C@](C)(C[NH3+])[C@H](O)c1ccc2c(c1)OCO2 -CCc1nn(CC)c(C[C@@]2(C3CC3)CCC[NH2+]2)c1Br -CC(=O)Nc1ccc(CN2CC[NH+](C3CCCC3)[C@H](CCO)C2)cc1 -COc1ccc([C@H](O)[C@@H](C)NC(=O)[C@@H](C)SC)cc1 -CCC(=O)N1CCC[C@@H]1c1cc(C(F)(F)F)c2c(=O)n(C)c(=O)n(C)c2n1 -C[C@H]([NH3+])[C@@H](CC(=O)[O-])c1ccccc1 -Cc1ccc([C@H]2C3=C(NC(=O)N2C)c2ccccc2C3=O)cc1 -C[C@@]1(Cc2ccc3c(c2)OCO3)CCC(=O)N(CCc2ccc(O)cc2)C1 
-Cc1cc(C)cc(O[C@H]2CCCC(C)(C)[C@@H]2O)c1 -Cc1c(-c2nc(-c3cccs3)no2)sc2nc[nH]c(=O)c12 -COc1cc([C@H]2C(C(=O)Nc3ccccn3)=C(C)NC3=C2C(=O)CCC3)ccc1OCc1ccccc1 -COc1cc(C)c([C@@H](C)NC2CC[NH+]([C@H]3CCCC[C@@H]3O)CC2)cc1OC -C[C@@H]1CCC[C@@H](NS(=O)(=O)Cc2cccc(N)c2)C1 -CCCN1C(N)=[NH+]C[C@@H]1c1cc(Cl)c2c(c1)OCO2 -CC(C)C[C@@H](NC(=O)[C@@H]1C[C@@H]1c1cccc(Cl)c1Cl)C(=O)Nc1cc[nH]n1 -Cc1nc(CCC[NH+]2CCC[C@H]2C(N)=O)cs1 -NC(=O)COc1cccc(CNC(=O)c2cc3cc(Cl)ccc3[nH]2)c1 -COc1ccc(CNC(=O)c2cc(=O)c3ccc(Br)cc3o2)cc1 -CC[C@@H](C)c1ccccc1N1C[C@H](C(=O)N2CCN(C)CC2)CC1=O -CC[NH2+][C@H](Cc1ccccc1Cl)[C@H]1C[NH+](C)CCN1C -C[NH+]1CCC(N[C@@H]2CC(=O)N(CCc3cccc(Cl)c3)C2)CC1 -CCN1CC(=O)Nc2cc(C(=O)NC3CC[NH+](C4CCCC4)CC3)ccc21 -COc1ccc(Br)cc1/C=C/C(=O)N1CCN(C(=O)c2ccccc2)CC1 -NC(=O)COc1ccc(C(=O)N[C@H]2CCCc3ccccc32)cc1 -O=C(COc1ncnc2ccc(Br)cc12)Nc1ccccc1Cl -C[C@@H](c1ccc(Cl)cc1Cl)N(C)C(=O)c1ccc(NC(N)=O)cc1 -Cc1ccc(N2C(=O)[C@@H](Cc3cccc(C)c3)S/C2=C(/C#N)C(N)=O)cc1 -CC(C)CN(C(=O)NCc1ccc(C(F)(F)F)cc1)C1CC1 -ClCCc1nc2cccnc2n1CCn1cccn1 -CC[C@@](C)([C@@H]([NH3+])c1cc(Br)ccc1F)N1CCOCC1 -Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 -CC1(C)OC[C@H]([C@H]2O[C@@H]3OC(C)(C)O[C@@H]3[C@H]2OS(C)(=O)=O)O1 -Cc1cccc([C@H](CCl)CCC[C@@H]2CCCO2)c1 diff --git a/open_biomed/models/MoleculeSTM/models/GA/__init__.py b/open_biomed/models/MoleculeSTM/models/GA/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/open_biomed/models/MoleculeSTM/models/GA/crossover.py b/open_biomed/models/MoleculeSTM/models/GA/crossover.py deleted file mode 100644 index ec6d6b1..0000000 --- a/open_biomed/models/MoleculeSTM/models/GA/crossover.py +++ /dev/null @@ -1,194 +0,0 @@ -''' -Written by Jan H. 
Jensen 2018 -''' -from rdkit import Chem -from rdkit.Chem import AllChem - -import random -import numpy as np - -from rdkit import rdBase -rdBase.DisableLog('rdApp.error') - -average_size = 39.15 -size_stdev = 3.50 - - -def cut(mol): - if not mol.HasSubstructMatch(Chem.MolFromSmarts('[*]-;!@[*]')): - return None - bis = random.choice(mol.GetSubstructMatches(Chem.MolFromSmarts('[*]-;!@[*]'))) #single bond not in ring - #print bis,bis[0],bis[1] - bs = [mol.GetBondBetweenAtoms(bis[0],bis[1]).GetIdx()] - - fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1)]) - - try: - fragments = Chem.GetMolFrags(fragments_mol,asMols=True) - return fragments - except: - return None - - -def cut_ring(mol): - for i in range(10): - if random.random() < 0.5: - if not mol.HasSubstructMatch(Chem.MolFromSmarts('[R]@[R]@[R]@[R]')): - return None - bis = random.choice(mol.GetSubstructMatches(Chem.MolFromSmarts('[R]@[R]@[R]@[R]'))) - bis = ((bis[0],bis[1]),(bis[2],bis[3]),) - else: - if not mol.HasSubstructMatch(Chem.MolFromSmarts('[R]@[R;!D2]@[R]')): - return None - bis = random.choice(mol.GetSubstructMatches(Chem.MolFromSmarts('[R]@[R;!D2]@[R]'))) - bis = ((bis[0],bis[1]),(bis[1],bis[2]),) - - #print bis - bs = [mol.GetBondBetweenAtoms(x,y).GetIdx() for x,y in bis] - - fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1),(1,1)]) - - try: - fragments = Chem.GetMolFrags(fragments_mol,asMols=True) - except: - return None - - if len(fragments) == 2: - return fragments - - return None - -def ring_OK(mol): - if not mol.HasSubstructMatch(Chem.MolFromSmarts('[R]')): - return True - - ring_allene = mol.HasSubstructMatch(Chem.MolFromSmarts('[R]=[R]=[R]')) - - cycle_list = mol.GetRingInfo().AtomRings() - max_cycle_length = max([ len(j) for j in cycle_list ]) - macro_cycle = max_cycle_length > 6 - - double_bond_in_small_ring = mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4]=[r3,r4]')) - - return not ring_allene and not macro_cycle and not 
double_bond_in_small_ring - -def mol_OK(mol): - try: - Chem.SanitizeMol(mol) - test_mol = Chem.MolFromSmiles(Chem.MolToSmiles(mol)) - if test_mol == None: - return None - target_size = size_stdev*np.random.randn() + average_size #parameters set in GA_mol - if mol.GetNumAtoms() > 5 and mol.GetNumAtoms() < target_size: - return True - else: - return False - except: - return False - - -def crossover_ring(parent_A,parent_B): - ring_smarts = Chem.MolFromSmarts('[R]') - if not parent_A.HasSubstructMatch(ring_smarts) and not parent_B.HasSubstructMatch(ring_smarts): - return None - - rxn_smarts1 = ['[*:1]~[1*].[1*]~[*:2]>>[*:1]-[*:2]','[*:1]~[1*].[1*]~[*:2]>>[*:1]=[*:2]'] - rxn_smarts2 = ['([*:1]~[1*].[1*]~[*:2])>>[*:1]-[*:2]','([*:1]~[1*].[1*]~[*:2])>>[*:1]=[*:2]'] - for i in range(10): - fragments_A = cut_ring(parent_A) - fragments_B = cut_ring(parent_B) - #print [Chem.MolToSmiles(x) for x in list(fragments_A)+list(fragments_B)] - if fragments_A == None or fragments_B == None: - return None - - new_mol_trial = [] - for rs in rxn_smarts1: - rxn1 = AllChem.ReactionFromSmarts(rs) - new_mol_trial = [] - for fa in fragments_A: - for fb in fragments_B: - new_mol_trial.append(rxn1.RunReactants((fa,fb))[0]) - - new_mols = [] - for rs in rxn_smarts2: - rxn2 = AllChem.ReactionFromSmarts(rs) - for m in new_mol_trial: - m = m[0] - if mol_OK(m): - new_mols += list(rxn2.RunReactants((m,))) - - new_mols2 = [] - for m in new_mols: - m = m[0] - if mol_OK(m) and ring_OK(m): - new_mols2.append(m) - - if len(new_mols2) > 0: - return random.choice(new_mols2) - - return None - -def crossover_non_ring(parent_A,parent_B): - for i in range(10): - fragments_A = cut(parent_A) - fragments_B = cut(parent_B) - if fragments_A == None or fragments_B == None: - return None - rxn = AllChem.ReactionFromSmarts('[*:1]-[1*].[1*]-[*:2]>>[*:1]-[*:2]') - new_mol_trial = [] - for fa in fragments_A: - for fb in fragments_B: - new_mol_trial.append(rxn.RunReactants((fa,fb))[0]) - - new_mols = [] - for mol in 
new_mol_trial: - mol = mol[0] - if mol_OK(mol): - new_mols.append(mol) - - if len(new_mols) > 0: - return random.choice(new_mols) - - return None - -def crossover(parent_A,parent_B): - parent_smiles = [Chem.MolToSmiles(parent_A),Chem.MolToSmiles(parent_B)] - try: - Chem.Kekulize(parent_A,clearAromaticFlags=True) - Chem.Kekulize(parent_B,clearAromaticFlags=True) - except: - pass - for i in range(10): - if random.random() <= 0.5: - #print 'non-ring crossover' - new_mol = crossover_non_ring(parent_A,parent_B) - if new_mol != None: - new_smiles = Chem.MolToSmiles(new_mol) - if new_mol != None and new_smiles not in parent_smiles: - return new_mol - else: - #print 'ring crossover' - new_mol = crossover_ring(parent_A,parent_B) - if new_mol != None: - new_smiles = Chem.MolToSmiles(new_mol) - if new_mol != None and new_smiles not in parent_smiles: - return new_mol - - return None - -if __name__ == "__main__": - smiles1 = 'CC(C)(C)c1ccc2occ(CC(=O)Nc3ccccc3F)c2c1' - smiles2 = 'C[C@@H]1CC(Nc2cncc(-c3nncn3C)c2)C[C@@H](C)C1' - - smiles1 = 'Cc1ccc(S(=O)(=O)N2C(N)=C(C#N)C(c3ccc(Cl)cc3)C2C(=O)c2ccccc2)cc1' - smiles2 = 'CC(C#N)CNC(=O)c1cccc(Oc2cccc(C(F)(F)F)c2)c1' - - mol1 = Chem.MolFromSmiles(smiles1) - mol2 = Chem.MolFromSmiles(smiles2) - - child = crossover(mol1,mol2) - mutation_rate = 1.0 - #mutated_child = mutate(child,mutation_rate) - - for i in range(100): - child = crossover(mol1,mol2) diff --git a/open_biomed/models/MoleculeSTM/models/GA/mutate.py b/open_biomed/models/MoleculeSTM/models/GA/mutate.py deleted file mode 100644 index f52905d..0000000 --- a/open_biomed/models/MoleculeSTM/models/GA/mutate.py +++ /dev/null @@ -1,132 +0,0 @@ -''' -Written by Jan H. 
Jensen 2018 -''' -from rdkit import Chem -from rdkit.Chem import AllChem - -import random -import numpy as np -import MoleculeSTM.models.GA.crossover as co - -from rdkit import rdBase -rdBase.DisableLog('rdApp.error') - -def delete_atom(): - choices = ['[*:1]~[D1]>>[*:1]', '[*:1]~[D2]~[*:2]>>[*:1]-[*:2]', - '[*:1]~[D3](~[*;!H0:2])~[*:3]>>[*:1]-[*:2]-[*:3]', - '[*:1]~[D4](~[*;!H0:2])(~[*;!H0:3])~[*:4]>>[*:1]-[*:2]-[*:3]-[*:4]', - '[*:1]~[D4](~[*;!H0;!H1:2])(~[*:3])~[*:4]>>[*:1]-[*:2](-[*:3])-[*:4]'] - p = [0.25,0.25,0.25,0.1875,0.0625] - - return np.random.choice(choices, p=p) - -def append_atom(): - choices = [['single',['C','N','O','F','S','Cl','Br'],7*[1.0/7.0]], - ['double',['C','N','O'],3*[1.0/3.0]], - ['triple',['C','N'],2*[1.0/2.0]] ] - p_BO = [0.60,0.35,0.05] - - index = np.random.choice(list(range(3)), p=p_BO) - - BO, atom_list, p = choices[index] - new_atom = np.random.choice(atom_list, p=p) - - if BO == 'single': - rxn_smarts = '[*;!H0:1]>>[*:1]X'.replace('X','-'+new_atom) - if BO == 'double': - rxn_smarts = '[*;!H0;!H1:1]>>[*:1]X'.replace('X','='+new_atom) - if BO == 'triple': - rxn_smarts = '[*;H3:1]>>[*:1]X'.replace('X','#'+new_atom) - - return rxn_smarts - -def insert_atom(): - choices = [['single',['C','N','O','S'],4*[1.0/4.0]], - ['double',['C','N'],2*[1.0/2.0]], - ['triple',['C'],[1.0]] ] - p_BO = [0.60,0.35,0.05] - - index = np.random.choice(list(range(3)), p=p_BO) - - BO, atom_list, p = choices[index] - new_atom = np.random.choice(atom_list, p=p) - - if BO == 'single': - rxn_smarts = '[*:1]~[*:2]>>[*:1]X[*:2]'.replace('X',new_atom) - if BO == 'double': - rxn_smarts = '[*;!H0:1]~[*:2]>>[*:1]=X-[*:2]'.replace('X',new_atom) - if BO == 'triple': - rxn_smarts = '[*;!R;!H1;!H0:1]~[*:2]>>[*:1]#X-[*:2]'.replace('X',new_atom) - - return rxn_smarts - -def change_bond_order(): - choices = ['[*:1]!-[*:2]>>[*:1]-[*:2]','[*;!H0:1]-[*;!H0:2]>>[*:1]=[*:2]', - '[*:1]#[*:2]>>[*:1]=[*:2]','[*;!R;!H1;!H0:1]~[*:2]>>[*:1]#[*:2]'] - p = [0.45,0.45,0.05,0.05] - - return 
np.random.choice(choices, p=p) - -def delete_cyclic_bond(): - return '[*:1]@[*:2]>>([*:1].[*:2])' - -def add_ring(): - choices = ['[*;!r;!H0:1]~[*;!r:2]~[*;!r;!H0:3]>>[*:1]1~[*:2]~[*:3]1', - '[*;!r;!H0:1]~[*!r:2]~[*!r:3]~[*;!r;!H0:4]>>[*:1]1~[*:2]~[*:3]~[*:4]1', - '[*;!r;!H0:1]~[*!r:2]~[*:3]~[*:4]~[*;!r;!H0:5]>>[*:1]1~[*:2]~[*:3]~[*:4]~[*:5]1', - '[*;!r;!H0:1]~[*!r:2]~[*:3]~[*:4]~[*!r:5]~[*;!r;!H0:6]>>[*:1]1~[*:2]~[*:3]~[*:4]~[*:5]~[*:6]1'] - p = [0.05,0.05,0.45,0.45] - - return np.random.choice(choices, p=p) - -def change_atom(mol): - choices = ['#6','#7','#8','#9','#16','#17','#35'] - p = [0.15,0.15,0.14,0.14,0.14,0.14,0.14] - - X = np.random.choice(choices, p=p) - while not mol.HasSubstructMatch(Chem.MolFromSmarts('['+X+']')): - X = np.random.choice(choices, p=p) - Y = np.random.choice(choices, p=p) - while Y == X: - Y = np.random.choice(choices, p=p) - - return '[X:1]>>[Y:1]'.replace('X',X).replace('Y',Y) - -def mutate(mol,mutation_rate): - - if random.random() > mutation_rate: - return mol - - Chem.Kekulize(mol,clearAromaticFlags=True) - p = [0.15,0.14,0.14,0.14,0.14,0.14,0.15] - for i in range(10): - rxn_smarts_list = 7*[''] - rxn_smarts_list[0] = insert_atom() - rxn_smarts_list[1] = change_bond_order() - rxn_smarts_list[2] = delete_cyclic_bond() - rxn_smarts_list[3] = add_ring() - rxn_smarts_list[4] = delete_atom() - rxn_smarts_list[5] = change_atom(mol) - rxn_smarts_list[6] = append_atom() - rxn_smarts = np.random.choice(rxn_smarts_list, p=p) - - #print('mutation',rxn_smarts) - - rxn = AllChem.ReactionFromSmarts(rxn_smarts) - - new_mol_trial = rxn.RunReactants((mol,)) - - new_mols = [] - for m in new_mol_trial: - m = m[0] - #print Chem.MolToSmiles(mol),mol_OK(mol) - if co.mol_OK(m) and co.ring_OK(m): - new_mols.append(m) - - if len(new_mols) > 0: - return random.choice(new_mols) - - return None - -if __name__ == "__main__": - pass diff --git a/open_biomed/models/MoleculeSTM/models/MLP.py b/open_biomed/models/MoleculeSTM/models/MLP.py deleted file mode 
100644 index b5175c2..0000000 --- a/open_biomed/models/MoleculeSTM/models/MLP.py +++ /dev/null @@ -1,49 +0,0 @@ -from torch import nn -from torch.nn import functional as F -from collections.abc import Sequence - - -class MLP(nn.Module): - def __init__(self, input_dim, hidden_dims, batch_norm=False, activation="relu", dropout=0): - super(MLP, self).__init__() - - if not isinstance(hidden_dims, Sequence): - hidden_dims = [hidden_dims] - self.dims = [input_dim] + hidden_dims - - if isinstance(activation, str): - self.activation = getattr(F, activation) - else: - self.activation = activation - if dropout: - self.dropout = nn.Dropout(dropout) - else: - self.dropout = None - - self.layers = nn.ModuleList() - for i in range(len(self.dims) - 1): - self.layers.append(nn.Linear(self.dims[i], self.dims[i + 1])) - if batch_norm: - self.batch_norms = nn.ModuleList() - for i in range(len(self.dims) - 2): - self.batch_norms.append(nn.BatchNorm1d(self.dims[i + 1])) - else: - self.batch_norms = None - - def forward(self, input): - layer_input = input - - for i, layer in enumerate(self.layers): - hidden = layer(layer_input) - if i < len(self.layers) - 1: - if self.batch_norms: - x = hidden.flatten(0, -2) - hidden = self.batch_norms[i](x).view_as(hidden) - hidden = self.activation(hidden) - if self.dropout: - hidden = self.dropout(hidden) - if hidden.shape == layer_input.shape: - hidden = hidden + layer_input - layer_input = hidden - - return hidden \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/__init__.py b/open_biomed/models/MoleculeSTM/models/__init__.py deleted file mode 100644 index 7e38623..0000000 --- a/open_biomed/models/MoleculeSTM/models/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from models.MoleculeSTM.models.molecule_gnn_model import GNN, GNN_graphpred -from models.MoleculeSTM.models.MLP import MLP \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/__init__.py 
b/open_biomed/models/MoleculeSTM/models/mega_molbart/__init__.py deleted file mode 100644 index 27eca55..0000000 --- a/open_biomed/models/MoleculeSTM/models/mega_molbart/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from models.MoleculeSTM.models.mega_molbart.megatron_bart import MegatronBART \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/splitters.py b/open_biomed/models/MoleculeSTM/splitters.py deleted file mode 100644 index 0f9ac8d..0000000 --- a/open_biomed/models/MoleculeSTM/splitters.py +++ /dev/null @@ -1,93 +0,0 @@ -import random -from collections import defaultdict -from itertools import compress - -import numpy as np -import torch -from rdkit.Chem.Scaffolds import MurckoScaffold -from sklearn.model_selection import StratifiedKFold - -from torch.utils.data import Subset - - -def generate_scaffold(smiles, include_chirality=False): - """ Obtain Bemis-Murcko scaffold from smiles - :return: smiles of scaffold """ - scaffold = MurckoScaffold.MurckoScaffoldSmiles( - smiles=smiles, includeChirality=include_chirality) - return scaffold - - -def scaffold_split(dataset, smiles_list, task_idx=None, null_value=0, - frac_train=0.8, frac_valid=0.1, frac_test=0.1, - pyg_dataset=True): - """ - Adapted from https://github.com/deepchem/deepchem/blob/master/deepchem/splits/splitters.py - Split dataset by Bemis-Murcko scaffolds - This function can also ignore examples containing null values for a - selected task when splitting. Deterministic split - :param dataset: pytorch geometric dataset obj - :param smiles_list: list of smiles corresponding to the dataset obj - :param task_idx: column idx of the data.y tensor. Will filter out - examples with null value in specified task column of the data.y tensor - prior to splitting. 
If None, then no filtering - :param null_value: float that specifies null value in data.y to filter if - task_idx is provided - :param frac_train, frac_valid, frac_test: fractions - :param pyg_dataset: if this is pytorch or pytorch-gemetric dataset - :return: train, valid, test slices of the input dataset obj. """ - np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.0) - - if task_idx is not None: - # filter based on null values in task_idx - # get task array - y_task = np.array([data.y[task_idx].item() for data in dataset]) - # boolean array that correspond to non null values - non_null = y_task != null_value - smiles_list = list(compress(enumerate(smiles_list), non_null)) - else: - non_null = np.ones(len(dataset)) == 1 - smiles_list = list(compress(enumerate(smiles_list), non_null)) - - # create dict of the form {scaffold_i: [idx1, idx....]} - all_scaffolds = {} - for i, smiles in smiles_list: - scaffold = generate_scaffold(smiles, include_chirality=True) - if scaffold not in all_scaffolds: - all_scaffolds[scaffold] = [i] - else: - all_scaffolds[scaffold].append(i) - - # sort from largest to smallest sets - all_scaffolds = {key: sorted(value) for key, value in all_scaffolds.items()} - all_scaffold_sets = [ - scaffold_set for (scaffold, scaffold_set) in sorted( - all_scaffolds.items(), key=lambda x: (len(x[1]), x[1][0]), reverse=True) - ] - - # get train, valid test indices - train_cutoff = frac_train * len(smiles_list) - valid_cutoff = (frac_train + frac_valid) * len(smiles_list) - train_idx, valid_idx, test_idx = [], [], [] - for scaffold_set in all_scaffold_sets: - if len(train_idx) + len(scaffold_set) > train_cutoff: - if len(train_idx) + len(valid_idx) + len(scaffold_set) > valid_cutoff: - test_idx.extend(scaffold_set) - else: - valid_idx.extend(scaffold_set) - else: - train_idx.extend(scaffold_set) - - assert len(set(train_idx).intersection(set(valid_idx))) == 0 - assert len(set(test_idx).intersection(set(valid_idx))) == 0 - - if 
pyg_dataset: - train_dataset = dataset[torch.tensor(train_idx)] - valid_dataset = dataset[torch.tensor(valid_idx)] - test_dataset = dataset[torch.tensor(test_idx)] - return train_dataset, valid_dataset, test_dataset - else: - train_dataset = Subset(dataset, train_idx) - valid_dataset = Subset(dataset, valid_idx) - test_dataset = Subset(dataset, test_idx) - return train_dataset, valid_dataset, test_dataset diff --git a/open_biomed/models/MoleculeSTM/utils.py b/open_biomed/models/MoleculeSTM/utils.py deleted file mode 100644 index b0086a5..0000000 --- a/open_biomed/models/MoleculeSTM/utils.py +++ /dev/null @@ -1,71 +0,0 @@ -import numpy as np -import torch - - -# This is for BERT -def padarray(A, size, value=0): - t = size - len(A) - return np.pad(A, pad_width=(0, t), mode='constant', constant_values = value) - - -# This is for BERT -def preprocess_each_sentence(sentence, tokenizer, max_seq_len): - text_input = tokenizer( - sentence, truncation=True, max_length=max_seq_len, - padding='max_length', return_tensors='np') - - input_ids = text_input['input_ids'].squeeze() - attention_mask = text_input['attention_mask'].squeeze() - - sentence_tokens_ids = padarray(input_ids, max_seq_len) - sentence_masks = padarray(attention_mask, max_seq_len) - return [sentence_tokens_ids, sentence_masks] - - -# This is for BERT -def prepare_text_tokens(device, description, tokenizer, max_seq_len): - B = len(description) - tokens_outputs = [preprocess_each_sentence(description[idx], tokenizer, max_seq_len) for idx in range(B)] - tokens_ids = [o[0] for o in tokens_outputs] - masks = [o[1] for o in tokens_outputs] - tokens_ids = torch.Tensor(tokens_ids).long().to(device) - masks = torch.Tensor(masks).bool().to(device) - return tokens_ids, masks - - -def get_molecule_repr_MoleculeSTM(molecule_data, mol2latent=None, molecule_type="SMILES", MegaMolBART_wrapper=None, molecule_model=None): - if molecule_type == "SMILES": - embedding, pad_mask = 
MegaMolBART_wrapper.smileslist2embedding(molecule_data) # [pad, B, d], [pad, B] - molecule_repr = embedding[0, :, :] # [B, d] - else: - molecule_repr, _ = molecule_model(molecule_data) - - if mol2latent is not None: - molecule_repr = mol2latent(molecule_repr) - return molecule_repr - - -def freeze_network(model): - for param in model.parameters(): - param.requires_grad = False - return - - -def get_num_task_and_type(dataset): - if dataset in ["esol", "freesolv", "lipophilicity"]: - return 1, "regression" - elif dataset in ["hiv", "bace", "bbbp"]: - return 1, "classification" - elif dataset == "tox21": - return 12, "classification" - elif dataset == "pcba": - return 92, "classification" - elif dataset == "muv": - return 17, "classification" - elif dataset == "toxcast": - return 617, "classification" - elif dataset == "sider": - return 27, "classification" - elif dataset == "clintox": - return 2, "classification" - raise ValueError("Invalid dataset name.") diff --git a/open_biomed/models/__init__.py b/open_biomed/models/__init__.py index b526adf..59c3363 100644 --- a/open_biomed/models/__init__.py +++ b/open_biomed/models/__init__.py @@ -6,6 +6,7 @@ from models.multimodal import * from models.multimodal.molkformer import * + SUPPORTED_MOL_ENCODER = { "cnn": MolCNN, "tgsa": GINTGSA, @@ -20,7 +21,8 @@ "kv-plm": KVPLM, "momu": MoMu, "molfm": MolFM, - "molkformer":MolKFormer + "molkformer": MolKFormer, + "molstm": MoleculeSTM } SUPPORTED_MOL_DECODER = { diff --git a/open_biomed/models/multimodal/__init__.py b/open_biomed/models/multimodal/__init__.py index 7b61dbf..2626315 100644 --- a/open_biomed/models/multimodal/__init__.py +++ b/open_biomed/models/multimodal/__init__.py @@ -6,4 +6,6 @@ from models.multimodal.molfm.drugfm import DrugFM from models.multimodal.molt5 import MolT5 from models.multimodal.text2mol import Text2MolMLP -from models.multimodal.molkformer.mol_kformer import MolKFormer \ No newline at end of file +from models.multimodal.molkformer.mol_kformer 
import MolKFormer +from models.multimodal.mega_molbart.mega_mol_bart import MegaMolBART +from models.multimodal.moleculestm import MoleculeSTM \ No newline at end of file diff --git a/open_biomed/models/multimodal/mega_molbart/__init__.py b/open_biomed/models/multimodal/mega_molbart/__init__.py new file mode 100644 index 0000000..d7f244e --- /dev/null +++ b/open_biomed/models/multimodal/mega_molbart/__init__.py @@ -0,0 +1 @@ +from models.multimodal.mega_molbart.megatron_bart import MegatronBART \ No newline at end of file diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/decoder.py b/open_biomed/models/multimodal/mega_molbart/decoder.py similarity index 100% rename from open_biomed/models/MoleculeSTM/models/mega_molbart/decoder.py rename to open_biomed/models/multimodal/mega_molbart/decoder.py diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/mega_mol_bart.py b/open_biomed/models/multimodal/mega_molbart/mega_mol_bart.py similarity index 97% rename from open_biomed/models/MoleculeSTM/models/mega_molbart/mega_mol_bart.py rename to open_biomed/models/multimodal/mega_molbart/mega_mol_bart.py index 3108e53..006a4ca 100644 --- a/open_biomed/models/MoleculeSTM/models/mega_molbart/mega_mol_bart.py +++ b/open_biomed/models/multimodal/mega_molbart/mega_mol_bart.py @@ -15,7 +15,7 @@ from megatron.checkpointing import load_checkpoint import megatron.checkpointing as megatron_checkpointing from megatron.global_vars import set_global_variables -from models.MoleculeSTM.cuchemcommon.workflow import BaseGenerativeWorkflow, add_jitter +from models.multimodal.mega_molbart.workflow import BaseGenerativeWorkflow, add_jitter from .decoder import DecodeSampler from megatron import get_args, mpu from megatron.initialize import initialize_megatron @@ -257,9 +257,7 @@ def smileslist2embedding(self, smiles_list): tokens = self.tokenizer.tokenize(smiles_list, pad=True) token_ids = 
torch.tensor(self.tokenizer.convert_tokens_to_ids(tokens['original_tokens'])).cuda().T pad_mask = torch.tensor(tokens['masked_pad_masks']).bool().cuda().T - # use collater - # token_ids = torch.tensor(smiles_list['original_tokens']).cuda().T - # pad_mask = torch.tensor(smiles_list['masked_pad_masks']).bool().cuda().T + token_ids = token_ids[:self.max_model_position_embeddings] pad_mask = pad_mask[:self.max_model_position_embeddings] encode_input = {"encoder_input": token_ids, "encoder_pad_mask": pad_mask} @@ -279,8 +277,7 @@ def smileslist2embedding_model_given(self, model, smiles_list): tokens = self.tokenizer.tokenize(smiles_list, pad=True) token_ids = torch.tensor(self.tokenizer.convert_tokens_to_ids(tokens['original_tokens'])).cuda().T pad_mask = torch.tensor(tokens['masked_pad_masks']).bool().cuda().T - # token_ids = torch.tensor(smiles_list['original_tokens']).cuda().T - # pad_mask = torch.tensor(smiles_list['masked_pad_masks']).bool().cuda().T + token_ids = token_ids[:self.max_model_position_embeddings] pad_mask = pad_mask[:self.max_model_position_embeddings] encode_input = {"encoder_input": token_ids, "encoder_pad_mask": pad_mask} diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/megatron_bart.py b/open_biomed/models/multimodal/mega_molbart/megatron_bart.py similarity index 100% rename from open_biomed/models/MoleculeSTM/models/mega_molbart/megatron_bart.py rename to open_biomed/models/multimodal/mega_molbart/megatron_bart.py diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/tokenizer.py b/open_biomed/models/multimodal/mega_molbart/tokenizer.py similarity index 100% rename from open_biomed/models/MoleculeSTM/models/mega_molbart/tokenizer.py rename to open_biomed/models/multimodal/mega_molbart/tokenizer.py diff --git a/open_biomed/models/MoleculeSTM/models/mega_molbart/util.py b/open_biomed/models/multimodal/mega_molbart/util.py similarity index 100% rename from open_biomed/models/MoleculeSTM/models/mega_molbart/util.py rename to 
open_biomed/models/multimodal/mega_molbart/util.py diff --git a/open_biomed/models/MoleculeSTM/cuchemcommon/workflow.py b/open_biomed/models/multimodal/mega_molbart/workflow.py similarity index 95% rename from open_biomed/models/MoleculeSTM/cuchemcommon/workflow.py rename to open_biomed/models/multimodal/mega_molbart/workflow.py index 0872750..00f1efa 100644 --- a/open_biomed/models/MoleculeSTM/cuchemcommon/workflow.py +++ b/open_biomed/models/multimodal/mega_molbart/workflow.py @@ -4,7 +4,6 @@ from typing import List import numpy as np -from models.MoleculeSTM.cuchemcommon.data import GenerativeWfDao from rdkit.Chem import PandasTools, CanonSmiles logger = logging.getLogger(__name__) @@ -26,6 +25,16 @@ def _(embedding, radius, cnt, shape): return distorteds +class GenerativeWfDao(object): + + def fetch_id_from_chembl(self, id: List): + """ + Fetch molecular details for a list of molecules. The values in the list + of molecules depends on database/service used. For e.g. it could be + ChemblId or molreg_id for Chemble database. 
+ """ + return NotImplemented + class BaseGenerativeWorkflow: diff --git a/open_biomed/models/MoleculeSTM/models/molecule_gnn_model.py b/open_biomed/models/multimodal/moleculestm.py similarity index 51% rename from open_biomed/models/MoleculeSTM/models/molecule_gnn_model.py rename to open_biomed/models/multimodal/moleculestm.py index eb2fdd9..64dc313 100644 --- a/open_biomed/models/MoleculeSTM/models/molecule_gnn_model.py +++ b/open_biomed/models/multimodal/moleculestm.py @@ -1,14 +1,56 @@ +import logging +logger = logging.getLogger(__name__) + + import torch import torch.nn as nn import torch.nn.functional as F +from collections.abc import Sequence + from torch_geometric.nn import (MessagePassing, global_add_pool, global_max_pool, global_mean_pool) -from torch_geometric.nn.inits import glorot, zeros from torch_geometric.utils import add_self_loops, softmax, degree -from torch_scatter import scatter_add -from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder -from collections import OrderedDict +from models.base_models import MolEncoder, TextEncoder +from transformers import BertModel +from models.multimodal.mega_molbart.mega_mol_bart import MegaMolBART + +class AtomEncoder(torch.nn.Module): + def __init__(self, emb_dim): + super(AtomEncoder, self).__init__() + + self.atom_embedding_list = torch.nn.ModuleList() + + for i, dim in enumerate([119, 4, 12, 12, 10, 6, 6, 2, 2]): + emb = torch.nn.Embedding(dim, emb_dim) + torch.nn.init.xavier_uniform_(emb.weight.data) + self.atom_embedding_list.append(emb) + + def forward(self, x): + x_embedding = 0 + for i in range(x.shape[1]): + x_embedding += self.atom_embedding_list[i](x[:,i]) + + return x_embedding + + +class BondEncoder(torch.nn.Module): + def __init__(self, emb_dim): + super(BondEncoder, self).__init__() + + self.bond_embedding_list = torch.nn.ModuleList() + + for i, dim in enumerate([5, 6, 2]): + emb = torch.nn.Embedding(dim, emb_dim) + torch.nn.init.xavier_uniform_(emb.weight.data) + 
self.bond_embedding_list.append(emb) + + def forward(self, edge_attr): + bond_embedding = 0 + for i in range(edge_attr.shape[1]): + bond_embedding += self.bond_embedding_list[i](edge_attr[:,i]) + + return bond_embedding class GINConv(MessagePassing): def __init__(self, emb_dim, aggr="add"): @@ -194,4 +236,128 @@ def forward(self, *argv): node_representation = self.molecule_node_model(x, edge_index, edge_attr) graph_representation = self.pool(node_representation, batch) output = self.graph_pred_linear(graph_representation) - return graph_representation, output \ No newline at end of file + return graph_representation, output + +class MoleculeSTM(MolEncoder, TextEncoder): + def __init__(self, config): + super().__init__() + self.config = config + + if config["structure"]["name"] == "magamolbart": + self.MegaMolBART_wrapper = MegaMolBART( + vocab_path=config["structure"]["vocab_path"], + input_dir=config["structure"]["MegaMolBART_generation_model_dir"], + output_dir=None + ) + self.structure_encoder = self.MegaMolBART_wrapper.model + elif config["structure"]["name"] == "gnn": + self.MegaMolBART_wrapper = MegaMolBART( + vocab_path=config["structure"]["vocab_path"], + input_dir=config["structure"]["MegaMolBART_generation_model_dir"], + output_dir=None + ) + molecule_node_model = GNN( + num_layer=config["structure"]["gin_num_layers"], + emb_dim=config["structure"]["gin_hidden_dim"], + gnn_type="gin", + drop_ratio=config["structure"]["drop_ratio"], + JK="last", + ) + self.structure_encoder = GNN_graphpred( + num_layer=config["structure"]["gin_num_layers"], + emb_dim=config["structure"]["gin_hidden_dim"], + graph_pooling="mean", + JK="last", + num_tasks=1, + molecule_node_model=molecule_node_model + ) + else: + raise AttributeError + if "ckpt" in config["structure"]: + logger.info("Loading structure encoder from %s" % (config["structure"]["ckpt"])) + state_dict = torch.load(config["structure"]["ckpt"], map_location="cpu") + 
self.structure_encoder.load_state_dict(state_dict) + + self.text_encoder = BertModel.from_pretrained(config["text"]["bert_path"]) + if "ckpt" in config["text"]: + logger.info("Loading text encoder from %s" % (config["text"]["ckpt"])) + state_dict = torch.load(config["text"]["ckpt"], map_location="cpu") + missing_keys, unexpected_keys = self.text_encoder.load_state_dict(state_dict, strict=False) + logger.info("missing keys: " + str(missing_keys)) + logger.info("unexpected keys: " + str(unexpected_keys)) + + self.structure_proj_head = nn.Linear(config["structure"]["output_dim"], config["projection_dim"]) + self.text_proj_head = nn.Linear(config["text"]["output_dim"], config["projection_dim"]) + if "structure_proj_ckpt" in config: + logger.info("Loading structure projection head from %s" % (config["structure_proj_ckpt"])) + state_dict = torch.load(config["structure_proj_ckpt"], map_location="cpu") + self.structure_proj_head.load_state_dict(state_dict) + if "text_proj_ckpt" in config: + logger.info("Loading text projection head from %s" % (config["text_proj_ckpt"])) + state_dict = torch.load(config["text_proj_ckpt"], map_location="cpu") + self.text_proj_head.load_state_dict(state_dict) + self.norm = False + + def encode_mol(self, structure, proj=False, return_node_feats=False): + mol_embeds, node_embeds = self.structure_encoder(structure) + if proj: + mol_embeds = self.structure_proj_head(mol_embeds) + if not return_node_feats: + return mol_embeds + else: + return mol_embeds, node_embeds + + def encode_text(self, text, proj=False): + text_embeds = self.text_encoder(text["input_ids"], attention_mask=text["attention_mask"])["pooler_output"] + if proj: + return self.text_proj_head(text_embeds) + else: + return text_embeds + + + + +class MLP(nn.Module): + def __init__(self, input_dim, hidden_dims, batch_norm=False, activation="relu", dropout=0): + super(MLP, self).__init__() + + if not isinstance(hidden_dims, Sequence): + hidden_dims = [hidden_dims] + self.dims = 
[input_dim] + hidden_dims + + if isinstance(activation, str): + self.activation = getattr(F, activation) + else: + self.activation = activation + if dropout: + self.dropout = nn.Dropout(dropout) + else: + self.dropout = None + + self.layers = nn.ModuleList() + for i in range(len(self.dims) - 1): + self.layers.append(nn.Linear(self.dims[i], self.dims[i + 1])) + if batch_norm: + self.batch_norms = nn.ModuleList() + for i in range(len(self.dims) - 2): + self.batch_norms.append(nn.BatchNorm1d(self.dims[i + 1])) + else: + self.batch_norms = None + + def forward(self, input): + layer_input = input + + for i, layer in enumerate(self.layers): + hidden = layer(layer_input) + if i < len(self.layers) - 1: + if self.batch_norms: + x = hidden.flatten(0, -2) + hidden = self.batch_norms[i](x).view_as(hidden) + hidden = self.activation(hidden) + if self.dropout: + hidden = self.dropout(hidden) + if hidden.shape == layer_input.shape: + hidden = hidden + layer_input + layer_input = hidden + + return hidden \ No newline at end of file diff --git a/open_biomed/models/task_model/moledit_model.py b/open_biomed/models/task_model/moledit_model.py index 53a61cc..7415a78 100644 --- a/open_biomed/models/task_model/moledit_model.py +++ b/open_biomed/models/task_model/moledit_model.py @@ -1,52 +1,47 @@ import torch import torch.nn as nn -from transformers.modeling_outputs import BaseModelOutput - -from models.multimodal.molt5 import MolT5 from models import SUPPORTED_MOL_ENCODER -from utils.mol_utils import convert_pyg_batch -from models.multimodal.molkformer.mol_kformer import MolKFormer - class MoleditModel(nn.Module): def __init__(self, config): super(MoleditModel, self).__init__() - self.model = SUPPORTED_MOL_ENCODER[config["graph"]["name"]](config["graph"]) - self.ckpt = torch.load(config["graph"]["init_checkpoint"], map_location="cpu") - if config["graph"]["name"] == "molkformer": - self.ckpt = self.ckpt["model"] - self.model.load_state_dict(self.ckpt, strict=False) - self.use_molkformer 
= True if config["graph"]["name"] == "molkformer" else False - self.use_momu = True if config["graph"]["name"] == "momu" else False + if "smiles" in config: + self.model = SUPPORTED_MOL_ENCODER[config["smiles"]["name"]](config["smiles"]) + self.use_molstm = True if config["smiles"]["name"] == "molstm" else False + elif "graph" in config: + self.model = SUPPORTED_MOL_ENCODER[config["graph"]["name"]](config["graph"]) + if config["graph"]["name"] == "molkformer": + self.ckpt = torch.load(config["graph"]["init_checkpoint"], map_location="cpu") + self.ckpt = self.ckpt["model"] + self.model.load_state_dict(self.ckpt, strict=False) + if config["graph"]["name"] == "momu": + self.ckpt = torch.load(config["graph"]["init_checkpoint"], map_location="cpu") + self.model.load_state_dict(self.ckpt, strict=False) + + self.use_molkformer = True if config["graph"]["name"] == "molkformer" else False + self.use_momu = True if config["graph"]["name"] == "momu" else False + self.use_molstm = True if config["graph"]["name"] == "molstm" else False def forward(self, mol): - h, encoder_attention_mask = self.encode(mol) - return h, encoder_attention_mask - - - def decode(self, mol, num_beams, max_length): - h, encoder_attention_mask = self.encode(mol) - return self.generate_model.decode( - encoder_outputs=h, - encoder_attention_mask=encoder_attention_mask, - num_beams=num_beams, - max_length=max_length - ) + h = self.encode(mol) + return h def encode(self, mol): + #text_encode if "input_ids" in mol: h = self.model.encode_text(mol, proj=True) - encoder_attention_mask = 1 + #graph_encode else: if self.use_molkformer==True: - # mol={"structure":{"Graph":mol}} mol={"structure":mol} graph_feats = self.model.encode_mol(mol, proj=True) h = graph_feats.mean(dim=1) if self.use_momu==True: graph_feats = self.model.encode_mol(mol, proj=True) h = graph_feats - encoder_attention_mask = 1 - return h, encoder_attention_mask \ No newline at end of file + if self.use_molstm==True: + graph_feats = 
self.model.encode_mol(mol, proj=True) + h = graph_feats + return h \ No newline at end of file diff --git a/open_biomed/tasks/mol_edit/accuracy.npz b/open_biomed/tasks/mol_edit/accuracy.npz new file mode 100644 index 0000000000000000000000000000000000000000..5e7485a9f5a03e5d895bb6fb8f135d264b832c68 GIT binary patch literal 272 zcmWIWW@Zs#fB;1XYiY*P4j>1FIT=J45{ruB4fOH~Dj69Bz=9x!AW0CI>=)`A5Xs0; z#!#)El3JWxq;934Zj)xAuA`uymS0p-l$aNvUzCyx5_e0?DNY577iT0EqyqUGhB}%$ w3bhIp04|6t7#IS)8JR?wad`r!ih-el5yXajDZrbR4J5z_gyul{Fo?qd0E^Qq@c;k- literal 0 HcmV?d00001 diff --git a/open_biomed/tasks/mol_edit/edited_SMILES.tsv b/open_biomed/tasks/mol_edit/edited_SMILES.tsv new file mode 100644 index 0000000..ba3e82e --- /dev/null +++ b/open_biomed/tasks/mol_edit/edited_SMILES.tsv @@ -0,0 +1,1195 @@ +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O N(C(=O)NN1C(=O)N()C)N(C)[C@@H]1OCCC1 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O NNC(=O)N1NC(=O)N(N)[C@@H]1ON 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O N(C(=O)NN1C(=O)N()C)N(C)[C@@H]1OCCC1 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O NNC(=O)N1NC(=O)N(N)[C@@H]1ON 0.001 +This molecule is soluble in water. Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O 10.0 +This molecule is soluble in water. Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O 1.0 +This molecule is soluble in water. 
Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O 0.1 +This molecule is soluble in water. Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O C(=O)(NNC(=O)cN#N)cN(C)C(=O)N 0.01 +This molecule is soluble in water. Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O CN(C#N)NC(=O)NNC(=O)NNC(N)=O 0.001 +This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O 10.0 +This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O 1.0 +This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O 0.1 +This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O C(CC)n(C)c(=O)cc(C(=O)Nc2cnc(N3CCOCC3)cc2)c1 0.01 +This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O C(CC)C(=O)NN(C)C(=O)Nc1ccc(N)n(CCC)c1 0.001 +This molecule is soluble in water. CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 10.0 +This molecule is soluble in water. CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 1.0 +This molecule is soluble in water. CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NC(=O)CN(C)OC)c1nc(C)cs1 0.1 +This molecule is soluble in water. CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 O=C(N(OC))N(O)C)N[C@@](C)(C)NC(=O)N 0.01 +This molecule is soluble in water. 
CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CON(N)C(=O)CNC(C)(C)NC(=O)N(N)O 0.001 +This molecule is soluble in water. C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 10.0 +This molecule is soluble in water. C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 1.0 +This molecule is soluble in water. C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 c1cccc2c1Sc(=O)n(CCC(=O)N[C@@H](C)C1CC1)c2S 0.1 +This molecule is soluble in water. C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1Nc2ccccc2SC1=O)C1CC1 0.01 +This molecule is soluble in water. C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)NN1Sc2ccccc2SC1=O)C1CC1 0.001 +This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 10.0 +This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 1.0 +This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2sc(C(N)=O)c(C(N)=O)c2C1 0.1 +This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCN(C(N)=O)c2cc(NC(N)=O)cc(C(N)=O)c21 0.01 +This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N(CN(C(N)=O)c1cc(NC(N)=O)cc(C(N)=O)c1)C(N)=O 0.001 +This molecule is soluble in water. 
Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 10.0 +This molecule is soluble in water. Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 1.0 +This molecule is soluble in water. Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 0.1 +This molecule is soluble in water. Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Nc1ccc(F)c(N2CCN(c3ncc(Cl)cc3F)CC2)n1 0.01 +This molecule is soluble in water. Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Nc1ccc(F)c(N2CCN(c3ncc(Cl)cc3F)C2)n1 0.001 +This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2c(C)c(C)sc12 10.0 +This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2c(C)c(C)sc12 1.0 +This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2c(C)c(C)sc12 0.1 +This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 C(=O)(NC)CSc1c2c(c(C)c(C)[nH]2)cc1 0.01 +This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 C(=O)(NC)SSc1c2c(c(C)c(C)[nH]2)c(=O)[nH]1 0.001 +This molecule is soluble in water. COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 10.0 +This molecule is soluble in water. COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 1.0 +This molecule is soluble in water. COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 0.1 +This molecule is soluble in water. 
COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 C(=O)(OC)Nc1cc(N2CCN(c(C(=O)N)C)C2)nc(OC)c1 0.01 +This molecule is soluble in water. COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 C(=O)(OC)NCN(c)N1CCN(c(C(=O)N)c(C)N)C)C 0.001 +This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 10.0 +This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 1.0 +This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 0.1 +This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 N(C(=O)N[C@@H](C1CC1)ccccc(F)c1)N(C)C(=O)N 0.01 +This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 N=C(N)N[C@H](NC(N)=O)c1ccccc1 0.001 +This molecule is soluble in water. Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(C)c(S(=O)(=O)N(CC(=O)NCc2ncccc2C)c2ccccn2)c1 10.0 +This molecule is soluble in water. Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 1.0 +This molecule is soluble in water. Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1cccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)c1 0.1 +This molecule is soluble in water. 
Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(C)CC(=O)NCc2ccccn2)cc1 0.01 +This molecule is soluble in water. Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 N(C(=O)CN(S(=O)(=O)ccc(C)cc)S(=O)(=O)N)(C)cc(C)cN 0.001 +This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 10.0 +This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 1.0 +This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 0.1 +This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)SCCN(C(=O)Nc2ccc(N3CCOCC3)cc2F)S1 0.01 +This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 O=C(Nc1ccc(N2CCOCC2)cc1F)N1CCS(=O)(=O)N1 0.001 +This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C 10.0 +This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C 1.0 +This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C 0.1 +This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C C(#N)c(C)c(C)c(NC(=O)cccc(N(C)C)c)=O 0.01 +This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C C(#N)c(C)c(C)c(NC(=O)ccc(N(C)C)c)=O 0.001 +This molecule is soluble in water. 
Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O 10.0 +This molecule is soluble in water. Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O 1.0 +This molecule is soluble in water. Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1ccc(Sc2cc3ccccc3[nH]2)cc1C=O 0.1 +This molecule is soluble in water. Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O O=C(N)cSS(=O)(=O)N 0.01 +This molecule is soluble in water. Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O NC(=O)NS(N)(=O)=O 0.001 +This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 10.0 +This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 1.0 +This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 0.1 +This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 C(NC(=O)cc(C)nc2cc(-ccc(F)cc2)cc2)(=O)NCC(F)(F)F 0.01 +This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 C(NC(=O)cc(C)nc2cc(-cc(F)cc)c2)(=O)NCC(F)(F)F 0.001 +This molecule is soluble in water. Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 10.0 +This molecule is soluble in water. 
Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 1.0 +This molecule is soluble in water. Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 0.1 +This molecule is soluble in water. Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 CC(C)(C)c1cccc(S(=O)(=O)Nc2ccc(C(=O)N3CCC3)cc2)c1 0.01 +This molecule is soluble in water. Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 c1c(S(=O)(NS(=O)(cc(C(C)(C)C)=O)=O)N)ccc(C(=O)N2CCCN2)c1 0.001 +This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 10.0 +This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 1.0 +This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 0.1 +This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 C(N(CC#N)CC#N)c1c2c(cccc2)cc1 0.01 +This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 C(N(CC#N)CC#N)c1c2c(cccc2)cc1 0.001 +This molecule is soluble in water. O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F 10.0 +This molecule is soluble in water. O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F 1.0 +This molecule is soluble in water. O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@@H](O)c1ccco1)[C@@H]1C[C@H]1c1ccccc1F 0.1 +This molecule is soluble in water. 
O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F CN(C(N)=O)[C@H](N)C(=O)NC[C@H](N)O 0.01 +This molecule is soluble in water. O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F CN(S)C(=O)NC[C@@H](O)N(C)C(=O)N(C)S 0.001 +This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 10.0 +This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 1.0 +This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 0.1 +This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 O=S(Nc#N)(=O)Ncccc(NC(=O)c(-c2c(Cl)cccc2F)n[nH]2)c1 0.01 +This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 O=S(Nc#N)(=O)NccccNC(=O)c(N#N)c(-c2c(Cl)cccc2F)cO 0.001 +This molecule is soluble in water. CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 10.0 +This molecule is soluble in water. CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 1.0 +This molecule is soluble in water. CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@@H](S/N=C\c2ccc(I)cc2)S[C@H]1CC(=O)NCCc1ccccc1 0.1 +This molecule is soluble in water. 
CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 c1c(I)ccc(N2N(C)C(=O)S[C@@H]2CC(=O)NCC2)c1 0.01 +This molecule is soluble in water. CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CCNC(=O)CNSN=c1[nH]c(=O)n(C)s1 0.001 +This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 10.0 +This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 1.0 +This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCC1)N(C)c1ccsc1 0.1 +This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 CN1NC(O)(CC(=O)NC[C@H](N)N(C)C)NNC1=O 0.01 +This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@H](NC(=O)NNC(=O)NN(C)C(=O)NN)N(C)C 0.001 +This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 10.0 +This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 1.0 +This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@H](C)O1 0.1 +This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)N[C@H]1NCN(CN(C)S(=N)(N)=O)[C@H](C)O1 0.01 +This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)N[C@H]1NCN(C(=O)NN(C)S(=N)(N)=O)CN(C)O1 0.001 +This molecule is soluble in water. 
C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 10.0 +This molecule is soluble in water. C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 1.0 +This molecule is soluble in water. C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 0.1 +This molecule is soluble in water. C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 CC(=O)N(C)[C@H](C)N(N)C(=O)CCSN 0.01 +This molecule is soluble in water. C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C(N(C(=O)N(C)[C@@H](C)NC(=O)N)C1)CC#N 0.001 +This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 10.0 +This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 1.0 +This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 0.1 +This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1nccc(NCc2cc[nH]n2)n1 0.01 +This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 NC(=O)Nc1nccc(C(F)(F)F)n1 0.001 +This molecule is soluble in water. CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 10.0 +This molecule is soluble in water. CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 1.0 +This molecule is soluble in water. 
CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 0.1 +This molecule is soluble in water. CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 N#C[C@@H](N1CCN(C(=O)cN)CCN1)C(=O)N)N 0.01 +This molecule is soluble in water. CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 N#C[C@@H](N1CCN(C(=O)cN)CCN(C)C(=O)N)N)C(=O)N 0.001 +This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C/c1cc(C)n(-c2ccc([N+](=O)[O-])c(C)c2)c1 10.0 +This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C/c1cc(C)n(-c2ccc([N+](=O)[O-])c(C)c2)c1 1.0 +This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C/c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1 0.1 +This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccc([N+](=O)[O-])cc1NC(=O)C#N 0.01 +This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1cc([N+](=O)[O-])ccc1NC(=O)C#N 0.001 +This molecule is soluble in water. CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C 10.0 +This molecule is soluble in water. CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C 1.0 +This molecule is soluble in water. 
CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C 0.1 +This molecule is soluble in water. CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(NCc2cccc(C(C)C)c2)c1C 0.01 +This molecule is soluble in water. CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C c1(C)n(CC)c(C)cc(NNc2cc(C(C)C)ccn2)c1 0.001 +This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)cn1 10.0 +This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)cn1 1.0 +This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 0.1 +This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 CN(C)C(=O)NC(=O)C(=O)NCc1ccc(Cl)cc1 0.01 +This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NC(=O)N(C)Oc)N(C)C(=O)N(C)Oc 0.001 +This molecule is soluble in water. N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)CNC(=O)N2CCCCC2)CC1 10.0 +This molecule is soluble in water. N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCN(C(=O)NCC(=O)N2CCCCC2)CC1 1.0 +This molecule is soluble in water. N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCN(C(=O)NCC(=O)N2CCCC2)CC1 0.1 +This molecule is soluble in water. N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCN(C(=O)NCC(N)=O)CC1 0.01 +This molecule is soluble in water. 
N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N(C(=O)N1CCN(c#N)cccc2)C(N)=O 0.001 +This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 10.0 +This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 1.0 +This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 0.1 +This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1cccc(NC(=O)CCO)c1 0.01 +This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 C(CC(Nc(NC(=O)c)cc(C)c1)=O)N(C)C(=O)N 0.001 +This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 10.0 +This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 1.0 +This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1nc(-c2cccc(OCc3cccc(F)c3)c2)cs1 0.1 +This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 C(NC(=O)c(N)c(S)(=O)N)(=O)Oc1c()c(OC)c(OC)c()c1 0.01 +This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 C(NC(=O)c(N)S)(=O)N(C)C)N(C)C(=O)OCc1c()c(O)c(O)c(N)c(N)c1 0.001 +This molecule is soluble in water. N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 10.0 +This molecule is soluble in water. 
N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 1.0 +This molecule is soluble in water. N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 0.1 +This molecule is soluble in water. N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2cc(S(=O)(=O)N3CCCC3)ccc2Cl)cc1 0.01 +This molecule is soluble in water. N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#CcS(Nc1cc(NC(=O)cNS(=O)(=O)N)ccc1)(=O)N 0.001 +This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl 10.0 +This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl 1.0 +This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl 0.1 +This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CN1CCN(C(=O)NCCN)[C@H]1c1ccccc1Cl 0.01 +This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CN1CN(C(=O)NCCNS(N)(=O)=O)[C@H]1c1ccccc1Cl 0.001 +This molecule is soluble in water. Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 10.0 +This molecule is soluble in water. Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 1.0 +This molecule is soluble in water. 
Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 0.1 +This molecule is soluble in water. Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 C(NS(=O)(=O)C)[C@@H]1CN(C(=O)cc(-c2ccc(C)cc2)[nH]1)CCCN2 0.01 +This molecule is soluble in water. Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(C2=CC(=O)N(C)C[C@@H](CNS(C)(=O)=O)N2)cc1 0.001 +This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c(=O)n(C)c(=O)n(C)c21 10.0 +This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c(=O)n(C)c(=O)n(C)c21 1.0 +This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c(=O)n(C)c(=O)n(C)c21 0.1 +This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](NC(=O)c1cn(C)c(=O)n1C)C(=O)N(C)CC#N 0.01 +This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C(=O)(N(C)CCN(C)C(=O)[C@@H](C)N=c1cc(=O)n(C)[nH]1)N 0.001 +This molecule is soluble in water. COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 10.0 +This molecule is soluble in water. COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 1.0 +This molecule is soluble in water. COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 0.1 +This molecule is soluble in water. 
COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 0.01 +This molecule is soluble in water. COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 O=C(Oc)ONccc(S(=O)(=O)N)cc 0.001 +This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 10.0 +This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 1.0 +This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 0.1 +This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 O=S(=O)(c1ccc(S(=O)(=O)CCC(Nc(OC)ccc(C)c3)=O)cc1)N 0.01 +This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 O=S(=O)(ccc(S(=O)(=O)CCC(Nc(OC)ccc(C)c2)(=O)N)cc()N)N(C)C(=O)N 0.001 +This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 10.0 +This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 1.0 +This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 0.1 +This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 c1c(N(CC)CC)ccc(NC(=O)NNC(=O)c)c1 0.01 +This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 N(C(=O)Nccc(N(CC)C(=O)N)c)(CC)C(=O)N 0.001 +This molecule is soluble in water. 
CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 10.0 +This molecule is soluble in water. CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 1.0 +This molecule is soluble in water. CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 0.1 +This molecule is soluble in water. CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1n[nH]c(S(=O)(=O)[C@@H](C)S(=O)(=O)c2cccc(F)c2)n1 0.01 +This molecule is soluble in water. CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 c1(S(=O)(=O)[C@@H](C)S(=O)(=O)cc(N(C)C)n[nH]1)cc(F)cc1 0.001 +This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 10.0 +This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 1.0 +This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@H](COC)C2)no1 0.1 +This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCN(C)C(=O)N(C)CN(C)C(=O)N(C)CN(C)C(N)=O 0.01 +This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCN(C)C(=O)N(C)N(C)C(=O)N(C)CN(C)C(N)=O 0.001 +This molecule is soluble in water. COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C 10.0 +This molecule is soluble in water. COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C 1.0 +This molecule is soluble in water. 
COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@@H]1c2ccccc2CN1C(=O)CC(C)C 0.1 +This molecule is soluble in water. COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C C(NC(=O)[C@@H]1Cc1cccc(N)(=O)N1C(=O)CC(C)C)#N 0.01 +This molecule is soluble in water. COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C C(NC(=O)[C@@H](N(C)C(=O)N)C#N)c(OC)(OC)F 0.001 +This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl 10.0 +This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl 1.0 +This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl 0.1 +This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)O[C@H](C)NC(=O)Nc1ccc(Cl)cc1Cl 0.01 +This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl NC(=O)Nc1ccc(Cl)cc1OC(N)=O 0.001 +This molecule is soluble in water. COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 10.0 +This molecule is soluble in water. COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 1.0 +This molecule is soluble in water. COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 0.1 +This molecule is soluble in water. 
COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 C(C(=O)N1[C@@H](c(OC)ccc(OC)c2)NC(=O)C)(=O)N 0.01 +This molecule is soluble in water. COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 C(C(=O)N1[C@@H](c(OC)ccc(OC)c)N)(=O)N 0.001 +This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 10.0 +This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 1.0 +This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2cc(/C=C3\NC(=O)NC3=O)c(OC)cc2c1 0.1 +This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 O=C1N/C(=C/c(OC)c2ccc(OC)cc2)NC(=O)NN1 0.01 +This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 O(C)c1c(/C=C2\NC(=O)NC(=O)NN2)cc(OC)c1 0.001 +This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 10.0 +This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 1.0 +This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 0.1 +This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 c(=O)(NCCNc1c([N+](=O)[O-])cnn1)c(C(C)C)n[nH]1 0.01 +This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 c(=O)(NCCNc(=O)c(=O)n(C)n[nH]2)c(=O)n(C)[nH]1 0.001 +This molecule is soluble in water. 
O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1cccc2ccccc12)N1CCC[C@H](CO)C1 10.0 +This molecule is soluble in water. O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1cccc2ccccc12)N1CCC[C@H](CO)C1 1.0 +This molecule is soluble in water. O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 0.1 +This molecule is soluble in water. O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 C(NC(=O)N1C[C@@H](CO)CCC1)(=O)NCc 0.01 +This molecule is soluble in water. O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 C(NC(=O)N1C[C@@H](CO)N(C)C)(=O)N 0.001 +This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@H](C(=O)N(C)c2ncccc2N(C)C)CC1=O 10.0 +This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O 1.0 +This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O 0.1 +This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O N(C(=O)[C@@H]1CN(c(CC)cccc2)C(=O)N1)(C)N 0.01 +This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O N(C(=O)N(C)NC(=O)NNc(CC)cccc1)(C)C 0.001 +This molecule is soluble in water. C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 10.0 +This molecule is soluble in water. 
C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 1.0 +This molecule is soluble in water. C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 0.1 +This molecule is soluble in water. C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@H]2NCCS2)cc1 0.01 +This molecule is soluble in water. C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 N(C(=O)[C@@H]1SCCN2C(=O)N#N)(C)CN#N 0.001 +This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)no1)N1CCOCC1 10.0 +This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)no1)N1CCOCC1 1.0 +This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 0.1 +This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(N1CCOCC1)N(C)C)NC(=O)cc(O)c(F)c(F)c(F)c2 0.01 +This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(N1CCOCCN1)cc(O)c(O)c(F)c(F)c(S)(=O)N 0.001 +This molecule is soluble in water. CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC 10.0 +This molecule is soluble in water. CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC 1.0 +This molecule is soluble in water. 
CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC 0.1 +This molecule is soluble in water. CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1cc(NC(=O)CN2C(=O)N(C)[C@@H](C)N2C(N)=O)ccc1OC 0.01 +This molecule is soluble in water. CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC C(N1C(=O)N(c2ccc(C)cc2)C(=O)N1)C(Nc1cc(OC)c(OC)cN)=O 0.001 +This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 10.0 +This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 1.0 +This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 0.1 +This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 O(C)c1cc(NC(=O)NCC(=O)NN)c(OC)ccc1 0.01 +This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 O(C)c(O)cc(NC(=O)NC(=O)NCC(=O)N)c(O)c(O)c 0.001 +This molecule is soluble in water. O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl 10.0 +This molecule is soluble in water. O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl 1.0 +This molecule is soluble in water. O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl 0.1 +This molecule is soluble in water. 
O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl N1(c2c(Cl)cccc2)C(=O)N[C@@H](N2CCN)CC1=O 0.01 +This molecule is soluble in water. O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl NN1CCSN(c2ccccc2Cl)NC1=O 0.001 +This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br 10.0 +This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br 1.0 +This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br 0.1 +This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br NC(=O)Nc1ccc(NS(=O)(=O)N2CCN2)cc1 0.01 +This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br N(S(=O)(=O)N1c(NC(=O)cN)ccc(OC)cN)C(=O)N 0.001 +This molecule is soluble in water. C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 10.0 +This molecule is soluble in water. C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 1.0 +This molecule is soluble in water. C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 0.1 +This molecule is soluble in water. C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C(=O)(N(CC=C)[C@@H](C)NC(=O)Cc1ccc(C)cc1 0.01 +This molecule is soluble in water. 
C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C(=O)(N(CC=C)[C@@H](C)N)CcN 0.001 +This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 10.0 +This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 1.0 +This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 0.1 +This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 NCNC(=O)N1CCC(OCc2ccc(F)cc2)CC1 0.01 +This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 N(C(N1CCC(OCc)(N)C1)=O)C(N)=O 0.001 +This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 10.0 +This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 1.0 +This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 0.1 +This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 0.01 +This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)C1 0.001 +This molecule is soluble in water. 
CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 10.0 +This molecule is soluble in water. CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 1.0 +This molecule is soluble in water. CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 0.1 +This molecule is soluble in water. CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 O=C(NCC1(O)CCOCC1)/C=C/c(O[C@@H](C)C)O 0.01 +This molecule is soluble in water. CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 C[C@H](O[C@H](C)O)ON(C)C(=O)NCN(C)C(N)=O 0.001 +This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 10.0 +This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 1.0 +This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@@H]2CCN(c3ccc(C)cc3)C2=O)cc1 0.1 +This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 NC(=O)CCc1ccc(OC(N)=O)cc1 0.01 +This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 O=C(ON)NC(=O)Oc 0.001 +This molecule is soluble in water. COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 10.0 +This molecule is soluble in water. 
COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 1.0 +This molecule is soluble in water. COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 0.1 +This molecule is soluble in water. COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 c1(N2CCC(C(=O)NCc(OC)cc(OC)c)C2)ncc(N)cc1 0.01 +This molecule is soluble in water. COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 N(C(=O)NCc(OC)cc(OC)c)(N)S(=O)(=O)N 0.001 +This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 10.0 +This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 1.0 +This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 0.1 +This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 N(CN1C(=O)N(C)CN1)C(=O)cN 0.01 +This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 CN1CN(CNC(=O)NNC(N)=O)C(=O)N1 0.001 +This molecule is soluble in water. O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 10.0 +This molecule is soluble in water. O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 1.0 +This molecule is soluble in water. O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 0.1 +This molecule is soluble in water. 
O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CSCc1ccccc1)Nc1nc[nH]n1 0.01 +This molecule is soluble in water. O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 NC(=O)NN=C(O)CSCc1ccccc1 0.001 +This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F 10.0 +This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F 1.0 +This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F 0.1 +This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F N(C(=O)c(C)ncsN)[C@@H](C(F)(F)F)Nc(C(=O)N)c 0.01 +This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F N(C(=O)Nc(C(F)(F)F)cN)C(=O)NN 0.001 +This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 10.0 +This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 1.0 +This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 0.1 +This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2ccc(Br)cc2)cc1 0.01 +This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 c(OCCOccc(OC)c)c(N)c(=N)c(N)c(S)c(=N)[nH]2 0.001 +This molecule is soluble in water. COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C 10.0 +This molecule is soluble in water. COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C 1.0 +This molecule is soluble in water. 
COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nn2cc(C)nc2c1C 0.1 +This molecule is soluble in water. COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCC(N)=O 0.01 +This molecule is soluble in water. COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C C(N(C)Cc(OC)ccc(Br)c1)(=O)CCC(=O)N)C 0.001 +This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 10.0 +This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 1.0 +This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 0.1 +This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)C2)c1 0.01 +This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)C2)c1 0.001 +This molecule is soluble in water. O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(n1-c1ccccc1)CCCS2 10.0 +This molecule is soluble in water. O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(n1-c1ccccc1)CCCS2 1.0 +This molecule is soluble in water. O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(n1-c1ccccc1)SCCC2 0.1 +This molecule is soluble in water. O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 C(=O)(N(C)C)NN=c1[nH]c(=O)N 0.01 +This molecule is soluble in water. 
O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 CN(CN)C(=O)NN=[SH](C)(N)N 0.001 +This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 10.0 +This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 1.0 +This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 0.1 +This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(=O)NC(C)C)C(=O)COc1ccccc1 0.01 +This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(C(=O)COc1ccccc1)C(=O)NC(=O)NC(C)C 0.001 +This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl 10.0 +This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl 1.0 +This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl 0.1 +This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@H]1COC[C@H](N)N1C(=O)N(N)S(N)(=O)=O 0.01 +This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl N(N(C(=O)N1[C@@H](C)COC[C@@H]1N)S(=O)(=O)NN 0.001 +This molecule is soluble in water. C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 10.0 +This molecule is soluble in water. 
C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 1.0 +This molecule is soluble in water. C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 0.1 +This molecule is soluble in water. C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@H](NC(=O)CC(C)(C)C)Nc1cccc(N2CCOC2=O)c1 0.01 +This molecule is soluble in water. C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)NC(=O)C(N)=O 0.001 +This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 10.0 +This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 1.0 +This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 0.1 +This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 CN(C)C(=O)N(C[C@H]1NCCCO1)C(=O)/C=C/c1ccc(Cl)cn1 0.01 +This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 CN(C[C@H](O)N(C)C(N)=O)C(=O)/C=N/N(C)C(N)=O 0.001 +This molecule is soluble in water. C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 10.0 +This molecule is soluble in water. C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 1.0 +This molecule is soluble in water. 
C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)CCCC2=O)Cc1cccs1 0.1 +This molecule is soluble in water. C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 CN(CN(C)C(=O)NN(C)C(N)=O)C(N)=O 0.01 +This molecule is soluble in water. C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 CN(N)NC(=O)NC(=O)N(C)N 0.001 +This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 10.0 +This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 1.0 +This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 0.1 +This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 C(=O)(Nc(C(=O)NC)cccc1)COc 0.01 +This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 C(=O)(Nc(C(=O)N)c(O)C)COc 0.001 +This molecule is soluble in water. CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 10.0 +This molecule is soluble in water. CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 1.0 +This molecule is soluble in water. CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 0.1 +This molecule is soluble in water. CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 C(C(N1CCN(C(=O)Nc)c2ccc(N)nc2)C1)C 0.01 +This molecule is soluble in water. 
CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 C(C(N1CCN(C(=O)Nc)CC1)N)N(C)C 0.001 +This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC 10.0 +This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC 1.0 +This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC 0.1 +This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC c1(OC)cc(OC)c(#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)C2)c1 0.01 +This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC c1(NC(=O)CN2CCN(c3ccc(F)cc3)C2)cc(OC)c(#N)c1)c(OC)c(O)c2 0.001 +This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 10.0 +This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@@H](C)O)c2)c(C)n1 1.0 +This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 0.1 +This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 0.01 +This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@@H](C)O)c2)c(C)n1 0.001 +This molecule is soluble in water. 
CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 10.0 +This molecule is soluble in water. CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 1.0 +This molecule is soluble in water. CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 0.1 +This molecule is soluble in water. CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 C(C(=O)Nccc(C(=O)OCC)c)N(C)C 0.01 +This molecule is soluble in water. CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 C(C(=O)Nccc(C(=O)OCC)N)N 0.001 +This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)C1(CCCC1)O2 10.0 +This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)C1(CCCC1)O2 1.0 +This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCC1)O2 0.1 +This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 CN(N)NC(=O)CCC(=O)NNC(N)=O 0.01 +This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 CN(NC(N)=O)NC(=O)NNC(N)=O 0.001 +This molecule is soluble in water. Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 10.0 +This molecule is soluble in water. Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 1.0 +This molecule is soluble in water. 
Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 0.1 +This molecule is soluble in water. Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 0.01 +This molecule is soluble in water. Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)C2)cc1 0.001 +This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 10.0 +This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 1.0 +This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 0.1 +This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)S(=O)(=O)N1CCC(O)(CSc2ccc(Br)cc2)CC1 0.01 +This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)S(=O)(=O)N1CCC(O)(CSc2ccc(Br)cc2)CC1 0.001 +This molecule is soluble in water. COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 10.0 +This molecule is soluble in water. COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 1.0 +This molecule is soluble in water. COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)cn1 0.1 +This molecule is soluble in water. 
COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)NC(=O)N1CCCN1 0.01 +This molecule is soluble in water. COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)NN1CCCN1C(N)=O 0.001 +This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 10.0 +This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 1.0 +This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 0.1 +This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1ccc(NC(=O)CN(C)c2c(C)noc2C)cc1SC 0.01 +This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1ccc(NC(=O)CN(C)c2c(C)noc2C)cc1SC 0.001 +This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 10.0 +This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 1.0 +This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 0.1 +This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 N(S(=O)(=O)NCC=C)C(=O)N1CCC(Cc)CN1 0.01 +This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 N(S(=O)(=O)NCC=C)C(=O)N1CCC(Cc)CN1 0.001 +This molecule is soluble in water. 
COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 10.0 +This molecule is soluble in water. COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 1.0 +This molecule is soluble in water. COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 0.1 +This molecule is soluble in water. COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 0.01 +This molecule is soluble in water. COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 0.001 +This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 10.0 +This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 1.0 +This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 OCC#CC#Cc1ccc(Br)cc1 0.1 +This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 N=c1ccc(C#CN)c[nH]1 0.01 +This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 N#CC#Nc1ccc(=N)ccc1 0.001 +This molecule is soluble in water. CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 10.0 +This molecule is soluble in water. CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 1.0 +This molecule is soluble in water. CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@H](C)NC(=O)[C@@H](C)Sc1ccc(OC)cc1 0.1 +This molecule is soluble in water. CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 c1c(S[C@@H](C(=O)N[C@@H](C)NC(=O)[C@@H](C)Sc2ccc(OC)cc2)ccc(O)n1)cn2 0.01 +This molecule is soluble in water. 
CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 O=C(N[C@@H](C)N(C)c(Sc1ccc(OC)cc1)=O)N 0.001 +This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 10.0 +This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 1.0 +This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 0.1 +This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 COc1ccccc1O[C@@H](NC(N)=O)C(=O)Nc1ccc(Cl)c(Cl)c1 0.01 +This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 COc1ccccc1O[C@@H](N)C(=O)Nc1ccc(Cl)c(Cl)c1 0.001 +This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 10.0 +This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 1.0 +This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 0.1 +This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 NC(=O)Cc1ccc(F)c(F)c1 0.01 +This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 NS(=O)(=O)Cc1ccc(F)c(F)c1 0.001 +This molecule is soluble in water. Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 10.0 +This molecule is soluble in water. Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 1.0 +This molecule is soluble in water. 
Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 0.1 +This molecule is soluble in water. Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 CNC(=O)N(C)C(=O)[C@@H](C)Cc1c(C)n[nH]c1C 0.01 +This molecule is soluble in water. Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C)C(=O)N(C)C(N)=O)c1 0.001 +This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 10.0 +This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 1.0 +This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 0.1 +This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 C(NC(=O)NC(=O)NC(C)(C)NC(=O)N)cOc1ccc(F)cc1 0.01 +This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 C(NC(=O)NC(=O)NC(=O)N)c(Oc)c(Oc1ccc(F)cc1)c 0.001 +This molecule is soluble in water. O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 10.0 +This molecule is soluble in water. O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 1.0 +This molecule is soluble in water. O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 0.1 +This molecule is soluble in water. O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CC2)c1)N1CCSCC1 0.01 +This molecule is soluble in water. 
O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 NNC(=O)c1cc(N)cc(OC2CC2)c1 0.001 +This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 10.0 +This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 1.0 +This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 0.1 +This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 N(C(=O)Nc(NC(=O)Nc1cc(N2CCOC2)ccc1)c)Cc(F)ccc(N)c2 0.01 +This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 N(C(=O)Nc(NC(=O)Nc(N)c(N2CCOC2)c)c()c(NC(=O)NC)c)c(O)c 0.001 +This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 10.0 +This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 1.0 +This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 0.1 +This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)C1 0.01 +This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 N(C(=O)N1CCN(Cc2cccn2)C1)CCNC(=O)C 0.001 +This molecule is soluble in water. 
O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 10.0 +This molecule is soluble in water. O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 1.0 +This molecule is soluble in water. O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 0.1 +This molecule is soluble in water. O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O(Cc1ccc(C(=O)NCN(C)C(=O)NC)o1)c(Cl)cc(Cl)cN 0.01 +This molecule is soluble in water. O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O(CNC(=O)NC(=O)NO)CN(C)C(=O)OC)Oc#N 0.001 +This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C 10.0 +This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C 1.0 +This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C 0.1 +This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C O=C(N[C@@H](C(C)C)NC(=O)c(OC)ccc1)Nc1cc(O)n(C)n1 0.01 +This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C O(C)c(C(=O)N[C@H](C(=O)Nc)c(N)c(N)c(O)c)c(O)c1 0.001 +This molecule is soluble in water. CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 10.0 +This molecule is soluble in water. 
CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 1.0 +This molecule is soluble in water. CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 0.1 +This molecule is soluble in water. CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2ncccc12 0.01 +This molecule is soluble in water. CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2ncccc12 0.001 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 10.0 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 1.0 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 0.1 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 c1(NC(=O)N2CCN(C(=O)Ncccc(C)c)CC2)oc(-ccc)c2)nn1 0.01 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 c1(NC(=O)N2CCN(C(=O)Nc(N)ccc(C)c)CC2)oc(S)c(N)c1 0.001 +This molecule is soluble in water. CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O 10.0 +This molecule is soluble in water. CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O 1.0 +This molecule is soluble in water. CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O 0.1 +This molecule is soluble in water. 
CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O c1(=O)n(C)c(=O)ccc1C(=O)N(C(C)C)Cc1cc(Cl)ccc1 0.01 +This molecule is soluble in water. CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CCN(CNC(N)=O)C(=O)Cn1ccc(=O)n(C)c1=O 0.001 +This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 10.0 +This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 1.0 +This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 0.1 +This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 c(C(=O)NCCNC(=O)C)c(-c1ccccc1)[nH] 0.01 +This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)NC(=O)NC(N)=O 0.001 +This molecule is soluble in water. Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 10.0 +This molecule is soluble in water. Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 1.0 +This molecule is soluble in water. Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 0.1 +This molecule is soluble in water. Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 c1(Oc2ccc(NC(=O)N(C)C(=O)N(C)Cc2n(C)ncn2)ccc2)ccc(C)nn1 0.01 +This molecule is soluble in water. 
Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 c(NC(=O)N(C)C(=O)N(C)C)Occ(Occc(C)nn2)ccc1 0.001 +This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 10.0 +This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 1.0 +This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 0.1 +This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(OC(=O)NNC(=O)O)cc1 0.01 +This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 O=C(OC(=O)Nc)Ncc(C#N)cc()S 0.001 +This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 10.0 +This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 1.0 +This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 0.1 +This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 N(C(N1CcN(C)c2ccccc21)=O)ccccS 0.01 +This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 N(C(N1CcN(C)c2ccccc21)=O)ccccS 0.001 +This molecule is soluble in water. O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO 10.0 +This molecule is soluble in water. O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO 1.0 +This molecule is soluble in water. O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO 0.1 +This molecule is soluble in water. 
O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1c[nH]c(-c2ccccc2)cc1=O)N(CCO)CCO 0.01 +This molecule is soluble in water. O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO c(=O)(N(CCO)CCO)c(=N)[nH]c(-c1ccccc1)[nH]2 0.001 +This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1ccc(OC)cc1NC(=O)C[C@@H](C)C(C)C 10.0 +This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1ccc(OC)cc1NC(=O)C[C@@H](C)C(C)C 1.0 +This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1ccc(OC)cc1NC(=O)C[C@H](C)C(C)C 0.1 +This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CC(C)[C@@H](C)CC(=O)Nc1ccc(C(N)=O)cc1NC(N)=O 0.01 +This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C C[C@H](NC(=O)Nc1cc(NC(N)=O)ccc1N(C)C)C(N)=O 0.001 +This molecule is soluble in water. C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 10.0 +This molecule is soluble in water. C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 1.0 +This molecule is soluble in water. C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCC3=O)cc2)C1 0.1 +This molecule is soluble in water. C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1CN(S(N)(=O)=O)N(S(N)(=O)=O)N1 0.01 +This molecule is soluble in water. 
C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1CN(S(N)(=O)=O)N(S(N)(=O)=O)N1 0.001 +This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 10.0 +This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 1.0 +This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 0.1 +This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 c1(CNC(=O)c(C)c(N)N)cc(Br)cc()c1 0.01 +This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 C(NC(=O)c(N)c(C)N)(=O)NC#N 0.001 +This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 10.0 +This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 1.0 +This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 0.1 +This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)C(=S)N1 0.01 +This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 N(C(=S)N1CCN(c2c(F)cccc2)C(=S)N1)c(OCC)cccc2 0.001 +This molecule is soluble in water. C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 10.0 +This molecule is soluble in water. 
C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 1.0 +This molecule is soluble in water. C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 0.1 +This molecule is soluble in water. C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 O=C(NNC(=O)[C@@H](C)N=S(=O)(=O)N)[C@@H](N)C#N 0.01 +This molecule is soluble in water. C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 O=C(NNC(=O)[C@@H](N)N=S(=O)(=O)N)N=S(=O)(N)N 0.001 +This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C 10.0 +This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C 1.0 +This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C COC(=O)[C@@H]1CCN(C(=O)c2cnn(CC(C)C)c2C)c2ccccc21 0.1 +This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C C(=O)(N1c(CC)(C(=O)N)N(C)c(C)N)N 0.01 +This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C C(=O)(N1c(CC)(C(=O)N)N(C)c(N)cN)N 0.001 +This molecule is soluble in water. O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F 10.0 +This molecule is soluble in water. O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F 1.0 +This molecule is soluble in water. 
O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F 0.1 +This molecule is soluble in water. O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F CN(CC(F)(F)F)C(=O)N(CC(F)(F)F)C(=O)c1ccccn1 0.01 +This molecule is soluble in water. O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F CN(CC(F)(F)F)C(=O)N(CC(F)(F)F)C(=O)c1ccccn1 0.001 +This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2c3c([nH]c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 10.0 +This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2c3c([nH]c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 1.0 +This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2c3c([nH]c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 0.1 +This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 C(N1CC(NC(=O)CN(C)C)=O)(=O)NC(=O)N 0.01 +This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 C(N(C)C)(=O)NNC(=O)NNC(=O)N)C 0.001 +This molecule is soluble in water. Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 10.0 +This molecule is soluble in water. Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 1.0 +This molecule is soluble in water. Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2cnc(C)nc2n1 0.1 +This molecule is soluble in water. 
Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1c(N)cc(C(=O)NCSC#N)c2c(C)noc12 0.01 +This molecule is soluble in water. Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 C(SCNC(=O)c1c2c(C)nc(N)c(N)c2)(N)=O 0.001 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl 10.0 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl 1.0 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl 0.1 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCC4)ncn3)C2)cc1Cl 0.01 +This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl N(C(N1CCN(cnc(N2CCCC2)c2)C1)=O)cc(Cl)c(N)cc2 0.001 +This molecule is soluble in water. CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C 10.0 +This molecule is soluble in water. CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C 1.0 +This molecule is soluble in water. CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C 0.1 +This molecule is soluble in water. CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCC1)c1c(C)nn(C)c1C 0.01 +This molecule is soluble in water. 
CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NC(=O)NO)c1c(C)nn(C)c1C 0.001 +This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 10.0 +This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 1.0 +This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 0.1 +This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 0.01 +This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 0.001 +This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 10.0 +This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 1.0 +This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CCN(C)C3)cc1 0.1 +This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 C(N(C)C)CN1CC(NC(=O)cNC(=O)N)(C)CN1 0.01 +This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 C(N(C)C)CN(C)C(=O)NC(=O)cNNC(=O)N 0.001 +This molecule is soluble in water. CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 10.0 +This molecule is soluble in water. 
CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 1.0 +This molecule is soluble in water. CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 0.1 +This molecule is soluble in water. CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 O(C)[C@@H](OC)C(Nc(NC(=O)NCc1sc(CC)cn1)C)=O 0.01 +This molecule is soluble in water. CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 O(C)[C@@H](OC)C(Nc(NC(=O)NC(=O)NC)cc(C(=O)N)[nH]1)=O 0.001 +This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2ncnc(NO)c21 10.0 +This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 1.0 +This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2ncnc(NO)c21 0.1 +This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 O=C(NO)Nc1cnc2n(C[C@H](O)CO)cn2c1 0.01 +This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 O=NNc(N)ncn(C[C@@H](O)CO)c(N)N 0.001 +This molecule is soluble in water. Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 10.0 +This molecule is soluble in water. Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 1.0 +This molecule is soluble in water. Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 0.1 +This molecule is soluble in water. Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 c1(C)nc(CN2CCN(Cc3c(Cl)cncc3)C(=O)N2)cc1 0.01 +This molecule is soluble in water. 
Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1cc(CN2CCN(Cc3ccncc3Cl)C(=O)N2)nc(N)n1 0.001 +This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 10.0 +This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 1.0 +This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 0.1 +This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 C(=O)(OCC)Nc(NC(=O)NN)ccc 0.01 +This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 C(=O)(OCC)Nc(NC(=O)OCC)cN 0.001 +This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 10.0 +This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 1.0 +This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 0.1 +This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc(N)[nH]c(=O)c(N)C(=O)N)NN 0.01 +This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(NNC(=O)C(N)N)Nc(C)c(-c(N)c(C)N)[nH]1 0.001 +This molecule is soluble in water. CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 10.0 +This molecule is soluble in water. CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)CNC(=O)/C=C/c1ccco1 1.0 +This molecule is soluble in water. 
CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 0.1 +This molecule is soluble in water. CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CN(C)NC(=O)N(C)C(=O)NCC(=O)N(C)C 0.01 +This molecule is soluble in water. CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CN(C)/N=C(\N)C(=O)N(C)NC(=O)N(C)C(N)=O 0.001 +This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 10.0 +This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 1.0 +This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 0.1 +This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 C(C(C)C)(=O)N(C1CN(c(C#N)ccc(N)=O)C1)C 0.01 +This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 C(C(C)C)(=O)N(C1CN(c(C)C#N)C1)C 0.001 +This molecule is soluble in water. CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 10.0 +This molecule is soluble in water. CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 1.0 +This molecule is soluble in water. CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 0.1 +This molecule is soluble in water. CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C(N)=O)C(=O)N1CCC(COc2ccccn2)C1 0.01 +This molecule is soluble in water. 
CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C(N)=O)C(=O)N1CCC(N)(CON)C1 0.001 +This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 10.0 +This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 1.0 +This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc2C1=O)C1CC1 0.1 +This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 N(C(=O)N([C@@H](C)c1ccc(Cl)cc1)N(C)C(=O)CS2)C 0.01 +This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@@H](c1ccc(Cl)cc1)N(C(=O)NN(C)C(N)=O)C1CN1 0.001 +This molecule is soluble in water. CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 10.0 +This molecule is soluble in water. CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 1.0 +This molecule is soluble in water. CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 0.1 +This molecule is soluble in water. CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 c(S[C@@H](C)C(=O)NC(=O)NC(=O)NCC)ccc(F)cc1 0.01 +This molecule is soluble in water. 
CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 c(S[C@@H](C)C(=O)NC(=O)NC(=O)NC(=O)NCC)ccc(F)cc1 0.001 +This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 10.0 +This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 1.0 +This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 0.1 +This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 C1C(C(=O)N2C[C@@H](CNC(=O)C3(c)cc(Cl)c)S2)C1 0.01 +This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 C1(C(=O)N2C[C@@H](CNC(=O)C3(c)cc(Cl)c)S2)CCOC1 0.001 +This molecule is soluble in water. Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(N)nc(N)n2)CC1 10.0 +This molecule is soluble in water. Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(N)nc(N)n2)CC1 1.0 +This molecule is soluble in water. Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 0.1 +This molecule is soluble in water. Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(N)nc(N)n2)CC1 0.01 +This molecule is soluble in water. 
Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 N(N1CCC(OCc2c(C)cccc2)CC1)c(N)nc(N)nN)=S 0.001 +This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 10.0 +This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 1.0 +This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 0.1 +This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 CN(CCO)C(=O)N(CCO)C(=O)NC(N)=O 0.01 +This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 NC(=O)N(CCO)C(=O)N(CCO)C(N)=O 0.001 +This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1nn(-c2ccccc2)c(C)c1NC(=S)NC1CCCCC1 10.0 +This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C 1.0 +This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C 0.1 +This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C c1(=O)n(C)c(NC(=S)NC2CCCCC2)c(=O)n1C 0.01 +This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C C(NC(=S)NC1CCCC1)(=O)NNc(C)c(=O)n(C)[nH]1 0.001 +This molecule is soluble in water. 
CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 10.0 +This molecule is soluble in water. CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 1.0 +This molecule is soluble in water. CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@H](C)O)C(C)C)s1 0.1 +This molecule is soluble in water. CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 O=C(N(C(=O)N)C(C)C)N[C@@H](c1ccccc1)O)O 0.01 +This molecule is soluble in water. CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 O=C(N(C(=O)N)C(=O)N)N(C(=O)O)[C@@H](O)c1ccccc1)O 0.001 +This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 10.0 +This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(CN(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 1.0 +This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 0.1 +This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 C(N(C(=O)Nc1cc(OCC)ccc1)C(=O)Nc1cc(Cl)ccc1)(=O)Nc 0.01 +This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1cccc(NC(=O)N(C(N)=O)S(=O)(=O)Nc2cccc(Cl)c2)c1 0.001 +This molecule is soluble in water. 
Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 10.0 +This molecule is soluble in water. Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 1.0 +This molecule is soluble in water. Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 0.1 +This molecule is soluble in water. Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 C(Nc(NC(=O)c(C(F)(F)F)c)C)(=O)NOCC(=O)N 0.01 +This molecule is soluble in water. Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc(NC(=O)N(C)C(F)(F)F)ccc1NC(N)=O 0.001 +This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 10.0 +This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 1.0 +This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 0.1 +This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C)[C@@H](C)C(C)C)nn2)cc1 0.01 +This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C)[C@@H](C)C(C)C)cn2)cc1 0.001 +This molecule is soluble in water. 
CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 10.0 +This molecule is soluble in water. CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 1.0 +This molecule is soluble in water. CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 0.1 +This molecule is soluble in water. CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 0.01 +This molecule is soluble in water. CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 C(C(=O)N1CCC(Ncc(N)ccc2)C1)[C@@H](C)CC(C)C 0.001 +This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2cccc(C(F)(F)F)c2)nc1-c1ccccn1 10.0 +This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 1.0 +This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1nc(-c2ccccn2)nc(NCc2cccc(C(F)(F)F)c2)c1C#N 0.1 +This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(N)c(NC(N)=O)c1C(F)(F)F 0.01 +This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 c(C(F)(F)F)cc(NC#N)C(=O)NNC(=O)NO 0.001 +This molecule is soluble in water. CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 10.0 +This molecule is soluble in water. 
CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 1.0 +This molecule is soluble in water. CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 0.1 +This molecule is soluble in water. CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CN(C(N)=O)C1CC(N(C)C(=O)CCc2ccsc2)C1 0.01 +This molecule is soluble in water. CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CN(C(N)=O)C1CC(N(C)C(=O)CCc2ccsc2)C1 0.001 +This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 10.0 +This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 1.0 +This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 0.1 +This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 0.01 +This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 O=S(=O)(N(C)S(=O)(=O)c(OC)cccc(OC)O)N 0.001 +This molecule is soluble in water. O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 10.0 +This molecule is soluble in water. O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 1.0 +This molecule is soluble in water. O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 0.1 +This molecule is soluble in water. O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 N1(C(=O)NNC(=O)N)CCN(C(=O)N)c2cccc21 0.01 +This molecule is soluble in water. 
O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 CN1CN(C(=O)NNC(N)=O)NNC1=O 0.001 +This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 10.0 +This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 1.0 +This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 0.1 +This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cc(OC(F)F)ccc1)NNC(=O)cN 0.01 +This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc(OC(F)F)cc(NC(=O)N)N)N 0.001 +This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 10.0 +This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 1.0 +This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 0.1 +This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 c1(N)c(C(=O)N2CCN(c(OC)c(N)c)C2)c(S)sc1N 0.01 +This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 N(N1CCN(C(=O)c(N)sc(C)n2)CC1)(C)C#N)C 0.001 +This molecule is soluble in water. 
O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 10.0 +This molecule is soluble in water. O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 1.0 +This molecule is soluble in water. O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@H]1CCCO1)[C@@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 0.1 +This molecule is soluble in water. O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 N(C(=O)N1CC(N)(C)C1)NC(=O)[C@@H]1CN(S(=O)(=O)N)C(=O)N 0.01 +This molecule is soluble in water. O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 N(C(=O)NN1CC(=O)N(C)C)N(C)C(=O)N 0.001 +This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 10.0 +This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 1.0 +This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 0.1 +This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 C(=O)(OCC)Nc1c(NC(=O)CN(C)=O)c2c(c1)C[C@@H](N)N2 0.01 +This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 O(C(=O)Nc1c(NC(=O)CS(=O)(N)=O)c2c(N)c(N)(N)=O)N(C)S2 0.001 +This molecule is soluble in water. 
C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 10.0 +This molecule is soluble in water. C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 1.0 +This molecule is soluble in water. C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCC1 0.1 +This molecule is soluble in water. C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 O=C(N[C@@H](C)Cn1nccc1)NC(NCc(OC1CCC1)O)=O 0.01 +This molecule is soluble in water. C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 O=C(N[C@@H](C)Cn1nccc1)NC(NCc(OC1CCC1)O)=O 0.001 +This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 10.0 +This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 1.0 +This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 0.1 +This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 S(=O)(=O)(NCc(=O)n(-cccc(F)c)N)C#N 0.01 +This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 S(=O)(NCC(=O)NN=C(C)O)(=O)Nc(C)ccc(F)N 0.001 +This molecule is soluble in water. Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 10.0 +This molecule is soluble in water. Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 1.0 +This molecule is soluble in water. 
Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 0.1 +This molecule is soluble in water. Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NC(C)(C)N 0.01 +This molecule is soluble in water. Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 C(C(C)(C)CNC(=O)c(C)c(C)c(N)=O)N 0.001 +This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 c1(CC)nn2c(sc(COc3ccc(NC(=O)c4c(F)cccc4)cc3)n2)s1 10.0 +This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 c1(CC)nn2c(sc(COc3ccc(NC(=O)c4c(F)cccc4)cc3)n2)s1 1.0 +This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 c1(C(=O)Nc2ccc(OCc3nc4n(c(=O)sc4)c(=O)c3)cc2)c(F)cccc1 0.1 +This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 O=C(Nc(OC)COC(=O)cNC(=O)cc(OC)c(=O)n(C)c(=O)N)O 0.01 +This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 O=C(NcCOOCCN(C)C(=O)CNC(=O)OCC(=O)N)OCC(=O)N 0.001 +This molecule is soluble in water. Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 10.0 +This molecule is soluble in water. Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 1.0 +This molecule is soluble in water. Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 0.1 +This molecule is soluble in water. 
Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 C(Nc(NC(=O)C1CCN(c)nc2cccc(N)n2)C1)(=O)N 0.01 +This molecule is soluble in water. Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 C(Nc(NC(=O)C)c(N)C)CN1CCN(c)c2c(nc()c(N)c2)C1 0.001 +This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 10.0 +This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 1.0 +This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 0.1 +This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1cc(NN2CCN(C(=O)c3ccccc3)CC2)cn1 0.01 +This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 NNC(=O)N1CCN(c2cncnc2N)CC1 0.001 +This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F 10.0 +This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F 1.0 +This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F 0.1 +This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)N(C)C(=O)Nc1c(F)cccc1F 0.01 +This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F 0.001 +This molecule is soluble in water. COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC 10.0 +This molecule is soluble in water. 
COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC 1.0 +This molecule is soluble in water. COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC 0.1 +This molecule is soluble in water. COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC O(C)c(OC)/C=C/C(=O)N(C)C(=O)N 0.01 +This molecule is soluble in water. COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC O(C)c(OC)NC(=O)/C(=C/C(=O)N)N 0.001 +This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 10.0 +This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 1.0 +This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 0.1 +This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 N(S(=O)(=O)NCC)[C@@H]1CN(C(=O)cN)CC1 0.01 +This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)NS(=O)(=O)c1cc(F)ccc1F 0.001 +This molecule is soluble in water. Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 10.0 +This molecule is soluble in water. Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 1.0 +This molecule is soluble in water. Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 0.1 +This molecule is soluble in water. Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 NC(N)=O 0.01 +This molecule is soluble in water. 
Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 NC(N)=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@@H](O)NC(=O)O)OCc1nc2cccc(O)c2 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O(C(=O)NC(=O)O)Cc(OC)NC(=O)N 0.001 +This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N 10.0 +This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N 1.0 +This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N 0.1 +This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(NS[C@@H](C)C(=O)Nc2ccccc2)ccc1N 0.01 +This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N C[C@H](SN[C@@H](C)S)C(=O)Nc1ccccc1 0.001 +This molecule is soluble in water. Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@@H](O)c3ccc4ccccc4c3)CC2=O)cc1 10.0 +This molecule is soluble in water. 
Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@@H](O)c3ccc4ccccc4c3)CC2=O)cc1 1.0 +This molecule is soluble in water. Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@@H](O)c3ccc4ccccc4c3)CC2=O)cc1 0.1 +This molecule is soluble in water. Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[CH]O)NC2=O)cc1 0.01 +This molecule is soluble in water. Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](N)O)NC2=O)cc1 0.001 +This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 10.0 +This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 1.0 +This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 0.1 +This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 O(C(=O)OCCCC)C(=O)c(N)cnc(N)c 0.01 +This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCOC(=O)ONC(=O)NN 0.001 +This molecule is soluble in water. Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 10.0 +This molecule is soluble in water. Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 1.0 +This molecule is soluble in water. 
Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 0.1 +This molecule is soluble in water. Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 c1c(NC(=O)CN2c(C)C(=O)Nc2cccn2)cc([N+](=O)[O-])c1 0.01 +This molecule is soluble in water. Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 N(C(=O)Nc1c(C)ccc([N+](=O)[O-])c1)CN1c(C)C(=O)N 0.001 +This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 10.0 +This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 1.0 +This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 0.1 +This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 N(C(=O)N[C@@H](C)C(N1CCCN2C(=O)N2)=O)C 0.01 +This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 N(C(=O)N[C@@H](C)C(N1CCCN2C(=O)N2)=O)S 0.001 +This molecule is soluble in water. COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 10.0 +This molecule is soluble in water. COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 1.0 +This molecule is soluble in water. 
COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 0.1 +This molecule is soluble in water. COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 C(N1CCC(C(Nc2c(OC)ccc(S(N(C)C)(=O)=O)c2)=O)C1)(=O)ccc(F)cN 0.01 +This molecule is soluble in water. COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 N(S(=O)(=O)N(C)C)S(=O)(=O)cc(OC)NC(=O)C1CCN(C(=O)cN)C1 0.001 +This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 10.0 +This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 1.0 +This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 0.1 +This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 c1(=O)c(C(=O)Nc(C)cc([N+](=O)[O-])[nH]2)cc(=O)o1 0.01 +This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 O=C(Nc1c([N+](=O)[O-])cc(C)c(N)[nH]1)NC 0.001 +This molecule is soluble in water. COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F 10.0 +This molecule is soluble in water. COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F 1.0 +This molecule is soluble in water. 
COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F 0.1 +This molecule is soluble in water. COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F C(NC(=O)c(F)OC)CNC(=O)c(F)c(OC)c 0.01 +This molecule is soluble in water. COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F C(NC(=O)c(F)OC)NC(=O)c(N)NC(=O)N 0.001 +This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C 10.0 +This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C 1.0 +This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C 0.1 +This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1cccnc1C[C@H](Br)c1c(C)cc(F)cc1N 0.01 +This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@@H](N)N(C)C(N)=O 0.001 +This molecule is soluble in water. CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl 10.0 +This molecule is soluble in water. CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl 1.0 +This molecule is soluble in water. CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl 0.1 +This molecule is soluble in water. CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl N(S(=O)(=O)c(Cl)ccc1)(C)S(=O)(=O)N(C)S(=O)(=O)N(C)S(=O)N 0.01 +This molecule is soluble in water. 
CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(S(N)(=O)=O)S(=O)(=O)N(C)S(=O)(=O)NS(N)(=O)=O 0.001 +This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 10.0 +This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 1.0 +This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 0.1 +This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 C(NC(=O)cc(S(NC)(=O)=O)c[nH]1)cNS(=O)(=O)NC 0.01 +This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 C(NC(=O)cc(S(NC)(=O)=O)c[nH]1)NS(=O)(=O)NC 0.001 +This molecule is soluble in water. Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 10.0 +This molecule is soluble in water. Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 1.0 +This molecule is soluble in water. Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 0.1 +This molecule is soluble in water. Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@@H](N)C#N)CC2)c(=O)[nH]1 0.01 +This molecule is soluble in water. 
Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@@H](N)C#N)CC2)c(=O)[nH]1 0.001 +This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 10.0 +This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 1.0 +This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 0.1 +This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 NC(=O)NNC(=O)NCCNC(=O)Cc1ccc(F)cc1 0.01 +This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 NC(=O)NNC(=O)NCNC(=O)Cc1ccc(F)cc1 0.001 +This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 10.0 +This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 1.0 +This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 0.1 +This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#CC1=CC=C(N)OC(=O)C=CC(C(N)=O)=NC=C1 0.01 +This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 O(c(N)c(Oc)cc(C#N)c)c(C(=O)N)n1 0.001 +This molecule is soluble in water. CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 10.0 +This molecule is soluble in water. 
CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 1.0 +This molecule is soluble in water. CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 0.1 +This molecule is soluble in water. CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 0.01 +This molecule is soluble in water. CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@H](C)c1ccc(F)c(C)c1 0.001 +This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 10.0 +This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 1.0 +This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 0.1 +This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 CC(=NNC(=O)CCN)NC(=O)Cc1cccc(Cl)c1 0.01 +This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 CN(C)CC(=O)NNC(=O)CN(C)Cc1cccc(Cl)c1 0.001 +This molecule is soluble in water. CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 10.0 +This molecule is soluble in water. CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 1.0 +This molecule is soluble in water. CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 0.1 +This molecule is soluble in water. CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@@H](C)C(=O)n1cc2ccccc2c(N)c1=O 0.01 +This molecule is soluble in water. 
CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 c1cccc2c(N)c(=O)N(C(=O)[C@@H](OCC)C)cS2 0.001 +This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 10.0 +This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 1.0 +This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 0.1 +This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 c1(C=O)c(C)n(CC(=O)N(C)cccc)n1 0.01 +This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 C(=O)(N(C)cc(N)=O)Cn1cccc1 0.001 +This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 10.0 +This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 1.0 +This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 0.1 +This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 C(C)(c)cccc1NC(=O)N1C[C@@H](COCC)C1 0.01 +This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 C(C)(c)cccc(C(C)C)(N)NC(=O)N1C[C@H](COCC)C1 0.001 +This molecule is soluble in water. CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 10.0 +This molecule is soluble in water. 
CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 1.0 +This molecule is soluble in water. CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2ccc(C(=O)NC)c(N)n2)c1 0.1 +This molecule is soluble in water. CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1ccc(N)c(C(=O)NC)c1 0.01 +This molecule is soluble in water. CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 O([C@@H](C)Oc(C(=O)NC)ccc1)C(=O)NC 0.001 +This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 10.0 +This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 1.0 +This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 0.1 +This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 C(F)(F)(F)[C@@H]1C[C@H](NC(=O)c2c(OC)ccc2)N 0.01 +This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 N(C(=O)N[C@@H]1CCC[C@H](C(F)(F)F)N)(C)C 0.001 +This molecule is soluble in water. Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 10.0 +This molecule is soluble in water. Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 1.0 +This molecule is soluble in water. 
Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 0.1 +This molecule is soluble in water. Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 CC1=CC([N+](=O)[O-])=CN(C(=O)Nc2cc(C)[nH]n2)C1 0.01 +This molecule is soluble in water. Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 c1(NC(=O)N2Cc(S)cc(C)c2)cc(C)n(C)c(C)c1 0.001 +This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 10.0 +This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 1.0 +This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 0.1 +This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 N(Cc1c(N)ccc([N+](=O)[O-])c1)(C)C(=O)C[C@@H]1C 0.01 +This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 N(C)(C)c1cc([N+](=O)[O-])cn1 0.001 +This molecule is soluble in water. CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 10.0 +This molecule is soluble in water. CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 1.0 +This molecule is soluble in water. CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 0.1 +This molecule is soluble in water. CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 N#CNS(=O)(=O)CC(=O)NN 0.01 +This molecule is soluble in water. 
CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 N#CNS(=O)CC(=O)NNC(N)=O 0.001 +This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)o1 10.0 +This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)o1 1.0 +This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)o1 0.1 +This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCS(N)(=O)=O)c(F)c1 0.01 +This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)NCO)cc1 0.001 +This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 10.0 +This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 1.0 +This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 0.1 +This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 c1c(S(=O)(N2CCN(C(=O)ccc(Cl)c4)CC2)=O)sc(C)c1 0.01 +This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 N1(S(=O)(=O)c2sc(C)cc2)CCN(C(=O)ccc(Cl)c2)S1 0.001 +This molecule is soluble in water. 
O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 10.0 +This molecule is soluble in water. O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 1.0 +This molecule is soluble in water. O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 CCO[C@@H]1[C@H](NC(=O)Nc2ccc([N+](=O)[O-])c(Cl)c2)C12CCC2 0.1 +This molecule is soluble in water. O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 C(Nc1ccc([N+](=O)[O-])c(Cl)c1)(=O)N[C@H]1[C@H]2CCO[C@H]212 0.01 +This molecule is soluble in water. O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 C(Nc1ccc([N+](=O)[O-])c(Cl)c1)(=O)N[C@H]1[C@H]2C(C)(C)OCC2 0.001 +This molecule is soluble in water. CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 10.0 +This molecule is soluble in water. CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 1.0 +This molecule is soluble in water. CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 0.1 +This molecule is soluble in water. CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 C(C(=O)N(C)c1nc(C(=O)OCC)cs1)((c)cc()c)(C)C 0.01 +This molecule is soluble in water. 
CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 C(C(=O)N(C)c1nc(C(=O)OCC)cs1)((c)cc()c)(C)C 0.001 +This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC 10.0 +This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@H](C#N)C(=O)OC 1.0 +This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC COC(=O)[C@@H](C#N)[C@@H](C)C(=O)OC 0.1 +This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC O=C(OC)[C@@H](N#N)[C@@H](C)NC(=O)OC 0.01 +This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC O=C(OC)[C@@H](N#N)[C@@H](C)NC(=O)OC 0.001 +This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 10.0 +This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 1.0 +This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 0.1 +This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 0.01 +This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 0.001 +This molecule is soluble in water. CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O 10.0 +This molecule is soluble in water. CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O 1.0 +This molecule is soluble in water. CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O 0.1 +This molecule is soluble in water. 
CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CCCCN(C)S(N)(=O)=O 0.01 +This molecule is soluble in water. CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CN(CCCCN(C)S(N)(=O)=O)S(N)(=O)=O 0.001 +This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 10.0 +This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 1.0 +This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 0.1 +This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 N1(C(=O)cN)CN(C(=O)N)C[C@H](N)N(C)C1 0.01 +This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 N1(C(=O)cN)CN(C(=O)N)C[C@@H](N)N(C)C(=O)N1 0.001 +This molecule is soluble in water. O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 10.0 +This molecule is soluble in water. O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 1.0 +This molecule is soluble in water. O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 0.1 +This molecule is soluble in water. O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(OCC(=O)NNC(=O)O)c(N)n(-c1ccccc1)N 0.01 +This molecule is soluble in water. 
O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O(CC(=O)OCC(=O)N(C)C)N(C)c(C)c(N)c(Cl)N 0.001 +This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 10.0 +This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 1.0 +This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 0.1 +This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 C(NC(=O)c#N)(=O)NC[C@@H](N1CCOCCN1CC(C)C)C 0.01 +This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 C(NC(=O)c(=)N)(=O)N(C)CN(C)C(=O)N 0.001 +This molecule is soluble in water. Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 10.0 +This molecule is soluble in water. Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 1.0 +This molecule is soluble in water. Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@H](C)NC(=O)c1ccc(Cl)s1 0.1 +This molecule is soluble in water. Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 O=C(N[C@@H](C)NC(=O)c(N)nc(N)N)NN 0.01 +This molecule is soluble in water. 
Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 NNC(=O)NNC(=O)NNC(=O)NNC(=O)NN 0.001 +This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 10.0 +This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 1.0 +This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 0.1 +This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 CN(S(=O)(=O)Nc1ccccc1S(N)(=O)=O)S(=O)(=O)C(F)F 0.01 +This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 CN(S(=O)(=O)Nc1ccccc1S(N)=O)S(=O)(=O)C(F)F 0.001 +This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C 10.0 +This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C 1.0 +This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C 0.1 +This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CO)C(CO)NC(=O)Nc1cc(Br)ccc1C 0.01 +This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CO)C(CO)NC(=O)Nc1cc(Br)ccc1C 0.001 +This molecule is soluble in water. CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O 10.0 +This molecule is soluble in water. CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O 1.0 +This molecule is soluble in water. 
CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O 0.1 +This molecule is soluble in water. CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O c(=O)(N(C)Cc1c(Cl)cccc1)NN=O 0.01 +This molecule is soluble in water. CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O c(=O)(N(C)CN(C)C(=O)c1c(Cl)cc[nH]1)N 0.001 +This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@H]1CCc2c(F)cccc21 10.0 +This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@H]1CCc2c(F)cccc21 1.0 +This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@H]1CCc2cccc(F)c21 0.1 +This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 c1ccc(F)c2c1SCC(=O)NCC(=O)NCCOCC 0.01 +This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 c1ccc(F)c2c1SCC(=O)NCC(=O)NCCOCC 0.001 +This molecule is soluble in water. C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCC2)cc1 10.0 +This molecule is soluble in water. C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCC2)cc1 1.0 +This molecule is soluble in water. C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCC2)cc1 0.1 +This molecule is soluble in water. 
C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@H](N(C)C(=O)NS(=O)(=O)N1CCCCCN1)N(C)S(N)(=O)=O 0.01 +This molecule is soluble in water. C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](NS(=O)(=O)N(C)C)N(C)C(=O)NS(=O)(=O)N1CCCCCN1 0.001 +This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 10.0 +This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 1.0 +This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@H](c1ccc(Br)cc1)C1CCC1 0.1 +This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 C(N(C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1)(C)C)(=O)N 0.01 +This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 C[C@@H](NC(=O)N(C)C(=O)N(C)C)c1ccc(Br)cc1 0.001 +This molecule is soluble in water. CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 10.0 +This molecule is soluble in water. CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 1.0 +This molecule is soluble in water. CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 0.1 +This molecule is soluble in water. 
CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 N(S(=O)(=O)N1CCC(C)C)c1ccc(C(Nc2c(Cl)ccc(Cl)c2)=O)cc1 0.01 +This molecule is soluble in water. CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 N(S(=O)(=O)N1CCN(S(=O)(=O)Nc)cc(Cl)cN)c1cc(Cl)ccc1 0.001 +This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C 10.0 +This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1c(CNC(=O)CCc2ccccc2C)c(OC)nn1C 1.0 +This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1c(CNC(=O)CCc2ccccc2C)c(OC)nn1C 0.1 +This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C C(NC(=O)CCc1c(C)cccc1)c(OC)c(OC)n(C)c1 0.01 +This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C C(NC(=O)CCc(C)c(OC)n(C))N 0.001 +This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 10.0 +This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 1.0 +This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 0.1 +This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 C(N(C)C(=O)cN(C)C(=O)ccc(Cl)cN)(=O)N 0.01 +This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 N(C)(C(=O)N(C)CN(C)C(=O)ccc(Cl)cN)S 0.001 +This molecule is soluble in water. 
CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 10.0 +This molecule is soluble in water. CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 1.0 +This molecule is soluble in water. CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 0.1 +This molecule is soluble in water. CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(NC(=O)CC)c1 0.01 +This molecule is soluble in water. CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 c(NC(=O)CC)cc(NC(=O)CC)ccc1 0.001 +This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 10.0 +This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 1.0 +This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 0.1 +This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 N(C(N[C@@H](C1CC1)c(N)cc()c)=O)C(=O)NC(=O)N 0.01 +This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 N(C(N[C@@H](C1C1)c(N)cc()c)O)C(=O)NC(=O)NO 0.001 +This molecule is soluble in water. CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C 10.0 +This molecule is soluble in water. CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C 1.0 +This molecule is soluble in water. CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C 0.1 +This molecule is soluble in water. 
CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C C(N(C(=O)ccc(F)c)CC)CC(=O)NNC(=O)N 0.01 +This molecule is soluble in water. CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(C(=O)NCc1ccc(F)cc1)C(=O)NC(N)=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O NC1=CC=CC2=C(N3CCCC3=O)[C@H](CCCO1)CNC2=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1C(=O)N(c2c(ccccc2)CNC[C@H]2CCCC2)c1c(C)CC 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O c1(N2C(=O)CCCC2)c(C)c(cccc1)ccccc12 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c1c(C(NC[C@@H]2CCCC2)(c2ccccc2)C)Cc2ccccc2)C[C@H]1CCCCCCCC(C)CC 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c1c(CNC(=O)ccccc2)(cc1NC[C@@H](C)CN(C)C)CCC)CCCCCCCCCC)C1 0.001 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c1c(C(NC[C@@H]2CCCC2)(c2ccccc2)C)Cc2ccccc2)C[C@H]1CCCCCCCC(C)CC 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c1c(CNC(=O)ccccc2)(cc1NC[C@@H](C)CN(C)C)CCC)CCCCCCCCCC)C1 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O CCCCCN(C(=O)c1ccccc1)c1ccccc1N1CCCC1 0.01 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c2ccccc2N(C(=O)c2ccccc2)c2ccccc2)CCN1CCCN(C)CCCCCCCCCCN(C)CCCCCCCCCCCCCCCCCCN(CCNCCCN(CCCCCCCCCCCCCCNCNCCCNCN(CCCCCCCCCCCCCCCCCCCCCCCNCN(CCCCN(CCCCN(CCCCCN(CCCCCCCCCCCCCCCCCCCCCCCN(CCCCCCCCCCN(CN(=CCCCCCCCCCCCCCCCCCCCCCCCCCN(=CCCCCCCCN(=CCCN(=CCCCCCC(=C(=CC(=C(=CC(=CCCC(=C(=C(=C(=CCC(=C(=C(=C(=CCCCCCCCCCCCCCCCCCCCCCN(=CCCN(C)(=CCC)(=C)(C)(C)(C)(C)C)C)(C)(C)(CCCCCC(C)(C)(C)(C)(=C)(C)(C)(C)(N(=C)(C)(C)(C)(=C(C)(=C)(=C)(=C)(=C)(=C)(=C)CCCCCCCN(=C)CN(=CCCC)(=C)(=C)(=C)(=C)(NCNC)(=C)(=C)(N(C)(=C)(=C)(=C) 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 +This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 +This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 diff --git a/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py b/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py index ccb9368..fb3b4ea 100644 --- a/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py +++ b/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py @@ -18,14 +18,13 @@ from torch_geometric.loader import DataLoader as pyg_DataLoader -from models.MoleculeSTM.utils import get_molecule_repr_MoleculeSTM -from models.MoleculeSTM.models import MLP -from models.MoleculeSTM.downstream_molecule_edit_utils import load_molecule_models -from models.MoleculeSTM.utils import freeze_network -from models.MoleculeSTM.datasets import ZINC250K_Dataset_SMILES, ZINC250K_Dataset_Graph +from utils.molstm_utils import get_molecule_repr_MoleculeSTM +from models.multimodal.moleculestm import MLP +from utils.molstm_utils import load_molecule_models +from utils.molstm_utils import freeze_network from datasets.moledit_dataset import SUPPORTED_MOLEDIT_DATASET from models.task_model.moledit_model import MoleditModel -from models.MoleculeSTM.models.mega_molbart.mega_mol_bart import MegaMolBART +from models.multimodal.mega_molbart.mega_mol_bart import MegaMolBART def cycle_index(num, shift): arr = torch.arange(num) + shift @@ -181,7 +180,7 @@ def train(epoch): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--device", type=str, default="cuda:0") + parser.add_argument("--device", type=str, default=None) parser.add_argument("--verbose", type=int, default=1) parser.add_argument("--dataset_path", type=str, default=None) parser.add_argument("--dataset", type=str, default="ZINC250K") @@ -220,46 +219,17 @@ def train(epoch): parser.add_argument('--use_normalize', dest='normalize', action='store_true') parser.add_argument('--no_normalize', 
dest='normalize', action='store_false') parser.set_defaults(normalize=True) - parser.add_argument("--MASTER_PORT", type=str, default='6001') + parser.add_argument("--MASTER_PORT", type=str, default='6000') args = parser.parse_args() print(args) config = json.load(open(args.config_path)) os.environ['MASTER_PORT'] = args.MASTER_PORT + # load dataset - if args.generation_model == "MegaMolBART": - if args.MoleculeSTM_molecule_type == "SMILES": - if args.dataset == "ZINC250K": - dataset = SUPPORTED_MOLEDIT_DATASET[args.dataset](args.dataset_path, config["data"]["mol"], split="train") - # dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") - # dataset = ZINC250K_Dataset_SMILES(dataset_root) - elif args.dataset == "ZINC250K1K": - dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") - dataset = ZINC250K_Dataset_SMILES(dataset_root, 1000) - elif args.dataset == "ZINC250K10K": - dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") - dataset = ZINC250K_Dataset_SMILES(dataset_root, 10000) - else: - raise Exception - dataloader_class = pyg_DataLoader - else: - if args.dataset == "ZINC250K": - dataset = SUPPORTED_MOLEDIT_DATASET[args.dataset](args.dataset_path, config["data"]["mol"], split="train") - # dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") - # dataset = ZINC250K_Dataset_Graph(dataset_root) - elif args.dataset == "ZINC250K1K": - dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") - dataset = ZINC250K_Dataset_Graph(dataset_root, 1000) - elif args.dataset == "ZINC250K10K": - dataset_root = os.path.join(args.dataset_path, "ZINC250K_data") - dataset = ZINC250K_Dataset_Graph(dataset_root, 10000) - else: - raise Exception - dataloader_class = pyg_DataLoader - else: - raise NotImplementedError - + dataset = SUPPORTED_MOLEDIT_DATASET[args.dataset](args.dataset_path, config["data"]["mol"], split="train") + dataloader_class = pyg_DataLoader device = torch.device(args.device) \ if torch.cuda.is_available() else 
torch.device("cpu") @@ -268,21 +238,15 @@ def train(epoch): torch.random.manual_seed(args.seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(args.seed) - # load model - if config["model"]=="molstm-MegaMolBART": - MegaMolBART_wrapper, molecule_model_generation, molecule_dim_generation, \ - molecule_model_MoleculeSTM, mol2latent_MoleculeSTM, molecule_dim_MoleculeSTM = load_molecule_models(args) - mol2latent_MoleculeSTM = mol2latent_MoleculeSTM.to(device) - freeze_network(mol2latent_MoleculeSTM) - mol2latent_MoleculeSTM.eval() - else: + molecule_model_MoleculeSTM = MoleditModel(config["network"]) + mol2latent_MoleculeSTM = None + if config["model"]== "molstm-MegaMolBART": + MegaMolBART_wrapper = molecule_model_MoleculeSTM.model.MegaMolBART_wrapper + molecule_model_generation = copy.deepcopy(MegaMolBART_wrapper.model) + else: MegaMolBART_wrapper = MegaMolBART(vocab_path=args.vocab_path, input_dir=args.MegaMolBART_generation_model_dir, output_dir=None) molecule_model_generation = copy.deepcopy(MegaMolBART_wrapper.model) - molecule_dim_generation = 256 - molecule_dim_MoleculeSTM = args.SSL_emb_dim - molecule_model_MoleculeSTM = MoleditModel(config["network"]) - mol2latent_MoleculeSTM = None torch.cuda.set_device(int(re.search(r'\d+', args.device).group())) @@ -296,6 +260,9 @@ def train(epoch): dataloader = dataloader_class(dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) + + molecule_dim_generation = 256 + molecule_dim_MoleculeSTM = args.SSL_emb_dim generation2MoleculeSTM = MLP(molecule_dim_generation, [molecule_dim_MoleculeSTM, molecule_dim_MoleculeSTM]).to(device) MoleculeSTM2generation = MLP(molecule_dim_MoleculeSTM, [molecule_dim_generation, molecule_dim_generation]).to(device) @@ -305,7 +272,8 @@ def train(epoch): ] optimizer = optim.Adam(model_param_group, weight_decay=args.decay) optimal_loss = 1e10 - + + for e in range(1, args.epochs+1): print("Epoch {}".format(e)) train(e) diff --git 
a/open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py b/open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py index 6b039cb..242a229 100644 --- a/open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py +++ b/open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py @@ -11,13 +11,12 @@ import torch.nn.functional as F from tqdm import tqdm import re -from models.MoleculeSTM.downstream_molecule_edit_utils import get_SMILES_list, get_description_list, load_language_molecule_and_edit_models, clip_loss_for_edit, evaluate_SMILES_list -from models.MoleculeSTM.utils import prepare_text_tokens -from models.MoleculeSTM.models import GNN, GNN_graphpred, MLP +import copy +from utils.molstm_utils import prepare_text_tokens, get_SMILES_list, get_description_list, load_language_molecule_and_edit_models, clip_loss_for_edit, evaluate_SMILES_list +from models.multimodal.moleculestm import MLP from transformers import BertTokenizer -from models.multimodal import * from models.task_model.moledit_model import MoleditModel -from models.MoleculeSTM.models.mega_molbart.mega_mol_bart import MegaMolBART +from models.multimodal.mega_molbart.mega_mol_bart import MegaMolBART def get_lr(t, initial_lr, rampdown=0.25, rampup=0.05): lr_ramp = min(1, (1 - t) / rampdown) @@ -35,18 +34,10 @@ def mean_pooling(token_embeddings, attention_mask): def check_edit(SMILES, text): - if config["model"]=="molstm-MegaMolBART": - text_list = [text] - text_tokens_ids, text_masks = prepare_text_tokens( - device=device, description=text_list, tokenizer=text_tokenizer, max_seq_len=args.max_seq_len) - text_output = text_model(input_ids=text_tokens_ids, attention_mask=text_masks) - text_repr = text_output["pooler_output"] - text_repr = text2latent(text_repr) - else: - text_list = text_tokenizer(text, truncation=True, padding=True, return_tensors='pt').to(device) - del text_list["token_type_ids"] - text_output = text_model(text_list) - text_repr = text_output[0] + text_list = 
text_tokenizer(text, truncation=True, padding=True, return_tensors='pt').to(device) + del text_list["token_type_ids"] + text_output = text_model(text_list) + text_repr = text_output first_and_second_SMILES_list = [] @@ -121,7 +112,7 @@ def check_edit(SMILES, text): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--device", type=str, default="cuda:0") + parser.add_argument("--device", type=str, default=None) parser.add_argument("--verbose", type=int, default=1) ########## for editing ########## @@ -137,12 +128,11 @@ def check_edit(SMILES, text): parser.add_argument('--no_normalize', dest='normalize', action='store_false') parser.set_defaults(normalize=True) - parser.add_argument("--dataset_path", type=str, default="./datasets/mol_edit") + parser.add_argument("--dataset_path", type=str, default=None) parser.add_argument("--SSL_emb_dim", type=int, default=256) parser.add_argument("--max_seq_len", type=int, default=512) parser.add_argument("--config_path", type=str, default=None) ########## for MoleculeSTM ########## - parser.add_argument("--MoleculeSTM_model_dir", type=str, default=None) parser.add_argument("--MoleculeSTM_molecule_type", type=str, default=None, choices=["SMILES", "Graph"]) ########## for MegaMolBART ########## @@ -151,14 +141,12 @@ def check_edit(SMILES, text): parser.add_argument("--text_mode", type=str, default=None) ########## for MoleculeSTM and generation projection ########## - parser.add_argument("--language_edit_model_dir_new", type=str, default=None) - parser.add_argument("--language_edit_model_dir", type=str, default=None) ########## for editing ########## parser.add_argument("--lr_rampup", type=float, default=0.05) parser.add_argument("--lr", type=float, default=0.1) parser.add_argument("--epochs", type=int, default=100) - parser.add_argument("--MASTER_PORT", type=str, default='6001') + parser.add_argument("--MASTER_PORT", type=str, default='6000') args 
= parser.parse_args() print(args) @@ -167,35 +155,32 @@ def check_edit(SMILES, text): os.environ['MASTER_PORT'] = args.MASTER_PORT device = torch.device(args.device) \ if torch.cuda.is_available() else torch.device("cpu") - if config["model"]=="molstm-MegaMolBART": - text_model, text_tokenizer, text_dim, molecule_model, MegaMolBART_wrapper, molecule_dim, \ - text2latent, mol2latent, generation2MoleculeSTM, MoleculeSTM2generation = load_language_molecule_and_edit_models(args) - text2latent = text2latent.to(device) - mol2latent = mol2latent.to(device) - text2latent.eval() - mol2latent.eval() - - else: - text_model = MoleditModel(config["network"]) - text_tokenizer = BertTokenizer.from_pretrained(args.text_mode, model_max_length=512, cache_dir=args.text_mode) - # This is loading from the pretarined_MegaMolBART + + + # load model + text_model = MoleditModel(config["network"]) + text_tokenizer = BertTokenizer.from_pretrained(args.text_mode, model_max_length=512, cache_dir=args.text_mode) + if config["model"]== "molstm-MegaMolBART": + MegaMolBART_wrapper = text_model.model.MegaMolBART_wrapper + molecule_model = MegaMolBART_wrapper.model + else: MegaMolBART_wrapper = MegaMolBART(vocab_path=args.vocab_path, input_dir=args.MegaMolBART_generation_model_dir, output_dir=None) molecule_model = MegaMolBART_wrapper.model - print("Loading from pretrained MegaMolBART ({}).".format(args.MegaMolBART_generation_model_dir)) - - # generation2MoleculeSTM = nn.Linear(molecule_dim_generation, args.SSL_emb_dim) - generation2MoleculeSTM = MLP(256, [args.SSL_emb_dim, args.SSL_emb_dim]) - input_model_path = os.path.join(args.language_edit_model_dir_new, "generation2MoleculeSTM_model.pth") - print("Loading from {}...".format(input_model_path)) - state_dict = torch.load(input_model_path, map_location='cpu') - generation2MoleculeSTM.load_state_dict(state_dict) - - # MoleculeSTM2generation = nn.Linear(args.SSL_emb_dim, molecule_dim_generation) - MoleculeSTM2generation = MLP(args.SSL_emb_dim, [256, 
256]) - input_model_path = os.path.join(args.language_edit_model_dir_new, "MoleculeSTM2generation_model.pth") - print("Loading from {}...".format(input_model_path)) - state_dict = torch.load(input_model_path, map_location='cpu') - MoleculeSTM2generation.load_state_dict(state_dict) + + torch.cuda.set_device(int(re.search(r'\d+', args.device).group())) + + generation2MoleculeSTM = MLP(256, [args.SSL_emb_dim, args.SSL_emb_dim]) + input_model_path = os.path.join(args.language_edit_model_dir, "generation2MoleculeSTM_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + generation2MoleculeSTM.load_state_dict(state_dict) + + + MoleculeSTM2generation = MLP(args.SSL_emb_dim, [256, 256]) + input_model_path = os.path.join(args.language_edit_model_dir, "MoleculeSTM2generation_model.pth") + print("Loading from {}...".format(input_model_path)) + state_dict = torch.load(input_model_path, map_location='cpu') + MoleculeSTM2generation.load_state_dict(state_dict) text_model = text_model.to(device) molecule_model = molecule_model.to(device) @@ -215,10 +200,10 @@ def check_edit(SMILES, text): print("\n\n\nstart editing\n\n\n") - source_SMILES_list = get_SMILES_list(args) + source_SMILES_list = get_SMILES_list(args)[0:1] description_list = get_description_list(args) - torch.cuda.set_device(int(re.search(r'\d+', args.device).group())) + for description in description_list: print("===== for description {} =====".format(description)) diff --git a/open_biomed/models/MoleculeSTM/downstream_molecule_edit_utils.py b/open_biomed/utils/molstm_utils.py similarity index 92% rename from open_biomed/models/MoleculeSTM/downstream_molecule_edit_utils.py rename to open_biomed/utils/molstm_utils.py index 9dd6d61..a0ede4c 100644 --- a/open_biomed/models/MoleculeSTM/downstream_molecule_edit_utils.py +++ b/open_biomed/utils/molstm_utils.py @@ -1,11 +1,13 @@ import os import copy +import numpy as np import torch import torch.nn 
as nn import torch.nn.functional as F from transformers import AutoModel, AutoTokenizer -from models.MoleculeSTM.models.mega_molbart.mega_mol_bart import MegaMolBART -from models.MoleculeSTM.models import GNN, GNN_graphpred, MLP +from models.multimodal.mega_molbart.mega_mol_bart import MegaMolBART +from models.multimodal.moleculestm import MLP +from models.multimodal.moleculestm import GNN, GNN_graphpred from rdkit import Chem, RDLogger from rdkit.Chem import AllChem, Descriptors from rdkit import DataStructs @@ -13,6 +15,57 @@ lg.setLevel(RDLogger.CRITICAL) + + +# This is for BERT +def padarray(A, size, value=0): + t = size - len(A) + return np.pad(A, pad_width=(0, t), mode='constant', constant_values = value) + + +# This is for BERT +def preprocess_each_sentence(sentence, tokenizer, max_seq_len): + text_input = tokenizer( + sentence, truncation=True, max_length=max_seq_len, + padding='max_length', return_tensors='np') + + input_ids = text_input['input_ids'].squeeze() + attention_mask = text_input['attention_mask'].squeeze() + + sentence_tokens_ids = padarray(input_ids, max_seq_len) + sentence_masks = padarray(attention_mask, max_seq_len) + return [sentence_tokens_ids, sentence_masks] + + +# This is for BERT +def prepare_text_tokens(device, description, tokenizer, max_seq_len): + B = len(description) + tokens_outputs = [preprocess_each_sentence(description[idx], tokenizer, max_seq_len) for idx in range(B)] + tokens_ids = [o[0] for o in tokens_outputs] + masks = [o[1] for o in tokens_outputs] + tokens_ids = torch.Tensor(tokens_ids).long().to(device) + masks = torch.Tensor(masks).bool().to(device) + return tokens_ids, masks + + +def get_molecule_repr_MoleculeSTM(molecule_data, mol2latent=None, molecule_type="SMILES", MegaMolBART_wrapper=None, molecule_model=None): + if molecule_type == "SMILES": + embedding, pad_mask = MegaMolBART_wrapper.smileslist2embedding(molecule_data) # [pad, B, d], [pad, B] + molecule_repr = embedding[0, :, :] # [B, d] + else: + molecule_repr 
= molecule_model(molecule_data) + + if mol2latent is not None: + molecule_repr = mol2latent(molecule_repr) + return molecule_repr + + +def freeze_network(model): + for param in model.parameters(): + param.requires_grad = False + return + + def get_SMILES_list(args): if args.input_SMILES is not None: SMILES_list = [args.input_SMILES] diff --git a/scripts/multimodal/moledit/edit.sh b/scripts/multimodal/moledit/edit.sh index 3eb3285..0e063ba 100644 --- a/scripts/multimodal/moledit/edit.sh +++ b/scripts/multimodal/moledit/edit.sh @@ -1,23 +1,21 @@ #!/bin/bash molkformer--Graph momu--Graph molstm--SMILES/Graph ID---./models/MoleculeSTM/downstream_molecule_edit_utils.py MODE="test" -MODEL="molstm" +MODEL="molkformer" DEVICE=$1 EPOCHS=100 -MOL_TYPE="Graph" +TYPE="Graph" ID=101 python open_biomed/tasks/mol_edit/moledit_step_02_Latent_Optimization.py \ --device ${DEVICE} \ ---config_path ./configs/moledit/${MODEL}-MegaMolBART.json \ ---MegaMolBART_generation_model_dir ./ckpts/mol_edit_ckpts/pretrained_MegaMolBART/checkpoints \ +--config_path ./configs/moledit/${MODEL}-${TYPE}-MegaMolBART.json \ --input_SMILES_file ./datasets/mol_edit/Editing_data/single_multi_property_SMILES.txt \ ---language_edit_model_dir_new ./ckpts/finetune_ckpts/moledit/${MODEL} \ ---language_edit_model_dir ./ckpts/mol_edit_ckpts/demo_checkpoints_${MOL_TYPE} \ ---vocab_path ./open_biomed/models/MoleculeSTM/bart_vocab.txt \ +--language_edit_model_dir ./ckpts/finetune_ckpts/moledit/${MODEL}/${TYPE} \ --output_model_dir ./open_biomed/tasks/mol_edit \ ---text_mode ./ckpts/mol_edit_ckpts/pretrained_SciBERT \ +--text_mode ./ckpts/text_ckpts/scibert_scivocab_uncased \ --epochs ${EPOCHS} \ --input_description_id ${ID} \ ---MoleculeSTM_molecule_type ${MOL_TYPE} \ ---MoleculeSTM_model_dir ./ckpts/mol_edit_ckpts/demo_checkpoints_${MOL_TYPE} \ +--MoleculeSTM_molecule_type ${TYPE} \ +--vocab_path ./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt \ +--MegaMolBART_generation_model_dir 
./ckpts/fusion_ckpts/pretrained_MegaMolBART/checkpoints \ --MASTER_PORT '6000' \ No newline at end of file diff --git a/scripts/multimodal/moledit/train.sh b/scripts/multimodal/moledit/train.sh index 39a9082..52a0a41 100755 --- a/scripts/multimodal/moledit/train.sh +++ b/scripts/multimodal/moledit/train.sh @@ -3,22 +3,21 @@ MODE="train" MODEL="molkformer" DEVICE=$1 EPOCHS=100 -MOL_TYPE="Graph" +TYPE="Graph" mkdir ./ckpts/finetune_ckpts/moledit/${MODEL} python open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py \ --device ${DEVICE} \ ---MoleculeSTM_molecule_type ${MOL_TYPE} \ ---config_path ./configs/moledit/${MODEL}-MegaMolBART.json \ +--MoleculeSTM_molecule_type ${TYPE} \ +--config_path ./configs/moledit/${MODEL}-${TYPE}-MegaMolBART.json \ --dataset ZINC250K \ --dataset_path ./datasets/mol_edit/ZINC250K_data \ ---output_path ./ckpts/finetune_ckpts/moledit/${MODEL}/ \ +--output_path ./ckpts/finetune_ckpts/moledit/${MODEL}/${TYPE} \ --mode ${MODE} \ --epochs ${EPOCHS} \ --num_workers 8 \ --batch_size 256 \ ---vocab_path ./open_biomed/models/MoleculeSTM/bart_vocab.txt \ ---MoleculeSTM_model_dir ./ckpts/mol_edit_ckpts/demo_checkpoints_${MOL_TYPE} \ ---MegaMolBART_generation_model_dir ./ckpts/mol_edit_ckpts/pretrained_MegaMolBART/checkpoints \ +--vocab_path ./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt \ +--MegaMolBART_generation_model_dir ./ckpts/fusion_ckpts/pretrained_MegaMolBART/checkpoints \ --MASTER_PORT '6000' \ No newline at end of file From 773843f3aca5cfdb836024828392fd5c15c1ed09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ccyz-32526=E2=80=9D?= <345609226@qq.com> Date: Wed, 8 Nov 2023 16:39:34 +0800 Subject: [PATCH 5/9] add moledit --- open_biomed/tasks/mol_edit/accuracy.npz | Bin 272 -> 0 bytes open_biomed/tasks/mol_edit/edited_SMILES.tsv | 1195 ------------------ 2 files changed, 1195 deletions(-) delete mode 100644 open_biomed/tasks/mol_edit/accuracy.npz delete mode 100644 open_biomed/tasks/mol_edit/edited_SMILES.tsv diff --git 
a/open_biomed/tasks/mol_edit/accuracy.npz b/open_biomed/tasks/mol_edit/accuracy.npz deleted file mode 100644 index 5e7485a9f5a03e5d895bb6fb8f135d264b832c68..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 272 zcmWIWW@Zs#fB;1XYiY*P4j>1FIT=J45{ruB4fOH~Dj69Bz=9x!AW0CI>=)`A5Xs0; z#!#)El3JWxq;934Zj)xAuA`uymS0p-l$aNvUzCyx5_e0?DNY577iT0EqyqUGhB}%$ w3bhIp04|6t7#IS)8JR?wad`r!ih-el5yXajDZrbR4J5z_gyul{Fo?qd0E^Qq@c;k- diff --git a/open_biomed/tasks/mol_edit/edited_SMILES.tsv b/open_biomed/tasks/mol_edit/edited_SMILES.tsv deleted file mode 100644 index ba3e82e..0000000 --- a/open_biomed/tasks/mol_edit/edited_SMILES.tsv +++ /dev/null @@ -1,1195 +0,0 @@ -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O N(C(=O)NN1C(=O)N()C)N(C)[C@@H]1OCCC1 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O NNC(=O)N1NC(=O)N(N)[C@@H]1ON 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O N(C(=O)NN1C(=O)N()C)N(C)[C@@H]1OCCC1 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O NNC(=O)N1NC(=O)N(N)[C@@H]1ON 0.001 -This molecule is soluble in water. Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O 10.0 -This molecule is soluble in water. Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O 1.0 -This molecule is soluble in water. Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O 0.1 -This molecule is soluble in water. 
Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O C(=O)(NNC(=O)cN#N)cN(C)C(=O)N 0.01 -This molecule is soluble in water. Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O Cn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O CN(C#N)NC(=O)NNC(=O)NNC(N)=O 0.001 -This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O 10.0 -This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O 1.0 -This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O 0.1 -This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O C(CC)n(C)c(=O)cc(C(=O)Nc2cnc(N3CCOCC3)cc2)c1 0.01 -This molecule is soluble in water. CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O CCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O C(CC)C(=O)NN(C)C(=O)Nc1ccc(N)n(CCC)c1 0.001 -This molecule is soluble in water. CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 10.0 -This molecule is soluble in water. CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 1.0 -This molecule is soluble in water. CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NC(=O)CN(C)OC)c1nc(C)cs1 0.1 -This molecule is soluble in water. CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 O=C(N(OC))N(O)C)N[C@@](C)(C)NC(=O)N 0.01 -This molecule is soluble in water. CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CC[C@@](C)(NCC(=O)N(C)OC)c1nc(C)cs1 CON(N)C(=O)CNC(C)(C)NC(=O)N(N)O 0.001 -This molecule is soluble in water. 
C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 10.0 -This molecule is soluble in water. C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 1.0 -This molecule is soluble in water. C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 c1cccc2c1Sc(=O)n(CCC(=O)N[C@@H](C)C1CC1)c2S 0.1 -This molecule is soluble in water. C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1Nc2ccccc2SC1=O)C1CC1 0.01 -This molecule is soluble in water. C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)CCN1c2ccccc2Sc2ccccc21)C1CC1 C[C@H](NC(=O)NN1Sc2ccccc2SC1=O)C1CC1 0.001 -This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 10.0 -This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 1.0 -This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2sc(C(N)=O)c(C(N)=O)c2C1 0.1 -This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCN(C(N)=O)c2cc(NC(N)=O)cc(C(N)=O)c21 0.01 -This molecule is soluble in water. CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N1CCc2c(sc(NC(=O)c3ccc4c(c3)OCCO4)c2C(N)=O)C1 CCOC(=O)N(CN(C(N)=O)c1cc(NC(N)=O)cc(C(N)=O)c1)C(N)=O 0.001 -This molecule is soluble in water. Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 10.0 -This molecule is soluble in water. 
Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 1.0 -This molecule is soluble in water. Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 0.1 -This molecule is soluble in water. Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Nc1ccc(F)c(N2CCN(c3ncc(Cl)cc3F)CC2)n1 0.01 -This molecule is soluble in water. Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Fc1ccccc1N1CCN(c2ncc(Cl)cc2F)CC1 Nc1ccc(F)c(N2CCN(c3ncc(Cl)cc3F)C2)n1 0.001 -This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2c(C)c(C)sc12 10.0 -This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2c(C)c(C)sc12 1.0 -This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2c(C)c(C)sc12 0.1 -This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 C(=O)(NC)CSc1c2c(c(C)c(C)[nH]2)cc1 0.01 -This molecule is soluble in water. CNC(=O)CSc1ncnc2sc(C)c(C)c12 CNC(=O)CSc1ncnc2sc(C)c(C)c12 C(=O)(NC)SSc1c2c(c(C)c(C)[nH]2)c(=O)[nH]1 0.001 -This molecule is soluble in water. COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 10.0 -This molecule is soluble in water. COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 1.0 -This molecule is soluble in water. COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 0.1 -This molecule is soluble in water. COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 C(=O)(OC)Nc1cc(N2CCN(c(C(=O)N)C)C2)nc(OC)c1 0.01 -This molecule is soluble in water. 
COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 COC(=O)c1cc2n(n1)CCN(c1cc(OC)nc(OC)n1)C2 C(=O)(OC)NCN(c)N1CCN(c(C(=O)N)c(C)N)C)C 0.001 -This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 10.0 -This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 1.0 -This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 0.1 -This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 N(C(=O)N[C@@H](C1CC1)ccccc(F)c1)N(C)C(=O)N 0.01 -This molecule is soluble in water. O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 O=C(N[C@@H](c1ccccc1)C1CC1)N1CCC(OCc2ccccc2F)CC1 N=C(N)N[C@H](NC(N)=O)c1ccccc1 0.001 -This molecule is soluble in water. Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(C)c(S(=O)(=O)N(CC(=O)NCc2ncccc2C)c2ccccn2)c1 10.0 -This molecule is soluble in water. Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 1.0 -This molecule is soluble in water. Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1cccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)c1 0.1 -This molecule is soluble in water. Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(C)CC(=O)NCc2ccccn2)cc1 0.01 -This molecule is soluble in water. 
Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 Cc1ccc(S(=O)(=O)N(CC(=O)NCc2ccccn2)c2cc(C)ccc2C)cc1 N(C(=O)CN(S(=O)(=O)ccc(C)cc)S(=O)(=O)N)(C)cc(C)cN 0.001 -This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 10.0 -This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 1.0 -This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 0.1 -This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)SCCN(C(=O)Nc2ccc(N3CCOCC3)cc2F)S1 0.01 -This molecule is soluble in water. CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 CC1(C)CN(C(=O)Nc2ccc(N3CCOCC3)c(F)c2)CCS1 O=C(Nc1ccc(N2CCOCC2)cc1F)N1CCS(=O)(=O)N1 0.001 -This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C 10.0 -This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C 1.0 -This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C 0.1 -This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C C(#N)c(C)c(C)c(NC(=O)cccc(N(C)C)c)=O 0.01 -This molecule is soluble in water. Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C Cc1sc(NC(=O)c2cccc(N(C)C)c2)c(C#N)c1C C(#N)c(C)c(C)c(NC(=O)ccc(N(C)C)c)=O 0.001 -This molecule is soluble in water. Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O 10.0 -This molecule is soluble in water. 
Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O 1.0 -This molecule is soluble in water. Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1ccc(Sc2cc3ccccc3[nH]2)cc1C=O 0.1 -This molecule is soluble in water. Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O O=C(N)cSS(=O)(=O)N 0.01 -This molecule is soluble in water. Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O Cc1cc(Sc2cc3ccccc3[nH]2)ccc1C=O NC(=O)NS(N)(=O)=O 0.001 -This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 10.0 -This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 1.0 -This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 0.1 -This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 C(NC(=O)cc(C)nc2cc(-ccc(F)cc2)cc2)(=O)NCC(F)(F)F 0.01 -This molecule is soluble in water. Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 Cc1noc2nc(-c3ccc(F)cc3)cc(C(=O)NCC(=O)NCC(F)(F)F)c12 C(NC(=O)cc(C)nc2cc(-cc(F)cc)c2)(=O)NCC(F)(F)F 0.001 -This molecule is soluble in water. Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 10.0 -This molecule is soluble in water. Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 1.0 -This molecule is soluble in water. 
Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 0.1 -This molecule is soluble in water. Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 CC(C)(C)c1cccc(S(=O)(=O)Nc2ccc(C(=O)N3CCC3)cc2)c1 0.01 -This molecule is soluble in water. Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 Cc1ccc(C(C)(C)C)cc1S(=O)(=O)Nc1ccc(C(=O)N2CCC2)cc1 c1c(S(=O)(NS(=O)(cc(C(C)(C)C)=O)=O)N)ccc(C(=O)N2CCCN2)c1 0.001 -This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 10.0 -This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 1.0 -This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 0.1 -This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 C(N(CC#N)CC#N)c1c2c(cccc2)cc1 0.01 -This molecule is soluble in water. CCCN(CC#N)Cc1csc2ccccc12 CCCN(CC#N)Cc1csc2ccccc12 C(N(CC#N)CC#N)c1c2c(cccc2)cc1 0.001 -This molecule is soluble in water. O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F 10.0 -This molecule is soluble in water. O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F 1.0 -This molecule is soluble in water. O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@@H](O)c1ccco1)[C@@H]1C[C@H]1c1ccccc1F 0.1 -This molecule is soluble in water. O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F CN(C(N)=O)[C@H](N)C(=O)NC[C@H](N)O 0.01 -This molecule is soluble in water. 
O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F O=C(NC[C@H](O)c1ccco1)[C@@H]1C[C@@H]1c1ccccc1F CN(S)C(=O)NC[C@@H](O)N(C)C(=O)N(C)S 0.001 -This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 10.0 -This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 1.0 -This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 0.1 -This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 O=S(Nc#N)(=O)Ncccc(NC(=O)c(-c2c(Cl)cccc2F)n[nH]2)c1 0.01 -This molecule is soluble in water. Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 Cc1onc(-c2c(F)cccc2Cl)c1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 O=S(Nc#N)(=O)NccccNC(=O)c(N#N)c(-c2c(Cl)cccc2F)cO 0.001 -This molecule is soluble in water. CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 10.0 -This molecule is soluble in water. CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 1.0 -This molecule is soluble in water. CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@@H](S/N=C\c2ccc(I)cc2)S[C@H]1CC(=O)NCCc1ccccc1 0.1 -This molecule is soluble in water. 
CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 c1c(I)ccc(N2N(C)C(=O)S[C@@H]2CC(=O)NCC2)c1 0.01 -This molecule is soluble in water. CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CN1C(=O)[C@H](CC(=O)NCCc2ccccc2)S/C1=N\c1ccc(I)cc1 CCNC(=O)CNSN=c1[nH]c(=O)n(C)s1 0.001 -This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 10.0 -This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 1.0 -This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCC1)N(C)c1ccsc1 0.1 -This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 CN1NC(O)(CC(=O)NC[C@H](N)N(C)C)NNC1=O 0.01 -This molecule is soluble in water. C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@@H](CNC(=O)CC1(O)CCCC1)c1ccsc1 C[C@H](NC(=O)NNC(=O)NN(C)C(=O)NN)N(C)C 0.001 -This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 10.0 -This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 1.0 -This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@H](C)O1 0.1 -This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)N[C@H]1NCN(CN(C)S(=N)(N)=O)[C@H](C)O1 0.01 -This molecule is soluble in water. COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)[C@@H]1CN(Cc2cnc(C3CC3)s2)C[C@@H](C)O1 COC(=O)N[C@H]1NCN(C(=O)NN(C)S(=N)(N)=O)CN(C)O1 0.001 -This molecule is soluble in water. 
C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 10.0 -This molecule is soluble in water. C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 1.0 -This molecule is soluble in water. C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 0.1 -This molecule is soluble in water. C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 CC(=O)N(C)[C@H](C)N(N)C(=O)CCSN 0.01 -This molecule is soluble in water. C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C[C@H](c1cccc2ccccc12)N(C(=O)CCCn1cncn1)C1CC1 C(N(C(=O)N(C)[C@@H](C)NC(=O)N)C1)CC#N 0.001 -This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 10.0 -This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 1.0 -This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 0.1 -This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1nccc(NCc2cc[nH]n2)n1 0.01 -This molecule is soluble in water. FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 FC(F)(F)c1ccnc(NCc2cc[nH]n2)n1 NC(=O)Nc1nccc(C(F)(F)F)n1 0.001 -This molecule is soluble in water. CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 10.0 -This molecule is soluble in water. CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 1.0 -This molecule is soluble in water. 
CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 0.1 -This molecule is soluble in water. CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 N#C[C@@H](N1CCN(C(=O)cN)CCN1)C(=O)N)N 0.01 -This molecule is soluble in water. CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 CC(C)[C@H](C#N)N1CCN(C(=O)c2cnc(-c3cccs3)s2)CC1 N#C[C@@H](N1CCN(C(=O)cN)CCN(C)C(=O)N)N)C(=O)N 0.001 -This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C/c1cc(C)n(-c2ccc([N+](=O)[O-])c(C)c2)c1 10.0 -This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C/c1cc(C)n(-c2ccc([N+](=O)[O-])c(C)c2)c1 1.0 -This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C/c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1 0.1 -This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccc([N+](=O)[O-])cc1NC(=O)C#N 0.01 -This molecule is soluble in water. Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1ccccc1NC(=O)/C(C#N)=C\c1cc(C)n(-c2ccc(C)c([N+](=O)[O-])c2)c1C Cc1cc([N+](=O)[O-])ccc1NC(=O)C#N 0.001 -This molecule is soluble in water. CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C 10.0 -This molecule is soluble in water. CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C 1.0 -This molecule is soluble in water. 
CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C 0.1 -This molecule is soluble in water. CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(NCc2cccc(C(C)C)c2)c1C 0.01 -This molecule is soluble in water. CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C CCn1c(C)cc(CNc2cccc(C(C)C)c2)c1C c1(C)n(CC)c(C)cc(NNc2cc(C(C)C)ccn2)c1 0.001 -This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)cn1 10.0 -This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)cn1 1.0 -This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 0.1 -This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 CN(C)C(=O)NC(=O)C(=O)NCc1ccc(Cl)cc1 0.01 -This molecule is soluble in water. O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NCc1ccco1)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)nc1 O=C(NC(=O)N(C)Oc)N(C)C(=O)N(C)Oc 0.001 -This molecule is soluble in water. N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)CNC(=O)N2CCCCC2)CC1 10.0 -This molecule is soluble in water. N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCN(C(=O)NCC(=O)N2CCCCC2)CC1 1.0 -This molecule is soluble in water. N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCN(C(=O)NCC(=O)N2CCCC2)CC1 0.1 -This molecule is soluble in water. N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCN(C(=O)NCC(N)=O)CC1 0.01 -This molecule is soluble in water. 
N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N#Cc1ccccc1N1CCCN(C(=O)NCC(=O)N2CCCCC2)CC1 N(C(=O)N1CCN(c#N)cccc2)C(N)=O 0.001 -This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 10.0 -This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 1.0 -This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 0.1 -This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1cccc(NC(=O)CCO)c1 0.01 -This molecule is soluble in water. CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 CCNC(=O)c1ccc(C)c(NC(=O)CCCO)c1 C(CC(Nc(NC(=O)c)cc(C)c1)=O)N(C)C(=O)N 0.001 -This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 10.0 -This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 1.0 -This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1nc(-c2cccc(OCc3cccc(F)c3)c2)cs1 0.1 -This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 C(NC(=O)c(N)c(S)(=O)N)(=O)Oc1c()c(OC)c(OC)c()c1 0.01 -This molecule is soluble in water. O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 O=C(NCC1CC1)c1csc(-c2cccc(OCc3cccc(F)c3)c2)n1 C(NC(=O)c(N)S)(=O)N(C)C)N(C)C(=O)OCc1c()c(O)c(O)c(N)c(N)c1 0.001 -This molecule is soluble in water. N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 10.0 -This molecule is soluble in water. 
N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 1.0 -This molecule is soluble in water. N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 0.1 -This molecule is soluble in water. N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2cc(S(=O)(=O)N3CCCC3)ccc2Cl)cc1 0.01 -This molecule is soluble in water. N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#Cc1ccc(C(=O)Nc2ccc(Cl)c(S(=O)(=O)N3CCCC3)c2)cc1 N#CcS(Nc1cc(NC(=O)cNS(=O)(=O)N)ccc1)(=O)N 0.001 -This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl 10.0 -This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl 1.0 -This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl 0.1 -This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CN1CCN(C(=O)NCCN)[C@H]1c1ccccc1Cl 0.01 -This molecule is soluble in water. CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CCc1nncn1CCNC(=O)N1CCC[C@H]1c1ccccc1Cl CN1CN(C(=O)NCCNS(N)(=O)=O)[C@H]1c1ccccc1Cl 0.001 -This molecule is soluble in water. Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 10.0 -This molecule is soluble in water. Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 1.0 -This molecule is soluble in water. 
Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 0.1 -This molecule is soluble in water. Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 C(NS(=O)(=O)C)[C@@H]1CN(C(=O)cc(-c2ccc(C)cc2)[nH]1)CCCN2 0.01 -This molecule is soluble in water. Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(-c2cc(C(=O)N3CCC[C@H](CNS(C)(=O)=O)C3)on2)cc1 Cc1ccc(C2=CC(=O)N(C)C[C@@H](CNS(C)(=O)=O)N2)cc1 0.001 -This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c(=O)n(C)c(=O)n(C)c21 10.0 -This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c(=O)n(C)c(=O)n(C)c21 1.0 -This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c(=O)n(C)c(=O)n(C)c21 0.1 -This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](NC(=O)c1cn(C)c(=O)n1C)C(=O)N(C)CC#N 0.01 -This molecule is soluble in water. C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C[C@@H](C(=O)N(C)CCC#N)n1cnc2c1c(=O)n(C)c(=O)n2C C(=O)(N(C)CCN(C)C(=O)[C@@H](C)N=c1cc(=O)n(C)[nH]1)N 0.001 -This molecule is soluble in water. COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 10.0 -This molecule is soluble in water. COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 1.0 -This molecule is soluble in water. COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 0.1 -This molecule is soluble in water. 
COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 0.01 -This molecule is soluble in water. COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 COc1ccc(-c2ccc(OC(F)(F)F)cc2)cn1 O=C(Oc)ONccc(S(=O)(=O)N)cc 0.001 -This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 10.0 -This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 1.0 -This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 0.1 -This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 O=S(=O)(c1ccc(S(=O)(=O)CCC(Nc(OC)ccc(C)c3)=O)cc1)N 0.01 -This molecule is soluble in water. COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 COc1ccc(C)cc1NC(=O)CCS(=O)(=O)c1ccc2c(c1)OCCO2 O=S(=O)(ccc(S(=O)(=O)CCC(Nc(OC)ccc(C)c2)(=O)N)cc()N)N(C)C(=O)N 0.001 -This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 10.0 -This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 1.0 -This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 0.1 -This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 c1c(N(CC)CC)ccc(NC(=O)NNC(=O)c)c1 0.01 -This molecule is soluble in water. CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 CCN(CC)c1ccc(NC(=O)c2ccc3c(c2)COC3)cc1 N(C(=O)Nccc(N(CC)C(=O)N)c)(CC)C(=O)N 0.001 -This molecule is soluble in water. 
CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 10.0 -This molecule is soluble in water. CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 1.0 -This molecule is soluble in water. CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 0.1 -This molecule is soluble in water. CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1n[nH]c(S(=O)(=O)[C@@H](C)S(=O)(=O)c2cccc(F)c2)n1 0.01 -This molecule is soluble in water. CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 CC(C)c1noc([C@H](C)S(=O)(=O)c2cccc(F)c2)n1 c1(S(=O)(=O)[C@@H](C)S(=O)(=O)cc(N(C)C)n[nH]1)cc(F)cc1 0.001 -This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 10.0 -This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 1.0 -This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@H](COC)C2)no1 0.1 -This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCN(C)C(=O)N(C)CN(C)C(=O)N(C)CN(C)C(N)=O 0.01 -This molecule is soluble in water. COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCc1cc(C(=O)N2CCC[C@@H](COC)C2)no1 COCN(C)C(=O)N(C)N(C)C(=O)N(C)CN(C)C(N)=O 0.001 -This molecule is soluble in water. COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C 10.0 -This molecule is soluble in water. COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C 1.0 -This molecule is soluble in water. 
COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@@H]1c2ccccc2CN1C(=O)CC(C)C 0.1 -This molecule is soluble in water. COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C C(NC(=O)[C@@H]1Cc1cccc(N)(=O)N1C(=O)CC(C)C)#N 0.01 -This molecule is soluble in water. COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C COc1ncccc1CNC(=O)[C@H]1Cc2ccccc2CN1C(=O)CC(C)C C(NC(=O)[C@@H](N(C)C(=O)N)C#N)c(OC)(OC)F 0.001 -This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl 10.0 -This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl 1.0 -This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl 0.1 -This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)O[C@H](C)NC(=O)Nc1ccc(Cl)cc1Cl 0.01 -This molecule is soluble in water. CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl CC(=O)Oc1ccccc1C(=O)Nc1ccc(Cl)cc1Cl NC(=O)Nc1ccc(Cl)cc1OC(N)=O 0.001 -This molecule is soluble in water. COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 10.0 -This molecule is soluble in water. COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 1.0 -This molecule is soluble in water. COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 0.1 -This molecule is soluble in water. 
COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 C(C(=O)N1[C@@H](c(OC)ccc(OC)c2)NC(=O)C)(=O)N 0.01 -This molecule is soluble in water. COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 COc1ccc(OC)c([C@@H]2CCCN2C(=O)Cc2c(C)nn(C)c2C)c1 C(C(=O)N1[C@@H](c(OC)ccc(OC)c)N)(=O)N 0.001 -This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 10.0 -This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 1.0 -This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2cc(/C=C3\NC(=O)NC3=O)c(OC)cc2c1 0.1 -This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 O=C1N/C(=C/c(OC)c2ccc(OC)cc2)NC(=O)NN1 0.01 -This molecule is soluble in water. COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 COc1ccc2ccc(OC)c(/C=C3\NC(=O)NC3=O)c2c1 O(C)c1c(/C=C2\NC(=O)NC(=O)NN2)cc(OC)c1 0.001 -This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 10.0 -This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 1.0 -This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 0.1 -This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 c(=O)(NCCNc1c([N+](=O)[O-])cnn1)c(C(C)C)n[nH]1 0.01 -This molecule is soluble in water. CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 CC(C)c1noc(CCNc2ccccc2[N+](=O)[O-])n1 c(=O)(NCCNc(=O)c(=O)n(C)n[nH]2)c(=O)n(C)[nH]1 0.001 -This molecule is soluble in water. 
O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1cccc2ccccc12)N1CCC[C@H](CO)C1 10.0 -This molecule is soluble in water. O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1cccc2ccccc12)N1CCC[C@H](CO)C1 1.0 -This molecule is soluble in water. O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 0.1 -This molecule is soluble in water. O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 C(NC(=O)N1C[C@@H](CO)CCC1)(=O)NCc 0.01 -This molecule is soluble in water. O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 O=C(NCc1ccc2ccccc2c1)N1CCC[C@H](CO)C1 C(NC(=O)N1C[C@@H](CO)N(C)C)(=O)N 0.001 -This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@H](C(=O)N(C)c2ncccc2N(C)C)CC1=O 10.0 -This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O 1.0 -This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O 0.1 -This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O N(C(=O)[C@@H]1CN(c(CC)cccc2)C(=O)N1)(C)N 0.01 -This molecule is soluble in water. CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O CCc1ccccc1N1C[C@@H](C(=O)N(C)c2cccnc2N(C)C)CC1=O N(C(=O)N(C)NC(=O)NNc(CC)cccc1)(C)C 0.001 -This molecule is soluble in water. C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 10.0 -This molecule is soluble in water. 
C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 1.0 -This molecule is soluble in water. C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 0.1 -This molecule is soluble in water. C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@H]2NCCS2)cc1 0.01 -This molecule is soluble in water. C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 C[S@@](=O)c1ccc(CNC(=O)[C@@H]2SCCc3sccc32)cc1 N(C(=O)[C@@H]1SCCN2C(=O)N#N)(C)CN#N 0.001 -This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)no1)N1CCOCC1 10.0 -This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)no1)N1CCOCC1 1.0 -This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 0.1 -This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(N1CCOCC1)N(C)C)NC(=O)cc(O)c(F)c(F)c(F)c2 0.01 -This molecule is soluble in water. O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(c1cc(COc2ccc(F)c(F)c2)on1)N1CCOCC1 O=C(N1CCOCCN1)cc(O)c(O)c(F)c(F)c(S)(=O)N 0.001 -This molecule is soluble in water. CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC 10.0 -This molecule is soluble in water. CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC 1.0 -This molecule is soluble in water. 
CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC 0.1 -This molecule is soluble in water. CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1cc(NC(=O)CN2C(=O)N(C)[C@@H](C)N2C(N)=O)ccc1OC 0.01 -This molecule is soluble in water. CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC CCOc1ccc(NC(=O)CN2C(=O)[C@@H](C)N(c3ccc(C)cc3)C2=O)cc1OC C(N1C(=O)N(c2ccc(C)cc2)C(=O)N1)C(Nc1cc(OC)c(OC)cN)=O 0.001 -This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 10.0 -This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 1.0 -This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 0.1 -This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 O(C)c1cc(NC(=O)NCC(=O)NN)c(OC)ccc1 0.01 -This molecule is soluble in water. COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 COc1cccc(NC(=O)NCCc2coc(-c3ccccc3)n2)c1 O(C)c(O)cc(NC(=O)NC(=O)NCC(=O)N)c(O)c(O)c 0.001 -This molecule is soluble in water. O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl 10.0 -This molecule is soluble in water. O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl 1.0 -This molecule is soluble in water. O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl 0.1 -This molecule is soluble in water. 
O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl N1(c2c(Cl)cccc2)C(=O)N[C@@H](N2CCN)CC1=O 0.01 -This molecule is soluble in water. O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl O=C1C[C@@H](N2CCC3(CC2)OCCO3)C(=O)N1c1ccccc1Cl NN1CCSN(c2ccccc2Cl)NC1=O 0.001 -This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br 10.0 -This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br 1.0 -This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br 0.1 -This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br NC(=O)Nc1ccc(NS(=O)(=O)N2CCN2)cc1 0.01 -This molecule is soluble in water. COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br COc1ccc(C(=O)Nc2ccc(S(=O)(=O)N3CCc4ccccc43)cc2)cc1Br N(S(=O)(=O)N1c(NC(=O)cN)ccc(OC)cN)C(=O)N 0.001 -This molecule is soluble in water. C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 10.0 -This molecule is soluble in water. C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 1.0 -This molecule is soluble in water. C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 0.1 -This molecule is soluble in water. C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C(=O)(N(CC=C)[C@@H](C)NC(=O)Cc1ccc(C)cc1 0.01 -This molecule is soluble in water. 
C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C=CCN(C(=O)Cc1ccc(C)cc1)c1ccccc1 C(=O)(N(CC=C)[C@@H](C)N)CcN 0.001 -This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 10.0 -This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 1.0 -This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 0.1 -This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 NCNC(=O)N1CCC(OCc2ccc(F)cc2)CC1 0.01 -This molecule is soluble in water. COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 COc1cccc(CNC(=O)N2CCC(OCc3ccc(F)cc3)CC2)n1 N(C(N1CCC(OCc)(N)C1)=O)C(N)=O 0.001 -This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 10.0 -This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 1.0 -This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 0.1 -This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 0.01 -This molecule is soluble in water. CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)CC1 CS(=O)(=O)N1CCC(C(=O)N2CCN(Cc3ccc(Cl)cc3)CC2)C1 0.001 -This molecule is soluble in water. 
CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 10.0 -This molecule is soluble in water. CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 1.0 -This molecule is soluble in water. CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 0.1 -This molecule is soluble in water. CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 O=C(NCC1(O)CCOCC1)/C=C/c(O[C@@H](C)C)O 0.01 -This molecule is soluble in water. CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 CC[C@H](C)Oc1ccccc1/C=C/C(=O)NCC1(O)CCOCC1 C[C@H](O[C@H](C)O)ON(C)C(=O)NCN(C)C(N)=O 0.001 -This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 10.0 -This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 1.0 -This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@@H]2CCN(c3ccc(C)cc3)C2=O)cc1 0.1 -This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 NC(=O)CCc1ccc(OC(N)=O)cc1 0.01 -This molecule is soluble in water. CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 CC(=O)CCc1ccc(OC(=O)[C@H]2CCN(c3ccc(C)cc3)C2=O)cc1 O=C(ON)NC(=O)Oc 0.001 -This molecule is soluble in water. COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 10.0 -This molecule is soluble in water. 
COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 1.0 -This molecule is soluble in water. COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 0.1 -This molecule is soluble in water. COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 c1(N2CCC(C(=O)NCc(OC)cc(OC)c)C2)ncc(N)cc1 0.01 -This molecule is soluble in water. COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 COc1ccc(OC)c(CNC(=O)C2CCN(c3ncc(-c4ccccc4)cn3)CC2)c1 N(C(=O)NCc(OC)cc(OC)c)(N)S(=O)(=O)N 0.001 -This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 10.0 -This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 1.0 -This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 0.1 -This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 N(CN1C(=O)N(C)CN1)C(=O)cN 0.01 -This molecule is soluble in water. O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 O=C(NCN1C(=O)c2ccccc2C1=O)c1ccc(Br)cc1 CN1CN(CNC(=O)NNC(N)=O)C(=O)N1 0.001 -This molecule is soluble in water. O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 10.0 -This molecule is soluble in water. O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 1.0 -This molecule is soluble in water. O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 0.1 -This molecule is soluble in water. 
O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CSCc1ccccc1)Nc1nc[nH]n1 0.01 -This molecule is soluble in water. O=C(CCSCc1ccccc1)Nc1nc[nH]n1 O=C(CCSCc1ccccc1)Nc1nc[nH]n1 NC(=O)NN=C(O)CSCc1ccccc1 0.001 -This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F 10.0 -This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F 1.0 -This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F 0.1 -This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F N(C(=O)c(C)ncsN)[C@@H](C(F)(F)F)Nc(C(=O)N)c 0.01 -This molecule is soluble in water. Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F Cc1ncsc1C(=O)Nc1ccccc1C(F)(F)F N(C(=O)Nc(C(F)(F)F)cN)C(=O)NN 0.001 -This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 10.0 -This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 1.0 -This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 0.1 -This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2ccc(Br)cc2)cc1 0.01 -This molecule is soluble in water. COc1ccc(OCCOc2cccc(Br)c2)cc1 COc1ccc(OCCOc2cccc(Br)c2)cc1 c(OCCOccc(OC)c)c(N)c(=N)c(N)c(S)c(=N)[nH]2 0.001 -This molecule is soluble in water. COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C 10.0 -This molecule is soluble in water. COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C 1.0 -This molecule is soluble in water. 
COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nn2cc(C)nc2c1C 0.1 -This molecule is soluble in water. COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCC(N)=O 0.01 -This molecule is soluble in water. COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C COc1ccc(Br)cc1CN(C)C(=O)CCc1c(C)nc2cc(C)nn2c1C C(N(C)Cc(OC)ccc(Br)c1)(=O)CCC(=O)N)C 0.001 -This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 10.0 -This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 1.0 -This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 0.1 -This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)C2)c1 0.01 -This molecule is soluble in water. Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)CC2)c1 Cc1cccc(OCCNC(=O)N2CCN(c3cnccn3)C2)c1 0.001 -This molecule is soluble in water. O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(n1-c1ccccc1)CCCS2 10.0 -This molecule is soluble in water. O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(n1-c1ccccc1)CCCS2 1.0 -This molecule is soluble in water. O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(n1-c1ccccc1)SCCC2 0.1 -This molecule is soluble in water. O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 C(=O)(N(C)C)NN=c1[nH]c(=O)N 0.01 -This molecule is soluble in water. 
O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 O=c1[nH]c2c(c(=O)n1-c1ccccc1)CCCS2 CN(CN)C(=O)NN=[SH](C)(N)N 0.001 -This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 10.0 -This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 1.0 -This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 0.1 -This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(=O)NC(C)C)C(=O)COc1ccccc1 0.01 -This molecule is soluble in water. CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(CC(=O)NC(C)C)C(=O)COc1ccccc1 CCN(C(=O)COc1ccccc1)C(=O)NC(=O)NC(C)C 0.001 -This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl 10.0 -This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl 1.0 -This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl 0.1 -This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@H]1COC[C@H](N)N1C(=O)N(N)S(N)(=O)=O 0.01 -This molecule is soluble in water. C[C@H]1COC[C@@H](C)N1C(=O)c1cccc(Cl)c1Cl C[C@@H]1COC[C@H](C)N1C(=O)c1cccc(Cl)c1Cl N(N(C(=O)N1[C@@H](C)COC[C@@H]1N)S(=O)(=O)NN 0.001 -This molecule is soluble in water. C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 10.0 -This molecule is soluble in water. 
C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 1.0 -This molecule is soluble in water. C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 0.1 -This molecule is soluble in water. C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@H](NC(=O)CC(C)(C)C)Nc1cccc(N2CCOC2=O)c1 0.01 -This molecule is soluble in water. C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)c1cccc(N2CCOC2=O)c1 C[C@@H](NC(=O)CCC(C)(C)C)NC(=O)C(N)=O 0.001 -This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 10.0 -This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 1.0 -This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 0.1 -This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 CN(C)C(=O)N(C[C@H]1NCCCO1)C(=O)/C=C/c1ccc(Cl)cn1 0.01 -This molecule is soluble in water. O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 O=C(/C=C/c1cccs1)N(Cc1ccc(Cl)cc1)C[C@@H]1CCCO1 CN(C[C@H](O)N(C)C(N)=O)C(=O)/C=N/N(C)C(N)=O 0.001 -This molecule is soluble in water. C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 10.0 -This molecule is soluble in water. C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 1.0 -This molecule is soluble in water. 
C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)CCCC2=O)Cc1cccs1 0.1 -This molecule is soluble in water. C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 CN(CN(C)C(=O)NN(C)C(N)=O)C(N)=O 0.01 -This molecule is soluble in water. C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 C[C@H](CNC(=O)C(=O)Nc1ccc2c(c1)C(=O)CCC2)Cc1cccs1 CN(N)NC(=O)NC(=O)N(C)N 0.001 -This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 10.0 -This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 1.0 -This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 0.1 -This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 C(=O)(Nc(C(=O)NC)cccc1)COc 0.01 -This molecule is soluble in water. CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 CNC(=O)c1ccccc1NC(=O)COc1cccc(C)c1 C(=O)(Nc(C(=O)N)c(O)C)COc 0.001 -This molecule is soluble in water. CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 10.0 -This molecule is soluble in water. CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 1.0 -This molecule is soluble in water. CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 0.1 -This molecule is soluble in water. CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 C(C(N1CCN(C(=O)Nc)c2ccc(N)nc2)C1)C 0.01 -This molecule is soluble in water. 
CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 CCC(=O)N1CCCN(C(=O)Nc2ccc3nc(C)oc3c2)CC1 C(C(N1CCN(C(=O)Nc)CC1)N)N(C)C 0.001 -This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC 10.0 -This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC 1.0 -This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC 0.1 -This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC c1(OC)cc(OC)c(#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)C2)c1 0.01 -This molecule is soluble in water. COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC COc1cc(C#N)c(NC(=O)CN2CCN(c3ccc(F)cc3)CC2)cc1OC c1(NC(=O)CN2CCN(c3ccc(F)cc3)C2)cc(OC)c(#N)c1)c(OC)c(O)c2 0.001 -This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 10.0 -This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@@H](C)O)c2)c(C)n1 1.0 -This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 0.1 -This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 0.01 -This molecule is soluble in water. Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@H](C)O)c2)c(C)n1 Cc1ccc(NC(=O)Nc2cccc([C@@H](C)O)c2)c(C)n1 0.001 -This molecule is soluble in water. 
CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 10.0 -This molecule is soluble in water. CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 1.0 -This molecule is soluble in water. CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 0.1 -This molecule is soluble in water. CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 C(C(=O)Nccc(C(=O)OCC)c)N(C)C 0.01 -This molecule is soluble in water. CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 CCOC(=O)c1ccc(NC(=O)CC2CCCCC2)cc1 C(C(=O)Nccc(C(=O)OCC)N)N 0.001 -This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)C1(CCCC1)O2 10.0 -This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)C1(CCCC1)O2 1.0 -This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCC1)O2 0.1 -This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 CN(N)NC(=O)CCC(=O)NNC(N)=O 0.01 -This molecule is soluble in water. O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 O=C(CCC(=O)c1ccc2c(c1)CCC2)Nc1ccc2c(c1)OC1(CCCC1)O2 CN(NC(N)=O)NC(=O)NNC(N)=O 0.001 -This molecule is soluble in water. Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 10.0 -This molecule is soluble in water. Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 1.0 -This molecule is soluble in water. 
Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 0.1 -This molecule is soluble in water. Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 0.01 -This molecule is soluble in water. Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)CC2)cc1 Cc1ccc(Sc2nccnc2N2CCC(C(=O)N3CCOCC3)C2)cc1 0.001 -This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 10.0 -This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 1.0 -This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 0.1 -This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)S(=O)(=O)N1CCC(O)(CSc2ccc(Br)cc2)CC1 0.01 -This molecule is soluble in water. O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)N1CCC(O)(CSc2ccc(Br)cc2)CC1 O=S(=O)(c1ccc(Cl)cc1)S(=O)(=O)N1CCC(O)(CSc2ccc(Br)cc2)CC1 0.001 -This molecule is soluble in water. COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 10.0 -This molecule is soluble in water. COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 1.0 -This molecule is soluble in water. COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)cn1 0.1 -This molecule is soluble in water. 
COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)NC(=O)N1CCCN1 0.01 -This molecule is soluble in water. COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)c1ccc(N2CCC[C@@H]2C2CCCC2)nc1 COCCNC(=O)NN1CCCN1C(N)=O 0.001 -This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 10.0 -This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 1.0 -This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 0.1 -This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1ccc(NC(=O)CN(C)c2c(C)noc2C)cc1SC 0.01 -This molecule is soluble in water. CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1cccc(NC(=O)CN(C)c2c(C)noc2C)c1 CSc1ccc(NC(=O)CN(C)c2c(C)noc2C)cc1SC 0.001 -This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 10.0 -This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 1.0 -This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 0.1 -This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 N(S(=O)(=O)NCC=C)C(=O)N1CCC(Cc)CN1 0.01 -This molecule is soluble in water. C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 C=CCNS(=O)(=O)c1cccc(C(=O)N2CCC(Cc3ccccc3)CC2)c1 N(S(=O)(=O)NCC=C)C(=O)N1CCC(Cc)CN1 0.001 -This molecule is soluble in water. 
COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 10.0 -This molecule is soluble in water. COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 1.0 -This molecule is soluble in water. COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 0.1 -This molecule is soluble in water. COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 0.01 -This molecule is soluble in water. COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 COC(=O)c1cccc(CCNC(=O)Nc2cc(F)ccc2C)c1 0.001 -This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 10.0 -This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 1.0 -This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 OCC#CC#Cc1ccc(Br)cc1 0.1 -This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 N=c1ccc(C#CN)c[nH]1 0.01 -This molecule is soluble in water. OCC#Cc1ccc(Br)cc1 OCC#Cc1ccc(Br)cc1 N#CC#Nc1ccc(=N)ccc1 0.001 -This molecule is soluble in water. CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 10.0 -This molecule is soluble in water. CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 1.0 -This molecule is soluble in water. CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@H](C)NC(=O)[C@@H](C)Sc1ccc(OC)cc1 0.1 -This molecule is soluble in water. CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 c1c(S[C@@H](C(=O)N[C@@H](C)NC(=O)[C@@H](C)Sc2ccc(OC)cc2)ccc(O)n1)cn2 0.01 -This molecule is soluble in water. 
CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 CCC[C@@H](C)NC(=O)[C@H](C)Sc1ccc(OC)cc1 O=C(N[C@@H](C)N(C)c(Sc1ccc(OC)cc1)=O)N 0.001 -This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 10.0 -This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 1.0 -This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 0.1 -This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 COc1ccccc1O[C@@H](NC(N)=O)C(=O)Nc1ccc(Cl)c(Cl)c1 0.01 -This molecule is soluble in water. CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 CC[C@@H](Oc1ccccc1OC)C(=O)Nc1ccc(Cl)c(Cl)c1 COc1ccccc1O[C@@H](N)C(=O)Nc1ccc(Cl)c(Cl)c1 0.001 -This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 10.0 -This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 1.0 -This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 0.1 -This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 NC(=O)Cc1ccc(F)c(F)c1 0.01 -This molecule is soluble in water. CC(=O)Cc1ccc(Br)c(F)c1 CC(=O)Cc1ccc(Br)c(F)c1 NS(=O)(=O)Cc1ccc(F)c(F)c1 0.001 -This molecule is soluble in water. Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 10.0 -This molecule is soluble in water. Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 1.0 -This molecule is soluble in water. 
Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 0.1 -This molecule is soluble in water. Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 CNC(=O)N(C)C(=O)[C@@H](C)Cc1c(C)n[nH]c1C 0.01 -This molecule is soluble in water. Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C(=O)[C@@H](C)Cc2c(C)n[nH]c2C)C2CC2)c1 Cc1cccc(CN(C)C(=O)N(C)C(N)=O)c1 0.001 -This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 10.0 -This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 1.0 -This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 0.1 -This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 C(NC(=O)NC(=O)NC(C)(C)NC(=O)N)cOc1ccc(F)cc1 0.01 -This molecule is soluble in water. CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 CC(C)(C)NC(=O)CNC(=O)NCc1ccc(Oc2ccc(F)cc2)cc1 C(NC(=O)NC(=O)NC(=O)N)c(Oc)c(Oc1ccc(F)cc1)c 0.001 -This molecule is soluble in water. O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 10.0 -This molecule is soluble in water. O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 1.0 -This molecule is soluble in water. O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 0.1 -This molecule is soluble in water. O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CC2)c1)N1CCSCC1 0.01 -This molecule is soluble in water. 
O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 O=C(c1ccnc(OC2CCC2)c1)N1CCCSCC1 NNC(=O)c1cc(N)cc(OC2CC2)c1 0.001 -This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 10.0 -This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 1.0 -This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 0.1 -This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 N(C(=O)Nc(NC(=O)Nc1cc(N2CCOC2)ccc1)c)Cc(F)ccc(N)c2 0.01 -This molecule is soluble in water. COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 COc1cccc(NC(=O)Nc2cc(C(=O)NCc3cccc(F)c3)ccc2N2CCOCC2)c1 N(C(=O)Nc(NC(=O)Nc(N)c(N2CCOC2)c)c()c(NC(=O)NC)c)c(O)c 0.001 -This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 10.0 -This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 1.0 -This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 0.1 -This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)C1 0.01 -This molecule is soluble in water. CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 CC(=O)NCCNC(=O)N1CCN(Cc2ccon2)CC1 N(C(=O)N1CCN(Cc2cccn2)C1)CCNC(=O)C 0.001 -This molecule is soluble in water. 
O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 10.0 -This molecule is soluble in water. O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 1.0 -This molecule is soluble in water. O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 0.1 -This molecule is soluble in water. O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O(Cc1ccc(C(=O)NCN(C)C(=O)NC)o1)c(Cl)cc(Cl)cN 0.01 -This molecule is soluble in water. O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O=C(NCc1cccs1)c1ccc(COc2cc(Cl)ccc2Cl)o1 O(CNC(=O)NC(=O)NO)CN(C)C(=O)OC)Oc#N 0.001 -This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C 10.0 -This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C 1.0 -This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C 0.1 -This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C O=C(N[C@@H](C(C)C)NC(=O)c(OC)ccc1)Nc1cc(O)n(C)n1 0.01 -This molecule is soluble in water. COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C COc1ccccc1C(=O)N[C@H](C(=O)Nc1cccc(-c2csc(C)n2)c1)C(C)C O(C)c(C(=O)N[C@H](C(=O)Nc)c(N)c(N)c(O)c)c(O)c1 0.001 -This molecule is soluble in water. CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 10.0 -This molecule is soluble in water. 
CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 1.0 -This molecule is soluble in water. CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 0.1 -This molecule is soluble in water. CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2ncccc12 0.01 -This molecule is soluble in water. CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2cccnc12 CCCCNC(=O)Oc1cccc2ncccc12 0.001 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 10.0 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 1.0 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 0.1 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 c1(NC(=O)N2CCN(C(=O)Ncccc(C)c)CC2)oc(-ccc)c2)nn1 0.01 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 Cc1ccc(NC(=O)N2CCC(c3nnc(-c4ccco4)o3)CC2)cc1 c1(NC(=O)N2CCN(C(=O)Nc(N)ccc(C)c)CC2)oc(S)c(N)c1 0.001 -This molecule is soluble in water. CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O 10.0 -This molecule is soluble in water. CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O 1.0 -This molecule is soluble in water. CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O 0.1 -This molecule is soluble in water. 
CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O c1(=O)n(C)c(=O)ccc1C(=O)N(C(C)C)Cc1cc(Cl)ccc1 0.01 -This molecule is soluble in water. CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CC(C)N(Cc1cccc(Cl)c1)C(=O)Cn1ccc(=O)n(C)c1=O CCN(CNC(N)=O)C(=O)Cn1ccc(=O)n(C)c1=O 0.001 -This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 10.0 -This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 1.0 -This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 0.1 -This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 c(C(=O)NCCNC(=O)C)c(-c1ccccc1)[nH] 0.01 -This molecule is soluble in water. CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)c1cn(Cc2ccccc2)nc1-c1cccs1 CC(=O)NCCNC(=O)NC(=O)NC(N)=O 0.001 -This molecule is soluble in water. Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 10.0 -This molecule is soluble in water. Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 1.0 -This molecule is soluble in water. Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 0.1 -This molecule is soluble in water. Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 c1(Oc2ccc(NC(=O)N(C)C(=O)N(C)Cc2n(C)ncn2)ccc2)ccc(C)nn1 0.01 -This molecule is soluble in water. 
Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 Cc1ccc(Oc2ccc(NC(=O)N(C)Cc3ncnn3C)cc2)nn1 c(NC(=O)N(C)C(=O)N(C)C)Occ(Occc(C)nn2)ccc1 0.001 -This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 10.0 -This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 1.0 -This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 0.1 -This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(OC(=O)NNC(=O)O)cc1 0.01 -This molecule is soluble in water. N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 N#Cc1ccc(NC(=O)Oc2ccccc2)cc1 O=C(OC(=O)Nc)Ncc(C#N)cc()S 0.001 -This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 10.0 -This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 1.0 -This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 0.1 -This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 N(C(N1CcN(C)c2ccccc21)=O)ccccS 0.01 -This molecule is soluble in water. CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 CN1CCN(C(=O)Nc2ccccc2Br)Cc2ccccc21 N(C(N1CcN(C)c2ccccc21)=O)ccccS 0.001 -This molecule is soluble in water. O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO 10.0 -This molecule is soluble in water. O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO 1.0 -This molecule is soluble in water. O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO 0.1 -This molecule is soluble in water. 
O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1c[nH]c(-c2ccccc2)cc1=O)N(CCO)CCO 0.01 -This molecule is soluble in water. O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO O=C(c1cnc(-c2ccccc2)[nH]c1=O)N(CCO)CCO c(=O)(N(CCO)CCO)c(=N)[nH]c(-c1ccccc1)[nH]2 0.001 -This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1ccc(OC)cc1NC(=O)C[C@@H](C)C(C)C 10.0 -This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1ccc(OC)cc1NC(=O)C[C@@H](C)C(C)C 1.0 -This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1ccc(OC)cc1NC(=O)C[C@H](C)C(C)C 0.1 -This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CC(C)[C@@H](C)CC(=O)Nc1ccc(C(N)=O)cc1NC(N)=O 0.01 -This molecule is soluble in water. CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C CNC(=O)c1cc(OC)ccc1NC(=O)C[C@@H](C)C(C)C C[C@H](NC(=O)Nc1cc(NC(N)=O)ccc1N(C)C)C(N)=O 0.001 -This molecule is soluble in water. C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 10.0 -This molecule is soluble in water. C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 1.0 -This molecule is soluble in water. C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCC3=O)cc2)C1 0.1 -This molecule is soluble in water. C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1CN(S(N)(=O)=O)N(S(N)(=O)=O)N1 0.01 -This molecule is soluble in water. 
C[C@H]1C[C@@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1C[C@H](C)CN(S(=O)(=O)c2ccc(N3CCCCC3=O)cc2)C1 C[C@@H]1CN(S(N)(=O)=O)N(S(N)(=O)=O)N1 0.001 -This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 10.0 -This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 1.0 -This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 0.1 -This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 c1(CNC(=O)c(C)c(N)N)cc(Br)cc()c1 0.01 -This molecule is soluble in water. Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 Cn1c(CNC(=O)c2ccc(Br)cc2)nc2ccccc21 C(NC(=O)c(N)c(C)N)(=O)NC#N 0.001 -This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 10.0 -This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 1.0 -This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 0.1 -This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)C(=S)N1 0.01 -This molecule is soluble in water. CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 CCOc1ccccc1NC(=S)N1CCN(c2ccccc2F)CC1 N(C(=S)N1CCN(c2c(F)cccc2)C(=S)N1)c(OCC)cccc2 0.001 -This molecule is soluble in water. C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 10.0 -This molecule is soluble in water. 
C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 1.0 -This molecule is soluble in water. C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 0.1 -This molecule is soluble in water. C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 O=C(NNC(=O)[C@@H](C)N=S(=O)(=O)N)[C@@H](N)C#N 0.01 -This molecule is soluble in water. C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 C[C@@H](C(=O)NC(c1cccs1)c1cccs1)n1cccn1 O=C(NNC(=O)[C@@H](N)N=S(=O)(=O)N)N=S(=O)(N)N 0.001 -This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C 10.0 -This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C 1.0 -This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C COC(=O)[C@@H]1CCN(C(=O)c2cnn(CC(C)C)c2C)c2ccccc21 0.1 -This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C C(=O)(N1c(CC)(C(=O)N)N(C)c(C)N)N 0.01 -This molecule is soluble in water. CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C CCc1c(C(=O)N2CC[C@@H](C(=O)OC)c3ccccc32)cnn1CC(C)C C(=O)(N1c(CC)(C(=O)N)N(C)c(N)cN)N 0.001 -This molecule is soluble in water. O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F 10.0 -This molecule is soluble in water. O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F 1.0 -This molecule is soluble in water. 
O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F 0.1 -This molecule is soluble in water. O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F CN(CC(F)(F)F)C(=O)N(CC(F)(F)F)C(=O)c1ccccn1 0.01 -This molecule is soluble in water. O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F O=C(c1ccccn1)N(Cc1ccccc1[N+](=O)[O-])CC(F)(F)F CN(CC(F)(F)F)C(=O)N(CC(F)(F)F)C(=O)c1ccccn1 0.001 -This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2c3c([nH]c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 10.0 -This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2c3c([nH]c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 1.0 -This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2c3c([nH]c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 0.1 -This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 C(N1CC(NC(=O)CN(C)C)=O)(=O)NC(=O)N 0.01 -This molecule is soluble in water. COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 COc1ccc2[nH]c3c(c2c1)CN(C(=O)Cn1ccc2ccccc21)CC3 C(N(C)C)(=O)NNC(=O)NNC(=O)N)C 0.001 -This molecule is soluble in water. Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 10.0 -This molecule is soluble in water. Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 1.0 -This molecule is soluble in water. Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2cnc(C)nc2n1 0.1 -This molecule is soluble in water. 
Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1c(N)cc(C(=O)NCSC#N)c2c(C)noc12 0.01 -This molecule is soluble in water. Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 Cc1cc(C(=O)NCCSCc2ccccc2)c2c(C)noc2n1 C(SCNC(=O)c1c2c(C)nc(N)c(N)c2)(N)=O 0.001 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl 10.0 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl 1.0 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl 0.1 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCC4)ncn3)C2)cc1Cl 0.01 -This molecule is soluble in water. Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl Cc1ccc(NC(=O)N2CCN(c3cc(N4CCCCC4)ncn3)CC2)cc1Cl N(C(N1CCN(cnc(N2CCCC2)c2)C1)=O)cc(Cl)c(N)cc2 0.001 -This molecule is soluble in water. CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C 10.0 -This molecule is soluble in water. CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C 1.0 -This molecule is soluble in water. CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C 0.1 -This molecule is soluble in water. CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCC1)c1c(C)nn(C)c1C 0.01 -This molecule is soluble in water. 
CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NCC1(O)CCCCC1)c1c(C)nn(C)c1C CC[C@@H](NC(=O)NC(=O)NO)c1c(C)nn(C)c1C 0.001 -This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 10.0 -This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 1.0 -This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 0.1 -This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 0.01 -This molecule is soluble in water. N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 N#Cc1cc(NCc2cccc(S(N)(=O)=O)c2)nc2ccccc12 0.001 -This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 10.0 -This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 1.0 -This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CCN(C)C3)cc1 0.1 -This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 C(N(C)C)CN1CC(NC(=O)cNC(=O)N)(C)CN1 0.01 -This molecule is soluble in water. COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 COc1ccc(C(=O)Nc2nc3c(s2)CN(C)CC3)cc1 C(N(C)C)CN(C)C(=O)NC(=O)cNNC(=O)N 0.001 -This molecule is soluble in water. CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 10.0 -This molecule is soluble in water. 
CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 1.0 -This molecule is soluble in water. CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 0.1 -This molecule is soluble in water. CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 O(C)[C@@H](OC)C(Nc(NC(=O)NCc1sc(CC)cn1)C)=O 0.01 -This molecule is soluble in water. CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 CCc1cnc(CNC(=O)Nc2cccc([C@@H](C)OC)c2)s1 O(C)[C@@H](OC)C(Nc(NC(=O)NC(=O)NC)cc(C(=O)N)[nH]1)=O 0.001 -This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2ncnc(NO)c21 10.0 -This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 1.0 -This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2ncnc(NO)c21 0.1 -This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 O=C(NO)Nc1cnc2n(C[C@H](O)CO)cn2c1 0.01 -This molecule is soluble in water. OC[C@@H](O)Cn1cnc2c(NO)ncnc21 OC[C@@H](O)Cn1cnc2c(NO)ncnc21 O=NNc(N)ncn(C[C@@H](O)CO)c(N)N 0.001 -This molecule is soluble in water. Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 10.0 -This molecule is soluble in water. Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 1.0 -This molecule is soluble in water. Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 0.1 -This molecule is soluble in water. Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 c1(C)nc(CN2CCN(Cc3c(Cl)cncc3)C(=O)N2)cc1 0.01 -This molecule is soluble in water. 
Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1nc(CN2CCN(Cc3ccncc3Cl)CC2)cs1 Cc1cc(CN2CCN(Cc3ccncc3Cl)C(=O)N2)nc(N)n1 0.001 -This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 10.0 -This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 1.0 -This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 0.1 -This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 C(=O)(OCC)Nc(NC(=O)NN)ccc 0.01 -This molecule is soluble in water. CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 CCOC(=O)c1ccccc1NC(=O)c1ccc(N2CCCC2)nn1 C(=O)(OCC)Nc(NC(=O)OCC)cN 0.001 -This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 10.0 -This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 1.0 -This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 0.1 -This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc(N)[nH]c(=O)c(N)C(=O)N)NN 0.01 -This molecule is soluble in water. O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(Nc1cn[nH]c1)C(=O)Nc1cnc(-c2ccccc2)s1 O=C(NNC(=O)C(N)N)Nc(C)c(-c(N)c(C)N)[nH]1 0.001 -This molecule is soluble in water. CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 10.0 -This molecule is soluble in water. CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)CNC(=O)/C=C/c1ccco1 1.0 -This molecule is soluble in water. 
CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 0.1 -This molecule is soluble in water. CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CN(C)NC(=O)N(C)C(=O)NCC(=O)N(C)C 0.01 -This molecule is soluble in water. CCNC(=O)CNC(=O)/C=C/c1ccco1 CCNC(=O)CNC(=O)/C=C/c1ccco1 CN(C)/N=C(\N)C(=O)N(C)NC(=O)N(C)C(N)=O 0.001 -This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 10.0 -This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 1.0 -This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 0.1 -This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 C(C(C)C)(=O)N(C1CN(c(C#N)ccc(N)=O)C1)C 0.01 -This molecule is soluble in water. Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 Cc1nc(CC(C)C)sc1C(=O)N(c1ccc(C#N)cc1)C1CC1 C(C(C)C)(=O)N(C1CN(c(C)C#N)C1)C 0.001 -This molecule is soluble in water. CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 10.0 -This molecule is soluble in water. CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 1.0 -This molecule is soluble in water. CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 0.1 -This molecule is soluble in water. CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C(N)=O)C(=O)N1CCC(COc2ccccn2)C1 0.01 -This molecule is soluble in water. 
CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C)C(=O)c1cccc(C(=O)N2CCC(COc3ccccn3)CC2)c1 CN(C(N)=O)C(=O)N1CCC(N)(CON)C1 0.001 -This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 10.0 -This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 1.0 -This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc2C1=O)C1CC1 0.1 -This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 N(C(=O)N([C@@H](C)c1ccc(Cl)cc1)N(C)C(=O)CS2)C 0.01 -This molecule is soluble in water. C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@H](c1ccc(Cl)cc1)N(C(=O)CN1C(=O)CSc2ccccc21)C1CC1 C[C@@H](c1ccc(Cl)cc1)N(C(=O)NN(C)C(N)=O)C1CN1 0.001 -This molecule is soluble in water. CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 10.0 -This molecule is soluble in water. CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 1.0 -This molecule is soluble in water. CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 0.1 -This molecule is soluble in water. CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 c(S[C@@H](C)C(=O)NC(=O)NC(=O)NCC)ccc(F)cc1 0.01 -This molecule is soluble in water. 
CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 CCNC(=O)NC(=O)[C@H](C)Sc1nnc(-c2ccc(F)cc2)n1Cc1ccccc1 c(S[C@@H](C)C(=O)NC(=O)NC(=O)NC(=O)NCC)ccc(F)cc1 0.001 -This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 10.0 -This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 1.0 -This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 0.1 -This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 C1C(C(=O)N2C[C@@H](CNC(=O)C3(c)cc(Cl)c)S2)C1 0.01 -This molecule is soluble in water. O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 O=C(C1CCOCC1)N1CCC[C@H](CNC(=O)C2(c3ccc(Cl)cc3)CC2)C1 C1(C(=O)N2C[C@@H](CNC(=O)C3(c)cc(Cl)c)S2)CCOC1 0.001 -This molecule is soluble in water. Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(N)nc(N)n2)CC1 10.0 -This molecule is soluble in water. Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(N)nc(N)n2)CC1 1.0 -This molecule is soluble in water. Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 0.1 -This molecule is soluble in water. Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(N)nc(N)n2)CC1 0.01 -This molecule is soluble in water. 
Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 Cc1ccccc1COC1CCN(c2cc(-n3cccn3)nc(N)n2)CC1 N(N1CCC(OCc2c(C)cccc2)CC1)c(N)nc(N)nN)=S 0.001 -This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 10.0 -This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 1.0 -This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 0.1 -This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 CN(CCO)C(=O)N(CCO)C(=O)NC(N)=O 0.01 -This molecule is soluble in water. O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 O=C(Cn1nc(-c2ccccc2)ccc1=O)N(CCO)Cc1ccccc1 NC(=O)N(CCO)C(=O)N(CCO)C(N)=O 0.001 -This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1nn(-c2ccccc2)c(C)c1NC(=S)NC1CCCCC1 10.0 -This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C 1.0 -This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C 0.1 -This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C c1(=O)n(C)c(NC(=S)NC2CCCCC2)c(=O)n1C 0.01 -This molecule is soluble in water. Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C Cc1c(NC(=S)NC2CCCCC2)c(=O)n(-c2ccccc2)n1C C(NC(=S)NC1CCCC1)(=O)NNc(C)c(=O)n(C)[nH]1 0.001 -This molecule is soluble in water. 
CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 10.0 -This molecule is soluble in water. CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 1.0 -This molecule is soluble in water. CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@H](C)O)C(C)C)s1 0.1 -This molecule is soluble in water. CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 O=C(N(C(=O)N)C(C)C)N[C@@H](c1ccccc1)O)O 0.01 -This molecule is soluble in water. CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 CC[C@H](c1ccccc1)c1nnc(NC(=O)N(C[C@@H](C)O)C(C)C)s1 O=C(N(C(=O)N)C(=O)N)N(C(=O)O)[C@@H](O)c1ccccc1)O 0.001 -This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 10.0 -This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(CN(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 1.0 -This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 0.1 -This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 C(N(C(=O)Nc1cc(OCC)ccc1)C(=O)Nc1cc(Cl)ccc1)(=O)Nc 0.01 -This molecule is soluble in water. CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1ccc(N(Cc2nnc3n2CCCCC3)C(=O)Nc2cccc(Cl)c2)cc1 CCOc1cccc(NC(=O)N(C(N)=O)S(=O)(=O)Nc2cccc(Cl)c2)c1 0.001 -This molecule is soluble in water. 
Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 10.0 -This molecule is soluble in water. Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 1.0 -This molecule is soluble in water. Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 0.1 -This molecule is soluble in water. Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 C(Nc(NC(=O)c(C(F)(F)F)c)C)(=O)NOCC(=O)N 0.01 -This molecule is soluble in water. Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc2c(c(NC(=O)c3ccccc3C(F)(F)F)c1)OCC(=O)NC2 Cc1cc(NC(=O)N(C)C(F)(F)F)ccc1NC(N)=O 0.001 -This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 10.0 -This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 1.0 -This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 0.1 -This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C)[C@@H](C)C(C)C)nn2)cc1 0.01 -This molecule is soluble in water. COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C3CC3)[C@@H](C)C(C)C)nn2)cc1 COc1ccc(-n2cc(CNC(=O)N(C)[C@@H](C)C(C)C)cn2)cc1 0.001 -This molecule is soluble in water. 
CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 10.0 -This molecule is soluble in water. CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 1.0 -This molecule is soluble in water. CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 0.1 -This molecule is soluble in water. CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 0.01 -This molecule is soluble in water. CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 CC(C)C[C@@H](C)CC(=O)N1CCC(Nc2ccccc2)CC1 C(C(=O)N1CCC(Ncc(N)ccc2)C1)[C@@H](C)CC(C)C 0.001 -This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2cccc(C(F)(F)F)c2)nc1-c1ccccn1 10.0 -This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 1.0 -This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1nc(-c2ccccn2)nc(NCc2cccc(C(F)(F)F)c2)c1C#N 0.1 -This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(N)c(NC(N)=O)c1C(F)(F)F 0.01 -This molecule is soluble in water. N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 N#Cc1c(NCc2ccccc2)nc(-c2ccccn2)nc1-c1cccc(C(F)(F)F)c1 c(C(F)(F)F)cc(NC#N)C(=O)NNC(=O)NO 0.001 -This molecule is soluble in water. CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 10.0 -This molecule is soluble in water. 
CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 1.0 -This molecule is soluble in water. CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 0.1 -This molecule is soluble in water. CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CN(C(N)=O)C1CC(N(C)C(=O)CCc2ccsc2)C1 0.01 -This molecule is soluble in water. CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CC1CCC(N(C(=O)CCc2ccsc2)C2CC2)CC1 CN(C(N)=O)C1CC(N(C)C(=O)CCc2ccsc2)C1 0.001 -This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 10.0 -This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 1.0 -This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 0.1 -This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 0.01 -This molecule is soluble in water. COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 COc1ccccc1S(=O)(=O)NCCc1c[nH]c2ccccc12 O=S(=O)(N(C)S(=O)(=O)c(OC)cccc(OC)O)N 0.001 -This molecule is soluble in water. O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 10.0 -This molecule is soluble in water. O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 1.0 -This molecule is soluble in water. O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 0.1 -This molecule is soluble in water. O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 N1(C(=O)NNC(=O)N)CCN(C(=O)N)c2cccc21 0.01 -This molecule is soluble in water. 
O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 O=C(c1nccc2ccccc12)N1CCn2ncnc2C1 CN1CN(C(=O)NNC(N)=O)NNC1=O 0.001 -This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 10.0 -This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 1.0 -This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 0.1 -This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cc(OC(F)F)ccc1)NNC(=O)cN 0.01 -This molecule is soluble in water. O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc1cccc(OC(F)F)c1)c1coc(-c2ccccc2)n1 O=C(Nc(OC(F)F)cc(NC(=O)N)N)N 0.001 -This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 10.0 -This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 1.0 -This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 0.1 -This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 c1(N)c(C(=O)N2CCN(c(OC)c(N)c)C2)c(S)sc1N 0.01 -This molecule is soluble in water. COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 COc1ccccc1N1CCN(C(=O)c2nc(C)sc2-c2ccccc2)CC1 N(N1CCN(C(=O)c(N)sc(C)n2)CC1)(C)C#N)C 0.001 -This molecule is soluble in water. 
O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 10.0 -This molecule is soluble in water. O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 1.0 -This molecule is soluble in water. O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@H]1CCCO1)[C@@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 0.1 -This molecule is soluble in water. O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 N(C(=O)N1CC(N)(C)C1)NC(=O)[C@@H]1CN(S(=O)(=O)N)C(=O)N 0.01 -This molecule is soluble in water. O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 O=C(NC[C@@H]1CCCO1)[C@H]1CC(=O)N(c2ccc(OCC(=O)N3CCCC3)cc2)C1 N(C(=O)NN1CC(=O)N(C)C)N(C)C(=O)N 0.001 -This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 10.0 -This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 1.0 -This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 0.1 -This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 C(=O)(OCC)Nc1c(NC(=O)CN(C)=O)c2c(c1)C[C@@H](N)N2 0.01 -This molecule is soluble in water. CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 CCOC(=O)c1c(NC(=O)CC(C)C)sc2c1CC[C@@H](C)C2 O(C(=O)Nc1c(NC(=O)CS(=O)(N)=O)c2c(N)c(N)(N)=O)N(C)S2 0.001 -This molecule is soluble in water. 
C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 10.0 -This molecule is soluble in water. C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 1.0 -This molecule is soluble in water. C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCC1 0.1 -This molecule is soluble in water. C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 O=C(N[C@@H](C)Cn1nccc1)NC(NCc(OC1CCC1)O)=O 0.01 -This molecule is soluble in water. C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 C[C@@H](Cn1cccn1)NC(=O)NCc1cccnc1OC1CCCC1 O=C(N[C@@H](C)Cn1nccc1)NC(NCc(OC1CCC1)O)=O 0.001 -This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 10.0 -This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 1.0 -This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 0.1 -This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 S(=O)(=O)(NCc(=O)n(-cccc(F)c)N)C#N 0.01 -This molecule is soluble in water. Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 Fc1ccc(-n2nc(CSc3ccccn3)c3c2CCC3)cc1 S(=O)(NCC(=O)NN=C(C)O)(=O)Nc(C)ccc(F)N 0.001 -This molecule is soluble in water. Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 10.0 -This molecule is soluble in water. Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 1.0 -This molecule is soluble in water. 
Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 0.1 -This molecule is soluble in water. Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NC(C)(C)N 0.01 -This molecule is soluble in water. Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 Cc1cccc(C)c1C(=O)NCC(C)(C)c1ccncc1 C(C(C)(C)CNC(=O)c(C)c(C)c(N)=O)N 0.001 -This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 c1(CC)nn2c(sc(COc3ccc(NC(=O)c4c(F)cccc4)cc3)n2)s1 10.0 -This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 c1(CC)nn2c(sc(COc3ccc(NC(=O)c4c(F)cccc4)cc3)n2)s1 1.0 -This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 c1(C(=O)Nc2ccc(OCc3nc4n(c(=O)sc4)c(=O)c3)cc2)c(F)cccc1 0.1 -This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 O=C(Nc(OC)COC(=O)cNC(=O)cc(OC)c(=O)n(C)c(=O)N)O 0.01 -This molecule is soluble in water. CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 CCc1nn2c(=O)cc(COc3ccc(NC(=O)c4ccccc4F)cc3)nc2s1 O=C(NcCOOCCN(C)C(=O)CNC(=O)OCC(=O)N)OCC(=O)N 0.001 -This molecule is soluble in water. Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 10.0 -This molecule is soluble in water. Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 1.0 -This molecule is soluble in water. Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 0.1 -This molecule is soluble in water. 
Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 C(Nc(NC(=O)C1CCN(c)nc2cccc(N)n2)C1)(=O)N 0.01 -This molecule is soluble in water. Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 Cc1ccccc1NC(=O)C1CCN(c2nc3ccccc3s2)CC1 C(Nc(NC(=O)C)c(N)C)CN1CCN(c)c2c(nc()c(N)c2)C1 0.001 -This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 10.0 -This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 1.0 -This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 0.1 -This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1cc(NN2CCN(C(=O)c3ccccc3)CC2)cn1 0.01 -This molecule is soluble in water. Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 Cn1ncc2c(N3CCCN(C(=O)c4ccccc4)CC3)ncnc21 NNC(=O)N1CCN(c2cncnc2N)CC1 0.001 -This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F 10.0 -This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F 1.0 -This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F 0.1 -This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)N(C)C(=O)Nc1c(F)cccc1F 0.01 -This molecule is soluble in water. COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F COCCN(C)C(=O)Nc1c(F)cccc1F 0.001 -This molecule is soluble in water. COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC 10.0 -This molecule is soluble in water. 
COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC 1.0 -This molecule is soluble in water. COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC 0.1 -This molecule is soluble in water. COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC O(C)c(OC)/C=C/C(=O)N(C)C(=O)N 0.01 -This molecule is soluble in water. COc1cccc(/C=C/C(=O)c2cccnc2)c1OC COc1cccc(/C=C/C(=O)c2cccnc2)c1OC O(C)c(OC)NC(=O)/C(=C/C(=O)N)N 0.001 -This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 10.0 -This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 1.0 -This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 0.1 -This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 N(S(=O)(=O)NCC)[C@@H]1CN(C(=O)cN)CC1 0.01 -This molecule is soluble in water. CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)[C@H]1CCN(C(=O)c2cc(F)ccc2F)C1 CCNS(=O)(=O)NS(=O)(=O)c1cc(F)ccc1F 0.001 -This molecule is soluble in water. Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 10.0 -This molecule is soluble in water. Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 1.0 -This molecule is soluble in water. Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 0.1 -This molecule is soluble in water. Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 NC(N)=O 0.01 -This molecule is soluble in water. 
Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 Cc1cc(F)cc(C(=O)Oc2cccc(C(N)=O)c2)c1 NC(N)=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@@H](O)NC(=O)O)OCc1nc2cccc(O)c2 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O=C(NC[C@H]1CCCO1)c1ccc(OCc2cn3ccccc3n2)cc1 O(C(=O)NC(=O)O)Cc(OC)NC(=O)N 0.001 -This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N 10.0 -This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N 1.0 -This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N 0.1 -This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(NS[C@@H](C)C(=O)Nc2ccccc2)ccc1N 0.01 -This molecule is soluble in water. Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N Cc1cc(S[C@@H](C)C(=O)Nc2ccccc2)ccc1N C[C@H](SN[C@@H](C)S)C(=O)Nc1ccccc1 0.001 -This molecule is soluble in water. Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@@H](O)c3ccc4ccccc4c3)CC2=O)cc1 10.0 -This molecule is soluble in water. 
Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@@H](O)c3ccc4ccccc4c3)CC2=O)cc1 1.0 -This molecule is soluble in water. Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@@H](O)c3ccc4ccccc4c3)CC2=O)cc1 0.1 -This molecule is soluble in water. Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[CH]O)NC2=O)cc1 0.01 -This molecule is soluble in water. Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](O)c3ccc4ccccc4c3)CC2=O)cc1 Cc1ccc(N2CCN(C(=O)[C@H](N)O)NC2=O)cc1 0.001 -This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 10.0 -This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 1.0 -This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 0.1 -This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 O(C(=O)OCCCC)C(=O)c(N)cnc(N)c 0.01 -This molecule is soluble in water. CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCCOC(=O)c1csc(-c2cnccn2)n1 CCCCOC(=O)ONC(=O)NN 0.001 -This molecule is soluble in water. Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 10.0 -This molecule is soluble in water. Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 1.0 -This molecule is soluble in water. 
Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 0.1 -This molecule is soluble in water. Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 c1c(NC(=O)CN2c(C)C(=O)Nc2cccn2)cc([N+](=O)[O-])c1 0.01 -This molecule is soluble in water. Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 Cc1ccc([N+](=O)[O-])cc1NC(=O)CN1C[C@H](C)C(=O)Nc2ccccc21 N(C(=O)Nc1c(C)ccc([N+](=O)[O-])c1)CN1c(C)C(=O)N 0.001 -This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 10.0 -This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 1.0 -This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 0.1 -This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 N(C(=O)N[C@@H](C)C(N1CCCN2C(=O)N2)=O)C 0.01 -This molecule is soluble in water. Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 Cc1cccc(C(=O)N[C@H](C)C(=O)N2CCCc3ccccc32)c1 N(C(=O)N[C@@H](C)C(N1CCCN2C(=O)N2)=O)S 0.001 -This molecule is soluble in water. COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 10.0 -This molecule is soluble in water. COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 1.0 -This molecule is soluble in water. 
COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 0.1 -This molecule is soluble in water. COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 C(N1CCC(C(Nc2c(OC)ccc(S(N(C)C)(=O)=O)c2)=O)C1)(=O)ccc(F)cN 0.01 -This molecule is soluble in water. COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 COc1ccc(S(=O)(=O)N(C)C)cc1NC(=O)C1CCN(C(=O)c2ccc(F)cc2)CC1 N(S(=O)(=O)N(C)C)S(=O)(=O)cc(OC)NC(=O)C1CCN(C(=O)cN)C1 0.001 -This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 10.0 -This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 1.0 -This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 0.1 -This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 c1(=O)c(C(=O)Nc(C)cc([N+](=O)[O-])[nH]2)cc(=O)o1 0.01 -This molecule is soluble in water. Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 Cc1ccc(NC(=O)c2cc3ccccc3oc2=O)c([N+](=O)[O-])c1 O=C(Nc1c([N+](=O)[O-])cc(C)c(N)[nH]1)NC 0.001 -This molecule is soluble in water. COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F 10.0 -This molecule is soluble in water. COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F 1.0 -This molecule is soluble in water. 
COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F 0.1 -This molecule is soluble in water. COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F C(NC(=O)c(F)OC)CNC(=O)c(F)c(OC)c 0.01 -This molecule is soluble in water. COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F COc1cccc(C(=O)NCCNC(=O)c2cccnc2)c1F C(NC(=O)c(F)OC)NC(=O)c(N)NC(=O)N 0.001 -This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C 10.0 -This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C 1.0 -This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C 0.1 -This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1cccnc1C[C@H](Br)c1c(C)cc(F)cc1N 0.01 -This molecule is soluble in water. Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@H](Br)Cc1ncccc1C Cc1ccc(F)cc1[C@@H](N)N(C)C(N)=O 0.001 -This molecule is soluble in water. CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl 10.0 -This molecule is soluble in water. CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl 1.0 -This molecule is soluble in water. CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl 0.1 -This molecule is soluble in water. CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl N(S(=O)(=O)c(Cl)ccc1)(C)S(=O)(=O)N(C)S(=O)(=O)N(C)S(=O)N 0.01 -This molecule is soluble in water. 
CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(Cc1ccc(C(=O)NC2CC2)cc1)S(=O)(=O)c1ccccc1Cl CN(S(N)(=O)=O)S(=O)(=O)N(C)S(=O)(=O)NS(N)(=O)=O 0.001 -This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 10.0 -This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 1.0 -This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 0.1 -This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 C(NC(=O)cc(S(NC)(=O)=O)c[nH]1)cNS(=O)(=O)NC 0.01 -This molecule is soluble in water. CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 CNS(=O)(=O)c1cccc(CNC(=O)c2cc(C(C)(C)C)n[nH]2)c1 C(NC(=O)cc(S(NC)(=O)=O)c[nH]1)NS(=O)(=O)NC 0.001 -This molecule is soluble in water. Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 10.0 -This molecule is soluble in water. Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 1.0 -This molecule is soluble in water. Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 0.1 -This molecule is soluble in water. Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@@H](N)C#N)CC2)c(=O)[nH]1 0.01 -This molecule is soluble in water. 
Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@H](C#N)c3ccccc3)CC2)c(=O)[nH]1 Cc1ccc(C(=O)N2CCN([C@@H](N)C#N)CC2)c(=O)[nH]1 0.001 -This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 10.0 -This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 1.0 -This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 0.1 -This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 NC(=O)NNC(=O)NCCNC(=O)Cc1ccc(F)cc1 0.01 -This molecule is soluble in water. O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 O=C(Cc1ccc(F)cc1)NCCNC(=O)c1[nH]nc2c1COCC2 NC(=O)NNC(=O)NCNC(=O)Cc1ccc(F)cc1 0.001 -This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 10.0 -This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 1.0 -This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 0.1 -This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#CC1=CC=C(N)OC(=O)C=CC(C(N)=O)=NC=C1 0.01 -This molecule is soluble in water. N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 N#Cc1ccc(Oc2ccc(C(N)=O)nn2)c(N)c1 O(c(N)c(Oc)cc(C#N)c)c(C(=O)N)n1 0.001 -This molecule is soluble in water. CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 10.0 -This molecule is soluble in water. 
CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 1.0 -This molecule is soluble in water. CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 0.1 -This molecule is soluble in water. CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 0.01 -This molecule is soluble in water. CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@@H](C)c1ccc(C)c(F)c1 CNS(=O)(=O)CC(=O)N[C@H](C)c1ccc(F)c(C)c1 0.001 -This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 10.0 -This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 1.0 -This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 0.1 -This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 CC(=NNC(=O)CCN)NC(=O)Cc1cccc(Cl)c1 0.01 -This molecule is soluble in water. Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 Cc1nc(NC(=O)CCn2cncn2)sc1Cc1cccc(Cl)c1 CN(C)CC(=O)NNC(=O)CN(C)Cc1cccc(Cl)c1 0.001 -This molecule is soluble in water. CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 10.0 -This molecule is soluble in water. CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 1.0 -This molecule is soluble in water. CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 0.1 -This molecule is soluble in water. CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@@H](C)C(=O)n1cc2ccccc2c(N)c1=O 0.01 -This molecule is soluble in water. 
CCO[C@H](C)C(=O)N1CCOc2ccccc21 CCO[C@H](C)C(=O)N1CCOc2ccccc21 c1cccc2c(N)c(=O)N(C(=O)[C@@H](OCC)C)cS2 0.001 -This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 10.0 -This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 1.0 -This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 0.1 -This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 c1(C=O)c(C)n(CC(=O)N(C)cccc)n1 0.01 -This molecule is soluble in water. Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 Cc1c(C=O)c2ccccc2n1CC(=O)N(C)c1ccccc1 C(=O)(N(C)cc(N)=O)Cn1cccc1 0.001 -This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 10.0 -This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 1.0 -This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 0.1 -This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 C(C)(c)cccc1NC(=O)N1C[C@@H](COCC)C1 0.01 -This molecule is soluble in water. CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 CCOC[C@H]1CCN(C(=O)Nc2c(C)cccc2C(C)C)C1 C(C)(c)cccc(C(C)C)(N)NC(=O)N1C[C@H](COCC)C1 0.001 -This molecule is soluble in water. CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 10.0 -This molecule is soluble in water. 
CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 1.0 -This molecule is soluble in water. CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2ccc(C(=O)NC)c(N)n2)c1 0.1 -This molecule is soluble in water. CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1ccc(N)c(C(=O)NC)c1 0.01 -This molecule is soluble in water. CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 CC[C@H](C)Oc1cccc(-c2nc(N)nc(C)c2C(=O)NC)c1 O([C@@H](C)Oc(C(=O)NC)ccc1)C(=O)NC 0.001 -This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 10.0 -This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 1.0 -This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 0.1 -This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 C(F)(F)(F)[C@@H]1C[C@H](NC(=O)c2c(OC)ccc2)N 0.01 -This molecule is soluble in water. O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 O=C(N[C@H]1CCC[C@H](C(F)(F)F)C1)c1cccc2c1OCO2 N(C(=O)N[C@@H]1CCC[C@H](C(F)(F)F)N)(C)C 0.001 -This molecule is soluble in water. Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 10.0 -This molecule is soluble in water. Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 1.0 -This molecule is soluble in water. 
Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 0.1 -This molecule is soluble in water. Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 CC1=CC([N+](=O)[O-])=CN(C(=O)Nc2cc(C)[nH]n2)C1 0.01 -This molecule is soluble in water. Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 Cc1cc(NC(=O)N2CCc3ccc([N+](=O)[O-])cc3C2)no1 c1(NC(=O)N2Cc(S)cc(C)c2)cc(C)n(C)c(C)c1 0.001 -This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 10.0 -This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 1.0 -This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 0.1 -This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 N(Cc1c(N)ccc([N+](=O)[O-])c1)(C)C(=O)C[C@@H]1C 0.01 -This molecule is soluble in water. CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 CC[C@@H]1CC(=O)N(Cc2cc([N+](=O)[O-])ccc2N)C1 N(C)(C)c1cc([N+](=O)[O-])cn1 0.001 -This molecule is soluble in water. CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 10.0 -This molecule is soluble in water. CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 1.0 -This molecule is soluble in water. CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 0.1 -This molecule is soluble in water. CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 N#CNS(=O)(=O)CC(=O)NN 0.01 -This molecule is soluble in water. 
CCCc1nc(CSc2ccnc3ccccc23)no1 CCCc1nc(CSc2ccnc3ccccc23)no1 N#CNS(=O)CC(=O)NNC(N)=O 0.001 -This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)o1 10.0 -This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)o1 1.0 -This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)o1 0.1 -This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCS(N)(=O)=O)c(F)c1 0.01 -This molecule is soluble in water. Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)CCc2ccc(-c3ccccc3F)o2)cc1 Cc1ccc(CNC(=O)NCO)cc1 0.001 -This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 10.0 -This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 1.0 -This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 0.1 -This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 c1c(S(=O)(N2CCN(C(=O)ccc(Cl)c4)CC2)=O)sc(C)c1 0.01 -This molecule is soluble in water. Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 Cc1ccc(S(=O)(=O)N2CCN(C(=O)c3ccc(Cl)cc3)CC2)s1 N1(S(=O)(=O)c2sc(C)cc2)CCN(C(=O)ccc(Cl)c2)S1 0.001 -This molecule is soluble in water. 
O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 10.0 -This molecule is soluble in water. O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 1.0 -This molecule is soluble in water. O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 CCO[C@@H]1[C@H](NC(=O)Nc2ccc([N+](=O)[O-])c(Cl)c2)C12CCC2 0.1 -This molecule is soluble in water. O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 C(Nc1ccc([N+](=O)[O-])c(Cl)c1)(=O)N[C@H]1[C@H]2CCO[C@H]212 0.01 -This molecule is soluble in water. O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 O=C(Nc1ccc([N+](=O)[O-])c(Cl)c1)N[C@H]1[C@H]2CCO[C@@H]2C12CCC2 C(Nc1ccc([N+](=O)[O-])c(Cl)c1)(=O)N[C@H]1[C@H]2C(C)(C)OCC2 0.001 -This molecule is soluble in water. CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 10.0 -This molecule is soluble in water. CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 1.0 -This molecule is soluble in water. CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 0.1 -This molecule is soluble in water. CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 C(C(=O)N(C)c1nc(C(=O)OCC)cs1)((c)cc()c)(C)C 0.01 -This molecule is soluble in water. 
CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 CCOC(=O)c1csc(N(C)C(=O)C(C)(C)c2ccc(C)cc2)n1 C(C(=O)N(C)c1nc(C(=O)OCC)cs1)((c)cc()c)(C)C 0.001 -This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC 10.0 -This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@H](C#N)C(=O)OC 1.0 -This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC COC(=O)[C@@H](C#N)[C@@H](C)C(=O)OC 0.1 -This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC O=C(OC)[C@@H](N#N)[C@@H](C)NC(=O)OC 0.01 -This molecule is soluble in water. CC[C@H](C)[C@@H](C#N)C(=O)OC CC[C@H](C)[C@@H](C#N)C(=O)OC O=C(OC)[C@@H](N#N)[C@@H](C)NC(=O)OC 0.001 -This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 10.0 -This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 1.0 -This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 0.1 -This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 0.01 -This molecule is soluble in water. CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 CSc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1 0.001 -This molecule is soluble in water. CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O 10.0 -This molecule is soluble in water. CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O 1.0 -This molecule is soluble in water. CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O 0.1 -This molecule is soluble in water. 
CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CCCCN(C)S(N)(=O)=O 0.01 -This molecule is soluble in water. CCCCCN(C)S(N)(=O)=O CCCCCN(C)S(N)(=O)=O CN(CCCCN(C)S(N)(=O)=O)S(N)(=O)=O 0.001 -This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 10.0 -This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 1.0 -This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 0.1 -This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 N1(C(=O)cN)CN(C(=O)N)C[C@H](N)N(C)C1 0.01 -This molecule is soluble in water. O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 O=C(c1ccc(Cl)cc1)N1CCC[C@H](c2nnc(-c3cccs3)o2)C1 N1(C(=O)cN)CN(C(=O)N)C[C@@H](N)N(C)C(=O)N1 0.001 -This molecule is soluble in water. O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 10.0 -This molecule is soluble in water. O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 1.0 -This molecule is soluble in water. O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 0.1 -This molecule is soluble in water. O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(OCC(=O)NNC(=O)O)c(N)n(-c1ccccc1)N 0.01 -This molecule is soluble in water. 
O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O=C(COC(=O)c1nc(-c2ccccc2)n(-c2ccccc2)n1)c1ccc2c(c1)OCO2 O(CC(=O)OCC(=O)N(C)C)N(C)c(C)c(N)c(Cl)N 0.001 -This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 10.0 -This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 1.0 -This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 0.1 -This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 C(NC(=O)c#N)(=O)NC[C@@H](N1CCOCCN1CC(C)C)C 0.01 -This molecule is soluble in water. CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 CC(C)C[C@H](CNC(=O)c1cc2c([nH]c1=O)CCCC2)N1CCOCC1 C(NC(=O)c(=)N)(=O)N(C)CN(C)C(=O)N 0.001 -This molecule is soluble in water. Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 10.0 -This molecule is soluble in water. Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 1.0 -This molecule is soluble in water. Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@H](C)NC(=O)c1ccc(Cl)s1 0.1 -This molecule is soluble in water. Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 O=C(N[C@@H](C)NC(=O)c(N)nc(N)N)NN 0.01 -This molecule is soluble in water. 
Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 Cc1nc(-c2ccncc2)ncc1[C@@H](C)NC(=O)c1ccc(Cl)s1 NNC(=O)NNC(=O)NNC(=O)NNC(=O)NN 0.001 -This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 10.0 -This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 1.0 -This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 0.1 -This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 CN(S(=O)(=O)Nc1ccccc1S(N)(=O)=O)S(=O)(=O)C(F)F 0.01 -This molecule is soluble in water. O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 O=C(Nc1ccccc1S(=O)(=O)C(F)F)C1CC1 CN(S(=O)(=O)Nc1ccccc1S(N)=O)S(=O)(=O)C(F)F 0.001 -This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C 10.0 -This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C 1.0 -This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C 0.1 -This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CO)C(CO)NC(=O)Nc1cc(Br)ccc1C 0.01 -This molecule is soluble in water. CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CC)(CO)NC(=O)Nc1cc(Br)ccc1C CCC(CO)C(CO)NC(=O)Nc1cc(Br)ccc1C 0.001 -This molecule is soluble in water. CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O 10.0 -This molecule is soluble in water. CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O 1.0 -This molecule is soluble in water. 
CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O 0.1 -This molecule is soluble in water. CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O c(=O)(N(C)Cc1c(Cl)cccc1)NN=O 0.01 -This molecule is soluble in water. CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O CCCn1nc(C(=O)N(C)Cc2ccccc2Cl)ccc1=O c(=O)(N(C)CN(C)C(=O)c1c(Cl)cc[nH]1)N 0.001 -This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@H]1CCc2c(F)cccc21 10.0 -This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@H]1CCc2c(F)cccc21 1.0 -This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@H]1CCc2cccc(F)c21 0.1 -This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 c1ccc(F)c2c1SCC(=O)NCC(=O)NCCOCC 0.01 -This molecule is soluble in water. CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 CCOCCNC(=O)CS[C@@H]1CCc2c(F)cccc21 c1ccc(F)c2c1SCC(=O)NCC(=O)NCCOCC 0.001 -This molecule is soluble in water. C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCC2)cc1 10.0 -This molecule is soluble in water. C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCC2)cc1 1.0 -This molecule is soluble in water. C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCC2)cc1 0.1 -This molecule is soluble in water. 
C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@H](N(C)C(=O)NS(=O)(=O)N1CCCCCN1)N(C)S(N)(=O)=O 0.01 -This molecule is soluble in water. C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](c1nc2ccccc2s1)N(C)C(=O)CNc1ccc(S(=O)(=O)N2CCCCCC2)cc1 C[C@@H](NS(=O)(=O)N(C)C)N(C)C(=O)NS(=O)(=O)N1CCCCCN1 0.001 -This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 10.0 -This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 1.0 -This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@H](c1ccc(Br)cc1)C1CCC1 0.1 -This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 C(N(C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1)(C)C)(=O)N 0.01 -This molecule is soluble in water. CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 CC(C)N(C)C(=O)N[C@@H](c1ccc(Br)cc1)C1CCC1 C[C@@H](NC(=O)N(C)C(=O)N(C)C)c1ccc(Br)cc1 0.001 -This molecule is soluble in water. CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 10.0 -This molecule is soluble in water. CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 1.0 -This molecule is soluble in water. CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 0.1 -This molecule is soluble in water. 
CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 N(S(=O)(=O)N1CCC(C)C)c1ccc(C(Nc2c(Cl)ccc(Cl)c2)=O)cc1 0.01 -This molecule is soluble in water. CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 CC1CCN(S(=O)(=O)c2ccc(C(=O)Nc3cc(Cl)ccc3Cl)cc2)CC1 N(S(=O)(=O)N1CCN(S(=O)(=O)Nc)cc(Cl)cN)c1cc(Cl)ccc1 0.001 -This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C 10.0 -This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1c(CNC(=O)CCc2ccccc2C)c(OC)nn1C 1.0 -This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1c(CNC(=O)CCc2ccccc2C)c(OC)nn1C 0.1 -This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C C(NC(=O)CCc1c(C)cccc1)c(OC)c(OC)n(C)c1 0.01 -This molecule is soluble in water. CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C CCc1nn(C)c(OC)c1CNC(=O)CCc1ccccc1C C(NC(=O)CCc(C)c(OC)n(C))N 0.001 -This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 10.0 -This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 1.0 -This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 0.1 -This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 C(N(C)C(=O)cN(C)C(=O)ccc(Cl)cN)(=O)N 0.01 -This molecule is soluble in water. CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 CN(Cc1nnc(C2CC2)n1C)C(=O)c1ccc(Cl)cn1 N(C)(C(=O)N(C)CN(C)C(=O)ccc(Cl)cN)S 0.001 -This molecule is soluble in water. 
CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 10.0 -This molecule is soluble in water. CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 1.0 -This molecule is soluble in water. CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 0.1 -This molecule is soluble in water. CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(NC(=O)CC)c1 0.01 -This molecule is soluble in water. CCC(=O)Nc1cccc(-n2cnnc2)c1 CCC(=O)Nc1cccc(-n2cnnc2)c1 c(NC(=O)CC)cc(NC(=O)CC)ccc1 0.001 -This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 10.0 -This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 1.0 -This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 0.1 -This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 N(C(N[C@@H](C1CC1)c(N)cc()c)=O)C(=O)NC(=O)N 0.01 -This molecule is soluble in water. Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 Cc1ccc([C@H](NC(=O)NCc2ccsc2)C2CC2)cc1 N(C(N[C@@H](C1C1)c(N)cc()c)O)C(=O)NC(=O)NO 0.001 -This molecule is soluble in water. CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C 10.0 -This molecule is soluble in water. CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C 1.0 -This molecule is soluble in water. CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C 0.1 -This molecule is soluble in water. 
CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C C(N(C(=O)ccc(F)c)CC)CC(=O)NNC(=O)N 0.01 -This molecule is soluble in water. CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(CCC(=O)NCc1ccc(F)cc1)C(=O)c1ccoc1C CCN(C(=O)NCc1ccc(F)cc1)C(=O)NC(N)=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O NC1=CC=CC2=C(N3CCCC3=O)[C@H](CCCO1)CNC2=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1C(=O)N(c2c(ccccc2)CNC[C@H]2CCCC2)c1c(C)CC 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O c1(N2C(=O)CCCC2)c(C)c(cccc1)ccccc12 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c1c(C(NC[C@@H]2CCCC2)(c2ccccc2)C)Cc2ccccc2)C[C@H]1CCCCCCCC(C)CC 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c1c(CNC(=O)ccccc2)(cc1NC[C@@H](C)CN(C)C)CCC)CCCCCCCCCC)C1 0.001 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c1c(C(NC[C@@H]2CCCC2)(c2ccccc2)C)Cc2ccccc2)C[C@H]1CCCCCCCC(C)CC 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c1c(CNC(=O)ccccc2)(cc1NC[C@@H](C)CN(C)C)CCC)CCCCCCCCCC)C1 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O CCCCCN(C(=O)c1ccccc1)c1ccccc1N1CCCC1 0.01 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O C1CCN(c2ccccc2N(C(=O)c2ccccc2)c2ccccc2)CCN1CCCN(C)CCCCCCCCCCN(C)CCCCCCCCCCCCCCCCCCN(CCNCCCN(CCCCCCCCCCCCCCNCNCCCNCN(CCCCCCCCCCCCCCCCCCCCCCCNCN(CCCCN(CCCCN(CCCCCN(CCCCCCCCCCCCCCCCCCCCCCCN(CCCCCCCCCCN(CN(=CCCCCCCCCCCCCCCCCCCCCCCCCCN(=CCCCCCCCN(=CCCN(=CCCCCCC(=C(=CC(=C(=CC(=CCCC(=C(=C(=C(=CCC(=C(=C(=C(=CCCCCCCCCCCCCCCCCCCCCCN(=CCCN(C)(=CCC)(=C)(C)(C)(C)(C)C)C)(C)(C)(CCCCCC(C)(C)(C)(C)(=C)(C)(C)(C)(N(=C)(C)(C)(C)(=C(C)(=C)(=C)(=C)(=C)(=C)(=C)CCCCCCCN(=C)CN(=CCCC)(=C)(=C)(=C)(=C)(NCNC)(=C)(=C)(N(C)(=C)(=C)(=C) 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 10.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 1.0 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.1 -This molecule is soluble in water. O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.01 -This molecule is soluble in water. 
O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O 0.001 From abbdd3e9174f34f047404f6e61db1fd6a568d0a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ccyz-32526=E2=80=9D?= <345609226@qq.com> Date: Fri, 10 Nov 2023 12:13:35 +0800 Subject: [PATCH 6/9] Added: Use static files instead of data preprocessing --- .../moledit_step_01_Space_Alignment.py | 64 +++++++++++++------ scripts/multimodal/moledit/train.sh | 4 +- 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py b/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py index fb3b4ea..ad7bc69 100644 --- a/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py +++ b/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py @@ -9,6 +9,7 @@ import json import re import copy +import pickle import torch import torch.nn as nn @@ -120,6 +121,12 @@ def train(epoch): start_time = time.time() accum_loss, accum_acc = 0, 0 + + if args.use_molecule_repr_MoleculeSTM_list_molkformer==True and config["model"]=="molkformer-MegaMolBART":#use the processed molkformer data + with open('./datasets/mol_edit/ZINC250K_data/molecule_repr_MoleculeSTM_list.pkl', 'rb') as f: + molecule_repr_MoleculeSTM_list = pickle.load(f) + num_i=0 + for batch in L: if args.MoleculeSTM_molecule_type == "SMILES": SMILES_list = batch["structure"]["SMILES"] @@ -128,20 +135,25 @@ def train(epoch): graph = batch["structure"]["graph"] graph = graph.to(device) - - if args.MoleculeSTM_molecule_type == "SMILES": - molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( - SMILES_list, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, - molecule_type=args.MoleculeSTM_molecule_type, MegaMolBART_wrapper=MegaMolBART_wrapper - ) + if args.use_molecule_repr_MoleculeSTM_list_molkformer==True and config["model"]=="molkformer-MegaMolBART":#use the processed molkformer data + 
molecule_repr_MoleculeSTM = molecule_repr_MoleculeSTM_list[num_i].to(device) molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) - + num_i+=1 else: - molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( - graph, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, - molecule_type=args.MoleculeSTM_molecule_type, MegaMolBART_wrapper=MegaMolBART_wrapper - ) - molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) + if args.MoleculeSTM_molecule_type == "SMILES": + molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( + SMILES_list, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, + molecule_type=args.MoleculeSTM_molecule_type, MegaMolBART_wrapper=MegaMolBART_wrapper + ) + molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) + + else: + molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( + graph, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, + molecule_type=args.MoleculeSTM_molecule_type, MegaMolBART_wrapper=MegaMolBART_wrapper + ) + # molecule_repr_MoleculeSTM_list.append(molecule_repr_MoleculeSTM) #To generate the set of molecule_repr_MoleculeSTM to speed up training with molkformer + molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) if args.generation_model == "MegaMolBART": molecule_repr_generation = get_molecule_repr_generation( @@ -177,16 +189,17 @@ def train(epoch): return + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--device", type=str, default=None) + parser.add_argument("--device", type=str, default="cuda:3") parser.add_argument("--verbose", type=int, default=1) - parser.add_argument("--dataset_path", type=str, default=None) + parser.add_argument("--dataset_path", type=str, default="./datasets/mol_edit/ZINC250K_data") 
parser.add_argument("--dataset", type=str, default="ZINC250K") - parser.add_argument("--MoleculeSTM_molecule_type", type=str, default=None, choices=["SMILES", "Graph"]) - parser.add_argument("--output_path", type=str, default=None) - parser.add_argument("--config_path", type=str, default=None) + parser.add_argument("--MoleculeSTM_molecule_type", type=str, default="Graph", choices=["SMILES", "Graph"]) + parser.add_argument("--output_path", type=str, default="./ckpts/finetune_ckpts/moledit/molkformer/Graph") + parser.add_argument("--config_path", type=str, default="./configs/moledit/molkformer-Graph-MegaMolBART.json") parser.add_argument("--mode", type=str, default="train") ########## for MoleculeSTM ########## parser.add_argument("--MoleculeSTM_model_dir", type=str, default=None) @@ -203,8 +216,8 @@ def train(epoch): parser.add_argument('--generation_model', type=str, default="MegaMolBART", choices=["MegaMolBART"]) ######### for MegaMolBART ########## - parser.add_argument("--MegaMolBART_generation_model_dir", type=str, default=None) - parser.add_argument("--vocab_path", type=str, default=None) + parser.add_argument("--MegaMolBART_generation_model_dir", type=str, default="./ckpts/fusion_ckpts/pretrained_MegaMolBART/checkpoints") + parser.add_argument("--vocab_path", type=str, default="./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt") ########## for optimization ########## parser.add_argument("--batch_size", type=int, default=256) @@ -220,6 +233,8 @@ def train(epoch): parser.add_argument('--no_normalize', dest='normalize', action='store_false') parser.set_defaults(normalize=True) parser.add_argument("--MASTER_PORT", type=str, default='6000') + parser.add_argument("--use_processed_dataset", type=bool, default=False) + parser.add_argument("--use_molecule_repr_MoleculeSTM_list_molkformer", type=bool, default=False) args = parser.parse_args() print(args) @@ -228,7 +243,12 @@ def train(epoch): os.environ['MASTER_PORT'] = args.MASTER_PORT # load dataset - 
dataset = SUPPORTED_MOLEDIT_DATASET[args.dataset](args.dataset_path, config["data"]["mol"], split="train") + if args.use_processed_dataset==True: # skip SUPPORTED_MOLEDIT_DATASET + with open("./datasets/mol_edit/ZINC250K_data/dataset_zinc250K.pkl", "rb") as f: + dataset = pickle.load(f) + else: + dataset = SUPPORTED_MOLEDIT_DATASET[args.dataset](args.dataset_path, config["data"]["mol"], split="train") + dataloader_class = pyg_DataLoader device = torch.device(args.device) \ @@ -273,7 +293,9 @@ def train(epoch): optimizer = optim.Adam(model_param_group, weight_decay=args.decay) optimal_loss = 1e10 - + # molecule_repr_MoleculeSTM_list = [] for e in range(1, args.epochs+1): print("Epoch {}".format(e)) train(e) + # with open('./datasets/mol_edit/ZINC250K_data/molecule_repr_MoleculeSTM_list.pkl', 'wb') as f: #To generate the set of molecule_repr_MoleculeSTM to speed up training with molkformer + # pickle.dump(molecule_repr_MoleculeSTM_list, f) ##for molecule_repr_MoleculeSTM_list diff --git a/scripts/multimodal/moledit/train.sh b/scripts/multimodal/moledit/train.sh index 52a0a41..5ca0b05 100755 --- a/scripts/multimodal/moledit/train.sh +++ b/scripts/multimodal/moledit/train.sh @@ -20,4 +20,6 @@ python open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py \ --batch_size 256 \ --vocab_path ./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt \ --MegaMolBART_generation_model_dir ./ckpts/fusion_ckpts/pretrained_MegaMolBART/checkpoints \ ---MASTER_PORT '6000' \ No newline at end of file +--MASTER_PORT '6000' \ +--use_processed_dataset True \ +--use_molecule_repr_MoleculeSTM_list_molkformer True \ No newline at end of file From e54781e63c0b3d8a4fe4f490d6576495b5b6811d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ccyz-32526=E2=80=9D?= <345609226@qq.com> Date: Fri, 10 Nov 2023 14:57:00 +0800 Subject: [PATCH 7/9] Add:generate_static_files --- .../moledit_step_01_Space_Alignment.py | 103 ++++++++++++------ scripts/multimodal/moledit/train.sh | 6 +- 2 files 
changed, 75 insertions(+), 34 deletions(-) diff --git a/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py b/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py index ad7bc69..422648d 100644 --- a/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py +++ b/open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py @@ -121,11 +121,7 @@ def train(epoch): start_time = time.time() accum_loss, accum_acc = 0, 0 - - if args.use_molecule_repr_MoleculeSTM_list_molkformer==True and config["model"]=="molkformer-MegaMolBART":#use the processed molkformer data - with open('./datasets/mol_edit/ZINC250K_data/molecule_repr_MoleculeSTM_list.pkl', 'rb') as f: - molecule_repr_MoleculeSTM_list = pickle.load(f) - num_i=0 + batch_num=0 for batch in L: if args.MoleculeSTM_molecule_type == "SMILES": @@ -135,35 +131,30 @@ def train(epoch): graph = batch["structure"]["graph"] graph = graph.to(device) - if args.use_molecule_repr_MoleculeSTM_list_molkformer==True and config["model"]=="molkformer-MegaMolBART":#use the processed molkformer data - molecule_repr_MoleculeSTM = molecule_repr_MoleculeSTM_list[num_i].to(device) + if args.use_static_files==1: + molecule_repr_MoleculeSTM = molecule_repr_MoleculeSTM_list[batch_num].to(device) molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) - num_i+=1 + molecule_repr_generation = molecule_repr_generation_list[batch_num].to(device) + molecule_repr_generation2MoleculeSTM = generation2MoleculeSTM(molecule_repr_generation) + batch_num+=1 else: if args.MoleculeSTM_molecule_type == "SMILES": molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( SMILES_list, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, molecule_type=args.MoleculeSTM_molecule_type, MegaMolBART_wrapper=MegaMolBART_wrapper ) - molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) - else: molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( graph, 
molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, molecule_type=args.MoleculeSTM_molecule_type, MegaMolBART_wrapper=MegaMolBART_wrapper ) - # molecule_repr_MoleculeSTM_list.append(molecule_repr_MoleculeSTM) #To generate the set of molecule_repr_MoleculeSTM to speed up training with molkformer - molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) - - if args.generation_model == "MegaMolBART": - molecule_repr_generation = get_molecule_repr_generation( - SMILES_list, molecule_model=molecule_model_generation, - molecule_type="MegaMolBART", MegaMolBART_wrapper=MegaMolBART_wrapper - ) - else: # for HierVAE - hiervae_data_list = MolGraph.tensorize(SMILES_list, vocab, avocab) - molecule_repr_generation = molecule_model_generation.forward_MoleculeSTM(hiervae_data_list) - molecule_repr_generation2MoleculeSTM = generation2MoleculeSTM(molecule_repr_generation) + if args.generation_model == "MegaMolBART": + molecule_repr_generation = get_molecule_repr_generation( + SMILES_list, molecule_model=molecule_model_generation, + molecule_type="MegaMolBART", MegaMolBART_wrapper=MegaMolBART_wrapper + ) + molecule_repr_MoleculeSTM2generation = MoleculeSTM2generation(molecule_repr_MoleculeSTM) + molecule_repr_generation2MoleculeSTM = generation2MoleculeSTM(molecule_repr_generation) loss_01, acc_01 = do_CL(molecule_repr_generation, molecule_repr_MoleculeSTM2generation, args) loss_02, acc_02 = do_CL(molecule_repr_MoleculeSTM, molecule_repr_generation2MoleculeSTM, args) @@ -188,7 +179,44 @@ def train(epoch): print("SSL Loss: {:.5f}\tSSL Acc: {:.5f}\tTime: {:.5f}".format(accum_loss, accum_acc, time.time() - start_time)) return - +def generate_static_files(): + if args.verbose: + L = tqdm(dataloader) + else: + L = dataloader + molecule_repr_MoleculeSTM_list = [] + molecule_repr_generation_list = [] + for batch in L: + if args.MoleculeSTM_molecule_type == "SMILES": + SMILES_list = batch["structure"]["SMILES"] + else: + SMILES_list 
= batch["structure"]["SMILES"] + graph = batch["structure"]["graph"] + graph = graph.to(device) + if args.MoleculeSTM_molecule_type == "SMILES": + molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( + SMILES_list, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, + molecule_type=args.MoleculeSTM_molecule_type, MegaMolBART_wrapper=MegaMolBART_wrapper + ) + else: + molecule_repr_MoleculeSTM = get_molecule_repr_MoleculeSTM( + graph, molecule_model=molecule_model_MoleculeSTM, mol2latent=mol2latent_MoleculeSTM, + molecule_type=args.MoleculeSTM_molecule_type, MegaMolBART_wrapper=MegaMolBART_wrapper + ) + molecule_repr_MoleculeSTM_list.append(molecule_repr_MoleculeSTM) + if args.generation_model == "MegaMolBART": + molecule_repr_generation = get_molecule_repr_generation( + SMILES_list, molecule_model=molecule_model_generation, + molecule_type="MegaMolBART", MegaMolBART_wrapper=MegaMolBART_wrapper + ) + molecule_repr_generation_list.append(molecule_repr_generation) + saved_file_path = os.path.join(args.static_files_path, "molecule_repr_MoleculeSTM_list.pkl") + with open(saved_file_path, 'wb') as f: + pickle.dump(molecule_repr_MoleculeSTM_list, f) + saved_file_path = os.path.join(args.static_files_path, "molecule_repr_generation_list.pkl") + with open(saved_file_path, 'wb') as f: + pickle.dump(molecule_repr_generation_list, f) + return if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -196,6 +224,7 @@ def train(epoch): parser.add_argument("--device", type=str, default="cuda:3") parser.add_argument("--verbose", type=int, default=1) parser.add_argument("--dataset_path", type=str, default="./datasets/mol_edit/ZINC250K_data") + parser.add_argument("--static_files_path", type=str, default="./datasets/mol_edit/ZINC250K_data/static_files/molkformer-Graph") parser.add_argument("--dataset", type=str, default="ZINC250K") parser.add_argument("--MoleculeSTM_molecule_type", type=str, default="Graph", choices=["SMILES", "Graph"]) 
parser.add_argument("--output_path", type=str, default="./ckpts/finetune_ckpts/moledit/molkformer/Graph") @@ -222,7 +251,7 @@ def train(epoch): ########## for optimization ########## parser.add_argument("--batch_size", type=int, default=256) parser.add_argument("--num_workers", type=int, default=8) - parser.add_argument("--epochs", type=int, default=1) + parser.add_argument("--epochs", type=int, default=5) parser.add_argument("--decay", type=float, default=0) parser.add_argument("--generation_lr", type=float, default=1e-2) parser.add_argument("--MoleculeSTM_lr", type=float, default=1e-2) @@ -233,9 +262,10 @@ def train(epoch): parser.add_argument('--no_normalize', dest='normalize', action='store_false') parser.set_defaults(normalize=True) parser.add_argument("--MASTER_PORT", type=str, default='6000') - parser.add_argument("--use_processed_dataset", type=bool, default=False) - parser.add_argument("--use_molecule_repr_MoleculeSTM_list_molkformer", type=bool, default=False) - + parser.add_argument("--use_processed_dataset_250K", type=int, default=0) + parser.add_argument("--generate_static_files", type=int, default=0) + parser.add_argument("--use_static_files", type=int, default=0) + args = parser.parse_args() print(args) @@ -243,7 +273,7 @@ def train(epoch): os.environ['MASTER_PORT'] = args.MASTER_PORT # load dataset - if args.use_processed_dataset==True: # skip SUPPORTED_MOLEDIT_DATASET + if args.use_processed_dataset_250K==1: # skip SUPPORTED_MOLEDIT_DATASET with open("./datasets/mol_edit/ZINC250K_data/dataset_zinc250K.pkl", "rb") as f: dataset = pickle.load(f) else: @@ -278,7 +308,7 @@ def train(epoch): molecule_model_MoleculeSTM.eval() - dataloader = dataloader_class(dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) + dataloader = dataloader_class(dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) molecule_dim_generation = 256 @@ -293,9 +323,18 @@ def train(epoch): optimizer = 
optim.Adam(model_param_group, weight_decay=args.decay) optimal_loss = 1e10 - # molecule_repr_MoleculeSTM_list = [] + if args.generate_static_files==1: + generate_static_files() + + if args.use_static_files==1: + saved_file_path = os.path.join(args.static_files_path, "molecule_repr_MoleculeSTM_list.pkl") + with open(saved_file_path, 'rb') as f: + molecule_repr_MoleculeSTM_list = pickle.load(f) + saved_file_path = os.path.join(args.static_files_path, "molecule_repr_generation_list.pkl") + with open(saved_file_path, 'rb') as f: + molecule_repr_generation_list = pickle.load(f) + + for e in range(1, args.epochs+1): print("Epoch {}".format(e)) train(e) - # with open('./datasets/mol_edit/ZINC250K_data/molecule_repr_MoleculeSTM_list.pkl', 'wb') as f: #To generate the set of molecule_repr_MoleculeSTM to speed up training with molkformer - # pickle.dump(molecule_repr_MoleculeSTM_list, f) ##for molecule_repr_MoleculeSTM_list diff --git a/scripts/multimodal/moledit/train.sh b/scripts/multimodal/moledit/train.sh index 5ca0b05..8d54106 100755 --- a/scripts/multimodal/moledit/train.sh +++ b/scripts/multimodal/moledit/train.sh @@ -14,6 +14,7 @@ python open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py \ --dataset ZINC250K \ --dataset_path ./datasets/mol_edit/ZINC250K_data \ --output_path ./ckpts/finetune_ckpts/moledit/${MODEL}/${TYPE} \ +--static_files_path ./datasets/mol_edit/ZINC250K_data/static_files/${MODEL}-${TYPE} \ --mode ${MODE} \ --epochs ${EPOCHS} \ --num_workers 8 \ @@ -21,5 +22,6 @@ python open_biomed/tasks/mol_edit/moledit_step_01_Space_Alignment.py \ --vocab_path ./ckpts/fusion_ckpts/pretrained_MegaMolBART/bart_vocab.txt \ --MegaMolBART_generation_model_dir ./ckpts/fusion_ckpts/pretrained_MegaMolBART/checkpoints \ --MASTER_PORT '6000' \ ---use_processed_dataset True \ ---use_molecule_repr_MoleculeSTM_list_molkformer True \ No newline at end of file +--use_processed_dataset_250K 0 \ +--generate_static_files 0 \ +--use_static_files 0 \ No newline at end of 
file From ebb200871a07dc1e77f5455c3827ff337d506a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ccyz-32526=E2=80=9D?= <345609226@qq.com> Date: Thu, 16 Nov 2023 13:32:57 +0800 Subject: [PATCH 8/9] Fixed the molkformer address --- configs/moledit/molkformer-Graph-MegaMolBART.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/moledit/molkformer-Graph-MegaMolBART.json b/configs/moledit/molkformer-Graph-MegaMolBART.json index f5333fd..3ac9731 100644 --- a/configs/moledit/molkformer-Graph-MegaMolBART.json +++ b/configs/moledit/molkformer-Graph-MegaMolBART.json @@ -45,7 +45,7 @@ "path_selfies": "./assets/tokenizers/biot5/selfies_dict.txt", "max_n_atoms": 256, "projection_dim": 256, - "init_checkpoint": "./ckpts/fusion_ckpts/mol_kformer_biot5.pth" + "init_checkpoint": "./ckpts/fusion_ckpts/molkformer/checkpoint_49.pth" } } } \ No newline at end of file From 0aa2cdd40018a923a96c1b25ac5f06493346ed0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ccyz-32526=E2=80=9D?= <345609226@qq.com> Date: Mon, 20 Nov 2023 09:53:36 +0800 Subject: [PATCH 9/9] load model --- open_biomed/models/task_model/moledit_model.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/open_biomed/models/task_model/moledit_model.py b/open_biomed/models/task_model/moledit_model.py index 7415a78..23a3823 100644 --- a/open_biomed/models/task_model/moledit_model.py +++ b/open_biomed/models/task_model/moledit_model.py @@ -15,10 +15,13 @@ def __init__(self, config): if config["graph"]["name"] == "molkformer": self.ckpt = torch.load(config["graph"]["init_checkpoint"], map_location="cpu") self.ckpt = self.ckpt["model"] - self.model.load_state_dict(self.ckpt, strict=False) + self.model.load_state_dict(self.ckpt) + if config["graph"]["name"] == "momu": - self.ckpt = torch.load(config["graph"]["init_checkpoint"], map_location="cpu") - self.model.load_state_dict(self.ckpt, strict=False) + self.ckpt = torch.load(config["graph"]["init_checkpoint"]) + if 
"param_key" in config["graph"]: + self.ckpt = self.ckpt[config["graph"]["param_key"]] + self.model.load_state_dict(self.ckpt) self.use_molkformer = True if config["graph"]["name"] == "molkformer" else False self.use_momu = True if config["graph"]["name"] == "momu" else False