Skip to content

Commit 17c5dfb

Browse files
committed
added script to make analysis of membrane input data
1 parent 812f76b commit 17c5dfb

File tree

3 files changed

+154
-346
lines changed

3 files changed

+154
-346
lines changed

src/freedom_analysis.py

Lines changed: 0 additions & 179 deletions
This file was deleted.

src/input_analysis.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
import logging
2+
import os
3+
import pickle
4+
import socket
5+
import sys
6+
import time
7+
8+
import Bio.PDB
9+
10+
import ffmpeg
11+
import matplotlib.pyplot as plt
12+
import numpy as np
13+
from library.config import Keys, config
14+
from library.static.vector_mappings import DOPC_AT_MAPPING, DOPC_AT_BAB
15+
##### CONFIGURATION #####
16+
17+
# Plot config
18+
THEMES = ["seaborn-v0_8-paper", "seaborn-paper"] # Use the first theme that is available
19+
20+
# Load config
21+
DATA_PREFIX = config(Keys.DATA_PATH)
22+
BATCH_SIZE = config(Keys.BATCH_SIZE)
23+
VALIDATION_SPLIT = config(Keys.VALIDATION_SPLIT)
24+
NEIGHBORHOOD_SIZE = config(Keys.NEIGHBORHOOD_SIZE)
25+
EPOCHS = config(Keys.EPOCHS)
26+
MODEL_NAME_PREFIX = config(Keys.MODEL_NAME_PREFIX)
27+
DATA_USAGE = config(Keys.DATA_USAGE)
28+
USE_TENSORBOARD = config(Keys.USE_TENSORBOARD)
29+
30+
# Analysis config
31+
N_BATCHES = 1
32+
ANALYSIS_DATA_PATH = os.path.join(DATA_PREFIX, "analysis")
33+
MEMBRANE_PATH = os.path.join(DATA_PREFIX, "training", "membranes")
34+
35+
# Matplotlib config
36+
[plt.style.use(THEME) if THEME in plt.style.available else print(f"Theme '{THEME}' not available, using default theme. Select one of {plt.style.available}.") for THEME in THEMES]
37+
savefig_kwargs = {"dpi": 300, "bbox_inches": 'tight'}
38+
39+
#### UTILS ####
40+
41+
def gen_path(*args):
42+
"""
43+
Returns the path to the analysis data folder. Also creates the folder if it does not exist.
44+
"""
45+
# Create folder if it does not exist
46+
if len(args) > 1:
47+
os.makedirs(os.path.join(ANALYSIS_DATA_PATH, *args[:-1]), exist_ok=True)
48+
49+
return os.path.join(ANALYSIS_DATA_PATH, *args)
50+
51+
52+
##### ANALYSIS #####
53+
54+
# List all membranes in the training folder
55+
membranes = [os.path.join(MEMBRANE_PATH, membrane) for membrane in os.listdir(MEMBRANE_PATH)]
56+
57+
# Load the first membrane with BioPython
58+
parser = Bio.PDB.PDBParser(QUIET=True)
59+
60+
61+
bond_lengths = [[] for _ in range(len(DOPC_AT_MAPPING))]
62+
63+
# Remove duplicate bond angle bonds
64+
angle_pairs = [] # List of tuples of (name, name, name)
65+
DOPC_AT_BAB = list(set(DOPC_AT_BAB))
66+
for i, ((a,b), (c,d)) in enumerate(DOPC_AT_BAB):
67+
# Find common atom
68+
if a == c:
69+
angle_pairs.append((b, d, a))
70+
elif a == d:
71+
angle_pairs.append((b, c, a))
72+
elif b == c:
73+
angle_pairs.append((a, d, b))
74+
elif b == d:
75+
angle_pairs.append((a, c, b))
76+
else:
77+
raise ValueError(f"Could not find common atom in {((a,b), (c,d))}")
78+
79+
bond_angles = [[] for _ in range(len(angle_pairs))]
80+
bond_angles_stats = [[] for _ in range(len(angle_pairs))]
81+
82+
for k, membrane in enumerate(membranes):
83+
# Load the whole membrane
84+
structure = parser.get_structure("membrane", os.path.join(membrane, "at.pdb"))
85+
86+
# Get all residues (molecules) in the membrane
87+
residues = structure.get_residues()
88+
89+
# Loop through all residues
90+
for residue in residues:
91+
# Get all atoms in the residue
92+
atoms = list(residue.get_atoms())
93+
94+
# Save all bond lengths in this residue
95+
for i,(a,b) in enumerate(DOPC_AT_MAPPING):
96+
atom_a = [atom for atom in atoms if atom.get_name() == a][0]
97+
atom_b = [atom for atom in atoms if atom.get_name() == b][0]
98+
bond_lengths[i].append(atom_a - atom_b)
99+
100+
for i, (a,b,c) in enumerate(angle_pairs):
101+
atom_a = [atom for atom in atoms if atom.get_name() == a][0]
102+
atom_b = [atom for atom in atoms if atom.get_name() == b][0]
103+
atom_c = [atom for atom in atoms if atom.get_name() == c][0]
104+
105+
# Calculate angle between the two bonds
106+
angle = Bio.PDB.calc_angle(atom_a.get_vector(), atom_c.get_vector(), atom_b.get_vector())
107+
bond_angles[i].append(angle / np.pi * 180)
108+
109+
# Print progress
110+
if k % 10 == 0:
111+
print(f"Processed membrane {k}/{len(membranes)}")
112+
113+
break
114+
115+
# Calculate the mean and standard deviation of the bond lengths for every bond
116+
for i, bond in enumerate(bond_lengths):
117+
bond_lengths[i] = (np.mean(bond), np.std(bond))
118+
print(f"Mean bond length of {DOPC_AT_MAPPING[i]}: {bond_lengths[i][0]} +- {bond_lengths[i][1]}")
119+
120+
# Calculate the mean and standard deviation of the bond angles for every bond
121+
for i, bond in enumerate(bond_angles):
122+
bond_angles_stats[i] = (np.mean(bond), np.std(bond))
123+
print(f"Mean bond angle of {angle_pairs[i]}: {bond_angles_stats[i][0]} +- {bond_angles_stats[i][1]}")
124+
125+
over_2 = 0
126+
under_2 = 0
127+
128+
# Make bond angle historgram
129+
for i, bond in enumerate(angle_pairs):
130+
bond_a = bond_angles[i]
131+
132+
# Ignore outliers
133+
bond_a = np.array(bond_a)
134+
bond_a = bond_a[bond_a > bond_angles_stats[i][0] - 2 * bond_angles_stats[i][1]] # Ignore outliers (2 standard deviations)
135+
136+
# Calculate mean and standard deviation
137+
bond_angles_stats[i] = (np.mean(bond_a), np.std(bond_a))
138+
139+
if(bond_angles_stats[i][1] > 2):
140+
over_2 += 1
141+
else:
142+
under_2 += 1
143+
144+
print(f"Number of bond angles with a standard deviation of more than 2 degrees: {over_2}")
145+
print(f"Number of bond angles with a standard deviation of less than 2 degrees: {under_2}")
146+
147+
148+
# Clear
149+
plt.clf()
150+
plt.hist(bond_a, bins=100)
151+
plt.xlabel(f"Bond angle (degrees) {bond} {bond_angles_stats[i][0]} +- {bond_angles_stats[i][1]}")
152+
plt.ylabel("Frequency")
153+
plt.title("Bond angle distribution")
154+
plt.savefig(gen_path(f"training_bond_angle_distribution_{i}.png"), **savefig_kwargs)

0 commit comments

Comments
 (0)