1+ import logging
2+ import os
3+ import pickle
4+ import socket
5+ import sys
6+ import time
7+
8+ import Bio .PDB
9+
10+ import ffmpeg
11+ import matplotlib .pyplot as plt
12+ import numpy as np
13+ from library .config import Keys , config
14+ from library .static .vector_mappings import DOPC_AT_MAPPING , DOPC_AT_BAB
15+ ##### CONFIGURATION #####
16+
17+ # Plot config
18+ THEMES = ["seaborn-v0_8-paper" , "seaborn-paper" ] # Use the first theme that is available
19+
20+ # Load config
21+ DATA_PREFIX = config (Keys .DATA_PATH )
22+ BATCH_SIZE = config (Keys .BATCH_SIZE )
23+ VALIDATION_SPLIT = config (Keys .VALIDATION_SPLIT )
24+ NEIGHBORHOOD_SIZE = config (Keys .NEIGHBORHOOD_SIZE )
25+ EPOCHS = config (Keys .EPOCHS )
26+ MODEL_NAME_PREFIX = config (Keys .MODEL_NAME_PREFIX )
27+ DATA_USAGE = config (Keys .DATA_USAGE )
28+ USE_TENSORBOARD = config (Keys .USE_TENSORBOARD )
29+
30+ # Analysis config
31+ N_BATCHES = 1
32+ ANALYSIS_DATA_PATH = os .path .join (DATA_PREFIX , "analysis" )
33+ MEMBRANE_PATH = os .path .join (DATA_PREFIX , "training" , "membranes" )
34+
35+ # Matplotlib config
36+ [plt .style .use (THEME ) if THEME in plt .style .available else print (f"Theme '{ THEME } ' not available, using default theme. Select one of { plt .style .available } ." ) for THEME in THEMES ]
37+ savefig_kwargs = {"dpi" : 300 , "bbox_inches" : 'tight' }
38+
39+ #### UTILS ####
40+
41+ def gen_path (* args ):
42+ """
43+ Returns the path to the analysis data folder. Also creates the folder if it does not exist.
44+ """
45+ # Create folder if it does not exist
46+ if len (args ) > 1 :
47+ os .makedirs (os .path .join (ANALYSIS_DATA_PATH , * args [:- 1 ]), exist_ok = True )
48+
49+ return os .path .join (ANALYSIS_DATA_PATH , * args )
50+
51+
52+ ##### ANALYSIS #####
53+
54+ # List all membranes in the training folder
55+ membranes = [os .path .join (MEMBRANE_PATH , membrane ) for membrane in os .listdir (MEMBRANE_PATH )]
56+
57+ # Load the first membrane with BioPython
58+ parser = Bio .PDB .PDBParser (QUIET = True )
59+
60+
61+ bond_lengths = [[] for _ in range (len (DOPC_AT_MAPPING ))]
62+
63+ # Remove duplicate bond angle bonds
64+ angle_pairs = [] # List of tuples of (name, name, name)
65+ DOPC_AT_BAB = list (set (DOPC_AT_BAB ))
66+ for i , ((a ,b ), (c ,d )) in enumerate (DOPC_AT_BAB ):
67+ # Find common atom
68+ if a == c :
69+ angle_pairs .append ((b , d , a ))
70+ elif a == d :
71+ angle_pairs .append ((b , c , a ))
72+ elif b == c :
73+ angle_pairs .append ((a , d , b ))
74+ elif b == d :
75+ angle_pairs .append ((a , c , b ))
76+ else :
77+ raise ValueError (f"Could not find common atom in { ((a ,b ), (c ,d ))} " )
78+
79+ bond_angles = [[] for _ in range (len (angle_pairs ))]
80+ bond_angles_stats = [[] for _ in range (len (angle_pairs ))]
81+
82+ for k , membrane in enumerate (membranes ):
83+ # Load the whole membrane
84+ structure = parser .get_structure ("membrane" , os .path .join (membrane , "at.pdb" ))
85+
86+ # Get all residues (molecules) in the membrane
87+ residues = structure .get_residues ()
88+
89+ # Loop through all residues
90+ for residue in residues :
91+ # Get all atoms in the residue
92+ atoms = list (residue .get_atoms ())
93+
94+ # Save all bond lengths in this residue
95+ for i ,(a ,b ) in enumerate (DOPC_AT_MAPPING ):
96+ atom_a = [atom for atom in atoms if atom .get_name () == a ][0 ]
97+ atom_b = [atom for atom in atoms if atom .get_name () == b ][0 ]
98+ bond_lengths [i ].append (atom_a - atom_b )
99+
100+ for i , (a ,b ,c ) in enumerate (angle_pairs ):
101+ atom_a = [atom for atom in atoms if atom .get_name () == a ][0 ]
102+ atom_b = [atom for atom in atoms if atom .get_name () == b ][0 ]
103+ atom_c = [atom for atom in atoms if atom .get_name () == c ][0 ]
104+
105+ # Calculate angle between the two bonds
106+ angle = Bio .PDB .calc_angle (atom_a .get_vector (), atom_c .get_vector (), atom_b .get_vector ())
107+ bond_angles [i ].append (angle / np .pi * 180 )
108+
109+ # Print progress
110+ if k % 10 == 0 :
111+ print (f"Processed membrane { k } /{ len (membranes )} " )
112+
113+ break
114+
115+ # Calculate the mean and standard deviation of the bond lengths for every bond
116+ for i , bond in enumerate (bond_lengths ):
117+ bond_lengths [i ] = (np .mean (bond ), np .std (bond ))
118+ print (f"Mean bond length of { DOPC_AT_MAPPING [i ]} : { bond_lengths [i ][0 ]} +- { bond_lengths [i ][1 ]} " )
119+
120+ # Calculate the mean and standard deviation of the bond angles for every bond
121+ for i , bond in enumerate (bond_angles ):
122+ bond_angles_stats [i ] = (np .mean (bond ), np .std (bond ))
123+ print (f"Mean bond angle of { angle_pairs [i ]} : { bond_angles_stats [i ][0 ]} +- { bond_angles_stats [i ][1 ]} " )
124+
125+ over_2 = 0
126+ under_2 = 0
127+
128+ # Make bond angle historgram
129+ for i , bond in enumerate (angle_pairs ):
130+ bond_a = bond_angles [i ]
131+
132+ # Ignore outliers
133+ bond_a = np .array (bond_a )
134+ bond_a = bond_a [bond_a > bond_angles_stats [i ][0 ] - 2 * bond_angles_stats [i ][1 ]] # Ignore outliers (2 standard deviations)
135+
136+ # Calculate mean and standard deviation
137+ bond_angles_stats [i ] = (np .mean (bond_a ), np .std (bond_a ))
138+
139+ if (bond_angles_stats [i ][1 ] > 2 ):
140+ over_2 += 1
141+ else :
142+ under_2 += 1
143+
144+ print (f"Number of bond angles with a standard deviation of more than 2 degrees: { over_2 } " )
145+ print (f"Number of bond angles with a standard deviation of less than 2 degrees: { under_2 } " )
146+
147+
148+ # Clear
149+ plt .clf ()
150+ plt .hist (bond_a , bins = 100 )
151+ plt .xlabel (f"Bond angle (degrees) { bond } { bond_angles_stats [i ][0 ]} +- { bond_angles_stats [i ][1 ]} " )
152+ plt .ylabel ("Frequency" )
153+ plt .title ("Bond angle distribution" )
154+ plt .savefig (gen_path (f"training_bond_angle_distribution_{ i } .png" ), ** savefig_kwargs )
0 commit comments