-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathIntRpts.py
More file actions
61 lines (56 loc) · 2.9 KB
/
IntRpts.py
File metadata and controls
61 lines (56 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import sys
import glob
import re
import subprocess
# Disabled pre-processing step, kept as a bare string literal so it is never
# executed. When enabled it would loop over MSAs/*.a3m, derive an ID from the
# file name (extension dropped, first four characters lower-cased) and shell
# out to hhalign, writing data/IntRpts/<id>_Score.txt for each alignment.
# NOTE(review): those *_Score.txt files appear to be the inputs of the
# collation loop further down -- confirm before deleting this snippet.
"""for filename in glob.glob("MSAs/*.a3m"):
pdb_id = filename.split("/")[-1][:-4]
pdb_id = pdb_id[0:4].lower() + pdb_id[4:]
os.system("hhalign -i %s -o data/IntRpts/%s_Score.txt"%(filename, pdb_id))
"""
# Build the lookup table `barrels`: first tab-separated column -> integer in
# the second column of BarrelChars85.txt. Any line containing "PDB" is
# ignored (presumably the header row -- confirm against the data file).
with open("BarrelChars85.txt", "r") as barrel_table:
    barrel_rows = (
        row.split("\t") for row in barrel_table if "PDB" not in row
    )
    # Later rows overwrite earlier ones that share the same key.
    barrels = {cols[0]: int(cols[1]) for cols in barrel_rows}
# Collate the accepted hits of every data/IntRpts/*_Score.txt file into one
# tab-separated table, annotated with the strand numbers covered by the query
# and template residue ranges.

# Acceptance thresholds applied to the first two numeric columns of a hit row.
_MIN_PROB = 75
_MAX_EVALUE = 1E-3


def _read_strand_rows(pdb_ID):
    """Return the data rows of InOut/InOut_<PDB>.txt as lists of columns.

    The file name is built from the first four characters of *pdb_ID*,
    upper-cased. Lines containing "Res" (presumably the header) and blank
    lines are skipped. The line ending is removed before splitting so the
    last column compares cleanly against a chain identifier.

    Raises:
        OSError: if the InOut file cannot be opened.
    """
    rows = []
    with open("InOut/InOut_%s.txt" % pdb_ID[0:4].upper(), "r") as strands:
        for line in strands:
            if "Res" in line or not line.strip():
                continue
            rows.append(line.rstrip("\r\n").split("\t"))
    return rows


def _strands_in_range(strand_rows, chain, first, last):
    """Return the sorted unique integers from column 3 of the matching rows.

    A row matches when its column 4 equals *chain* and the integer in its
    column 2 lies in the inclusive range *first*..*last*.
    """
    return sorted({
        int(row[2])
        for row in strand_rows
        if row[3] == chain and first <= int(row[1]) <= last
    })


with open("data/CollatedIntRpts.txt", "w+") as outdata:
    outdata.write("TotalStrands\tPDB\tProb\tE-value\tScore\tSS\tCols\tQuery HMM\tTemplate HMM\tLength\tQuery Strands\tTemplate Strands\n")
    for filename in glob.glob("data/IntRpts/*_Score.txt"):
        # basename() copes with whichever separator glob returns on this
        # platform; the slice drops the trailing "_Score.txt" (10 chars).
        pdb_ID = os.path.basename(filename)[:-10]
        print(pdb_ID)
        if pdb_ID not in barrels:
            continue

        # Keep everything from the "No Hit" header line up to (not
        # including) the first line containing "No 1".
        kept_lines = []
        with open(filename, "r") as score_file:
            keep_line = False
            for line in score_file:
                if "No Hit" in line:
                    keep_line = True
                if "No 1" in line:
                    keep_line = False
                if keep_line:
                    kept_lines.append(line)
        if kept_lines:
            print(pdb_ID)

        strand_rows = None  # InOut table, loaded lazily once per PDB
        # [1:-1] drops the header line and the final kept line.
        for hit in kept_lines[1:-1]:
            # NOTE(review): the fixed offset 36 is assumed to skip the hit
            # number/name columns; verify that a wide first numeric value
            # (e.g. 100.0) is not clipped by it.
            value = hit[36:].strip().split()
            print(value)
            if len(value) < 8:
                # Blank or truncated row: nothing to parse.
                continue
            if float(value[0]) < _MIN_PROB or float(value[1]) > _MAX_EVALUE:
                continue

            query = [int(x) for x in value[6].split("-")]
            # The template range may have a "(length)" suffix glued to it.
            template = [int(x) for x in value[7].split("(")[0].split("-")]
            print(query, template)

            if strand_rows is None:
                strand_rows = _read_strand_rows(pdb_ID)
            chain = pdb_ID[-1]
            query_strands = _strands_in_range(strand_rows, chain, query[0], query[1])
            template_strands = _strands_in_range(strand_rows, chain, template[0], template[1])

            # NOTE(review): value[1] (the figure filtered on above) is not
            # written; the "E-value" header column receives value[2].
            # Confirm this is intended before relying on that column.
            outdata.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (
                barrels[pdb_ID],
                pdb_ID,
                value[0],
                "\t".join(value[2:]),
                ",".join(map(str, query_strands)),
                ",".join(map(str, template_strands)),
            ))