Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 38 additions & 32 deletions getmodels.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,65 @@
#!/usr/bin/env python
"""
getmodels: A program to extract model files from Nanopore reads.
You must provide a read file that has yielded a 2D read
- i.e. one that comes from the pass folder.
"""
import sys
import h5py
import pandas as pd
import configargparse

def get_model_location(hdf, strand):
    """Find the path of the node holding the model table for ``strand``.

    Walks ``hdf`` four levels deep and returns the first chain of names in
    which at least one name contains ``"Model"`` and at least one name
    contains ``strand``.

    Args:
        hdf: A mapping-like container that yields child names when iterated
            and returns the child when indexed by name (the caller passes an
            open ``h5py.File``).
        strand: Substring identifying the strand, e.g. ``"template"`` or
            ``"complement"``.

    Returns:
        A 4-tuple ``(element, element2, element3, element4)`` of node names,
        or ``None`` when no matching node exists.
    """
    for element in hdf:
        level1 = hdf[element]
        for element2 in level1:
            level2 = level1[element2]
            for element3 in level2:
                try:
                    for element4 in level2[element3]:
                        names = [element, element2, element3, element4]
                        if any("Model" in s for s in names):
                            if any(strand in s for s in names):
                                return element, element2, element3, element4
                except Exception:
                    # Best-effort scan: third-level nodes that cannot be
                    # iterated, or whose children are not strings, are
                    # skipped. Catching Exception rather than using a bare
                    # except lets KeyboardInterrupt/SystemExit propagate.
                    pass
    return None


def _export_model(read_path, node_path, out_path, columns):
    """Write ``columns`` of the table at ``node_path`` to a tab-separated file and return the frame."""
    df = pd.read_hdf(read_path, node_path)
    df = df[columns]
    df.to_csv(out_path, sep="\t", index=False)
    return df


def main():
    """Extract the template and complement model tables from a read file.

    Parses the ``--read`` argument, locates the two model nodes with
    ``get_model_location``, and writes the ``kmer``, ``level_mean`` and
    ``level_stdv`` columns of each to ``template.model`` and
    ``complement.model`` in the current directory.

    Exits with status 1 when either model node cannot be found, and with
    status 0 after both files have been written.
    """
    parser = configargparse.ArgParser(description='getmodels: A program to extract model files from Nanopore reads. You must provide a read that has given a 2D read - i.e comes from the pass folder.')
    parser.add('-read', '--read', type=str, dest='read', required=True, default=None, help="Provide a read file to extract the current model from.")
    args = parser.parse_args()

    hdf = h5py.File(args.read, 'r')
    try:
        # get_model_location returns None when nothing matches, which makes
        # "/".join raise TypeError.
        template_path = "/".join(get_model_location(hdf, "template"))
        complement_path = "/".join(get_model_location(hdf, "complement"))
    except TypeError:
        print("ERROR: This file does not contain template and complement models.")
        sys.exit(1)
    finally:
        # Release the handle on every path (success, missing model, or an
        # unexpected error) before pandas reopens the file below.
        hdf.close()

    print("Found model data in these paths:")
    print(template_path)
    print(complement_path)
    print("")

    columns = ['kmer', 'level_mean', 'level_stdv']

    _export_model(args.read, template_path, "template.model", columns)
    df = _export_model(args.read, complement_path, "complement.model", columns)

    print("File Write Completed.")
    # Kmer length is reported from the first row of the complement table.
    print("Kmer length is: {0}".format(len(df.kmer[0])))
    print("File format is:")
    print("Kmer\tMean\tStandard Dev")
    sys.exit()


if __name__ == "__main__":
    main()