From 5ac94cfb872584ac09b8fde6e5506ecb3544dc47 Mon Sep 17 00:00:00 2001 From: Andreas Klostermann Date: Mon, 15 Feb 2016 21:45:01 +0100 Subject: [PATCH] Clean up getmodels.py --- getmodels.py | 70 ++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/getmodels.py b/getmodels.py index ee52bc9..d865949 100755 --- a/getmodels.py +++ b/getmodels.py @@ -1,59 +1,65 @@ #!/usr/bin/env python +""" + getmodels: A program to extract model files from Nanopore reads. + You must provide a read that has given a 2D read + - i.e comes from the pass folder. +""" import sys import h5py +import pandas as pd import configargparse -def get_model_location(hdf,strand): - #print filepath +def get_model_location(hdf,strand): + """ Find path for node with a model table for strand "strand". """ for element in hdf: - #print element for element2 in hdf[element]: - #print element2 for element3 in hdf[element][element2]: - #print element3 try: for element4 in hdf[element][element2][element3]: - #if "model" in (element,element2,element3,element4): if any("Model" in s for s in [element,element2,element3,element4]): if any(strand in s for s in [element,element2,element3,element4]): return element,element2,element3,element4 except: - print "" - - + pass -if __name__ == "__main__": - +def main(): parser = configargparse.ArgParser(description='getmodels: A program to extract model files from Nanopore reads. You must provide a read that has given a 2D read - i.e comes from the pass folder.') parser.add('-read', '--read', type=str, dest='read', required=True, default=None, help="Provide a read file to extract the current model from.") args = parser.parse_args() hdf = h5py.File(args.read, 'r') - temp1,temp2,temp3,temp4 = get_model_location(hdf,"template") - comp1,comp2,comp3,comp4 = get_model_location(hdf,"complement") - - print temp1,temp2,temp3,temp4 - print comp1,comp2,comp3,comp4 - - file = open("template.model", "w") - for thing in hdf[temp1][temp2][temp3][temp4]: - writestring = str(thing[0])+"\t"+str(thing[1])+"\t"+str(thing[2])+"\n" - file.write(writestring) - #print thing[0],thing[1],thing[2] - file.close() - - file = open("complement.model", "w") - for thing in hdf[comp1][comp2][comp3][comp4]: - writestring = str(thing[0])+"\t"+str(thing[1])+"\t"+str(thing[2])+"\n" - file.write(writestring) - #print thing[0],thing[1],thing[2] - file.close() + try: + template_path = "/".join(get_model_location(hdf,"template")) + complement_path = "/".join(get_model_location(hdf,"complement")) + except TypeError: + print "ERROR: This file does not contain template and complement models." + + hdf.close() + sys.exit(1) hdf.close() + print "Found model date in these paths:" + print template_path + print complement_path + print + + columns = ['kmer', 'level_mean', 'level_stdv'] + + df = pd.read_hdf(args.read, template_path) + df = df[columns] + df.to_csv("template.model", sep="\t", index=False) + + df = pd.read_hdf(args.read, complement_path) + df = df[columns] + df.to_csv("complement.model", sep="\t", index=False) + print "File Write Completed." - print "Kmer length is:",len(str(thing[0])) + print "Kmer length is:", len(df.kmer[0]) print "File format is:" print "Kmer\tMean\tStandard Dev" - sys.exit() + + +if __name__ == "__main__": + main()