From 5ac94cfb872584ac09b8fde6e5506ecb3544dc47 Mon Sep 17 00:00:00 2001
From: Andreas Klostermann <andreasklostermann@gmail.com>
Date: Mon, 15 Feb 2016 21:45:01 +0100
Subject: [PATCH] Clean up getmodels.py

---
 getmodels.py | 70 ++++++++++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/getmodels.py b/getmodels.py
index ee52bc9..d865949 100755
--- a/getmodels.py
+++ b/getmodels.py
@@ -1,59 +1,65 @@
 #!/usr/bin/env python
+"""
+    getmodels: A program to extract model files from Nanopore reads.
+               You must provide a read that has yielded a 2D read,
+               i.e. one that comes from the pass folder.
+"""
 import sys
 import h5py
+import pandas as pd
 import configargparse
 
-def get_model_location(hdf,strand):
-    #print filepath
 
+def get_model_location(hdf,strand):
+    """ Return the four nested node names leading to the model table for strand "strand". """
     for element in hdf:
-        #print element
         for element2 in hdf[element]:
-            #print element2
             for element3 in hdf[element][element2]:
-                #print element3
                 try:
                     for element4 in hdf[element][element2][element3]:
-                        #if "model" in (element,element2,element3,element4):
                         if any("Model" in s for s in [element,element2,element3,element4]):
                             if any(strand in s for s in [element,element2,element3,element4]):
                                 return element,element2,element3,element4
                 except:
-                    print ""
-
-
+                    pass
 
 
-if __name__ == "__main__":
-
+def main():
     parser = configargparse.ArgParser(description='getmodels: A program to extract model files from Nanopore reads. You must provide a read that has given a 2D read - i.e comes from the pass folder.')
     parser.add('-read', '--read', type=str, dest='read', required=True, default=None, help="Provide a read file to extract the current model from.")
     args = parser.parse_args()
     hdf = h5py.File(args.read, 'r')
-    temp1,temp2,temp3,temp4 = get_model_location(hdf,"template")
-    comp1,comp2,comp3,comp4 = get_model_location(hdf,"complement")
-
-    print temp1,temp2,temp3,temp4
-    print comp1,comp2,comp3,comp4
-
-    file = open("template.model", "w")
-    for thing in hdf[temp1][temp2][temp3][temp4]:
-        writestring = str(thing[0])+"\t"+str(thing[1])+"\t"+str(thing[2])+"\n"
-        file.write(writestring)
-        #print thing[0],thing[1],thing[2]
-    file.close()
-
-    file = open("complement.model", "w")
-    for thing in hdf[comp1][comp2][comp3][comp4]:
-        writestring = str(thing[0])+"\t"+str(thing[1])+"\t"+str(thing[2])+"\n"
-        file.write(writestring)
-        #print thing[0],thing[1],thing[2]
-    file.close()
+    try:
+        template_path = "/".join(get_model_location(hdf,"template"))
+        complement_path = "/".join(get_model_location(hdf,"complement"))
+    except TypeError:
+        print "ERROR: This file does not contain template and complement models."
+
+        hdf.close()
+        sys.exit(1)
 
     hdf.close()
+    print "Found model date in these paths:"
+    print template_path
+    print complement_path
+    print
+
+    columns = ['kmer', 'level_mean', 'level_stdv']
+
+    df = pd.read_hdf(args.read, template_path)
+    df = df[columns]
+    df.to_csv("template.model", sep="\t", index=False)
+
+    df = pd.read_hdf(args.read, complement_path)
+    df = df[columns]
+    df.to_csv("complement.model", sep="\t", index=False)
+
 
     print "File Write Completed."
-    print "Kmer length is:",len(str(thing[0]))
+    print "Kmer length is:", len(df.kmer[0])
     print "File format is:"
     print "Kmer\tMean\tStandard Dev"
-    sys.exit()
+
+
+if __name__ == "__main__":
+    main()