11import re
22
33from Bio import SeqIO
4+
45from .bidirectionalmap .bidirectionalmap import BidirectionalMap
56
7+
68def get_contig_lengths_spades (contigs_file ):
79 # Get length and coverage of contigs
810 contig_lengths = {}
@@ -11,26 +13,25 @@ def get_contig_lengths_spades(contigs_file):
1113 my_map = BidirectionalMap ()
1214
1315 for index , record in enumerate (SeqIO .parse (contigs_file , "fasta" )):
14- start = ' NODE_'
15- end = ' _length'
16- contig_num = int (re .search (' %s(.*)%s' % (start , end ), record .id ).group (1 ))
17-
18- start = ' _length_'
19- end = ' _cov'
20- length = int (re .search (' %s(.*)%s' % (start , end ), record .id ).group (1 ))
21-
22- start = ' _cov_'
23- end = ''
24- coverage = int (float (re .search (' %s(.*)%s' % (start , end ), record .id ).group (1 )))
25-
16+ start = " NODE_"
17+ end = " _length"
18+ contig_num = int (re .search (" %s(.*)%s" % (start , end ), record .id ).group (1 ))
19+
20+ start = " _length_"
21+ end = " _cov"
22+ length = int (re .search (" %s(.*)%s" % (start , end ), record .id ).group (1 ))
23+
24+ start = " _cov_"
25+ end = ""
26+ coverage = int (float (re .search (" %s(.*)%s" % (start , end ), record .id ).group (1 )))
27+
2628 contig_lengths [contig_num ] = length
2729 coverages [contig_num ] = coverage
2830
2931 return contig_lengths , coverages
3032
3133
3234def get_contig_paths_spades (contig_paths ):
33-
3435 paths = {}
3536 segment_contigs = {}
3637 node_count = 0
@@ -44,34 +45,33 @@ def get_contig_paths_spades(contig_paths):
4445 with open (contig_paths ) as file :
4546 name = file .readline ()
4647 path = file .readline ()
47-
48+
4849 while name != "" and path != "" :
49-
5050 while ";" in path :
51- path = path [:- 2 ]+ "," + file .readline ()
52-
53- start = ' NODE_'
54- end = ' _length_'
55- contig_num = str (int (re .search (' %s(.*)%s' % (start , end ), name ).group (1 )))
56-
51+ path = path [:- 2 ] + "," + file .readline ()
52+
53+ start = " NODE_"
54+ end = " _length_"
55+ contig_num = str (int (re .search (" %s(.*)%s" % (start , end ), name ).group (1 )))
56+
5757 segments = path .rstrip ().split ("," )
5858
5959 if current_contig_num != contig_num :
6060 my_map [node_count ] = int (contig_num )
6161 contig_names [node_count ] = name .strip ()
6262 current_contig_num = contig_num
6363 node_count += 1
64-
64+
6565 if contig_num not in paths :
6666 paths [contig_num ] = [segments [0 ], segments [- 1 ]]
67-
67+
6868 for segment in segments :
6969 if segment not in segment_contigs :
7070 segment_contigs [segment ] = set ([contig_num ])
7171 else :
7272 segment_contigs [segment ].add (contig_num )
73-
73+
7474 name = file .readline ()
7575 path = file .readline ()
7676
77- return my_map ,
77+ return ( my_map ,)
0 commit comments