Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 175 additions & 0 deletions python/DRMLookupTable.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
HIV Gene region,Wildtype,Position (WHO Paper ),Position (UniProt),Position (RefSeq),Position (Genbank),DRM Mutation,Class
RT,M,41,41,628,105,L,NRTI
RT,A,62,62,649,126,V,NRTI
RT,NONE,69,69,656,133,INSERT,NRTI
RT,K,70,70,657,134,R,NRTI
RT,L,210,210,797,274,W,NRTI
RT,T,215,215,802,279,Y,NRTI
RT,T,215,215,802,279,F,NRTI
RT,K,219,219,806,283,Q,NRTI
RT,K,219,219,806,283,E,NRTI
RT,A,62,62,711,188,V,NRTI
RT,V,75,75,662,139,I,NRTI
RT,F,77,77,664,141,L,NRTI
RT,F,116,116,703,180,Y,NRTI
RT,Q,151,151,738,215,M,NRTI
RT,D,67,67,654,131,N,NRTI
RT,K,65,65,652,129,R,NRTI
RT,K,65,65,652,129,E,NRTI
RT,K,65,65,652,129,N,NRTI
RT,L,74,74,661,138,V,NRTI
RT,Y,115,115,702,179,F,NRTI
RT,M,184,184,771,248,V,NRTI
RT,M,184,184,771,248,I,NRTI
RT,K,70,70,657,134,E,NRTI
RT,D,67,67,654,131,N,NRTI
RT,V,106,106,693,170,A,NNRTI
RT,V,106,106,693,170,I,NNRTI
RT,V,106,106,693,170,M,NNRTI
RT,V,106,106,693,170,T,NNRTI
RT,Y,188,188,775,252,C,NNRTI
RT,Y,188,188,775,252,L,NNRTI
RT,Y,188,188,775,252,H,NNRTI
RT,G,190,190,777,254,E,NNRTI
RT,P,225,225,812,289,H,NNRTI
RT,F,227,227,814,291,C,NNRTI
RT,F,227,227,814,291,L,NNRTI
RT,F,227,227,814,291,R,NNRTI
RT,M,230,230,817,294,L,NNRTI
RT,L,234,234,821,298,I,NNRTI
RT,L,100,100,687,164,I,NNRTI
RT,K,101,101,688,165,P,NNRTI
RT,K,103,103,690,167,N,NNRTI
RT,K,103,103,690,167,S,NNRTI
RT,V,108,108,695,172,I,NNRTI
RT,Y,181,181,768,245,C,NNRTI
RT,Y,181,181,768,245,I,NNRTI
RT,G,190,190,777,254,S,NNRTI
RT,G,190,190,777,254,A,NNRTI
RT,V,90,90,677,154,I,NNRTI
RT,A,98,98,685,162,G,NNRTI
RT,L,100,100,687,164,I,NNRTI
RT,K,101,101,688,165,E,NNRTI
RT,K,101,101,688,165,H,NNRTI
RT,K,101,101,688,165,P,NNRTI
RT,E,138,138,725,202,A,NNRTI
RT,E,138,138,725,202,G,NNRTI
RT,E,138,138,725,202,K,NNRTI
RT,E,138,138,725,202,Q,NNRTI
RT,V,179,179,766,243,D,NNRTI
RT,V,179,179,766,243,F,NNRTI
RT,V,179,179,766,243,T,NNRTI
RT,Y,181,181,768,245,V,NNRTI
RT,E,138,138,725,202,R,NNRTI
RT,V,179,179,766,243,L,NNRTI
RT,H,221,221,808,285,Y,NNRTI
RT,M,230,230,817,294,I,NNRTI
PR,L,10,10,498,Unavailable,I,PI
PR,L,10,10,498,Unavailable,F,PI
PR,L,10,10,498,Unavailable,V,PI
PR,L,10,10,498,Unavailable,C,PI
PR,G,16,16,504,Unavailable,E,PI
PR,K,20,20,508,Unavailable,R,PI
PR,K,20,20,508,Unavailable,M,PI
PR,K,20,20,508,Unavailable,I,PI
PR,K,20,20,508,Unavailable,T,PI
PR,K,20,20,508,Unavailable,V,PI
PR,L,24,24,512,Unavailable,I,PI
PR,D,30,30,518,Unavailable,N,PI
PR,V,32,32,520,Unavailable,I,PI
PR,L,33,33,521,Unavailable,I,PI
PR,L,33,33,521,Unavailable,F,PI
PR,L,33,33,521,Unavailable,V,PI
PR,E,34,34,522,Unavailable,Q,PI
PR,M,36,36,524,1,I,PI
PR,M,36,36,524,1,L,PI
PR,M,36,36,524,1,V,PI
PR,M,46,46,534,11,I,PI
PR,M,46,46,534,11,L,PI
PR,G,48,48,536,13,V,PI
PR,I,50,50,538,15,L,PI
PR,F,53,53,541,18,L,PI
PR,F,53,53,541,18,Y,PI
PR,I,54,54,542,19,L,PI
PR,I,54,54,542,19,V,PI
PR,I,54,54,542,19,M,PI
PR,I,54,54,542,19,T,PI
PR,I,54,54,542,19,A,PI
PR,D,60,60,548,25,E,PI
PR,I,62,62,550,27,V,PI
PR,I,64,64,552,29,L,PI
PR,I,64,64,552,29,M,PI
PR,I,64,64,552,29,V,PI
PR,A,71,71,559,36,V,PI
PR,A,71,71,559,36,I,PI
PR,A,71,71,559,36,T,PI
PR,A,71,71,559,36,L,PI
PR,G,73,73,561,38,C,PI
PR,G,73,73,561,38,S,PI
PR,G,73,73,561,38,T,PI
PR,G,73,73,561,38,A,PI
PR,V,77,77,565,42,I,PI
PR,V,82,82,570,47,A,PI
PR,V,82,82,570,47,T,PI
PR,V,82,82,570,47,F,PI
PR,V,82,82,570,47,I,PI
PR,I,84,84,572,49,V,PI
PR,I,85,85,573,50,V,PI
PR,N,88,88,576,53,D,PI
PR,N,88,88,576,53,S,PI
PR,L,90,90,578,55,M,PI
PR,I,93,93,581,58,L,PI
PR,I,93,93,581,58,M,PI
PR,V,11,11,499,-24,I,PI
PR,I,47,47,535,12,V,PI
PR,I,50,50,538,15,V,PI
PR,T,74,74,562,39,P,PI
PR,L,76,76,564,41,V,PI
PR,L,89,89,577,54,V,PI
PR,L,10,10,498,-25,R,PI
PR,I,47,47,535,12,V,PI
PR,I,47,47,535,12,A,PI
PR,I,54,54,542,19,S,PI
PR,L,63,63,551,28,P,PI
PR,I,76,76,564,41,V,PI
PR,V,82,82,570,47,S,PI
PR,K,43,43,531,8,T,PI
PR,Q,58,58,546,23,E,PI
PR,H,69,69,557,34,K,PI
PR,H,69,69,557,34,R,PI
PR,T,74,74,562,39,P,PI
PR,V,82,82,570,47,L,PI
PR,N,83,83,571,48,D,PI
PR,L,89,89,577,54,I,PI
PR,L,89,89,577,54,M,PI
PR,L,89,89,577,54,V,PI
INT,G,118,118,1265,742,R,INSTI
INT,E,138,138,1285,762,K,INSTI
INT,G,140,140,1287,764,S,INSTI
INT,Q,148,148,1295,772,H,INSTI
INT,R,263,263,1410,887,K,INSTI
INT,T,66,66,1213,690,K,INSTI
INT,E,138,138,1285,762,A,INSTI
INT,E,138,138,1285,762,T,INSTI
INT,G,140,140,1287,764,A,INSTI
INT,G,140,140,1287,764,C,INSTI
INT,G,140,140,1287,764,R,INSTI
INT,G,140,140,1287,764,S,INSTI
INT,Q,148,148,1295,772,H,INSTI
INT,Q,148,148,1295,772,K,INSTI
INT,Q,148,148,1295,772,R,INSTI
INT,S,153,153,1300,777,F,INSTI
INT,S,153,153,1300,777,Y,INSTI
INT,N,155,155,1302,779,H,INSTI
INT,F,121,121,1268,745,Y,INSTI
INT,T,66,66,1213,690,I,INSTI
INT,T,66,66,1213,690,A,INSTI
INT,T,66,66,1213,690,K,INSTI
INT,E,92,92,1239,716,Q,INSTI
INT,E,92,92,1239,716,G,INSTI
INT,T,97,97,1244,721,A,INSTI
INT,S,147,147,1294,771,G,INSTI
INT,L,74,74,1221,698,M,INSTI
INT,Y,143,143,1290,767,R,INSTI
INT,Y,143,143,1290,767,H,INSTI
INT,Y,143,143,1290,767,C,INSTI
8 changes: 8 additions & 0 deletions python/DRM_Report.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#,Hit #,AAPOS,AAREF,AASUB,CDS,Description
58,35,20,K,R,NP_057849.4,PI resistance
60,46,36,M,I,NP_057849.4,PI resistance
65,95,69,H,K,NP_057849.4,PI resistance
66,101,89,L,M,NP_057849.4,PI resistance
69,141,103,K,N,NP_057849.4,NNRTI resistance
74,154,138,E,A,NP_057849.4,NNRTI resistance
206,192,74,L,M,NP_057849.4,INSTI resistance
59 changes: 59 additions & 0 deletions python/positionFinder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# This script will return the position (along with -5 to +5 away from the position) that you ask for from the HXB2 sequence.

###################################################################################################

# Using this script, I searched (ctrl + F) for snippets pulled from the Stanford site:
# https://hivdb.stanford.edu/page/release-notes/#appendix.1.consensus.b.sequences
# I compared these to the different versions of the Consensus B sequence from LANL:
# https://www.hiv.lanl.gov/cgi-bin/CONSENSUS_DOWNLOAD/ConsensusDownloader.cgi
# (Download format: FASTA, Computer type: Unix, pull data for proteins no gaps (unaligned): GAG and POL)

# I was able to determine an offset when I search for positions listed in the "otherMuts.csv" file
# (which comes from StanfordDB).
# PR offset = 56
# RT offset = 155
# INT offset = 715
# But each of these really needs to subtract 1, possibly because the Methionine gets clipped off by the host and they don't count it
# (or simply because Python string indices start at 0 while residue positions start at 1).

# So I think the fix is pretty simple now. It's currently calling column E from Millicent's table (same directory as this script)
# It just needs to report column D or C (I think these are the same).
# So a quick post-processing script that uses a lookup table should fix it.
# Then wrap the whole thing up in a one-click pipeline.
# Then you can look into adding more data, but the "otherMuts.csv" doesn't report which part of the gag-pol it comes from,
# so this will take some categorization first, and it's a pretty big step!

###################################################################################################

# This is the HXB2 sequence from here: https://www.ncbi.nlm.nih.gov/nuccore/NC_001802.1
# Referenced by UniProt here: https://www.uniprot.org/uniprotkb/P04585/entry#sequences
# And here: https://rest.uniprot.org/uniprotkb/P04585.txt (search for refseq there)
mySeq = 'MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED'
consensusB = 'MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKTVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ$'
bAnc = 'MGARASVLSGGKLDKWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTTIMMQRGNFRDQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKELYPLASLKSLFGNDPSSQ$'
conB2 = 'FFREDLAFPQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED$'
fullBAncillary = 'FFRENLAFPQGKAREFSSEQTRANSPTRRELQVWGRDNNPLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFILKLAGRWPVKVIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED$'

def seqViewer(sequence, position):
    """Return a two-line text view of ``sequence`` centred on ``position``.

    The first line shows up to five residues before the centre, the centre
    residue itself, and up to five residues after it, framed by the labels
    ``-5`` and ``+5``. The second line holds a caret placed directly under
    the centre residue.

    Args:
        sequence: The residue string to inspect.
        position: 0-based index of the centre residue; anything ``int()``
            accepts (an int or a numeric string).

    Returns:
        The formatted view as a single string containing one newline.

    Raises:
        IndexError: If ``position`` is outside ``0 .. len(sequence) - 1``.
    """
    centre = int(position)
    # Reject out-of-range centres explicitly. A negative index would
    # otherwise wrap around silently and show residues from the far end.
    if not 0 <= centre < len(sequence):
        raise IndexError(
            'position %d is outside the sequence (length %d)'
            % (centre, len(sequence))
        )

    flank = 5
    # Slices clamp at the sequence ends, so a centre near either end gets a
    # shorter flank instead of wrapped-around residues or an IndexError.
    upstream = sequence[max(0, centre - flank):centre]
    downstream = sequence[centre + 1:centre + 1 + flank]

    prefix = '-5 ' + upstream + ' '
    view = prefix + sequence[centre] + ' ' + downstream + ' +5'
    # Pad the caret by the prefix width so it sits under the centre residue
    # even when the upstream flank was truncated.
    return view + '\n' + ' ' * len(prefix) + '^'

# Residue number to inspect, counted from 1 within the mature protein
# (protease, reverse transcriptase or integrase).
pos = 82
# modPos = int(pos) + 155

# Offsets of each protein within the conB2 string, as determined in the
# header notes of this script.
PR_OFFSET = 56
RT_OFFSET = 155
INT_OFFSET = 715

# Residue numbers start at 1 but Python string indices start at 0, so the
# index handed to seqViewer is offset + pos - 1. With the -1, PR position 10
# lands on 'L' and PR position 11 on 'V' in conB2, which matches the wildtype
# residues listed for those positions in DRMLookupTable.csv.
pr = int(pos) + PR_OFFSET - 1
rt = int(pos) + RT_OFFSET - 1
integrase = int(pos) + INT_OFFSET - 1

# print ("Consensus B:\n" + seqViewer(consensusB, pos))
# print ("RefSeq:\n" + seqViewer(mySeq, pos))
# print ("B Ancillary:\n" + seqViewer(bAnc, pos))
# print ("Full Con B:\n" + seqViewer(conB2, pos))
# print ("Full Con B Ancillary:\n" + seqViewer(fullBAncillary, pos))
# print ("Modified Full Con B:\n" + seqViewer(conB2, modPos))
# print ("Modified Full Con B Ancillary:\n" + seqViewer(fullBAncillary, modPos))


# Show the same residue number in each of the three proteins.
print ("Consensus B PR:\n" + seqViewer(conB2, pr))
print ("Consensus B RT:\n" + seqViewer(conB2, rt))
print ("Consensus B INT:\n" + seqViewer(conB2, integrase))

# print (len('FFREDLAFPQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVL'))
26 changes: 26 additions & 0 deletions python/positionShift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Rewrite the AAPOS column of a DRM hit report from RefSeq to WHO numbering.

Reads ``sampleOutput.csv`` (the hit report) and ``DRMLookupTable.csv`` (the
position lookup table) and writes ``DRM_Report.csv``. Only hits whose
(RefSeq position, wildtype, mutation) triple appears in the lookup table are
written; lines starting with ``#`` are copied through unchanged.
"""

LOOKUP_HEADER = 'HIV Gene region'


def load_lookup(rows):
    """Build a {(RefSeq position, wildtype, mutation): WHO position} dict.

    Args:
        rows: Iterable of comma-separated lookup-table lines. Columns used:
            1 = wildtype, 2 = WHO position, 4 = RefSeq position,
            6 = DRM mutation.

    Returns:
        A dict keyed on the three matching fields. When the table lists the
        same triple more than once, the first row wins, so a hit is never
        reported twice.
    """
    table = {}
    for row in rows:
        fields = row.rstrip('\r\n').split(',')
        # Skip the header row and anything too short to hold the columns
        # we need (blank or truncated lines).
        if len(fields) < 7 or fields[0] == LOOKUP_HEADER:
            continue
        table.setdefault((fields[4], fields[1], fields[6]), fields[2])
    return table


def shift_positions(report_lines, lookup):
    """Translate the position column of every matching hit.

    Args:
        report_lines: Iterable of comma-separated report lines with the
            columns ``#, Hit #, AAPOS, AAREF, AASUB, CDS, Description``.
        lookup: Mapping produced by ``load_lookup``.

    Returns:
        A list of output lines without trailing newlines: ``#`` lines
        verbatim (stripped), followed in input order by each hit found in
        ``lookup`` with its position replaced.

    Raises:
        ValueError: If a non-blank data line has fewer than seven fields.
    """
    shifted = []
    for line_no, line in enumerate(report_lines, start=1):
        if not line.strip():
            # A blank line carries no hit; skip it rather than crash.
            continue
        if line.startswith('#'):
            shifted.append(line.strip())
            continue
        fields = line.split(',')
        if len(fields) < 7:
            raise ValueError(
                'report line %d has %d fields, expected at least 7: %r'
                % (line_no, len(fields), line)
            )
        number, hit, pos, wt, mut, cds, description = fields[:7]
        new_pos = lookup.get((pos, wt, mut))
        if new_pos is not None:
            shifted.append(','.join(
                (number, hit, new_pos, wt, mut, cds, description.strip())
            ))
    return shifted


def main(report_path='sampleOutput.csv',
         lookup_path='DRMLookupTable.csv',
         out_path='DRM_Report.csv'):
    """Read both inputs, then write the translated report to ``out_path``."""
    # errors='replace' keeps a stray mis-encoded byte in the lookup header
    # from aborting the run; the matching columns are plain ASCII.
    with open(lookup_path, 'r', encoding='utf-8', errors='replace') as lookup_file:
        lookup = load_lookup(lookup_file)
    with open(report_path, 'r', encoding='utf-8', errors='replace') as in_file:
        shifted = shift_positions(in_file, lookup)
    # Open the output only after both inputs were read successfully, so a
    # missing input no longer leaves a truncated, empty report behind.
    with open(out_path, 'w', encoding='utf-8') as out_file:
        for row in shifted:
            print(row, file=out_file)


if __name__ == '__main__':
    main()

8 changes: 8 additions & 0 deletions python/sampleOutput.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#,Hit #,AAPOS,AAREF,AASUB,CDS,Description
58,35,508,K,R,NP_057849.4,PI resistance
60,46,524,M,I,NP_057849.4,PI resistance
65,95,557,H,K,NP_057849.4,PI resistance
66,101,577,L,M,NP_057849.4,PI resistance
69,141,690,K,N,NP_057849.4,NNRTI resistance
74,154,725,E,A,NP_057849.4,NNRTI resistance
206,192,1221,L,M,NP_057849.4,INSTI resistance