From 4f6beeb967a0484a99c0a7548691fc99da61d876 Mon Sep 17 00:00:00 2001 From: Jonathon Keeney Date: Wed, 21 Sep 2022 14:37:34 -0400 Subject: [PATCH] Add files via upload Files for working with HIV reference sequences and matching their coordinate systems to HIVE's DRM output. --- python/DRMLookupTable.csv | 175 ++++++++++++++++++++++++++++++++++++++ python/DRM_Report.csv | 8 ++ python/positionFinder.py | 59 +++++++++++++ python/positionShift.py | 26 ++++++ python/sampleOutput.csv | 8 ++ 5 files changed, 276 insertions(+) create mode 100644 python/DRMLookupTable.csv create mode 100644 python/DRM_Report.csv create mode 100644 python/positionFinder.py create mode 100644 python/positionShift.py create mode 100644 python/sampleOutput.csv diff --git a/python/DRMLookupTable.csv b/python/DRMLookupTable.csv new file mode 100644 index 0000000..53f4505 --- /dev/null +++ b/python/DRMLookupTable.csv @@ -0,0 +1,175 @@ +HIV Gene region,Wildtype,Position (WHO Paper ),Position (UniProt)�,Position (RefSeq),Position (Genbank),DRM Mutation,Class +RT,M,41,41,628,105,L,NRTI +RT,A,62,62,649,126,V,NRTI +RT,NONE,69,69,656,133,INSERT,NRTI +RT,K,70,70,657,134,R,NRTI +RT,L,210,210,797,274,W,NRTI +RT,T,215,215,802,279,Y,NRTI +RT,T,215,215,802,279,F,NRTI +RT,K,219,219,806,283,Q,NRTI +RT,K,219,219,806,283,E,NRTI +RT,A,62,62,711,188,V,NRTI +RT,V,75,75,662,139,I,NRTI +RT,F,77,77,664,141,L,NRTI +RT,F,116,116,703,180,Y,NRTI +RT,Q,151,151,738,215,M,NRTI +RT,D,67,67,654,131,N,NRTI +RT,K,65,65,652,129,R,NRTI +RT,K,65,65,652,129,E,NRTI +RT,K,65,65,652,129,N,NRTI +RT,L,74,74,661,138,V,NRTI +RT,Y,115,115,702,179,F,NRTI +RT,M,184,184,771,248,V,NRTI +RT,M,184,184,771,248,I,NRTI +RT,K,70,70,657,134,E,NRTI +RT,D,67,67,654,131,N,NRTI +RT,V,106,106,693,170,A,NNRTI +RT,V,106,106,693,170,I,NNRTI +RT,V,106,106,693,170,M,NNRTI +RT,V,106,106,693,170,T,NNRTI +RT,Y,188,188,775,252,C,NNRTI +RT,Y,188,188,775,252,L,NNRTI +RT,Y,188,188,775,252,H,NNRTI +RT,G,190,190,777,254,E,NNRTI +RT,P,225,225,812,289,H,NNRTI 
+RT,F,227,227,814,291,C,NNRTI +RT,F,227,227,814,292,L,NNRTI +RT,F,227,227,814,291,R,NNRTI +RT,M,230,230,817,294,L,NNRTI +RT,L,234,234,821,298,I,NNRTI +RT,L,100,100,687,164,I,NNRTI +RT,K,101,101,688,165,P,NNRTI +RT,K,103,103,690,167,N,NNRTI +RT,K,103,103,690,167,S,NNRTI +RT,V,108,108,695,172,I,NNRTI +RT,Y,181,181,768,245,C,NNRTI +RT,Y,181,181,768,245,I,NNRTI +RT,G,190,190,777,254,S,NNRTI +RT,G,190,190,777,254,A,NNRTI +RT,V,90,90,677,154,I,NNRTI +RT,A,98,98,685,162,G,NNRTI +RT,L,100,100,687,164,I,NNRTI +RT,K,101,101,688,165,E,NNRTI +RT,K,101,101,688,165,H,NNRTI +RT,K,101,101,688,165,P,NNRTI +RT,E,138,138,725,202,A,NNRTI +RT,E,138,138,725,202,G,NNRTI +RT,E,138,138,725,202,K,NNRTI +RT,E,138,138,725,202,Q,NNRTI +RT,V,179,179,766,243,D,NNRTI +RT,V,179,179,766,243,F,NNRTI +RT,V,179,179,766,243,T,NNRTI +RT,Y,181,181,768,245,V,NNRTI +RT,E,138,138,725,202,R,NNRTI +RT,V,179,179,766,243,L,NNRTI +RT,H,221,221,808,285,Y,NNRTI +RT,M,230,230,817,294,I,NNRTI +PR,L,10,10,498,Unavailable,I,PI +PR,L,10,10,498,Unavailable,F,PI +PR,L,10,10,498,Unavailable,V,PI +PR,L,10,10,498,Unavailable,C,PI +PR,G,16,16,504,Unavailable,E,PI +PR,K,20,20,508,Unavailable,R,PI +PR,K,20,20,508,Unavailable,M,PI +PR,K,20,20,508,Unavailable,I,PI +PR,K,20,20,508,Unavailable,T,PI +PR,K,20,20,508,Unavailable,V,PI +PR,L,24,24,512,Unavailable,I,PI +PR,D,30,30,518,Unavailable,N,PI +PR,V,32,32,520,Unavailable,I,PI +PR,L,33,33,521,Unavailable,I,PI +PR,L,33,33,521,Unavailable,F,PI +PR,L,33,33,521,Unavailable,V,PI +PR,E,34,34,522,Unavailable,Q,PI +PR,M,36,36,524,1,I,PI +PR,M,36,36,524,1,L,PI +PR,M,36,36,524,1,V,PI +PR,M,46,46,534,11,I,PI +PR,M,46,46,534,11,L,PI +PR,G,48,48,536,13,V,PI +PR,I,50,50,538,15,L,PI +PR,F,53,53,541,18,L,PI +PR,F,53,53,541,18,Y,PI +PR,I,54,54,542,19,L,PI +PR,I,54,54,542,19,V,PI +PR,I,54,54,542,19,M,PI +PR,I,54,54,542,19,T,PI +PR,I,54,54,542,19,A,PI +PR,D,60,60,548,25,E,PI +PR,I,62,62,550,27,V,PI +PR,I,64,64,552,29,L,PI +PR,I,64,64,552,29,M,PI +PR,I,64,64,552,29,V,PI +PR,A,71,71,559,36,V,PI 
+PR,A,71,71,559,36,I,PI +PR,A,71,71,559,36,T,PI +PR,A,71,71,559,36,L,PI +PR,G,73,73,561,38,C,PI +PR,G,73,73,561,38,S,PI +PR,G,73,73,561,38,T,PI +PR,G,73,73,561,38,A,PI +PR,V,77,77,565,42,I,PI +PR,V,82,82,570,47,A,PI +PR,V,82,82,570,47,T,PI +PR,V,82,82,570,47,F,PI +PR,V,82,82,570,47,I,PI +PR,I,84,84,572,49,V,PI +PR,I,85,85,573,50,V,PI +PR,N,88,88,576,53,D,PI +PR,N,88,88,576,53,S,PI +PR,L,90,90,578,55,M,PI +PR,I,93,93,581,58,L,PI +PR,I,93,93,581,58,M,PI +PR,V,11,11,499,-24,I,PI +PR,I,47,47,535,12,V,PI +PR,I,50,50,538,15,V,PI +PR,T,74,74,562,39,P,PI +PR,L,76,76,564,41,V,PI +PR,L,89,89,577,54,V,PI +PR,L,10,10,498,-25,R,PI +PR,I,47,47,535,12,V,PI +PR,I,47,47,535,12,A,PI +PR,I,54,54,542,19,S,PI +PR,L,63,63,551,28,P,PI +PR,I,76,76,564,41,V,PI +PR,V,82,82,570,47,S,PI +PR,K,43,43,531,8,T,PI +PR,Q,58,58,557,34,E,PI +PR,H,69,69,557,34,K,PI +PR,H,69,69,557,34,R,PI +PR,T,74,74,562,39,P,PI +PR,V,82,82,570,47,L,PI +PR,N,83,83,571,48,D,PI +PR,L,89,89,577,54,I,PI +PR,L,89,89,577,54,M,PI +PR,L,89,89,577,54,V,PI +INT,G,118,118,1265,742,R,INSTI +INT,E,138,138,1285,762,K,INSTI +INT,G,140,140,1287,764,S,INSTI +INT,Q,148,148,1295,772,H,INSTI +INT,R,263,263,1410,887,K,INSTI +INT,T,66,66,1213,690,K,INSTI +INT,E,138,138,1285,762,A,INSTI +INT,E,138,138,1285,762,T,INSTI +INT,G,140,140,1287,764,A,INSTI +INT,G,140,140,1287,764,C,INSTI +INT,G,140,140,1287,764,R,INSTI +INT,G,140,140,1287,764,S,INSTI +INT,Q,148,148,1295,772,H,INSTI +INT,Q,148,148,1295,772,K,INSTI +INT,Q,148,148,1295,772,R,INSTI +INT,S,153,153,1300,777,F,INSTI +INT,S,153,153,1300,777,Y,INSTI +INT,N,155,155,1302,779,H,INSTI +INT,F,121,121,1268,745,Y,INSTI +INT,T,66,66,1213,690,I,INSTI +INT,T,66,66,1213,690,A,INSTI +INT,T,66,66,1213,690,K,INSTI +INT,E,92,92,1239,716,Q,INSTI +INT,E,92,92,1239,716,G,INSTI +INT,T,97,97,1244,721,A,INSTI +INT,S,147,147,1294,771,G,INSTI +INT,L,74,74,1221,698,M,INSTI +INT,Y,143,143,1290,767,R,INSTI +INT,Y,143,143,1290,767,H,INSTI +INT,Y,143,143,1290,767,C,INSTI diff --git a/python/DRM_Report.csv 
b/python/DRM_Report.csv new file mode 100644 index 0000000..072eb55 --- /dev/null +++ b/python/DRM_Report.csv @@ -0,0 +1,8 @@ +#,Hit #,AAPOS,AAREF,AASUB,CDS,Description +58,35,20,K,R,NP_057849.4,PI resistance +60,46,36,M,I,NP_057849.4,PI resistance +65,95,69,H,K,NP_057849.4,PI resistance +66,101,89,L,M,NP_057849.4,PI resistance +69,141,103,K,N,NP_057849.4,NNRTI resistance +74,154,138,E,A,NP_057849.4,NNRTI resistance +206,192,74,L,M,NP_057849.4,INSTI resistance diff --git a/python/positionFinder.py b/python/positionFinder.py new file mode 100644 index 0000000..43d45e0 --- /dev/null +++ b/python/positionFinder.py @@ -0,0 +1,59 @@ +# This script will return the position (along with -5 to +5 away from the position) that you ask for from the HXB2 sequence. + +################################################################################################### + +# Using this script, I searched (ctrl + F) for snippets pulled from the Stanford site: +# https://hivdb.stanford.edu/page/release-notes/#appendix.1.consensus.b.sequences +# I compared these to the different versions of the Consensus B sequence from LANL: +# https://www.hiv.lanl.gov/cgi-bin/CONSENSUS_DOWNLOAD/ConsensusDownloader.cgi +# (Download format: FASTA, Computer type: Unix, pull data for proteins no gaps (unaligned): GAG and POL) + +# I was able to determine an offset when I search for positions listed in the "otherMuts.csv" file +# (which comes from StanfordDB). +# PR offset = 56 +# RT offset = 155 +# INT offset = 715 +# But each of these really needs to subtract 1, possibly because the Methionine gets clipped off by the host and they don't count it. + +# So I think the fix is pretty simple now. It's currently calling column E from Millicent's table (same directory as this script) +# It just needs to report column D or C (I think these are the same). +# So a quick post-processing script that uses a library lookup should fix it. +# Then wrap the whole thing up in a one-click pipeline. 
+# Then you can look into adding more data, but the "otherMuts.csv" doesn't report which part of the gag-pol it comes from, +# so this will take some categorization first, and it's a pretty big step! + +################################################################################################### + +# This is the HXB2 sequence from here: https://www.ncbi.nlm.nih.gov/nuccore/NC_001802.1 +# Referenced by UniProt here: https://www.uniprot.org/uniprotkb/P04585/entry#sequences +# And here: https://rest.uniprot.org/uniprotkb/P04585.txt (search for refseq there) +mySeq = 'MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLREDLAFLQGKAREFSSEQTRANSPTRRELQVWGRDNNSPSEAGADRQGTVSFNFPQVTLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKITTESIVIWGKTPKFKLPIQKETWETWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTNRGRQKVVTLTDTTNQKTELQAIYLALQDSGLEVNIVTDSQYALGIIQAQPDQSESELVNQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQDEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTGATVRAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRNPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMA
GDDCVASRQDED' +consensusB = 'MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALEKIEEEQNKSKKKAQQAAADTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKTVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGNDPSSQ$' +bAnc = 'MGARASVLSGGKLDKWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPALQTGSEELRSLYNTVATLYCVHQRIEVKDTKEALDKIEEEQNKSKKKAQQAAADTGNSSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPISILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMSQVTNSTTIMMQRGNFRDQRKIVKCFNCGKEGHIARNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKELYPLASLKSLFGNDPSSQ$' +conB2 = 
'FFREDLAFPQGKAREFSSEQTRANSPTRRELQVWGRDNNSLSEAGADRQGTVSFSFPQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFPISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVIPLTEEAELELAENREILKEPVHGVYYDPSKDLIAEIQKQGQGQWTYQIYQEPFKNLKTGKYARMRGAHTNDVKQLTEAVQKIATESIVIWGKTPKFKLPIQKETWEAWWTEYWQATWIPEWEFVNTPPLVKLWYQLEKEPIVGAETFYVDGAANRETKLGKAGYVTDRGRQKVVSLTDTTNQKTELQAIHLALQDSGLEVNIVTDSQYALGIIQAQPDKSESELVSQIIEQLIKKEKVYLAWVPAHKGIGGNEQVDKLVSAGIRKVLFLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKIILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTIHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVVESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPLWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED$' +fullBAncillary = 
def seqViewer(sequence, position):
    """Return a two-line text view of ``sequence`` centred on ``position``.

    The first line shows the five residues before the position, the residue
    at the position, and the five residues after it, framed by ``-5`` and
    ``+5`` labels. The second line holds a caret under the centre residue.

    Args:
        sequence: The sequence string to look into.
        position: Index of the centre residue; anything ``int()`` accepts.
            It is used directly as a Python (0-based) index, so a caller
            holding a 1-based coordinate has to subtract 1 first.

    Returns:
        A string of the form ``'-5 XXXXX X XXXXX +5\\n         ^'``. Where the
        window runs past either end of the sequence, the missing residues
        are shown as ``-`` so the centre stays in the same column.

    Raises:
        IndexError: If ``position`` does not fall inside ``sequence``.
    """
    flank = 5
    centre = int(position)

    # Reject out-of-range centres explicitly; a negative index would
    # otherwise wrap around and show residues from the end of the sequence.
    if not 0 <= centre < len(sequence):
        raise IndexError(
            'position ' + str(centre) + ' is outside the sequence (length '
            + str(len(sequence)) + ')'
        )

    # Slices clamp at the sequence ends; pad so both flanks stay 5 wide.
    before = sequence[max(centre - flank, 0):centre].rjust(flank, '-')
    after = sequence[centre + 1:centre + 1 + flank].ljust(flank, '-')

    prefix = '-5 ' + before + ' '
    # The caret column is derived from the prefix so it always sits under
    # the centre residue.
    return (prefix + sequence[centre] + ' ' + after + ' +5'
            + '\n' + ' ' * len(prefix) + '^')
"""Rewrite the positions in a DRM report from RefSeq to WHO-paper numbering.

Each data row of the input report is matched against the lookup table on
(RefSeq position, wildtype residue, mutant residue). Matching rows are
written out with the position replaced by the table's WHO-paper position.
"""
import sys


def _loadLookup(lookupPath):
    """Return {(RefSeq position, wildtype, mutation): WHO-paper position}."""
    table = {}
    # errors='replace': only plain columns are read from this file, so an
    # undecodable byte elsewhere (the table header is displayed with one)
    # should not abort the whole run.
    with open(lookupPath, 'r', errors='replace') as lookup:
        for row in lookup:
            cols = row.rstrip('\r\n').split(',')
            # Skip the header and any blank/short line that lacks the
            # columns read below.
            if len(cols) < 7 or cols[0] == 'HIV Gene region':
                continue
            # The table repeats some entries; keep the first one so a report
            # row is emitted once instead of once per duplicate.
            table.setdefault((cols[4], cols[1], cols[6]), cols[2])
    return table


def shiftPositions(inPath='sampleOutput.csv', outPath='DRM_Report.csv',
                   lookupPath='DRMLookupTable.csv'):
    """Write ``inPath`` to ``outPath`` with positions translated via the lookup.

    Lines starting with ``#`` are copied through (stripped). Data rows that
    match a lookup entry are written with the translated position; rows with
    no match are left out of the output and reported on stderr. Blank lines
    are ignored.

    Args:
        inPath: CSV report whose third column holds the RefSeq position.
        outPath: Destination CSV; overwritten.
        lookupPath: Lookup table CSV (wildtype in column 2, WHO-paper
            position in column 3, RefSeq position in column 5, mutation in
            column 7).
    """
    # Load the table before touching the output so a missing input does not
    # leave a truncated report behind.
    lookupTable = _loadLookup(lookupPath)

    with open(inPath, 'r') as inFile, open(outPath, 'w') as outFile:
        for line in inFile:
            if not line.strip():
                continue
            if line[0] == '#':
                print(line.strip(), file=outFile)
                continue

            fields = line.split(',')
            if len(fields) < 7:
                print('Skipping malformed row: ' + line.strip(),
                      file=sys.stderr)
                continue
            number, hit, pos, wt, mut, cds, description = fields[:7]

            newPos = lookupTable.get((pos, wt, mut))
            if newPos is None:
                print('No lookup entry for row: ' + line.strip(),
                      file=sys.stderr)
                continue

            print(','.join([number, hit, newPos, wt, mut, cds,
                            description.strip()]), file=outFile)


if __name__ == '__main__':
    shiftPositions()
0000000..cb372bd --- /dev/null +++ b/python/sampleOutput.csv @@ -0,0 +1,8 @@ +#,Hit #,AAPOS,AAREF,AASUB,CDS,Description +58,35,508,K,R,NP_057849.4,PI resistance +60,46,524,M,I,NP_057849.4,PI resistance +65,95,557,H,K,NP_057849.4,PI resistance +66,101,577,L,M,NP_057849.4,PI resistance +69,141,690,K,N,NP_057849.4,NNRTI resistance +74,154,725,E,A,NP_057849.4,NNRTI resistance +206,192,1221,L,M,NP_057849.4,INSTI resistance