diff --git a/scripts/DNAscan.py b/scripts/DNAscan.py index 961e769..45361bb 100644 --- a/scripts/DNAscan.py +++ b/scripts/DNAscan.py @@ -589,7 +589,7 @@ annovar_operations = "g,f,f" - annovar_protocols = "refGene,dbnsfp30a,clinvar_20170905," + annovar_protocols = "refGene,dbnsfp30a,clinvar_20200316" # Y. adapt DB to reference @@ -852,7 +852,7 @@ rg_option_hisat2 = " --rg-id %s --rg LB:%s --rg PL:%s --rg PU:%s --rg SM:%s " % ( RG_ID, RG_LB, RG_PL, RG_PU, RG_SM) - rg_option_bwa = " -R '@RG\tID:%s\tLB:%s\tPL:%s\tRGPU:%s\tSM:%s' " % ( + rg_option_bwa = " -R '@RG\\tID:%s\\tLB:%s\\tPL:%s\\tPU:%s\\tSM:%s' " % ( RG_ID, RG_LB, RG_PL, RG_PU, RG_SM) else: @@ -946,12 +946,14 @@ rg_option_hisat2 = " --rg-id %s --rg LB:%s --rg PL:%s --rg PU:%s --rg SM:%s " % ( RG_ID, RG_LB, RG_PL, RG_PU, RG_SM) - rg_option_bwa = " -R '@RG\tID:%s\tLB:%s\tPL:%s\tRGPU:%s\tSM:%s' " % ( + rg_option_bwa = " -R '@RG\\tID:%s\\tLB:%s\\tPL:%s\\tPU:%s\\tSM:%s' " % ( RG_ID, RG_LB, RG_PL, RG_PU, RG_SM) else: - rg_option = "" + rg_option_hisat2 = "" + + rg_option_bwa = "" os.system( "%shisat2 %s --no-softclip --no-spliced-alignment -p %s -x %s -U %s | %s %ssamtools view -Sb - | %ssambamba sort -t %s --tmpdir=%s -o %ssorted.bam /dev/stdin; %ssamtools index -@ %s %ssorted.bam" @@ -1047,7 +1049,7 @@ "WARNING: The presence of VC.log in logs is telling you that the variant calling was already peformed, please remove VC.log if you wish to perform this stage anyway\n" ) - variant_results_file = "%sresults/%s_sorted.vcf.gz" % (out, + variant_results_file = "%s%s_sorted.vcf.gz" % (out, sample_name) else: @@ -1107,7 +1109,7 @@ while counter < int(num_cpu) + 1: - command = "%sjava -jar %sGenomeAnalysisTK.jar %s -R %s -T HaplotypeCaller -I %s -L %smpileup_positions%s.bed -o %sgatk_indels%s.vcf" % ( + command = "%sjava -jar %sgatk-package-4.1.9.0-local.jar %s HaplotypeCaller -R %s -I %s -L %smpileup_positions%s.bed -O %sgatk_indels%s.vcf" % ( path_java, path_gatk, gatk_HC_custom_options, path_reference, bam_file, out, str(counter), out, str(counter)) @@ -1182,7 +1184,7 @@ "%svcftools --vcf %sfreebayes.vcf --minGQ 30 --minDP 2 --exclude-bed %smpileup_positions.bed --recode --recode-INFO-all --out %sSNPs_only" % (path_vcftools, out, out, out)) - os.system("%sSNPs_only.log" % (out)) + os.system("touch %sSNPs_only.log" % (out)) os.system( "bgzip %sSNPs_only.recode.vcf ; bgzip %sindels_only.recode.vcf " @@ -1193,7 +1195,7 @@ % (path_tabix, out, path_tabix, out)) os.system( - "%sjava -jar %sGenomeAnalysisTK.jar -T CombineVariants -minimalVCF -R %s --variant %sSNPs_only.recode.vcf.gz --variant %sindels_only.recode.vcf.gz -o %s%s.vcf --genotypemergeoption UNSORTED" + "%sjava -jar %sgatkpackage-4.1.9.0-local.jar MergeVcfs -R %s -I %sSNPs_only.recode.vcf.gz -I %sindels_only.recode.vcf.gz -O %s%s.vcf " % (path_java, path_gatk, path_reference, out, out, out, sample_name)) @@ -1350,11 +1352,11 @@ reference, annovar_protocols, annovar_operations, out)) if not debug and not alsgenescanner: os.system( - "rm %sannovar.vcf.hg19_multianno.txt %sannovar.vcf.avinput" % + "rm %sannovar.vcf.hg38_multianno.txt %sannovar.vcf.avinput" % (out, out)) os.system( - "mv %s/annovar.vcf.hg19_multianno.vcf %sresults/%s_annotated.vcf ; bgzip -f %sresults/%s_annotated.vcf ; %stabix -fp vcf %sresults/%s_annotated.vcf.gz" + "mv %s/annovar.vcf.hg38_multianno.vcf %sresults/%s_annotated.vcf ; bgzip -f %sresults/%s_annotated.vcf ; %stabix -fp vcf %sresults/%s_annotated.vcf.gz" % (out, out, sample_name, out, sample_name, path_tabix, out, sample_name)) @@ -1373,7 +1375,7 @@ os.system("mv %s* %sresults/" % (variant_results_file, out)) - variant_results_file = "%sresults/%s_sorted.vcf.gz" % (out, + variant_results_file = "%s%s_sorted.vcf.gz" % (out, sample_name) # 15. Microbes screening @@ -1599,7 +1601,7 @@ if path_java != "": - java_option = "-j " + path_java + " " + java_option = "-j " + path_java + "java" else: @@ -1742,7 +1744,7 @@ os.system("touch %slogs/iobio.log" % (out)) print( - "\n\nIobio serces have been started at http://localhost:%s\n\nCopy and paste http://localhost:%s to select the service (vcf, bam, gene) and upload your data into the selected service\n\nIf you want to explore your variant calling results please copy and paste the following URL into your browser and upload the vcf file (../%sresults/%s_sorted.vcf.gz):\n\n" + "\n\nIobio services have been started at http://localhost:%s\n\nCopy and paste http://localhost:%s to select the service (vcf, bam, gene) and upload your data into the selected service\n\nIf you want to explore your variant calling results please copy and paste the following URL into your browser and upload the vcf file (../%s%s_sorted.vcf.gz):\n\n" % (port_num, port_num, out, sample_name), end='', flush=True) @@ -1783,7 +1785,7 @@ if alsgenescanner: os.system( - "python3 %s/alsgenescanner.py %s/annovar.vcf.hg19_multianno.txt %s/results/%s_alsgenescanner_all.txt" + "python3 %s/alsgenescanner.py %s/annovar.vcf.hg38_multianno.txt %s/results/%s_alsgenescanner_all.txt" % (path_scripts, out, out, sample_name)) os.system( "cat %s/results/%s_alsgenescanner_all.txt | head -1 > %s/results/%s_alsgenescanner_alsod.txt; cat %s/results/%s_alsgenescanner_all.txt | grep -iwf %s/list_genes_alsod.txt >> %s/results/%s_alsgenescanner_alsod.txt" diff --git a/scripts/analyse_list_of_samples.py b/scripts/analyse_list_of_samples.py index f9f7d41..0be647d 100644 --- a/scripts/analyse_list_of_samples.py +++ b/scripts/analyse_list_of_samples.py @@ -16,20 +16,18 @@ # 5.3 Run DNAscan for one sample ################################################################ -import argparse , os , paths , os.path +import argparse , os, paths_configs , os.path from argparse import RawTextHelpFormatter # 2. Define paths viriables -python_path = paths.python_path - -dnascan_dir = paths.dnascan_dir +dnascan_dir = paths_configs.dnascan_dir # 3. Define options from command line -parser = argparse.ArgumentParser(prog='python analyse_list_of_samples.py ', usage='%(prog)s -format "string" -paired "string" -sample_list "string" -out_dir "string" -option_string "string"', description = '############Help Message############ \n\nThis is a script to run DNAscan on a list of samples. Each line of the list must contain the path to one sample. If samples are in paired reads in fastq format and you have two files per sample, these will have to be on the same line spaced bt a tab.\n\n E.g. sample.1.fq.gz sample.2.fq.gz\n\nDNAscan uses the file paths.py to locate the needed tools and files. Please make sure your paths.py file is properly filled \n\nUsage example: \n\npython alalyse_list_of_files.py -option_string "-format fastq -mode intensive -reference hg19 -alignment -variantcalling -annotation" -out_dir /path/to/dir -sample_list list.txt -format bam\n\nPlease check the following list of required options\n\n################################################', formatter_class=RawTextHelpFormatter) +parser = argparse.ArgumentParser(prog='python3 analyse_list_of_samples.py ', usage='%(prog)s -format "string" -paired "string" -sample_list "string" -out_dir "string" -option_string "string"', description = '############Help Message############ \n\nThis is a script to run DNAscan on a list of samples. Each line of the list must contain the path to one sample. If samples are in paired reads in fastq format and you have two files per sample, these will have to be on the same line spaced bt a tab.\n\n E.g. sample.1.fq.gz sample.2.fq.gz\n\nDNAscan uses the file paths.py to locate the needed tools and files. Please make sure your paths.py file is properly filled \n\nUsage example: \n\npython alalyse_list_of_files.py -option_string "-format fastq -mode intensive -reference hg19 -alignment -variantcalling -annotation" -out_dir /path/to/dir -sample_list list.txt -format bam\n\nPlease check the following list of required options\n\n################################################', formatter_class=RawTextHelpFormatter) requiredNamed = parser.add_argument_group('required named arguments') @@ -72,7 +70,7 @@ if paired == "1" and format == "fastq" : - input_file_string = "-in %s -in2 %s" %(sample.split('\t')[0] , sample.split('\t')[1].strip()) + input_file_string = "-in %s -in2 %s" %( sample.split('\t')[0] , sample.split('\t')[1].strip() ) sample_name = sample.split('\t')[0].split("/")[-1].split("1.f")[-2] @@ -88,6 +86,6 @@ # 5.3 Run DNAscan for one sample - os.system( "%s %sDNAscan.py %s -sample_name %s %s -out %s/%s/ " %( python_path , dnascan_dir , option_string , sample_name , input_file_string , out_dir , sample_name) ) + os.system( "python3 %sDNAscan.py %s -sample_name %s %s -out %s/%s/ " %( dnascan_dir , option_string , sample_name , input_file_string , out_dir , sample_name) ) + - \ No newline at end of file