Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions scripts/DNAscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@

annovar_operations = "g,f,f"

annovar_protocols = "refGene,dbnsfp30a,clinvar_20170905,"
annovar_protocols = "refGene,dbnsfp30a,clinvar_20200316"

# Y. adapt DB to reference

Expand Down Expand Up @@ -852,7 +852,7 @@
rg_option_hisat2 = " --rg-id %s --rg LB:%s --rg PL:%s --rg PU:%s --rg SM:%s " % (
RG_ID, RG_LB, RG_PL, RG_PU, RG_SM)

rg_option_bwa = " -R '@RG\tID:%s\tLB:%s\tPL:%s\tRGPU:%s\tSM:%s' " % (
rg_option_bwa = " -R '@RG\\tID:%s\\tLB:%s\\tPL:%s\\tPU:%s\\tSM:%s' " % (
RG_ID, RG_LB, RG_PL, RG_PU, RG_SM)

else:
Expand Down Expand Up @@ -946,12 +946,14 @@
rg_option_hisat2 = " --rg-id %s --rg LB:%s --rg PL:%s --rg PU:%s --rg SM:%s " % (
RG_ID, RG_LB, RG_PL, RG_PU, RG_SM)

rg_option_bwa = " -R '@RG\tID:%s\tLB:%s\tPL:%s\tRGPU:%s\tSM:%s' " % (
rg_option_bwa = " -R '@RG\\tID:%s\\tLB:%s\\tPL:%s\\tPU:%s\\tSM:%s' " % (
RG_ID, RG_LB, RG_PL, RG_PU, RG_SM)

else:

rg_option = ""
rg_option_hisat2 = ""

rg_option_bwa = ""

os.system(
"%shisat2 %s --no-softclip --no-spliced-alignment -p %s -x %s -U %s | %s %ssamtools view -Sb - | %ssambamba sort -t %s --tmpdir=%s -o %ssorted.bam /dev/stdin; %ssamtools index -@ %s %ssorted.bam"
Expand Down Expand Up @@ -1047,7 +1049,7 @@
"WARNING: The presence of VC.log in logs is telling you that the variant calling was already peformed, please remove VC.log if you wish to perform this stage anyway\n"
)

variant_results_file = "%sresults/%s_sorted.vcf.gz" % (out,
variant_results_file = "%s%s_sorted.vcf.gz" % (out,
sample_name)

else:
Expand Down Expand Up @@ -1107,7 +1109,7 @@

while counter < int(num_cpu) + 1:

command = "%sjava -jar %sGenomeAnalysisTK.jar %s -R %s -T HaplotypeCaller -I %s -L %smpileup_positions%s.bed -o %sgatk_indels%s.vcf" % (
command = "%sjava -jar %sgatk-package-4.1.9.0-local.jar %s HaplotypeCaller -R %s -I %s -L %smpileup_positions%s.bed -O %sgatk_indels%s.vcf" % (
path_java, path_gatk, gatk_HC_custom_options, path_reference, bam_file, out,
str(counter), out, str(counter))

Expand Down Expand Up @@ -1182,7 +1184,7 @@
"%svcftools --vcf %sfreebayes.vcf --minGQ 30 --minDP 2 --exclude-bed %smpileup_positions.bed --recode --recode-INFO-all --out %sSNPs_only"
% (path_vcftools, out, out, out))

os.system("%sSNPs_only.log" % (out))
os.system("touch %sSNPs_only.log" % (out))

os.system(
"bgzip %sSNPs_only.recode.vcf ; bgzip %sindels_only.recode.vcf "
Expand All @@ -1193,7 +1195,7 @@
% (path_tabix, out, path_tabix, out))

os.system(
"%sjava -jar %sGenomeAnalysisTK.jar -T CombineVariants -minimalVCF -R %s --variant %sSNPs_only.recode.vcf.gz --variant %sindels_only.recode.vcf.gz -o %s%s.vcf --genotypemergeoption UNSORTED"
"%sjava -jar %sgatkpackage-4.1.9.0-local.jar MergeVcfs -R %s -I %sSNPs_only.recode.vcf.gz -I %sindels_only.recode.vcf.gz -O %s%s.vcf "
% (path_java, path_gatk, path_reference, out, out, out,
sample_name))

Expand Down Expand Up @@ -1350,11 +1352,11 @@
reference, annovar_protocols, annovar_operations, out))
if not debug and not alsgenescanner:
os.system(
"rm %sannovar.vcf.hg19_multianno.txt %sannovar.vcf.avinput" %
"rm %sannovar.vcf.hg38_multianno.txt %sannovar.vcf.avinput" %
(out, out))

os.system(
"mv %s/annovar.vcf.hg19_multianno.vcf %sresults/%s_annotated.vcf ; bgzip -f %sresults/%s_annotated.vcf ; %stabix -fp vcf %sresults/%s_annotated.vcf.gz"
"mv %s/annovar.vcf.hg38_multianno.vcf %sresults/%s_annotated.vcf ; bgzip -f %sresults/%s_annotated.vcf ; %stabix -fp vcf %sresults/%s_annotated.vcf.gz"
% (out, out, sample_name, out, sample_name, path_tabix, out,
sample_name))

Expand All @@ -1373,7 +1375,7 @@

os.system("mv %s* %sresults/" % (variant_results_file, out))

variant_results_file = "%sresults/%s_sorted.vcf.gz" % (out,
variant_results_file = "%s%s_sorted.vcf.gz" % (out,
sample_name)

# 15. Microbes screening
Expand Down Expand Up @@ -1599,7 +1601,7 @@

if path_java != "":

java_option = "-j " + path_java + " "
java_option = "-j " + path_java + "java"

else:

Expand Down Expand Up @@ -1742,7 +1744,7 @@
os.system("touch %slogs/iobio.log" % (out))

print(
"\n\nIobio serces have been started at http://localhost:%s\n\nCopy and paste http://localhost:%s to select the service (vcf, bam, gene) and upload your data into the selected service\n\nIf you want to explore your variant calling results please copy and paste the following URL into your browser and upload the vcf file (../%sresults/%s_sorted.vcf.gz):\n\n"
"\n\nIobio services have been started at http://localhost:%s\n\nCopy and paste http://localhost:%s to select the service (vcf, bam, gene) and upload your data into the selected service\n\nIf you want to explore your variant calling results please copy and paste the following URL into your browser and upload the vcf file (../%s%s_sorted.vcf.gz):\n\n"
% (port_num, port_num, out, sample_name),
end='',
flush=True)
Expand Down Expand Up @@ -1783,7 +1785,7 @@
if alsgenescanner:

os.system(
"python3 %s/alsgenescanner.py %s/annovar.vcf.hg19_multianno.txt %s/results/%s_alsgenescanner_all.txt"
"python3 %s/alsgenescanner.py %s/annovar.vcf.hg38_multianno.txt %s/results/%s_alsgenescanner_all.txt"
% (path_scripts, out, out, sample_name))
os.system(
"cat %s/results/%s_alsgenescanner_all.txt | head -1 > %s/results/%s_alsgenescanner_alsod.txt; cat %s/results/%s_alsgenescanner_all.txt | grep -iwf %s/list_genes_alsod.txt >> %s/results/%s_alsgenescanner_alsod.txt"
Expand Down
14 changes: 6 additions & 8 deletions scripts/analyse_list_of_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,18 @@
# 5.3 Run DNAscan for one sample
################################################################

import argparse , os , paths , os.path
import argparse , os, paths_configs , os.path

from argparse import RawTextHelpFormatter

# 2. Define path variables

python_path = paths.python_path

dnascan_dir = paths.dnascan_dir
dnascan_dir = paths_configs.dnascan_dir


# 3. Define options from command line

parser = argparse.ArgumentParser(prog='python analyse_list_of_samples.py ', usage='%(prog)s -format "string" -paired "string" -sample_list "string" -out_dir "string" -option_string "string"', description = '############Help Message############ \n\nThis is a script to run DNAscan on a list of samples. Each line of the list must contain the path to one sample. If samples are in paired reads in fastq format and you have two files per sample, these will have to be on the same line spaced bt a tab.\n\n E.g. sample.1.fq.gz sample.2.fq.gz\n\nDNAscan uses the file paths.py to locate the needed tools and files. Please make sure your paths.py file is properly filled \n\nUsage example: \n\npython alalyse_list_of_files.py -option_string "-format fastq -mode intensive -reference hg19 -alignment -variantcalling -annotation" -out_dir /path/to/dir -sample_list list.txt -format bam\n\nPlease check the following list of required options\n\n################################################', formatter_class=RawTextHelpFormatter)
parser = argparse.ArgumentParser(prog='python3 analyse_list_of_samples.py ', usage='%(prog)s -format "string" -paired "string" -sample_list "string" -out_dir "string" -option_string "string"', description = '############Help Message############ \n\nThis is a script to run DNAscan on a list of samples. Each line of the list must contain the path to one sample. If samples are in paired reads in fastq format and you have two files per sample, these will have to be on the same line spaced bt a tab.\n\n E.g. sample.1.fq.gz sample.2.fq.gz\n\nDNAscan uses the file paths.py to locate the needed tools and files. Please make sure your paths.py file is properly filled \n\nUsage example: \n\npython alalyse_list_of_files.py -option_string "-format fastq -mode intensive -reference hg19 -alignment -variantcalling -annotation" -out_dir /path/to/dir -sample_list list.txt -format bam\n\nPlease check the following list of required options\n\n################################################', formatter_class=RawTextHelpFormatter)

requiredNamed = parser.add_argument_group('required named arguments')

Expand Down Expand Up @@ -72,7 +70,7 @@

if paired == "1" and format == "fastq" :

input_file_string = "-in %s -in2 %s" %(sample.split('\t')[0] , sample.split('\t')[1].strip())
input_file_string = "-in %s -in2 %s" %( sample.split('\t')[0] , sample.split('\t')[1].strip() )

sample_name = sample.split('\t')[0].split("/")[-1].split("1.f")[-2]

Expand All @@ -88,6 +86,6 @@

# 5.3 Run DNAscan for one sample

os.system( "%s %sDNAscan.py %s -sample_name %s %s -out %s/%s/ " %( python_path , dnascan_dir , option_string , sample_name , input_file_string , out_dir , sample_name) )
os.system( "python3 %sDNAscan.py %s -sample_name %s %s -out %s/%s/ " %( dnascan_dir , option_string , sample_name , input_file_string , out_dir , sample_name) )