diff --git a/.gitignore b/.gitignore index 2bb9400..055ac83 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ results/ testing/ testing* *.pyc -.nf-test/ \ No newline at end of file +.nf-test/ +*.Rhistory +*version?current=* \ No newline at end of file diff --git a/bin/BindingSiteFinderQC.R b/bin/BindingSiteFinderQC.R new file mode 100644 index 0000000..62b60e6 --- /dev/null +++ b/bin/BindingSiteFinderQC.R @@ -0,0 +1,11 @@ +args <- commandArgs(trailingOnly = TRUE) + +print(args) +bds = args[1] + + +rmarkdown::render(xxreport_tmp_path, + #output_dir = paste0(snakemake@params[[1]], "/results/"), + args = args, + output_format = "html_document" +) \ No newline at end of file diff --git a/bin/BindingSiteFinderQC.qmd b/bin/BindingSiteFinderQC.qmd new file mode 100644 index 0000000..147ed87 --- /dev/null +++ b/bin/BindingSiteFinderQC.qmd @@ -0,0 +1,57 @@ +--- +title: "Quality Control of binding sites from BindingSiteFinder" +format: html +--- + +```{r} +library(BindingSiteFinder) +bds <- readRDS(args[1]) +``` + + +# Overview + +This document describes the (automatically chosen) parameters of used for biding site definition in BindingSiteFinder and provides multiple plots to check the quality of the binding sites on multiple levels. + +```{r} +# Flowchart +processingStepsFlowChart(bds) + +``` + +The flowchart lists all the steps that were performed by BindingSiteFinder. On the left, the number of crosslinks (before the makeBindingSite step) or binding sites (after the makeBindingSite step) is given. On the right, the used parameters from each step are listed. + +# Filtering of peak input signal + +The peaks obtained in peak calls are filtered by their score, removing the peaks with very low binding scores. The following plot shows the distribution of the scores of the peaks before filtering and the chosen cutoff as a line. + +```{r} +# Filtering peak input +pureClipGlobalFilterPlot(bdsOut) + +``` + + +# Estimation of binding sites + +BindingSiteFinder estimates the optimal size of the binding sites from a signal to flank ration. In addition, it tests different genewise filters of crosslinks to reduce influences of background signal. The following plot shows the signal to flank ratio across different binding sites width. Each line stands for another genewise filter. The chosen filter setting and binding site width are shown by the read lines and are also given in the top right corner of the plot. + +```{r} +estimateBsWidthPlot(bdsOut) + +``` + +Whether the chosen binding site width is appropriate can be checked by the following plot. It shows some examples of binding site width (grey boxes) and the summed signal in the binding site region. For an optimal binding site width the peak should be completely within the binding site, but the binding site should not spann further into the background signal. + +```{r} +rangeCoveragePlot(l, width = 20, show.samples = TRUE, subset.chromosome = "chr22") + +``` + +# Reproducibility of crosslink signals from different replicates in binding sites + +Coming soon! + + + + diff --git a/bin/DefineBindingSites.R b/bin/DefineBindingSites.R new file mode 100755 index 0000000..1122cc4 --- /dev/null +++ b/bin/DefineBindingSites.R @@ -0,0 +1,206 @@ +#!/usr/bin/env Rscript + +# ----------------------------- +# Module make binding sites +# ----------------------------- +options(warn = -1) + +# libraries +################ +# Suppress messages +suppressMessages(library(BindingSiteFinder)) +suppressMessages(library(GenomicRanges)) +suppressMessages(library(rtracklayer)) +suppressMessages(library(tidyverse)) +suppressMessages(library(optparse)) + +# print BSF version +################## +cat("BindingSiteFinder version: ", as.character(packageVersion("BindingSiteFinder")), "\n") + + +# Input +################ + +# Specify the input arguments +option_list <- list( + make_option(c("-b", "--bw_files_folder"), type = "character"), + make_option(c("-p", "--peaks"), type = "character"), + make_option(c("-g", "--anno_genes"), type = "character"), + make_option(c("-r", "--anno_regions"), type = "character"), + # output paths + make_option(c("-o", "--output_path"), type = "character", default = "."), + + # optional parameters to customize binding sites + #-------------------------------- + # the default values are the ones specified in the Bioconductor package BindingSiteFinder + # https://www.bioconductor.org/packages/release/bioc/manuals/BindingSiteFinder/man/BindingSiteFinder.pdf + # the default values as specified in BindingSiteFinder 2.4.0 are given in the comment after the parameter name + # optional parameters for binding site defnition + make_option(c( "--peak_score_global_cuttoff"), type = "numeric"), # default 0.01 + make_option(c( "--bsWidth"), type = "numeric"), # default automatic estimation + make_option(c( "--peak_score_genewise_cuttoff"), type = "numeric"), # default automatic estimation + make_option(c( "--minWidth"), type = "numeric"), # default 2 + make_option(c( "--minCrosslinks"), type = "numeric"), # default 2 + make_option(c( "--minCLSites"), type = "numeric"), # default 1 + make_option(c( "--maxBsWidth"), type = "numeric"), # default 13 + # optional parameters for reproducibility + # make_option(c( "--reproducibility_cutoff"), type = "numeric"), + # make_option(c( "--reproducibility_nReps"), type = "numeric"), + # optional parameters for gene and region assignment + make_option(c( "--method_gene_overlaps"), type = "character"), # default "frequency" + make_option(c( "--rule_gene_overlaps"), type = "character"), # default NULL (only needed for --method_gene_overlaps "hierarchy") + make_option(c( "--method_region_overlaps"), type = "character"), # default "frequency" + make_option(c( "--rule_region_overlaps"), type = "character"), # default NULL (only needed for --method_region_overlaps "hierarchy") + # optional parameters to fit non-standard genomes + make_option(c( "--match_geneID"), type = "character"), # default "gene_id" + make_option(c( "--match_geneName"), type = "character"), # default "gene_name" + make_option(c( "--match_geneType"), type = "character"), # default "gene_type" + # optional parameters to fit peakcaller scores + make_option(c( "--match_score"), type = "numeric"), # default "score" + make_option(c( "--match_score_option"), type = "character") # default "max" +) + +# Parse arguments +parser <- OptionParser(option_list = option_list) +args <- parse_args(parser) + +# Default parameters (NULL means use function defaults) +params.input.output <- list( + bw_files_folder = args$bw_files_folder, + peaks = args$peaks, + anno_genes = args$anno_genes, + anno_regions = args$anno_regions, + output_path = args$output_path) + +params.pureClipGlobalFilter <- list( + cuttoff = args$peak_score_global_cuttoff) + +params.estimateBsWidth <- list( + est.minWidth = args$minWidth, + est.maxBsWidth = args$maxBsWidth) + +params.pureClipGeneWiseFilter <- list( + cutoff = args$peak_score_genewise_cuttoff, + match.score = args$match_score, + match.geneID = args$match_geneID, + overlaps = args$method_gene_overlaps) + +params.makeBindingSites <- list( + minWidth = args$minWidth, + minCrosslinks = args$minCrosslinks, + minClSites = args$minCLSites) + +params.reproducibilityFilter <- list( + cutoff = args$reproducibility_cutoff, + nReps = args$reproducibility_nReps) + +params.assignToGenes <- list( + overlaps = args$method_gene_overlaps, + overlaps.rule = args$rule_gene_overlaps, + match.geneID = args$match_geneID, + match.geneName = args$match_geneName, + match.geneType = args$match_geneType + ) +params.assignToTranscriptRegions <- list( + overlaps = args$method_region_overlaps, + overlaps.rule = args$rule_region_overlaps) + +params.annotateWithScore <- list( + match.score = args$match_score, + match.option = args$match_score_option +) + +# Remove NULL values so function defaults apply +params.pureClipGlobalFilter <- params.pureClipGlobalFilter[!sapply(params.pureClipGlobalFilter, is.null)] +params.estimateBsWidth <- params.estimateBsWidth[!sapply(params.estimateBsWidth, is.null)] +params.pureClipGeneWiseFilter <- params.pureClipGeneWiseFilter[!sapply(params.pureClipGeneWiseFilter, is.null)] +params.makeBindingSites <- params.makeBindingSites[!sapply(params.makeBindingSites, is.null)] +params.reproducibilityFilter <- params.reproducibilityFilter[!sapply(params.reproducibilityFilter, is.null)] +params.assignToGenes <- params.assignToGenes[!sapply(params.assignToGenes, is.null)] +params.assignToTranscriptRegions <- params.assignToTranscriptRegions[!sapply(params.assignToTranscriptRegions, is.null)] +params.annotateWithScore <- params.annotateWithScore[!sapply(params.annotateWithScore, is.null)] + +####################### +# BindingSiteFinder +####################### +# crosslinks +bw_files_names <- list.files(params.input.output$bw_files_folder) + +clipFilesP <- list.files(params.input.output$bw_files_folder, pattern = "plus.bw$", full.names = TRUE) +clipFilesM <- list.files(params.input.output$bw_files_folder, pattern = "minus.bw$", full.names = TRUE) + + +# annotation +gns <- readRDS(params.input.output$anno_genes) +regions <- readRDS(params.input.output$anno_regions) + + +# Peaks from pureclip +peaks = rtracklayer::import(con = params.input.output$peaks, format = "BED", extraCols=c("additionalScores" = "character")) +peaks$additionalScores = NULL +peaks$name = NULL + + + +# Prepare meta data +meta = data.frame( + id = c(1:length(clipFilesP)), + condition = factor(rep("all", length(clipFilesP))), # add option for multiple groups from sample file + clPlus = clipFilesP, + clMinus = clipFilesM) + + +# run BindingSiteFinder +####################### + +cat("############################# \n Running BindingSiteFinder \n############################# \n") +bds = BSFDataSetFromBigWig(ranges = peaks, meta = meta, silent =T) + +cat("\nGlobal filter on peak sites \n \n") +bds = do.call(pureClipGlobalFilter, + c(list(bds), + params.pureClipGlobalFilter)) # param cutoff +cat("\nEstimate binding site width \n \n") +bds = do.call(estimateBsWidth, + c(list(bds, anno.genes = gns), + params.estimateBsWidth)) # optional param: bsWidth + +cat("\nGenewise filter on peak sites \n \n") +bds = do.call(pureClipGeneWiseFilter, + c(list(bds, anno.genes = gns), + params.pureClipGeneWiseFilter)) # param cutoff, overlaps, match score, match geneID + +cat("\nMake binding sites \n \n") +bds = do.call(makeBindingSites, + c(list(bds), + params.makeBindingSites)) # params minWidth, minCrosslinks, minCLSites + + +# bds = do.call(reproducibilityFilter, c(list(bds), params.reproducibilityFilter)) # params cutoff, nReps +cat("\nAssign binding sites to genes \n \n") +bds = do.call(assignToGenes, c(list(bds, anno.genes = gns), + params.assignToGenes)) # params overlaps, overlaps.rule, match.geneID, match.geneName, match.geneType + +cat("\nAssign binding sites to transcript regions \n \n") +bds = do.call(assignToTranscriptRegions, c(list(bds,anno.transcriptRegionList = regions), + params.assignToTranscriptRegions))# params overlaps, overlaps.rule, + +cat("\nAnotate binding scores \n \n") +bds = do.call(annotateWithScore, c(list(bds, peaks),params.annotateWithScore)) #match.score, match.option + +cat("\nSaving bindings sites \n \n") +bs_gr = getRanges(bds) +names(bs_gr) <- 1:NROW(bs_gr) +bs_df <- as.data.frame(bs_gr) + + +# export outputs +######################## + +exportToBED(bds, con = paste0(params.input.output$output_path , "/myBindingSites.bed")) +saveRDS(bds, paste0(params.input.output$output_path ,"/bds_object.rds")) +saveRDS(bs_df, paste0(params.input.output$output_path ,"/bindingSites.rds")) +write.csv(bs_df, file = paste0(params.input.output$output_path,"/bindingSites.csv"), row.names = FALSE) + + diff --git a/bin/sortAnnotationForBindingSiteFinder.R b/bin/sortAnnotationForBindingSiteFinder.R new file mode 100755 index 0000000..5ce125b --- /dev/null +++ b/bin/sortAnnotationForBindingSiteFinder.R @@ -0,0 +1,45 @@ +#!/usr/bin/env Rscript + +# command line arguments +args <- commandArgs(trailingOnly = TRUE) + +print(args) +annoFile = args[1] +out_gns = args[2] +out_regions = args[3] + +# for local tests +#annoFile = "/Users/melinaklostermann/Documents/projects/anno/GENCODEv31-p12/gencode.v31.annotation.gtf" +# out_gns = "/Users/melinaklostermann/Documents/projects/nf-core-clipseq/devel_folder/outputs/gns.rds" +# out_regions = "/Users/melinaklostermann/Documents/projects/nf-core-clipseq/devel_folder/outputs/regions.rds" + +# libraries +library(GenomicFeatures) + + + + +# Make annotation database from gff3 file +annoDb = GenomicFeatures::makeTxDbFromGFF(file = annoFile, format = "gtf") +annoInfo = rtracklayer::import(annoFile, format = "gtf") + + +# Get genes as GRanges +gns = genes(annoDb) +idx = match(gns$gene_id, annoInfo$gene_id) +meta = cbind(elementMetadata(gns), + elementMetadata(annoInfo)[idx,]) +meta = meta[,!duplicated(colnames(meta))] +elementMetadata(gns) = meta + +saveRDS(gns, out_gns) + + +# Get regions as Granges +cdseq = cds(annoDb) +intrns = unlist(intronsByTranscript(annoDb)) +utrs3 = unlist(threeUTRsByTranscript(annoDb)) +utrs5 = unlist(fiveUTRsByTranscript(annoDb)) +regions = GRangesList(CDS = cdseq, Intron = intrns, UTR3 = utrs3, UTR5 = utrs5) + +saveRDS(regions, out_regions) diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf new file mode 100644 index 0000000..6f89406 --- /dev/null +++ b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf @@ -0,0 +1,71 @@ +process defineBindingSites { + tag "$meta.id" + label 'process_single' + + container "${'melinak/bindingsitefinder:1.1'}" + + input: + tuple val(meta), path(bw_files_folder) + tuple val(meta), path(peaks) + tuple val(meta), path(anno_gns) + tuple val(meta), path(anno_regions) + + output: + tuple val(meta), path("*binding_sites.rds"), emit: binding_sites_rds + tuple val(meta), path("*binding_sites.csv"), emit: binding_sites_csv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // the bindingSiteFinder sersion number will be printed to the console by the R script + + // optional parameters: + // --peak_score_global_cuttoff $params.peak_score_global_cuttoff \\ + // --bsWidth $params.bsWidth \\ + // --peak_score_genewise_cuttoff $params.peak_score_genewise_cuttoff \\ + // --minWidth $params.minWidth \\ + // --minCrosslinks $params.minCrosslinks \\ + // --minCLSites $params.minCLSites \\ + // --maxBsWidth $params.maxBsWidth \\ + // // --reproducibility_cutoff $params.reproducibility_cutoff \\ + // // --reproducibility_nReps $params.reproducibility_nReps \\ + // --method_gene_overlaps $params.method_gene_overlaps \\ + // --rule_gene_overlaps $params.rule_gene_overlaps \\ + // --method_region_overlaps $params.method_region_overlaps \\ + // --rule_region_overlaps $params.rule_region_overlaps \\ + // --match_score $params.match_score \\ + // --match_geneID $params.match_geneID \\ + // --match_geneName $params.match_geneName \\ + // --match_geneType $params.match_geneType \\ + // --match_score_option $params.match_score_option \\ + + """ + DefineBindingSites.R \\ + --bw_files_folder $bw_files_folder \\ + --peaks $peaks \\ + --anno_genes $anno_gns \\ + --anno_regions $anno_regions \\ + --output_path . \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + touch binding_sites.rds + touch binding_sites.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS + """ +} diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/test_run.sh b/modules/local/BindingSiteFinder/DefineBindingSites/test_run.sh new file mode 100644 index 0000000..f3337c7 --- /dev/null +++ b/modules/local/BindingSiteFinder/DefineBindingSites/test_run.sh @@ -0,0 +1,9 @@ +# without nextflow +docker run --rm -it -v /Users/melinaklostermann/Documents/projects:/mnt/ melinak/bindingsitefinder:1.0 bin/bash + +Rscript /mnt/nf-core-clipseq/modules/local/BindingSiteFinder/DefineBindingSites.R \ + --bw_files_folder "/mnt/nf-core-clipseq/devel_folder/example_inputs/AGO_iCLIP" \ + --peaks "/mnt/nf-core-clipseq/devel_folder/example_inputs/AGO_iCLIP/IP_WT_pureclip_sites.bed" \ + --anno_genes "/mnt/nf-core-clipseq/devel_folder/outputs/gns.rds" \ + --anno_regions "/mnt/nf-core-clipseq/devel_folder/outputs/regions.rds" \ + --output_path "/mnt/nf-core-clipseq/devel_folder/outputs" diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test b/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test new file mode 100644 index 0000000..39cf478 --- /dev/null +++ b/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test @@ -0,0 +1,86 @@ +nextflow_process { + + name "Test Process CLIPSEQ_DEFINEBINDINGSITES" + script "../main.nf" + process "defineBindingSites" + + tag "clipseq" + tag "clipseq_definebindingsites" + tag "definebindingsites" + + // Global setup for rds input files + setup { + run("sortAnnotationForBindingSiteFinder", alias: "ANNOTATIONS") { + script "../../sortAnnotationForBindingSiteFinder/main.nf" + process { + """ + input[0] = [ + [ id:'test_sort' ], // meta map + file('https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV_filtered_seg.gtf'), + ] + """ + } + } + } + + test("DefineBindingSites - bw_files_folder, peak_bed, anno_genes_rds, anno_regions_rds") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('/Users/hdash/code/public/clipseq/tests/bigwigs', checkIfExists: true), + ] + input[1] = [ + [ id:'test' ], // meta map + file('/Users/hdash/code/public/clipseq/results/05_peakcalling/pureclip/HNRNPC_pureclip_crosslinks.bed', checkIfExists: true), + ] + input[2] = ANNOTATIONS.out[0] + input[3] = ANNOTATIONS.out[1] + """ + } + } + + then { + assertAll( + { assert process.success }, + // { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + ) + } + + } + + test("DefineBindingSites - bw_files_folder, peak_bed, anno_genes_rds, anno_regions_rds - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('/Users/hdash/code/public/clipseq/tests/bigwigs', checkIfExists: true), + ] + input[1] = [ + [ id:'test' ], // meta map + file('/Users/hdash/code/public/clipseq/results/05_peakcalling/pureclip/HNRNPC_pureclip_crosslinks.bed', checkIfExists: true), + ] + input[2] = ANNOTATIONS.out[0] + input[3] = ANNOTATIONS.out[1] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + assert new File(process.out.binding_sites_rds[0][1]).exists() + assert new File(process.out.binding_sites_csv[0][1]).exists() + } + + } + +} diff --git a/modules/local/BindingSiteFinder/bsfQC/main.nf b/modules/local/BindingSiteFinder/bsfQC/main.nf new file mode 100644 index 0000000..c7391f7 --- /dev/null +++ b/modules/local/BindingSiteFinder/bsfQC/main.nf @@ -0,0 +1,40 @@ +process bsfQC { + tag "$meta.id" + label 'process_single' + + container "${'melinak/bindingsitefinder:1.1'}" + + input: + tuple val(meta), path(binding_sites_rds) + + output: + tuple val(meta), path("*BindingSiteFinderQC.html"), emit: bindingSiteFinderQC + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + BindingSiteFinderQC.R \\ + $binding_sites_rds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS + """ + + stub: + def args = task.ext.a + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch gns.rds + touch regions.rds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS + """ +} diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf new file mode 100644 index 0000000..94d8562 --- /dev/null +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf @@ -0,0 +1,42 @@ +process sortAnnotationForBindingSiteFinder { + tag "$meta.id" + label 'process_single' + + container "${'melinak/bindingsitefinder:1.1'}" + + input: + tuple val(meta), path(gtf) + + output: + tuple val(meta), path("gns.rds"), emit: gns_rds + tuple val(meta), path("regions.rds"), emit: regions_rds + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + sortAnnotationForBindingSiteFinder.R \\ + $gtf \\ + gns.rds \\ + regions.rds + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS + """ + + stub: + def args = task.ext.a + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch gns.rds + touch regions.rds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS + """ +} diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test new file mode 100644 index 0000000..7b5b9c3 --- /dev/null +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process CLIPSEQ_SORTANNOTATIONSFORBINDINGSITEFINDER" + script "../main.nf" + process "sortAnnotationForBindingSiteFinder" + + tag "clipseq" + tag "clipseq_sortannotationsforbindingsitefinder" + tag "sortannotationsforbindingsitefinder" + + test("sortAnnotationsForBindingSiteFinder - gtf") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV_filtered_seg.gtf'), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + + // These statements fail inside assertAll (not sure why) + assert new File(process.out.gns_rds[0][1]).exists() + assert new File(process.out.regions_rds[0][1]).exists() + } + + } + + test("sortAnnotationsForBindingSiteFinder - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV_filtered_seg.gtf'), + ] + """ + } + } + + then { + assert process.success + assert new File(process.out.gns_rds[0][1]).exists() + assert new File(process.out.regions_rds[0][1]).exists() + } + + } + +} diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap new file mode 100644 index 0000000..7fced7e --- /dev/null +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,978f7f625274870d4e9dd7d737845e31" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-03-25T10:44:54.619281" + } +} \ No newline at end of file diff --git a/subworkflows/local/BindingSiteFinder.nf b/subworkflows/local/BindingSiteFinder.nf new file mode 100644 index 0000000..dcabb17 --- /dev/null +++ b/subworkflows/local/BindingSiteFinder.nf @@ -0,0 +1,42 @@ +nextflow.enable.dsl = 2 + +include { sortAnnotationForBindingSiteFinder } from '../../modules/local/sortAnnotationForBindingSiteFinder/main.nf' +include { BindingSiteFinderAnalysis } from '../../modules/local/BindingSiteFinder/main.nf' + +workflow BindingSiteFinder{ + take: + gtf_ch + // bw_files_folder + // peaks + + + main: + sorted_ch = sortAnnotationForBindingSiteFinder(gtf_ch) + + // BindingSiteFinderAnalysis( + // bw_files_folder, + // peaks, + // sorted_ch.gns, + // sorted_ch.regions, + // sample_sheet, + // // Here start optional parameters + // peak_score_global_cuttoff, + // bsWidth, + // peak_score_genewise_cuttoff, + // minWidth, + // minCrosslinks, + // minCLSites, + // maxBsWidth, + // reproducibility_cutoff, + // reproducibility_nReps, + // method_gene_overlaps, + // rule_gene_overlaps, + // method_region_overlaps, + // rule_region_overlaps, + // match_score, + // match_geneID, + // match_geneName, + // match_geneType, + // match_score_option + // ) +} diff --git a/subworkflows/local/bindingsitefinder/main.nf b/subworkflows/local/bindingsitefinder/main.nf new file mode 100644 index 0000000..9e050cc --- /dev/null +++ b/subworkflows/local/bindingsitefinder/main.nf @@ -0,0 +1,42 @@ + +include { sortAnnotationForBindingSiteFinder } from '../../../modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main' +include { defineBindingSites } from '../../../modules/local/BindingSiteFinder/DefineBindingSites/main' +include { bsfQC } from '../../../modules/local/BindingSiteFinder/bsfQC/main' + +workflow BINDINGSITEFINDER { + + take: + + gtf_ch // channel: [ val(meta), [ gtf ] ] + bw_ch // channel: [ val(meta), [ bigwig ] ] + peak_ch // channel: [ peaks ] + + main: + + ch_versions = Channel.empty() + + // Destructure outputs from sortAnnotationForBindingSiteFinder(gtf_ch) + tuple_ch = sortAnnotationForBindingSiteFinder(gtf_ch) + gns_ch = tuple_ch.gns_rds + regions_ch = tuple_ch.regions_rds + ch_versions = ch_versions.mix(sortAnnotationForBindingSiteFinder.out.versions.first()) + + // Pass annotation outputs, bigwig files and peak file to sortAnnotationForBindingSiteFinder(bw_ch) + bs_ch = defineBindingSites(bw_ch, gns_ch, regions_ch, peak_ch) + ch_versions = ch_versions.mix(defineBindingSites.out.versions.first()) + + // Extract only the rds output from bs_ch + rds_ch = bs_ch.binding_sites_rds + + // Pass the rds output to bsfQC + qc_ch = bsfQC(rds_ch) + ch_versions = ch_versions.mix(bsfQC.out.versions.first()) + + emit: + csv = defineBindingSites.out.binding_sites_csv // channel: [ val(meta), [ csv ] ] + rds = defineBindingSites.out.binding_sites_rds // channel: [ val(meta), [ rds ] ] + html = bsfQC.out.bindingSiteFinderQC // channel: [ val(meta), [ html ] ] + + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/bindingsitefinder/meta.yml b/subworkflows/local/bindingsitefinder/meta.yml new file mode 100644 index 0000000..2a28660 --- /dev/null +++ b/subworkflows/local/bindingsitefinder/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bindingsitefinder" +## TODO nf-core: Add a description of the subworkflow and list keywords +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +## TODO nf-core: Add a list of the modules and/or subworkflows used in the subworkflow +components: + - samtools/sort + - samtools/index +## TODO nf-core: List all of the channels used as input with a description and their structure +input: + - ch_bam: + type: file + description: | + The input channel containing the BAM/CRAM/SAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.{bam/cram/sam}" +## TODO nf-core: List all of the channels used as output with a descriptions and their structure +output: + - bam: + type: file + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - csi: + type: file + description: | + Channel containing CSI files + Structure: [ val(meta), path(csi) ] + pattern: "*.csi" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@MelinaKlostermann" +maintainers: + - "@MelinaKlostermann" diff --git a/subworkflows/local/bindingsitefinder/tests/main.nf.test b/subworkflows/local/bindingsitefinder/tests/main.nf.test new file mode 100644 index 0000000..9141f86 --- /dev/null +++ b/subworkflows/local/bindingsitefinder/tests/main.nf.test @@ -0,0 +1,45 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core subworkflows test bindingsitefinder +nextflow_workflow { + + name "Test Subworkflow BINDINGSITEFINDER" + script "../main.nf" + workflow "BINDINGSITEFINDER" + + tag "subworkflows" + tag "subworkflows_" + tag "subworkflows/bindingsitefinder" + // TODO nf-core: Add tags for all modules used within this subworkflow. Example: + tag "samtools" + tag "samtools/sort" + tag "samtools/index" + + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam - single_end") { + + when { + workflow { + """ + // TODO nf-core: define inputs of the workflow here. Example: + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + //TODO nf-core: Add all required assertions to verify the test output. + ) + } + } +} diff --git a/tests/bigwigs/HNRNPC.genome.xl.bedgraph_minus.bw b/tests/bigwigs/HNRNPC.genome.xl.bedgraph_minus.bw new file mode 100644 index 0000000..a136ad2 Binary files /dev/null and b/tests/bigwigs/HNRNPC.genome.xl.bedgraph_minus.bw differ diff --git a/tests/bigwigs/HNRNPC.genome.xl.bedgraph_plus.bw b/tests/bigwigs/HNRNPC.genome.xl.bedgraph_plus.bw new file mode 100644 index 0000000..509b8b3 Binary files /dev/null and b/tests/bigwigs/HNRNPC.genome.xl.bedgraph_plus.bw differ diff --git a/tests/bigwigs/PHO92_A.genome.xl.bedgraph_minus.bw b/tests/bigwigs/PHO92_A.genome.xl.bedgraph_minus.bw new file mode 100644 index 0000000..a136ad2 Binary files /dev/null and b/tests/bigwigs/PHO92_A.genome.xl.bedgraph_minus.bw differ diff --git a/tests/bigwigs/PHO92_A.genome.xl.bedgraph_plus.bw b/tests/bigwigs/PHO92_A.genome.xl.bedgraph_plus.bw new file mode 100644 index 0000000..509b8b3 Binary files /dev/null and b/tests/bigwigs/PHO92_A.genome.xl.bedgraph_plus.bw differ diff --git a/tests/bigwigs/PHO92_B.genome.xl.bedgraph_minus.bw b/tests/bigwigs/PHO92_B.genome.xl.bedgraph_minus.bw new file mode 100644 index 0000000..a136ad2 Binary files /dev/null and b/tests/bigwigs/PHO92_B.genome.xl.bedgraph_minus.bw differ diff --git a/tests/bigwigs/PHO92_B.genome.xl.bedgraph_plus.bw b/tests/bigwigs/PHO92_B.genome.xl.bedgraph_plus.bw new file mode 100644 index 0000000..509b8b3 Binary files /dev/null and b/tests/bigwigs/PHO92_B.genome.xl.bedgraph_plus.bw differ diff --git a/tests/bigwigs/PHO92_C.genome.xl.bedgraph_minus.bw b/tests/bigwigs/PHO92_C.genome.xl.bedgraph_minus.bw new file mode 100644 index 0000000..a136ad2 Binary files /dev/null and b/tests/bigwigs/PHO92_C.genome.xl.bedgraph_minus.bw differ diff --git a/tests/bigwigs/PHO92_C.genome.xl.bedgraph_plus.bw b/tests/bigwigs/PHO92_C.genome.xl.bedgraph_plus.bw new file mode 100644 index 0000000..509b8b3 Binary files /dev/null and b/tests/bigwigs/PHO92_C.genome.xl.bedgraph_plus.bw differ