From 84be19106e59674af6efc5208493fd9c87924457 Mon Sep 17 00:00:00 2001 From: MelinaKlostermann Date: Mon, 17 Mar 2025 16:09:29 +0100 Subject: [PATCH 01/12] v1 BindingSiteFinder modules and subworkflow --- .gitignore | 3 +- .../BindingSiteFinder/DefineBindingSites.R | 192 ++++++++++++++++++ modules/local/BindingSiteFinder/main.nf | 44 ++++ modules/local/BindingSiteFinder/test_run.sh | 9 + .../main.nf | 17 ++ .../sortAnnotationForBindingSiteFinder.R | 44 ++++ subworkflows/local/BindingSiteFinder.nf | 42 ++++ 7 files changed, 350 insertions(+), 1 deletion(-) create mode 100644 modules/local/BindingSiteFinder/DefineBindingSites.R create mode 100644 modules/local/BindingSiteFinder/main.nf create mode 100644 modules/local/BindingSiteFinder/test_run.sh create mode 100644 modules/local/sortAnnotationForBindingSiteFinder/main.nf create mode 100644 modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R create mode 100644 subworkflows/local/BindingSiteFinder.nf diff --git a/.gitignore b/.gitignore index 2bb94000..b47b44c0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ results/ testing/ testing* *.pyc -.nf-test/ \ No newline at end of file +.nf-test/ +*.Rhistory \ No newline at end of file diff --git a/modules/local/BindingSiteFinder/DefineBindingSites.R b/modules/local/BindingSiteFinder/DefineBindingSites.R new file mode 100644 index 00000000..dbb79c2b --- /dev/null +++ b/modules/local/BindingSiteFinder/DefineBindingSites.R @@ -0,0 +1,192 @@ +# ----------------------------- +# Module make binding sites +# ----------------------------- +options(warn = -1) + +# libraries +################ + # Suppress messages +suppressMessages( library(BindingSiteFinder)) +suppressMessages( library(GenomicRanges)) +suppressMessages( library(rtracklayer)) +suppressMessages( library(tidyverse)) +suppressMessages( library(optparse)) + +# Input +################ + +# Specify the input arguments +option_list <- list( + make_option(c("-b", 
"--bw_files_folder"), type = "character"), + make_option(c("-p", "--peaks"), type = "character"), + make_option(c("-g", "--anno_genes"), type = "character"), + make_option(c("-r", "--anno_regions"), type = "character"), + # output paths + make_option(c("-o", "--output_path"), type = "character", default = "."), + # optional parameters to customize binding sites + make_option(c( "--peak_score_global_cuttoff"), type = "numeric"), + make_option(c( "--bsWidth"), type = "numeric"), + make_option(c( "--peak_score_genewise_cuttoff"), type = "numeric"), + make_option(c( "--minWidth"), type = "numeric"), + make_option(c( "--minCrosslinks"), type = "numeric"), + make_option(c( "--minCLSites"), type = "numeric"), + make_option(c( "--maxBsWidth"), type = "numeric"), + # optional parameters for reproducibility + make_option(c( "--reproducibility_cutoff"), type = "numeric"), + make_option(c( "--reproducibility_nReps"), type = "numeric"), + # optional parameters for gene and region assignment + make_option(c( "--method_gene_overlaps"), type = "character"), + make_option(c( "--rule_gene_overlaps"), type = "character"), + make_option(c( "--method_region_overlaps"), type = "character"), + make_option(c( "--rule_region_overlaps"), type = "character"), + # optional parameters to fit non-standard genomes + make_option(c( "--match_score"), type = "numeric"), + make_option(c( "--match_geneID"), type = "character"), + make_option(c( "--match_geneName"), type = "character"), + make_option(c( "--match_geneType"), type = "character"), + make_option(c( "--match_score_option"), type = "character") +) + +# Parse arguments +parser <- OptionParser(option_list = option_list) +args <- parse_args(parser) + +# Default parameters (NULL means use function defaults) +params.input.output <- list( + bw_files_folder = args$bw_files_folder, + peaks = args$peaks, + anno_genes = args$anno_genes, + anno_regions = args$anno_regions, + output_path = args$output_path) + +params.pureClipGlobalFilter <- list( + 
cuttoff = args$peak_score_global_cuttoff) + +params.estimateBsWidth <- list( + est.minWidth = args$minWidth, + est.maxBsWidth = args$maxBsWidth) + +params.pureClipGeneWiseFilter <- list( + cutoff = args$peak_score_genewise_cuttoff, + match.score = args$match_score, + match.geneID = args$match_geneID, + overlaps = args$method_gene_overlaps) + +params.makeBindingSites <- list( + minWidth = args$minWidth, + minCrosslinks = args$minCrosslinks, + minClSites = args$minCLSites) + +params.reproducibilityFilter <- list( + cutoff = args$reproducibility_cutoff, + nReps = args$reproducibility_nReps) + +params.assignToGenes <- list( + overlaps = args$method_gene_overlaps, + overlaps.rule = args$rule_gene_overlaps, + match.geneID = args$match_geneID, + match.geneName = args$match_geneName, + match.geneType = args$match_geneType + ) +params.assignToTranscriptRegions <- list( + overlaps = args$method_region_overlaps, + overlaps.rule = args$rule_region_overlaps) + +params.annotateWithScore <- list( + match.score = args$match_score, + match.option = args$match_score_option +) + +# Remove NULL values so function defaults apply +params.pureClipGlobalFilter <- params.pureClipGlobalFilter[!sapply(params.pureClipGlobalFilter, is.null)] +params.estimateBsWidth <- params.estimateBsWidth[!sapply(params.estimateBsWidth, is.null)] +params.pureClipGeneWiseFilter <- params.pureClipGeneWiseFilter[!sapply(params.pureClipGeneWiseFilter, is.null)] +params.makeBindingSites <- params.makeBindingSites[!sapply(params.makeBindingSites, is.null)] +params.reproducibilityFilter <- params.reproducibilityFilter[!sapply(params.reproducibilityFilter, is.null)] +params.assignToGenes <- params.assignToGenes[!sapply(params.assignToGenes, is.null)] +params.assignToTranscriptRegions <- params.assignToTranscriptRegions[!sapply(params.assignToTranscriptRegions, is.null)] +params.annotateWithScore <- params.annotateWithScore[!sapply(params.annotateWithScore, is.null)] + +######################## +# BindingSiteFinder 
+####################### +# crosslinks +bw_files_names <- list.files(params.input.output$bw_files_folder) + +clipFilesP <- list.files(params.input.output$bw_files_folder, pattern = "plus.bw$", full.names = TRUE) +clipFilesM <- list.files(params.input.output$bw_files_folder, pattern = "minus.bw$", full.names = TRUE) + + +# annotation +gns <- readRDS(params.input.output$anno_genes) +regions <- readRDS(params.input.output$anno_regions) + + +# Peaks from pureclip +peaks = rtracklayer::import(con = params.input.output$peaks, format = "BED", extraCols=c("additionalScores" = "character")) +peaks$additionalScores = NULL +peaks$name = NULL + + + +# Prepare meta data +meta = data.frame( + id = seq_along(clipFilesP), # seq_along() avoids the 1:0 pitfall of 1:length() + condition = factor(rep("all", length(clipFilesP))), # add option for multiple groups from sample file + clPlus = clipFilesP, + clMinus = clipFilesM) + + +# run BindingSiteFinder +####################### + +cat("############################# \n Running BindingSiteFinder \n############################# \n") +bds = BSFDataSetFromBigWig(ranges = peaks, meta = meta, silent = TRUE) + +cat("\nGlobal filter on peak sites \n \n") +bds = do.call(pureClipGlobalFilter, + c(list(bds), + params.pureClipGlobalFilter)) # param cutoff +cat("\nEstimate binding site width \n \n") +bds = do.call(estimateBsWidth, + c(list(bds, anno.genes = gns), + params.estimateBsWidth)) # optional param: bsWidth + +cat("\nGenewise filter on peak sites \n \n") +bds = do.call(pureClipGeneWiseFilter, + c(list(bds, anno.genes = gns), + params.pureClipGeneWiseFilter)) # param cutoff, overlaps, match score, match geneID + +cat("\nMake binding sites \n \n") +bds = do.call(makeBindingSites, + c(list(bds), + params.makeBindingSites)) # params minWidth, minCrosslinks, minCLSites + + +# bds = do.call(reproducibilityFilter, c(list(bds), params.reproducibilityFilter)) # params cutoff, nReps +cat("\nAssign binding sites to genes \n \n") +bds = do.call(assignToGenes, c(list(bds, anno.genes = gns), + 
params.assignToGenes)) # params overlaps, overlaps.rule, match.geneID, match.geneName, match.geneType + +cat("\nAssign binding sites to transcript regions \n \n") +bds = do.call(assignToTranscriptRegions, c(list(bds,anno.transcriptRegionList = regions), + params.assignToTranscriptRegions))# params overlaps, overlaps.rule, + +cat("\nAnotate binding scores \n \n") +bds = do.call(annotateWithScore, c(list(bds, peaks),params.annotateWithScore)) #match.score, match.option + +cat("\nSaving bindings sites \n \n") +bs_gr = getRanges(bds) +names(bs_gr) <- 1:NROW(bs_gr) +bs_df <- as.data.frame(bs_gr) + + +# export outputs +######################## + +exportToBED(bds, con = paste0(params.input.output$output_path , "/myBindingSites.bed")) +saveRDS(bds, paste0(params.input.output$output_path ,"/bds_object.rds")) +saveRDS(bs_df, paste0(params.input.output$output_path ,"/bindingSites.rds")) +write.csv(bs_df, file = paste0(params.input.output$output_path,"/bindingSites.csv"), row.names = FALSE) + + diff --git a/modules/local/BindingSiteFinder/main.nf b/modules/local/BindingSiteFinder/main.nf new file mode 100644 index 00000000..74fd7a3d --- /dev/null +++ b/modules/local/BindingSiteFinder/main.nf @@ -0,0 +1,44 @@ +process BindingSiteFinderAnalysis { + container 'melinak/bindingsitefinder:latest' + + input: + path bw_files_folder + path peaks + path genome_annotation + path sample_sheet + + output: + path "binding_sites.rds" + path "binding_sites.csv" + + script: + """ + Rscript DefineBindingSites.R \\ + --bw_files_folder $bw_files_folder \\ + --peaks $peaks \\ + --anno_genes $anno_gns \\ + --anno_regions $anno_regions \\ + --sample_sheet $sample_sheet \\ + --output_path . 
\\ + --peak_score_global_cuttoff $params.peak_score_global_cuttoff \\ + --bsWidth $params.bsWidth \\ + --peak_score_genewise_cuttoff $params.peak_score_genewise_cuttoff \\ + --minWidth $params.minWidth \\ + --minCrosslinks $params.minCrosslinks \\ + --minCLSites $params.minCLSites \\ + --maxBsWidth $params.maxBsWidth \\ + --reproducibility_cutoff $params.reproducibility_cutoff \\ + --reproducibility_nReps $params.reproducibility_nReps \\ + --method_gene_overlaps $params.method_gene_overlaps \\ + --rule_gene_overlaps $params.rule_gene_overlaps \\ + --method_region_overlaps $params.method_region_overlaps \\ + --rule_region_overlaps $params.rule_region_overlaps \\ + --match_score $params.match_score \\ + --match_geneID $params.match_geneID \\ + --match_geneName $params.match_geneName \\ + --match_geneType $params.match_geneType \\ + --match_score_option $params.match_score_option \\ + binding_sites.rds \\ + binding_sites.csv + """ +} \ No newline at end of file diff --git a/modules/local/BindingSiteFinder/test_run.sh b/modules/local/BindingSiteFinder/test_run.sh new file mode 100644 index 00000000..f3337c74 --- /dev/null +++ b/modules/local/BindingSiteFinder/test_run.sh @@ -0,0 +1,9 @@ +# without nextflow +docker run --rm -it -v /Users/melinaklostermann/Documents/projects:/mnt/ melinak/bindingsitefinder:1.0 bin/bash + +Rscript /mnt/nf-core-clipseq/modules/local/BindingSiteFinder/DefineBindingSites.R \ + --bw_files_folder "/mnt/nf-core-clipseq/devel_folder/example_inputs/AGO_iCLIP" \ + --peaks "/mnt/nf-core-clipseq/devel_folder/example_inputs/AGO_iCLIP/IP_WT_pureclip_sites.bed" \ + --anno_genes "/mnt/nf-core-clipseq/devel_folder/outputs/gns.rds" \ + --anno_regions "/mnt/nf-core-clipseq/devel_folder/outputs/regions.rds" \ + --output_path "/mnt/nf-core-clipseq/devel_folder/outputs" diff --git a/modules/local/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/sortAnnotationForBindingSiteFinder/main.nf new file mode 100644 index 00000000..94bbdc4b --- /dev/null +++ 
b/modules/local/sortAnnotationForBindingSiteFinder/main.nf @@ -0,0 +1,17 @@ +process sortAnnotationForBindingSiteFinder { + container = 'melinak/bindingsitefinder:latest' + + input: + path gtf_file + output: + path "gns.rds" + path "regions.rds" + + script: + """ + Rscript /home/mek24iv/nfcore-clipseq/devel_BindingSiteFinder/modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R \\ + $gtf_file \\ + gns.rds \\ + regions.rds + """ +} \ No newline at end of file diff --git a/modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R b/modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R new file mode 100644 index 00000000..46f58faf --- /dev/null +++ b/modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R @@ -0,0 +1,44 @@ + +# command line arguments +args <- commandArgs(trailingOnly = TRUE) + +print(args) +annoFile = args[1] +out_gns = args[2] +out_regions = args[3] + +# for local tests +#annoFile = "/Users/melinaklostermann/Documents/projects/anno/GENCODEv31-p12/gencode.v31.annotation.gtf" +# out_gns = "/Users/melinaklostermann/Documents/projects/nf-core-clipseq/devel_folder/outputs/gns.rds" +# out_regions = "/Users/melinaklostermann/Documents/projects/nf-core-clipseq/devel_folder/outputs/regions.rds" + +# libraries +library(GenomicFeatures) + + + + +# Make annotation database from gff3 file +annoDb = GenomicFeatures::makeTxDbFromGFF(file = annoFile, format = "gtf") +annoInfo = rtracklayer::import(annoFile, format = "gtf") + + +# Get genes as GRanges +gns = genes(annoDb) +idx = match(gns$gene_id, annoInfo$gene_id) +meta = cbind(elementMetadata(gns), + elementMetadata(annoInfo)[idx,]) +meta = meta[,!duplicated(colnames(meta))] +elementMetadata(gns) = meta + +saveRDS(gns, out_gns) + + +# Get regions as Granges +cdseq = cds(annoDb) +intrns = unlist(intronsByTranscript(annoDb)) +utrs3 = unlist(threeUTRsByTranscript(annoDb)) +utrs5 = 
unlist(fiveUTRsByTranscript(annoDb)) +regions = GRangesList(CDS = cdseq, Intron = intrns, UTR3 = utrs3, UTR5 = utrs5) + +saveRDS(regions, out_regions) diff --git a/subworkflows/local/BindingSiteFinder.nf b/subworkflows/local/BindingSiteFinder.nf new file mode 100644 index 00000000..dcabb17d --- /dev/null +++ b/subworkflows/local/BindingSiteFinder.nf @@ -0,0 +1,42 @@ +nextflow.enable.dsl = 2 + +include { sortAnnotationForBindingSiteFinder } from '../../modules/local/sortAnnotationForBindingSiteFinder/main.nf' +include { BindingSiteFinderAnalysis } from '../../modules/local/BindingSiteFinder/main.nf' + +workflow BindingSiteFinder{ + take: + gtf_ch + // bw_files_folder + // peaks + + + main: + sorted_ch = sortAnnotationForBindingSiteFinder(gtf_ch) + + // BindingSiteFinderAnalysis( + // bw_files_folder, + // peaks, + // sorted_ch.gns, + // sorted_ch.regions, + // sample_sheet, + // // Here start optional parameters + // peak_score_global_cuttoff, + // bsWidth, + // peak_score_genewise_cuttoff, + // minWidth, + // minCrosslinks, + // minCLSites, + // maxBsWidth, + // reproducibility_cutoff, + // reproducibility_nReps, + // method_gene_overlaps, + // rule_gene_overlaps, + // method_region_overlaps, + // rule_region_overlaps, + // match_score, + // match_geneID, + // match_geneName, + // match_geneType, + // match_score_option + // ) +} From e69c0a5f082ad8d4a8bd47af24d52ad89bd28067 Mon Sep 17 00:00:00 2001 From: MelinaKlostermann Date: Mon, 24 Mar 2025 09:55:04 +0100 Subject: [PATCH 02/12] issue 122 comment in bindingsitefinder.R script done --- .../DefineBindingSites.R | 43 +++++++++++-------- .../{ => DefineBindingSites}/main.nf | 4 +- .../{ => DefineBindingSites}/test_run.sh | 0 3 files changed, 27 insertions(+), 20 deletions(-) rename modules/local/BindingSiteFinder/{ => DefineBindingSites}/DefineBindingSites.R (80%) rename modules/local/BindingSiteFinder/{ => DefineBindingSites}/main.nf (91%) rename modules/local/BindingSiteFinder/{ => DefineBindingSites}/test_run.sh 
(100%) diff --git a/modules/local/BindingSiteFinder/DefineBindingSites.R b/modules/local/BindingSiteFinder/DefineBindingSites/DefineBindingSites.R similarity index 80% rename from modules/local/BindingSiteFinder/DefineBindingSites.R rename to modules/local/BindingSiteFinder/DefineBindingSites/DefineBindingSites.R index dbb79c2b..a4ae19db 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites.R +++ b/modules/local/BindingSiteFinder/DefineBindingSites/DefineBindingSites.R @@ -23,28 +23,35 @@ option_list <- list( make_option(c("-r", "--anno_regions"), type = "character"), # output paths make_option(c("-o", "--output_path"), type = "character", default = "."), + # optional parameters to customize binding sites - make_option(c( "--peak_score_global_cuttoff"), type = "numeric"), - make_option(c( "--bsWidth"), type = "numeric"), - make_option(c( "--peak_score_genewise_cuttoff"), type = "numeric"), - make_option(c( "--minWidth"), type = "numeric"), - make_option(c( "--minCrosslinks"), type = "numeric"), - make_option(c( "--minCLSites"), type = "numeric"), - make_option(c( "--maxBsWidth"), type = "numeric"), + #-------------------------------- + # the default values are the ones specified in the Bioconductor package BindingSiteFinder + # https://www.bioconductor.org/packages/release/bioc/manuals/BindingSiteFinder/man/BindingSiteFinder.pdf + # the default values as specified in BindingSiteFinder 2.4.0 are given in the comment after the parameter name + # optional parameters for binding site defnition + make_option(c( "--peak_score_global_cuttoff"), type = "numeric"), # default 0.01 + make_option(c( "--bsWidth"), type = "numeric"), # default automatic estimation + make_option(c( "--peak_score_genewise_cuttoff"), type = "numeric"), # default automatic estimation + make_option(c( "--minWidth"), type = "numeric"), # default 2 + make_option(c( "--minCrosslinks"), type = "numeric"), # default 2 + make_option(c( "--minCLSites"), type = "numeric"), # default 1 + 
make_option(c( "--maxBsWidth"), type = "numeric"), # default 13 # optional parameters for reproducibility - make_option(c( "--reproducibility_cutoff"), type = "numeric"), - make_option(c( "--reproducibility_nReps"), type = "numeric"), + # make_option(c( "--reproducibility_cutoff"), type = "numeric"), + # make_option(c( "--reproducibility_nReps"), type = "numeric"), # optional parameters for gene and region assignment - make_option(c( "--method_gene_overlaps"), type = "character"), - make_option(c( "--rule_gene_overlaps"), type = "character"), - make_option(c( "--method_region_overlaps"), type = "character"), - make_option(c( "--rule_region_overlaps"), type = "character"), + make_option(c( "--method_gene_overlaps"), type = "character"), # default "frequency" + make_option(c( "--rule_gene_overlaps"), type = "character"), # default NULL (only needed for --method_gene_overlaps "hierarchy") + make_option(c( "--method_region_overlaps"), type = "character"), # default "frequency" + make_option(c( "--rule_region_overlaps"), type = "character"), # default NULL (only needed for --method_region_overlaps "hierarchy") # optional parameters to fit non-standard genomes - make_option(c( "--match_score"), type = "numeric"), - make_option(c( "--match_geneID"), type = "character"), - make_option(c( "--match_geneName"), type = "character"), - make_option(c( "--match_geneType"), type = "character"), - make_option(c( "--match_score_option"), type = "character") + make_option(c( "--match_geneID"), type = "character"), # default "gene_id" + make_option(c( "--match_geneName"), type = "character"), # default "gene_name" + make_option(c( "--match_geneType"), type = "character"), # default "gene_type" + # optional parameters to fit peakcaller scores + make_option(c( "--match_score"), type = "numeric"), # default "score" + make_option(c( "--match_score_option"), type = "character") # default "max" ) # Parse arguments diff --git a/modules/local/BindingSiteFinder/main.nf 
b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf similarity index 91% rename from modules/local/BindingSiteFinder/main.nf rename to modules/local/BindingSiteFinder/DefineBindingSites/main.nf index 74fd7a3d..54291ce3 100644 --- a/modules/local/BindingSiteFinder/main.nf +++ b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf @@ -27,8 +27,8 @@ process BindingSiteFinderAnalysis { --minCrosslinks $params.minCrosslinks \\ --minCLSites $params.minCLSites \\ --maxBsWidth $params.maxBsWidth \\ - --reproducibility_cutoff $params.reproducibility_cutoff \\ - --reproducibility_nReps $params.reproducibility_nReps \\ + // --reproducibility_cutoff $params.reproducibility_cutoff \\ + // --reproducibility_nReps $params.reproducibility_nReps \\ --method_gene_overlaps $params.method_gene_overlaps \\ --rule_gene_overlaps $params.rule_gene_overlaps \\ --method_region_overlaps $params.method_region_overlaps \\ diff --git a/modules/local/BindingSiteFinder/test_run.sh b/modules/local/BindingSiteFinder/DefineBindingSites/test_run.sh similarity index 100% rename from modules/local/BindingSiteFinder/test_run.sh rename to modules/local/BindingSiteFinder/DefineBindingSites/test_run.sh From 6c56951f909795667866223747aa4bb3c67f9009 Mon Sep 17 00:00:00 2001 From: MelinaKlostermann Date: Mon, 24 Mar 2025 14:00:18 +0100 Subject: [PATCH 03/12] updated templates for modules --- .../DefineBindingSites/DefineBindingSites.R | 5 ++ .../DefineBindingSites/main.nf | 76 ++++++++++++------- .../main.nf | 35 +++++++++ .../sortAnnotationForBindingSiteFinder.R | 0 .../main.nf | 17 ----- 5 files changed, 88 insertions(+), 45 deletions(-) create mode 100644 modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf rename modules/local/{ => BindingSiteFinder}/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R (100%) delete mode 100644 modules/local/sortAnnotationForBindingSiteFinder/main.nf diff --git 
a/modules/local/BindingSiteFinder/DefineBindingSites/DefineBindingSites.R b/modules/local/BindingSiteFinder/DefineBindingSites/DefineBindingSites.R index a4ae19db..626ea83d 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites/DefineBindingSites.R +++ b/modules/local/BindingSiteFinder/DefineBindingSites/DefineBindingSites.R @@ -12,6 +12,11 @@ suppressMessages( library(rtracklayer)) suppressMessages( library(tidyverse)) suppressMessages( library(optparse)) +# print BSF version +################## +cat("BindingSiteFinder version: ", packageVersion("BindingSiteFinder"), "\n") + + # Input ################ diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf index 54291ce3..79133d3f 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf +++ b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf @@ -1,17 +1,50 @@ process BindingSiteFinderAnalysis { - container 'melinak/bindingsitefinder:latest' - + tag "$meta.id" + label 'process_low' + + //conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://melinak/bindingsitefinder:latest': + 'melinak/bindingsitefinder:latest' }" + input: - path bw_files_folder - path peaks - path genome_annotation - path sample_sheet + tuple val(meta), path(bw_files_folder) + tuple val(meta), path(peaks) + tuple val(meta), path(anno_gns) + tuple val(meta), path(anno_regions) output: - path "binding_sites.rds" - path "binding_sites.csv" + tuple val(meta), path("*binding_sites.rds"), emit: binding_sites_rds + tuple val(meta), path("*binding_sites.csv"), emit: binding_sites_csv + + when: + task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // the bindingSiteFinder sersion number will be printed to the console by the R script + + // optional parameters: + // --peak_score_global_cuttoff $params.peak_score_global_cuttoff \\ + // --bsWidth $params.bsWidth \\ + // --peak_score_genewise_cuttoff $params.peak_score_genewise_cuttoff \\ + // --minWidth $params.minWidth \\ + // --minCrosslinks $params.minCrosslinks \\ + // --minCLSites $params.minCLSites \\ + // --maxBsWidth $params.maxBsWidth \\ + // // --reproducibility_cutoff $params.reproducibility_cutoff \\ + // // --reproducibility_nReps $params.reproducibility_nReps \\ + // --method_gene_overlaps $params.method_gene_overlaps \\ + // --rule_gene_overlaps $params.rule_gene_overlaps \\ + // --method_region_overlaps $params.method_region_overlaps \\ + // --rule_region_overlaps $params.rule_region_overlaps \\ + // --match_score $params.match_score \\ + // --match_geneID $params.match_geneID \\ + // --match_geneName $params.match_geneName \\ + // --match_geneType $params.match_geneType \\ + // --match_score_option $params.match_score_option \\ + """ Rscript DefineBindingSites.R \\ --bw_files_folder $bw_files_folder \\ @@ -20,25 +53,12 @@ process BindingSiteFinderAnalysis { --anno_regions $anno_regions \\ --sample_sheet $sample_sheet \\ --output_path . 
\\ - --peak_score_global_cuttoff $params.peak_score_global_cuttoff \\ - --bsWidth $params.bsWidth \\ - --peak_score_genewise_cuttoff $params.peak_score_genewise_cuttoff \\ - --minWidth $params.minWidth \\ - --minCrosslinks $params.minCrosslinks \\ - --minCLSites $params.minCLSites \\ - --maxBsWidth $params.maxBsWidth \\ - // --reproducibility_cutoff $params.reproducibility_cutoff \\ - // --reproducibility_nReps $params.reproducibility_nReps \\ - --method_gene_overlaps $params.method_gene_overlaps \\ - --rule_gene_overlaps $params.rule_gene_overlaps \\ - --method_region_overlaps $params.method_region_overlaps \\ - --rule_region_overlaps $params.rule_region_overlaps \\ - --match_score $params.match_score \\ - --match_geneID $params.match_geneID \\ - --match_geneName $params.match_geneName \\ - --match_geneType $params.match_geneType \\ - --match_score_option $params.match_score_option \\ - binding_sites.rds \\ - binding_sites.csv + """ + + stub: + def args = task.ext.args ?: '' + """ + touch binding_sites.rds + touch binding_sites.csv """ } \ No newline at end of file diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf new file mode 100644 index 00000000..779200c6 --- /dev/null +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf @@ -0,0 +1,35 @@ +process sortAnnotationForBindingSiteFinder { + tag "$meta.id" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://melinak/bindingsitefinder:latest': + 'melinak/bindingsitefinder:latest' }" + + input: + tuple val(meta), path(gtf_file) + + output: + tuple val(meta), path("*gns.rds"), emit: gns_rds + tuple val(meta), path("*regions.rds"), emit: regions_rds + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + Rscript /home/mek24iv/nfcore-clipseq/devel_BindingSiteFinder/modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R \\ + $gtf_file \\ + gns.rds \\ + regions.rds + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch gns.rds + touch regions.rds + """ +} \ No newline at end of file diff --git a/modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R similarity index 100% rename from modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R rename to modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R diff --git a/modules/local/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/sortAnnotationForBindingSiteFinder/main.nf deleted file mode 100644 index 94bbdc4b..00000000 --- a/modules/local/sortAnnotationForBindingSiteFinder/main.nf +++ /dev/null @@ -1,17 +0,0 @@ -process sortAnnotationForBindingSiteFinder { - container = 'melinak/bindingsitefinder:latest' - - input: - path gtf_file - output: - path "gns.rds" - path "regions.rds" - - script: - """ - Rscript /home/mek24iv/nfcore-clipseq/devel_BindingSiteFinder/modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R \\ - $gtf_file \\ - gns.rds \\ - regions.rds - """ -} \ No newline at end of file From eb35a8ba94f39430149e171d63025492c660124a Mon Sep 17 00:00:00 2001 From: MelinaKlostermann Date: Mon, 24 Mar 2025 14:25:57 +0100 
Subject: [PATCH 04/12] add version number of bindingSiteFinder --- .../BindingSiteFinder/DefineBindingSites/main.nf | 10 ++++++++++ .../sortAnnotationForBindingSiteFinder/main.nf | 13 +++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf index 79133d3f..3dc57d67 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf +++ b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf @@ -53,6 +53,11 @@ process BindingSiteFinderAnalysis { --anno_regions $anno_regions \\ --sample_sheet $sample_sheet \\ --output_path . \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS """ stub: @@ -60,5 +65,10 @@ process BindingSiteFinderAnalysis { """ touch binding_sites.rds touch binding_sites.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS """ } \ No newline at end of file diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf index 779200c6..77956704 100644 --- a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf @@ -23,13 +23,22 @@ process sortAnnotationForBindingSiteFinder { $gtf_file \\ gns.rds \\ regions.rds - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS + """ stub: - def args = task.ext.args ?: '' + def args = task.ext.a def prefix = task.ext.prefix ?: "${meta.id}" """ touch gns.rds touch regions.rds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + \$(Rscript -e 
"packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') + END_VERSIONS """ } \ No newline at end of file From 75c3cac237f9650c36ddf1d1f530a5dbf74cdcc4 Mon Sep 17 00:00:00 2001 From: MelinaKlostermann Date: Mon, 24 Mar 2025 16:41:13 +0100 Subject: [PATCH 05/12] add bfs qc --- .../DefineBindingSites/main.nf | 1 + .../bsfQC/BindingSiteFinderQC.R | 11 ++++ .../bsfQC/BindingSiteFinderQC.qmd | 57 +++++++++++++++++++ modules/local/BindingSiteFinder/bsfQC/main.nf | 42 ++++++++++++++ 4 files changed, 111 insertions(+) create mode 100644 modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.R create mode 100644 modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.qmd create mode 100644 modules/local/BindingSiteFinder/bsfQC/main.nf diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf index 3dc57d67..20a860ee 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf +++ b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf @@ -16,6 +16,7 @@ process BindingSiteFinderAnalysis { output: tuple val(meta), path("*binding_sites.rds"), emit: binding_sites_rds tuple val(meta), path("*binding_sites.csv"), emit: binding_sites_csv + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.R b/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.R new file mode 100644 index 00000000..62b60e64 --- /dev/null +++ b/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.R @@ -0,0 +1,11 @@ +args <- commandArgs(trailingOnly = TRUE) + +print(args) +bds = args[1] + + +rmarkdown::render(xxreport_tmp_path, + #output_dir = paste0(snakemake@params[[1]], "/results/"), + args = args, + output_format = "html_document" +) \ No newline at end of file diff --git a/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.qmd b/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.qmd new 
file mode 100644
index 00000000..147ed877
--- /dev/null
+++ b/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.qmd
@@ -0,0 +1,57 @@
+---
+title: "Quality Control of binding sites from BindingSiteFinder"
+format: html
+---
+
+```{r}
+library(BindingSiteFinder)
+bds <- readRDS(args[1])
+```
+
+
+# Overview
+
+This document describes the (automatically chosen) parameters used for binding site definition in BindingSiteFinder and provides multiple plots to check the quality of the binding sites on multiple levels.
+
+```{r}
+# Flowchart
+processingStepsFlowChart(bds)
+
+```
+
+The flowchart lists all the steps that were performed by BindingSiteFinder. On the left, the number of crosslinks (before the makeBindingSites step) or binding sites (after the makeBindingSites step) is given. On the right, the parameters used in each step are listed.
+
+# Filtering of peak input signal
+
+The peaks obtained from peak calling are filtered by their score, removing the peaks with very low binding scores. The following plot shows the distribution of the peak scores before filtering and the chosen cutoff as a line.
+
+```{r}
+# Filtering peak input
+pureClipGlobalFilterPlot(bds)
+
+```
+
+
+# Estimation of binding sites
+
+BindingSiteFinder estimates the optimal size of the binding sites from a signal-to-flank ratio. In addition, it tests different gene-wise filters of crosslinks to reduce the influence of background signal. The following plot shows the signal-to-flank ratio across different binding site widths. Each line represents a different gene-wise filter. The chosen filter setting and binding site width are shown by the red lines and are also given in the top right corner of the plot.
+
+```{r}
+estimateBsWidthPlot(bds)
+
+```
+
+Whether the chosen binding site width is appropriate can be checked with the following plot. It shows some examples of binding site widths (grey boxes) and the summed signal in the binding site region. For an optimal binding site width, the peak should lie completely within the binding site, but the binding site should not extend further into the background signal.
+
+```{r}
+rangeCoveragePlot(bds, width = 20, show.samples = TRUE, subset.chromosome = "chr22")
+
+```
+
+# Reproducibility of crosslink signals from different replicates in binding sites
+
+Coming soon!
+
+
+
+
diff --git a/modules/local/BindingSiteFinder/bsfQC/main.nf b/modules/local/BindingSiteFinder/bsfQC/main.nf
new file mode 100644
index 00000000..dea29bf0
--- /dev/null
+++ b/modules/local/BindingSiteFinder/bsfQC/main.nf
@@ -0,0 +1,42 @@
+process bsfQC {
+    tag "$meta.id"
+    label 'process_low'
+
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://melinak/bindingsitefinder:latest':
+        'melinak/bindingsitefinder:latest' }"
+
+    input:
+    tuple val(meta), path(binding_sites_rds)
+
+    output:
+    tuple val(meta), path("*BindingSiteFinderQC.html"), emit: BindingSiteFinderQC
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    Rscript clipseq/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.R \\
+        $binding_sites_rds
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # placeholder for the declared "*BindingSiteFinderQC.html" output
+    touch ${prefix}_BindingSiteFinderQC.html
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
From 085a4f49e7b91d583128bafb8d254787edb1f6a3 Mon Sep 17 00:00:00 2001
From: MelinaKlostermann
Date: Tue, 25 Mar 2025 08:29:08 +0100
Subject: [PATCH 06/12] bsf change label to process single

---
 modules/local/BindingSiteFinder/DefineBindingSites/main.nf |
 modules/local/BindingSiteFinder/bsfQC/main.nf |
2 +- .../sortAnnotationForBindingSiteFinder/main.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf index 20a860ee..8f8d1308 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf +++ b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf @@ -1,6 +1,6 @@ process BindingSiteFinderAnalysis { tag "$meta.id" - label 'process_low' + label 'process_single' //conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/BindingSiteFinder/bsfQC/main.nf b/modules/local/BindingSiteFinder/bsfQC/main.nf index dea29bf0..c1b8589c 100644 --- a/modules/local/BindingSiteFinder/bsfQC/main.nf +++ b/modules/local/BindingSiteFinder/bsfQC/main.nf @@ -1,6 +1,6 @@ process bsfQC { tag "$meta.id" - label 'process_low' + label 'process_single' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'docker://melinak/bindingsitefinder:latest': diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf index 77956704..3c47f390 100644 --- a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf @@ -1,6 +1,6 @@ process sortAnnotationForBindingSiteFinder { tag "$meta.id" - label 'process_low' + label 'process_single' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'docker://melinak/bindingsitefinder:latest': From 6cdfae9a4ce0d51cdabf93958239089d05319a37 Mon Sep 17 00:00:00 2001 From: MelinaKlostermann Date: Tue, 25 Mar 2025 11:20:13 +0100 Subject: [PATCH 07/12] bsf start subworkflow --- .../DefineBindingSites/main.nf | 2 +- subworkflows/local/bindingsitefinder/main.nf | 42 +++++++++++++++ subworkflows/local/bindingsitefinder/meta.yml | 51 +++++++++++++++++++ .../bindingsitefinder/tests/main.nf.test | 45 ++++++++++++++++ 4 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 subworkflows/local/bindingsitefinder/main.nf create mode 100644 subworkflows/local/bindingsitefinder/meta.yml create mode 100644 subworkflows/local/bindingsitefinder/tests/main.nf.test diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf index 8f8d1308..4ed02daf 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf +++ b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf @@ -1,4 +1,4 @@ -process BindingSiteFinderAnalysis { +process defineBindingSites { tag "$meta.id" label 'process_single' diff --git a/subworkflows/local/bindingsitefinder/main.nf b/subworkflows/local/bindingsitefinder/main.nf new file mode 100644 index 00000000..0fd508e2 --- /dev/null +++ b/subworkflows/local/bindingsitefinder/main.nf @@ -0,0 +1,42 @@ + +include { sortAnnotationForBindingSiteFinder } from '../../../modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main' +include { defineBindingSites } from '../../../modules/local/BindingSiteFinder/DefineBindingSites/main' +include { bsfQC } from '../../../modules/local/BindingSiteFinder/bsfQC/main' + +workflow BINDINGSITEFINDER { + + take: + + gtf_ch // channel: [ val(meta), [ gtf ] ] + bw_ch // channel: [ val(meta), [ bigwig ] ] + peak_ch // channel: [ peaks ] + + main: + + ch_versions = Channel.empty() + + // Destructure outputs from sortAnnotationForBindingSiteFinder(gtf_ch) + tuple_ch = 
sortAnnotationForBindingSiteFinder(gtf_ch) + gns_ch = tuple_ch.gns_rds + regions_ch = tuple_ch.regions_rds + ch_versions = ch_versions.mix(sortAnnotationForBindingSiteFinder.out.versions.first()) + + // Pass annotation outputs, bigwig files and peak file to sortAnnotationForBindingSiteFinder(bw_ch) + bs_ch = defineBindingSites(bw_ch, gns_ch, regions_ch, peak_ch) + ch_versions = ch_versions.mix(defineBindingSites.out.versions.first()) + + // Extract only the rds output from bs_ch + rds_ch = bs_ch.binding_sites_rds + + // Pass the rds output to bsfQC + qc_ch = bsfQC(rds_ch) + ch_versions = ch_versions.mix(bsfQC.out.versions.first()) + + emit: + csv = DefineBindingSites.out.csv // channel: [ val(meta), [ csv ] ] + rds = DefineBindingSites.out.rds // channel: [ val(meta), [ rds ] ] + html = bsfQC.out.html // channel: [ val(meta), [ html ] ] + + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/bindingsitefinder/meta.yml b/subworkflows/local/bindingsitefinder/meta.yml new file mode 100644 index 00000000..2a286607 --- /dev/null +++ b/subworkflows/local/bindingsitefinder/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bindingsitefinder" +## TODO nf-core: Add a description of the subworkflow and list keywords +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +## TODO nf-core: Add a list of the modules and/or subworkflows used in the subworkflow +components: + - samtools/sort + - samtools/index +## TODO nf-core: List all of the channels used as input with a description and their structure +input: + - ch_bam: + type: file + description: | + The input channel containing the BAM/CRAM/SAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.{bam/cram/sam}" +## TODO nf-core: List all of the channels used as output with a descriptions and their structure +output: + - bam: + type: file + description: | 
+ Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - csi: + type: file + description: | + Channel containing CSI files + Structure: [ val(meta), path(csi) ] + pattern: "*.csi" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@MelinaKlostermann" +maintainers: + - "@MelinaKlostermann" diff --git a/subworkflows/local/bindingsitefinder/tests/main.nf.test b/subworkflows/local/bindingsitefinder/tests/main.nf.test new file mode 100644 index 00000000..9141f86d --- /dev/null +++ b/subworkflows/local/bindingsitefinder/tests/main.nf.test @@ -0,0 +1,45 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core subworkflows test bindingsitefinder +nextflow_workflow { + + name "Test Subworkflow BINDINGSITEFINDER" + script "../main.nf" + workflow "BINDINGSITEFINDER" + + tag "subworkflows" + tag "subworkflows_" + tag "subworkflows/bindingsitefinder" + // TODO nf-core: Add tags for all modules used within this subworkflow. Example: + tag "samtools" + tag "samtools/sort" + tag "samtools/index" + + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam - single_end") { + + when { + workflow { + """ + // TODO nf-core: define inputs of the workflow here. 
Example: + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + //TODO nf-core: Add all required assertions to verify the test output. + ) + } + } +} From b42acd18630ed0863894c09ba1512328c1697f82 Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Tue, 25 Mar 2025 10:38:48 +0000 Subject: [PATCH 08/12] Add tests for sortAnnotationForBindingSiteFinder --- .../sortAnnotationForBindingSiteFinder.R | 9 +-- .../main.nf | 8 +-- .../tests/main.nf.test | 64 +++++++++++++++++++ .../tests/main.nf.test.snap | 14 ++++ 4 files changed, 87 insertions(+), 8 deletions(-) rename {modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder => bin}/sortAnnotationForBindingSiteFinder.R (85%) mode change 100644 => 100755 create mode 100644 modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test create mode 100644 modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R b/bin/sortAnnotationForBindingSiteFinder.R old mode 100644 new mode 100755 similarity index 85% rename from modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R rename to bin/sortAnnotationForBindingSiteFinder.R index 46f58faf..5ce125bd --- a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R +++ b/bin/sortAnnotationForBindingSiteFinder.R @@ -1,3 +1,4 @@ +#!/usr/bin/env Rscript # command line 
arguments args <- commandArgs(trailingOnly = TRUE) @@ -35,10 +36,10 @@ saveRDS(gns, out_gns) # Get regions as Granges -cdseq = cds(annoDb) -intrns = unlist(intronsByTranscript(annoDb)) -utrs3 = unlist(threeUTRsByTranscript(annoDb)) -utrs5 = unlist(fiveUTRsByTranscript(annoDb)) +cdseq = cds(annoDb) +intrns = unlist(intronsByTranscript(annoDb)) +utrs3 = unlist(threeUTRsByTranscript(annoDb)) +utrs5 = unlist(fiveUTRsByTranscript(annoDb)) regions = GRangesList(CDS = cdseq, Intron = intrns, UTR3 = utrs3, UTR5 = utrs5) saveRDS(regions, out_regions) diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf index 3c47f390..fc5458f3 100644 --- a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf @@ -10,8 +10,8 @@ process sortAnnotationForBindingSiteFinder { tuple val(meta), path(gtf_file) output: - tuple val(meta), path("*gns.rds"), emit: gns_rds - tuple val(meta), path("*regions.rds"), emit: regions_rds + tuple val(meta), path("gns.rds"), emit: gns_rds + tuple val(meta), path("regions.rds"), emit: regions_rds path "versions.yml", emit: versions when: @@ -19,7 +19,7 @@ process sortAnnotationForBindingSiteFinder { script: """ - Rscript /home/mek24iv/nfcore-clipseq/devel_BindingSiteFinder/modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R \\ + sortAnnotationForBindingSiteFinder.R \\ $gtf_file \\ gns.rds \\ regions.rds @@ -41,4 +41,4 @@ process sortAnnotationForBindingSiteFinder { \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test new file mode 100644 index 00000000..a12d798c 
--- /dev/null +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test @@ -0,0 +1,64 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test modules/hirutest +nextflow_process { + + name "Test Process CLIPSEQ_SORTANNOTATIONSFORBINDINGSITEFINDER" + script "../main.nf" + process "sortAnnotationForBindingSiteFinder" + + tag "clipseq" + tag "clipseq_sortannotationsforbindingsitefinder" + tag "sortannotationsforbindingsitefinder" + + test("sortAnnotationsForBindingSiteFinder - gtf") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV_filtered_seg.gtf'), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + + // These statements fail inside assertAll (not sure why) + assert new File(process.out.gns_rds[0][1]).exists() + assert new File(process.out.regions_rds[0][1]).exists() + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sortAnnotationsForBindingSiteFinder - gtf - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. 
Example: + + input[0] = [ + [ id:'test' ], // meta map + file('https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV_filtered_seg.gtf'), + ] + """ + } + } + + then { + assert process.success + assert new File(process.out.gns_rds[0][1]).exists() + assert new File(process.out.regions_rds[0][1]).exists() + } + + } + +} diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap new file mode 100644 index 00000000..06761a27 --- /dev/null +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,978f7f625274870d4e9dd7d737845e31" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-03-25T10:28:10.976439" + } +} \ No newline at end of file From 38e0b3482f9184851aaa9902ef69e9e5f087db78 Mon Sep 17 00:00:00 2001 From: MelinaKlostermann Date: Tue, 25 Mar 2025 13:21:19 +0100 Subject: [PATCH 09/12] bsf repair contantainer command --- .gitignore | 3 ++- modules/local/BindingSiteFinder/DefineBindingSites/main.nf | 5 +---- modules/local/BindingSiteFinder/bsfQC/main.nf | 6 ++---- .../sortAnnotationForBindingSiteFinder/main.nf | 4 +--- subworkflows/local/bindingsitefinder/main.nf | 6 +++--- 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index b47b44c0..055ac835 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ testing/ testing* *.pyc .nf-test/ -*.Rhistory \ No newline at end of file +*.Rhistory +*version?current=* \ No newline at end of file diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf index 4ed02daf..c9fc4453 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf +++ 
b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf @@ -2,10 +2,7 @@ process defineBindingSites { tag "$meta.id" label 'process_single' - //conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://melinak/bindingsitefinder:latest': - 'melinak/bindingsitefinder:latest' }" + container "${'melinak/bindingsitefinder:1.1'}" input: tuple val(meta), path(bw_files_folder) diff --git a/modules/local/BindingSiteFinder/bsfQC/main.nf b/modules/local/BindingSiteFinder/bsfQC/main.nf index c1b8589c..cfdf079f 100644 --- a/modules/local/BindingSiteFinder/bsfQC/main.nf +++ b/modules/local/BindingSiteFinder/bsfQC/main.nf @@ -2,15 +2,13 @@ process bsfQC { tag "$meta.id" label 'process_single' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://melinak/bindingsitefinder:latest': - 'melinak/bindingsitefinder:latest' }" + container "${'melinak/bindingsitefinder:1.1'}" input: tuple val(meta), path(binding_sites_rds) output: - tuple val(meta), path("*BindingSiteFinderQC.html"), emit: BindingSiteFinderQC + tuple val(meta), path("*BindingSiteFinderQC.html"), emit: bindingSiteFinderQC path "versions.yml", emit: versions when: diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf index 3c47f390..6af2afec 100644 --- a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf @@ -2,9 +2,7 @@ process sortAnnotationForBindingSiteFinder { tag "$meta.id" label 'process_single' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://melinak/bindingsitefinder:latest': - 'melinak/bindingsitefinder:latest' }" + container "${'melinak/bindingsitefinder:1.1'}" input: tuple val(meta), path(gtf_file) diff --git a/subworkflows/local/bindingsitefinder/main.nf b/subworkflows/local/bindingsitefinder/main.nf index 0fd508e2..9e050cc8 100644 --- a/subworkflows/local/bindingsitefinder/main.nf +++ b/subworkflows/local/bindingsitefinder/main.nf @@ -33,9 +33,9 @@ workflow BINDINGSITEFINDER { ch_versions = ch_versions.mix(bsfQC.out.versions.first()) emit: - csv = DefineBindingSites.out.csv // channel: [ val(meta), [ csv ] ] - rds = DefineBindingSites.out.rds // channel: [ val(meta), [ rds ] ] - html = bsfQC.out.html // channel: [ val(meta), [ html ] ] + csv = defineBindingSites.out.binding_sites_csv // channel: [ val(meta), [ csv ] ] + rds = defineBindingSites.out.binding_sites_rds // channel: [ val(meta), [ rds ] ] + html = bsfQC.out.bindingSiteFinderQC // channel: [ val(meta), [ html ] ] versions = ch_versions // channel: [ versions.yml ] } From 957906230bb43ad17300c6b052afc3799550ef3f Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Tue, 25 Mar 2025 13:18:34 +0000 Subject: [PATCH 10/12] Add test setup for DefineBindingSites Add bigwig temp testing files. At current state, the R script errors out. 
---
 bin/DefineBindingSites.R                      | 231 ++++++++++++++++++
 .../DefineBindingSites/main.nf                |   5 +-
 .../DefineBindingSites/tests/main.nf.test     |  89 ++++++++
 .../HNRNPC.genome.xl.bedgraph_minus.bw        | Bin 0 -> 26182 bytes
 .../bigwigs/HNRNPC.genome.xl.bedgraph_plus.bw | Bin 0 -> 26218 bytes
 .../PHO92_A.genome.xl.bedgraph_minus.bw       | Bin 0 -> 26182 bytes
 .../PHO92_A.genome.xl.bedgraph_plus.bw        | Bin 0 -> 26218 bytes
 .../PHO92_B.genome.xl.bedgraph_minus.bw       | Bin 0 -> 26182 bytes
 .../PHO92_B.genome.xl.bedgraph_plus.bw        | Bin 0 -> 26218 bytes
 .../PHO92_C.genome.xl.bedgraph_minus.bw       | Bin 0 -> 26182 bytes
 .../PHO92_C.genome.xl.bedgraph_plus.bw        | Bin 0 -> 26218 bytes
 11 files changed, 322 insertions(+), 3 deletions(-)
 create mode 100755 bin/DefineBindingSites.R
 create mode 100644 modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test
 create mode 100644 tests/bigwigs/HNRNPC.genome.xl.bedgraph_minus.bw
 create mode 100644 tests/bigwigs/HNRNPC.genome.xl.bedgraph_plus.bw
 create mode 100644 tests/bigwigs/PHO92_A.genome.xl.bedgraph_minus.bw
 create mode 100644 tests/bigwigs/PHO92_A.genome.xl.bedgraph_plus.bw
 create mode 100644 tests/bigwigs/PHO92_B.genome.xl.bedgraph_minus.bw
 create mode 100644 tests/bigwigs/PHO92_B.genome.xl.bedgraph_plus.bw
 create mode 100644 tests/bigwigs/PHO92_C.genome.xl.bedgraph_minus.bw
 create mode 100644 tests/bigwigs/PHO92_C.genome.xl.bedgraph_plus.bw

diff --git a/bin/DefineBindingSites.R b/bin/DefineBindingSites.R
new file mode 100755
index 00000000..8fe5ec7e
--- /dev/null
+++ b/bin/DefineBindingSites.R
@@ -0,0 +1,231 @@
+#!/usr/bin/env Rscript
+
+# -----------------------------
+# Module make binding sites
+# -----------------------------
+# Defines binding sites from called peaks and crosslink bigWig tracks with
+# BindingSiteFinder, assigns them to genes and transcript regions, annotates
+# scores and writes BED, RDS and CSV outputs into --output_path.
+#
+# Required: --bw_files_folder, --peaks, --anno_genes, --anno_regions
+# (the folder must hold matching "*plus.bw" and "*minus.bw" files).
+# NOTE(review): the next line silences every warning of the run; consider re-enabling them when debugging.
+options(warn = -1)
+
+# libraries
+################
+ # Suppress messages
+suppressMessages(library(BindingSiteFinder))
+suppressMessages(library(GenomicRanges))
+suppressMessages(library(rtracklayer))
+suppressMessages(library(tidyverse))
+suppressMessages(library(optparse))
+
+# print BSF version
+##################
+cat("BindingSiteFinder version: ", as.character(packageVersion("BindingSiteFinder")), "\n")
+
+
+# Input
+################
+
+# Specify the input arguments
+option_list <- list(
+  make_option(c("-b", "--bw_files_folder"), type = "character"),
+  make_option(c("-p", "--peaks"), type = "character"),
+  make_option(c("-g", "--anno_genes"), type = "character"),
+  make_option(c("-r", "--anno_regions"), type = "character"),
+  # output paths
+  make_option(c("-o", "--output_path"), type = "character", default = "."),
+
+  # optional parameters to customize binding sites
+  #--------------------------------
+  # the default values are the ones specified in the Bioconductor package BindingSiteFinder
+  # https://www.bioconductor.org/packages/release/bioc/manuals/BindingSiteFinder/man/BindingSiteFinder.pdf
+  # the default values as specified in BindingSiteFinder 2.4.0 are given in the comment after the parameter name
+  # optional parameters for binding site definition
+  make_option(c("--peak_score_global_cuttoff"), type = "numeric"), # default 0.01
+  make_option(c("--bsWidth"), type = "numeric"), # default automatic estimation
+  make_option(c("--peak_score_genewise_cuttoff"), type = "numeric"), # default automatic estimation
+  make_option(c("--minWidth"), type = "numeric"), # default 2
+  make_option(c("--minCrosslinks"), type = "numeric"), # default 2
+  make_option(c("--minCLSites"), type = "numeric"), # default 1
+  make_option(c("--maxBsWidth"), type = "numeric"), # default 13
+  # optional parameters for reproducibility
+  # make_option(c( "--reproducibility_cutoff"), type = "numeric"),
+  # make_option(c( "--reproducibility_nReps"), type = "numeric"),
+  # optional parameters for gene and region assignment
+  make_option(c("--method_gene_overlaps"), type = "character"), # default "frequency"
+  make_option(c("--rule_gene_overlaps"), type = "character"), # default NULL (only needed for --method_gene_overlaps "hierarchy")
+  make_option(c("--method_region_overlaps"), type = "character"), # default "frequency"
+  make_option(c("--rule_region_overlaps"), type = "character"), # default NULL (only needed for --method_region_overlaps "hierarchy")
+  # optional parameters to fit non-standard genomes
+  make_option(c("--match_geneID"), type = "character"), # default "gene_id"
+  make_option(c("--match_geneName"), type = "character"), # default "gene_name"
+  make_option(c("--match_geneType"), type = "character"), # default "gene_type"
+  # optional parameters to fit peakcaller scores
+  make_option(c("--match_score"), type = "character"), # default "score" (a column name, hence character)
+  make_option(c("--match_score_option"), type = "character") # default "max"
+)
+
+# Parse arguments
+parser <- OptionParser(option_list = option_list)
+args <- parse_args(parser)
+
+# Fail early with a clear message when a mandatory input is missing
+# (optparse leaves out options that were not supplied and have no default).
+required_opts <- c("bw_files_folder", "peaks", "anno_genes", "anno_regions")
+missing_opts <- setdiff(required_opts, names(args))
+if (length(missing_opts) > 0L) {
+  stop("Missing required option(s): ", paste0("--", missing_opts, collapse = ", "), call. = FALSE)
+}
+
+# Default parameters (NULL means use function defaults)
+params.input.output <- list(
+  bw_files_folder = args$bw_files_folder,
+  peaks = args$peaks,
+  anno_genes = args$anno_genes,
+  anno_regions = args$anno_regions,
+  output_path = args$output_path)
+
+params.pureClipGlobalFilter <- list(
+  cutoff = args$peak_score_global_cuttoff)
+
+params.estimateBsWidth <- list(
+  est.minWidth = args$minWidth,
+  est.maxBsWidth = args$maxBsWidth)
+
+params.pureClipGeneWiseFilter <- list(
+  cutoff = args$peak_score_genewise_cuttoff,
+  match.score = args$match_score,
+  match.geneID = args$match_geneID,
+  # NOTE(review): pureClipGeneWiseFilter() and assignToGenes() may accept different `overlaps` choices - confirm before sharing this option
+  overlaps = args$method_gene_overlaps)
+
+params.makeBindingSites <- list(
+  minWidth = args$minWidth,
+  minCrosslinks = args$minCrosslinks,
+  minClSites = args$minCLSites)
+
+params.reproducibilityFilter <- list(
+  cutoff = args$reproducibility_cutoff,
+  nReps = args$reproducibility_nReps)
+
+params.assignToGenes <- list(
+  overlaps = args$method_gene_overlaps,
+  overlaps.rule = args$rule_gene_overlaps,
+  match.geneID = args$match_geneID,
+  match.geneName = args$match_geneName,
+  match.geneType = args$match_geneType
+  )
+params.assignToTranscriptRegions <- list(
+  overlaps = args$method_region_overlaps,
+  overlaps.rule = args$rule_region_overlaps)
+
+params.annotateWithScore <- list(
+  match.score = args$match_score,
+  match.option = args$match_score_option
+)
+
+# Remove NULL values so function defaults apply
+params.pureClipGlobalFilter <- Filter(Negate(is.null), params.pureClipGlobalFilter)
+params.estimateBsWidth <- Filter(Negate(is.null), params.estimateBsWidth)
+params.pureClipGeneWiseFilter <- Filter(Negate(is.null), params.pureClipGeneWiseFilter)
+params.makeBindingSites <- Filter(Negate(is.null), params.makeBindingSites)
+params.reproducibilityFilter <- Filter(Negate(is.null), params.reproducibilityFilter)
+params.assignToGenes <- Filter(Negate(is.null), params.assignToGenes)
+params.assignToTranscriptRegions <- Filter(Negate(is.null), params.assignToTranscriptRegions)
+params.annotateWithScore <- Filter(Negate(is.null), params.annotateWithScore)
+
+########################
+# BindingSiteFinder
+#######################
+# crosslinks
+clipFilesP <- list.files(params.input.output$bw_files_folder, pattern = "plus.bw$", full.names = TRUE)
+clipFilesM <- list.files(params.input.output$bw_files_folder, pattern = "minus.bw$", full.names = TRUE)
+cat(clipFilesP, sep = "\n")
+cat(clipFilesM, sep = "\n")
+if (length(clipFilesP) == 0L || length(clipFilesP) != length(clipFilesM)) {
+  stop("Expected matching '*plus.bw' and '*minus.bw' files in ", params.input.output$bw_files_folder, call. = FALSE)
+}
+
+
+# annotation
+gns <- readRDS(params.input.output$anno_genes)
+regions <- readRDS(params.input.output$anno_regions)
+
+
+# Peaks from pureclip
+peaks <- rtracklayer::import(con = params.input.output$peaks, format = "BED", extraCols = c("additionalScores" = "character"))
+peaks$additionalScores <- NULL
+peaks$name <- NULL
+
+
+
+# Prepare meta data
+meta <- data.frame(
+  id = seq_along(clipFilesP),
+  condition = factor(rep("all", length(clipFilesP))), # add option for multiple groups from sample file
+  clPlus = clipFilesP,
+  clMinus = clipFilesM)
+
+
+# run BindingSiteFinder
+#######################
+
+cat("############################# \n Running BindingSiteFinder \n############################# \n")
+bds <- BSFDataSetFromBigWig(ranges = peaks, meta = meta, silent = TRUE)
+
+cat("\nGlobal filter on peak sites \n \n")
+bds <- do.call(pureClipGlobalFilter,
+               c(list(bds),
+                 params.pureClipGlobalFilter)) # param cutoff
+cat("\nEstimate binding site width \n \n")
+bds <- do.call(estimateBsWidth,
+               c(list(bds, anno.genes = gns),
+                 params.estimateBsWidth)) # optional param: bsWidth
+
+cat("\nGenewise filter on peak sites \n \n")
+bds <- do.call(pureClipGeneWiseFilter,
+               c(list(bds, anno.genes = gns),
+                 params.pureClipGeneWiseFilter)) # param cutoff, overlaps, match score, match geneID
+
+cat("\nMake binding sites \n \n")
+bds <- do.call(makeBindingSites,
+               c(list(bds),
+                 params.makeBindingSites)) # params minWidth, minCrosslinks, minCLSites
+
+
+# bds = do.call(reproducibilityFilter, c(list(bds), params.reproducibilityFilter)) # params cutoff, nReps
+cat("\nAssign binding sites to genes \n \n")
+bds <- do.call(assignToGenes, c(list(bds, anno.genes = gns),
+                                params.assignToGenes)) # params overlaps, overlaps.rule, match.geneID, match.geneName, match.geneType
+
+cat("\nAssign binding sites to transcript regions \n \n")
+bds <- do.call(assignToTranscriptRegions, c(list(bds, anno.transcriptRegionList = regions),
+                                            params.assignToTranscriptRegions)) # params overlaps, overlaps.rule,
+
+cat("\nAnotate binding scores \n \n")
+bds <- do.call(annotateWithScore, c(list(bds, peaks), params.annotateWithScore)) # match.score, match.option
+
+cat("\nSaving bindings sites \n \n")
+bs_gr <- getRanges(bds)
+names(bs_gr) <- seq_along(bs_gr)
+bs_df <- as.data.frame(bs_gr)
+
+
+# export outputs
+########################
+
+out_dir <- params.input.output$output_path
+exportToBED(bds, con = file.path(out_dir, "myBindingSites.bed"))
+# Names collected by the defineBindingSites process ("*binding_sites.rds" / "*binding_sites.csv");
+# the rds holds the BSFDataSet itself because the QC report plots from that object.
+saveRDS(bds, file.path(out_dir, "binding_sites.rds"))
+write.csv(bs_df, file = file.path(out_dir, "binding_sites.csv"), row.names = FALSE)
+# Previous file names, kept so that existing consumers keep working
+saveRDS(bds, file.path(out_dir, "bds_object.rds"))
+saveRDS(bs_df, file.path(out_dir, "bindingSites.rds"))
+write.csv(bs_df, file = file.path(out_dir, "bindingSites.csv"), row.names = FALSE)
+
+
diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf
index 4ed02daf..db2c28f5 100644
--- a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf
+++ b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf
@@ -47,12 +47,11 @@ process defineBindingSites {
     // --match_score_option $params.match_score_option \\
 
     """
-    Rscript DefineBindingSites.R \\
+    DefineBindingSites.R \\
         --bw_files_folder $bw_files_folder \\
         --peaks $peaks \\
         --anno_genes $anno_gns \\
         --anno_regions $anno_regions \\
-        --sample_sheet $sample_sheet \\
         --output_path . \\
 
     cat <<-END_VERSIONS > versions.yml
@@ -72,4 +71,4 @@ process defineBindingSites {
         \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //')
     END_VERSIONS
     """
-}
\ No newline at end of file
+}
diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test b/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test
new file mode 100644
index 00000000..edd6b13d
--- /dev/null
+++ b/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test
@@ -0,0 +1,89 @@
+// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
+// nf-core modules test modules/hirutest
+nextflow_process {
+
+    name "Test Process CLIPSEQ_DEFINEBINDINGSITES"
+    script "../main.nf"
+    process "defineBindingSites"
+
+    tag "clipseq"
+    tag "clipseq_definebindingsites"
+    tag "definebindingsites"
+
+    // Global setup for rds input files
+    setup {
+        run("sortAnnotationForBindingSiteFinder", alias: "ANNOTATIONS") {
+            script "../../sortAnnotationForBindingSiteFinder/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'test_sort' ], // meta map
+
file('https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV_filtered_seg.gtf'), + ] + """ + } + } + } + + test("DefineBindingSites - bw_files_folder, peak_bed, anno_genes_rds, anno_regions_rds") { + + // TODO nf-core: If you are created a test for a chained module + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + input[0] = [ + [ id:'test' ], // meta map + file('/Users/hdash/code/public/clipseq/tests/bigwigs', checkIfExists: true), + ] + input[1] = [ + [ id:'test' ], // meta map + file('/Users/hdash/code/public/clipseq/results/05_peakcalling/pureclip/HNRNPC_pureclip_crosslinks.bed', checkIfExists: true), + ] + input[2] = ANNOTATIONS.out[0] + input[3] = ANNOTATIONS.out[1] + """ + } + } + + then { + assertAll( + { assert process.success }, + // { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + // test("DefineBindingSites - bw_files_folder, peak_bed, anno_genes_rds, anno_regions_rds - stub") { + + // options "-stub" + + // when { + // process { + // """ + // // TODO nf-core: define inputs of the process here. Example: + + // input = ["","","",""] + // """ + // } + // } + + // then { + // assertAll( + // { assert process.success }, + // { assert snapshot(process.out).match() } + // //TODO nf-core: Add all required assertions to verify the test output. 
+ // ) + // } + + // } + +} diff --git a/tests/bigwigs/HNRNPC.genome.xl.bedgraph_minus.bw b/tests/bigwigs/HNRNPC.genome.xl.bedgraph_minus.bw new file mode 100644 index 0000000000000000000000000000000000000000..a136ad2dbfc1f78037d4787c3c1ad0aaf3fadea6 GIT binary patch literal 26182 zcmeI*drVVT90%~z7TXT5MS>upRm27)0R@Wk7y)z?-6l}SBDfJP55WgShKP+YdJD=x z5$OOoBDOdgFG*JI7(>h`Yy{=G0FEjez<@?6$Y7R9p{}@hb59-SFXfmZ3W^{hT`4lubrgMj>DkqA58+FC%kkapCvhKoayS{zN zP$Oij>*xs9_ftdk^>R4}{$rwca(#2ENmt7w^ljIq08w5$pAaz79(3)Y*U(-V3Q5}& zd^F_*(XIb7;GvkF73%x2f!W@P9>DU`H%Hx{G!jCH@R=ojEF3-2s#|keTP^H-L55x`Ckm`>s zH9iGZvhg(eZ{o?_80$zm`zN8JUs^_&yv24f7yD7<(Q+|{a;5E2&%qu0%0(I<-jhMp zI9$S?D0wmWuqt=kb*0M9f*F$@9Mc^bmjAhUIy2R5%yQy*(LmiHZpg8okF4qhtuFHw zqOD^t&QBi{XP*0(tUlPTV!mJy@ z>ywi5J2pxFf!?(X`1CbZ;JuyP(upNjXFvZl7`O;o6^bdNBg+LwMSUQYFWCd)_dv~&{i=W z8DC!G`${Y}q7Y+|yOL6zx<< zL_%}UhP;WL(dL?fd9hHq%DE?Lwf$OD-dGwiJ?lsQJus;DY1XLc)woziQI|=d0NI6uy1Qp`kkvv~E#qslAeTdj(Z7$9{J0Y;?vy=FP%vOg*gMeS)`Boy*; zW2Z5+ASPfeOGAsrxKTG$#swK`>d0MsA=0w4eaAOHd&00JNY0w4eaAOHd&00JNY z0w4eaAOHf(R)E;!>AymY@Q`FzFMP~~v1@de8O^IYO+`LM7~L2<{M&|7#CH&jyR^L} zcIYwe=w{esYS}Wv#6bWAKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00aAeE@hb59*7^F&67gmx3qOe@gWdnDcQ+`X&*91_gwEz^BAUZ$(l`ica^nT`z}y8dhW ziV$_CV{C`6-#JuQ=gU6iA2<5kPj|T7Usw0(s(|aCpCjYp0!G@6qTO^F+7J3h(Dv+z zO-aBHEyD;XLR)d(LyeT#vAhyChG7k-qA{m51v1W|#bH-{V_n+=_6Rcrwk(jwiF+tN zcm;H_>X+C^^MYDG*~h^y+%#M0l1_6kziWp+RG3%sN<4m%gzfyure59SE!kLg{G^zf zk7o+HIRL1+ZDGpgCQlH_#$UsVh@<^=ZMXSZD z)C;UVtxJb|>m9>xmu~XDvEwY?O3*?}e6yE2?MGcw#XsGb@aDC`C^0oq5ug0C=N&WO zy+u<8EO`&DcI62wSE$>QseHzWh%Lbosyuo;dW2BaUb3~z4LUFcbAE&Fx#}# z+}4R^FVb`|A{ir9w(mVLQ#*3yOA2p|#*LbB+e`b}CeP%xhNU-6jZ6t6HG`ft8Q~dU zZOh-vk?s?BzsR4AC}|VNdRKImUu|{yuDHcXA^J2Vs7R{vVr+^N6@@Y$l`y-u`(!$s zsuHd*3a*m2hgw#rJKN@3$Y-zyyV3ukEt-9- z>#ACH>bz!qw|ji^hWKvx;vdO95eJ);8oSFj1I|fWgPL+|C7 zmH6D5JH_Ic>Za4Watbm$%7&*GpL%%1$KWlcAE6Tc$<4a1nW>l>^!GJ-_2>Hy7txP{ z0sfA&bDDWn!*<^A=bqy(ej9_8R%o@;0NRKydC8t*V2A5~8 zAl)O2YX&{Gk{ds8BnMPa63Zq{%Z}t)2B!zwTi!F;7)`denxenp2wtix4@P(Ji_T;& 
zbTF@{yA|^lG*9l@;4#v$2Rds{^t{7aZW5H|u;%0&`t&-~ML6j-%sS}W!U{^dgE~fO zwEeYzN=7?vYE%_&;`2{ANVY+s^pYPt7u9Bx%1Ar0)Tnkm;ZcQq&G3+a#WZ5(_>cSi z?rn4JK@#@T9$gCAtTbdN`0qC~Lw15kakFtIp5tj7VI~>TWhZzPf{!uLN1lJojDQ^o zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@A#{G1_rspDS>lWAsNO8OACfhpXQf7ksxu9B zy+=!P11#3Npn`tFvmq||e$zO{Y4DNXt<7a+5W^dZ+mKu9yGIR?1^)GhHaD`EebeeK zk%gcBZLDO&1)zce2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*>0TLFCGr~i&D z@XVo?RbU^HvaU90WiB1V8`; lKmY_l00ck)1V8`;KmY_l00ck)1V8`;K)@j2sQs6jzW}DR1p@#8 literal 0 HcmV?d00001 diff --git a/tests/bigwigs/PHO92_A.genome.xl.bedgraph_minus.bw b/tests/bigwigs/PHO92_A.genome.xl.bedgraph_minus.bw new file mode 100644 index 0000000000000000000000000000000000000000..a136ad2dbfc1f78037d4787c3c1ad0aaf3fadea6 GIT binary patch literal 26182 zcmeI*drVVT90%~z7TXT5MS>upRm27)0R@Wk7y)z?-6l}SBDfJP55WgShKP+YdJD=x z5$OOoBDOdgFG*JI7(>h`Yy{=G0FEjez<@?6$Y7R9p{}@hb59-SFXfmZ3W^{hT`4lubrgMj>DkqA58+FC%kkapCvhKoayS{zN zP$Oij>*xs9_ftdk^>R4}{$rwca(#2ENmt7w^ljIq08w5$pAaz79(3)Y*U(-V3Q5}& zd^F_*(XIb7;GvkF73%x2f!W@P9>DU`H%Hx{G!jCH@R=ojEF3-2s#|keTP^H-L55x`Ckm`>s zH9iGZvhg(eZ{o?_80$zm`zN8JUs^_&yv24f7yD7<(Q+|{a;5E2&%qu0%0(I<-jhMp zI9$S?D0wmWuqt=kb*0M9f*F$@9Mc^bmjAhUIy2R5%yQy*(LmiHZpg8okF4qhtuFHw zqOD^t&QBi{XP*0(tUlPTV!mJy@ z>ywi5J2pxFf!?(X`1CbZ;JuyP(upNjXFvZl7`O;o6^bdNBg+LwMSUQYFWCd)_dv~&{i=W z8DC!G`${Y}q7Y+|yOL6zx<< zL_%}UhP;WL(dL?fd9hHq%DE?Lwf$OD-dGwiJ?lsQJus;DY1XLc)woziQI|=d0NI6uy1Qp`kkvv~E#qslAeTdj(Z7$9{J0Y;?vy=FP%vOg*gMeS)`Boy*; zW2Z5+ASPfeOGAsrxKTG$#swK`>d0MsA=0w4eaAOHd&00JNY0w4eaAOHd&00JNY z0w4eaAOHf(R)E;!>AymY@Q`FzFMP~~v1@de8O^IYO+`LM7~L2<{M&|7#CH&jyR^L} zcIYwe=w{esYS}Wv#6bWAKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00aAeE@hb59*7^F&67gmx3qOe@gWdnDcQ+`X&*91_gwEz^BAUZ$(l`ica^nT`z}y8dhW ziV$_CV{C`6-#JuQ=gU6iA2<5kPj|T7Usw0(s(|aCpCjYp0!G@6qTO^F+7J3h(Dv+z zO-aBHEyD;XLR)d(LyeT#vAhyChG7k-qA{m51v1W|#bH-{V_n+=_6Rcrwk(jwiF+tN zcm;H_>X+C^^MYDG*~h^y+%#M0l1_6kziWp+RG3%sN<4m%gzfyure59SE!kLg{G^zf zk7o+HIRL1+ZDGpgCQlH_#$UsVh@<^=ZMXSZD 
z)C;UVtxJb|>m9>xmu~XDvEwY?O3*?}e6yE2?MGcw#XsGb@aDC`C^0oq5ug0C=N&WO zy+u<8EO`&DcI62wSE$>QseHzWh%Lbosyuo;dW2BaUb3~z4LUFcbAE&Fx#}# z+}4R^FVb`|A{ir9w(mVLQ#*3yOA2p|#*LbB+e`b}CeP%xhNU-6jZ6t6HG`ft8Q~dU zZOh-vk?s?BzsR4AC}|VNdRKImUu|{yuDHcXA^J2Vs7R{vVr+^N6@@Y$l`y-u`(!$s zsuHd*3a*m2hgw#rJKN@3$Y-zyyV3ukEt-9- z>#ACH>bz!qw|ji^hWKvx;vdO95eJ);8oSFj1I|fWgPL+|C7 zmH6D5JH_Ic>Za4Watbm$%7&*GpL%%1$KWlcAE6Tc$<4a1nW>l>^!GJ-_2>Hy7txP{ z0sfA&bDDWn!*<^A=bqy(ej9_8R%o@;0NRKydC8t*V2A5~8 zAl)O2YX&{Gk{ds8BnMPa63Zq{%Z}t)2B!zwTi!F;7)`denxenp2wtix4@P(Ji_T;& zbTF@{yA|^lG*9l@;4#v$2Rds{^t{7aZW5H|u;%0&`t&-~ML6j-%sS}W!U{^dgE~fO zwEeYzN=7?vYE%_&;`2{ANVY+s^pYPt7u9Bx%1Ar0)Tnkm;ZcQq&G3+a#WZ5(_>cSi z?rn4JK@#@T9$gCAtTbdN`0qC~Lw15kakFtIp5tj7VI~>TWhZzPf{!uLN1lJojDQ^o zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@A#{G1_rspDS>lWAsNO8OACfhpXQf7ksxu9B zy+=!P11#3Npn`tFvmq||e$zO{Y4DNXt<7a+5W^dZ+mKu9yGIR?1^)GhHaD`EebeeK zk%gcBZLDO&1)zce2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*>0TLFCGr~i&D z@XVo?RbU^HvaU90WiB1V8`; lKmY_l00ck)1V8`;KmY_l00ck)1V8`;K)@j2sQs6jzW}DR1p@#8 literal 0 HcmV?d00001 diff --git a/tests/bigwigs/PHO92_B.genome.xl.bedgraph_minus.bw b/tests/bigwigs/PHO92_B.genome.xl.bedgraph_minus.bw new file mode 100644 index 0000000000000000000000000000000000000000..a136ad2dbfc1f78037d4787c3c1ad0aaf3fadea6 GIT binary patch literal 26182 zcmeI*drVVT90%~z7TXT5MS>upRm27)0R@Wk7y)z?-6l}SBDfJP55WgShKP+YdJD=x z5$OOoBDOdgFG*JI7(>h`Yy{=G0FEjez<@?6$Y7R9p{}@hb59-SFXfmZ3W^{hT`4lubrgMj>DkqA58+FC%kkapCvhKoayS{zN zP$Oij>*xs9_ftdk^>R4}{$rwca(#2ENmt7w^ljIq08w5$pAaz79(3)Y*U(-V3Q5}& zd^F_*(XIb7;GvkF73%x2f!W@P9>DU`H%Hx{G!jCH@R=ojEF3-2s#|keTP^H-L55x`Ckm`>s zH9iGZvhg(eZ{o?_80$zm`zN8JUs^_&yv24f7yD7<(Q+|{a;5E2&%qu0%0(I<-jhMp zI9$S?D0wmWuqt=kb*0M9f*F$@9Mc^bmjAhUIy2R5%yQy*(LmiHZpg8okF4qhtuFHw zqOD^t&QBi{XP*0(tUlPTV!mJy@ z>ywi5J2pxFf!?(X`1CbZ;JuyP(upNjXFvZl7`O;o6^bdNBg+LwMSUQYFWCd)_dv~&{i=W z8DC!G`${Y}q7Y+|yOL6zx<< zL_%}UhP;WL(dL?fd9hHq%DE?Lwf$OD-dGwiJ?lsQJus;DY1XLc)woziQI|=d0NI6uy1Qp`kkvv~E#qslAeTdj(Z7$9{J0Y;?vy=FP%vOg*gMeS)`Boy*; 
zW2Z5+ASPfeOGAsrxKTG$#swK`>d0MsA=0w4eaAOHd&00JNY0w4eaAOHd&00JNY z0w4eaAOHf(R)E;!>AymY@Q`FzFMP~~v1@de8O^IYO+`LM7~L2<{M&|7#CH&jyR^L} zcIYwe=w{esYS}Wv#6bWAKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00aAeE@hb59*7^F&67gmx3qOe@gWdnDcQ+`X&*91_gwEz^BAUZ$(l`ica^nT`z}y8dhW ziV$_CV{C`6-#JuQ=gU6iA2<5kPj|T7Usw0(s(|aCpCjYp0!G@6qTO^F+7J3h(Dv+z zO-aBHEyD;XLR)d(LyeT#vAhyChG7k-qA{m51v1W|#bH-{V_n+=_6Rcrwk(jwiF+tN zcm;H_>X+C^^MYDG*~h^y+%#M0l1_6kziWp+RG3%sN<4m%gzfyure59SE!kLg{G^zf zk7o+HIRL1+ZDGpgCQlH_#$UsVh@<^=ZMXSZD z)C;UVtxJb|>m9>xmu~XDvEwY?O3*?}e6yE2?MGcw#XsGb@aDC`C^0oq5ug0C=N&WO zy+u<8EO`&DcI62wSE$>QseHzWh%Lbosyuo;dW2BaUb3~z4LUFcbAE&Fx#}# z+}4R^FVb`|A{ir9w(mVLQ#*3yOA2p|#*LbB+e`b}CeP%xhNU-6jZ6t6HG`ft8Q~dU zZOh-vk?s?BzsR4AC}|VNdRKImUu|{yuDHcXA^J2Vs7R{vVr+^N6@@Y$l`y-u`(!$s zsuHd*3a*m2hgw#rJKN@3$Y-zyyV3ukEt-9- z>#ACH>bz!qw|ji^hWKvx;vdO95eJ);8oSFj1I|fWgPL+|C7 zmH6D5JH_Ic>Za4Watbm$%7&*GpL%%1$KWlcAE6Tc$<4a1nW>l>^!GJ-_2>Hy7txP{ z0sfA&bDDWn!*<^A=bqy(ej9_8R%o@;0NRKydC8t*V2A5~8 zAl)O2YX&{Gk{ds8BnMPa63Zq{%Z}t)2B!zwTi!F;7)`denxenp2wtix4@P(Ji_T;& zbTF@{yA|^lG*9l@;4#v$2Rds{^t{7aZW5H|u;%0&`t&-~ML6j-%sS}W!U{^dgE~fO zwEeYzN=7?vYE%_&;`2{ANVY+s^pYPt7u9Bx%1Ar0)Tnkm;ZcQq&G3+a#WZ5(_>cSi z?rn4JK@#@T9$gCAtTbdN`0qC~Lw15kakFtIp5tj7VI~>TWhZzPf{!uLN1lJojDQ^o zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@A#{G1_rspDS>lWAsNO8OACfhpXQf7ksxu9B zy+=!P11#3Npn`tFvmq||e$zO{Y4DNXt<7a+5W^dZ+mKu9yGIR?1^)GhHaD`EebeeK zk%gcBZLDO&1)zce2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*>0TLFCGr~i&D z@XVo?RbU^HvaU90WiB1V8`; lKmY_l00ck)1V8`;KmY_l00ck)1V8`;K)@j2sQs6jzW}DR1p@#8 literal 0 HcmV?d00001 diff --git a/tests/bigwigs/PHO92_C.genome.xl.bedgraph_minus.bw b/tests/bigwigs/PHO92_C.genome.xl.bedgraph_minus.bw new file mode 100644 index 0000000000000000000000000000000000000000..a136ad2dbfc1f78037d4787c3c1ad0aaf3fadea6 GIT binary patch literal 26182 zcmeI*drVVT90%~z7TXT5MS>upRm27)0R@Wk7y)z?-6l}SBDfJP55WgShKP+YdJD=x z5$OOoBDOdgFG*JI7(>h`Yy{=G0FEjez<@?6$Y7R9p{}@hb59-SFXfmZ3W^{hT`4lubrgMj>DkqA58+FC%kkapCvhKoayS{zN 
zP$Oij>*xs9_ftdk^>R4}{$rwca(#2ENmt7w^ljIq08w5$pAaz79(3)Y*U(-V3Q5}& zd^F_*(XIb7;GvkF73%x2f!W@P9>DU`H%Hx{G!jCH@R=ojEF3-2s#|keTP^H-L55x`Ckm`>s zH9iGZvhg(eZ{o?_80$zm`zN8JUs^_&yv24f7yD7<(Q+|{a;5E2&%qu0%0(I<-jhMp zI9$S?D0wmWuqt=kb*0M9f*F$@9Mc^bmjAhUIy2R5%yQy*(LmiHZpg8okF4qhtuFHw zqOD^t&QBi{XP*0(tUlPTV!mJy@ z>ywi5J2pxFf!?(X`1CbZ;JuyP(upNjXFvZl7`O;o6^bdNBg+LwMSUQYFWCd)_dv~&{i=W z8DC!G`${Y}q7Y+|yOL6zx<< zL_%}UhP;WL(dL?fd9hHq%DE?Lwf$OD-dGwiJ?lsQJus;DY1XLc)woziQI|=d0NI6uy1Qp`kkvv~E#qslAeTdj(Z7$9{J0Y;?vy=FP%vOg*gMeS)`Boy*; zW2Z5+ASPfeOGAsrxKTG$#swK`>d0MsA=0w4eaAOHd&00JNY0w4eaAOHd&00JNY z0w4eaAOHf(R)E;!>AymY@Q`FzFMP~~v1@de8O^IYO+`LM7~L2<{M&|7#CH&jyR^L} zcIYwe=w{esYS}Wv#6bWAKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00aAeE@hb59*7^F&67gmx3qOe@gWdnDcQ+`X&*91_gwEz^BAUZ$(l`ica^nT`z}y8dhW ziV$_CV{C`6-#JuQ=gU6iA2<5kPj|T7Usw0(s(|aCpCjYp0!G@6qTO^F+7J3h(Dv+z zO-aBHEyD;XLR)d(LyeT#vAhyChG7k-qA{m51v1W|#bH-{V_n+=_6Rcrwk(jwiF+tN zcm;H_>X+C^^MYDG*~h^y+%#M0l1_6kziWp+RG3%sN<4m%gzfyure59SE!kLg{G^zf zk7o+HIRL1+ZDGpgCQlH_#$UsVh@<^=ZMXSZD z)C;UVtxJb|>m9>xmu~XDvEwY?O3*?}e6yE2?MGcw#XsGb@aDC`C^0oq5ug0C=N&WO zy+u<8EO`&DcI62wSE$>QseHzWh%Lbosyuo;dW2BaUb3~z4LUFcbAE&Fx#}# z+}4R^FVb`|A{ir9w(mVLQ#*3yOA2p|#*LbB+e`b}CeP%xhNU-6jZ6t6HG`ft8Q~dU zZOh-vk?s?BzsR4AC}|VNdRKImUu|{yuDHcXA^J2Vs7R{vVr+^N6@@Y$l`y-u`(!$s zsuHd*3a*m2hgw#rJKN@3$Y-zyyV3ukEt-9- z>#ACH>bz!qw|ji^hWKvx;vdO95eJ);8oSFj1I|fWgPL+|C7 zmH6D5JH_Ic>Za4Watbm$%7&*GpL%%1$KWlcAE6Tc$<4a1nW>l>^!GJ-_2>Hy7txP{ z0sfA&bDDWn!*<^A=bqy(ej9_8R%o@;0NRKydC8t*V2A5~8 zAl)O2YX&{Gk{ds8BnMPa63Zq{%Z}t)2B!zwTi!F;7)`denxenp2wtix4@P(Ji_T;& zbTF@{yA|^lG*9l@;4#v$2Rds{^t{7aZW5H|u;%0&`t&-~ML6j-%sS}W!U{^dgE~fO zwEeYzN=7?vYE%_&;`2{ANVY+s^pYPt7u9Bx%1Ar0)Tnkm;ZcQq&G3+a#WZ5(_>cSi z?rn4JK@#@T9$gCAtTbdN`0qC~Lw15kakFtIp5tj7VI~>TWhZzPf{!uLN1lJojDQ^o zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@A#{G1_rspDS>lWAsNO8OACfhpXQf7ksxu9B zy+=!P11#3Npn`tFvmq||e$zO{Y4DNXt<7a+5W^dZ+mKu9yGIR?1^)GhHaD`EebeeK zk%gcBZLDO&1)zce2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*>0TLFCGr~i&D z@XVo?RbU^HvaU90WiB1V8`; 
lKmY_l00ck)1V8`;KmY_l00ck)1V8`;K)@j2sQs6jzW}DR1p@#8 literal 0 HcmV?d00001 From b94d637f4792acd9baf5e213632a086864c6383b Mon Sep 17 00:00:00 2001 From: MelinaKlostermann Date: Tue, 25 Mar 2025 14:21:01 +0100 Subject: [PATCH 11/12] bsf fix docker container --- .../BindingSiteFinder/bsfQC => bin}/BindingSiteFinderQC.R | 0 .../BindingSiteFinder/bsfQC => bin}/BindingSiteFinderQC.qmd | 0 .../DefineBindingSites => bin}/DefineBindingSites.R | 0 .../sortAnnotationForBindingSiteFinder.R | 0 modules/local/BindingSiteFinder/DefineBindingSites/main.nf | 1 - modules/local/BindingSiteFinder/bsfQC/main.nf | 2 +- .../sortAnnotationForBindingSiteFinder/main.nf | 6 +++--- 7 files changed, 4 insertions(+), 5 deletions(-) rename {modules/local/BindingSiteFinder/bsfQC => bin}/BindingSiteFinderQC.R (100%) rename {modules/local/BindingSiteFinder/bsfQC => bin}/BindingSiteFinderQC.qmd (100%) rename {modules/local/BindingSiteFinder/DefineBindingSites => bin}/DefineBindingSites.R (100%) rename {modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder => bin}/sortAnnotationForBindingSiteFinder.R (100%) mode change 100644 => 100755 diff --git a/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.R b/bin/BindingSiteFinderQC.R similarity index 100% rename from modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.R rename to bin/BindingSiteFinderQC.R diff --git a/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.qmd b/bin/BindingSiteFinderQC.qmd similarity index 100% rename from modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.qmd rename to bin/BindingSiteFinderQC.qmd diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/DefineBindingSites.R b/bin/DefineBindingSites.R similarity index 100% rename from modules/local/BindingSiteFinder/DefineBindingSites/DefineBindingSites.R rename to bin/DefineBindingSites.R diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R 
b/bin/sortAnnotationForBindingSiteFinder.R old mode 100644 new mode 100755 similarity index 100% rename from modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R rename to bin/sortAnnotationForBindingSiteFinder.R diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf index c9fc4453..6c115802 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites/main.nf +++ b/modules/local/BindingSiteFinder/DefineBindingSites/main.nf @@ -49,7 +49,6 @@ process defineBindingSites { --peaks $peaks \\ --anno_genes $anno_gns \\ --anno_regions $anno_regions \\ - --sample_sheet $sample_sheet \\ --output_path . \\ cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/BindingSiteFinder/bsfQC/main.nf b/modules/local/BindingSiteFinder/bsfQC/main.nf index cfdf079f..2d88e087 100644 --- a/modules/local/BindingSiteFinder/bsfQC/main.nf +++ b/modules/local/BindingSiteFinder/bsfQC/main.nf @@ -16,7 +16,7 @@ process bsfQC { script: """ - Rscript clipseq/modules/local/BindingSiteFinder/bsfQC/BindingSiteFinderQC.R \\ + Rscript BindingSiteFinderQC.R \\ $binding_sites_rds cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf index 6af2afec..4169397b 100644 --- a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf @@ -5,7 +5,7 @@ process sortAnnotationForBindingSiteFinder { container "${'melinak/bindingsitefinder:1.1'}" input: - tuple val(meta), path(gtf_file) + tuple val(meta), path(gtf) output: tuple val(meta), path("*gns.rds"), emit: gns_rds @@ -17,8 +17,8 @@ process sortAnnotationForBindingSiteFinder { script: """ - Rscript 
/home/mek24iv/nfcore-clipseq/devel_BindingSiteFinder/modules/local/sortAnnotationForBindingSiteFinder/sortAnnotationForBindingSiteFinder.R \\ - $gtf_file \\ + Rscript ~/nfcore-clipseq/clipseq/bin/sortAnnotationForBindingSiteFinder.R \\ + $gtf \\ gns.rds \\ regions.rds cat <<-END_VERSIONS > versions.yml From f80ebfac71f267260e109ed1fd2ce6345f31135d Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Tue, 25 Mar 2025 14:16:55 +0000 Subject: [PATCH 12/12] Improve tests --- .../DefineBindingSites/tests/main.nf.test | 53 +++++++++---------- modules/local/BindingSiteFinder/bsfQC/main.nf | 6 +-- .../main.nf | 2 +- .../tests/main.nf.test | 5 -- .../tests/main.nf.test.snap | 2 +- 5 files changed, 30 insertions(+), 38 deletions(-) diff --git a/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test b/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test index edd6b13d..39cf478b 100644 --- a/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test +++ b/modules/local/BindingSiteFinder/DefineBindingSites/tests/main.nf.test @@ -1,5 +1,3 @@ -// TODO nf-core: Once you have added the required tests, please run the following command to build this file: -// nf-core modules test modules/hirutest nextflow_process { name "Test Process CLIPSEQ_DEFINEBINDINGSITES" @@ -27,15 +25,9 @@ nextflow_process { test("DefineBindingSites - bw_files_folder, peak_bed, anno_genes_rds, anno_regions_rds") { - // TODO nf-core: If you are created a test for a chained module - // (the module requires running more than one process to generate the required output) - // add the 'setup' method here. - // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). - when { process { """ - // TODO nf-core: define inputs of the process here. 
Example: input[0] = [ [ id:'test' ], // meta map file('/Users/hdash/code/public/clipseq/tests/bigwigs', checkIfExists: true), @@ -61,29 +53,34 @@ nextflow_process { } - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. - // test("DefineBindingSites - bw_files_folder, peak_bed, anno_genes_rds, anno_regions_rds - stub") { - - // options "-stub" + test("DefineBindingSites - bw_files_folder, peak_bed, anno_genes_rds, anno_regions_rds - stub") { - // when { - // process { - // """ - // // TODO nf-core: define inputs of the process here. Example: + options "-stub" - // input = ["","","",""] - // """ - // } - // } + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file('/Users/hdash/code/public/clipseq/tests/bigwigs', checkIfExists: true), + ] + input[1] = [ + [ id:'test' ], // meta map + file('/Users/hdash/code/public/clipseq/results/05_peakcalling/pureclip/HNRNPC_pureclip_crosslinks.bed', checkIfExists: true), + ] + input[2] = ANNOTATIONS.out[0] + input[3] = ANNOTATIONS.out[1] + """ + } + } - // then { - // assertAll( - // { assert process.success }, - // { assert snapshot(process.out).match() } - // //TODO nf-core: Add all required assertions to verify the test output. 
- // ) - // } + then { + assert process.success + assert snapshot(process.out).match() + assert new File(process.out.binding_sites_rds[0][1]).exists() + assert new File(process.out.binding_sites_csv[0][1]).exists() + } - // } + } } diff --git a/modules/local/BindingSiteFinder/bsfQC/main.nf b/modules/local/BindingSiteFinder/bsfQC/main.nf index 2d88e087..c7391f75 100644 --- a/modules/local/BindingSiteFinder/bsfQC/main.nf +++ b/modules/local/BindingSiteFinder/bsfQC/main.nf @@ -16,9 +16,9 @@ process bsfQC { script: """ - Rscript BindingSiteFinderQC.R \\ + BindingSiteFinderQC.R \\ $binding_sites_rds - + cat <<-END_VERSIONS > versions.yml "${task.process}": \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') @@ -37,4 +37,4 @@ process bsfQC { \$(Rscript -e "packageVersion('BindingSiteFinder')" |& sed '1!d ; s/[1] //') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf index 6a944517..94d8562f 100644 --- a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/main.nf @@ -18,7 +18,7 @@ process sortAnnotationForBindingSiteFinder { script: """ sortAnnotationForBindingSiteFinder.R \\ - $gtf_file \\ + $gtf \\ gns.rds \\ regions.rds cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test index a12d798c..7b5b9c3b 100644 --- a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test @@ -1,5 +1,3 @@ -// TODO nf-core: Once you have added the required tests, please run the following command to build this file: -// nf-core 
modules test modules/hirutest nextflow_process { name "Test Process CLIPSEQ_SORTANNOTATIONSFORBINDINGSITEFINDER" @@ -35,7 +33,6 @@ nextflow_process { } - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. test("sortAnnotationsForBindingSiteFinder - gtf - stub") { options "-stub" @@ -43,8 +40,6 @@ nextflow_process { when { process { """ - // TODO nf-core: define inputs of the process here. Example: - input[0] = [ [ id:'test' ], // meta map file('https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV_filtered_seg.gtf'), diff --git a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap index 06761a27..7fced7ee 100644 --- a/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap +++ b/modules/local/BindingSiteFinder/sortAnnotationForBindingSiteFinder/tests/main.nf.test.snap @@ -9,6 +9,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2025-03-25T10:28:10.976439" + "timestamp": "2025-03-25T10:44:54.619281" } } \ No newline at end of file