From 8e9fe02ad0ca8ea178c8f9fd3d6e86ab5b9cef3b Mon Sep 17 00:00:00 2001
From: ellendejong <de.jong.ellen@hotmail.com>
Date: Thu, 2 Nov 2023 08:04:06 +0100
Subject: [PATCH 01/31] Add gitignore

---
 .gitignore | 264 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 264 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..ee726bf4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,264 @@
+# History files
+.Rhistory
+.Rapp.history
+
+# Session Data files
+.RData
+.RDataTmp
+
+# User-specific files
+.Ruserdata
+
+# Example code in package build process
+*-Ex.R
+
+# Output files from R CMD build
+/*.tar.gz
+
+# Output files from R CMD check
+/*.Rcheck/
+
+# RStudio files
+.Rproj.user/
+
+# produced vignettes
+vignettes/*.html
+vignettes/*.pdf
+
+# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
+.httr-oauth
+
+# knitr and R markdown default cache directories
+*_cache/
+/cache/
+
+# Temporary files created by R markdown
+*.utf8.md
+*.knit.md
+
+# R Environment Variables
+.Renviron
+
+# pkgdown site
+docs/
+
+# translation temp files
+po/*~
+
+# RStudio Connect folder
+rsconnect/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+
+# Built Visual Studio Code Extensions
+*.vsix
+
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+
+# Built Visual Studio Code Extensions
+*.vsix

From 156c79e6c144580cc24f156a7a330d1058cdcc41 Mon Sep 17 00:00:00 2001
From: ellendejong <de.jong.ellen@hotmail.com>
Date: Thu, 2 Nov 2023 08:10:03 +0100
Subject: [PATCH 02/31] custom script to run outrider, not working yet.

---
 Outrider/1.20.0/outrider.R | 95 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 Outrider/1.20.0/outrider.R

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
new file mode 100644
index 00000000..f249fda5
--- /dev/null
+++ b/Outrider/1.20.0/outrider.R
@@ -0,0 +1,95 @@
+#!/usr/bin/env Rscript
+
+# Import statements, alphabetic order of main package.
+suppressPackageStartupMessages(library("argparse"))
+suppressPackageStartupMessages(library("OUTRIDER"))
+suppressPackageStartupMessages(library("tools"))
+suppressPackageStartupMessages(library("tibble"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+
+# Argument parser
+parser <- ArgumentParser(description = "Process some integers")
+parser$add_argument("query", metavar = "query_input_files", nargs = "+", 
+										help = "Files or directories containing the query input files.")
+parser$add_argument("output_path", metavar = "output_path", nargs = "+", 
+										help = "Path where output of OUTRIDER will be stored.")
+parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+", 
+										help = "Files or directories containing the reference input files.")
+args <- parser$parse_args()
+
+
+read_input_files <- function(input){
+	sampleIDs <- c() 
+	count_tables <- lapply(input, function(f) {
+		input_ext <- tools::file_ext(f)
+		if(input_ext == "Rds") {
+			rse <- readRDS(f)  # RangedSummarizedExperiment
+			sampleIDs <- append(sampleIDs, colnames(rse))
+			return(as_tibble(rownames_to_column(as.data.frame(assays(rse)$counts), var="rownames")))
+		} else if (input_ext %in% c("txt", "tsv")) {
+			count_table <- read_delim(f, show_col_types=FALSE)
+			sampleIDs <- append(sampleIDs, colnames(count_table))
+			return(count_table)
+		} else {
+				stop("Input file extension is not supported.")
+		}
+	})
+	return(list("sampleIDs"=sampleIDs, "count_tables"=count_tables))
+}
+
+
+get_input <- function(input){
+	# If directories are provided
+	if(all(sapply(input, function(x) dir.exists(x)))) {
+		retrieved_files <- sapply(input, function(d){
+			list.files(path = d, pattern = "Rds|txt|tsv", full.names = TRUE, recursive = FALSE)
+		})
+	} else if(all(sapply(input, function(x) file.exists(x)))) {  # If files are provided.
+		retrieved_files <- input
+	} else {  # If both dir and files are provided, or different tyes (character, int etc)
+		stop("Input is neither dir or file.")
+	}
+
+	return(read_input_files(retrieved_files))
+}
+
+
+merge_count_tables <- function(lst_query, lst_ref){
+	# merge count tables together.
+	lst_count_tables <- c(lst_query, lst_ref)
+	all_counts <- lst_count_tables %>%
+		Reduce(function(dtf1,dtf2) dplyr::full_join(dtf1, dtf2,by="rownames"), .)
+}
+
+
+run_outrider <- function(all_counts) {
+	# TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
+	# filter expression
+	ods <- OutriderDataSet(all_counts)
+	filtered <- filterExpression(ods, minCounts = TRUE, filterGenes = TRUE)
+	out <- OUTRIDER(filtered)
+	return(out)
+}
+
+
+save_output <- function(out_path, out_outrider, ref_samples, padj_thres=0.05, zscore_thres=0, all=True) {
+	res <- as_tibble(results(ods, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=all))
+	# Reference samples are excluded from final results. 
+	query_res <- filter(res, !(sampleID %in% ref_samples))
+	# Write output table with aberrant expressed targets.
+	write_tsv(query_res, paste0(out_path, "outrider_result.tsv"))
+}
+
+
+# TODO: investigate memory usage and if possible reduced / parallel.
+main <- function(query, ref, output_path){
+	query_data <- get_input(query)
+	ref_data <- get_input(ref)
+	all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
+	output <- run_outrider(all_counts)
+	save_output(output_path, output, ref_data$sampleIDs)
+}
+
+
+main(args$query, args$ref, args$output_path)
\ No newline at end of file

From 369cf704225a508e927080954e0cfd9d3e0b5e67 Mon Sep 17 00:00:00 2001
From: ellendejong <de.jong.ellen@hotmail.com>
Date: Thu, 2 Nov 2023 08:10:17 +0100
Subject: [PATCH 03/31] Dockerfile outrider for additional R packages

---
 Outrider/1.20.0/Dockerfile | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 Outrider/1.20.0/Dockerfile

diff --git a/Outrider/1.20.0/Dockerfile b/Outrider/1.20.0/Dockerfile
new file mode 100644
index 00000000..d4510780
--- /dev/null
+++ b/Outrider/1.20.0/Dockerfile
@@ -0,0 +1,19 @@
+################## BASE IMAGE ######################
+FROM --platform=linux/amd64 quay.io/biocontainers/bioconductor-outrider:1.18.1--r43hf17093f_0
+
+################## METADATA ######################
+LABEL base_image="outrider:1.18.1--r43hf17093f_0"
+LABEL version="1.18.1--r43hf17093f_0"
+LABEL software="dependencies_custom_outrider"
+LABEL about.summary=""
+LABEL about.home="https://github.com/gagneurlab/OUTRIDER"
+LABEL about.documentation="https://github.com/gagneurlab/OUTRIDER"
+# LABEL extra.binaries="R"
+
+################## INSTALLATIONS ######################
+
+RUN R -e "install.packages('argparse', dependencies=TRUE, repos='http://cran.rstudio.com/')"
+RUN R -e "install.packages('tools', dependencies=TRUE, repos='http://cran.rstudio.com/')"
+RUN R -e "install.packages('tibble', dependencies=TRUE, repos='http://cran.rstudio.com/')"
+RUN R -e "install.packages('readr', dependencies=TRUE, repos='http://cran.rstudio.com/')"
+RUN R -e "install.packages('dplyr', dependencies=TRUE, repos='http://cran.rstudio.com/')"

From 0e46a46a6d3fab81ee413eb2e38ec9bc198ca330 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Clonnekevanbrussel=E2=80=9D?=
 <“lonneke.vanbrussel@uvic.cat”>
Date: Fri, 5 Jan 2024 11:25:33 +0100
Subject: [PATCH 04/31] changes NextflowModules to test and run pipeline

---
 Outrider/1.20.0/outrider.R        | 110 +++++++++++++-------------
 Outrider/1.20.0/outrider_050123.R |  95 +++++++++++++++++++++++
 main.nf                           | 123 ++++++++++++++++++++++++++++++
 modules.json                      |  77 +++++++++++++++++++
 nextflow_schema.json              |  97 +++++++++++++++++++++++
 run_nextflow.sh                   |  58 ++++++++++++++
 6 files changed, 508 insertions(+), 52 deletions(-)
 create mode 100644 Outrider/1.20.0/outrider_050123.R
 create mode 100644 main.nf
 create mode 100644 modules.json
 create mode 100644 nextflow_schema.json
 create mode 100644 run_nextflow.sh

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index f249fda5..2cd08598 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -11,85 +11,91 @@ suppressPackageStartupMessages(library("dplyr"))
 # Argument parser
 parser <- ArgumentParser(description = "Process some integers")
 parser$add_argument("query", metavar = "query_input_files", nargs = "+", 
-										help = "Files or directories containing the query input files.")
+                    help = "Files or directories containing the query input files.")
 parser$add_argument("output_path", metavar = "output_path", nargs = "+", 
-										help = "Path where output of OUTRIDER will be stored.")
+                    help = "Path where output of OUTRIDER will be stored.")
 parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+", 
-										help = "Files or directories containing the reference input files.")
+                    help = "Files or directories containing the reference input files.")
 args <- parser$parse_args()
 
 
 read_input_files <- function(input){
-	sampleIDs <- c() 
-	count_tables <- lapply(input, function(f) {
-		input_ext <- tools::file_ext(f)
-		if(input_ext == "Rds") {
-			rse <- readRDS(f)  # RangedSummarizedExperiment
-			sampleIDs <- append(sampleIDs, colnames(rse))
-			return(as_tibble(rownames_to_column(as.data.frame(assays(rse)$counts), var="rownames")))
-		} else if (input_ext %in% c("txt", "tsv")) {
-			count_table <- read_delim(f, show_col_types=FALSE)
-			sampleIDs <- append(sampleIDs, colnames(count_table))
-			return(count_table)
-		} else {
-				stop("Input file extension is not supported.")
-		}
-	})
-	return(list("sampleIDs"=sampleIDs, "count_tables"=count_tables))
+  sampleIDs <- c() 
+  count_tables <- lapply(input, function(f) {
+    input_ext <- tools::file_ext(f)
+    if(input_ext == "Rds") {
+      rse <- readRDS(f)  # RangedSummarizedExperiment
+      sampleIDs <- append(sampleIDs, colnames(rse))
+      return(as_tibble(rownames_to_column(as.data.frame(assays(rse)$counts), var="rownames")))
+    } else if (input_ext %in% c("txt", "tsv")) {
+      count_table <- read_delim(f, show_col_types=FALSE, skip=1)
+      ct <- count_table[,c(1,7)]
+      names(ct)[1] <- "rownames"
+      sampleIDs <- append(sampleIDs, colnames(count_table))
+      return(ct)
+    } else {
+      stop("Input file extension is not supported.")
+    }
+  })
+  return(list("sampleIDs"=sampleIDs, "count_tables"=count_tables))
 }
 
 
 get_input <- function(input){
-	# If directories are provided
-	if(all(sapply(input, function(x) dir.exists(x)))) {
-		retrieved_files <- sapply(input, function(d){
-			list.files(path = d, pattern = "Rds|txt|tsv", full.names = TRUE, recursive = FALSE)
-		})
-	} else if(all(sapply(input, function(x) file.exists(x)))) {  # If files are provided.
-		retrieved_files <- input
-	} else {  # If both dir and files are provided, or different tyes (character, int etc)
-		stop("Input is neither dir or file.")
-	}
-
-	return(read_input_files(retrieved_files))
+  # If directories are provided
+  if(all(sapply(input, function(x) dir.exists(x)))) {
+    retrieved_files <- sapply(input, function(d){
+      list.files(path = d, pattern = "Rds|txt|tsv", full.names = TRUE, recursive = FALSE)
+    })
+  } else if(all(sapply(input, function(x) file.exists(x)))) {  # If files are provided.
+    retrieved_files <- input
+  } else {  # If both dir and files are provided, or different tyes (character, int etc)
+    stop("Input is neither dir or file.")
+  }
+  
+  return(read_input_files(retrieved_files))
 }
 
 
 merge_count_tables <- function(lst_query, lst_ref){
-	# merge count tables together.
-	lst_count_tables <- c(lst_query, lst_ref)
-	all_counts <- lst_count_tables %>%
-		Reduce(function(dtf1,dtf2) dplyr::full_join(dtf1, dtf2,by="rownames"), .)
+  # merge count tables together.
+  lst_count_tables <- c(lst_query, lst_ref)
+  all_counts <- lst_count_tables %>%
+    Reduce(function(dtf1,dtf2) dplyr::full_join(dtf1, dtf2,by="rownames"), .)
 }
 
 
 run_outrider <- function(all_counts) {
-	# TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
-	# filter expression
-	ods <- OutriderDataSet(all_counts)
-	filtered <- filterExpression(ods, minCounts = TRUE, filterGenes = TRUE)
-	out <- OUTRIDER(filtered)
-	return(out)
+  # TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
+  # filter expression
+  all_counts_matrix <- as.matrix(all_counts)[,-1]
+  mode(all_counts_matrix) <- "integer"
+  rownames(all_counts_matrix) <- all_counts$rownames
+
+  ods <- OutriderDataSet(countData=all_counts_matrix)
+  filtered <- filterExpression(ods, minCounts = TRUE, filterGenes = TRUE)
+  out <- OUTRIDER(filtered)
+  return(out)
 }
 
 
 save_output <- function(out_path, out_outrider, ref_samples, padj_thres=0.05, zscore_thres=0, all=True) {
-	res <- as_tibble(results(ods, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=all))
-	# Reference samples are excluded from final results. 
-	query_res <- filter(res, !(sampleID %in% ref_samples))
-	# Write output table with aberrant expressed targets.
-	write_tsv(query_res, paste0(out_path, "outrider_result.tsv"))
+#  res <- as_tibble(results(ods, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=TRUE))
+  res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=TRUE))
+  # Reference samples are excluded from final results. 
+  query_res <- filter(res, !(sampleID %in% ref_samples))
+  # Write output table with aberrant expressed targets.
+  write_tsv(query_res, paste0(out_path, "outrider_result.tsv"))
 }
 
 
 # TODO: investigate memory usage and if possible reduced / parallel.
 main <- function(query, ref, output_path){
-	query_data <- get_input(query)
-	ref_data <- get_input(ref)
-	all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
-	output <- run_outrider(all_counts)
-	save_output(output_path, output, ref_data$sampleIDs)
+  query_data <- get_input(query)
+  ref_data <- get_input(ref)
+  all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
+  output <- run_outrider(all_counts)
+  save_output(output_path, output, ref_data$sampleIDs)
 }
 
-
 main(args$query, args$ref, args$output_path)
\ No newline at end of file
diff --git a/Outrider/1.20.0/outrider_050123.R b/Outrider/1.20.0/outrider_050123.R
new file mode 100644
index 00000000..f249fda5
--- /dev/null
+++ b/Outrider/1.20.0/outrider_050123.R
@@ -0,0 +1,95 @@
+#!/usr/bin/env Rscript
+
+# Import statements, alphabetic order of main package.
+suppressPackageStartupMessages(library("argparse"))
+suppressPackageStartupMessages(library("OUTRIDER"))
+suppressPackageStartupMessages(library("tools"))
+suppressPackageStartupMessages(library("tibble"))
+suppressPackageStartupMessages(library("readr"))
+suppressPackageStartupMessages(library("dplyr"))
+
+# Argument parser
+parser <- ArgumentParser(description = "Process some integers")
+parser$add_argument("query", metavar = "query_input_files", nargs = "+", 
+										help = "Files or directories containing the query input files.")
+parser$add_argument("output_path", metavar = "output_path", nargs = "+", 
+										help = "Path where output of OUTRIDER will be stored.")
+parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+", 
+										help = "Files or directories containing the reference input files.")
+args <- parser$parse_args()
+
+
+read_input_files <- function(input){
+	sampleIDs <- c() 
+	count_tables <- lapply(input, function(f) {
+		input_ext <- tools::file_ext(f)
+		if(input_ext == "Rds") {
+			rse <- readRDS(f)  # RangedSummarizedExperiment
+			sampleIDs <- append(sampleIDs, colnames(rse))
+			return(as_tibble(rownames_to_column(as.data.frame(assays(rse)$counts), var="rownames")))
+		} else if (input_ext %in% c("txt", "tsv")) {
+			count_table <- read_delim(f, show_col_types=FALSE)
+			sampleIDs <- append(sampleIDs, colnames(count_table))
+			return(count_table)
+		} else {
+				stop("Input file extension is not supported.")
+		}
+	})
+	return(list("sampleIDs"=sampleIDs, "count_tables"=count_tables))
+}
+
+
+get_input <- function(input){
+	# If directories are provided
+	if(all(sapply(input, function(x) dir.exists(x)))) {
+		retrieved_files <- sapply(input, function(d){
+			list.files(path = d, pattern = "Rds|txt|tsv", full.names = TRUE, recursive = FALSE)
+		})
+	} else if(all(sapply(input, function(x) file.exists(x)))) {  # If files are provided.
+		retrieved_files <- input
+	} else {  # If both dir and files are provided, or different tyes (character, int etc)
+		stop("Input is neither dir or file.")
+	}
+
+	return(read_input_files(retrieved_files))
+}
+
+
+merge_count_tables <- function(lst_query, lst_ref){
+	# merge count tables together.
+	lst_count_tables <- c(lst_query, lst_ref)
+	all_counts <- lst_count_tables %>%
+		Reduce(function(dtf1,dtf2) dplyr::full_join(dtf1, dtf2,by="rownames"), .)
+}
+
+
+run_outrider <- function(all_counts) {
+	# TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
+	# filter expression
+	ods <- OutriderDataSet(all_counts)
+	filtered <- filterExpression(ods, minCounts = TRUE, filterGenes = TRUE)
+	out <- OUTRIDER(filtered)
+	return(out)
+}
+
+
+save_output <- function(out_path, out_outrider, ref_samples, padj_thres=0.05, zscore_thres=0, all=True) {
+	res <- as_tibble(results(ods, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=all))
+	# Reference samples are excluded from final results. 
+	query_res <- filter(res, !(sampleID %in% ref_samples))
+	# Write output table with aberrant expressed targets.
+	write_tsv(query_res, paste0(out_path, "outrider_result.tsv"))
+}
+
+
+# TODO: investigate memory usage and if possible reduced / parallel.
+main <- function(query, ref, output_path){
+	query_data <- get_input(query)
+	ref_data <- get_input(ref)
+	all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
+	output <- run_outrider(all_counts)
+	save_output(output_path, output, ref_data$sampleIDs)
+}
+
+
+main(args$query, args$ref, args$output_path)
\ No newline at end of file
diff --git a/main.nf b/main.nf
new file mode 100644
index 00000000..77b517dc
--- /dev/null
+++ b/main.nf
@@ -0,0 +1,123 @@
+#!/usr/bin/env nextflow
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    UMCUGenetics/DxNextflowRNA
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Github : https://github.com/UMCUGenetics/DxNextflowRNA
+----------------------------------------------------------------------------------------
+*/
+
+nextflow.enable.dsl = 2
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Validate parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { validateParameters; } from 'plugin/nf-validation'
+validateParameters()
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Import modules/subworkflows
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+/*include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from './subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main'
+
+include { TRIMGALORE } from './modules/nf-core/trimgalore/main'
+include { FASTQC } from './modules/nf-core/fastqc/main'
+include { MULTIQC } from './modules/nf-core/multiqc/main'
+include { SAMTOOLS_INDEX } from './modules/nf-core/samtools/index/main'
+include { SAMTOOLS_MERGE } from './modules/nf-core/samtools/merge/main'
+include { STAR_ALIGN } from './modules/nf-core/star/align/main'
+include { SUBREAD_FEATURECOUNTS } from './modules/nf-core/subread/featurecounts/main'
+
+*/
+include { OUTRIDER } from './Outrider/1.20.0/main'
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Main workflow
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow {
+    // Reference file channels
+/*    ch_star_index = Channel.fromPath(params.star_index).map {star_index -> [star_index.getSimpleName(), star_index] }
+    ch_gtf = Channel.fromPath(params.gtf).map { gtf -> [gtf.getSimpleName(), gtf] }
+
+    // Input channel
+    ch_fastq = Channel.fromFilePairs("$params.input/*_R{1,2}_001.fastq.gz")
+        .map {
+            meta, fastq ->
+            def fmeta = [:]
+            // Set meta.id
+            fmeta.id = meta
+            // Set meta.single_end
+            if (fastq.size() == 1) {
+                fmeta.single_end = true
+            } else {
+                fmeta.single_end = false
+            }
+            [ fmeta, fastq ]
+        }
+
+    // Trim, Alignment, FeatureCounts
+    TRIMGALORE(
+        ch_fastq
+    )
+    
+    STAR_ALIGN(
+        TRIMGALORE.out.reads,
+        ch_star_index.first(),
+        ch_gtf.first(),
+        false,
+        'illumina',
+        'UMCU Genetics'
+    )
+
+    SAMTOOLS_MERGE(
+        STAR_ALIGN.out.bam_sorted.map {
+            meta, bam ->
+                new_id = meta.id.split('_')[0]
+                [ meta + [id: new_id], bam ]
+        }.groupTuple(),
+        [ [ id:'null' ], []],
+        [ [ id:'null' ], []],
+    )
+
+    SAMTOOLS_INDEX ( SAMTOOLS_MERGE.out.bam )
+
+    SAMTOOLS_MERGE.out.bam
+        .join(SAMTOOLS_INDEX.out.bai)
+        .set { ch_bam_bai }
+
+    BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS(
+        ch_bam_bai,
+        true
+    )
+
+    SUBREAD_FEATURECOUNTS(
+        BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bam.map{
+        meta, bam -> [ meta, bam, params.gtf ]
+        }
+    )
+
+    // QC
+    FASTQC(ch_fastq)
+
+    // MultiQC
+    ch_multiqc_files = Channel.empty()
+    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+    MULTIQC(
+        ch_multiqc_files.collect(),
+        ch_multiqc_config.toList(),
+        Channel.empty().toList(),
+        Channel.empty().toList()
+    )
+*/
+    // Input channel
+    ch_outrider = Channel.fromPath("$params.input/featureCounts/*_.txt")
+    //Outrider
+}
\ No newline at end of file
diff --git a/modules.json b/modules.json
new file mode 100644
index 00000000..208f19bd
--- /dev/null
+++ b/modules.json
@@ -0,0 +1,77 @@
+{
+    "name": "UMCUGenetics/DxNextflowRNA",
+    "homePage": "https://github.com/UMCUGenetics/DxNextflowRNA",
+    "repos": {
+      "https://github.com/nf-core/modules.git": {
+        "modules": {
+          "nf-core": {
+            "fastqc": {
+              "branch": "master",
+              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
+              "installed_by": ["modules"]
+            },
+            "multiqc": {
+              "branch": "master",
+              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
+              "installed_by": ["modules"]
+            },
+            "samtools/flagstat": {
+              "branch": "master",
+              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
+              "installed_by": ["bam_stats_samtools"]
+            },
+            "samtools/idxstats": {
+              "branch": "master",
+              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
+              "installed_by": ["bam_stats_samtools"]
+            },
+            "samtools/index": {
+              "branch": "master",
+              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
+              "installed_by": ["bam_dedup_stats_samtools_umitools"]
+            },
+            "samtools/merge": {
+              "branch": "master",
+              "git_sha": "e7ce60acc8a33fa17429e966364657a63016e870",
+              "installed_by": ["modules"]
+            },
+            "samtools/stats": {
+              "branch": "master",
+              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
+              "installed_by": ["bam_stats_samtools"]
+            },
+            "star/align": {
+              "branch": "master",
+              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
+              "installed_by": ["modules"]
+            },
+            "subread/featurecounts": {
+              "branch": "master",
+              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
+              "installed_by": ["modules"]
+            },
+            "umitools/dedup": {
+              "branch": "master",
+              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
+              "installed_by": ["bam_dedup_stats_samtools_umitools"]
+            }
+          }
+        },
+        "subworkflows": {
+          "nf-core": {
+            "bam_dedup_stats_samtools_umitools": {
+              "branch": "master",
+              "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
+              "installed_by": ["subworkflows"]
+            },
+            "bam_stats_samtools": {
+              "branch": "master",
+              "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
+              "installed_by": ["bam_dedup_stats_samtools_umitools"]
+            }
+          }
+        }
+      }
+    }
+  }
+  
\ No newline at end of file
diff --git a/nextflow_schema.json b/nextflow_schema.json
new file mode 100644
index 00000000..c5db6a24
--- /dev/null
+++ b/nextflow_schema.json
@@ -0,0 +1,97 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "$id": "https://raw.githubusercontent.com/UMCUGenetics/DxNextflowRNA/master/nextflow_schema.json",
+    "title": "UMCUGenetics/DxNextflowRNA pipeline parameters",
+    "description": "UMCU Genetics RNA Workflow",
+    "type": "object",
+    "definitions": {
+      "input_output_options": {
+        "title": "Input/output options",
+        "type": "object",
+        "fa_icon": "fas fa-terminal",
+        "description": "Define where the pipeline should find input data and save output data.",
+        "required": ["input", "outdir"],
+        "properties": {
+            "input": {
+                "type": "string",
+                "format": "directory-path",
+                "exists": true,
+                "description": "Path to input data.",
+                "fa_icon": "fas fa-folder-open"
+            },
+            "outdir": {
+                "type": "string",
+                "format": "directory-path",
+                "description": "The output directory where the results will be saved.",
+                "fa_icon": "fas fa-folder-open"
+            },
+            "email": {
+                "type": "string",
+                "description": "Email address for completion summary.",
+                "fa_icon": "fas fa-envelope",
+                "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
+            }
+        }
+      },
+      "reference_genome_options": {
+        "title": "Reference genome options",
+        "type": "object",
+        "fa_icon": "fas fa-dna",
+        "description": "Reference genome related files and options required for the workflow.",
+        "properties": {
+          "genome": {
+            "type": "string",
+            "description": "Path of genome reference.",
+            "fa_icon": "fas fa-book",
+            "help_text": ""
+          },
+          "gtf": {
+            "type": "string",
+            "format": "file-path",
+            "exists": true,
+            "mimetype": "text/plain",
+            "pattern": "^\\S+\\.gtf(\\.gz)?$",
+            "description": "Path to GTF annotation file.",
+            "fa_icon": "fas fa-code-branch",
+            "help_text": "This parameter is *mandatory* if `--genome` is not specified."
+          },      
+          "star_index": {
+            "type": "string",
+            "format": "path",
+            "exists": true,
+            "fa_icon": "fas fa-bezier-curve",
+            "description": "Path to directory or tar.gz archive for pre-built STAR index."
+        }
+       }
+      },
+      "generic_options": {
+        "title": "Generic options",
+        "type": "object",
+        "fa_icon": "fas fa-file-import",
+        "description": "Less common options for the pipeline, typically set in a config file.",
+        "properties": {
+          "publish_dir_mode": {
+              "type": "string",
+              "default": "copy",
+              "description": "Method used to save pipeline results to output directory.",
+              "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
+              "fa_icon": "fas fa-copy",
+              "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+              "hidden": true
+          }
+        }
+      }
+    },
+    "allOf": [
+      {
+        "$ref": "#/definitions/input_output_options"
+      },
+      {
+        "$ref": "#/definitions/reference_genome_options"
+      },
+      {
+        "$ref": "#/definitions/generic_options"
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/run_nextflow.sh b/run_nextflow.sh
new file mode 100644
index 00000000..c0aafaa7
--- /dev/null
+++ b/run_nextflow.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+set -euo pipefail
+
+workflow_path='/hpc/diaggen/users/lonneke/github/DxNextflowRNA'
+
+# Set input and output dirs
+input=`realpath $1`
+output=`realpath $2`
+email=$3
+
+mkdir -p $output && cd $output
+mkdir -p log
+
+if ! { [ -f 'workflow.running' ] || [ -f 'workflow.done' ] || [ -f 'workflow.failed' ]; }; then
+touch workflow.running
+echo "check directory for output: ${output}"
+
+export JAVA_HOME='/hpc/diaggen/software/tools/jdk-18.0.2.1/'  # change java version 
+export NXF_JAVA_HOME='/hpc/diaggen/software/tools/jdk-18.0.2.1/'  # change java vesion of nextflow
+
+sbatch <<EOT
+#!/bin/bash
+#SBATCH --time=04:00:00
+#SBATCH --nodes=1
+#SBATCH --mem 10G
+#SBATCH --gres=tmpspace:10G
+#SBATCH --job-name Nextflow_RNASeq
+#SBATCH -o log/slurm_nextflow_rnaseq.%j.out
+#SBATCH -e log/slurm_nextflow_rnaseq.%j.err
+#SBATCH --mail-user $email
+#SBATCH --mail-type FAIL
+#SBATCH --account=diaggen
+
+/hpc/diaggen/software/development/DxNextflowRNA/tools/nextflow run /hpc/diaggen/users/lonneke/github/DxNextflowRNA/main.nf  \
+-c /hpc/diaggen/users/lonneke/github/DxNextflowRNA/nextflow.config -resume -ansi-log false -profile slurm \
+--input $input \
+--outdir $output \
+--email $email
+ 
+if [ \$? -eq 0 ]; then
+    echo "Nextflow done."
+
+    echo "RNA Trimgalore test workflow completed successfully."
+    rm workflow.running
+    touch workflow.done
+
+    exit 0
+else
+    echo "Nextflow failed"
+    rm workflow.running
+    touch workflow.failed
+
+    exit 1
+fi
+EOT
+else
+echo "Workflow job not submitted, please check $output for 'workflow.status' files."
+fi

From a01ca9b2e92d392694433b7b5c9230918600b5cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Clonnekevanbrussel=E2=80=9D?=
 <“lonneke.vanbrussel@uvic.cat”>
Date: Fri, 5 Jan 2024 11:45:45 +0100
Subject: [PATCH 05/31] changed links to NextflowModules

---
 run_nextflow.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/run_nextflow.sh b/run_nextflow.sh
index c0aafaa7..69ad309f 100644
--- a/run_nextflow.sh
+++ b/run_nextflow.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -euo pipefail
 
-workflow_path='/hpc/diaggen/users/lonneke/github/DxNextflowRNA'
+workflow_path='/hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules'
 
 # Set input and output dirs
 input=`realpath $1`
@@ -31,8 +31,8 @@ sbatch <<EOT
 #SBATCH --mail-type FAIL
 #SBATCH --account=diaggen
 
-/hpc/diaggen/software/development/DxNextflowRNA/tools/nextflow run /hpc/diaggen/users/lonneke/github/DxNextflowRNA/main.nf  \
--c /hpc/diaggen/users/lonneke/github/DxNextflowRNA/nextflow.config -resume -ansi-log false -profile slurm \
+/hpc/diaggen/software/development/DxNextflowRNA/tools/nextflow run /hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/main.nf  \
+-c /hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/nextflow.config -resume -ansi-log false -profile slurm \
 --input $input \
 --outdir $output \
 --email $email

From e0957504a336ca56b6c860226a16d4f004f36f2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Clonnekevanbrussel=E2=80=9D?=
 <“lonneke.vanbrussel@uvic.cat”>
Date: Fri, 5 Jan 2024 12:03:22 +0100
Subject: [PATCH 06/31] added nextflow.config

---
 nextflow.config | 101 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 nextflow.config

diff --git a/nextflow.config b/nextflow.config
new file mode 100644
index 00000000..7952bf9d
--- /dev/null
+++ b/nextflow.config
@@ -0,0 +1,101 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    UMCUGenetics/DxNextflowRNA Nextflow config file
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+manifest {
+    name            = 'UMCUGenetics/DxNextflowRNA'
+    author          = """UMCU Genetics"""
+    homePage        = 'https://github.com/UMCUGenetics/DxNextflowRNA'
+    description     = """UMCU Genetics RNA Workflow"""
+    mainScript      = 'main.nf'
+    nextflowVersion = '!>=23.04.0'
+    version         = '1.0dev'
+    doi             = ''
+}
+
+// Nextflow plugins
+plugins {
+    id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+}
+
+// Global default params, used in configs
+params {
+    genome = '/hpc/diaggen/data/databases/ref_genomes/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta'
+    star_index = '/hpc/diaggen/data/databases/STAR_ref_genome_index/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set_2.7.9a'
+    gtf = '/hpc/diaggen/data/databases/gencode/gencode.v44.primary_assembly.basic.annotation.gtf'
+
+    // Input options
+    input = null
+
+    // Output options
+    outdir = null
+    publish_dir_mode = 'link'
+    email = null
+
+    // cluster_options = "--mail-user $params.email --mail-type FAIL --account=diaggen"
+}
+
+// Capture exit codes from upstream processes when piping
+process.shell = ['/bin/bash', '-euo', 'pipefail']
+
+// Load base.config
+includeConfig 'conf/base.config'
+
+// Load modules.config for module specific options
+includeConfig 'conf/modules.config'
+
+report {
+    enabled = true
+    file = "$params.outdir/log/nextflow_report.html"
+    overwrite = true
+}
+
+trace {
+    enabled = true
+    file = "$params.outdir/log/nextflow_trace.txt"
+    overwrite = true
+}
+
+timeline {
+    enabled = true
+    file = "$params.outdir/log/nextflow_timeline.html"
+    overwrite = true
+}
+
+profiles {
+
+    slurm {
+        process {
+            executor = 'slurm'
+            queue = 'cpu'
+
+            errorStrategy = 'retry'
+            maxRetries = 1
+        }
+
+        singularity {
+            enabled = true
+            runOptions = '-B /hpc:/hpc -B $TMPDIR:$TMPDIR'
+            autoMounts = true
+            cacheDir = '/hpc/diaggen/software/singularity_cache'
+        }
+
+        executor {
+            queueSize = 1000
+            pollInterval = '1min'
+            queueStatInterval = '5min'
+            submitRatelimit = '10sec'
+        }
+
+       mail {
+            smtp.host = 'localhost'
+        }
+    }
+
+    mac {
+        docker.enabled = true
+        docker.runOptions = '-v /Users:/Users'
+    }
+}
\ No newline at end of file

From 8918ddc5c48636b4b6125b379d7293af3282e892 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Clonnekevanbrussel=E2=80=9D?=
 <“lonneke.vanbrussel@uvic.cat”>
Date: Fri, 5 Jan 2024 12:17:39 +0100
Subject: [PATCH 07/31] added conf folder witth config files

---
 conf/base.config    | 75 +++++++++++++++++++++++++++++++++++++++++++++
 conf/modules.config | 59 +++++++++++++++++++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 conf/base.config
 create mode 100644 conf/modules.config

diff --git a/conf/base.config b/conf/base.config
new file mode 100644
index 00000000..32944612
--- /dev/null
+++ b/conf/base.config
@@ -0,0 +1,75 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    UMCUGenetics/DxNextflowRNA Nextflow base config file
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+process {
+
+    // Defaults for all processes, based on HPC defaults
+    cpus   = { 2    * task.attempt }
+    memory = { 10.GB * task.attempt }
+    time   = { 1.h  * task.attempt }
+
+    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
+    maxRetries    = 1
+    maxErrors     = '-1'
+
+    executor = 'slurm'
+    queue = 'cpu'
+    // clusterOptions = "$params.cluster_options"
+
+    // Process-specific resource requirements
+    // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+    withLabel:process_single {
+        cpus   = { 2 }
+        memory = { 8.GB * task.attempt }
+        time   = { 4.h  * task.attempt }
+    }
+    withLabel:process_low {
+        cpus   = { 2     * task.attempt }
+        memory = { 16.GB * task.attempt }
+        time   = { 4.h   * task.attempt }
+    }
+    withLabel:process_medium {
+        cpus   = { 6     * task.attempt }
+        memory = { 32.GB * task.attempt }
+        time   = { 8.h   * task.attempt }
+    }
+    withLabel:process_high {
+        cpus   = { 12    * task.attempt }
+        memory = { 64.GB * task.attempt }
+        time   = { 16.h  * task.attempt }
+    }
+    withLabel:process_long {
+        time   = { 20.h  * task.attempt }
+    }
+    withLabel:process_high_memory {
+        memory = { 128.GB * task.attempt }
+    }
+    withLabel:error_ignore {
+        errorStrategy = 'ignore'
+    }
+    withLabel:error_retry {
+        errorStrategy = 'retry'
+        maxRetries    = 2
+    }
+}
+
+singularity {
+    enabled = true
+    runOptions = '-B /hpc:/hpc -B $TMPDIR:$TMPDIR'
+    autoMounts = true
+    cacheDir = '/hpc/diaggen/projects/woc/rna/container'
+}
+
+executor {
+    queueSize = 1000
+    pollInterval = '1min'
+    queueStatInterval = '5min'
+    submitRatelimit = '10sec'
+}
+
+mail {
+    smtp.host = 'localhost'
+}
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
new file mode 100644
index 00000000..81083f5b
--- /dev/null
+++ b/conf/modules.config
@@ -0,0 +1,59 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args   = Additional arguments appended to command in module.
+        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix = File name prefix for output files.
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    withName: FASTQC {
+        ext.args = '--quiet'
+        publishDir = [
+            [path: "$params.outdir/QC/", mode: params.publish_dir_mode, pattern: '*_{fastqc.html}']
+        ]
+        cpus   = 2
+        memory = { 8.GB   * task.attempt }
+        time   = { 15.m   * task.attempt }
+    }
+
+    withName: SAMTOOLS_MERGE {
+        cpus   = { 8    * task.attempt }
+        memory = { 8.GB * task.attempt }
+        time   = { 1.h  * task.attempt }
+    }
+
+    withName: STAR_ALIGN {
+        cpus   = { 12    * task.attempt }
+        memory = { 40.GB * task.attempt }
+        time   = { 1.h   * task.attempt }
+        ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate'
+    }
+
+    withName: MULTIQC {
+        publishDir = [
+            path: { "${params.outdir}/QC" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: UMITOOLS_DEDUP {
+        ext.args = "--umi-separator=:"
+        ext.prefix = { "${meta.id}.markdup" }
+        publishDir = [
+            [path: "$params.outdir/bam_files/", mode: params.publish_dir_mode, pattern: '*.ba*']
+        ]
+        clusterOptions = "--gres=tmpspace:50G"
+    }
+
+    withName: SUBREAD_FEATURECOUNTS {
+        publishDir = [
+            [path: "$params.outdir/feature_counts/", mode: params.publish_dir_mode, pattern: '*featureCounts.txt*'],
+        ]
+    }
+}
\ No newline at end of file

From 5a9e2283415b5fd50ad964ee1d4a752014669f82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Clonnekevanbrussel=E2=80=9D?=
 <“lonneke.vanbrussel@uvic.cat”>
Date: Fri, 5 Jan 2024 12:51:57 +0100
Subject: [PATCH 08/31] added main.nf and yml in outrider

---
 Outrider/1.20.0/environment.yml |  6 ++++++
 Outrider/1.20.0/main.nf         | 28 ++++++++++++++++++++++++++++
 Outrider/1.20.0/meta.yml        | 26 ++++++++++++++++++++++++++
 main.nf                         |  8 +++++++-
 4 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 Outrider/1.20.0/environment.yml
 create mode 100644 Outrider/1.20.0/main.nf
 create mode 100644 Outrider/1.20.0/meta.yml

diff --git a/Outrider/1.20.0/environment.yml b/Outrider/1.20.0/environment.yml
new file mode 100644
index 00000000..f52a53a0
--- /dev/null
+++ b/Outrider/1.20.0/environment.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::fastqc=0.12.1
diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
new file mode 100644
index 00000000..a33ca637
--- /dev/null
+++ b/Outrider/1.20.0/main.nf
@@ -0,0 +1,28 @@
+
+process OUTRIDER {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+/*    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
+        'biocontainers/fastqc:0.12.1--hdfd78af_0' }"*/
+
+    input:
+    tuple val(meta), path(counts)
+
+    output:
+    tuple val(meta), path("*.tsv"), emit: tsv
+//    path  "versions.yml"           , emit: versions
+
+ /*   when:
+    task.ext.when == null || task.ext.when*/
+
+    script:
+    """
+    printf "%s %s\\n" $rename_to | while read old_name new_name; do
+        [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+    done
+    """
+
+}
\ No newline at end of file
diff --git a/Outrider/1.20.0/meta.yml b/Outrider/1.20.0/meta.yml
new file mode 100644
index 00000000..54155e70
--- /dev/null
+++ b/Outrider/1.20.0/meta.yml
@@ -0,0 +1,26 @@
+name: outrider
+description: Run Outrider on feature counts
+keywords:
+  - outrider
+  - abberant expression
+tools:
+  - outrider:
+      description: |
+      homepage: 
+      documentation: 
+      licence: 
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+authors:
+  - 
+maintainers:
+  - 
diff --git a/main.nf b/main.nf
index 77b517dc..9c78544f 100644
--- a/main.nf
+++ b/main.nf
@@ -118,6 +118,12 @@ workflow {
     )
 */
     // Input channel
-    ch_outrider = Channel.fromPath("$params.input/featureCounts/*_.txt")
+    ch_outrider_in = Channel.fromPath("$params.input/feature_counts/*CHX*.txt")
+    ch_outrider_ref = Channel.fromPath("$params.input/feature_counts/*Cntrl*.txt")
+
     //Outrider
+    OUTRIDER(
+        ch_outrider_in,
+        ch_outrider_ref
+    )
 }
\ No newline at end of file

From 6dc82eb763a8aaf03921a2357d59fde6ac019ebc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Clonnekevanbrussel=E2=80=9D?=
 <“lonneke.vanbrussel@uvic.cat”>
Date: Fri, 5 Jan 2024 13:47:46 +0100
Subject: [PATCH 09/31] edit outrider main.nf

---
 Outrider/1.20.0/main.nf | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index a33ca637..64a23ddd 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -4,12 +4,12 @@ process OUTRIDER {
     label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
-/*    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
-        'biocontainers/fastqc:0.12.1--hdfd78af_0' }"*/
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bioconductor-outrider:1.20.0--r43hf17093f_0 ' :
+        'biocontainers/bioconductor-outrider:1.20.0--r43hf17093f_0 ' }"
 
     input:
-    tuple val(meta), path(counts)
+    tuple val(meta), path(counts), path(ref)
 
     output:
     tuple val(meta), path("*.tsv"), emit: tsv
@@ -20,9 +20,9 @@ process OUTRIDER {
 
     script:
     """
-    printf "%s %s\\n" $rename_to | while read old_name new_name; do
-        [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
-    done
+    echo $counts
+    echo $ref
+    Rscript "outrider.R" "$counts" "/out" -r "$ref" 
     """
 
 }
\ No newline at end of file

From 03407c48458ada50de95b82757073ea525f65983 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Clonnekevanbrussel=E2=80=9D?=
 <“lonneke.vanbrussel@uvic.cat”>
Date: Tue, 9 Jan 2024 16:31:27 +0100
Subject: [PATCH 10/31] Delete all test files

---
 Outrider/1.20.0/outrider_050123.R |  95 ----------------------
 conf/base.config                  |  75 -----------------
 conf/modules.config               |  59 --------------
 main.nf                           | 129 ------------------------------
 modules.json                      |  77 ------------------
 nextflow.config                   | 101 -----------------------
 nextflow_schema.json              |  97 ----------------------
 run_nextflow.sh                   |  58 --------------
 8 files changed, 691 deletions(-)
 delete mode 100644 Outrider/1.20.0/outrider_050123.R
 delete mode 100644 conf/base.config
 delete mode 100644 conf/modules.config
 delete mode 100644 main.nf
 delete mode 100644 modules.json
 delete mode 100644 nextflow.config
 delete mode 100644 nextflow_schema.json
 delete mode 100644 run_nextflow.sh

diff --git a/Outrider/1.20.0/outrider_050123.R b/Outrider/1.20.0/outrider_050123.R
deleted file mode 100644
index f249fda5..00000000
--- a/Outrider/1.20.0/outrider_050123.R
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env Rscript
-
-# Import statements, alphabetic order of main package.
-suppressPackageStartupMessages(library("argparse"))
-suppressPackageStartupMessages(library("OUTRIDER"))
-suppressPackageStartupMessages(library("tools"))
-suppressPackageStartupMessages(library("tibble"))
-suppressPackageStartupMessages(library("readr"))
-suppressPackageStartupMessages(library("dplyr"))
-
-# Argument parser
-parser <- ArgumentParser(description = "Process some integers")
-parser$add_argument("query", metavar = "query_input_files", nargs = "+", 
-										help = "Files or directories containing the query input files.")
-parser$add_argument("output_path", metavar = "output_path", nargs = "+", 
-										help = "Path where output of OUTRIDER will be stored.")
-parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+", 
-										help = "Files or directories containing the reference input files.")
-args <- parser$parse_args()
-
-
-read_input_files <- function(input){
-	sampleIDs <- c() 
-	count_tables <- lapply(input, function(f) {
-		input_ext <- tools::file_ext(f)
-		if(input_ext == "Rds") {
-			rse <- readRDS(f)  # RangedSummarizedExperiment
-			sampleIDs <- append(sampleIDs, colnames(rse))
-			return(as_tibble(rownames_to_column(as.data.frame(assays(rse)$counts), var="rownames")))
-		} else if (input_ext %in% c("txt", "tsv")) {
-			count_table <- read_delim(f, show_col_types=FALSE)
-			sampleIDs <- append(sampleIDs, colnames(count_table))
-			return(count_table)
-		} else {
-				stop("Input file extension is not supported.")
-		}
-	})
-	return(list("sampleIDs"=sampleIDs, "count_tables"=count_tables))
-}
-
-
-get_input <- function(input){
-	# If directories are provided
-	if(all(sapply(input, function(x) dir.exists(x)))) {
-		retrieved_files <- sapply(input, function(d){
-			list.files(path = d, pattern = "Rds|txt|tsv", full.names = TRUE, recursive = FALSE)
-		})
-	} else if(all(sapply(input, function(x) file.exists(x)))) {  # If files are provided.
-		retrieved_files <- input
-	} else {  # If both dir and files are provided, or different tyes (character, int etc)
-		stop("Input is neither dir or file.")
-	}
-
-	return(read_input_files(retrieved_files))
-}
-
-
-merge_count_tables <- function(lst_query, lst_ref){
-	# merge count tables together.
-	lst_count_tables <- c(lst_query, lst_ref)
-	all_counts <- lst_count_tables %>%
-		Reduce(function(dtf1,dtf2) dplyr::full_join(dtf1, dtf2,by="rownames"), .)
-}
-
-
-run_outrider <- function(all_counts) {
-	# TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
-	# filter expression
-	ods <- OutriderDataSet(all_counts)
-	filtered <- filterExpression(ods, minCounts = TRUE, filterGenes = TRUE)
-	out <- OUTRIDER(filtered)
-	return(out)
-}
-
-
-save_output <- function(out_path, out_outrider, ref_samples, padj_thres=0.05, zscore_thres=0, all=True) {
-	res <- as_tibble(results(ods, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=all))
-	# Reference samples are excluded from final results. 
-	query_res <- filter(res, !(sampleID %in% ref_samples))
-	# Write output table with aberrant expressed targets.
-	write_tsv(query_res, paste0(out_path, "outrider_result.tsv"))
-}
-
-
-# TODO: investigate memory usage and if possible reduced / parallel.
-main <- function(query, ref, output_path){
-	query_data <- get_input(query)
-	ref_data <- get_input(ref)
-	all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
-	output <- run_outrider(all_counts)
-	save_output(output_path, output, ref_data$sampleIDs)
-}
-
-
-main(args$query, args$ref, args$output_path)
\ No newline at end of file
diff --git a/conf/base.config b/conf/base.config
deleted file mode 100644
index 32944612..00000000
--- a/conf/base.config
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    UMCUGenetics/DxNextflowRNA Nextflow base config file
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-process {
-
-    // Defaults for all processes, based on HPC defaults
-    cpus   = { 2    * task.attempt }
-    memory = { 10.GB * task.attempt }
-    time   = { 1.h  * task.attempt }
-
-    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
-    maxRetries    = 1
-    maxErrors     = '-1'
-
-    executor = 'slurm'
-    queue = 'cpu'
-    // clusterOptions = "$params.cluster_options"
-
-    // Process-specific resource requirements
-    // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
-    withLabel:process_single {
-        cpus   = { 2 }
-        memory = { 8.GB * task.attempt }
-        time   = { 4.h  * task.attempt }
-    }
-    withLabel:process_low {
-        cpus   = { 2     * task.attempt }
-        memory = { 16.GB * task.attempt }
-        time   = { 4.h   * task.attempt }
-    }
-    withLabel:process_medium {
-        cpus   = { 6     * task.attempt }
-        memory = { 32.GB * task.attempt }
-        time   = { 8.h   * task.attempt }
-    }
-    withLabel:process_high {
-        cpus   = { 12    * task.attempt }
-        memory = { 64.GB * task.attempt }
-        time   = { 16.h  * task.attempt }
-    }
-    withLabel:process_long {
-        time   = { 20.h  * task.attempt }
-    }
-    withLabel:process_high_memory {
-        memory = { 128.GB * task.attempt }
-    }
-    withLabel:error_ignore {
-        errorStrategy = 'ignore'
-    }
-    withLabel:error_retry {
-        errorStrategy = 'retry'
-        maxRetries    = 2
-    }
-}
-
-singularity {
-    enabled = true
-    runOptions = '-B /hpc:/hpc -B $TMPDIR:$TMPDIR'
-    autoMounts = true
-    cacheDir = '/hpc/diaggen/projects/woc/rna/container'
-}
-
-executor {
-    queueSize = 1000
-    pollInterval = '1min'
-    queueStatInterval = '5min'
-    submitRatelimit = '10sec'
-}
-
-mail {
-    smtp.host = 'localhost'
-}
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
deleted file mode 100644
index 81083f5b..00000000
--- a/conf/modules.config
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Config file for defining DSL2 per module options and publishing paths
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Available keys to override module options:
-        ext.args   = Additional arguments appended to command in module.
-        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
-        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
-        ext.prefix = File name prefix for output files.
-----------------------------------------------------------------------------------------
-*/
-
-process {
-    withName: FASTQC {
-        ext.args = '--quiet'
-        publishDir = [
-            [path: "$params.outdir/QC/", mode: params.publish_dir_mode, pattern: '*_{fastqc.html}']
-        ]
-        cpus   = 2
-        memory = { 8.GB   * task.attempt }
-        time   = { 15.m   * task.attempt }
-    }
-
-    withName: SAMTOOLS_MERGE {
-        cpus   = { 8    * task.attempt }
-        memory = { 8.GB * task.attempt }
-        time   = { 1.h  * task.attempt }
-    }
-
-    withName: STAR_ALIGN {
-        cpus   = { 12    * task.attempt }
-        memory = { 40.GB * task.attempt }
-        time   = { 1.h   * task.attempt }
-        ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate'
-    }
-
-    withName: MULTIQC {
-        publishDir = [
-            path: { "${params.outdir}/QC" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
-
-    withName: UMITOOLS_DEDUP {
-        ext.args = "--umi-separator=:"
-        ext.prefix = { "${meta.id}.markdup" }
-        publishDir = [
-            [path: "$params.outdir/bam_files/", mode: params.publish_dir_mode, pattern: '*.ba*']
-        ]
-        clusterOptions = "--gres=tmpspace:50G"
-    }
-
-    withName: SUBREAD_FEATURECOUNTS {
-        publishDir = [
-            [path: "$params.outdir/feature_counts/", mode: params.publish_dir_mode, pattern: '*featureCounts.txt*'],
-        ]
-    }
-}
\ No newline at end of file
diff --git a/main.nf b/main.nf
deleted file mode 100644
index 9c78544f..00000000
--- a/main.nf
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env nextflow
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    UMCUGenetics/DxNextflowRNA
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Github : https://github.com/UMCUGenetics/DxNextflowRNA
-----------------------------------------------------------------------------------------
-*/
-
-nextflow.enable.dsl = 2
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Validate parameters
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-include { validateParameters; } from 'plugin/nf-validation'
-validateParameters()
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Import modules/subworkflows
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-/*include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from './subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main'
-
-include { TRIMGALORE } from './modules/nf-core/trimgalore/main'
-include { FASTQC } from './modules/nf-core/fastqc/main'
-include { MULTIQC } from './modules/nf-core/multiqc/main'
-include { SAMTOOLS_INDEX } from './modules/nf-core/samtools/index/main'
-include { SAMTOOLS_MERGE } from './modules/nf-core/samtools/merge/main'
-include { STAR_ALIGN } from './modules/nf-core/star/align/main'
-include { SUBREAD_FEATURECOUNTS } from './modules/nf-core/subread/featurecounts/main'
-
-*/
-include { OUTRIDER } from './Outrider/1.20.0/main'
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Main workflow
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-workflow {
-    // Reference file channels
-/*    ch_star_index = Channel.fromPath(params.star_index).map {star_index -> [star_index.getSimpleName(), star_index] }
-    ch_gtf = Channel.fromPath(params.gtf).map { gtf -> [gtf.getSimpleName(), gtf] }
-
-    // Input channel
-    ch_fastq = Channel.fromFilePairs("$params.input/*_R{1,2}_001.fastq.gz")
-        .map {
-            meta, fastq ->
-            def fmeta = [:]
-            // Set meta.id
-            fmeta.id = meta
-            // Set meta.single_end
-            if (fastq.size() == 1) {
-                fmeta.single_end = true
-            } else {
-                fmeta.single_end = false
-            }
-            [ fmeta, fastq ]
-        }
-
-    // Trim, Alignment, FeatureCounts
-    TRIMGALORE(
-        ch_fastq
-    )
-    
-    STAR_ALIGN(
-        TRIMGALORE.out.reads,
-        ch_star_index.first(),
-        ch_gtf.first(),
-        false,
-        'illumina',
-        'UMCU Genetics'
-    )
-
-    SAMTOOLS_MERGE(
-        STAR_ALIGN.out.bam_sorted.map {
-            meta, bam ->
-                new_id = meta.id.split('_')[0]
-                [ meta + [id: new_id], bam ]
-        }.groupTuple(),
-        [ [ id:'null' ], []],
-        [ [ id:'null' ], []],
-    )
-
-    SAMTOOLS_INDEX ( SAMTOOLS_MERGE.out.bam )
-
-    SAMTOOLS_MERGE.out.bam
-        .join(SAMTOOLS_INDEX.out.bai)
-        .set { ch_bam_bai }
-
-    BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS(
-        ch_bam_bai,
-        true
-    )
-
-    SUBREAD_FEATURECOUNTS(
-        BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bam.map{
-        meta, bam -> [ meta, bam, params.gtf ]
-        }
-    )
-
-    // QC
-    FASTQC(ch_fastq)
-
-    // MultiQC
-    ch_multiqc_files = Channel.empty()
-    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
-    ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-    MULTIQC(
-        ch_multiqc_files.collect(),
-        ch_multiqc_config.toList(),
-        Channel.empty().toList(),
-        Channel.empty().toList()
-    )
-*/
-    // Input channel
-    ch_outrider_in = Channel.fromPath("$params.input/feature_counts/*CHX*.txt")
-    ch_outrider_ref = Channel.fromPath("$params.input/feature_counts/*Cntrl*.txt")
-
-    //Outrider
-    OUTRIDER(
-        ch_outrider_in,
-        ch_outrider_ref
-    )
-}
\ No newline at end of file
diff --git a/modules.json b/modules.json
deleted file mode 100644
index 208f19bd..00000000
--- a/modules.json
+++ /dev/null
@@ -1,77 +0,0 @@
-{
-    "name": "UMCUGenetics/DxNextflowRNA",
-    "homePage": "https://github.com/UMCUGenetics/DxNextflowRNA",
-    "repos": {
-      "https://github.com/nf-core/modules.git": {
-        "modules": {
-          "nf-core": {
-            "fastqc": {
-              "branch": "master",
-              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
-              "installed_by": ["modules"]
-            },
-            "multiqc": {
-              "branch": "master",
-              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
-              "installed_by": ["modules"]
-            },
-            "samtools/flagstat": {
-              "branch": "master",
-              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
-              "installed_by": ["bam_stats_samtools"]
-            },
-            "samtools/idxstats": {
-              "branch": "master",
-              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
-              "installed_by": ["bam_stats_samtools"]
-            },
-            "samtools/index": {
-              "branch": "master",
-              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
-              "installed_by": ["bam_dedup_stats_samtools_umitools"]
-            },
-            "samtools/merge": {
-              "branch": "master",
-              "git_sha": "e7ce60acc8a33fa17429e966364657a63016e870",
-              "installed_by": ["modules"]
-            },
-            "samtools/stats": {
-              "branch": "master",
-              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
-              "installed_by": ["bam_stats_samtools"]
-            },
-            "star/align": {
-              "branch": "master",
-              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
-              "installed_by": ["modules"]
-            },
-            "subread/featurecounts": {
-              "branch": "master",
-              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
-              "installed_by": ["modules"]
-            },
-            "umitools/dedup": {
-              "branch": "master",
-              "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
-              "installed_by": ["bam_dedup_stats_samtools_umitools"]
-            }
-          }
-        },
-        "subworkflows": {
-          "nf-core": {
-            "bam_dedup_stats_samtools_umitools": {
-              "branch": "master",
-              "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
-              "installed_by": ["subworkflows"]
-            },
-            "bam_stats_samtools": {
-              "branch": "master",
-              "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
-              "installed_by": ["bam_dedup_stats_samtools_umitools"]
-            }
-          }
-        }
-      }
-    }
-  }
-  
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
deleted file mode 100644
index 7952bf9d..00000000
--- a/nextflow.config
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    UMCUGenetics/DxNextflowRNA Nextflow config file
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-manifest {
-    name            = 'UMCUGenetics/DxNextflowRNA'
-    author          = """UMCU Genetics"""
-    homePage        = 'https://github.com/UMCUGenetics/DxNextflowRNA'
-    description     = """UMCU Genetics RNA Workflow"""
-    mainScript      = 'main.nf'
-    nextflowVersion = '!>=23.04.0'
-    version         = '1.0dev'
-    doi             = ''
-}
-
-// Nextflow plugins
-plugins {
-    id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet
-}
-
-// Global default params, used in configs
-params {
-    genome = '/hpc/diaggen/data/databases/ref_genomes/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta'
-    star_index = '/hpc/diaggen/data/databases/STAR_ref_genome_index/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set_2.7.9a'
-    gtf = '/hpc/diaggen/data/databases/gencode/gencode.v44.primary_assembly.basic.annotation.gtf'
-
-    // Input options
-    input = null
-
-    // Output options
-    outdir = null
-    publish_dir_mode = 'link'
-    email = null
-
-    // cluster_options = "--mail-user $params.email --mail-type FAIL --account=diaggen"
-}
-
-// Capture exit codes from upstream processes when piping
-process.shell = ['/bin/bash', '-euo', 'pipefail']
-
-// Load base.config
-includeConfig 'conf/base.config'
-
-// Load modules.config for module specific options
-includeConfig 'conf/modules.config'
-
-report {
-    enabled = true
-    file = "$params.outdir/log/nextflow_report.html"
-    overwrite = true
-}
-
-trace {
-    enabled = true
-    file = "$params.outdir/log/nextflow_trace.txt"
-    overwrite = true
-}
-
-timeline {
-    enabled = true
-    file = "$params.outdir/log/nextflow_timeline.html"
-    overwrite = true
-}
-
-profiles {
-
-    slurm {
-        process {
-            executor = 'slurm'
-            queue = 'cpu'
-
-            errorStrategy = 'retry'
-            maxRetries = 1
-        }
-
-        singularity {
-            enabled = true
-            runOptions = '-B /hpc:/hpc -B $TMPDIR:$TMPDIR'
-            autoMounts = true
-            cacheDir = '/hpc/diaggen/software/singularity_cache'
-        }
-
-        executor {
-            queueSize = 1000
-            pollInterval = '1min'
-            queueStatInterval = '5min'
-            submitRatelimit = '10sec'
-        }
-
-       mail {
-            smtp.host = 'localhost'
-        }
-    }
-
-    mac {
-        docker.enabled = true
-        docker.runOptions = '-v /Users:/Users'
-    }
-}
\ No newline at end of file
diff --git a/nextflow_schema.json b/nextflow_schema.json
deleted file mode 100644
index c5db6a24..00000000
--- a/nextflow_schema.json
+++ /dev/null
@@ -1,97 +0,0 @@
-{
-    "$schema": "http://json-schema.org/draft-07/schema",
-    "$id": "https://raw.githubusercontent.com/UMCUGenetics/DxNextflowRNA/master/nextflow_schema.json",
-    "title": "UMCUGenetics/DxNextflowRNA pipeline parameters",
-    "description": "UMCU Genetics RNA Workflow",
-    "type": "object",
-    "definitions": {
-      "input_output_options": {
-        "title": "Input/output options",
-        "type": "object",
-        "fa_icon": "fas fa-terminal",
-        "description": "Define where the pipeline should find input data and save output data.",
-        "required": ["input", "outdir"],
-        "properties": {
-            "input": {
-                "type": "string",
-                "format": "directory-path",
-                "exists": true,
-                "description": "Path to input data.",
-                "fa_icon": "fas fa-folder-open"
-            },
-            "outdir": {
-                "type": "string",
-                "format": "directory-path",
-                "description": "The output directory where the results will be saved.",
-                "fa_icon": "fas fa-folder-open"
-            },
-            "email": {
-                "type": "string",
-                "description": "Email address for completion summary.",
-                "fa_icon": "fas fa-envelope",
-                "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
-            }
-        }
-      },
-      "reference_genome_options": {
-        "title": "Reference genome options",
-        "type": "object",
-        "fa_icon": "fas fa-dna",
-        "description": "Reference genome related files and options required for the workflow.",
-        "properties": {
-          "genome": {
-            "type": "string",
-            "description": "Path of genome reference.",
-            "fa_icon": "fas fa-book",
-            "help_text": ""
-          },
-          "gtf": {
-            "type": "string",
-            "format": "file-path",
-            "exists": true,
-            "mimetype": "text/plain",
-            "pattern": "^\\S+\\.gtf(\\.gz)?$",
-            "description": "Path to GTF annotation file.",
-            "fa_icon": "fas fa-code-branch",
-            "help_text": "This parameter is *mandatory* if `--genome` is not specified."
-          },      
-          "star_index": {
-            "type": "string",
-            "format": "path",
-            "exists": true,
-            "fa_icon": "fas fa-bezier-curve",
-            "description": "Path to directory or tar.gz archive for pre-built STAR index."
-        }
-       }
-      },
-      "generic_options": {
-        "title": "Generic options",
-        "type": "object",
-        "fa_icon": "fas fa-file-import",
-        "description": "Less common options for the pipeline, typically set in a config file.",
-        "properties": {
-          "publish_dir_mode": {
-              "type": "string",
-              "default": "copy",
-              "description": "Method used to save pipeline results to output directory.",
-              "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
-              "fa_icon": "fas fa-copy",
-              "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
-              "hidden": true
-          }
-        }
-      }
-    },
-    "allOf": [
-      {
-        "$ref": "#/definitions/input_output_options"
-      },
-      {
-        "$ref": "#/definitions/reference_genome_options"
-      },
-      {
-        "$ref": "#/definitions/generic_options"
-      }
-    ]
-  }
-  
\ No newline at end of file
diff --git a/run_nextflow.sh b/run_nextflow.sh
deleted file mode 100644
index 69ad309f..00000000
--- a/run_nextflow.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-
-workflow_path='/hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules'
-
-# Set input and output dirs
-input=`realpath $1`
-output=`realpath $2`
-email=$3
-
-mkdir -p $output && cd $output
-mkdir -p log
-
-if ! { [ -f 'workflow.running' ] || [ -f 'workflow.done' ] || [ -f 'workflow.failed' ]; }; then
-touch workflow.running
-echo "check directory for output: ${output}"
-
-export JAVA_HOME='/hpc/diaggen/software/tools/jdk-18.0.2.1/'  # change java version 
-export NXF_JAVA_HOME='/hpc/diaggen/software/tools/jdk-18.0.2.1/'  # change java vesion of nextflow
-
-sbatch <<EOT
-#!/bin/bash
-#SBATCH --time=04:00:00
-#SBATCH --nodes=1
-#SBATCH --mem 10G
-#SBATCH --gres=tmpspace:10G
-#SBATCH --job-name Nextflow_RNASeq
-#SBATCH -o log/slurm_nextflow_rnaseq.%j.out
-#SBATCH -e log/slurm_nextflow_rnaseq.%j.err
-#SBATCH --mail-user $email
-#SBATCH --mail-type FAIL
-#SBATCH --account=diaggen
-
-/hpc/diaggen/software/development/DxNextflowRNA/tools/nextflow run /hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/main.nf  \
--c /hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/nextflow.config -resume -ansi-log false -profile slurm \
---input $input \
---outdir $output \
---email $email
- 
-if [ \$? -eq 0 ]; then
-    echo "Nextflow done."
-
-    echo "RNA Trimgalore test workflow completed successfully."
-    rm workflow.running
-    touch workflow.done
-
-    exit 0
-else
-    echo "Nextflow failed"
-    rm workflow.running
-    touch workflow.failed
-
-    exit 1
-fi
-EOT
-else
-echo "Workflow job not submitted, please check $output for 'workflow.status' files."
-fi

From fac6d0e65261e10b0f493611efc039310b13b4db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Clonnekevanbrussel=E2=80=9D?=
 <“lonneke.vanbrussel@uvic.cat”>
Date: Tue, 9 Jan 2024 16:40:39 +0100
Subject: [PATCH 11/31] outrider.R with default output dir and correct R script
 call in main.nf at NexflowModules

---
 Outrider/1.20.0/main.nf    | 20 +++++++++-----------
 Outrider/1.20.0/outrider.R |  4 ++--
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index 64a23ddd..e8e08276 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -1,18 +1,16 @@
-
 process OUTRIDER {
-    tag "$meta.id"
+//    tag "$meta.id"
     label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bioconductor-outrider:1.20.0--r43hf17093f_0 ' :
-        'biocontainers/bioconductor-outrider:1.20.0--r43hf17093f_0 ' }"
+    container "ghcr.io/umcugenetics/outrider_custom:0.0.1"
 
     input:
-    tuple val(meta), path(counts), path(ref)
-
+    path(counts)
+    path(ref)
+    
     output:
-    tuple val(meta), path("*.tsv"), emit: tsv
+    path("*.tsv"), emit: tsv
 //    path  "versions.yml"           , emit: versions
 
  /*   when:
@@ -20,9 +18,9 @@ process OUTRIDER {
 
     script:
     """
-    echo $counts
-    echo $ref
-    Rscript "outrider.R" "$counts" "/out" -r "$ref" 
+#    mkdir -p ${params.outdir}/outrider
+#    Rscript "/hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/Outrider/1.20.0/outrider.R" "${counts}" -o "${params.outdir}/outrider/" -r "${ref}" 
+    Rscript "/hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/Outrider/1.20.0/outrider.R" "${counts}" -r "${ref}"
     """
 
 }
\ No newline at end of file
diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 2cd08598..aefecd56 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -12,8 +12,8 @@ suppressPackageStartupMessages(library("dplyr"))
 parser <- ArgumentParser(description = "Process some integers")
 parser$add_argument("query", metavar = "query_input_files", nargs = "+", 
                     help = "Files or directories containing the query input files.")
-parser$add_argument("output_path", metavar = "output_path", nargs = "+", 
-                    help = "Path where output of OUTRIDER will be stored.")
+parser$add_argument("-o","--output_path", metavar = "output_path", nargs = "+", 
+                    help = "Path where output of OUTRIDER will be stored.", default="./")
 parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+", 
                     help = "Files or directories containing the reference input files.")
 args <- parser$parse_args()

From 388de3536eaaf9938eeb591897e547a69de05ba0 Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Mon, 15 Jan 2024 16:19:27 +0100
Subject: [PATCH 12/31] Edit path to outrider dir

---
 Outrider/1.20.0/main.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index e8e08276..4aae5175 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -18,9 +18,9 @@ process OUTRIDER {
 
     script:
     """
-#    mkdir -p ${params.outdir}/outrider
-#    Rscript "/hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/Outrider/1.20.0/outrider.R" "${counts}" -o "${params.outdir}/outrider/" -r "${ref}" 
-    Rscript "/hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/Outrider/1.20.0/outrider.R" "${counts}" -r "${ref}"
+    Rscript "${moduleDir}/outrider.R" "${counts}" -r "${ref}"
     """
 
+//    mkdir -p ${params.outdir}/outrider
+//   Rscript "/hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/Outrider/1.20.0/outrider.R" "${counts}" -o "${params.outdir}/outrider/" -r "${ref}" 
 }
\ No newline at end of file

From fdaaa3ba9b3060875b823bbcda2ccd42f898132a Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Tue, 6 Feb 2024 15:34:07 +0100
Subject: [PATCH 13/31] Outrider main.nf added tag meta.id and param feature
 exon/gene

---
 Outrider/1.20.0/main.nf | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index 4aae5175..a4f88083 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -1,14 +1,15 @@
 process OUTRIDER {
-//    tag "$meta.id"
+    tag "$meta.id"
     label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
     container "ghcr.io/umcugenetics/outrider_custom:0.0.1"
 
     input:
-    path(counts)
+    tuple val(meta), path(counts)
     path(ref)
-    
+    val(feature)
+
     output:
     path("*.tsv"), emit: tsv
 //    path  "versions.yml"           , emit: versions
@@ -18,9 +19,10 @@ process OUTRIDER {
 
     script:
     """
-    Rscript "${moduleDir}/outrider.R" "${counts}" -r "${ref}"
+    echo "${meta}"
+    echo "${ref}"
+    echo "${counts}"
+    Rscript ${moduleDir}/outrider.R ${counts} -r ${ref} -f ${feature}
     """
 
-//    mkdir -p ${params.outdir}/outrider
-//   Rscript "/hpc/diaggen/users/lonneke/github/DxNextflowRNA/NextflowModules/Outrider/1.20.0/outrider.R" "${counts}" -o "${params.outdir}/outrider/" -r "${ref}" 
 }
\ No newline at end of file

From fdb3a8018df50a947237ac2a114517a9dd8fed4c Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Tue, 6 Feb 2024 15:35:55 +0100
Subject: [PATCH 14/31] outrider.R added argument feature to use in output tsv
 file, also as parameter in functions main and save_output

---
 Outrider/1.20.0/outrider.R | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index aefecd56..73d7925d 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -10,17 +10,19 @@ suppressPackageStartupMessages(library("dplyr"))
 
 # Argument parser
 parser <- ArgumentParser(description = "Process some integers")
-parser$add_argument("query", metavar = "query_input_files", nargs = "+", 
+parser$add_argument("query", metavar = "query_input_files", nargs = "+",
                     help = "Files or directories containing the query input files.")
-parser$add_argument("-o","--output_path", metavar = "output_path", nargs = "+", 
+parser$add_argument("-o","--output_path", metavar = "output_path", nargs = "+",
                     help = "Path where output of OUTRIDER will be stored.", default="./")
-parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+", 
+parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+",
                     help = "Files or directories containing the reference input files.")
+parser$add_argument("-f", "--feat", metavar = "feature", nargs = "+",
+                    help = "Choose gene or exon level. Gene is default.", default="gene")
 args <- parser$parse_args()
 
 
 read_input_files <- function(input){
-  sampleIDs <- c() 
+  sampleIDs <- c()
   count_tables <- lapply(input, function(f) {
     input_ext <- tools::file_ext(f)
     if(input_ext == "Rds") {
@@ -79,23 +81,24 @@ run_outrider <- function(all_counts) {
 }
 
 
-save_output <- function(out_path, out_outrider, ref_samples, padj_thres=0.05, zscore_thres=0, all=True) {
+save_output <- function(out_path, out_outrider, ref_samples, feature, padj_thres=0.05, zscore_thres=0, all=True) {
 #  res <- as_tibble(results(ods, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=TRUE))
   res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=TRUE))
   # Reference samples are excluded from final results. 
   query_res <- filter(res, !(sampleID %in% ref_samples))
   # Write output table with aberrant expressed targets.
-  write_tsv(query_res, paste0(out_path, "outrider_result.tsv"))
+  write_tsv(query_res, paste0(out_path, "outrider_result_", feature, ".tsv"))
 }
 
 
 # TODO: investigate memory usage and if possible reduced / parallel.
-main <- function(query, ref, output_path){
+main <- function(query, ref, output_path, feature){
   query_data <- get_input(query)
   ref_data <- get_input(ref)
   all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
   output <- run_outrider(all_counts)
-  save_output(output_path, output, ref_data$sampleIDs)
+  save_output(output_path, output, ref_data$sampleIDs, feature)
 }
 
-main(args$query, args$ref, args$output_path)
\ No newline at end of file
+
+main(args$query, args$ref, args$output_path, args$feat)

From 808c14865fa24ebf168fd6e88ae97c29da28ad68 Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Tue, 6 Feb 2024 16:25:51 +0100
Subject: [PATCH 15/31] Added changes for feature and output in meta.yml file

---
 Outrider/1.20.0/meta.yml | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/Outrider/1.20.0/meta.yml b/Outrider/1.20.0/meta.yml
index 54155e70..e856b6c9 100644
--- a/Outrider/1.20.0/meta.yml
+++ b/Outrider/1.20.0/meta.yml
@@ -6,21 +6,32 @@ keywords:
 tools:
   - outrider:
       description: |
-      homepage: 
-      documentation: 
-      licence: 
+      homepage:
+      documentation:
+      licence:
 input:
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
+ - counts:
+      type: file
+      description: |
+        Input file to measure aberrant expression
+ - ref:
+      type: file
+      description: |
+        List of files as a reference to measure aberrant expression to
+ - feature:
+      type: value
+      description: |
+        Value for the feature exon or gene to be used in the output file
 output:
   - meta:
-      type: map
+      type: file
       description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
+        tsv file with gene or exon values regarding aberrant expression
 authors:
-  - 
+  -
 maintainers:
-  - 
+  -

From af534aeae5094f5d98142650cff1091f0b0311cc Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Mon, 19 Feb 2024 16:43:04 +0100
Subject: [PATCH 16/31] Outrider main.nf changed input feature, input refset
 gene and exon and choose accordingly

---
 Outrider/1.20.0/main.nf | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index a4f88083..0f86537d 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -1,28 +1,31 @@
 process OUTRIDER {
     tag "$meta.id"
-    label 'process_medium'
+    label 'process_high'
 
     conda "${moduleDir}/environment.yml"
     container "ghcr.io/umcugenetics/outrider_custom:0.0.1"
 
     input:
-    tuple val(meta), path(counts)
-    path(ref)
-    val(feature)
+    tuple val(meta), path(counts), val(feature)
+    path(ref_gene)
+    path(ref_exon)
 
     output:
     path("*.tsv"), emit: tsv
-//    path  "versions.yml"           , emit: versions
+    path  "versions.yml"           , emit: versions
 
- /*   when:
-    task.ext.when == null || task.ext.when*/
+    when:
+    task.ext.when == null || task.ext.when
 
     script:
+    def refset = ref_gene
+    if (feature == 'exon') {
+        refset = ref_exon
+    }
     """
     echo "${meta}"
-    echo "${ref}"
     echo "${counts}"
-    Rscript ${moduleDir}/outrider.R ${counts} -r ${ref} -f ${feature}
+    echo "${refset}"
+    Rscript ${moduleDir}/outrider.R ${counts} -r ${refset} -f ${feature}
     """
-
 }
\ No newline at end of file

From cefbef6bf4f7dc1f3ab50d812a98f6c449c4083c Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Tue, 20 Feb 2024 13:19:41 +0100
Subject: [PATCH 17/31] Added filtering, findEncodingDim,
 controlForConfounders, sampleExclusionMask, fitting, pvalue and zscore
 calculation to run outrider

---
 Outrider/1.20.0/outrider.R | 62 +++++++++++++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 7 deletions(-)

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 73d7925d..711ef37d 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -31,9 +31,10 @@ read_input_files <- function(input){
       return(as_tibble(rownames_to_column(as.data.frame(assays(rse)$counts), var="rownames")))
     } else if (input_ext %in% c("txt", "tsv")) {
       count_table <- read_delim(f, show_col_types=FALSE, skip=1)
+      # col 1: samples IDs, col 7: counts
       ct <- count_table[,c(1,7)]
       names(ct)[1] <- "rownames"
-      sampleIDs <- append(sampleIDs, colnames(count_table))
+      sampleIDs <<- append(sampleIDs, colnames(ct)[2])
       return(ct)
     } else {
       stop("Input file extension is not supported.")
@@ -75,17 +76,64 @@ run_outrider <- function(all_counts) {
   rownames(all_counts_matrix) <- all_counts$rownames
 
   ods <- OutriderDataSet(countData=all_counts_matrix)
-  filtered <- filterExpression(ods, minCounts = TRUE, filterGenes = TRUE)
-  out <- OUTRIDER(filtered)
+  filtered <- filterExpression(ods, minCounts = FALSE, filterGenes = FALSE, gtfFile="/hpc/diaggen/data/databases/gencode/gencode.v44.primary_assembly.basic.annotation.gtf")
+  filtered <- filtered[mcols(filtered)$passedFilter,]
+#  filtered <- filterExpression(ods, minCounts = TRUE, filterGenes = TRUE)
+
+  ods <- estimateSizeFactors(filtered)
+  print(ods$sizeFactor)
+
+  ##Drop pipeline
+  #maxTestedDimensionProportion 
+  mp <- 3	 
+  implementation <- "autoencoder"
+
+  a <- 5 
+  b <- min(ncol(ods), nrow(ods)) / mp   # N/3
+  maxSteps <- 15
+   if(mp < 4){
+    maxSteps <- 20
+  }
+
+  Nsteps <- min(maxSteps, b)   # Do at most 20 steps or N/3
+  # Do unique in case 2 were repeated
+
+  pars_q <- round(exp(seq(log(a),log(b),length.out = Nsteps))) %>% unique
+  #help suggestion:
+  #pars_q <- seq(2, min(100, ncol(ods) - 1, nrow(ods) - 1), 2)
+  print(pars_q)
+
+  ods <- findEncodingDim(ods, params = pars_q, implementation = implementation, BPPARAM=MulticoreParam(8))
+
+  print("getbestQ")
+  opt_q <- getBestQ(ods)
+  print(opt_q)
+
+  ods <- controlForConfounders(ods, q=opt_q, implementation=implementation, BPPARAM=MulticoreParam(8))
+  
+  #exclude sample of interest from the fitting bc of possible replicates..
+  #message(date(), ": sample Exclusion Mask ...")
+  #sampleExclusionMask(ods) <- FALSE
+  #sampleExclusionMask(ods[,c(1)]) <- TRUE
+
+  #if(grepl("^(peer|pca)$", implementation)){ 
+  #   message(date(), ": Fitting the data ...")
+  #   ods <- fit(ods, BPPARAM=MulticoreParam(8))
+  #}
+
+  ods <- computePvalues(ods, alternative="two.sided", method="BY")
+  out <- computeZscores(ods)
+
+#  out <- OUTRIDER(filtered, BPPARAM=MulticoreParam(8))
   return(out)
 }
 
 
-save_output <- function(out_path, out_outrider, ref_samples, feature, padj_thres=0.05, zscore_thres=0, all=True) {
-#  res <- as_tibble(results(ods, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=TRUE))
-  res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=TRUE))
+save_output <- function(out_path, out_outrider, ref_samples, feature, padj_thres=0.05, zscore_thres=0, a=TRUE) {
+  res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
   # Reference samples are excluded from final results. 
   query_res <- filter(res, !(sampleID %in% ref_samples))
+  #query_res <- query_res[order(query_res$pValue),]
   # Write output table with aberrant expressed targets.
   write_tsv(query_res, paste0(out_path, "outrider_result_", feature, ".tsv"))
 }
@@ -101,4 +149,4 @@ main <- function(query, ref, output_path, feature){
 }
 
 
-main(args$query, args$ref, args$output_path, args$feat)
+main(args$query, args$ref, args$output_path, args$feat)
\ No newline at end of file

From abae593ac5c1855015df1b2c5c5778992db762cf Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Tue, 20 Feb 2024 13:53:22 +0100
Subject: [PATCH 18/31] versions.yml error

---
 Outrider/1.20.0/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index 0f86537d..72695ac0 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -12,7 +12,7 @@ process OUTRIDER {
 
     output:
     path("*.tsv"), emit: tsv
-    path  "versions.yml"           , emit: versions
+    //path  "versions.yml"           , emit: versions
 
     when:
     task.ext.when == null || task.ext.when

From 22b3d9aea58d67ef1fa6b9f1cfd79f0e83ab9e9e Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Tue, 20 Feb 2024 13:56:24 +0100
Subject: [PATCH 19/31] full result, no exclusion mask and fit

---
 Outrider/1.20.0/outrider.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 711ef37d..bc53300d 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -132,8 +132,8 @@ run_outrider <- function(all_counts) {
 save_output <- function(out_path, out_outrider, ref_samples, feature, padj_thres=0.05, zscore_thres=0, a=TRUE) {
   res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
   # Reference samples are excluded from final results. 
-  query_res <- filter(res, !(sampleID %in% ref_samples))
-  #query_res <- query_res[order(query_res$pValue),]
+  #query_res <- filter(res, !(sampleID %in% ref_samples))
+  query_res <- res
   # Write output table with aberrant expressed targets.
   write_tsv(query_res, paste0(out_path, "outrider_result_", feature, ".tsv"))
 }

From 86487064cdcccdd660b109f0a677c0b0bca923cc Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Fri, 8 Mar 2024 16:00:52 +0100
Subject: [PATCH 20/31] Changes regarding separate outrider processes on GENE &
 EXON level with in ext.prefix

---
 Outrider/1.20.0/main.nf    | 17 +++++------------
 Outrider/1.20.0/outrider.R | 32 +++++++++++++++++---------------
 2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index 72695ac0..847fc806 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -6,26 +6,19 @@ process OUTRIDER {
     container "ghcr.io/umcugenetics/outrider_custom:0.0.1"
 
     input:
-    tuple val(meta), path(counts), val(feature)
-    path(ref_gene)
-    path(ref_exon)
+    tuple val(meta), path(counts)
+    path(refset)
 
     output:
     path("*.tsv"), emit: tsv
-    //path  "versions.yml"           , emit: versions
+//    path  "versions.yml"           , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def refset = ref_gene
-    if (feature == 'exon') {
-        refset = ref_exon
-    }
+    def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    echo "${meta}"
-    echo "${counts}"
-    echo "${refset}"
-    Rscript ${moduleDir}/outrider.R ${counts} -r ${refset} -f ${feature}
+    Rscript ${moduleDir}/outrider.R ${counts} -r ${refset} -p ${prefix}
     """
 }
\ No newline at end of file
diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index bc53300d..6f5f6d1d 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -16,8 +16,8 @@ parser$add_argument("-o","--output_path", metavar = "output_path", nargs = "+",
                     help = "Path where output of OUTRIDER will be stored.", default="./")
 parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+",
                     help = "Files or directories containing the reference input files.")
-parser$add_argument("-f", "--feat", metavar = "feature", nargs = "+",
-                    help = "Choose gene or exon level. Gene is default.", default="gene")
+parser$add_argument("-p", "--pref", metavar = "prefix", nargs = "+",
+                    help = "Prefix of file.", default="gene")
 args <- parser$parse_args()
 
 
@@ -97,24 +97,25 @@ run_outrider <- function(all_counts) {
 
   Nsteps <- min(maxSteps, b)   # Do at most 20 steps or N/3
   # Do unique in case 2 were repeated
-
   pars_q <- round(exp(seq(log(a),log(b),length.out = Nsteps))) %>% unique
+
   #help suggestion:
   #pars_q <- seq(2, min(100, ncol(ods) - 1, nrow(ods) - 1), 2)
   print(pars_q)
 
   ods <- findEncodingDim(ods, params = pars_q, implementation = implementation, BPPARAM=MulticoreParam(8))
 
-  print("getbestQ")
+  #find best q (dimension)
   opt_q <- getBestQ(ods)
+  print("getbestQ")
   print(opt_q)
+  
+  ##exclude sample of interest from the fitting bc of possible replicates..
+  message(date(), ": sample Exclusion Mask ...")
+  sampleExclusionMask(ods) <- FALSE
+  sampleExclusionMask(ods[,c(1)]) <- TRUE
 
   ods <- controlForConfounders(ods, q=opt_q, implementation=implementation, BPPARAM=MulticoreParam(8))
-  
-  #exclude sample of interest from the fitting bc of possible replicates..
-  #message(date(), ": sample Exclusion Mask ...")
-  #sampleExclusionMask(ods) <- FALSE
-  #sampleExclusionMask(ods[,c(1)]) <- TRUE
 
   #if(grepl("^(peer|pca)$", implementation)){ 
   #   message(date(), ": Fitting the data ...")
@@ -129,24 +130,25 @@ run_outrider <- function(all_counts) {
 }
 
 
-save_output <- function(out_path, out_outrider, ref_samples, feature, padj_thres=0.05, zscore_thres=0, a=TRUE) {
+save_output <- function(out_path, out_outrider, ref_samples, prefix, padj_thres=0.05, zscore_thres=0, a=TRUE) {
   res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
   # Reference samples are excluded from final results. 
-  #query_res <- filter(res, !(sampleID %in% ref_samples))
+#  query_res <- filter(res, !(sampleID %in% ref_samples))
   query_res <- res
+  #query_res <- query_res[order(query_res$pValue),]
   # Write output table with aberrant expressed targets.
-  write_tsv(query_res, paste0(out_path, "outrider_result_", feature, ".tsv"))
+  write_tsv(query_res, paste0(out_path, prefix, ".outrider_result.tsv"))
 }
 
 
 # TODO: investigate memory usage and if possible reduced / parallel.
-main <- function(query, ref, output_path, feature){
+main <- function(query, ref, output_path, prefix){
   query_data <- get_input(query)
   ref_data <- get_input(ref)
   all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
   output <- run_outrider(all_counts)
-  save_output(output_path, output, ref_data$sampleIDs, feature)
+  save_output(output_path, output, ref_data$sampleIDs, prefix)
 }
 
 
-main(args$query, args$ref, args$output_path, args$feat)
\ No newline at end of file
+main(args$query, args$ref, args$output_path, args$pref)
\ No newline at end of file

From 2acf15156fe8a844a7aea5fe46d4ddbed0fbb94b Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Tue, 18 Jun 2024 17:52:25 +0200
Subject: [PATCH 21/31] Outrider.R additions to generate input files for EMC
 app. Added separate filtering for exon level and ratio fpkm

---
 Outrider/1.20.0/main.nf    |   2 +
 Outrider/1.20.0/outrider.R | 187 +++++++++++++++++++++++++++----------
 2 files changed, 142 insertions(+), 47 deletions(-)

diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index 847fc806..6797671f 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -11,6 +11,8 @@ process OUTRIDER {
 
     output:
     path("*.tsv"), emit: tsv
+    path("*.csv"), emit: csv
+    path("*.pdf"), emit: pdf
 //    path  "versions.yml"           , emit: versions
 
     when:
diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 6f5f6d1d..b5da6ba7 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -7,6 +7,7 @@ suppressPackageStartupMessages(library("tools"))
 suppressPackageStartupMessages(library("tibble"))
 suppressPackageStartupMessages(library("readr"))
 suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("stringr"))
 
 # Argument parser
 parser <- ArgumentParser(description = "Process some integers")
@@ -31,8 +32,8 @@ read_input_files <- function(input){
       return(as_tibble(rownames_to_column(as.data.frame(assays(rse)$counts), var="rownames")))
     } else if (input_ext %in% c("txt", "tsv")) {
       count_table <- read_delim(f, show_col_types=FALSE, skip=1)
-      # col 1: samples IDs, col 7: counts
-      ct <- count_table[,c(1,7)]
+      # col 1: samples IDs, col 9: counts
+      ct <- count_table[,c(1,9)]
       names(ct)[1] <- "rownames"
       sampleIDs <<- append(sampleIDs, colnames(ct)[2])
       return(ct)
@@ -55,7 +56,7 @@ get_input <- function(input){
   } else {  # If both dir and files are provided, or different tyes (character, int etc)
     stop("Input is neither dir or file.")
   }
-  
+
   return(read_input_files(retrieved_files))
 }
 
@@ -68,74 +69,160 @@ merge_count_tables <- function(lst_query, lst_ref){
 }
 
 
-run_outrider <- function(all_counts) {
+run_outrider <- function(all_counts, query, prefix) {
   # TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
-  # filter expression
   all_counts_matrix <- as.matrix(all_counts)[,-1]
   mode(all_counts_matrix) <- "integer"
   rownames(all_counts_matrix) <- all_counts$rownames
 
   ods <- OutriderDataSet(countData=all_counts_matrix)
-  filtered <- filterExpression(ods, minCounts = FALSE, filterGenes = FALSE, gtfFile="/hpc/diaggen/data/databases/gencode/gencode.v44.primary_assembly.basic.annotation.gtf")
-  filtered <- filtered[mcols(filtered)$passedFilter,]
-#  filtered <- filterExpression(ods, minCounts = TRUE, filterGenes = TRUE)
-
-  ods <- estimateSizeFactors(filtered)
-  print(ods$sizeFactor)
 
-  ##Drop pipeline
-  #maxTestedDimensionProportion 
-  mp <- 3	 
-  implementation <- "autoencoder"
+  plotheat = "counts_heatplots.pdf"
+  pdf(plotheat,onefile = TRUE)
 
-  a <- 5 
-  b <- min(ncol(ods), nrow(ods)) / mp   # N/3
-  maxSteps <- 15
-   if(mp < 4){
-    maxSteps <- 20
+  if(grepl("gene", prefix)){
+    ##FOR GENE LEVEL
+    ods <- filterExpression(ods, fpkmCutoff = 1, minCounts = FALSE, filterGenes = FALSE, gtfFile="/hpc/diaggen/data/databases/gencode/gencode.v44.primary_assembly.basic.annotation.gtf")
   }
+  else{
+    ##FOR EXON LEVEL
+    ct <- read_delim(query, show_col_types=FALSE, skip=1)
+    print("identical exon ids:")
+    print(identical(ct$Geneid,rownames(assays(ods)$counts)))
+    readD <- apply(assays(ods)$counts, 2, function(x) x / sum(x) * 10^6)
+    countsFPKM <- readD / ct$Length * 10^3
+    perc95e <- apply(countsFPKM, 1, function(x) quantile(x,probs=0.95))
+    mcols(ods)$passedFilter <- perc95e>1
+    mcols(ods)$basepairs <- ct$Length
+
+
+##FOR RATIO FPKM
+#  print("RATIO FPKM")
+#  ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
+#  }
+# display the FPKM distribution of counts.
+#  plotFPKM(ods)
+  message(date(), ": dim before filtering...")
+  print(dim(assays(ods)$counts))
+
+##SUBSETTING
+  ods <- ods[mcols(ods)$passedFilter,]
+  message(date(), ": dim after filtering...")
+  print(dim(assays(ods)$counts))
+
+# Heatmap of the sample correlation
+# it can also annotate the clusters resulting from the dendrogram
+  ods <- plotCountCorHeatmap(ods, normalized=FALSE, nRowCluster=4)
+
+# Heatmap of the gene/sample expression
+  ods <- plotCountGeneSampleHeatmap(ods, normalized=FALSE, nRowCluster=4)
+
+  ods <- estimateSizeFactors(ods)
+  message(date(), ": sizeFactors...")
+  print(ods$sizeFactor)
 
-  Nsteps <- min(maxSteps, b)   # Do at most 20 steps or N/3
-  # Do unique in case 2 were repeated
-  pars_q <- round(exp(seq(log(a),log(b),length.out = Nsteps))) %>% unique
-
-  #help suggestion:
-  #pars_q <- seq(2, min(100, ncol(ods) - 1, nrow(ods) - 1), 2)
-  print(pars_q)
-
-  ods <- findEncodingDim(ods, params = pars_q, implementation = implementation, BPPARAM=MulticoreParam(8))
+  implementation <- "autoencoder"
+  pars_q <- seq(2, min(100, ncol(ods) - 1, nrow(ods) - 1), 2)
 
+  message(date(), ": findEncodingDim...")
+  ods <- findEncodingDim(ods, params = pars_q, implementation=implementation, BPPARAM=MulticoreParam(8))
+  
   #find best q (dimension)
   opt_q <- getBestQ(ods)
-  print("getbestQ")
+  message(date(), ": getbestQ...")
   print(opt_q)
-  
-  ##exclude sample of interest from the fitting bc of possible replicates..
-  message(date(), ": sample Exclusion Mask ...")
-  sampleExclusionMask(ods) <- FALSE
-  sampleExclusionMask(ods[,c(1)]) <- TRUE
 
-  ods <- controlForConfounders(ods, q=opt_q, implementation=implementation, BPPARAM=MulticoreParam(8))
+  ods <- controlForConfounders(ods, q = opt_q, BPPARAM = MulticoreParam(8))
+
+# After controlling for confounders the heatmap should be plotted again. 
+# If it worked, no batches should be present and the correlations between samples should be reduced and close to zero. [1]*
+  ods <- plotCountCorHeatmap(ods, normalized=TRUE)
+  dev.off()
 
-  #if(grepl("^(peer|pca)$", implementation)){ 
-  #   message(date(), ": Fitting the data ...")
-  #   ods <- fit(ods, BPPARAM=MulticoreParam(8))
-  #}
+#  if(grepl("^(peer|pca)$", implementation)){ 
+#     message(date(), ": Fitting the data ...") # NOT with autoencoder
+#     ods <- fit(ods, BPPARAM=MulticoreParam(8))
+#  }
 
-  ods <- computePvalues(ods, alternative="two.sided", method="BY")
+  ods <- computePvalues(ods, alternative="two.sided", method="BY", BPPARAM = MulticoreParam(8))
   out <- computeZscores(ods)
 
-#  out <- OUTRIDER(filtered, BPPARAM=MulticoreParam(8))
+# run full OUTRIDER pipeline (control, fit model, calculate P-values)
+# out <- OUTRIDER(ods, BPPARAM=MulticoreParam(8))
   return(out)
 }
 
 
-save_output <- function(out_path, out_outrider, ref_samples, prefix, padj_thres=0.05, zscore_thres=0, a=TRUE) {
-  res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
+##Only necessary for Erasmus app
+get_ct_emcapp <- function(query, prefix){
+  count_table <- read_delim(query, show_col_types=FALSE, skip=1)
+  ct <- count_table[,c(2,3,4,8,1)]
+  colnames(ct)<- c("chr","start","end","gene","geneID")
+  ct$chr <- str_split_fixed(ct$chr,';',2)[,1]
+  ct$start <- str_split_fixed(ct$start,';',2)[,1]
+  ct$end <- gsub("^.*;", "", ct$end)
+
+  ##For gene level: create countdata table Identifier chr_start_end_genename     
+  ct$emcID <- paste(ct$chr,ct$start,ct$end,ct$gene,sep="_")
+  ##For exon level: create countdata table Identifier: add Ensemble exon_id: chr_start_end_genename-exon_id
+  if(grepl("exon", prefix)){
+    ct$emcID <- paste(ct$emcID,ct$geneID,sep="-")
+  }
+
+  return(ct)
+}
+
+
+##Only necessary for Erasmus app
+save_res_emcapp <- function(ct, res, prefix, out_path){
+    res <- as_tibble(results(res, all=TRUE))[,c(2,1,3:14)]
+    res_merge <- merge(res, ct, by = "geneID", all.res = TRUE)
+    res_merge$geneENSG <- res_merge$geneID
+    res_merge$geneID <- res_merge$emcID
+    res_merge$sample_name<-NA
+    res_merge$TIN_mean<-NA
+    res_merge$link_bam<-NA
+
+   fragment <- "genes"
+    treatment <- "untreated"
+    if(grepl("exon",prefix)){ fragment <- "exons"}
+    if(grepl("chx|CHX",prefix)){ treatment <- "chx"}
+    write_csv(res_merge[,c(1:18,20:23)], paste0(out_path, "umcu_rnaseq_fib_",treatment,"_res_outrider_",fragment,"_counts.tsv"), append=FALSE, col_names = TRUE)
+}
+
+##Only necessary for Erasmus app
+save_count_meta_emcapp <- function(ct, all_counts, out_path, prefix){
+  ct <- ct[,c("emcID","geneID")]
+  colnames(all_counts)[1]<-c("geneID")
+  merge_counts <- merge(all_counts, ct, by = "geneID", all.ct = TRUE)
+  merge_counts$geneID <- merge_counts$emcID
+  counts_out <- merge_counts[,c(1:ncol(merge_counts)-1)]
+  colnames(counts_out)[1]<-c("")
+
+  ##countdata output table EMC app
+  if(grepl("gene", prefix)){
+    write_tsv(counts_out, paste0(out_path, "umcu_rnaseq_genes_counts.tsv"))
+  }else{
+    write_tsv(counts_out, paste0(out_path, "umcu_rnaseq_exons_counts.tsv"))}
+
+  ##Meta output table EMC app
+  treatment <- "untreated"
+  if(grepl("chx|CHX",prefix)){ treatment <- "chx"}
+  metadata<-data.frame(colnames(counts_out[,2:ncol(counts_out)]),treatment,"fib","umcu_rnaseq",0,"")
+  colnames(metadata)<-c("sample_id","treatment","species","experiment_GS","gender","drop")
+  if(grepl("gene",prefix)){
+    write.csv2(metadata,paste0(out_path, "umcu_rnaseq_metadata_genes.csv"), row.names = FALSE, quote=FALSE)
+  }else{
+    write.csv2(metadata,paste0(out_path, "umcu_rnaseq_metadata_exons.csv"), row.names = FALSE, quote=FALSE)}
+}
+
+save_output <- function(out_path, out_outrider, ref_samples, prefix, query, padj_thres=0.05, zscore_thres=0, a=TRUE) {
+#  res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
   # Reference samples are excluded from final results. 
 #  query_res <- filter(res, !(sampleID %in% ref_samples))
+  res <- as_tibble(results(out_outrider, all=a))
   query_res <- res
-  #query_res <- query_res[order(query_res$pValue),]
+
   # Write output table with aberrant expressed targets.
   write_tsv(query_res, paste0(out_path, prefix, ".outrider_result.tsv"))
 }
@@ -146,8 +233,14 @@ main <- function(query, ref, output_path, prefix){
   query_data <- get_input(query)
   ref_data <- get_input(ref)
   all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
-  output <- run_outrider(all_counts)
-  save_output(output_path, output, ref_data$sampleIDs, prefix)
+
+  output <- run_outrider(all_counts, query, prefix)
+  save_output(output_path, output, ref_data$sampleIDs, prefix, query)
+
+  ##Only necessary for Erasmus app, to get the counts, meta and outrider results tables
+  ct_emcapp <- get_ct_emcapp(query, prefix)
+  save_count_meta_emcapp(ct_emcapp, all_counts, output_path, prefix)
+  save_res_emcapp(ct_emcapp, output, prefix, output_path)
 }
 
 

From ccf831fe817454763ed6e4eb0fbf97a131e6479b Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Tue, 18 Jun 2024 20:43:45 +0200
Subject: [PATCH 22/31] forgot }

---
 Outrider/1.20.0/outrider.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index b5da6ba7..3dd5c624 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -95,11 +95,11 @@ run_outrider <- function(all_counts, query, prefix) {
     mcols(ods)$passedFilter <- perc95e>1
     mcols(ods)$basepairs <- ct$Length
 
+    ##FOR RATIO FPKM
+#   print("RATIO FPKM")
+#   ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
+  }
 
-##FOR RATIO FPKM
-#  print("RATIO FPKM")
-#  ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
-#  }
 # display the FPKM distribution of counts.
 #  plotFPKM(ods)
   message(date(), ": dim before filtering...")

From dd36ce0d695aeaa2137696faefb38ea1fe5fbbf6 Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Wed, 19 Jun 2024 11:09:01 +0200
Subject: [PATCH 23/31] filter_expression as a separate function in outrider.R

---
 Outrider/1.20.0/outrider.R | 50 +++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 3dd5c624..89007fb9 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -69,17 +69,7 @@ merge_count_tables <- function(lst_query, lst_ref){
 }
 
 
-run_outrider <- function(all_counts, query, prefix) {
-  # TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
-  all_counts_matrix <- as.matrix(all_counts)[,-1]
-  mode(all_counts_matrix) <- "integer"
-  rownames(all_counts_matrix) <- all_counts$rownames
-
-  ods <- OutriderDataSet(countData=all_counts_matrix)
-
-  plotheat = "counts_heatplots.pdf"
-  pdf(plotheat,onefile = TRUE)
-
+filter_expression <- function(ods, query, prefix){
   if(grepl("gene", prefix)){
     ##FOR GENE LEVEL
     ods <- filterExpression(ods, fpkmCutoff = 1, minCounts = FALSE, filterGenes = FALSE, gtfFile="/hpc/diaggen/data/databases/gencode/gencode.v44.primary_assembly.basic.annotation.gtf")
@@ -96,25 +86,41 @@ run_outrider <- function(all_counts, query, prefix) {
     mcols(ods)$basepairs <- ct$Length
 
     ##FOR RATIO FPKM
-#   print("RATIO FPKM")
-#   ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
+#    print("RATIO FPKM")
+#    ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
   }
 
-# display the FPKM distribution of counts.
-#  plotFPKM(ods)
+  # display the FPKM distribution of counts.
+  #  plotFPKM(ods)
   message(date(), ": dim before filtering...")
   print(dim(assays(ods)$counts))
 
-##SUBSETTING
+  ##SUBSETTING
   ods <- ods[mcols(ods)$passedFilter,]
   message(date(), ": dim after filtering...")
   print(dim(assays(ods)$counts))
+  return(ods)
+}
+
+
+run_outrider <- function(all_counts, query, prefix) {
+  # TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
+  all_counts_matrix <- as.matrix(all_counts)[,-1]
+  mode(all_counts_matrix) <- "integer"
+  rownames(all_counts_matrix) <- all_counts$rownames
+
+  ods <- OutriderDataSet(countData=all_counts_matrix)
+
+  plotheat = "counts_heatplots.pdf"
+  pdf(plotheat,onefile = TRUE)
+
+  ods <- filter_expression(ods, query, prefix)
 
-# Heatmap of the sample correlation
-# it can also annotate the clusters resulting from the dendrogram
+  # Heatmap of the sample correlation
+  # it can also annotate the clusters resulting from the dendrogram
   ods <- plotCountCorHeatmap(ods, normalized=FALSE, nRowCluster=4)
 
-# Heatmap of the gene/sample expression
+  # Heatmap of the gene/sample expression
   ods <- plotCountGeneSampleHeatmap(ods, normalized=FALSE, nRowCluster=4)
 
   ods <- estimateSizeFactors(ods)
@@ -134,8 +140,8 @@ run_outrider <- function(all_counts, query, prefix) {
 
   ods <- controlForConfounders(ods, q = opt_q, BPPARAM = MulticoreParam(8))
 
-# After controlling for confounders the heatmap should be plotted again. 
-# If it worked, no batches should be present and the correlations between samples should be reduced and close to zero. [1]*
+  # After controlling for confounders the heatmap should be plotted again. 
+  # If it worked, no batches should be present and the correlations between samples should be reduced and close to zero. [1]*
   ods <- plotCountCorHeatmap(ods, normalized=TRUE)
   dev.off()
 
@@ -147,7 +153,7 @@ run_outrider <- function(all_counts, query, prefix) {
   ods <- computePvalues(ods, alternative="two.sided", method="BY", BPPARAM = MulticoreParam(8))
   out <- computeZscores(ods)
 
-# run full OUTRIDER pipeline (control, fit model, calculate P-values)
+  # run full OUTRIDER pipeline (control, fit model, calculate P-values)
 # out <- OUTRIDER(ods, BPPARAM=MulticoreParam(8))
   return(out)
 }

From 3e59c78eb06c564b11292a66255c4bbc488edd51 Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Wed, 19 Jun 2024 13:35:36 +0200
Subject: [PATCH 24/31] chx->CHX in outrider.R

---
 Outrider/1.20.0/outrider.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 89007fb9..51e04cd0 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -192,10 +192,11 @@ save_res_emcapp <- function(ct, res, prefix, out_path){
    fragment <- "genes"
     treatment <- "untreated"
     if(grepl("exon",prefix)){ fragment <- "exons"}
-    if(grepl("chx|CHX",prefix)){ treatment <- "chx"}
+    if(grepl("chx|CHX",prefix)){ treatment <- "CHX"}
     write_csv(res_merge[,c(1:18,20:23)], paste0(out_path, "umcu_rnaseq_fib_",treatment,"_res_outrider_",fragment,"_counts.tsv"), append=FALSE, col_names = TRUE)
 }
 
+
 ##Only necessary for Erasmus app
 save_count_meta_emcapp <- function(ct, all_counts, out_path, prefix){
   ct <- ct[,c("emcID","geneID")]
@@ -213,7 +214,7 @@ save_count_meta_emcapp <- function(ct, all_counts, out_path, prefix){
 
   ##Meta output table EMC app
   treatment <- "untreated"
-  if(grepl("chx|CHX",prefix)){ treatment <- "chx"}
+  if(grepl("chx|CHX",prefix)){ treatment <- "CHX"}
   metadata<-data.frame(colnames(counts_out[,2:ncol(counts_out)]),treatment,"fib","umcu_rnaseq",0,"")
   colnames(metadata)<-c("sample_id","treatment","species","experiment_GS","gender","drop")
   if(grepl("gene",prefix)){
@@ -222,6 +223,7 @@ save_count_meta_emcapp <- function(ct, all_counts, out_path, prefix){
     write.csv2(metadata,paste0(out_path, "umcu_rnaseq_metadata_exons.csv"), row.names = FALSE, quote=FALSE)}
 }
 
+
 save_output <- function(out_path, out_outrider, ref_samples, prefix, query, padj_thres=0.05, zscore_thres=0, a=TRUE) {
 #  res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
   # Reference samples are excluded from final results. 

From 5d596e5af262f8c99a00c191b90c08a1602e63fc Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Thu, 4 Jul 2024 13:30:05 +0200
Subject: [PATCH 25/31] fraser module added for aberrant splicing

---
 Fraser/1.99.3/environment.yml |   6 ++
 Fraser/1.99.3/fraser.R        | 153 ++++++++++++++++++++++++++++++++++
 Fraser/1.99.3/main.nf         |  26 ++++++
 Outrider/1.20.0/outrider.R    |   7 +-
 4 files changed, 189 insertions(+), 3 deletions(-)
 create mode 100644 Fraser/1.99.3/environment.yml
 create mode 100644 Fraser/1.99.3/fraser.R
 create mode 100644 Fraser/1.99.3/main.nf

diff --git a/Fraser/1.99.3/environment.yml b/Fraser/1.99.3/environment.yml
new file mode 100644
index 00000000..c40ff5c1
--- /dev/null
+++ b/Fraser/1.99.3/environment.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::fastqc=0.12.1
\ No newline at end of file
diff --git a/Fraser/1.99.3/fraser.R b/Fraser/1.99.3/fraser.R
new file mode 100644
index 00000000..137156cf
--- /dev/null
+++ b/Fraser/1.99.3/fraser.R
@@ -0,0 +1,153 @@
+
+# load FRASER library
+suppressPackageStartupMessages(library(FRASER))
+suppressPackageStartupMessages(library(biomaRt))
+suppressPackageStartupMessages(library(utils))
+
+args <- commandArgs()
+
+
+get_st_rows <- function(input){
+  if(dir.exists(input)){
+    sampleID <- list.files(path=input,pattern="*.bam",full.names=FALSE)
+    bamFile <- list.files(path=input,pattern="*.bam",full.names=TRUE)
+    return(data.table(sampleID,bamFile))
+  }
+  else if(file.exists(input)){
+    sampleID <- basename(file.path(input))
+    bamFile <- file.path(input)
+    return(data.table(sampleID,bamFile))
+  }
+}
+
+
+create_sample_table <- function(sample,ref,paired_end){
+  sampleTable <- rbind(get_st_rows(sample),get_st_rows(ref))
+  sampleTable <- sampleTable[!grepl(".bai",sampleID), ]
+  sampleTable$group <- c(1:nrow(sampleTable))
+  sampleTable$pairedEnd <- TRUE
+
+  message(date(), ": sampleTable")
+  print(sampleTable)
+
+  return(sampleTable)
+}
+
+
+run_fraser <- function(sampleTable){
+  # create FRASER object
+  settings <- FraserDataSet(colData=sampleTable, workingDir="./FRASER_output")
+  message(date(), ": settings...")
+  print(settings)
+
+  register(MulticoreParam(workers=min(8, multicoreWorkers())))
+
+  # count reads
+  fds <- countRNAData(settings)
+
+  fds <- calculatePSIValues(fds)
+    #FRASER vignette/gagneurlab settings:
+  #Currently, we suggest keeping only junctions which support the following:
+  #minExpressionInOneSample - 20 - At least one sample has 20 (or more) reads
+  #quantileMinExpression - 10 - The minimum total read count (N) an intron needs to have at the specified quantile across samples to pass the filter. See quantileForFiltering. 
+  #quantile - 0.95 -  Defines at which percentile the quantileMinExpression filter is applied. 
+  #A value of 0.95 means that at least 5% of the samples need to have a total read count N >= quantileMinExpression to pass the filter.
+  #minDeltaPsi - 0.05 -  The minimal variation (in delta psi) required for an intron to pass the filter. gagneurlab suggests 0.05
+
+  fds <- filterExpressionAndVariability(fds, minExpressionInOneSample = 20, quantileMinExpression=10, quantile = 0.95, minDeltaPsi=0.05, filter=FALSE)
+  fds <- saveFraserDataSet(fds, name="fds-filter")
+#  plotheat = "plots_fraser.pdf"
+#  pdf(plotheat,onefile = TRUE)
+#  plotFilterExpression(fds, bins=100)
+
+  message(date(), ": dimension before filtering...")
+  print(dim(fds))
+  fds <- fds[mcols(fds, type="j")[,"passed"]]
+  message(date(), ": dimension after filtering...")
+  print(dim(fds))
+
+  #Finds the optimal encoding dimension by injecting artificial splicing outlier ratios while maximizing the precision-recall curve.
+  # Get range for latent space dimension, max tested dimensions = 6
+  mp <- 6
+  a <- 2
+  b <- min(ncol(fds), nrow(fds)) / mp   # N/mp
+
+  maxSteps <- 12
+  if(mp < 6){
+    maxSteps <- 15
+  }
+
+  Nsteps <- min(maxSteps, b)
+  pars_q <- unique(round(exp(seq(log(a),log(b),length.out = Nsteps))))
+
+  message(date(), ": getEncDimRange", pars_q)
+
+  psiType = "jaccard"
+  impl = "PCA"
+
+  fds <- saveFraserDataSet(fds, name="fds-normfalse")
+  # Heatmap of the sample correlation
+#  plotCountCorHeatmap(fds, type=psiType, logit=FALSE, normalized=FALSE)
+  # Heatmap of the intron/sample expression
+#  plotCountCorHeatmap(fds, type=psiType, logit=FALSE, normalized=FALSE, plotType="junctionSample", topJ=100, minDeltaPsi=0.05)
+
+  #set.seed(as.integer(random))
+  # hyperparameter opimization can be used to estimate the dimension q of the latent space of the data. 
+  # It works by artificially injecting outliers into the data and then comparing the AUC of recalling these outliers for different values of q.
+  fds <- optimHyperParams(fds, type=psiType, implementation=impl, q_param=pars_q, minDeltaPsi=0.05, plot=FALSE)
+  # retrieve the estimated optimal dimension of the latent space
+  #currentType(fds) <- psiType
+  bestq = bestQ(fds, type=psiType)
+  message(date(), "...find best q: " ,bestq)
+
+  # Add verbosity to the FRASER object
+  verbose(fds) <- 3
+  fds <- fit(fds, q=bestq, type=psiType, implementation=impl, iterations=15)
+
+  # Heatmap of the sample correlation
+#  plotCountCorHeatmap(fds, type=psiType, logit=FALSE, normalized=TRUE)
+  # Heatmap of the intron/sample expression
+#  plotCountCorHeatmap(fds, type=psiType, logit=FALSE, normalized=TRUE, plotType="junctionSample", topJ=100, minDeltaPsi=0.05)
+  dev.off()
+  fds <- saveFraserDataSet(fds, name="fds-normtrue")
+
+  #all in one getEncDimRange, optimHyperParams, bestQ, fit
+  #fds_fraser <- FRASER(fds_filtered, q=c(jaccard=2))
+
+  fds <- annotateRanges(fds, GRCh = 38)
+
+  # Pvalues
+  fds <- calculatePvalues(fds, type=psiType)
+  # Adjust Pvalues
+  fds <- calculatePadjValues(fds, type=psiType)
+
+  fds <- saveFraserDataSet(fds, name="fds-final")
+
+  return (fds)
+}
+
+write_fraser_output <- function(fraser_ds, prefix){
+  # retrieve results with default and recommended cutoffs (padj <= 0.05 (0.1 gagneurlab) and # |deltaPsi| >= 0.3)
+  fraser_res <- results(fraser_ds, all=TRUE)
+  #res <- results(fds, psiType=psiTypes, aggregate=TRUE, collapse=FALSE, all=TRUE)
+  message(date(), ": result...")
+  print(fraser_res)
+
+  write.table(as.data.frame(unname(fraser_res)), paste0("fraser_result_",prefix,".tsv"), sep="\t", quote=FALSE)
+}
+
+
+main <- function(args){
+  ##Argument.Parser not in docker image..
+  sample <- args[6] #: bam + bai sample of interest
+  refset <- args[7] #: dir to ref set bams + bai
+  prefix <- args[8] #: prefix
+  pairedEnd <- args[9] #: pairedEnd (=TRUE)
+  sampleTable <- create_sample_table(sample, refset, pairedEnd)
+  message(date(), ": sampleTable")
+  fraser_ds <- run_fraser(sampleTable)
+  write_fraser_output(fraser_ds, prefix)
+}
+
+
+main(args)
diff --git a/Fraser/1.99.3/main.nf b/Fraser/1.99.3/main.nf
new file mode 100644
index 00000000..c63568f8
--- /dev/null
+++ b/Fraser/1.99.3/main.nf
@@ -0,0 +1,26 @@
+
+process FRASER {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "quay.io/biocontainers/bioconductor-fraser:1.99.3--r43hf17093f_0"
+
+    input:
+    tuple val(meta), path(input)
+    path(refset)
+
+    output:
+    path("*.tsv"), emit: tsv
+//    path  "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    export BIOMART_CACHE=/hpc/diaggen/projects/woc/rna/container
+    Rscript ${moduleDir}/fraser.R ${input} ${refset} ${prefix} TRUE
+    """
+}
\ No newline at end of file
diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 51e04cd0..0b17687e 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -91,7 +91,7 @@ filter_expression <- function(ods, query, prefix){
   }
 
   # display the FPKM distribution of counts.
-  #  plotFPKM(ods)
+  ods <- plotFPKM(ods)
   message(date(), ": dim before filtering...")
   print(dim(assays(ods)$counts))
 
@@ -118,10 +118,10 @@ run_outrider <- function(all_counts, query, prefix) {
 
   # Heatmap of the sample correlation
   # it can also annotate the clusters resulting from the dendrogram
-  ods <- plotCountCorHeatmap(ods, normalized=FALSE, nRowCluster=4)
+  ods <- plotCountCorHeatmap(ods, normalized=FALSE)
 
   # Heatmap of the gene/sample expression
-  ods <- plotCountGeneSampleHeatmap(ods, normalized=FALSE, nRowCluster=4)
+  ods <- plotCountGeneSampleHeatmap(ods, normalized=FALSE)
 
   ods <- estimateSizeFactors(ods)
   message(date(), ": sizeFactors...")
@@ -143,6 +143,7 @@ run_outrider <- function(all_counts, query, prefix) {
   # After controlling for confounders the heatmap should be plotted again. 
   # If it worked, no batches should be present and the correlations between samples should be reduced and close to zero. [1]*
   ods <- plotCountCorHeatmap(ods, normalized=TRUE)
+  ods <- plotCountGeneSampleHeatmap(ods, normalized=TRUE)
   dev.off()
 
 #  if(grepl("^(peer|pca)$", implementation)){ 

From 12c1e1c3d0ae208ed7154260e3806e716f3f83e6 Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Thu, 25 Jul 2024 12:46:04 +0200
Subject: [PATCH 26/31] Instructions for Exon ratio code (un)commenting

---
 Outrider/1.20.0/outrider.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 0b17687e..2e120353 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -85,8 +85,10 @@ filter_expression <- function(ods, query, prefix){
     mcols(ods)$passedFilter <- perc95e>1
     mcols(ods)$basepairs <- ct$Length
 
-    ##FOR RATIO FPKM
-#    print("RATIO FPKM")
+    ##FOR EXON RATIO
+    ##REMOVE ##FOR EXON LEVEL code above
+    ##UNCOMMENT BELOW STEPS
+#    print("EXON RATIO")
 #    ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
   }
 

From 931a15962cb44bbc3ad2061de5010c40b00ff28a Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Wed, 31 Jul 2024 17:13:59 +0200
Subject: [PATCH 27/31] added returns for readability, CHX->chx

---
 Outrider/1.20.0/outrider.R | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 2e120353..a7781b36 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -56,7 +56,7 @@ get_input <- function(input){
   } else {  # If both dir and files are provided, or different tyes (character, int etc)
     stop("Input is neither dir or file.")
   }
-
+  
   return(read_input_files(retrieved_files))
 }
 
@@ -161,7 +161,6 @@ run_outrider <- function(all_counts, query, prefix) {
   return(out)
 }
 
-
 ##Only necessary for Erasmus app
 get_ct_emcapp <- function(query, prefix){
   count_table <- read_delim(query, show_col_types=FALSE, skip=1)
@@ -181,7 +180,6 @@ get_ct_emcapp <- function(query, prefix){
   return(ct)
 }
 
-
 ##Only necessary for Erasmus app
 save_res_emcapp <- function(ct, res, prefix, out_path){
     res <- as_tibble(results(res, all=TRUE))[,c(2,1,3:14)]
@@ -191,15 +189,14 @@ save_res_emcapp <- function(ct, res, prefix, out_path){
     res_merge$sample_name<-NA
     res_merge$TIN_mean<-NA
     res_merge$link_bam<-NA
-
-   fragment <- "genes"
+    
+    fragment <- "genes"
     treatment <- "untreated"
     if(grepl("exon",prefix)){ fragment <- "exons"}
     if(grepl("chx|CHX",prefix)){ treatment <- "CHX"}
     write_csv(res_merge[,c(1:18,20:23)], paste0(out_path, "umcu_rnaseq_fib_",treatment,"_res_outrider_",fragment,"_counts.tsv"), append=FALSE, col_names = TRUE)
 }
 
-
 ##Only necessary for Erasmus app
 save_count_meta_emcapp <- function(ct, all_counts, out_path, prefix){
   ct <- ct[,c("emcID","geneID")]
@@ -214,10 +211,10 @@ save_count_meta_emcapp <- function(ct, all_counts, out_path, prefix){
     write_tsv(counts_out, paste0(out_path, "umcu_rnaseq_genes_counts.tsv"))
   }else{
     write_tsv(counts_out, paste0(out_path, "umcu_rnaseq_exons_counts.tsv"))}
-
+  
   ##Meta output table EMC app
   treatment <- "untreated"
-  if(grepl("chx|CHX",prefix)){ treatment <- "CHX"}
+  if(grepl("chx|CHX",prefix)){ treatment <- "chx"}
   metadata<-data.frame(colnames(counts_out[,2:ncol(counts_out)]),treatment,"fib","umcu_rnaseq",0,"")
   colnames(metadata)<-c("sample_id","treatment","species","experiment_GS","gender","drop")
   if(grepl("gene",prefix)){
@@ -226,7 +223,6 @@ save_count_meta_emcapp <- function(ct, all_counts, out_path, prefix){
     write.csv2(metadata,paste0(out_path, "umcu_rnaseq_metadata_exons.csv"), row.names = FALSE, quote=FALSE)}
 }
 
-
 save_output <- function(out_path, out_outrider, ref_samples, prefix, query, padj_thres=0.05, zscore_thres=0, a=TRUE) {
 #  res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
   # Reference samples are excluded from final results. 
@@ -238,7 +234,6 @@ save_output <- function(out_path, out_outrider, ref_samples, prefix, query, padj
   write_tsv(query_res, paste0(out_path, prefix, ".outrider_result.tsv"))
 }
 
-
 # TODO: investigate memory usage and if possible reduced / parallel.
 main <- function(query, ref, output_path, prefix){
   query_data <- get_input(query)

From 0c330def3f83835a9a08f39e46a0a8b952514844 Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Wed, 31 Jul 2024 17:21:41 +0200
Subject: [PATCH 28/31] Fraser edits for readability and logging

---
 Fraser/1.99.3/fraser.R | 31 ++++++++++++++++++-------------
 Fraser/1.99.3/meta.yml | 25 +++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 13 deletions(-)
 create mode 100644 Fraser/1.99.3/meta.yml

diff --git a/Fraser/1.99.3/fraser.R b/Fraser/1.99.3/fraser.R
index 137156cf..4ef6ac4b 100644
--- a/Fraser/1.99.3/fraser.R
+++ b/Fraser/1.99.3/fraser.R
@@ -1,4 +1,3 @@
-
 # load FRASER library
 suppressPackageStartupMessages(library(FRASER))
 suppressPackageStartupMessages(library(biomaRt))
@@ -12,7 +11,7 @@ get_st_rows <- function(input){
     sampleID <- list.files(path=input,pattern="*.bam",full.names=FALSE)
     bamFile <- list.files(path=input,pattern="*.bam",full.names=TRUE)
     return(data.table(sampleID,bamFile))
-  }
+  }  
   else if(file.exists(input)){
     sampleID <- basename(file.path(input))
     bamFile <- file.path(input)
@@ -46,7 +45,8 @@ run_fraser <- function(sampleTable){
   fds <- countRNAData(settings)
 
   fds <- calculatePSIValues(fds)
-    #FRASER vignette/gagneurlab settings:
+
+  #FRASER vignette/gagneurlab settings:
   #Currently, we suggest keeping only junctions which support the following:
   #minExpressionInOneSample - 20 - At least one sample has 20 (or more) reads
   #quantileMinExpression - 10 - The minimum total read count (N) an intron needs to have at the specified quantile across samples to pass the filter. See quantileForFiltering. 
@@ -56,19 +56,22 @@ run_fraser <- function(sampleTable){
 
   fds <- filterExpressionAndVariability(fds, minExpressionInOneSample = 20, quantileMinExpression=10, quantile = 0.95, minDeltaPsi=0.05, filter=FALSE)
   fds <- saveFraserDataSet(fds, name="fds-filter")
-#  plotheat = "plots_fraser.pdf"
-#  pdf(plotheat,onefile = TRUE)
-#  plotFilterExpression(fds, bins=100)
+  plotheat = "plots_fraser.pdf"
+  pdf(plotheat,onefile = TRUE)
+  plotFilterExpression(fds, bins=100)
 
   message(date(), ": dimension before filtering...")
-  print(dim(fds))
+  print(dim(assays(fds)$rawCountsJ))
+  print(dim(assays(fds)$rawCountsSS))
+
   fds <- fds[mcols(fds, type="j")[,"passed"]]
   message(date(), ": dimension after filtering...")
-  print(dim(fds))
+  print(dim(assays(fds)$rawCountsJ)) 
+  print(dim(assays(fds)$rawCountsSS))
 
   #Finds the optimal encoding dimension by injecting artificial splicing outlier ratios while maximizing the precision-recall curve.
   # Get range for latent space dimension, max tested dimensions = 6
-  mp <- 6
+  mp <- 6 
   a <- 2
   b <- min(ncol(fds), nrow(fds)) / mp   # N/mp
 
@@ -101,7 +104,7 @@ run_fraser <- function(sampleTable){
   message(date(), "...find best q: " ,bestq)
 
   # Add verbosity to the FRASER object
-  verbose(fds) <- 3
+  verbose(fds) <- 3 
   fds <- fit(fds, q=bestq, type=psiType, implementation=impl, iterations=15)
 
   # Heatmap of the sample correlation
@@ -113,19 +116,21 @@ run_fraser <- function(sampleTable){
 
   #all in one getEncDimRange, optimHyperParams, bestQ, fit
   #fds_fraser <- FRASER(fds_filtered, q=c(jaccard=2))
-
+  
   fds <- annotateRanges(fds, GRCh = 38)
 
   # Pvalues
   fds <- calculatePvalues(fds, type=psiType)
   # Adjust Pvalues
   fds <- calculatePadjValues(fds, type=psiType)
+  
 
   fds <- saveFraserDataSet(fds, name="fds-final")
-
+  
   return (fds)
 }
 
+
 write_fraser_output <- function(fraser_ds, prefix){
   # retrieve results with default and recommended cutoffs (padj <= 0.05 (0.1 gagneurlab) and # |deltaPsi| >= 0.3)
   fraser_res <- results(fraser_ds, all=TRUE)
@@ -150,4 +155,4 @@ main <- function(args){
 }
 
 
-main(args)
+main(args)
\ No newline at end of file
diff --git a/Fraser/1.99.3/meta.yml b/Fraser/1.99.3/meta.yml
new file mode 100644
index 00000000..5f1ecfa0
--- /dev/null
+++ b/Fraser/1.99.3/meta.yml
@@ -0,0 +1,25 @@
+name: fraser
+description: Run FRASER on a set of bam files
+keywords:
+  - fraser
+  - abberant splicing
+tools:
+  - fraser:
+      description: FRASER - Find RAre Splicing Events in RNA-seq Data
+      homepage: 
+      documentation: https://www.bioconductor.org/packages/release/bioc/vignettes/FRASER/inst/doc/FRASER.pdf 
+      licence:
+input:
+ - bams:
+      type: path
+      description: |
+        path directory where bam files are located
+output:
+  - meta:
+      type: file
+      description: |
+        tsv file with aberrant spling output per sample
+authors:
+  -
+maintainers:
+  -
\ No newline at end of file

From 21dab56afca563e5f614ac4fd78221fa87aca3b3 Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Mon, 26 Aug 2024 15:21:44 +0200
Subject: [PATCH 29/31] Newline, enter and genome_gtf passed as argument

---
 Fraser/1.99.3/environment.yml |   3 +-
 Fraser/1.99.3/fraser.R        | 248 ++++++++++++------------
 Fraser/1.99.3/main.nf         |   2 +-
 Fraser/1.99.3/meta.yml        |   2 +-
 Outrider/1.20.0/main.nf       |  26 +--
 Outrider/1.20.0/meta.yml      |   1 +
 Outrider/1.20.0/outrider.R    | 343 +++++++++++++++++-----------------
 7 files changed, 313 insertions(+), 312 deletions(-)

diff --git a/Fraser/1.99.3/environment.yml b/Fraser/1.99.3/environment.yml
index c40ff5c1..73a13da2 100644
--- a/Fraser/1.99.3/environment.yml
+++ b/Fraser/1.99.3/environment.yml
@@ -3,4 +3,5 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - bioconda::fastqc=0.12.1
\ No newline at end of file
+  - bioconda::fastqc=0.12.1
+  
\ No newline at end of file
diff --git a/Fraser/1.99.3/fraser.R b/Fraser/1.99.3/fraser.R
index 4ef6ac4b..1e9491b3 100644
--- a/Fraser/1.99.3/fraser.R
+++ b/Fraser/1.99.3/fraser.R
@@ -7,152 +7,146 @@ args <- commandArgs()
 
 
 get_st_rows <- function(input){
-  if(dir.exists(input)){
-    sampleID <- list.files(path=input,pattern="*.bam",full.names=FALSE)
-    bamFile <- list.files(path=input,pattern="*.bam",full.names=TRUE)
-    return(data.table(sampleID,bamFile))
-  }  
-  else if(file.exists(input)){
-    sampleID <- basename(file.path(input))
-    bamFile <- file.path(input)
-    return(data.table(sampleID,bamFile))
-  }
+    if(dir.exists(input)){
+        sampleID <- list.files(path=input,pattern="*.bam",full.names=FALSE)
+        bamFile <- list.files(path=input,pattern="*.bam",full.names=TRUE)
+        return(data.table(sampleID,bamFile))
+    }  
+    else if(file.exists(input)){
+        sampleID <- basename(file.path(input))
+        bamFile <- file.path(input)
+        return(data.table(sampleID,bamFile))
+    }
 }
 
 
 create_sample_table <- function(sample,ref,paired_end){
-  sampleTable <- rbind(get_st_rows(sample),get_st_rows(ref))
-  sampleTable <- sampleTable[!grepl(".bai",sampleID), ]
-  sampleTable$group <- c(1:nrow(sampleTable))
-  sampleTable$pairedEnd <- TRUE
+    sampleTable <- rbind(get_st_rows(sample),get_st_rows(ref))
+    sampleTable <- sampleTable[!grepl(".bai",sampleID), ]
+    sampleTable$group <- c(1:nrow(sampleTable))
+    sampleTable$pairedEnd <- TRUE
 
-  message(date(), ": sampleTable")
-  print(sampleTable)
+    message(date(), ": sampleTable")
+    print(sampleTable)
 
-  return(sampleTable)
+    return(sampleTable)
 }
 
 
 run_fraser <- function(sampleTable){
-  # create FRASER object
-  settings <- FraserDataSet(colData=sampleTable, workingDir="./FRASER_output")
-  message(date(), ": settings...")
-  print(settings)
-
-  register(MulticoreParam(workers=min(8, multicoreWorkers())))
-
-  # count reads
-  fds <- countRNAData(settings)
-
-  fds <- calculatePSIValues(fds)
-
-  #FRASER vignette/gagneurlab settings:
-  #Currently, we suggest keeping only junctions which support the following:
-  #minExpressionInOneSample - 20 - At least one sample has 20 (or more) reads
-  #quantileMinExpression - 10 - The minimum total read count (N) an intron needs to have at the specified quantile across samples to pass the filter. See quantileForFiltering. 
-  #quantile - 0.95 -  Defines at which percentile the quantileMinExpression filter is applied. 
-  #A value of 0.95 means that at least 5% of the samples need to have a total read count N >= quantileMinExpression to pass the filter.
-  #minDeltaPsi - 0.05 -  The minimal variation (in delta psi) required for an intron to pass the filter. gagneurlab suggests 0.05
-
-  fds <- filterExpressionAndVariability(fds, minExpressionInOneSample = 20, quantileMinExpression=10, quantile = 0.95, minDeltaPsi=0.05, filter=FALSE)
-  fds <- saveFraserDataSet(fds, name="fds-filter")
-  plotheat = "plots_fraser.pdf"
-  pdf(plotheat,onefile = TRUE)
-  plotFilterExpression(fds, bins=100)
-
-  message(date(), ": dimension before filtering...")
-  print(dim(assays(fds)$rawCountsJ))
-  print(dim(assays(fds)$rawCountsSS))
-
-  fds <- fds[mcols(fds, type="j")[,"passed"]]
-  message(date(), ": dimension after filtering...")
-  print(dim(assays(fds)$rawCountsJ)) 
-  print(dim(assays(fds)$rawCountsSS))
-
-  #Finds the optimal encoding dimension by injecting artificial splicing outlier ratios while maximizing the precision-recall curve.
-  # Get range for latent space dimension, max tested dimensions = 6
-  mp <- 6 
-  a <- 2
-  b <- min(ncol(fds), nrow(fds)) / mp   # N/mp
-
-  maxSteps <- 12
-  if(mp < 6){
-    maxSteps <- 15
-  }
-
-  Nsteps <- min(maxSteps, b)
-  pars_q <- unique(round(exp(seq(log(a),log(b),length.out = Nsteps))))
-
-  message(date(), ": getEncDimRange", pars_q)
-
-  psiType = "jaccard"
-  impl = "PCA"
-
-  fds <- saveFraserDataSet(fds, name="fds-normfalse")
-  # Heatmap of the sample correlation
-#  plotCountCorHeatmap(fds, type=psiType, logit=FALSE, normalized=FALSE)
-  # Heatmap of the intron/sample expression
-#  plotCountCorHeatmap(fds, type=psiType, logit=FALSE, normalized=FALSE, plotType="junctionSample", topJ=100, minDeltaPsi=0.05)
-
-  #set.seed(as.integer(random))
-  # hyperparameter opimization can be used to estimate the dimension q of the latent space of the data. 
-  # It works by artificially injecting outliers into the data and then comparing the AUC of recalling these outliers for different values of q.
-  fds <- optimHyperParams(fds, type=psiType, implementation=impl, q_param=pars_q, minDeltaPsi=0.05, plot=FALSE)
-  # retrieve the estimated optimal dimension of the latent space
-  #currentType(fds) <- psiType
-  bestq = bestQ(fds, type=psiType)
-  message(date(), "...find best q: " ,bestq)
-
-  # Add verbosity to the FRASER object
-  verbose(fds) <- 3 
-  fds <- fit(fds, q=bestq, type=psiType, implementation=impl, iterations=15)
-
-  # Heatmap of the sample correlation
-#  plotCountCorHeatmap(fds, type=psiType, logit=FALSE, normalized=TRUE)
-  # Heatmap of the intron/sample expression
-#  plotCountCorHeatmap(fds, type=psiType, logit=FALSE, normalized=TRUE, plotType="junctionSample", topJ=100, minDeltaPsi=0.05)
-  dev.off()
-  fds <- saveFraserDataSet(fds, name="fds-normtrue")
-
-  #all in one getEncDimRange, optimHyperParams, bestQ, fit
-  #fds_fraser <- FRASER(fds_filtered, q=c(jaccard=2))
-  
-  fds <- annotateRanges(fds, GRCh = 38)
-
-  # Pvalues
-  fds <- calculatePvalues(fds, type=psiType)
-  # Adjust Pvalues
-  fds <- calculatePadjValues(fds, type=psiType)
-  
-
-  fds <- saveFraserDataSet(fds, name="fds-final")
-  
-  return (fds)
+    # create FRASER object
+    settings <- FraserDataSet(colData=sampleTable, workingDir="./FRASER_output")
+    message(date(), ": settings...")
+    print(settings)
+
+    register(MulticoreParam(workers=min(8, multicoreWorkers())))
+
+    # count reads
+    fds <- countRNAData(settings)
+
+    fds <- calculatePSIValues(fds)
+
+    #FRASER vignette/gagneurlab settings:
+    #Currently, we suggest keeping only junctions which support the following:
+    #minExpressionInOneSample - 20 - At least one sample has 20 (or more) reads
+    #quantileMinExpression - 10 - The minimum total read count (N) an intron needs to have at the specified quantile across samples to pass the filter. See quantileForFiltering. 
+    #quantile - 0.95 -  Defines at which percentile the quantileMinExpression filter is applied.  # nolint
+    #A value of 0.95 means that at least 5% of the samples need to have a total read count N >= quantileMinExpression to pass the filter.
+    #minDeltaPsi - 0.05 -  The minimal variation (in delta psi) required for an intron to pass the filter. gagneurlab suggests 0.05
+
+    fds <- filterExpressionAndVariability(fds, minExpressionInOneSample = 20, quantileMinExpression=10, quantile = 0.95, minDeltaPsi=0.05, filter=FALSE)
+    fds <- saveFraserDataSet(fds, name="fds-filter")
+    plotheat = "plots_fraser.pdf"
+    pdf(plotheat,onefile = TRUE)
+    plotFilterExpression(fds, bins=100)
+
+    message(date(), ": dimension before filtering...")
+    print(dim(assays(fds)$rawCountsJ))
+    print(dim(assays(fds)$rawCountsSS))
+
+    fds <- fds[mcols(fds, type="j")[,"passed"]]
+    message(date(), ": dimension after filtering...")
+    print(dim(assays(fds)$rawCountsJ)) 
+    print(dim(assays(fds)$rawCountsSS))
+
+    #Finds the optimal encoding dimension by injecting artificial splicing outlier ratios while maximizing the precision-recall curve.
+    # Get range for latent space dimension, max tested dimensions = 6
+    mp <- 6 
+    a <- 2
+    b <- min(ncol(fds), nrow(fds)) / mp   # N/mp
+
+    maxSteps <- 12
+    if(mp < 6){
+        maxSteps <- 15
+    }
+
+    Nsteps <- min(maxSteps, b)
+    pars_q <- unique(round(exp(seq(log(a),log(b),length.out = Nsteps))))
+
+    message(date(), ": getEncDimRange", pars_q)
+
+    fds <- saveFraserDataSet(fds, name="fds-normfalse")
+    # Heatmap of the sample correlation
+    # plotCountCorHeatmap(fds, type="jaccard", logit=FALSE, normalized=FALSE)
+    # Heatmap of the intron/sample expression
+    # plotCountCorHeatmap(fds, type="jaccard", logit=FALSE, normalized=FALSE, plotType="junctionSample", topJ=100, minDeltaPsi=0.05)
+
+    #set.seed(as.integer(random))
+    # hyperparameter opimization can be used to estimate the dimension q of the latent space of the data. 
+    # It works by artificially injecting outliers into the data and then comparing the AUC of recalling these outliers for different values of q.
+    fds <- optimHyperParams(fds, type="jaccard", implementation="PCA", 
+    q_param=pars_q, minDeltaPsi=0.05, plot=FALSE)
+    # retrieve the estimated optimal dimension of the latent space
+    bestq = bestQ(fds, type="jaccard")
+    message(date(), "...find best q: " ,bestq)
+
+    # Add verbosity to the FRASER object
+    verbose(fds) <- 3 
+    fds <- fit(fds, q=bestq, type="jaccard", implementation="PCA", iterations=15)
+
+    # Heatmap of the sample correlation
+    #  plotCountCorHeatmap(fds, type="jaccard", logit=FALSE, normalized=TRUE)
+    # Heatmap of the intron/sample expression
+    #  plotCountCorHeatmap(fds, type="jaccard", logit=FALSE, normalized=TRUE, plotType="junctionSample", topJ=100, minDeltaPsi=0.05)
+    dev.off()
+    fds <- saveFraserDataSet(fds, name="fds-normtrue")
+
+    #all in one getEncDimRange, optimHyperParams, bestQ, fit
+    #fds_fraser <- FRASER(fds_filtered, q=c(jaccard=2))
+
+    fds <- annotateRanges(fds, GRCh = 38)
+
+    # Pvalues
+    fds <- calculatePvalues(fds, type="jaccard")
+    # Adjust Pvalues
+    fds <- calculatePadjValues(fds, type="jaccard")
+
+    fds <- saveFraserDataSet(fds, name="fds-final")
+
+    return(fds)
 }
 
 
 write_fraser_output <- function(fraser_ds, prefix){
-  # retrieve results with default and recommended cutoffs (padj <= 0.05 (0.1 gagneurlab) and # |deltaPsi| >= 0.3)
-  fraser_res <- results(fraser_ds, all=TRUE)
-  #res <- results(fds, psiType=psiTypes, aggregate=TRUE, collapse=FALSE, all=TRUE)
-  message(date(), ": result...")
-  print(fraser_res)
+    # retrieve results with default and recommended cutoffs (padj <= 0.05 (0.1 gagneurlab) and # |deltaPsi| >= 0.3)
+    fraser_res <- results(fraser_ds, all=TRUE)
+    message(date(), ": result...")
+    print(fraser_res)
 
-  write.table(as.data.frame(unname(fraser_res)), paste0("fraser_result_",prefix,".tsv"), sep="\t", quote=FALSE)
+    write.table(as.data.frame(unname(fraser_res)), paste0("fraser_result_",prefix,".tsv"), sep="\t", quote=FALSE)
 }
 
 
 main <- function(args){
-  ##Argument.Parser not in docker image..
-  sample <- args[6] #: bam + bai sample of interest
-  refset <- args[7] #: dir to ref set bams + bai
-  prefix <- args[8] #: prefix
-  pairedEnd <- args[9] #: pairedEnd (=TRUE)
-  sampleTable <- create_sample_table(sample, refset, pairedEnd)
-  message(date(), ": sampleTable")
-  fraser_ds <- run_fraser(sampleTable)
-  write_fraser_output(fraser_ds, prefix)
+    ##Argument.Parser not in docker image..
+    sample <- args[6] #: bam + bai sample of interest
+    refset <- args[7] #: dir to ref set bams + bai
+    prefix <- args[8] #: prefix
+    pairedEnd <- args[9] #: pairedEnd (=TRUE)
+    sampleTable <- create_sample_table(sample, refset, pairedEnd)
+    fraser_ds <- run_fraser(sampleTable)
+    write_fraser_output(fraser_ds, prefix)
 }
 
 
-main(args)
\ No newline at end of file
+main(args)
diff --git a/Fraser/1.99.3/main.nf b/Fraser/1.99.3/main.nf
index c63568f8..29ed828b 100644
--- a/Fraser/1.99.3/main.nf
+++ b/Fraser/1.99.3/main.nf
@@ -23,4 +23,4 @@ process FRASER {
     export BIOMART_CACHE=/hpc/diaggen/projects/woc/rna/container
     Rscript ${moduleDir}/fraser.R ${input} ${refset} ${prefix} TRUE
     """
-}
\ No newline at end of file
+}
diff --git a/Fraser/1.99.3/meta.yml b/Fraser/1.99.3/meta.yml
index 5f1ecfa0..fe11285f 100644
--- a/Fraser/1.99.3/meta.yml
+++ b/Fraser/1.99.3/meta.yml
@@ -22,4 +22,4 @@ output:
 authors:
   -
 maintainers:
-  -
\ No newline at end of file
+  -
diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index 6797671f..11ff5ace 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -6,21 +6,23 @@ process OUTRIDER {
     container "ghcr.io/umcugenetics/outrider_custom:0.0.1"
 
     input:
-    tuple val(meta), path(counts)
-    path(refset)
+        tuple val(meta), 
+        path(counts), 
+        path(refset),
+        path(genome_gtf)   
 
     output:
-    path("*.tsv"), emit: tsv
-    path("*.csv"), emit: csv
-    path("*.pdf"), emit: pdf
-//    path  "versions.yml"           , emit: versions
+        path("*.tsv"), emit: tsv
+        path("*.csv"), emit: csv
+        path("*.pdf"), emit: pdf
+//        path  "versions.yml", emit: versions
 
     when:
-    task.ext.when == null || task.ext.when
+        task.ext.when == null || task.ext.when
 
     script:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    Rscript ${moduleDir}/outrider.R ${counts} -r ${refset} -p ${prefix}
-    """
-}
\ No newline at end of file
+        def prefix = task.ext.prefix ?: "${meta.id}"
+        """
+        Rscript ${moduleDir}/outrider.R ${counts} -r ${refset} -p ${prefix} -g ${genome_gtf}
+        """
+}
diff --git a/Outrider/1.20.0/meta.yml b/Outrider/1.20.0/meta.yml
index e856b6c9..d5d083ff 100644
--- a/Outrider/1.20.0/meta.yml
+++ b/Outrider/1.20.0/meta.yml
@@ -35,3 +35,4 @@ authors:
   -
 maintainers:
   -
+
diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index a7781b36..2fd1e71a 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -19,165 +19,166 @@ parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+
                     help = "Files or directories containing the reference input files.")
 parser$add_argument("-p", "--pref", metavar = "prefix", nargs = "+",
                     help = "Prefix of file.", default="gene")
+parser$add_argument("-g", "--gtf", metavar = "genome_gtf", nargs = "+",
+                    help = "Genome gtf.")
 args <- parser$parse_args()
 
 
 read_input_files <- function(input){
-  sampleIDs <- c()
-  count_tables <- lapply(input, function(f) {
-    input_ext <- tools::file_ext(f)
-    if(input_ext == "Rds") {
-      rse <- readRDS(f)  # RangedSummarizedExperiment
-      sampleIDs <- append(sampleIDs, colnames(rse))
-      return(as_tibble(rownames_to_column(as.data.frame(assays(rse)$counts), var="rownames")))
-    } else if (input_ext %in% c("txt", "tsv")) {
-      count_table <- read_delim(f, show_col_types=FALSE, skip=1)
-      # col 1: samples IDs, col 9: counts
-      ct <- count_table[,c(1,9)]
-      names(ct)[1] <- "rownames"
-      sampleIDs <<- append(sampleIDs, colnames(ct)[2])
-      return(ct)
-    } else {
-      stop("Input file extension is not supported.")
-    }
-  })
-  return(list("sampleIDs"=sampleIDs, "count_tables"=count_tables))
+    sampleIDs <- c()
+    count_tables <- lapply(input, function(f) {
+        input_ext <- tools::file_ext(f)
+        if(input_ext == "Rds") {
+            rse <- readRDS(f)  # RangedSummarizedExperiment
+            sampleIDs <- append(sampleIDs, colnames(rse))
+            return(as_tibble(rownames_to_column(as.data.frame(assays(rse)$counts), var="rownames")))
+        } else if (input_ext %in% c("txt", "tsv")) {
+            count_table <- read_delim(f, show_col_types=FALSE, skip=1)
+            # col 1: samples IDs, col 9: counts
+            ct <- count_table[,c(1,9)]
+            names(ct)[1] <- "rownames"
+            sampleIDs <<- append(sampleIDs, colnames(ct)[2])
+            return(ct)
+        } else {
+            stop("Input file extension is not supported.")
+        }
+    })
+    return(list("sampleIDs"=sampleIDs, "count_tables"=count_tables))
 }
 
 
 get_input <- function(input){
-  # If directories are provided
-  if(all(sapply(input, function(x) dir.exists(x)))) {
-    retrieved_files <- sapply(input, function(d){
-      list.files(path = d, pattern = "Rds|txt|tsv", full.names = TRUE, recursive = FALSE)
-    })
-  } else if(all(sapply(input, function(x) file.exists(x)))) {  # If files are provided.
-    retrieved_files <- input
-  } else {  # If both dir and files are provided, or different tyes (character, int etc)
-    stop("Input is neither dir or file.")
-  }
-  
-  return(read_input_files(retrieved_files))
+    # If directories are provided
+    if(all(sapply(input, function(x) dir.exists(x)))) {
+        retrieved_files <- sapply(input, function(d){
+            list.files(path = d, pattern = "Rds|txt|tsv", full.names = TRUE, recursive = FALSE)
+        })
+    } else if(all(sapply(input, function(x) file.exists(x)))) {  # If files are provided.
+        retrieved_files <- input
+    } else {  # If both dir and files are provided, or different tyes (character, int etc)
+        stop("Input is neither dir or file.")
+    }
+    return(read_input_files(retrieved_files))
 }
 
 
 merge_count_tables <- function(lst_query, lst_ref){
-  # merge count tables together.
-  lst_count_tables <- c(lst_query, lst_ref)
-  all_counts <- lst_count_tables %>%
-    Reduce(function(dtf1,dtf2) dplyr::full_join(dtf1, dtf2,by="rownames"), .)
+    # merge count tables together.
+    lst_count_tables <- c(lst_query, lst_ref)
+    all_counts <- lst_count_tables %>%
+        Reduce(function(dtf1,dtf2) dplyr::full_join(dtf1, dtf2,by="rownames"), .)
+    return(all_counts)
 }
 
 
-filter_expression <- function(ods, query, prefix){
-  if(grepl("gene", prefix)){
-    ##FOR GENE LEVEL
-    ods <- filterExpression(ods, fpkmCutoff = 1, minCounts = FALSE, filterGenes = FALSE, gtfFile="/hpc/diaggen/data/databases/gencode/gencode.v44.primary_assembly.basic.annotation.gtf")
-  }
-  else{
-    ##FOR EXON LEVEL
-    ct <- read_delim(query, show_col_types=FALSE, skip=1)
-    print("identical exon ids:")
-    print(identical(ct$Geneid,rownames(assays(ods)$counts)))
-    readD <- apply(assays(ods)$counts, 2, function(x) x / sum(x) * 10^6)
-    countsFPKM <- readD / ct$Length * 10^3
-    perc95e <- apply(countsFPKM, 1, function(x) quantile(x,probs=0.95))
-    mcols(ods)$passedFilter <- perc95e>1
-    mcols(ods)$basepairs <- ct$Length
-
-    ##FOR EXON RATIO
-    ##REMOVE ##FOR EXON LEVEL code above
-    ##UNCOMMENT BELOW STEPS
-#    print("EXON RATIO")
-#    ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
-  }
-
-  # display the FPKM distribution of counts.
-  ods <- plotFPKM(ods)
-  message(date(), ": dim before filtering...")
-  print(dim(assays(ods)$counts))
-
-  ##SUBSETTING
-  ods <- ods[mcols(ods)$passedFilter,]
-  message(date(), ": dim after filtering...")
-  print(dim(assays(ods)$counts))
-  return(ods)
+filter_expression <- function(ods, query, prefix, genome_gtf){
+    if(grepl("gene", prefix)){
+        ##FOR GENE LEVEL
+        ods <- filterExpression(ods, fpkmCutoff = 1, minCounts = FALSE, filterGenes = FALSE, gtfFile=genome_gtf)
+    }
+    else{
+        ##FOR EXON LEVEL
+        ct <- read_delim(query, show_col_types=FALSE, skip=1)
+        print("identical exon ids:")
+        print(identical(ct$Geneid,rownames(assays(ods)$counts)))
+        readD <- apply(assays(ods)$counts, 2, function(x) x / sum(x) * 10^6)
+        countsFPKM <- readD / ct$Length * 10^3
+        perc95e <- apply(countsFPKM, 1, function(x) quantile(x,probs=0.95))
+        mcols(ods)$passedFilter <- perc95e>1
+        mcols(ods)$basepairs <- ct$Length
+
+        ##FOR EXON RATIO
+        ##REMOVE ##FOR EXON LEVEL code above
+        ##UNCOMMENT BELOW STEPS
+#        print("EXON RATIO")
+#        ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
+    }
+
+    # display the FPKM distribution of counts.
+    ods <- plotFPKM(ods)
+    message(date(), ": dim before filtering...")
+    print(dim(assays(ods)$counts))
+
+    ##SUBSETTING
+    ods <- ods[mcols(ods)$passedFilter,]
+    message(date(), ": dim after filtering...")
+    print(dim(assays(ods)$counts))
+    return(ods)
 }
 
 
-run_outrider <- function(all_counts, query, prefix) {
-  # TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
-  all_counts_matrix <- as.matrix(all_counts)[,-1]
-  mode(all_counts_matrix) <- "integer"
-  rownames(all_counts_matrix) <- all_counts$rownames
+run_outrider <- function(all_counts, query, prefix, genome_gtf) {
+    # TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
+    all_counts_matrix <- as.matrix(all_counts)[,-1]
+    mode(all_counts_matrix) <- "integer"
+    rownames(all_counts_matrix) <- all_counts$rownames
 
-  ods <- OutriderDataSet(countData=all_counts_matrix)
+    ods <- OutriderDataSet(countData=all_counts_matrix)
 
-  plotheat = "counts_heatplots.pdf"
-  pdf(plotheat,onefile = TRUE)
+    plotheat = "counts_heatplots.pdf"
+    pdf(plotheat,onefile = TRUE)
 
-  ods <- filter_expression(ods, query, prefix)
+    ods <- filter_expression(ods, query, prefix, genome_gtf)
 
-  # Heatmap of the sample correlation
-  # it can also annotate the clusters resulting from the dendrogram
-  ods <- plotCountCorHeatmap(ods, normalized=FALSE)
+    # Heatmap of the sample correlation
+    # it can also annotate the clusters resulting from the dendrogram
+    ods <- plotCountCorHeatmap(ods, normalized=FALSE)
 
-  # Heatmap of the gene/sample expression
-  ods <- plotCountGeneSampleHeatmap(ods, normalized=FALSE)
+    # Heatmap of the gene/sample expression
+    ods <- plotCountGeneSampleHeatmap(ods, normalized=FALSE)
 
-  ods <- estimateSizeFactors(ods)
-  message(date(), ": sizeFactors...")
-  print(ods$sizeFactor)
+    ods <- estimateSizeFactors(ods)
+    message(date(), ": sizeFactors...")
+    print(ods$sizeFactor)
 
-  implementation <- "autoencoder"
-  pars_q <- seq(2, min(100, ncol(ods) - 1, nrow(ods) - 1), 2)
+    pars_q <- seq(2, min(100, ncol(ods) - 1, nrow(ods) - 1), 2)
 
-  message(date(), ": findEncodingDim...")
-  ods <- findEncodingDim(ods, params = pars_q, implementation=implementation, BPPARAM=MulticoreParam(8))
+    message(date(), ": findEncodingDim...")
+    ods <- findEncodingDim(ods, params = pars_q, implementation="autoencoder", BPPARAM=MulticoreParam(8))
   
-  #find best q (dimension)
-  opt_q <- getBestQ(ods)
-  message(date(), ": getbestQ...")
-  print(opt_q)
-
-  ods <- controlForConfounders(ods, q = opt_q, BPPARAM = MulticoreParam(8))
-
-  # After controlling for confounders the heatmap should be plotted again. 
-  # If it worked, no batches should be present and the correlations between samples should be reduced and close to zero. [1]*
-  ods <- plotCountCorHeatmap(ods, normalized=TRUE)
-  ods <- plotCountGeneSampleHeatmap(ods, normalized=TRUE)
-  dev.off()
-
-#  if(grepl("^(peer|pca)$", implementation)){ 
-#     message(date(), ": Fitting the data ...") # NOT with autoencoder
-#     ods <- fit(ods, BPPARAM=MulticoreParam(8))
-#  }
-
-  ods <- computePvalues(ods, alternative="two.sided", method="BY", BPPARAM = MulticoreParam(8))
-  out <- computeZscores(ods)
-
-  # run full OUTRIDER pipeline (control, fit model, calculate P-values)
-# out <- OUTRIDER(ods, BPPARAM=MulticoreParam(8))
-  return(out)
+    #find best q (dimension)
+    opt_q <- getBestQ(ods)
+    message(date(), ": getbestQ...")
+    print(opt_q)
+
+    ods <- controlForConfounders(ods, q = opt_q, BPPARAM = MulticoreParam(8))
+
+    # After controlling for confounders the heatmap should be plotted again. 
+    # If it worked, no batches should be present and the correlations between samples should be reduced and close to zero. [1]*
+    ods <- plotCountCorHeatmap(ods, normalized=TRUE)
+    ods <- plotCountGeneSampleHeatmap(ods, normalized=TRUE)
+    dev.off()
+
+#    if(grepl("^(peer|pca)$", implementation)){ 
+#        message(date(), ": Fitting the data ...") # NOT with autoencoder
+#        ods <- fit(ods, BPPARAM=MulticoreParam(8))
+#    }
+
+    ods <- computePvalues(ods, alternative="two.sided", method="BY", BPPARAM = MulticoreParam(8))
+    out <- computeZscores(ods)
+
+    # run full OUTRIDER pipeline (control, fit model, calculate P-values)
+    # out <- OUTRIDER(ods, BPPARAM=MulticoreParam(8))
+    return(out)
 }
 
 ##Only necessary for Erasmus app
 get_ct_emcapp <- function(query, prefix){
-  count_table <- read_delim(query, show_col_types=FALSE, skip=1)
-  ct <- count_table[,c(2,3,4,8,1)]
-  colnames(ct)<- c("chr","start","end","gene","geneID")
-  ct$chr <- str_split_fixed(ct$chr,';',2)[,1]
-  ct$start <- str_split_fixed(ct$start,';',2)[,1]
-  ct$end <- gsub("^.*;", "", ct$end)
-
-  ##For gene level: create countdata table Identifier chr_start_end_genename     
-  ct$emcID <- paste(ct$chr,ct$start,ct$end,ct$gene,sep="_")
-  ##For exon level: create countdata table Identifier: add Ensemble exon_id: chr_start_end_genename-exon_id
-  if(grepl("exon", prefix)){
-    ct$emcID <- paste(ct$emcID,ct$geneID,sep="-")
-  }
-
-  return(ct)
+    count_table <- read_delim(query, show_col_types=FALSE, skip=1)
+    ct <- count_table[,c(2,3,4,8,1)]
+    colnames(ct)<- c("chr","start","end","gene","geneID")
+    ct$chr <- str_split_fixed(ct$chr,';',2)[,1]
+    ct$start <- str_split_fixed(ct$start,';',2)[,1]
+    ct$end <- gsub("^.*;", "", ct$end)
+
+    ##For gene level: create countdata table Identifier chr_start_end_genename     
+    ct$emcID <- paste(ct$chr,ct$start,ct$end,ct$gene,sep="_")
+    ##For exon level: create countdata table Identifier: add Ensemble exon_id: chr_start_end_genename-exon_id
+    if(grepl("exon", prefix)){
+        ct$emcID <- paste(ct$emcID,ct$geneID,sep="-")
+    }
+
+    return(ct)
 }
 
 ##Only necessary for Erasmus app
@@ -199,55 +200,57 @@ save_res_emcapp <- function(ct, res, prefix, out_path){
 
 ##Only necessary for Erasmus app
 save_count_meta_emcapp <- function(ct, all_counts, out_path, prefix){
-  ct <- ct[,c("emcID","geneID")]
-  colnames(all_counts)[1]<-c("geneID")
-  merge_counts <- merge(all_counts, ct, by = "geneID", all.ct = TRUE)
-  merge_counts$geneID <- merge_counts$emcID
-  counts_out <- merge_counts[,c(1:ncol(merge_counts)-1)]
-  colnames(counts_out)[1]<-c("")
-
-  ##countdata output table EMC app
-  if(grepl("gene", prefix)){
-    write_tsv(counts_out, paste0(out_path, "umcu_rnaseq_genes_counts.tsv"))
-  }else{
-    write_tsv(counts_out, paste0(out_path, "umcu_rnaseq_exons_counts.tsv"))}
+    ct <- ct[,c("emcID","geneID")]
+    colnames(all_counts)[1]<-c("geneID")
+    merge_counts <- merge(all_counts, ct, by = "geneID", all.ct = TRUE)
+    merge_counts$geneID <- merge_counts$emcID
+    counts_out <- merge_counts[,c(1:ncol(merge_counts)-1)]
+    colnames(counts_out)[1]<-c("")
+
+    ##countdata output table EMC app
+    if(grepl("gene", prefix)){
+        write_tsv(counts_out, paste0(out_path, "umcu_rnaseq_genes_counts.tsv"))
+    }else{
+        write_tsv(counts_out, paste0(out_path, "umcu_rnaseq_exons_counts.tsv"))
+    }
   
-  ##Meta output table EMC app
-  treatment <- "untreated"
-  if(grepl("chx|CHX",prefix)){ treatment <- "chx"}
-  metadata<-data.frame(colnames(counts_out[,2:ncol(counts_out)]),treatment,"fib","umcu_rnaseq",0,"")
-  colnames(metadata)<-c("sample_id","treatment","species","experiment_GS","gender","drop")
-  if(grepl("gene",prefix)){
-    write.csv2(metadata,paste0(out_path, "umcu_rnaseq_metadata_genes.csv"), row.names = FALSE, quote=FALSE)
-  }else{
-    write.csv2(metadata,paste0(out_path, "umcu_rnaseq_metadata_exons.csv"), row.names = FALSE, quote=FALSE)}
+    ##Meta output table EMC app
+    treatment <- "untreated"
+    if(grepl("chx|CHX",prefix)){ treatment <- "chx"}
+    metadata<-data.frame(colnames(counts_out[,2:ncol(counts_out)]),treatment,"fib","umcu_rnaseq",0,"")
+    colnames(metadata)<-c("sample_id","treatment","species","experiment_GS","gender","drop")
+    if(grepl("gene",prefix)){
+        write.csv2(metadata,paste0(out_path, "umcu_rnaseq_metadata_genes.csv"), row.names = FALSE, quote=FALSE)
+    }else{
+        write.csv2(metadata,paste0(out_path, "umcu_rnaseq_metadata_exons.csv"), row.names = FALSE, quote=FALSE)
+    }
 }
 
 save_output <- function(out_path, out_outrider, ref_samples, prefix, query, padj_thres=0.05, zscore_thres=0, a=TRUE) {
-#  res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
-  # Reference samples are excluded from final results. 
-#  query_res <- filter(res, !(sampleID %in% ref_samples))
-  res <- as_tibble(results(out_outrider, all=a))
-  query_res <- res
-
-  # Write output table with aberrant expressed targets.
-  write_tsv(query_res, paste0(out_path, prefix, ".outrider_result.tsv"))
+#    res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
+#    # Reference samples are excluded from final results. 
+#    query_res <- filter(res, !(sampleID %in% ref_samples))
+      res <- as_tibble(results(out_outrider, all=a))
+    query_res <- res
+
+    # Write output table with aberrant expressed targets.
+    write_tsv(query_res, paste0(out_path, prefix, ".outrider_result.tsv"))
 }
 
 # TODO: investigate memory usage and if possible reduced / parallel.
-main <- function(query, ref, output_path, prefix){
-  query_data <- get_input(query)
-  ref_data <- get_input(ref)
-  all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
-
-  output <- run_outrider(all_counts, query, prefix)
-  save_output(output_path, output, ref_data$sampleIDs, prefix, query)
-
-  ##Only necessary for Erasmus app, to get the counts, meta and outrider results tables
-  ct_emcapp <- get_ct_emcapp(query, prefix)
-  save_count_meta_emcapp(ct_emcapp, all_counts, output_path, prefix)
-  save_res_emcapp(ct_emcapp, output, prefix, output_path)
+main <- function(query, ref, output_path, prefix, genome_gtf){
+    query_data <- get_input(query)
+    ref_data <- get_input(ref)
+    all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
+
+    output <- run_outrider(all_counts, query, prefix, genome_gtf)
+    save_output(output_path, output, ref_data$sampleIDs, prefix, query)
+
+    ##Only necessary for Erasmus app, to get the counts, meta and outrider results tables
+    ct_emcapp <- get_ct_emcapp(query, prefix)
+    save_count_meta_emcapp(ct_emcapp, all_counts, output_path, prefix)
+    save_res_emcapp(ct_emcapp, output, prefix, output_path)
 }
 
 
-main(args$query, args$ref, args$output_path, args$pref)
\ No newline at end of file
+main(args$query, args$ref, args$output_path, args$pref)

From 84a34b14003f186236de1ac03e778901e5d700bf Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Mon, 26 Aug 2024 16:38:09 +0200
Subject: [PATCH 30/31] input format

---
 Outrider/1.20.0/main.nf | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Outrider/1.20.0/main.nf b/Outrider/1.20.0/main.nf
index 11ff5ace..283a58d3 100644
--- a/Outrider/1.20.0/main.nf
+++ b/Outrider/1.20.0/main.nf
@@ -6,9 +6,8 @@ process OUTRIDER {
     container "ghcr.io/umcugenetics/outrider_custom:0.0.1"
 
     input:
-        tuple val(meta), 
-        path(counts), 
-        path(refset),
+        tuple val(meta), path(counts) 
+        path(refset)
         path(genome_gtf)   
 
     output:

From b36dbb98624943e046128ecfb931dabe0b89a676 Mon Sep 17 00:00:00 2001
From: lonnekevanbrussel <125465018+lonnekevanbrussel@users.noreply.github.com>
Date: Mon, 26 Aug 2024 20:02:18 +0200
Subject: [PATCH 31/31] working file now with gtf file as argument

---
 Outrider/1.20.0/outrider.R | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/Outrider/1.20.0/outrider.R b/Outrider/1.20.0/outrider.R
index 2fd1e71a..aa964e80 100644
--- a/Outrider/1.20.0/outrider.R
+++ b/Outrider/1.20.0/outrider.R
@@ -20,7 +20,7 @@ parser$add_argument("-r", "--ref", metavar = "reference_input_files", nargs = "+
 parser$add_argument("-p", "--pref", metavar = "prefix", nargs = "+",
                     help = "Prefix of file.", default="gene")
 parser$add_argument("-g", "--gtf", metavar = "genome_gtf", nargs = "+",
-                    help = "Genome gtf.")
+                    help = "Genome gtf file")
 args <- parser$parse_args()
 
 
@@ -71,10 +71,10 @@ merge_count_tables <- function(lst_query, lst_ref){
 }
 
 
-filter_expression <- function(ods, query, prefix, genome_gtf){
+filter_expression <- function(ods, query, prefix, gtf){
     if(grepl("gene", prefix)){
         ##FOR GENE LEVEL
-        ods <- filterExpression(ods, fpkmCutoff = 1, minCounts = FALSE, filterGenes = FALSE, gtfFile=genome_gtf)
+        ods <- filterExpression(ods, fpkmCutoff = 1, minCounts = FALSE, filterGenes = FALSE, gtfFile=gtf)
     }
     else{
         ##FOR EXON LEVEL
@@ -90,12 +90,12 @@ filter_expression <- function(ods, query, prefix, genome_gtf){
         ##FOR EXON RATIO
         ##REMOVE ##FOR EXON LEVEL code above
         ##UNCOMMENT BELOW STEPS
-#        print("EXON RATIO")
-#        ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
+ #       print("EXON RATIO")
+ #       ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE)
     }
 
     # display the FPKM distribution of counts.
-    ods <- plotFPKM(ods)
+    #ods <- plotFPKM(ods)
     message(date(), ": dim before filtering...")
     print(dim(assays(ods)$counts))
 
@@ -107,7 +107,7 @@ filter_expression <- function(ods, query, prefix, genome_gtf){
 }
 
 
-run_outrider <- function(all_counts, query, prefix, genome_gtf) {
+run_outrider <- function(all_counts, query, prefix, gtf) {
     # TODO: change to add to single object (ods) in case OOM, instead of renaming the vars.
     all_counts_matrix <- as.matrix(all_counts)[,-1]
     mode(all_counts_matrix) <- "integer"
@@ -118,7 +118,7 @@ run_outrider <- function(all_counts, query, prefix, genome_gtf) {
     plotheat = "counts_heatplots.pdf"
     pdf(plotheat,onefile = TRUE)
 
-    ods <- filter_expression(ods, query, prefix, genome_gtf)
+    ods <- filter_expression(ods, query, prefix, gtf)
 
     # Heatmap of the sample correlation
     # it can also annotate the clusters resulting from the dendrogram
@@ -150,15 +150,15 @@ run_outrider <- function(all_counts, query, prefix, genome_gtf) {
     dev.off()
 
 #    if(grepl("^(peer|pca)$", implementation)){ 
-#        message(date(), ": Fitting the data ...") # NOT with autoencoder
-#        ods <- fit(ods, BPPARAM=MulticoreParam(8))
+#         message(date(), ": Fitting the data ...") # NOT with autoencoder
+#         ods <- fit(ods, BPPARAM=MulticoreParam(8))
 #    }
 
     ods <- computePvalues(ods, alternative="two.sided", method="BY", BPPARAM = MulticoreParam(8))
     out <- computeZscores(ods)
 
-    # run full OUTRIDER pipeline (control, fit model, calculate P-values)
-    # out <- OUTRIDER(ods, BPPARAM=MulticoreParam(8))
+#    run full OUTRIDER pipeline (control, fit model, calculate P-values)
+#    out <- OUTRIDER(ods, BPPARAM=MulticoreParam(8))
     return(out)
 }
 
@@ -228,9 +228,9 @@ save_count_meta_emcapp <- function(ct, all_counts, out_path, prefix){
 
 save_output <- function(out_path, out_outrider, ref_samples, prefix, query, padj_thres=0.05, zscore_thres=0, a=TRUE) {
 #    res <- as_tibble(results(out_outrider, padjCutoff=padj_thres, zScoreCutoff=zscore_thres, all=a))
-#    # Reference samples are excluded from final results. 
+    ##Reference samples are excluded from final results. 
 #    query_res <- filter(res, !(sampleID %in% ref_samples))
-      res <- as_tibble(results(out_outrider, all=a))
+    res <- as_tibble(results(out_outrider, all=a))
     query_res <- res
 
     # Write output table with aberrant expressed targets.
@@ -238,12 +238,12 @@ save_output <- function(out_path, out_outrider, ref_samples, prefix, query, padj
 }
 
 # TODO: investigate memory usage and if possible reduced / parallel.
-main <- function(query, ref, output_path, prefix, genome_gtf){
+main <- function(query, ref, output_path, prefix, gtf){
     query_data <- get_input(query)
     ref_data <- get_input(ref)
     all_counts <- merge_count_tables(query_data$count_tables, ref_data$count_tables)
 
-    output <- run_outrider(all_counts, query, prefix, genome_gtf)
+    output <- run_outrider(all_counts, query, prefix, gtf)
     save_output(output_path, output, ref_data$sampleIDs, prefix, query)
 
     ##Only necessary for Erasmus app, to get the counts, meta and outrider results tables
@@ -253,4 +253,4 @@ main <- function(query, ref, output_path, prefix, genome_gtf){
 }
 
 
-main(args$query, args$ref, args$output_path, args$pref)
+main(args$query, args$ref, args$output_path, args$pref, args$gtf)
\ No newline at end of file