UMCUGenetics · ALuesink · Aug 25, 2025 · Aug 25, 2025 · Sep 1, 2025 · Sep 1, 2025
diff --git a/DIMS/AssignToBins.R b/DIMS/AssignToBins.R
@@ -1,19 +1,22 @@
 # load required packages
+library("argparse")
 suppressPackageStartupMessages(library("xcms"))
 
-# define parameters
-cmd_args <- commandArgs(trailingOnly = TRUE)
+parser <- ArgumentParser(description = "AssignToBins")
 
-mzml_filepath <- cmd_args[1]
-breaks_filepath <- cmd_args[2]
-resol <- as.numeric(cmd_args[3])
+parser$add_argument("--mzML_filepath", dest = "mzml_filepath",
+                    help = "File path for the mzML file", required = TRUE)
+parser$add_argument("--breaks_filepath", dest = "breaks_filepath",
+                    help = "File path for the breaks RData file", required = TRUE)
+
+args <- parser$parse_args()
 
 # load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
 # trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
-load(breaks_filepath)
+load(args$breaks_filepath)
 
 # get sample name
-techrep_name <- sub("\\..*$", "", basename(mzml_filepath))
+techrep_name <- sub("\\..*$", "", basename(args$mzml_filepath))
 
 options(digits = 16)
 
@@ -26,7 +29,7 @@ neg_bins <- bins
 dims_thresh <- 100
 
 # read in the data for 1 sample
-raw_data <- suppressMessages(xcms::xcmsRaw(mzml_filepath))
+raw_data <- suppressMessages(xcms::xcmsRaw(args$mzml_filepath))
 
 # Generate a matrix with retention times and intensities
 raw_data_matrix <- xcms::rawMat(raw_data)
@@ -41,13 +44,13 @@ neg_times_trimmed <- neg_times[neg_times > trim_left_neg & neg_times < trim_righ
 # get TIC intensities for areas between trim_left and trim_right
 tic_intensity_persample <- cbind(raw_data@scantime, raw_data@tic)
 colnames(tic_intensity_persample) <- c("retention_time", "tic_intensity")
-tic_intensity_pos <- tic_intensity_persample[tic_intensity_persample[ , "retention_time"] > min(pos_times_trimmed) &
-                                             tic_intensity_persample[ , "retention_time"] < max(pos_times_trimmed), ]
-tic_intensity_neg <- tic_intensity_persample[tic_intensity_persample[ , "retention_time"] > min(neg_times_trimmed) &
-                                             tic_intensity_persample[ , "retention_time"] < max(neg_times_trimmed), ]
+tic_intensity_pos <- tic_intensity_persample[tic_intensity_persample[, "retention_time"] > min(pos_times_trimmed) &
+                                               tic_intensity_persample[, "retention_time"] < max(pos_times_trimmed), ]
+tic_intensity_neg <- tic_intensity_persample[tic_intensity_persample[, "retention_time"] > min(neg_times_trimmed) &
+                                               tic_intensity_persample[, "retention_time"] < max(neg_times_trimmed), ]
 # calculate weighted mean of intensities for pos and neg separately
-mean_pos <- weighted.mean(tic_intensity_pos[ , "tic_intensity"], tic_intensity_pos[ , "tic_intensity"])
-mean_neg <- weighted.mean(tic_intensity_neg[ , "tic_intensity"], tic_intensity_neg[ , "tic_intensity"])
+mean_pos <- weighted.mean(tic_intensity_pos[, "tic_intensity"], tic_intensity_pos[, "tic_intensity"])
+mean_neg <- weighted.mean(tic_intensity_neg[, "tic_intensity"], tic_intensity_neg[, "tic_intensity"])
 # intensity per scan should be at least 80% of weighted mean
 dims_thresh_pos <- 0.8 * mean_pos
 dims_thresh_neg <- 0.8 * mean_neg
@@ -67,17 +70,17 @@ neg_raw_data_matrix <- raw_data_matrix[neg_index, ]
 
 # Get index for binning intensity values
 bin_indices_pos <- cut(
-  pos_raw_data_matrix[, "mz"], 
+  pos_raw_data_matrix[, "mz"],
   breaks_fwhm,
-  include.lowest = TRUE, 
-  right = TRUE, 
+  include.lowest = TRUE,
+  right = TRUE,
   labels = FALSE
 )
 bin_indices_neg <- cut(
-  neg_raw_data_matrix[, "mz"], 
-  breaks_fwhm, 
-  include.lowest = TRUE, 
-  right = TRUE, 
+  neg_raw_data_matrix[, "mz"],
+  breaks_fwhm,
+  include.lowest = TRUE,
+  right = TRUE,
   labels = FALSE
 )
 

diff --git a/DIMS/AssignToBins.nf b/DIMS/AssignToBins.nf
@@ -1,7 +1,7 @@
 process AssignToBins {
     tag "DIMS AssignToBins ${file_id}"
     label 'AssignToBins'
-    container = 'docker://umcugenbioinf/dims:1.3'
+    container = 'docker://umcugenbioinf/dims:1.4'
     shell = ['/bin/bash', '-euo', 'pipefail']
 
     input:
@@ -13,7 +13,9 @@ process AssignToBins {
 
     script:
         """
-        Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R $mzML_file $breaks_file $params.resolution
+        Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R \\
+            --mzML_filepath $mzML_file \\
+            --breaks_filepath $breaks_file
         """
 }
 

diff --git a/DIMS/AverageTechReplicates.R b/DIMS/AverageTechReplicates.R
@@ -1,28 +1,37 @@
 # adapted from 3-AverageTechReplicates.R
 
 # load packages
+library("argparse")
 library("ggplot2")
 library("gridExtra")
 
-# define parameters
-cmd_args <- commandArgs(trailingOnly = TRUE)
+parser <- ArgumentParser(description = "AverageTechReplicates")
 
-init_file <- cmd_args[1]
-nr_replicates <- as.numeric(cmd_args[2])
-run_name <- cmd_args[3]
-dims_matrix <- cmd_args[4]
-highest_mz_file <- cmd_args[5]
-highest_mz <- get(load(highest_mz_file))
-breaks_filepath <- cmd_args[6]
-thresh2remove <- as.numeric(cmd_args[7])
+parser$add_argument("--init_filepath", dest = "init_file",
+                    help = "File path for the init RData file", required = TRUE)
+parser$add_argument("-n", "--nr_replicates", dest = "nr_replicates", type = "integer",
+                    help = "Number of replicates", required = TRUE)
+parser$add_argument("--run_name", dest = "run_name",
+                    help = "The run name/analysis ID", required = TRUE)
+parser$add_argument("--matrix", dest = "dims_matrix",
+                    help = "The matrix used, e.g. Plasma, Research, ...")
+parser$add_argument("--highest_mz_file", dest = "highest_mz_file",
+                    help = "File path for the highest Mz RData file", required = TRUE)
+parser$add_argument("--breaks_filepath", dest = "breaks_filepath",
+                    help = "File path for the breaks RData file", required = TRUE)
+
+args <- parser$parse_args()
+
+highest_mz <- get(load(args$highest_mz_file))
+thresh2remove <- 1000000000
 
 remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
   # collect list of samples to remove from replication pattern
   remove_from_group <- NULL
-  for (sample_nr in 1:length(repl_pattern)){
+  for (sample_nr in seq_along(repl_pattern)){
     repl_pattern_1sample <- repl_pattern[[sample_nr]]
     remove <- NULL
-    for (file_nr in 1:length(repl_pattern_1sample)) {
+    for (file_nr in seq_along(repl_pattern_1sample)) {
       if (repl_pattern_1sample[file_nr] %in% bad_samples) {
         remove <- c(remove, file_nr)
       }
@@ -41,11 +50,11 @@ remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
 }
 
 # load init_file: contains repl_pattern
-load(init_file)
+load(args$init_file)
 
 # load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
 # trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
-load(breaks_filepath)
+load(args$breaks_filepath)
 
 # lower the threshold for non Plasma matrices
 if (dims_matrix != "Plasma") {
@@ -64,15 +73,15 @@ if (highest_mz > 700) {
 remove_neg <- NULL
 remove_pos <- NULL
 cat("Pklist sum threshold to remove technical replicate:", thresh2remove, "\n")
-for (sample_nr in 1:length(repl_pattern)) {
+for (sample_nr in seq_along(repl_pattern)) {
   tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
   tech_reps_array_pos <- NULL
   tech_reps_array_neg <- NULL
   sum_neg <- 0
   sum_pos <- 0
   nr_pos <- 0
   nr_neg <- 0
-  for (file_nr in 1:length(tech_reps)) {
+  for (file_nr in seq_along(tech_reps)) {
     load(paste(tech_reps[file_nr], ".RData", sep = ""))
     cat("\n\nParsing", tech_reps[file_nr])
     # negative scanmode
@@ -96,7 +105,7 @@ for (sample_nr in 1:length(repl_pattern)) {
     }
     tech_reps_array_pos <- cbind(tech_reps_array_pos, peak_list$pos)
   }
-  # save to file  
+  # save to file
   if (nr_neg != 0) {
     sum_neg[, 1] <- sum_neg[, 1] / nr_neg
     colnames(sum_neg) <- names(repl_pattern)[sample_nr]
@@ -109,25 +118,25 @@ for (sample_nr in 1:length(repl_pattern)) {
   }
 }
 
-pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
+pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, args$nr_replicates)
 repl_pattern_filtered <- pattern_list$pattern
 save(repl_pattern_filtered, file = "negative_repl_pattern.RData")
 write.table(
-  remove_neg, 
-  file = "miss_infusions_negative.txt", 
-  row.names = FALSE, 
-  col.names = FALSE, 
+  remove_neg,
+  file = "miss_infusions_negative.txt",
+  row.names = FALSE,
+  col.names = FALSE,
   sep = "\t"
 )
 
-pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
+pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, args$nr_replicates)
 repl_pattern_filtered <- pattern_list$pattern
 save(repl_pattern_filtered, file = "positive_repl_pattern.RData")
 write.table(
-  remove_pos, 
-  file = "miss_infusions_positive.txt", 
-  row.names = FALSE, 
-  col.names = FALSE, 
+  remove_pos,
+  file = "miss_infusions_positive.txt",
+  row.names = FALSE,
+  col.names = FALSE,
   sep = "\t"
 )
 
@@ -150,10 +159,10 @@ for (file in tic_files) {
 # create a list with information for all TIC plots
 tic_plot_list <- list()
 plot_nr <-  0
-for (sample_nr in c(1:length(repl_pattern))) {
+for (sample_nr in seq_along(repl_pattern)) {
   tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
   sample_name <- names(repl_pattern)[sample_nr]
-  for (file_nr in 1:length(tech_reps)) {
+  for (file_nr in seq_along(tech_reps)) {
     plot_nr <- plot_nr + 1
     # read file with retention time, intensity and dims_threshold values
     repl1_nr <- read.table(paste0(tech_reps[file_nr], "_TIC.txt"))
@@ -163,7 +172,7 @@ for (sample_nr in c(1:length(repl_pattern))) {
     # for replicates with bad TIC, determine what color the border of the plot should be
     bad_color_pos <- tech_reps[file_nr] %in% remove_pos
     bad_color_neg <- tech_reps[file_nr] %in% remove_neg
-    if (bad_color_neg & bad_color_pos) {
+    if (bad_color_neg && bad_color_pos) {
       plot_color <- "#F8766D"
     } else if (bad_color_pos) {
       plot_color <- "#ED8141"
@@ -191,19 +200,19 @@ for (sample_nr in c(1:length(repl_pattern))) {
 }
 
 # create a layout matrix dependent on number of replicates
-layout <- matrix(1:(10 * nr_replicates), 10, nr_replicates, TRUE)
+layout <- matrix(1:(10 * args$nr_replicates), 10, args$nr_replicates, TRUE)
 # put TIC plots in matrix
 tic_plot_pdf <- marrangeGrob(
   grobs = tic_plot_list,
-  nrow = 10, ncol = nr_replicates,
+  nrow = 10, ncol = args$nr_replicates,
   layout_matrix = layout,
   top = quote(paste(
-    "TICs of run", run_name,
+    "TICs of run", args$run_name,
     " \n colors: red = both modes misinjection, orange = pos mode misinjection, purple = neg mode misinjection \n ",
     g, "/", npages
   ))
 )
 
 # save to file
-ggsave(filename = paste0(run_name, "_TICplots.pdf"),
+ggsave(filename = paste0(args$run_name, "_TICplots.pdf"),
        tic_plot_pdf, width = 21, height = 29.7, units = "cm")
diff --git a/DIMS/AverageTechReplicates.nf b/DIMS/AverageTechReplicates.nf
@@ -1,7 +1,7 @@
 process AverageTechReplicates {
     tag "DIMS AverageTechReplicates"
     label 'AverageTechReplicates'
-    container = 'docker://umcugenbioinf/dims:1.3'
+    container = 'docker://umcugenbioinf/dims:1.4'
     shell = ['/bin/bash', '-euo', 'pipefail']
 
     input:
@@ -23,13 +23,13 @@ process AverageTechReplicates {
 
     script:
         """
-        Rscript ${baseDir}/CustomModules/DIMS/AverageTechReplicates.R $init_file \
-                                                                      $params.nr_replicates \
-                                                                      $analysis_id \
-                                                                      $matrix \
-                                                                      $highest_mz_file \
-                                                                      $breaks_file \
-                                                                      $params.threshold_tics
+        Rscript ${baseDir}/CustomModules/DIMS/AverageTechReplicates.R \\
+            --init_filepath $init_file \\
+            --nr_replicates $params.nr_replicates \\
+            --run_name $analysis_id \\
+            --matrix $matrix \\
+            --highest_mz_file $highest_mz_file \\
+            --breaks_filepath $breaks_file
         """
 }
 

diff --git a/DIMS/CollectFilled.R b/DIMS/CollectFilled.R
@@ -1,14 +1,17 @@
-## adapted from 10-collectSamplesFilled.R
+# load package
+library("argparse")
 
-# define parameters
-cmd_args <- commandArgs(trailingOnly = TRUE)
+parser <- ArgumentParser(description = "CollectFilled")
 
-scripts_dir <- cmd_args[1]
-ppm <- as.numeric(cmd_args[2])
-z_score <- as.numeric(cmd_args[3])
+parser$add_argument("--scripts_dir", dest = "scripts_dir",
+                    help = "File path to the directory containing functions used in this script", required = TRUE)
+parser$add_argument("-z", "--z_score", dest = "z_score", type = "integer",
+                    help = "Numeric value, z-score = 1", required = TRUE)
 
-source(paste0(scripts_dir, "merge_duplicate_rows.R"))
-source(paste0(scripts_dir, "calculate_zscores.R"))
+args <- parser$parse_args()
+
+source(paste0(args$scripts_dir, "merge_duplicate_rows.R"))
+source(paste0(args$scripts_dir, "calculate_zscores.R"))
 
 # for each scan mode, collect all filled peak group lists
 scanmodes <- c("positive", "negative")
@@ -17,7 +20,7 @@ for (scanmode in scanmodes) {
   filled_files <- list.files("./", full.names = TRUE, pattern = paste0(scanmode, "_identified_filled"))
   # load files and combine into one object
   outlist_total <- NULL
-  for (file_nr in 1:length(filled_files)) {
+  for (file_nr in seq_along(filled_files)) {
     peakgrouplist_filled <- get(load(filled_files[file_nr]))
     outlist_total <- rbind(outlist_total, peakgrouplist_filled)
   }
@@ -29,7 +32,7 @@ for (scanmode in scanmodes) {
   pattern_file <- paste0(scanmode, "_repl_pattern.RData")
   repl_pattern <- get(load(pattern_file))
   # calculate Z-scores
-  if (z_score == 1) {
+  if (args$z_score == 1) {
     outlist_stats <- calculate_zscores(outlist_total, adducts = FALSE)
     nr_removed_samples <- length(which(repl_pattern[] == "character(0)"))
     order_index_int <- order(colnames(outlist_stats)[8:(length(repl_pattern) - nr_removed_samples + 7)])
@@ -47,7 +50,7 @@ for (scanmode in scanmodes) {
     outlist_stats_more <- cbind(outlist_stats_more, tmp)
     outlist_total <- outlist_stats_more
   }
-  
+
   # make a copy of the outlist
   outlist_ident <- outlist_total
   # take care of NAs in theormz_noise

diff --git a/DIMS/CollectFilled.nf b/DIMS/CollectFilled.nf
@@ -1,7 +1,7 @@
 process CollectFilled {
     tag "DIMS CollectFilled"
     label 'CollectFilled'
-    container = 'docker://umcugenbioinf/dims:1.3'
+    container = 'docker://umcugenbioinf/dims:1.4'
     shell = ['/bin/bash', '-euo', 'pipefail']
 
     input:
@@ -14,6 +14,8 @@ process CollectFilled {
 
     script:
         """
-        Rscript ${baseDir}/CustomModules/DIMS/CollectFilled.R $params.scripts_dir $params.ppm $params.zscore
+        Rscript ${baseDir}/CustomModules/DIMS/CollectFilled.R \\
+            --scripts_dir $params.scripts_dir \\
+            --z_score $params.zscore
         """
 }
diff --git a/DIMS/CollectSumAdducts.R b/DIMS/CollectSumAdducts.R
@@ -1,13 +1,16 @@
 ## Combining all AdductSums part files for each scanmode and
 # combine intensities if present in both scanmodes
+library("argparse")
 suppressMessages(library("dplyr"))
 
-# define parameters
-cmd_args <- commandArgs(trailingOnly = TRUE)
+parser <- ArgumentParser(description = "GenerateExcel")
 
-preprocessing_scripts_dir <- cmd_args[1]
+parser$add_argument("--preprocessing_scripts_dir", dest = "preprocessing_scripts_dir",
+                    help = "File path to the directory containing functions used in this script", required = TRUE)
 
-source(paste0(preprocessing_scripts_dir, "collect_sum_adducts_functions.R"))
+args <- parser$parse_args()
+
+source(paste0(args$preprocessing_scripts_dir, "collect_sum_adducts_functions.R"))
 
 # collect all AdductSums part files for each scanmode and save to RData file
 outlist_tot_pos <- combine_sum_adduct_parts("positive")