Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
0f724c4
GenerateBreaks output split into 2 files
mraves2 Jan 31, 2025
de495e3
AverageTechReplicates replaced by EvaluateTics
mraves2 Feb 3, 2025
434a41f
refactored PeakFinding, peak finding functions moved to folder preproc…
mraves2 Feb 3, 2025
9d7a69f
refactor DIMS PeakFinding, flow between scripts
mraves2 Feb 10, 2025
62fc225
added unit tests for DIMS peak finding
mraves2 Feb 11, 2025
5d311a3
changed variable name tmp to replicates_persample
mraves2 Jul 8, 2025
708b872
omitted obsolete lines
mraves2 Jul 8, 2025
0521390
added weighted mean for half-bad TICs
mraves2 Jul 8, 2025
15403cd
replaced AverageTechReplicates step by EvaluateTics
mraves2 Jul 8, 2025
6a7ada6
replaced AverageTechReplicates step by EvaluateTics
mraves2 Jul 8, 2025
6438e0e
removed breaks as input for PeakFinding
mraves2 Jul 8, 2025
1ef197d
changed PeakFinding to new two-step method
mraves2 Jul 8, 2025
9272ec3
functions for new two-step PeakFinding method
mraves2 Jul 8, 2025
e006160
unit tests for new two-step PeakFinding method
mraves2 Jul 8, 2025
8102bdb
information for averaging peaks for technical replicates based on txt…
mraves2 Jul 8, 2025
d0ed769
modified input for PeakGrouping corresponding to new PeakFinding method
mraves2 Jul 8, 2025
a35c4ca
collect averaged peaks per biological sample, corresponding to new Pe…
mraves2 Jul 8, 2025
d903e1b
DIMS CustomModules merge conflicts resolved
mraves2 Jul 15, 2025
295e460
fixed path to DIMS peak_finding_functions
mraves2 Jul 15, 2025
109d664
created function for averaging peaks in DIMS/AveragePeaks.R
mraves2 Oct 2, 2025
cb33aba
added unit tests for average_peaks_functions
mraves2 Oct 2, 2025
db8633e
moved parameters matrix and nr_replicates from workflow into params
mraves2 Oct 3, 2025
004e3e9
refactored DIMS/EvaluateTics
mraves2 Oct 3, 2025
06e5e1a
moved functions for DIMS/EvaluateTics to separate file
mraves2 Oct 3, 2025
3f24b6d
added unit tests for DIMS/EvaluateTics
mraves2 Oct 3, 2025
c2c65dd
modifications suggested in code review DIMS/PeakFinding
mraves2 Oct 3, 2025
15a25ba
removed two obsolete lines
mraves2 Oct 16, 2025
527acf6
resolved merge conflict in DIMS/EvaluateTics.R
mraves2 Oct 16, 2025
007bea4
moved parameter ppm_peak from DIMS/AveragePeaks.R to inside function
mraves2 Oct 16, 2025
e58640b
added parameter sample_name to DIMS/preprocessing/average_peaks_funct…
mraves2 Oct 16, 2025
f0763a0
modified DIMS/tests/testthat/test_average_peaks.R for extra variable …
mraves2 Oct 16, 2025
ac9f43f
added fixture files for unit test for DIMS/EvaluateTics
mraves2 Oct 17, 2025
083aeac
added unit test for empty peaklist
mraves2 Dec 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions DIMS/AssignToBins.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ cmd_args <- commandArgs(trailingOnly = TRUE)

mzml_filepath <- cmd_args[1]
breaks_filepath <- cmd_args[2]
resol <- as.numeric(cmd_args[3])
trim_parameters_filepath <- cmd_args[3]
resol <- as.numeric(cmd_args[4])

# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
load(breaks_filepath)
# load trim parameters file: contains trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
load(trim_parameters_filepath)

# get sample name
techrep_name <- sub("\\..*$", "", basename(mzml_filepath))
Expand Down
4 changes: 2 additions & 2 deletions DIMS/AssignToBins.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ process AssignToBins {
shell = ['/bin/bash', '-euo', 'pipefail']

input:
tuple(val(file_id), path(mzML_file), path(breaks_file))
tuple(val(file_id), path(mzML_file), path(breaks_file), path(trim_params_file))

output:
path("${file_id}.RData"), emit: rdata_file
path("${file_id}_TIC.txt"), emit: tic_txt_file

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R $mzML_file $breaks_file $params.resolution
Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R $mzML_file $breaks_file $trim_params_file $params.resolution
"""
}

Expand Down
37 changes: 37 additions & 0 deletions DIMS/AveragePeaks.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
library(dplyr)

# Average the peak lists of all technical replicates that belong to one
# biological sample, for a single scan mode.
#
# Positional command-line arguments:
#   1. sample_name                name of the biological sample
#   2. techreps                   technical replicate names, separated by ";"
#   3. scanmode                   scan mode label used in input/output file names
#   4. preprocessing_scripts_dir  directory containing average_peaks_functions.R
#
# Input:  <tech_rep>_<scanmode>.RData in the working directory, one file per
#         technical replicate
# Output: AvgPeaks_<sample_name>_<scanmode>.RData containing `averaged_peaks`

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)
if (length(cmd_args) < 4) {
  stop(
    "Usage: AveragePeaks.R <sample_name> <techreps> <scanmode> ",
    "<preprocessing_scripts_dir>",
    call. = FALSE
  )
}

sample_name <- cmd_args[1]
techreps <- cmd_args[2]
scanmode <- cmd_args[3]
preprocessing_scripts_dir <- cmd_args[4]
tech_reps <- strsplit(techreps, ";")[[1]]
if (length(tech_reps) == 0) {
  stop("No technical replicates given for sample ", sample_name, call. = FALSE)
}

# load in function scripts
# file.path() works whether or not the directory ends in a path separator
source(file.path(preprocessing_scripts_dir, "average_peaks_functions.R"))

# Initialize per sample
# NOTE(review): these two objects are not used further in this script
# (averaged_peaks is overwritten below); kept in case the sourced functions
# read them as globals -- confirm and remove.
nr_repl_persample <- 0
averaged_peaks <- matrix(0, nrow = 0, ncol = 6)
colnames(averaged_peaks) <- c("samplenr", "mzmed.pkt", "fq", "mzmin.pkt", "mzmax.pkt", "height.pkt")

# load the object stored in the RData file of one technical replicate;
# a private environment prevents the loaded object from overwriting
# variables of this script
load_tech_repl <- function(tech_rep) {
  tech_repl_file <- paste0(tech_rep, "_", scanmode, ".RData")
  load_env <- new.env()
  get(load(tech_repl_file, envir = load_env), envir = load_env)
}

# load RData files of technical replicates belonging to biological sample
# and combine data for all technical replicates in a single rbind call
peaklist_allrepl <- do.call(rbind, lapply(tech_reps, load_tech_repl))

# sort on mass
peaklist_allrepl_df <- as.data.frame(peaklist_allrepl)
# make sure mass and intensity are numeric before sorting and averaging
peaklist_allrepl_df$mzmed.pkt <- as.numeric(peaklist_allrepl_df$mzmed.pkt)
peaklist_allrepl_df$height.pkt <- as.numeric(peaklist_allrepl_df$height.pkt)
peaklist_allrepl_sorted <- peaklist_allrepl_df %>% arrange(mzmed.pkt)

# average over technical replicates
averaged_peaks <- average_peaks_per_sample(peaklist_allrepl_sorted, sample_name)
save(averaged_peaks, file = paste0("AvgPeaks_", sample_name, "_", scanmode, ".RData"))

18 changes: 18 additions & 0 deletions DIMS/AveragePeaks.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Nextflow process that runs DIMS/AveragePeaks.R for one biological sample
// and one scan mode. The R script writes AvgPeaks_<sample>_<scanmode>.RData.
process AveragePeaks {
tag "DIMS AveragePeaks"
label 'AveragePeaks'
container = 'docker://umcugenbioinf/dims:1.3'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
// RData files staged into the task directory; they are not named on the
// command line below, the R script builds the file names itself from
// the technical replicate names and the scan mode
path(rdata_files)
// values passed positionally to AveragePeaks.R; the R script splits
// tech_reps on ";"
// NOTE(review): tech_reps is interpolated unquoted into the bash command,
// where a bare ";" would end the command -- confirm the value arrives
// already quoted from the upstream channel
tuple val(sample_id), val(tech_reps), val(scanmode)

output:
path 'AvgPeaks_*.RData'

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/AveragePeaks.R $sample_id $tech_reps $scanmode $params.preprocessing_scripts_dir
"""
}
36 changes: 0 additions & 36 deletions DIMS/AverageTechReplicates.nf

This file was deleted.

18 changes: 18 additions & 0 deletions DIMS/CollectAveraged.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Combine the averaged peak lists of all biological samples into one object
# per scan mode.
#
# Input:  every file in the working directory whose name ends in
#         "<scanmode>.RData"
# Output: AvgPeaks_<scanmode>.RData containing `outlist_total`, for the scan
#         modes "positive" and "negative"

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)

# first positional argument; not used further in this script
scripts_dir <- cmd_args[1]

# load the object stored in one RData file; a private environment prevents
# the loaded object from overwriting variables of this script
load_rdata_object <- function(rdata_file) {
  load_env <- new.env()
  get(load(rdata_file, envir = load_env), envir = load_env)
}

# for each scan mode, collect all averaged peak lists per biological sample
scanmodes <- c("positive", "negative")
for (scanmode in scanmodes) {
  output_file <- paste0("AvgPeaks_", scanmode, ".RData")
  # get list of files; the dot is escaped and the pattern is anchored so that
  # only names that really end in "<scanmode>.RData" are selected
  filled_files <- list.files("./", full.names = TRUE, pattern = paste0(scanmode, "\\.RData$"))
  # never read back a combined file left behind by an earlier run
  filled_files <- filled_files[basename(filled_files) != output_file]
  if (length(filled_files) == 0) {
    stop("No averaged peak files found for scan mode ", scanmode, call. = FALSE)
  }
  # load files and combine into one object with a single rbind call
  outlist_total <- do.call(rbind, lapply(filled_files, load_rdata_object))
  save(outlist_total, file = output_file)
}
17 changes: 17 additions & 0 deletions DIMS/CollectAveraged.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Nextflow process that runs DIMS/CollectAveraged.R, which combines the
// averaged peak lists found in the task directory into one
// AvgPeaks_<scanmode>.RData file per scan mode.
process CollectAveraged {
tag "DIMS CollectAveraged"
label 'CollectAveraged'
container = 'docker://umcugenbioinf/dims:1.3'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
// staged into the task directory; the R script finds the files itself by
// listing the working directory, so they are not named on the command line
path(averaged_files)

output:
path('AvgPeaks*.RData'), emit: averaged_peaks

script:
// the R script is called without arguments
"""
Rscript ${baseDir}/CustomModules/DIMS/CollectAveraged.R
"""
}
123 changes: 26 additions & 97 deletions DIMS/AverageTechReplicates.R → DIMS/EvaluateTics.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# adapted from 3-AverageTechReplicates.R

# load packages
library("ggplot2")
library("gridExtra")
Expand All @@ -13,39 +11,14 @@ run_name <- cmd_args[3]
dims_matrix <- cmd_args[4]
highest_mz_file <- cmd_args[5]
highest_mz <- get(load(highest_mz_file))
breaks_filepath <- cmd_args[6]
thresh2remove <- as.numeric(cmd_args[7])

remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
# collect list of samples to remove from replication pattern
remove_from_group <- NULL
for (sample_nr in 1:length(repl_pattern)){
repl_pattern_1sample <- repl_pattern[[sample_nr]]
remove <- NULL
for (file_nr in 1:length(repl_pattern_1sample)) {
if (repl_pattern_1sample[file_nr] %in% bad_samples) {
remove <- c(remove, file_nr)
}
}
if (length(remove) == nr_replicates) {
remove_from_group <- c(remove_from_group, sample_nr)
}
if (!is.null(remove)) {
repl_pattern[[sample_nr]] <- repl_pattern[[sample_nr]][-remove]
}
}
if (length(remove_from_group) != 0) {
repl_pattern <- repl_pattern[-remove_from_group]
}
return(list("pattern" = repl_pattern))
}
trim_params_filepath <- cmd_args[6]
thresh2remove <- 1000000000

# load init_file: contains repl_pattern
load(init_file)

# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
load(breaks_filepath)
# load trim_params_file: contains trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
load(trim_params_filepath)

# lower the threshold for non Plasma matrices
if (dims_matrix != "Plasma") {
Expand All @@ -59,78 +32,32 @@ if (highest_mz > 700) {
thresh2remove <- 1000000
}

# find out which technical replicates are below the threshold
remove_tech_reps <- find_bad_replicates(repl_pattern, thresh2remove)
print(remove_tech_reps)

# remove technical replicates which are below the threshold
remove_neg <- NULL
remove_pos <- NULL
cat("Pklist sum threshold to remove technical replicate:", thresh2remove, "\n")
for (sample_nr in 1:length(repl_pattern)) {
tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
tech_reps_array_pos <- NULL
tech_reps_array_neg <- NULL
sum_neg <- 0
sum_pos <- 0
nr_pos <- 0
nr_neg <- 0
for (file_nr in 1:length(tech_reps)) {
load(paste(tech_reps[file_nr], ".RData", sep = ""))
cat("\n\nParsing", tech_reps[file_nr])
# negative scanmode
cat("\n\tNegative peak_list sum", sum(peak_list$neg[, 1]))
if (sum(peak_list$neg[, 1]) < thresh2remove) {
cat(" ... Removed")
remove_neg <- c(remove_neg, tech_reps[file_nr])
} else {
nr_neg <- nr_neg + 1
sum_neg <- sum_neg + peak_list$neg
}
tech_reps_array_neg <- cbind(tech_reps_array_neg, peak_list$neg)
# positive scanmode
cat("\n\tPositive peak_list sum", sum(peak_list$pos[, 1]))
if (sum(peak_list$pos[, 1]) < thresh2remove) {
cat(" ... Removed")
remove_pos <- c(remove_pos, tech_reps[file_nr])
} else {
nr_pos <- nr_pos + 1
sum_pos <- sum_pos + peak_list$pos
}
tech_reps_array_pos <- cbind(tech_reps_array_pos, peak_list$pos)
}
# save to file
if (nr_neg != 0) {
sum_neg[, 1] <- sum_neg[, 1] / nr_neg
colnames(sum_neg) <- names(repl_pattern)[sample_nr]
save(sum_neg, file = paste0(names(repl_pattern)[sample_nr], "_neg_avg.RData"))
}
if (nr_pos != 0) {
sum_pos[, 1] <- sum_pos[, 1] / nr_pos
colnames(sum_pos) <- names(repl_pattern)[sample_nr]
save(sum_pos, file = paste0(names(repl_pattern)[sample_nr], "_pos_avg.RData"))
}
}

pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
# negative scan mode
remove_neg <- remove_tech_reps$neg
repl_pattern_filtered <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
save(repl_pattern_filtered, file = "negative_repl_pattern.RData")
write.table(
remove_neg,
file = "miss_infusions_negative.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"
)

pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
# positive scan mode
remove_pos <- remove_tech_reps$pos
repl_pattern_filtered <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
save(repl_pattern_filtered, file = "positive_repl_pattern.RData")
write.table(
remove_pos,
file = "miss_infusions_positive.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"

# get an overview of suitable technical replicates for both scan modes
allsamples_techreps_neg <- get_overview_tech_reps(repl_pattern_filtered, "negative")
allsamples_techreps_pos <- get_overview_tech_reps(repl_pattern_filtered, "positive")
allsamples_techreps_both_scanmodes <- rbind(allsamples_techreps_pos, allsamples_techreps_neg)
write.table(allsamples_techreps_both_scanmodes,
file = "replicates_per_sample.txt",
col.names = FALSE,
row.names = FALSE,
sep = ","
)


## generate TIC plots
# get all txt files
tic_files <- list.files("./", full.names = TRUE, pattern = "*TIC.txt")
Expand Down Expand Up @@ -207,3 +134,5 @@ tic_plot_pdf <- marrangeGrob(
# save to file
ggsave(filename = paste0(run_name, "_TICplots.pdf"),
tic_plot_pdf, width = 21, height = 29.7, units = "cm")


33 changes: 33 additions & 0 deletions DIMS/EvaluateTics.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Nextflow process that runs DIMS/EvaluateTics.R. The R script writes the
// filtered replication pattern per scan mode, the list of technical
// replicates per sample, the miss-infusion lists and a PDF with TIC plots.
process EvaluateTics {
tag "DIMS EvaluateTics"
label 'EvaluateTics'
container = 'docker://umcugenbioinf/dims:1.3'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
// NOTE(review): rdata_file is staged but not named on the command line
// below -- confirm whether EvaluateTics.R still reads these files
path(rdata_file)
// staged so the R script can list the *TIC.txt files in the task directory
path(tic_txt_files)
path(init_file)
val(analysis_id)
path(highest_mz_file)
path(trim_params_file)

output:
path('*_repl_pattern.RData'), emit: pattern_files
path('replicates_per_sample.txt'), emit: sample_techreps
path('miss_infusions_negative.txt')
path('miss_infusions_positive.txt')
path('*_TICplots.pdf'), emit: tic_plots_pdf

script:
// arguments are positional; EvaluateTics.R reads argument 3 as run_name,
// 4 as dims_matrix, 5 as highest_mz_file and 6 as trim_params_filepath
"""
Rscript ${baseDir}/CustomModules/DIMS/EvaluateTics.R $init_file \
$params.nr_replicates \
$analysis_id \
$params.matrix \
$highest_mz_file \
$trim_params_file
"""
}


5 changes: 2 additions & 3 deletions DIMS/GenerateBreaks.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
## adapted from 1-generateBreaksFwhm.HPC.R ##

# load required package
suppressPackageStartupMessages(library("xcms"))

Expand Down Expand Up @@ -57,5 +55,6 @@ for (i in 1:nr_segments) {
}

# generate output file
save(breaks_fwhm, breaks_fwhm_avg, trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "breaks.fwhm.RData")
save(breaks_fwhm, breaks_fwhm_avg, file = "breaks.fwhm.RData")
save(trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "trim_params.RData")
save(high_mz, file = "highest_mz.RData")
1 change: 1 addition & 0 deletions DIMS/GenerateBreaks.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ process GenerateBreaks {

output:
path('breaks.fwhm.RData'), emit: breaks
path('trim_params.RData'), emit: trim_params
path('highest_mz.RData'), emit: highest_mz

script:
Expand Down
8 changes: 3 additions & 5 deletions DIMS/MakeInit.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
## adapted from makeInit in old pipeline

# define parameters
args <- commandArgs(trailingOnly = TRUE)

Expand All @@ -15,12 +13,12 @@ group_names_unique <- unique(group_names)
# generate the replication pattern
repl_pattern <- c()
for (sample_group in 1:nr_sample_groups) {
tmp <- c()
replicates_persample <- c()
for (repl in nr_replicates:1) {
index <- ((sample_group * nr_replicates) - repl) + 1
tmp <- c(tmp, sample_names[index])
replicates_persample <- c(replicates_persample, sample_names[index])
}
repl_pattern <- c(repl_pattern, list(tmp))
repl_pattern <- c(repl_pattern, list(replicates_persample))
}

names(repl_pattern) <- group_names_unique
Expand Down
Loading
Loading