Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 24 additions & 21 deletions DIMS/AssignToBins.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
# load required packages
library("argparse")
suppressPackageStartupMessages(library("xcms"))

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)
parser <- ArgumentParser(description = "AssignToBins")

mzml_filepath <- cmd_args[1]
breaks_filepath <- cmd_args[2]
resol <- as.numeric(cmd_args[3])
parser$add_argument("--mzML_filepath", dest = "mzml_filepath",
help = "File path for the mzML file", required = TRUE)
parser$add_argument("--breaks_filepath", dest = "breaks_filepath",
help = "File path for the breaks RData file", required = TRUE)

args <- parser$parse_args()

# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
load(breaks_filepath)
load(args$breaks_filepath)

# get sample name
techrep_name <- sub("\\..*$", "", basename(mzml_filepath))
techrep_name <- sub("\\..*$", "", basename(args$mzml_filepath))

options(digits = 16)

Expand All @@ -26,7 +29,7 @@ neg_bins <- bins
dims_thresh <- 100

# read in the data for 1 sample
raw_data <- suppressMessages(xcms::xcmsRaw(mzml_filepath))
raw_data <- suppressMessages(xcms::xcmsRaw(args$mzml_filepath))

# Generate a matrix with retention times and intensities
raw_data_matrix <- xcms::rawMat(raw_data)
Expand All @@ -41,13 +44,13 @@ neg_times_trimmed <- neg_times[neg_times > trim_left_neg & neg_times < trim_righ
# get TIC intensities for areas between trim_left and trim_right
tic_intensity_persample <- cbind(raw_data@scantime, raw_data@tic)
colnames(tic_intensity_persample) <- c("retention_time", "tic_intensity")
tic_intensity_pos <- tic_intensity_persample[tic_intensity_persample[ , "retention_time"] > min(pos_times_trimmed) &
tic_intensity_persample[ , "retention_time"] < max(pos_times_trimmed), ]
tic_intensity_neg <- tic_intensity_persample[tic_intensity_persample[ , "retention_time"] > min(neg_times_trimmed) &
tic_intensity_persample[ , "retention_time"] < max(neg_times_trimmed), ]
tic_intensity_pos <- tic_intensity_persample[tic_intensity_persample[, "retention_time"] > min(pos_times_trimmed) &
tic_intensity_persample[, "retention_time"] < max(pos_times_trimmed), ]
tic_intensity_neg <- tic_intensity_persample[tic_intensity_persample[, "retention_time"] > min(neg_times_trimmed) &
tic_intensity_persample[, "retention_time"] < max(neg_times_trimmed), ]
# calculate weighted mean of intensities for pos and neg separately
mean_pos <- weighted.mean(tic_intensity_pos[ , "tic_intensity"], tic_intensity_pos[ , "tic_intensity"])
mean_neg <- weighted.mean(tic_intensity_neg[ , "tic_intensity"], tic_intensity_neg[ , "tic_intensity"])
mean_pos <- weighted.mean(tic_intensity_pos[, "tic_intensity"], tic_intensity_pos[, "tic_intensity"])
mean_neg <- weighted.mean(tic_intensity_neg[, "tic_intensity"], tic_intensity_neg[, "tic_intensity"])
# intensity per scan should be at least 80% of weighted mean
dims_thresh_pos <- 0.8 * mean_pos
dims_thresh_neg <- 0.8 * mean_neg
Expand All @@ -67,17 +70,17 @@ neg_raw_data_matrix <- raw_data_matrix[neg_index, ]

# Get index for binning intensity values
bin_indices_pos <- cut(
pos_raw_data_matrix[, "mz"],
pos_raw_data_matrix[, "mz"],
breaks_fwhm,
include.lowest = TRUE,
right = TRUE,
include.lowest = TRUE,
right = TRUE,
labels = FALSE
)
bin_indices_neg <- cut(
neg_raw_data_matrix[, "mz"],
breaks_fwhm,
include.lowest = TRUE,
right = TRUE,
neg_raw_data_matrix[, "mz"],
breaks_fwhm,
include.lowest = TRUE,
right = TRUE,
labels = FALSE
)

Expand Down
6 changes: 4 additions & 2 deletions DIMS/AssignToBins.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
process AssignToBins {
tag "DIMS AssignToBins ${file_id}"
label 'AssignToBins'
container = 'docker://umcugenbioinf/dims:1.3'
container = 'docker://umcugenbioinf/dims:1.4'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
Expand All @@ -13,7 +13,9 @@ process AssignToBins {

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R $mzML_file $breaks_file $params.resolution
Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R \\
--mzML_filepath $mzML_file \\
--breaks_filepath $breaks_file
"""
}

Expand Down
77 changes: 43 additions & 34 deletions DIMS/AverageTechReplicates.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,37 @@
# adapted from 3-AverageTechReplicates.R

# load packages
library("argparse")
library("ggplot2")
library("gridExtra")

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)
parser <- ArgumentParser(description = "AverageTechReplicates")

init_file <- cmd_args[1]
nr_replicates <- as.numeric(cmd_args[2])
run_name <- cmd_args[3]
dims_matrix <- cmd_args[4]
highest_mz_file <- cmd_args[5]
highest_mz <- get(load(highest_mz_file))
breaks_filepath <- cmd_args[6]
thresh2remove <- as.numeric(cmd_args[7])
parser$add_argument("--init_filepath", dest = "init_file",
help = "File path for the init RData file", required = TRUE)
parser$add_argument("-n", "--nr_replicates", dest = "nr_replicates", type = "integer",
help = "Number of replicates", required = TRUE)
parser$add_argument("--run_name", dest = "run_name",
help = "The run name/analysis ID", required = TRUE)
parser$add_argument("--matrix", dest = "dims_matrix",
help = "The matrix used, e.g. Plasma, Research, ...")
parser$add_argument("--highest_mz_file", dest = "highest_mz_file",
help = "File path for the highest Mz RData file", required = TRUE)
parser$add_argument("--breaks_filepath", dest = "breaks_filepath",
help = "File path for the breaks RData file", required = TRUE)

args <- parser$parse_args()

highest_mz <- get(load(args$highest_mz_file))
thresh2remove <- 1000000000

remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
# collect list of samples to remove from replication pattern
remove_from_group <- NULL
for (sample_nr in 1:length(repl_pattern)){
for (sample_nr in seq_along(repl_pattern)){
repl_pattern_1sample <- repl_pattern[[sample_nr]]
remove <- NULL
for (file_nr in 1:length(repl_pattern_1sample)) {
for (file_nr in seq_along(repl_pattern_1sample)) {
if (repl_pattern_1sample[file_nr] %in% bad_samples) {
remove <- c(remove, file_nr)
}
Expand All @@ -41,11 +50,11 @@ remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
}

# load init_file: contains repl_pattern
load(init_file)
load(args$init_file)

# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
load(breaks_filepath)
load(args$breaks_filepath)

# lower the threshold for non Plasma matrices
if (dims_matrix != "Plasma") {
Expand All @@ -64,15 +73,15 @@ if (highest_mz > 700) {
remove_neg <- NULL
remove_pos <- NULL
cat("Pklist sum threshold to remove technical replicate:", thresh2remove, "\n")
for (sample_nr in 1:length(repl_pattern)) {
for (sample_nr in seq_along(repl_pattern)) {
tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
tech_reps_array_pos <- NULL
tech_reps_array_neg <- NULL
sum_neg <- 0
sum_pos <- 0
nr_pos <- 0
nr_neg <- 0
for (file_nr in 1:length(tech_reps)) {
for (file_nr in seq_along(tech_reps)) {
load(paste(tech_reps[file_nr], ".RData", sep = ""))
cat("\n\nParsing", tech_reps[file_nr])
# negative scanmode
Expand All @@ -96,7 +105,7 @@ for (sample_nr in 1:length(repl_pattern)) {
}
tech_reps_array_pos <- cbind(tech_reps_array_pos, peak_list$pos)
}
# save to file
# save to file
if (nr_neg != 0) {
sum_neg[, 1] <- sum_neg[, 1] / nr_neg
colnames(sum_neg) <- names(repl_pattern)[sample_nr]
Expand All @@ -109,25 +118,25 @@ for (sample_nr in 1:length(repl_pattern)) {
}
}

pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, args$nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
save(repl_pattern_filtered, file = "negative_repl_pattern.RData")
write.table(
remove_neg,
file = "miss_infusions_negative.txt",
row.names = FALSE,
col.names = FALSE,
remove_neg,
file = "miss_infusions_negative.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"
)

pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, args$nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
save(repl_pattern_filtered, file = "positive_repl_pattern.RData")
write.table(
remove_pos,
file = "miss_infusions_positive.txt",
row.names = FALSE,
col.names = FALSE,
remove_pos,
file = "miss_infusions_positive.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"
)

Expand All @@ -150,10 +159,10 @@ for (file in tic_files) {
# create a list with information for all TIC plots
tic_plot_list <- list()
plot_nr <- 0
for (sample_nr in c(1:length(repl_pattern))) {
for (sample_nr in seq_along(repl_pattern)) {
tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
sample_name <- names(repl_pattern)[sample_nr]
for (file_nr in 1:length(tech_reps)) {
for (file_nr in seq_along(tech_reps)) {
plot_nr <- plot_nr + 1
# read file with retention time, intensity and dims_threshold values
repl1_nr <- read.table(paste0(tech_reps[file_nr], "_TIC.txt"))
Expand All @@ -163,7 +172,7 @@ for (sample_nr in c(1:length(repl_pattern))) {
# for replicates with bad TIC, determine what color the border of the plot should be
bad_color_pos <- tech_reps[file_nr] %in% remove_pos
bad_color_neg <- tech_reps[file_nr] %in% remove_neg
if (bad_color_neg & bad_color_pos) {
if (bad_color_neg && bad_color_pos) {
plot_color <- "#F8766D"
} else if (bad_color_pos) {
plot_color <- "#ED8141"
Expand Down Expand Up @@ -191,19 +200,19 @@ for (sample_nr in c(1:length(repl_pattern))) {
}

# create a layout matrix dependent on number of replicates
layout <- matrix(1:(10 * nr_replicates), 10, nr_replicates, TRUE)
layout <- matrix(1:(10 * args$nr_replicates), 10, args$nr_replicates, TRUE)
# put TIC plots in matrix
tic_plot_pdf <- marrangeGrob(
grobs = tic_plot_list,
nrow = 10, ncol = nr_replicates,
nrow = 10, ncol = args$nr_replicates,
layout_matrix = layout,
top = quote(paste(
"TICs of run", run_name,
"TICs of run", args$run_name,
" \n colors: red = both modes misinjection, orange = pos mode misinjection, purple = neg mode misinjection \n ",
g, "/", npages
))
)

# save to file
ggsave(filename = paste0(run_name, "_TICplots.pdf"),
ggsave(filename = paste0(args$run_name, "_TICplots.pdf"),
tic_plot_pdf, width = 21, height = 29.7, units = "cm")
16 changes: 8 additions & 8 deletions DIMS/AverageTechReplicates.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
process AverageTechReplicates {
tag "DIMS AverageTechReplicates"
label 'AverageTechReplicates'
container = 'docker://umcugenbioinf/dims:1.3'
container = 'docker://umcugenbioinf/dims:1.4'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
Expand All @@ -23,13 +23,13 @@ process AverageTechReplicates {

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/AverageTechReplicates.R $init_file \
$params.nr_replicates \
$analysis_id \
$matrix \
$highest_mz_file \
$breaks_file \
$params.threshold_tics
Rscript ${baseDir}/CustomModules/DIMS/AverageTechReplicates.R \\
--init_filepath $init_file \\
--nr_replicates $params.nr_replicates \\
--run_name $analysis_id \\
--matrix $matrix \\
--highest_mz_file $highest_mz_file \\
--breaks_filepath $breaks_file
"""
}

Expand Down
25 changes: 14 additions & 11 deletions DIMS/CollectFilled.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
## adapted from 10-collectSamplesFilled.R
# load package
library("argparse")

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)
parser <- ArgumentParser(description = "CollectFilled")

scripts_dir <- cmd_args[1]
ppm <- as.numeric(cmd_args[2])
z_score <- as.numeric(cmd_args[3])
parser$add_argument("--scripts_dir", dest = "scripts_dir",
help = "File path to the directory containing functions used in this script", required = TRUE)
parser$add_argument("-z", "--z_score", dest = "z_score", type = "integer",
help = "Numeric value, z-score = 1", required = TRUE)

source(paste0(scripts_dir, "merge_duplicate_rows.R"))
source(paste0(scripts_dir, "calculate_zscores.R"))
args <- parser$parse_args()

source(paste0(args$scripts_dir, "merge_duplicate_rows.R"))
source(paste0(args$scripts_dir, "calculate_zscores.R"))

# for each scan mode, collect all filled peak group lists
scanmodes <- c("positive", "negative")
Expand All @@ -17,7 +20,7 @@ for (scanmode in scanmodes) {
filled_files <- list.files("./", full.names = TRUE, pattern = paste0(scanmode, "_identified_filled"))
# load files and combine into one object
outlist_total <- NULL
for (file_nr in 1:length(filled_files)) {
for (file_nr in seq_along(filled_files)) {
peakgrouplist_filled <- get(load(filled_files[file_nr]))
outlist_total <- rbind(outlist_total, peakgrouplist_filled)
}
Expand All @@ -29,7 +32,7 @@ for (scanmode in scanmodes) {
pattern_file <- paste0(scanmode, "_repl_pattern.RData")
repl_pattern <- get(load(pattern_file))
# calculate Z-scores
if (z_score == 1) {
if (args$z_score == 1) {
outlist_stats <- calculate_zscores(outlist_total, adducts = FALSE)
nr_removed_samples <- length(which(repl_pattern[] == "character(0)"))
order_index_int <- order(colnames(outlist_stats)[8:(length(repl_pattern) - nr_removed_samples + 7)])
Expand All @@ -47,7 +50,7 @@ for (scanmode in scanmodes) {
outlist_stats_more <- cbind(outlist_stats_more, tmp)
outlist_total <- outlist_stats_more
}

# make a copy of the outlist
outlist_ident <- outlist_total
# take care of NAs in theormz_noise
Expand Down
6 changes: 4 additions & 2 deletions DIMS/CollectFilled.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
process CollectFilled {
tag "DIMS CollectFilled"
label 'CollectFilled'
container = 'docker://umcugenbioinf/dims:1.3'
container = 'docker://umcugenbioinf/dims:1.4'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
Expand All @@ -14,6 +14,8 @@ process CollectFilled {

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/CollectFilled.R $params.scripts_dir $params.ppm $params.zscore
Rscript ${baseDir}/CustomModules/DIMS/CollectFilled.R \\
--scripts_dir $params.scripts_dir \\
--z_score $params.zscore
"""
}
11 changes: 7 additions & 4 deletions DIMS/CollectSumAdducts.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
## Combining all AdductSums part files for each scanmode and
# combine intensities if present in both scanmodes
library("argparse")
suppressMessages(library("dplyr"))

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)
parser <- ArgumentParser(description = "GenerateExcel")

preprocessing_scripts_dir <- cmd_args[1]
parser$add_argument("--preprocessing_scripts_dir", dest = "preprocessing_scripts_dir",
help = "File path to the directory containing functions used in this script", required = TRUE)

source(paste0(preprocessing_scripts_dir, "collect_sum_adducts_functions.R"))
args <- parser$parse_args()

source(paste0(args$preprocessing_scripts_dir, "collect_sum_adducts_functions.R"))

# collect all AdductSums part files for each scanmode and save to RData file
outlist_tot_pos <- combine_sum_adduct_parts("positive")
Expand Down
Loading
Loading