From 195ed54340edf76616990dc8c91469145c5c6a81 Mon Sep 17 00:00:00 2001 From: ALuesink Date: Mon, 15 Dec 2025 13:54:38 +0100 Subject: [PATCH 01/10] Removed SST mix and pos controls from plots --- DIMS/GenerateExcel.R | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R index 5366264..7dfcf9e 100644 --- a/DIMS/GenerateExcel.R +++ b/DIMS/GenerateExcel.R @@ -115,10 +115,6 @@ if (z_score == 1) { # save outlist for GenerateQC step save(outlist, file = "outlist.RData") - # get the IDs of the patients and sort - patient_ids <- unique(gsub("\\.[0-9]*", "", patient_columns)) - patient_ids <- patient_ids[order(nchar(patient_ids), patient_ids)] - # get Helix IDs for extra Excel file metabolite_files <- list.files( path = paste(path_metabolite_groups, "Diagnostics", sep = "/"), @@ -151,16 +147,21 @@ if (z_score == 1) { relocate(c(HMDB_name, HMDB_name_all, HMDB_ID_all, sec_HMDB_ID), .after = last_col()) %>% rename(Name = H_Name) - for (row_index in seq_len(nrow(outlist))) { + # Get intensity columns for controls and patients + # Remove SST mix (P1001.x) and positive controls (P1002.x, P1002.x, P1005.x) + intensities_plots_df <- outlist %>% + select(HMDB_key, matches("^C|^P[0-9]"), -ends_with("_Zscore"), -matches("^P\\d{4}\\.\\d+$")) + + for (row_index in seq_len(nrow(intensities_plots_df))) { # get HMDB ID - hmdb_name <- rownames(outlist[row_index, ]) - - # get intensities of controls and patient for a metabolite, get intensity columns, + hmdb_id <- intensities_plots_df %>% slice(row_index) %>% pull(HMDB_key) + + # get intensities of controls and patient for the selected metabolite, # pivot to long format, arrange Samples nummerically, change Sample names, get group size and # set Intensities to numeric. - intensities <- outlist %>% + intensities_plots_df_long <- intensities_plots_df %>% slice(row_index) %>% - select(all_of(intensity_col_ids)) %>% + select(-HMDB_key) %>% as.data.frame() %>% pivot_longer(everything(), names_to = "Samples", values_to = "Intensities") %>% arrange(nchar(Samples)) %>% @@ -175,17 +176,17 @@ if (z_score == 1) { ungroup() # set plot width to 40 times the number of samples - plot_width <- length(unique(intensities$Samples)) * 40 + plot_width <- length(unique(intensities_plots_df_long$Samples)) * 40 col_width <- plot_width * 2 plot.new() - tmp_png <- paste0("plots/plot_", hmdb_name, ".png") + tmp_png <- paste0("plots/plot_", hmdb_id, ".png") png(filename = tmp_png, width = plot_width, height = 300) # plot intensities for the controls and patients, use boxplot if group size is above 2, otherwise use a dash/line - p <- ggplot(intensities, aes(Samples, Intensities)) + - geom_boxplot(data = subset(intensities, group_size > 2), aes(fill = type)) + - geom_point(data = subset(intensities, group_size <= 2), shape = "-", size = 10, aes(colour = type, fill = type)) + + p <- ggplot(intensities_plots_df_long, aes(Samples, Intensities)) + + geom_boxplot(data = subset(intensities_plots_df_long, group_size > 2), aes(fill = type)) + + geom_point(data = subset(intensities_plots_df_long, group_size <= 2), shape = "-", size = 10, aes(colour = type, fill = type)) + scale_fill_manual(values = c("Control" = "green", "Patients" = "#b20000")) + scale_color_manual(values = c("Control" = "black", "Patients" = "#b20000")) + theme( @@ -193,7 +194,7 @@ if (z_score == 1) { plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), axis.text = element_text(size = 12, face = "bold"), panel.background = element_rect(fill = "white", colour = "black") ) + - ggtitle(hmdb_name) + ggtitle(hmdb_id) print(p) dev.off() @@ -210,8 +211,7 @@ if (z_score == 1) { units = "px" ) - if (hmdb_name %in% metab_list_helix) { - print(row_helix) + if (hmdb_id %in% metab_list_helix) { openxlsx::insertImage( wb_helix_intensities, sheetname, From aae998e0b48a312ccbc4b563e4fea6f0295e7689 Mon Sep 17 00:00:00 2001 From: ALuesink Date: Mon, 15 Dec 2025 13:55:51 +0100 Subject: [PATCH 02/10] Styler and lintr changes --- DIMS/GenerateExcel.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R index 7dfcf9e..c075ba3 100644 --- a/DIMS/GenerateExcel.R +++ b/DIMS/GenerateExcel.R @@ -151,11 +151,13 @@ if (z_score == 1) { # Remove SST mix (P1001.x) and positive controls (P1002.x, P1002.x, P1005.x) intensities_plots_df <- outlist %>% select(HMDB_key, matches("^C|^P[0-9]"), -ends_with("_Zscore"), -matches("^P\\d{4}\\.\\d+$")) - + for (row_index in seq_len(nrow(intensities_plots_df))) { # get HMDB ID - hmdb_id <- intensities_plots_df %>% slice(row_index) %>% pull(HMDB_key) - + hmdb_id <- intensities_plots_df %>% + slice(row_index) %>% + pull(HMDB_key) + # get intensities of controls and patient for the selected metabolite, # pivot to long format, arrange Samples nummerically, change Sample names, get group size and # set Intensities to numeric. @@ -186,7 +188,8 @@ if (z_score == 1) { # plot intensities for the controls and patients, use boxplot if group size is above 2, otherwise use a dash/line p <- ggplot(intensities_plots_df_long, aes(Samples, Intensities)) + geom_boxplot(data = subset(intensities_plots_df_long, group_size > 2), aes(fill = type)) + - geom_point(data = subset(intensities_plots_df_long, group_size <= 2), shape = "-", size = 10, aes(colour = type, fill = type)) + + geom_point(data = subset(intensities_plots_df_long, group_size <= 2), + shape = "-", size = 10, aes(colour = type, fill = type)) + scale_fill_manual(values = c("Control" = "green", "Patients" = "#b20000")) + scale_color_manual(values = c("Control" = "black", "Patients" = "#b20000")) + theme( From cab9ce7a249be65f35f97b74cf6ff39bce3c4232 Mon Sep 17 00:00:00 2001 From: ALuesink Date: Mon, 15 Dec 2025 14:09:34 +0100 Subject: [PATCH 03/10] Added pipe_consistency_linter to default --- .lintr | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.lintr b/.lintr index a952bb1..db67222 100644 --- a/.lintr +++ b/.lintr @@ -1,6 +1,7 @@ linters: linters_with_defaults( line_length_linter(127), object_usage_linter = NULL, - return_linter = NULL + return_linter = NULL, + pipe_consistency_linter = lintr::pipe_consistency_linter("auto") ) encoding: "UTF-8" From 9588296a68ade81f9ac9dfbcfcec92aeccce43ba Mon Sep 17 00:00:00 2001 From: ALuesink Date: Mon, 15 Dec 2025 14:09:49 +0100 Subject: [PATCH 04/10] Lintr changes --- DIMS/export/generate_excel_functions.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DIMS/export/generate_excel_functions.R b/DIMS/export/generate_excel_functions.R index 5183758..bc1b0ed 100644 --- a/DIMS/export/generate_excel_functions.R +++ b/DIMS/export/generate_excel_functions.R @@ -46,7 +46,8 @@ calculate_zscores <- function(outlist, zscore_type, control_cols, stat_filter, i control_cols, stat_filter)) } else { # Calculate mean, sd and number of remaining controls, remove outlier controls by using grubbs test - intensities_without_outliers <- remove_outliers_grubbs(as.numeric(outlist[metabolite_index, control_cols]), stat_filter) + intensities_without_outliers <- remove_outliers_grubbs(as.numeric(outlist[metabolite_index, control_cols]), + stat_filter) outlist$avg_ctrls[metabolite_index] <- mean(intensities_without_outliers) outlist$sd_ctrls[metabolite_index] <- sd(intensities_without_outliers) outlist$nr_ctrls[metabolite_index] <- length(intensities_without_outliers) @@ -61,7 +62,7 @@ calculate_zscores <- function(outlist, zscore_type, control_cols, stat_filter, i }) outlist <- cbind(outlist, outlist_zscores) colnames(outlist)[startcol:ncol(outlist)] <- paste0(colnames(outlist)[intensity_col_ids], zscore_type) - + return(outlist) } From aa56eb2427998d287a3f1af816bc6db2c1df3bd6 Mon Sep 17 00:00:00 2001 From: ALuesink Date: Tue, 16 Dec 2025 09:47:53 +0100 Subject: [PATCH 05/10] Create separate plot for Helix Exel, moved code to functions and did styler and linter --- DIMS/GenerateExcel.R | 83 ++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R index c075ba3..48dd724 100644 --- a/DIMS/GenerateExcel.R +++ b/DIMS/GenerateExcel.R @@ -148,9 +148,7 @@ if (z_score == 1) { rename(Name = H_Name) # Get intensity columns for controls and patients - # Remove SST mix (P1001.x) and positive controls (P1002.x, P1002.x, P1005.x) - intensities_plots_df <- outlist %>% - select(HMDB_key, matches("^C|^P[0-9]"), -ends_with("_Zscore"), -matches("^P\\d{4}\\.\\d+$")) + intensities_plots_df <- outlist %>% select(HMDB_key, matches("^C|^P[0-9]"), -ends_with("_Zscore")) for (row_index in seq_len(nrow(intensities_plots_df))) { # get HMDB ID @@ -158,48 +156,47 @@ if (z_score == 1) { slice(row_index) %>% pull(HMDB_key) - # get intensities of controls and patient for the selected metabolite, - # pivot to long format, arrange Samples nummerically, change Sample names, get group size and - # set Intensities to numeric. - intensities_plots_df_long <- intensities_plots_df %>% - slice(row_index) %>% - select(-HMDB_key) %>% - as.data.frame() %>% - pivot_longer(everything(), names_to = "Samples", values_to = "Intensities") %>% - arrange(nchar(Samples)) %>% - mutate( - Samples = gsub("\\..*", "", Samples), - Samples = gsub("(C).*", "\\1", Samples), - Intensities = as.numeric(Intensities), - type = ifelse(Samples == "C", "Control", "Patients") - ) %>% - group_by(Samples) %>% - mutate(group_size = n()) %>% - ungroup() + # Transform dataframe to long format + intensities_plots_df_long <- transform_ints_df_plots(intensities_plots_df, row_index) # set plot width to 40 times the number of samples plot_width <- length(unique(intensities_plots_df_long$Samples)) * 40 col_width <- plot_width * 2 + if (hmdb_id %in% metab_list_helix) { + # Make separate plot for Helix Excel containing all samples + plot.new() + tmp_png_helix <- paste0("plots/plot_helix_", hmdb_id, ".png") + png(filename = tmp_png_helix, width = plot_width, height = 300) + + boxplot_excel_helix <- create_boxplot_excel(intensities_plots_df_long, hmdb_id) + + print(boxplot_excel_helix) + dev.off() + + openxlsx::insertImage( + wb_helix_intensities, + sheetname, + tmp_png_helix, + startRow = row_helix, + startCol = 1, + height = 560, + width = col_width, + units = "px" + ) + row_helix <- row_helix + 1 + } + + # Remove postive controls and SST mix samples, (e.g. P1001, P1002, P1003, P1005) + intensities_plots_df_long <- intensities_plots_df_long %>% filter(!grepl("^P[0-9]{4}$", Samples)) + plot.new() tmp_png <- paste0("plots/plot_", hmdb_id, ".png") png(filename = tmp_png, width = plot_width, height = 300) - # plot intensities for the controls and patients, use boxplot if group size is above 2, otherwise use a dash/line - p <- ggplot(intensities_plots_df_long, aes(Samples, Intensities)) + - geom_boxplot(data = subset(intensities_plots_df_long, group_size > 2), aes(fill = type)) + - geom_point(data = subset(intensities_plots_df_long, group_size <= 2), - shape = "-", size = 10, aes(colour = type, fill = type)) + - scale_fill_manual(values = c("Control" = "green", "Patients" = "#b20000")) + - scale_color_manual(values = c("Control" = "black", "Patients" = "#b20000")) + - theme( - legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1), axis.title = element_blank(), - plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), axis.text = element_text(size = 12, face = "bold"), - panel.background = element_rect(fill = "white", colour = "black") - ) + - ggtitle(hmdb_id) - - print(p) + boxplot_excel <- create_boxplot_excel(intensities_plots_df_long, hmdb_id) + + print(boxplot_excel) dev.off() # place the plot in the Excel file @@ -213,20 +210,6 @@ if (z_score == 1) { width = col_width, units = "px" ) - - if (hmdb_id %in% metab_list_helix) { - openxlsx::insertImage( - wb_helix_intensities, - sheetname, - tmp_png, - startRow = row_helix, - startCol = 1, - height = 560, - width = col_width, - units = "px" - ) - row_helix <- row_helix + 1 - } } wb_intensities <- set_row_height_col_width_wb( wb_intensities, From 9e638aed1e9c99a45a83f7ecd159fe90d557b24c Mon Sep 17 00:00:00 2001 From: ALuesink Date: Tue, 16 Dec 2025 09:48:22 +0100 Subject: [PATCH 06/10] Added new functions and did styler and linter --- DIMS/export/generate_excel_functions.R | 92 +++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 11 deletions(-) diff --git a/DIMS/export/generate_excel_functions.R b/DIMS/export/generate_excel_functions.R index bc1b0ed..727437f 100644 --- a/DIMS/export/generate_excel_functions.R +++ b/DIMS/export/generate_excel_functions.R @@ -34,23 +34,29 @@ calculate_zscores <- function(outlist, zscore_type, control_cols, stat_filter, i if (zscore_type == "_Zscore") { # Calculate mean and sd with all controls outlist$avg_ctrls <- apply(control_cols, 1, function(x) mean(as.numeric(x), na.rm = TRUE)) - outlist$sd_ctrls <- apply(control_cols, 1, function(x) sd(as.numeric(x), na.rm = TRUE)) + outlist$sd_ctrls <- apply(control_cols, 1, function(x) sd(as.numeric(x), na.rm = TRUE)) } else { if (length(control_cols) > 3) { for (metabolite_index in seq_len(nrow(outlist))) { if (zscore_type == "_RobustZscore") { # Calculate mean and sd, remove outlier controls by using robust scaler - outlist$avg_ctrls[metabolite_index] <- mean(robust_scaler(outlist[metabolite_index, control_cols], - control_cols, stat_filter)) - outlist$sd_ctrls[metabolite_index] <- sd(robust_scaler(outlist[metabolite_index, control_cols], - control_cols, stat_filter)) + outlist$avg_ctrls[metabolite_index] <- mean(robust_scaler( + outlist[metabolite_index, control_cols], + control_cols, stat_filter + )) + outlist$sd_ctrls[metabolite_index] <- sd(robust_scaler( + outlist[metabolite_index, control_cols], + control_cols, stat_filter + )) } else { # Calculate mean, sd and number of remaining controls, remove outlier controls by using grubbs test - intensities_without_outliers <- remove_outliers_grubbs(as.numeric(outlist[metabolite_index, control_cols]), - stat_filter) + intensities_without_outliers <- remove_outliers_grubbs( + outlist[metabolite_index, control_cols], + stat_filter + ) outlist$avg_ctrls[metabolite_index] <- mean(intensities_without_outliers) - outlist$sd_ctrls[metabolite_index] <- sd(intensities_without_outliers) - outlist$nr_ctrls[metabolite_index] <- length(intensities_without_outliers) + outlist$sd_ctrls[metabolite_index] <- sd(intensities_without_outliers) + outlist$nr_ctrls[metabolite_index] <- length(intensities_without_outliers) } } } @@ -76,8 +82,8 @@ robust_scaler <- function(control_intensities, control_col_ids, perc = 5) { #' @return trimmed_control_intensities: Intensities trimmed for outliers nr_to_remove <- ceiling(length(control_col_ids) * perc / 100) sorted_control_intensities <- sort(as.numeric(control_intensities)) - trimmed_control_intensities <- sorted_control_intensities[(nr_to_remove + 1) : - (length(sorted_control_intensities) - nr_to_remove)] + trimmed_control_intensities <- sorted_control_intensities[(nr_to_remove + 1): + (length(sorted_control_intensities) - nr_to_remove)] return(trimmed_control_intensities) } @@ -88,6 +94,8 @@ remove_outliers_grubbs <- function(control_intensities, outlier_threshold = 2) { #' @param outlier_threshold: Threshold for outliers which will be removed from controls (float) #' #' @return trimmed_control_intensities: Intensities trimmed for outliers + + control_intensities <- as.numeric(control_intensities) mean_permetabolite <- mean(as.numeric(control_intensities)) stdev_permetabolite <- sd(as.numeric(control_intensities)) zscores_permetabolite <- (control_intensities - mean_permetabolite) / stdev_permetabolite @@ -97,6 +105,7 @@ remove_outliers_grubbs <- function(control_intensities, outlier_threshold = 2) { } else { trimmed_control_intensities <- control_intensities } + trimmed_control_intensities <- as.numeric(trimmed_control_intensities) return(trimmed_control_intensities) } @@ -130,3 +139,64 @@ set_row_height_col_width_wb <- function(wb, sheetname, num_rows_df, num_cols_df, } return(wb) } + +#' Transform a dataframe with intensities to long format +#' +#' Get intensities of controls and patient for the selected metabolite, +#' pivot to long format, arrange Samples nummerically, change Sample names, get group size and +#' set Intensities to numeric. +#' +#' @param intensities_plots_df: a dataframe with HMDB_key column and intensities for all samples +#' +#' @returns intensities_plots_df_long: a dataframe with on each row a sample and their intensity +transform_ints_df_plots <- function(intensities_plots_df, row_index) { + intensities_plots_df_long <- intensities_plots_df %>% + slice(row_index) %>% + select(-HMDB_key) %>% + as.data.frame() %>% + pivot_longer(everything(), names_to = "Samples", values_to = "Intensities") %>% + arrange(nchar(Samples)) %>% + mutate( + Samples = gsub("\\..*", "", Samples), + Samples = gsub("(C).*", "\\1", Samples), + Intensities = as.numeric(Intensities), + type = ifelse(Samples == "C", "Control", "Patients") + ) %>% + group_by(Samples) %>% + mutate(group_size = n()) %>% + ungroup() + + return(intensities_plots_df_long) +} + + +#' Create a plot of intensities of samples for Excel +#' Use boxplot if group size is above 2, otherwise use a dash/line +#' +#' @param intensities_plots_df_long: a dataframe with on each row a sample and their intensity +#' @param hmdb_id: HMDB ID of the selected metabolite +#' +#' @returns boxplot_excel: ggplot2 object containing the plot of intensities +create_boxplot_excel <- function(intensities_plots_df_long, hmdb_id) { + boxplot_excel <- ggplot(intensities_plots_df_long, aes(Samples, Intensities)) + + geom_boxplot(data = subset(intensities_plots_df_long, group_size > 2), aes(fill = type)) + + geom_point( + data = subset(intensities_plots_df_long, group_size <= 2), + shape = "-", + size = 10, + aes(colour = type, fill = type) + ) + + scale_fill_manual(values = c("Control" = "green", "Patients" = "#b20000")) + + scale_color_manual(values = c("Control" = "black", "Patients" = "#b20000")) + + theme( + legend.position = "none", + axis.text = element_text(size = 12, face = "bold"), + axis.text.x = element_text(angle = 90, hjust = 1), + axis.title = element_blank(), + plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), + panel.background = element_rect(fill = "white", colour = "black") + ) + + ggtitle(hmdb_id) + + return(boxplot_excel) +} From 08dbf31ef0a7c36a93795ad1dbc5e15e8029f137 Mon Sep 17 00:00:00 2001 From: ALuesink Date: Tue, 16 Dec 2025 09:48:38 +0100 Subject: [PATCH 07/10] Added new unit tests and did styler and linter --- DIMS/tests/testthat/test_generate_excel.R | 422 ++++++++++++++++++---- 1 file changed, 352 insertions(+), 70 deletions(-) diff --git a/DIMS/tests/testthat/test_generate_excel.R b/DIMS/tests/testthat/test_generate_excel.R index 7a1b428..817bb36 100644 --- a/DIMS/tests/testthat/test_generate_excel.R +++ b/DIMS/tests/testthat/test_generate_excel.R @@ -5,107 +5,273 @@ library("ggplot2") library("reshape2") library("openxlsx") +library("vdiffr") suppressMessages(library("tidyr")) suppressMessages(library("dplyr")) suppressMessages(library("stringr")) source("../../export/generate_excel_functions.R") -testthat::test_that("Get indices of columns and dataframe of intensities of a given label", { +testthat::test_that("get_intensities_cols: Get indices of columns and dataframe of intensities of a given label", { test_outlist <- read.delim(test_path("fixtures", "test_outlist.txt")) - + control_label <- "C" case_label <- "P" expect_equal(get_intensities_cols(test_outlist, control_label)$col_idx, c(2:13)) - expect_equal(colnames(get_intensities_cols(test_outlist, control_label)$df_intensities), - c("C101.1", "C102.1", "C103.1", "C104.1", "C105.1", "C106.1", "C107.1", "C108.1", - "C109.1", "C110.1", "C111.1", "C112.1")) - expect_equal(rownames(get_intensities_cols(test_outlist, control_label)$df_intensities), - c("HMDB001", "HMDB002", "HMDB003", "HMDB004")) + expect_equal( + colnames(get_intensities_cols(test_outlist, control_label)$df_intensities), + c("C101.1", "C102.1", "C103.1", "C104.1", "C105.1", "C106.1", "C107.1", "C108.1", "C109.1", "C110.1", "C111.1", "C112.1") + ) + expect_equal( + rownames(get_intensities_cols(test_outlist, control_label)$df_intensities), + c("HMDB001", "HMDB002", "HMDB003", "HMDB004") + ) expect_equal(get_intensities_cols(test_outlist, control_label)$df_intensities$C101.1, c(1000, 1200, 1300, 1400)) expect_equal(get_intensities_cols(test_outlist, case_label)$col_idx, c(14, 15)) expect_equal(colnames(get_intensities_cols(test_outlist, case_label)$df_intensities), c("P2.1", "P3.1")) }) -testthat::test_that("Calculating Z-scores using different methods for excluding controls", { +testthat::test_that("calculate_zscores: Calculating Z-scores using different methods for excluding controls", { test_outlist <- read.delim(test_path("fixtures", "test_outlist.txt")) control_intensities <- read.delim(test_path("fixtures", "test_control_intensities.txt")) - + control_col_idx <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13) intensity_col_ids <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) startcol <- ncol(test_outlist) + 4 perc <- 5 outlier_threshold <- 2 - expect_type(calculate_zscores(test_outlist, "_Zscore", control_intensities, NULL, intensity_col_ids, startcol), "list") - expect_identical(colnames(calculate_zscores(test_outlist, "_Zscore", control_intensities, NULL, intensity_col_ids, startcol)), - c("plots", "C101.1", "C102.1", "C103.1", "C104.1", "C105.1", "C106.1", "C107.1", "C108.1", "C109.1", "C110.1", - "C111.1", "C112.1", "P2.1", "P3.1", "HMDB_name", "HMDB_name_all", "HMDB_ID_all", "sec_HMDB_ID", - "HMDB_key", "sec_HMDB_ID_rlvc", "name", "relevance", "descr", "origin", "fluids", "tissue", "disease", - "pathway", "HMDB_code", "avg_ctrls", "sd_ctrls", "nr_ctrls", "C101.1_Zscore", "C102.1_Zscore", "C103.1_Zscore", - "C104.1_Zscore", "C105.1_Zscore", "C106.1_Zscore", "C107.1_Zscore", "C108.1_Zscore", "C109.1_Zscore", - "C110.1_Zscore", "C111.1_Zscore", "C112.1_Zscore", "P2.1_Zscore", "P3.1_Zscore")) - expect_equal(round(calculate_zscores(test_outlist, "_Zscore", control_intensities, NULL, intensity_col_ids, startcol)$avg_ctrls, 3), - c(16129.167, 1150.0, 1231.250, 4015.833), tolerance = 0.001) - expect_equal(calculate_zscores(test_outlist, "_Zscore", control_intensities, NULL, intensity_col_ids, startcol)$P2.1_Zscore, - c(-0.2544103, 32.4586955, 13.6066674, 0.4037668), tolerance = 0.001) - - expect_type(calculate_zscores(test_outlist, "_RobustZscore", control_col_idx, perc, intensity_col_ids, startcol), "list") - expect_identical(colnames(calculate_zscores(test_outlist, "_RobustZscore", control_col_idx, perc, intensity_col_ids, startcol))[34:47], - c("C101.1_RobustZscore", "C102.1_RobustZscore", - "C103.1_RobustZscore", "C104.1_RobustZscore", "C105.1_RobustZscore", "C106.1_RobustZscore", - "C107.1_RobustZscore", "C108.1_RobustZscore", "C109.1_RobustZscore", "C110.1_RobustZscore", - "C111.1_RobustZscore", "C112.1_RobustZscore", "P2.1_RobustZscore", "P3.1_RobustZscore")) - expect_equal(calculate_zscores(test_outlist, "_RobustZscore", control_col_idx, perc, intensity_col_ids, startcol)$avg_ctrls, - c(1255.0, 1110.0, 1227.5, 2811.5), tolerance = 0.001) - expect_equal(calculate_zscores(test_outlist, "_RobustZscore", control_col_idx, perc, intensity_col_ids, startcol)$P2.1_RobustZscore, - c(9.1511750, 46.9804468, 16.8039663, 0.8565111), tolerance = 0.001) - - expect_type(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol), "list") - expect_identical(colnames(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol))[34:47], - c("C101.1_OutlierRemovedZscore", - "C102.1_OutlierRemovedZscore", "C103.1_OutlierRemovedZscore", "C104.1_OutlierRemovedZscore", - "C105.1_OutlierRemovedZscore", "C106.1_OutlierRemovedZscore", "C107.1_OutlierRemovedZscore", - "C108.1_OutlierRemovedZscore", "C109.1_OutlierRemovedZscore", "C110.1_OutlierRemovedZscore", - "C111.1_OutlierRemovedZscore", "C112.1_OutlierRemovedZscore", "P2.1_OutlierRemovedZscore", - "P3.1_OutlierRemovedZscore") - ) - expect_equal(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol)$avg_ctrls, - c(1231.818, 1077.273, 1231.250, 2649.091), tolerance = 0.001) - expect_equal(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol)$nr_ctrls, - c(11, 11, 12, 11)) - expect_equal(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol)$P2.1_OutlierRemovedZscore, - c(8.9955723, 44.9136860, 13.6066674, 0.9345077), tolerance = 0.001) + expect_type( + calculate_zscores( + test_outlist, + "_Zscore", + control_intensities, + NULL, + intensity_col_ids, + startcol + ), + "list" + ) + + expect_identical( + colnames( + calculate_zscores( + test_outlist, + "_Zscore", + control_intensities, + NULL, + intensity_col_ids, + startcol + ) + ), + c( + "plots", "C101.1", "C102.1", "C103.1", "C104.1", "C105.1", "C106.1", "C107.1", "C108.1", "C109.1", "C110.1", "C111.1", + "C112.1", "P2.1", "P3.1", "HMDB_name", "HMDB_name_all", "HMDB_ID_all", "sec_HMDB_ID", "HMDB_key", "sec_HMDB_ID_rlvc", + "name", "relevance", "descr", "origin", "fluids", "tissue", "disease", "pathway", "HMDB_code", "avg_ctrls", "sd_ctrls", + "nr_ctrls", "C101.1_Zscore", "C102.1_Zscore", "C103.1_Zscore", "C104.1_Zscore", "C105.1_Zscore", "C106.1_Zscore", + "C107.1_Zscore", "C108.1_Zscore", "C109.1_Zscore", "C110.1_Zscore", "C111.1_Zscore", "C112.1_Zscore", "P2.1_Zscore", + "P3.1_Zscore" + ) + ) + expect_equal( + round( + calculate_zscores( + test_outlist, + "_Zscore", + control_intensities, + NULL, + intensity_col_ids, + startcol + )$avg_ctrls, 3 + ), + c(16129.167, 1150.0, 1231.250, 4015.833), + tolerance = 0.001 + ) + expect_equal( + calculate_zscores( + test_outlist, + "_Zscore", + control_intensities, + NULL, + intensity_col_ids, + startcol + )$P2.1_Zscore, + c(-0.2544103, 32.4586955, 13.6066674, 0.4037668), + tolerance = 0.001 + ) + + expect_type( + calculate_zscores( + test_outlist, + "_RobustZscore", + control_col_idx, + perc, + intensity_col_ids, + startcol + ), + "list" + ) + + expect_identical( + colnames( + calculate_zscores( + test_outlist, + "_RobustZscore", + control_col_idx, + perc, + intensity_col_ids, + startcol + ) + )[34:47], + c( + "C101.1_RobustZscore", "C102.1_RobustZscore", "C103.1_RobustZscore", "C104.1_RobustZscore", "C105.1_RobustZscore", + "C106.1_RobustZscore", "C107.1_RobustZscore", "C108.1_RobustZscore", "C109.1_RobustZscore", "C110.1_RobustZscore", + "C111.1_RobustZscore", "C112.1_RobustZscore", "P2.1_RobustZscore", "P3.1_RobustZscore" + ) + ) + + expect_equal( + calculate_zscores( + test_outlist, + "_RobustZscore", + control_col_idx, + perc, + intensity_col_ids, + startcol + )$avg_ctrls, + c(1255.0, 1110.0, 1227.5, 2811.5), + tolerance = 0.001 + ) + + expect_equal( + calculate_zscores( + test_outlist, + "_RobustZscore", + control_col_idx, + perc, + intensity_col_ids, + startcol + )$P2.1_RobustZscore, + c(9.1511750, 46.9804468, 16.8039663, 0.8565111), + tolerance = 0.001 + ) + + expect_type( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + ), + "list" + ) + + expect_identical( + colnames( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + ) + )[34:47], + c( + "C101.1_OutlierRemovedZscore", "C102.1_OutlierRemovedZscore", "C103.1_OutlierRemovedZscore", + "C104.1_OutlierRemovedZscore", "C105.1_OutlierRemovedZscore", "C106.1_OutlierRemovedZscore", + "C107.1_OutlierRemovedZscore", "C108.1_OutlierRemovedZscore", "C109.1_OutlierRemovedZscore", + "C110.1_OutlierRemovedZscore", "C111.1_OutlierRemovedZscore", "C112.1_OutlierRemovedZscore", + "P2.1_OutlierRemovedZscore", "P3.1_OutlierRemovedZscore" + ) + ) + + expect_equal( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + )$avg_ctrls, + c(1231.818, 1077.273, 1231.250, 2649.091), + tolerance = 0.001 + ) + expect_equal( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + )$nr_ctrls, + c(11, 11, 12, 11) + ) + expect_equal( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + )$P2.1_OutlierRemovedZscore, + c(8.9955723, 44.9136860, 13.6066674, 0.9345077), + tolerance = 0.001 + ) }) -testthat::test_that("Use robust scaler", { +testthat::test_that("robust_scaler: Use robust scaler", { control_intensities <- read.delim(test_path("fixtures", "test_control_intensities.txt")) - + control_col_idx <- c(1) perc <- 5 expect_type(robust_scaler(control_intensities[1, 1:12], control_col_idx, perc), "double") expect_length(robust_scaler(control_intensities[1, 1:12], control_col_idx, perc), 10) - expect_equal(robust_scaler(control_intensities[1, 1:12], control_col_idx, perc), - c(1050, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1450, 1650), tolerance = 0.001) + expect_equal( + robust_scaler( + control_intensities[1, 1:12], + control_col_idx, + perc + ), + c(1050, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1450, 1650), + tolerance = 0.001 + ) }) -testthat::test_that("Use Grubbs outlier removal", { +testthat::test_that("remove_outliers_grubbs: Use Grubbs outlier removal", { control_intensities <- read.delim(test_path("fixtures", "test_control_intensities.txt")) - + outlier_threshold <- 2 expect_type(remove_outliers_grubbs(control_intensities[1, ], outlier_threshold), "list") expect_length(remove_outliers_grubbs(control_intensities[1, ], outlier_threshold), 11) - expect_equal(as.numeric(remove_outliers_grubbs(control_intensities[1, ], outlier_threshold)), - c(1000, 1100, 1300, 1650, 1050, 1150, 1350, 1450, 1200, 1050, 1250), tolerance = 0.001) - expect_identical(colnames(remove_outliers_grubbs(control_intensities[1, ], outlier_threshold)), - c("C101.1", "C102.1", "C103.1", "C104.1", "C106.1", "C107.1", "C108.1", - "C109.1", "C110.1", "C111.1", "C112.1")) + expect_equal( + as.numeric(remove_outliers_grubbs( + control_intensities[1, ], + outlier_threshold + )), + c(1000, 1100, 1300, 1650, 1050, 1150, 1350, 1450, 1200, 1050, 1250), + tolerance = 0.001 + ) + expect_identical( + colnames( + remove_outliers_grubbs( + control_intensities[1, ], + outlier_threshold + ) + ), + c("C101.1", "C102.1", "C103.1", "C104.1", "C106.1", "C107.1", "C108.1", "C109.1", "C110.1", "C111.1", "C112.1") + ) }) -testthat::test_that("Save data to RData and txt file", { +testthat::test_that("save_to_rdata_and_txt: Save data to RData and txt file", { test_df <- data.frame( C101.1 = c(100, 200, 300, 400), C102.1 = c(125, 225, 325, 425), @@ -123,14 +289,14 @@ testthat::test_that("Save data to RData and txt file", { expect_identical(colnames(df), c("C101.1", "C102.1", "P2.1", "P3.1", "HMDB_name", "sec_HMDB_ID")) expect_equal(df$P2.1, c(150, 250, 350, 450)) } - + check_cols_and_values(read.delim("test_df.txt")) check_cols_and_values(get(load("test_df.RData"))) file.remove("test_df.txt", "test_df.RData") }) -testthat::test_that("Check row height and column width in a workbook", { +testthat::test_that("set_row_height_col_width_wb: Check row height and column width in a workbook", { test_wb_plots <- openxlsx::createWorkbook("Test") openxlsx::addWorksheet(test_wb_plots, "Test_with_plots") @@ -142,13 +308,30 @@ testthat::test_that("Check row height and column width in a workbook", { correct_col_widths <- c("5", "20", "20", "20", "20") names(correct_col_widths) <- c(1, 2, 3, 4, 5) attr(correct_col_widths, "hidden") <- c("0", "0", "0", "0", "0") - expect_identical(set_row_height_col_width_wb(test_wb_plots, sheetname_with_plots, num_rows_df, num_cols_df, plot_width, - plots_present = TRUE)$colWidths[[1]], correct_col_widths) + expect_identical( + set_row_height_col_width_wb( + test_wb_plots, + sheetname_with_plots, + num_rows_df, + num_cols_df, + plot_width, + plots_present = TRUE + )$colWidths[[1]], + correct_col_widths + ) correct_row_heights <- c("140", "140", "140", "140", "140") names(correct_row_heights) <- c(2, 3, 4, 5, 6) - expect_identical(set_row_height_col_width_wb(test_wb_plots, sheetname_with_plots, num_rows_df, num_cols_df, plot_width, - plots_present = TRUE)$rowHeights[[1]], correct_row_heights) + expect_identical( + set_row_height_col_width_wb(test_wb_plots, + sheetname_with_plots, + num_rows_df, + num_cols_df, + plot_width, + plots_present = TRUE + )$rowHeights[[1]], + correct_row_heights + ) rm(test_wb_plots) @@ -159,13 +342,112 @@ testthat::test_that("Check row height and column width in a workbook", { correct_col_widths <- c("20", "20", "20", "20", "20") names(correct_col_widths) <- c(1, 2, 3, 4, 5) attr(correct_col_widths, "hidden") <- c("0", "0", "0", "0", "0") - expect_identical(set_row_height_col_width_wb(test_wb_no_plots, sheetname_no_plots, num_rows_df, num_cols_df, plot_width = NULL, - plots_present = FALSE)$colWidths[[1]], correct_col_widths) + expect_identical( + set_row_height_col_width_wb( + test_wb_no_plots, + sheetname_no_plots, + num_rows_df, + num_cols_df, + plot_width = NULL, + plots_present = FALSE + )$colWidths[[1]], + correct_col_widths + ) correct_row_heights <- c("18", "18", "18", "18", "18") names(correct_row_heights) <- c(1, 2, 3, 4, 5) - expect_identical(set_row_height_col_width_wb(test_wb_no_plots, sheetname_no_plots, num_rows_df, num_cols_df, plot_width = NULL, - plots_present = FALSE)$rowHeights[[1]], correct_row_heights) + expect_identical( + set_row_height_col_width_wb( + test_wb_no_plots, + sheetname_no_plots, + num_rows_df, + num_cols_df, + plot_width = NULL, + plots_present = FALSE + )$rowHeights[[1]], + correct_row_heights + ) rm(test_wb_no_plots) }) + +testthat::test_that("transform_ints_df_plots: Check transformation of dataframe to long format", { + test_intensities_plots_df <- data.frame( + C101.1 = c(100, 200, 300, 400), + C102.1 = c(125, 225, 325, 425), + P2000M00002.1 = c(150, 250, 350, 450), + P3000M00003.1 = c(175, 275, 375, 475), + HMDB_key = c("metab_1", "metab_2", "metab_3", "metab_4") + ) + + test_row_index <- 1 + + expect_equal(dim(transform_ints_df_plots(test_intensities_plots_df, test_row_index)), c(4, 4)) + expect_identical( + colnames( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + ) + ), + c("Samples", "Intensities", "type", "group_size") + ) + expect_identical( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$Samples, + c("C", "C", "P2000M00002", "P3000M00003") + ) + expect_identical( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$Intensities, + c(100, 125, 150, 175) + ) + expect_identical( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$type, + c("Control", "Control", "Patients", "Patients") + ) + expect_equal( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$group_size, + c(2, 2, 1, 1) + ) + + test_row_index <- 2 + expect_identical( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$Intensities, + c(200, 225, 250, 275) + ) +}) + +testthat::test_that("create_boxplot_excel: Create a boxplot for the Excel", { + test_ints_plots_df_long <- data.frame( + Samples = c("C", "C", "C", "C", "C", "P1", "P2", "P3", "P4", "P5"), + Intensities = c(150, 250, 225, 300, 175, 325, 600, 150, 350, 275), + type = c( + "Control", "Control", "Control", "Control", "Control", + "Patients", "Patients", "Patients", "Patients", "Patients" + ), + group_size = c(5, 5, 5, 5, 5, 1, 1, 1, 1, 1) + ) + + test_hmdb_id <- "Test Metab 1" + + expect_silent(create_boxplot_excel(test_ints_plots_df_long, test_hmdb_id)) + + expect_doppelganger( + title = "create boxplot excel", + fig = create_boxplot_excel(test_ints_plots_df_long, test_hmdb_id) + ) +}) From 97ec94d8930202aac61afb6c3d73107c5b72c8db Mon Sep 17 00:00:00 2001 From: ALuesink Date: Tue, 16 Dec 2025 09:52:48 +0100 Subject: [PATCH 08/10] Added vdiffr to DIMS test dependencies --- .github/workflows/dims_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dims_test.yml b/.github/workflows/dims_test.yml index fa2a8aa..b547d12 100644 --- a/.github/workflows/dims_test.yml +++ b/.github/workflows/dims_test.yml @@ -23,7 +23,7 @@ jobs: uses: actions/checkout@v4 - name: Install dependencies - run: Rscript -e "install.packages(c('testthat', 'withr'))" + run: Rscript -e "install.packages(c('testthat', 'withr', 'vdiffr'))" - name: Run tests run: Rscript tests/testthat.R From 1fc1d3ac84cbd88b5a4b5e1a306c015c3d61512a Mon Sep 17 00:00:00 2001 From: ALuesink Date: Tue, 16 Dec 2025 10:19:14 +0100 Subject: [PATCH 09/10] Fix error numeric values --- DIMS/export/generate_excel_functions.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/DIMS/export/generate_excel_functions.R b/DIMS/export/generate_excel_functions.R index 727437f..ef253a8 100644 --- a/DIMS/export/generate_excel_functions.R +++ b/DIMS/export/generate_excel_functions.R @@ -51,7 +51,7 @@ calculate_zscores <- function(outlist, zscore_type, control_cols, stat_filter, i } else { # Calculate mean, sd and number of remaining controls, remove outlier controls by using grubbs test intensities_without_outliers <- remove_outliers_grubbs( - outlist[metabolite_index, control_cols], + as.numeric(outlist[metabolite_index, control_cols]), stat_filter ) outlist$avg_ctrls[metabolite_index] <- mean(intensities_without_outliers) @@ -83,7 +83,7 @@ robust_scaler <- function(control_intensities, control_col_ids, perc = 5) { nr_to_remove <- ceiling(length(control_col_ids) * perc / 100) sorted_control_intensities <- sort(as.numeric(control_intensities)) trimmed_control_intensities <- sorted_control_intensities[(nr_to_remove + 1): - (length(sorted_control_intensities) - nr_to_remove)] + (length(sorted_control_intensities) - nr_to_remove)] return(trimmed_control_intensities) } @@ -94,8 +94,6 @@ remove_outliers_grubbs <- function(control_intensities, outlier_threshold = 2) { #' @param outlier_threshold: Threshold for outliers which will be removed from controls (float) #' #' @return trimmed_control_intensities: Intensities trimmed for outliers - - control_intensities <- as.numeric(control_intensities) mean_permetabolite <- mean(as.numeric(control_intensities)) stdev_permetabolite <- sd(as.numeric(control_intensities)) zscores_permetabolite <- (control_intensities - mean_permetabolite) / stdev_permetabolite @@ -105,7 +103,6 @@ remove_outliers_grubbs <- function(control_intensities, outlier_threshold = 2) { } else { trimmed_control_intensities <- control_intensities } - trimmed_control_intensities <- as.numeric(trimmed_control_intensities) return(trimmed_control_intensities) } From bb007f0aeebfafef3eaab6616987b1adca0cde88 Mon Sep 17 00:00:00 2001 From: ALuesink Date: Tue, 16 Dec 2025 10:51:47 +0100 Subject: [PATCH 10/10] Snapshot boxplot --- .../generate_excel/create-boxplot-excel.svg | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 DIMS/tests/testthat/_snaps/generate_excel/create-boxplot-excel.svg diff --git a/DIMS/tests/testthat/_snaps/generate_excel/create-boxplot-excel.svg b/DIMS/tests/testthat/_snaps/generate_excel/create-boxplot-excel.svg new file mode 100644 index 0000000..4e0913f --- /dev/null +++ b/DIMS/tests/testthat/_snaps/generate_excel/create-boxplot-excel.svg @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +- +- +- +- +- + + +200 +300 +400 +500 +600 + + + + + + + + + + + +C +P1 +P2 +P3 +P4 +P5 +Test Metab 1 + +