diff --git a/.github/workflows/dims_test.yml b/.github/workflows/dims_test.yml index fa2a8aa2..b547d126 100644 --- a/.github/workflows/dims_test.yml +++ b/.github/workflows/dims_test.yml @@ -23,7 +23,7 @@ jobs: uses: actions/checkout@v4 - name: Install dependencies - run: Rscript -e "install.packages(c('testthat', 'withr'))" + run: Rscript -e "install.packages(c('testthat', 'withr', 'vdiffr'))" - name: Run tests run: Rscript tests/testthat.R diff --git a/.lintr b/.lintr index a952bb1b..db672220 100644 --- a/.lintr +++ b/.lintr @@ -1,6 +1,7 @@ linters: linters_with_defaults( line_length_linter(127), object_usage_linter = NULL, - return_linter = NULL + return_linter = NULL, + pipe_consistency_linter = lintr::pipe_consistency_linter("auto") ) encoding: "UTF-8" diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R index 53662649..48dd7248 100644 --- a/DIMS/GenerateExcel.R +++ b/DIMS/GenerateExcel.R @@ -115,10 +115,6 @@ if (z_score == 1) { # save outlist for GenerateQC step save(outlist, file = "outlist.RData") - # get the IDs of the patients and sort - patient_ids <- unique(gsub("\\.[0-9]*", "", patient_columns)) - patient_ids <- patient_ids[order(nchar(patient_ids), patient_ids)] - # get Helix IDs for extra Excel file metabolite_files <- list.files( path = paste(path_metabolite_groups, "Diagnostics", sep = "/"), @@ -151,51 +147,56 @@ if (z_score == 1) { relocate(c(HMDB_name, HMDB_name_all, HMDB_ID_all, sec_HMDB_ID), .after = last_col()) %>% rename(Name = H_Name) - for (row_index in seq_len(nrow(outlist))) { - # get HMDB ID - hmdb_name <- rownames(outlist[row_index, ]) + # Get intensity columns for controls and patients + intensities_plots_df <- outlist %>% select(HMDB_key, matches("^C|^P[0-9]"), -ends_with("_Zscore")) - # get intensities of controls and patient for a metabolite, get intensity columns, - # pivot to long format, arrange Samples nummerically, change Sample names, get group size and - # set Intensities to numeric. - intensities <- outlist %>% + for (row_index in seq_len(nrow(intensities_plots_df))) { + # get HMDB ID + hmdb_id <- intensities_plots_df %>% slice(row_index) %>% - select(all_of(intensity_col_ids)) %>% - as.data.frame() %>% - pivot_longer(everything(), names_to = "Samples", values_to = "Intensities") %>% - arrange(nchar(Samples)) %>% - mutate( - Samples = gsub("\\..*", "", Samples), - Samples = gsub("(C).*", "\\1", Samples), - Intensities = as.numeric(Intensities), - type = ifelse(Samples == "C", "Control", "Patients") - ) %>% - group_by(Samples) %>% - mutate(group_size = n()) %>% - ungroup() + pull(HMDB_key) + + # Transform dataframe to long format + intensities_plots_df_long <- transform_ints_df_plots(intensities_plots_df, row_index) # set plot width to 40 times the number of samples - plot_width <- length(unique(intensities$Samples)) * 40 + plot_width <- length(unique(intensities_plots_df_long$Samples)) * 40 col_width <- plot_width * 2 + if (hmdb_id %in% metab_list_helix) { + # Make separate plot for Helix Excel containing all samples + plot.new() + tmp_png_helix <- paste0("plots/plot_helix_", hmdb_id, ".png") + png(filename = tmp_png_helix, width = plot_width, height = 300) + + boxplot_excel_helix <- create_boxplot_excel(intensities_plots_df_long, hmdb_id) + + print(boxplot_excel_helix) + dev.off() + + openxlsx::insertImage( + wb_helix_intensities, + sheetname, + tmp_png_helix, + startRow = row_helix, + startCol = 1, + height = 560, + width = col_width, + units = "px" + ) + row_helix <- row_helix + 1 + } + + # Remove postive controls and SST mix samples, (e.g. P1001, P1002, P1003, P1005) + intensities_plots_df_long <- intensities_plots_df_long %>% filter(!grepl("^P[0-9]{4}$", Samples)) + plot.new() - tmp_png <- paste0("plots/plot_", hmdb_name, ".png") + tmp_png <- paste0("plots/plot_", hmdb_id, ".png") png(filename = tmp_png, width = plot_width, height = 300) - # plot intensities for the controls and patients, use boxplot if group size is above 2, otherwise use a dash/line - p <- ggplot(intensities, aes(Samples, Intensities)) + - geom_boxplot(data = subset(intensities, group_size > 2), aes(fill = type)) + - geom_point(data = subset(intensities, group_size <= 2), shape = "-", size = 10, aes(colour = type, fill = type)) + - scale_fill_manual(values = c("Control" = "green", "Patients" = "#b20000")) + - scale_color_manual(values = c("Control" = "black", "Patients" = "#b20000")) + - theme( - legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1), axis.title = element_blank(), - plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), axis.text = element_text(size = 12, face = "bold"), - panel.background = element_rect(fill = "white", colour = "black") - ) + - ggtitle(hmdb_name) - - print(p) + boxplot_excel <- create_boxplot_excel(intensities_plots_df_long, hmdb_id) + + print(boxplot_excel) dev.off() # place the plot in the Excel file @@ -209,21 +210,6 @@ if (z_score == 1) { width = col_width, units = "px" ) - - if (hmdb_name %in% metab_list_helix) { - print(row_helix) - openxlsx::insertImage( - wb_helix_intensities, - sheetname, - tmp_png, - startRow = row_helix, - startCol = 1, - height = 560, - width = col_width, - units = "px" - ) - row_helix <- row_helix + 1 - } } wb_intensities <- set_row_height_col_width_wb( wb_intensities, diff --git a/DIMS/export/generate_excel_functions.R b/DIMS/export/generate_excel_functions.R index 5183758c..ef253a8e 100644 --- a/DIMS/export/generate_excel_functions.R +++ b/DIMS/export/generate_excel_functions.R @@ -34,22 +34,29 @@ calculate_zscores <- function(outlist, zscore_type, control_cols, stat_filter, i if (zscore_type == "_Zscore") { # Calculate mean and sd with all controls outlist$avg_ctrls <- apply(control_cols, 1, function(x) mean(as.numeric(x), na.rm = TRUE)) - outlist$sd_ctrls <- apply(control_cols, 1, function(x) sd(as.numeric(x), na.rm = TRUE)) + outlist$sd_ctrls <- apply(control_cols, 1, function(x) sd(as.numeric(x), na.rm = TRUE)) } else { if (length(control_cols) > 3) { for (metabolite_index in seq_len(nrow(outlist))) { if (zscore_type == "_RobustZscore") { # Calculate mean and sd, remove outlier controls by using robust scaler - outlist$avg_ctrls[metabolite_index] <- mean(robust_scaler(outlist[metabolite_index, control_cols], - control_cols, stat_filter)) - outlist$sd_ctrls[metabolite_index] <- sd(robust_scaler(outlist[metabolite_index, control_cols], - control_cols, stat_filter)) + outlist$avg_ctrls[metabolite_index] <- mean(robust_scaler( + outlist[metabolite_index, control_cols], + control_cols, stat_filter + )) + outlist$sd_ctrls[metabolite_index] <- sd(robust_scaler( + outlist[metabolite_index, control_cols], + control_cols, stat_filter + )) } else { # Calculate mean, sd and number of remaining controls, remove outlier controls by using grubbs test - intensities_without_outliers <- remove_outliers_grubbs(as.numeric(outlist[metabolite_index, control_cols]), stat_filter) + intensities_without_outliers <- remove_outliers_grubbs( + as.numeric(outlist[metabolite_index, control_cols]), + stat_filter + ) outlist$avg_ctrls[metabolite_index] <- mean(intensities_without_outliers) - outlist$sd_ctrls[metabolite_index] <- sd(intensities_without_outliers) - outlist$nr_ctrls[metabolite_index] <- length(intensities_without_outliers) + outlist$sd_ctrls[metabolite_index] <- sd(intensities_without_outliers) + outlist$nr_ctrls[metabolite_index] <- length(intensities_without_outliers) } } } @@ -61,7 +68,7 @@ calculate_zscores <- function(outlist, zscore_type, control_cols, stat_filter, i }) outlist <- cbind(outlist, outlist_zscores) colnames(outlist)[startcol:ncol(outlist)] <- paste0(colnames(outlist)[intensity_col_ids], zscore_type) - + return(outlist) } @@ -75,7 +82,7 @@ robust_scaler <- function(control_intensities, control_col_ids, perc = 5) { #' @return trimmed_control_intensities: Intensities trimmed for outliers nr_to_remove <- ceiling(length(control_col_ids) * perc / 100) sorted_control_intensities <- sort(as.numeric(control_intensities)) - trimmed_control_intensities <- sorted_control_intensities[(nr_to_remove + 1) : + trimmed_control_intensities <- sorted_control_intensities[(nr_to_remove + 1): (length(sorted_control_intensities) - nr_to_remove)] return(trimmed_control_intensities) } @@ -129,3 +136,64 @@ set_row_height_col_width_wb <- function(wb, sheetname, num_rows_df, num_cols_df, } return(wb) } + +#' Transform a dataframe with intensities to long format +#' +#' Get intensities of controls and patient for the selected metabolite, +#' pivot to long format, arrange Samples nummerically, change Sample names, get group size and +#' set Intensities to numeric. +#' +#' @param intensities_plots_df: a dataframe with HMDB_key column and intensities for all samples +#' +#' @returns intensities_plots_df_long: a dataframe with on each row a sample and their intensity +transform_ints_df_plots <- function(intensities_plots_df, row_index) { + intensities_plots_df_long <- intensities_plots_df %>% + slice(row_index) %>% + select(-HMDB_key) %>% + as.data.frame() %>% + pivot_longer(everything(), names_to = "Samples", values_to = "Intensities") %>% + arrange(nchar(Samples)) %>% + mutate( + Samples = gsub("\\..*", "", Samples), + Samples = gsub("(C).*", "\\1", Samples), + Intensities = as.numeric(Intensities), + type = ifelse(Samples == "C", "Control", "Patients") + ) %>% + group_by(Samples) %>% + mutate(group_size = n()) %>% + ungroup() + + return(intensities_plots_df_long) +} + + +#' Create a plot of intensities of samples for Excel +#' Use boxplot if group size is above 2, otherwise use a dash/line +#' +#' @param intensities_plots_df_long: a dataframe with on each row a sample and their intensity +#' @param hmdb_id: HMDB ID of the selected metabolite +#' +#' @returns boxplot_excel: ggplot2 object containing the plot of intensities +create_boxplot_excel <- function(intensities_plots_df_long, hmdb_id) { + boxplot_excel <- ggplot(intensities_plots_df_long, aes(Samples, Intensities)) + + geom_boxplot(data = subset(intensities_plots_df_long, group_size > 2), aes(fill = type)) + + geom_point( + data = subset(intensities_plots_df_long, group_size <= 2), + shape = "-", + size = 10, + aes(colour = type, fill = type) + ) + + scale_fill_manual(values = c("Control" = "green", "Patients" = "#b20000")) + + scale_color_manual(values = c("Control" = "black", "Patients" = "#b20000")) + + theme( + legend.position = "none", + axis.text = element_text(size = 12, face = "bold"), + axis.text.x = element_text(angle = 90, hjust = 1), + axis.title = element_blank(), + plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), + panel.background = element_rect(fill = "white", colour = "black") + ) + + ggtitle(hmdb_id) + + return(boxplot_excel) +} diff --git a/DIMS/tests/testthat/_snaps/generate_excel/create-boxplot-excel.svg b/DIMS/tests/testthat/_snaps/generate_excel/create-boxplot-excel.svg new file mode 100644 index 00000000..4e0913fb --- /dev/null +++ b/DIMS/tests/testthat/_snaps/generate_excel/create-boxplot-excel.svg @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +- +- +- +- +- + + +200 +300 +400 +500 +600 + + + + + + + + + + + +C +P1 +P2 +P3 +P4 +P5 +Test Metab 1 + + diff --git a/DIMS/tests/testthat/test_generate_excel.R b/DIMS/tests/testthat/test_generate_excel.R index 7a1b4280..817bb36f 100644 --- a/DIMS/tests/testthat/test_generate_excel.R +++ b/DIMS/tests/testthat/test_generate_excel.R @@ -5,107 +5,273 @@ library("ggplot2") library("reshape2") library("openxlsx") +library("vdiffr") suppressMessages(library("tidyr")) suppressMessages(library("dplyr")) suppressMessages(library("stringr")) source("../../export/generate_excel_functions.R") -testthat::test_that("Get indices of columns and dataframe of intensities of a given label", { +testthat::test_that("get_intensities_cols: Get indices of columns and dataframe of intensities of a given label", { test_outlist <- read.delim(test_path("fixtures", "test_outlist.txt")) - + control_label <- "C" case_label <- "P" expect_equal(get_intensities_cols(test_outlist, control_label)$col_idx, c(2:13)) - expect_equal(colnames(get_intensities_cols(test_outlist, control_label)$df_intensities), - c("C101.1", "C102.1", "C103.1", "C104.1", "C105.1", "C106.1", "C107.1", "C108.1", - "C109.1", "C110.1", "C111.1", "C112.1")) - expect_equal(rownames(get_intensities_cols(test_outlist, control_label)$df_intensities), - c("HMDB001", "HMDB002", "HMDB003", "HMDB004")) + expect_equal( + colnames(get_intensities_cols(test_outlist, control_label)$df_intensities), + c("C101.1", "C102.1", "C103.1", "C104.1", "C105.1", "C106.1", "C107.1", "C108.1", "C109.1", "C110.1", "C111.1", "C112.1") + ) + expect_equal( + rownames(get_intensities_cols(test_outlist, control_label)$df_intensities), + c("HMDB001", "HMDB002", "HMDB003", "HMDB004") + ) expect_equal(get_intensities_cols(test_outlist, control_label)$df_intensities$C101.1, c(1000, 1200, 1300, 1400)) expect_equal(get_intensities_cols(test_outlist, case_label)$col_idx, c(14, 15)) expect_equal(colnames(get_intensities_cols(test_outlist, case_label)$df_intensities), c("P2.1", "P3.1")) }) -testthat::test_that("Calculating Z-scores using different methods for excluding controls", { +testthat::test_that("calculate_zscores: Calculating Z-scores using different methods for excluding controls", { test_outlist <- read.delim(test_path("fixtures", "test_outlist.txt")) control_intensities <- read.delim(test_path("fixtures", "test_control_intensities.txt")) - + control_col_idx <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13) intensity_col_ids <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) startcol <- ncol(test_outlist) + 4 perc <- 5 outlier_threshold <- 2 - expect_type(calculate_zscores(test_outlist, "_Zscore", control_intensities, NULL, intensity_col_ids, startcol), "list") - expect_identical(colnames(calculate_zscores(test_outlist, "_Zscore", control_intensities, NULL, intensity_col_ids, startcol)), - c("plots", "C101.1", "C102.1", "C103.1", "C104.1", "C105.1", "C106.1", "C107.1", "C108.1", "C109.1", "C110.1", - "C111.1", "C112.1", "P2.1", "P3.1", "HMDB_name", "HMDB_name_all", "HMDB_ID_all", "sec_HMDB_ID", - "HMDB_key", "sec_HMDB_ID_rlvc", "name", "relevance", "descr", "origin", "fluids", "tissue", "disease", - "pathway", "HMDB_code", "avg_ctrls", "sd_ctrls", "nr_ctrls", "C101.1_Zscore", "C102.1_Zscore", "C103.1_Zscore", - "C104.1_Zscore", "C105.1_Zscore", "C106.1_Zscore", "C107.1_Zscore", "C108.1_Zscore", "C109.1_Zscore", - "C110.1_Zscore", "C111.1_Zscore", "C112.1_Zscore", "P2.1_Zscore", "P3.1_Zscore")) - expect_equal(round(calculate_zscores(test_outlist, "_Zscore", control_intensities, NULL, intensity_col_ids, startcol)$avg_ctrls, 3), - c(16129.167, 1150.0, 1231.250, 4015.833), tolerance = 0.001) - expect_equal(calculate_zscores(test_outlist, "_Zscore", control_intensities, NULL, intensity_col_ids, startcol)$P2.1_Zscore, - c(-0.2544103, 32.4586955, 13.6066674, 0.4037668), tolerance = 0.001) - - expect_type(calculate_zscores(test_outlist, "_RobustZscore", control_col_idx, perc, intensity_col_ids, startcol), "list") - expect_identical(colnames(calculate_zscores(test_outlist, "_RobustZscore", control_col_idx, perc, intensity_col_ids, startcol))[34:47], - c("C101.1_RobustZscore", "C102.1_RobustZscore", - "C103.1_RobustZscore", "C104.1_RobustZscore", "C105.1_RobustZscore", "C106.1_RobustZscore", - "C107.1_RobustZscore", "C108.1_RobustZscore", "C109.1_RobustZscore", "C110.1_RobustZscore", - "C111.1_RobustZscore", "C112.1_RobustZscore", "P2.1_RobustZscore", "P3.1_RobustZscore")) - expect_equal(calculate_zscores(test_outlist, "_RobustZscore", control_col_idx, perc, intensity_col_ids, startcol)$avg_ctrls, - c(1255.0, 1110.0, 1227.5, 2811.5), tolerance = 0.001) - expect_equal(calculate_zscores(test_outlist, "_RobustZscore", control_col_idx, perc, intensity_col_ids, startcol)$P2.1_RobustZscore, - c(9.1511750, 46.9804468, 16.8039663, 0.8565111), tolerance = 0.001) - - expect_type(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol), "list") - expect_identical(colnames(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol))[34:47], - c("C101.1_OutlierRemovedZscore", - "C102.1_OutlierRemovedZscore", "C103.1_OutlierRemovedZscore", "C104.1_OutlierRemovedZscore", - "C105.1_OutlierRemovedZscore", "C106.1_OutlierRemovedZscore", "C107.1_OutlierRemovedZscore", - "C108.1_OutlierRemovedZscore", "C109.1_OutlierRemovedZscore", "C110.1_OutlierRemovedZscore", - "C111.1_OutlierRemovedZscore", "C112.1_OutlierRemovedZscore", "P2.1_OutlierRemovedZscore", - "P3.1_OutlierRemovedZscore") - ) - expect_equal(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol)$avg_ctrls, - c(1231.818, 1077.273, 1231.250, 2649.091), tolerance = 0.001) - expect_equal(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol)$nr_ctrls, - c(11, 11, 12, 11)) - expect_equal(calculate_zscores(test_outlist, "_OutlierRemovedZscore", control_col_idx, outlier_threshold, intensity_col_ids, startcol)$P2.1_OutlierRemovedZscore, - c(8.9955723, 44.9136860, 13.6066674, 0.9345077), tolerance = 0.001) + expect_type( + calculate_zscores( + test_outlist, + "_Zscore", + control_intensities, + NULL, + intensity_col_ids, + startcol + ), + "list" + ) + + expect_identical( + colnames( + calculate_zscores( + test_outlist, + "_Zscore", + control_intensities, + NULL, + intensity_col_ids, + startcol + ) + ), + c( + "plots", "C101.1", "C102.1", "C103.1", "C104.1", "C105.1", "C106.1", "C107.1", "C108.1", "C109.1", "C110.1", "C111.1", + "C112.1", "P2.1", "P3.1", "HMDB_name", "HMDB_name_all", "HMDB_ID_all", "sec_HMDB_ID", "HMDB_key", "sec_HMDB_ID_rlvc", + "name", "relevance", "descr", "origin", "fluids", "tissue", "disease", "pathway", "HMDB_code", "avg_ctrls", "sd_ctrls", + "nr_ctrls", "C101.1_Zscore", "C102.1_Zscore", "C103.1_Zscore", "C104.1_Zscore", "C105.1_Zscore", "C106.1_Zscore", + "C107.1_Zscore", "C108.1_Zscore", "C109.1_Zscore", "C110.1_Zscore", "C111.1_Zscore", "C112.1_Zscore", "P2.1_Zscore", + "P3.1_Zscore" + ) + ) + expect_equal( + round( + calculate_zscores( + test_outlist, + "_Zscore", + control_intensities, + NULL, + intensity_col_ids, + startcol + )$avg_ctrls, 3 + ), + c(16129.167, 1150.0, 1231.250, 4015.833), + tolerance = 0.001 + ) + expect_equal( + calculate_zscores( + test_outlist, + "_Zscore", + control_intensities, + NULL, + intensity_col_ids, + startcol + )$P2.1_Zscore, + c(-0.2544103, 32.4586955, 13.6066674, 0.4037668), + tolerance = 0.001 + ) + + expect_type( + calculate_zscores( + test_outlist, + "_RobustZscore", + control_col_idx, + perc, + intensity_col_ids, + startcol + ), + "list" + ) + + expect_identical( + colnames( + calculate_zscores( + test_outlist, + "_RobustZscore", + control_col_idx, + perc, + intensity_col_ids, + startcol + ) + )[34:47], + c( + "C101.1_RobustZscore", "C102.1_RobustZscore", "C103.1_RobustZscore", "C104.1_RobustZscore", "C105.1_RobustZscore", + "C106.1_RobustZscore", "C107.1_RobustZscore", "C108.1_RobustZscore", "C109.1_RobustZscore", "C110.1_RobustZscore", + "C111.1_RobustZscore", "C112.1_RobustZscore", "P2.1_RobustZscore", "P3.1_RobustZscore" + ) + ) + + expect_equal( + calculate_zscores( + test_outlist, + "_RobustZscore", + control_col_idx, + perc, + intensity_col_ids, + startcol + )$avg_ctrls, + c(1255.0, 1110.0, 1227.5, 2811.5), + tolerance = 0.001 + ) + + expect_equal( + calculate_zscores( + test_outlist, + "_RobustZscore", + control_col_idx, + perc, + intensity_col_ids, + startcol + )$P2.1_RobustZscore, + c(9.1511750, 46.9804468, 16.8039663, 0.8565111), + tolerance = 0.001 + ) + + expect_type( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + ), + "list" + ) + + expect_identical( + colnames( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + ) + )[34:47], + c( + "C101.1_OutlierRemovedZscore", "C102.1_OutlierRemovedZscore", "C103.1_OutlierRemovedZscore", + "C104.1_OutlierRemovedZscore", "C105.1_OutlierRemovedZscore", "C106.1_OutlierRemovedZscore", + "C107.1_OutlierRemovedZscore", "C108.1_OutlierRemovedZscore", "C109.1_OutlierRemovedZscore", + "C110.1_OutlierRemovedZscore", "C111.1_OutlierRemovedZscore", "C112.1_OutlierRemovedZscore", + "P2.1_OutlierRemovedZscore", "P3.1_OutlierRemovedZscore" + ) + ) + + expect_equal( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + )$avg_ctrls, + c(1231.818, 1077.273, 1231.250, 2649.091), + tolerance = 0.001 + ) + expect_equal( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + )$nr_ctrls, + c(11, 11, 12, 11) + ) + expect_equal( + calculate_zscores( + test_outlist, + "_OutlierRemovedZscore", + control_col_idx, + outlier_threshold, + intensity_col_ids, + startcol + )$P2.1_OutlierRemovedZscore, + c(8.9955723, 44.9136860, 13.6066674, 0.9345077), + tolerance = 0.001 + ) }) -testthat::test_that("Use robust scaler", { +testthat::test_that("robust_scaler: Use robust scaler", { control_intensities <- read.delim(test_path("fixtures", "test_control_intensities.txt")) - + control_col_idx <- c(1) perc <- 5 expect_type(robust_scaler(control_intensities[1, 1:12], control_col_idx, perc), "double") expect_length(robust_scaler(control_intensities[1, 1:12], control_col_idx, perc), 10) - expect_equal(robust_scaler(control_intensities[1, 1:12], control_col_idx, perc), - c(1050, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1450, 1650), tolerance = 0.001) + expect_equal( + robust_scaler( + control_intensities[1, 1:12], + control_col_idx, + perc + ), + c(1050, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1450, 1650), + tolerance = 0.001 + ) }) -testthat::test_that("Use Grubbs outlier removal", { +testthat::test_that("remove_outliers_grubbs: Use Grubbs outlier removal", { control_intensities <- read.delim(test_path("fixtures", "test_control_intensities.txt")) - + outlier_threshold <- 2 expect_type(remove_outliers_grubbs(control_intensities[1, ], outlier_threshold), "list") expect_length(remove_outliers_grubbs(control_intensities[1, ], outlier_threshold), 11) - expect_equal(as.numeric(remove_outliers_grubbs(control_intensities[1, ], outlier_threshold)), - c(1000, 1100, 1300, 1650, 1050, 1150, 1350, 1450, 1200, 1050, 1250), tolerance = 0.001) - expect_identical(colnames(remove_outliers_grubbs(control_intensities[1, ], outlier_threshold)), - c("C101.1", "C102.1", "C103.1", "C104.1", "C106.1", "C107.1", "C108.1", - "C109.1", "C110.1", "C111.1", "C112.1")) + expect_equal( + as.numeric(remove_outliers_grubbs( + control_intensities[1, ], + outlier_threshold + )), + c(1000, 1100, 1300, 1650, 1050, 1150, 1350, 1450, 1200, 1050, 1250), + tolerance = 0.001 + ) + expect_identical( + colnames( + remove_outliers_grubbs( + control_intensities[1, ], + outlier_threshold + ) + ), + c("C101.1", "C102.1", "C103.1", "C104.1", "C106.1", "C107.1", "C108.1", "C109.1", "C110.1", "C111.1", "C112.1") + ) }) -testthat::test_that("Save data to RData and txt file", { +testthat::test_that("save_to_rdata_and_txt: Save data to RData and txt file", { test_df <- data.frame( C101.1 = c(100, 200, 300, 400), C102.1 = c(125, 225, 325, 425), @@ -123,14 +289,14 @@ testthat::test_that("Save data to RData and txt file", { expect_identical(colnames(df), c("C101.1", "C102.1", "P2.1", "P3.1", "HMDB_name", "sec_HMDB_ID")) expect_equal(df$P2.1, c(150, 250, 350, 450)) } - + check_cols_and_values(read.delim("test_df.txt")) check_cols_and_values(get(load("test_df.RData"))) file.remove("test_df.txt", "test_df.RData") }) -testthat::test_that("Check row height and column width in a workbook", { +testthat::test_that("set_row_height_col_width_wb: Check row height and column width in a workbook", { test_wb_plots <- openxlsx::createWorkbook("Test") openxlsx::addWorksheet(test_wb_plots, "Test_with_plots") @@ -142,13 +308,30 @@ testthat::test_that("Check row height and column width in a workbook", { correct_col_widths <- c("5", "20", "20", "20", "20") names(correct_col_widths) <- c(1, 2, 3, 4, 5) attr(correct_col_widths, "hidden") <- c("0", "0", "0", "0", "0") - expect_identical(set_row_height_col_width_wb(test_wb_plots, sheetname_with_plots, num_rows_df, num_cols_df, plot_width, - plots_present = TRUE)$colWidths[[1]], correct_col_widths) + expect_identical( + set_row_height_col_width_wb( + test_wb_plots, + sheetname_with_plots, + num_rows_df, + num_cols_df, + plot_width, + plots_present = TRUE + )$colWidths[[1]], + correct_col_widths + ) correct_row_heights <- c("140", "140", "140", "140", "140") names(correct_row_heights) <- c(2, 3, 4, 5, 6) - expect_identical(set_row_height_col_width_wb(test_wb_plots, sheetname_with_plots, num_rows_df, num_cols_df, plot_width, - plots_present = TRUE)$rowHeights[[1]], correct_row_heights) + expect_identical( + set_row_height_col_width_wb(test_wb_plots, + sheetname_with_plots, + num_rows_df, + num_cols_df, + plot_width, + plots_present = TRUE + )$rowHeights[[1]], + correct_row_heights + ) rm(test_wb_plots) @@ -159,13 +342,112 @@ testthat::test_that("Check row height and column width in a workbook", { correct_col_widths <- c("20", "20", "20", "20", "20") names(correct_col_widths) <- c(1, 2, 3, 4, 5) attr(correct_col_widths, "hidden") <- c("0", "0", "0", "0", "0") - expect_identical(set_row_height_col_width_wb(test_wb_no_plots, sheetname_no_plots, num_rows_df, num_cols_df, plot_width = NULL, - plots_present = FALSE)$colWidths[[1]], correct_col_widths) + expect_identical( + set_row_height_col_width_wb( + test_wb_no_plots, + sheetname_no_plots, + num_rows_df, + num_cols_df, + plot_width = NULL, + plots_present = FALSE + )$colWidths[[1]], + correct_col_widths + ) correct_row_heights <- c("18", "18", "18", "18", "18") names(correct_row_heights) <- c(1, 2, 3, 4, 5) - expect_identical(set_row_height_col_width_wb(test_wb_no_plots, sheetname_no_plots, num_rows_df, num_cols_df, plot_width = NULL, - plots_present = FALSE)$rowHeights[[1]], correct_row_heights) + expect_identical( + set_row_height_col_width_wb( + test_wb_no_plots, + sheetname_no_plots, + num_rows_df, + num_cols_df, + plot_width = NULL, + plots_present = FALSE + )$rowHeights[[1]], + correct_row_heights + ) rm(test_wb_no_plots) }) + +testthat::test_that("transform_ints_df_plots: Check transformation of dataframe to long format", { + test_intensities_plots_df <- data.frame( + C101.1 = c(100, 200, 300, 400), + C102.1 = c(125, 225, 325, 425), + P2000M00002.1 = c(150, 250, 350, 450), + P3000M00003.1 = c(175, 275, 375, 475), + HMDB_key = c("metab_1", "metab_2", "metab_3", "metab_4") + ) + + test_row_index <- 1 + + expect_equal(dim(transform_ints_df_plots(test_intensities_plots_df, test_row_index)), c(4, 4)) + expect_identical( + colnames( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + ) + ), + c("Samples", "Intensities", "type", "group_size") + ) + expect_identical( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$Samples, + c("C", "C", "P2000M00002", "P3000M00003") + ) + expect_identical( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$Intensities, + c(100, 125, 150, 175) + ) + expect_identical( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$type, + c("Control", "Control", "Patients", "Patients") + ) + expect_equal( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$group_size, + c(2, 2, 1, 1) + ) + + test_row_index <- 2 + expect_identical( + transform_ints_df_plots( + test_intensities_plots_df, + test_row_index + )$Intensities, + c(200, 225, 250, 275) + ) +}) + +testthat::test_that("create_boxplot_excel: Create a boxplot for the Excel", { + test_ints_plots_df_long <- data.frame( + Samples = c("C", "C", "C", "C", "C", "P1", "P2", "P3", "P4", "P5"), + Intensities = c(150, 250, 225, 300, 175, 325, 600, 150, 350, 275), + type = c( + "Control", "Control", "Control", "Control", "Control", + "Patients", "Patients", "Patients", "Patients", "Patients" + ), + group_size = c(5, 5, 5, 5, 5, 1, 1, 1, 1, 1) + ) + + test_hmdb_id <- "Test Metab 1" + + expect_silent(create_boxplot_excel(test_ints_plots_df_long, test_hmdb_id)) + + expect_doppelganger( + title = "create boxplot excel", + fig = create_boxplot_excel(test_ints_plots_df_long, test_hmdb_id) + ) +})