Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/dims_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
uses: actions/checkout@v4

- name: Install dependencies
run: Rscript -e "install.packages(c('testthat', 'withr'))"
run: Rscript -e "install.packages(c('testthat', 'withr', 'vdiffr'))"

- name: Run tests
run: Rscript tests/testthat.R
3 changes: 2 additions & 1 deletion .lintr
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
linters: linters_with_defaults(
line_length_linter(127),
object_usage_linter = NULL,
return_linter = NULL
return_linter = NULL,
pipe_consistency_linter = lintr::pipe_consistency_linter("auto")
)
encoding: "UTF-8"
96 changes: 41 additions & 55 deletions DIMS/GenerateExcel.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,6 @@ if (z_score == 1) {
# save outlist for GenerateQC step
save(outlist, file = "outlist.RData")

# get the IDs of the patients and sort
patient_ids <- unique(gsub("\\.[0-9]*", "", patient_columns))
patient_ids <- patient_ids[order(nchar(patient_ids), patient_ids)]

# get Helix IDs for extra Excel file
metabolite_files <- list.files(
path = paste(path_metabolite_groups, "Diagnostics", sep = "/"),
Expand Down Expand Up @@ -151,51 +147,56 @@ if (z_score == 1) {
relocate(c(HMDB_name, HMDB_name_all, HMDB_ID_all, sec_HMDB_ID), .after = last_col()) %>%
rename(Name = H_Name)

for (row_index in seq_len(nrow(outlist))) {
# get HMDB ID
hmdb_name <- rownames(outlist[row_index, ])
# Get intensity columns for controls and patients
intensities_plots_df <- outlist %>% select(HMDB_key, matches("^C|^P[0-9]"), -ends_with("_Zscore"))

# get intensities of controls and patient for a metabolite, get intensity columns,
# pivot to long format, arrange Samples nummerically, change Sample names, get group size and
# set Intensities to numeric.
intensities <- outlist %>%
for (row_index in seq_len(nrow(intensities_plots_df))) {
# get HMDB ID
hmdb_id <- intensities_plots_df %>%
slice(row_index) %>%
select(all_of(intensity_col_ids)) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "Samples", values_to = "Intensities") %>%
arrange(nchar(Samples)) %>%
mutate(
Samples = gsub("\\..*", "", Samples),
Samples = gsub("(C).*", "\\1", Samples),
Intensities = as.numeric(Intensities),
type = ifelse(Samples == "C", "Control", "Patients")
) %>%
group_by(Samples) %>%
mutate(group_size = n()) %>%
ungroup()
pull(HMDB_key)

# Transform dataframe to long format
intensities_plots_df_long <- transform_ints_df_plots(intensities_plots_df, row_index)

# set plot width to 40 times the number of samples
plot_width <- length(unique(intensities$Samples)) * 40
plot_width <- length(unique(intensities_plots_df_long$Samples)) * 40
col_width <- plot_width * 2

if (hmdb_id %in% metab_list_helix) {
# Make separate plot for Helix Excel containing all samples
plot.new()
tmp_png_helix <- paste0("plots/plot_helix_", hmdb_id, ".png")
png(filename = tmp_png_helix, width = plot_width, height = 300)

boxplot_excel_helix <- create_boxplot_excel(intensities_plots_df_long, hmdb_id)

print(boxplot_excel_helix)
dev.off()

openxlsx::insertImage(
wb_helix_intensities,
sheetname,
tmp_png_helix,
startRow = row_helix,
startCol = 1,
height = 560,
width = col_width,
units = "px"
)
row_helix <- row_helix + 1
}

# Remove postive controls and SST mix samples, (e.g. P1001, P1002, P1003, P1005)
intensities_plots_df_long <- intensities_plots_df_long %>% filter(!grepl("^P[0-9]{4}$", Samples))

plot.new()
tmp_png <- paste0("plots/plot_", hmdb_name, ".png")
tmp_png <- paste0("plots/plot_", hmdb_id, ".png")
png(filename = tmp_png, width = plot_width, height = 300)

# plot intensities for the controls and patients, use boxplot if group size is above 2, otherwise use a dash/line
p <- ggplot(intensities, aes(Samples, Intensities)) +
geom_boxplot(data = subset(intensities, group_size > 2), aes(fill = type)) +
geom_point(data = subset(intensities, group_size <= 2), shape = "-", size = 10, aes(colour = type, fill = type)) +
scale_fill_manual(values = c("Control" = "green", "Patients" = "#b20000")) +
scale_color_manual(values = c("Control" = "black", "Patients" = "#b20000")) +
theme(
legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1), axis.title = element_blank(),
plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), axis.text = element_text(size = 12, face = "bold"),
panel.background = element_rect(fill = "white", colour = "black")
) +
ggtitle(hmdb_name)

print(p)
boxplot_excel <- create_boxplot_excel(intensities_plots_df_long, hmdb_id)

print(boxplot_excel)
dev.off()

# place the plot in the Excel file
Expand All @@ -209,21 +210,6 @@ if (z_score == 1) {
width = col_width,
units = "px"
)

if (hmdb_name %in% metab_list_helix) {
print(row_helix)
openxlsx::insertImage(
wb_helix_intensities,
sheetname,
tmp_png,
startRow = row_helix,
startCol = 1,
height = 560,
width = col_width,
units = "px"
)
row_helix <- row_helix + 1
}
}
wb_intensities <- set_row_height_col_width_wb(
wb_intensities,
Expand Down
88 changes: 78 additions & 10 deletions DIMS/export/generate_excel_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,29 @@ calculate_zscores <- function(outlist, zscore_type, control_cols, stat_filter, i
if (zscore_type == "_Zscore") {
# Calculate mean and sd with all controls
outlist$avg_ctrls <- apply(control_cols, 1, function(x) mean(as.numeric(x), na.rm = TRUE))
outlist$sd_ctrls <- apply(control_cols, 1, function(x) sd(as.numeric(x), na.rm = TRUE))
outlist$sd_ctrls <- apply(control_cols, 1, function(x) sd(as.numeric(x), na.rm = TRUE))
} else {
if (length(control_cols) > 3) {
for (metabolite_index in seq_len(nrow(outlist))) {
if (zscore_type == "_RobustZscore") {
# Calculate mean and sd, remove outlier controls by using robust scaler
outlist$avg_ctrls[metabolite_index] <- mean(robust_scaler(outlist[metabolite_index, control_cols],
control_cols, stat_filter))
outlist$sd_ctrls[metabolite_index] <- sd(robust_scaler(outlist[metabolite_index, control_cols],
control_cols, stat_filter))
outlist$avg_ctrls[metabolite_index] <- mean(robust_scaler(
outlist[metabolite_index, control_cols],
control_cols, stat_filter
))
outlist$sd_ctrls[metabolite_index] <- sd(robust_scaler(
outlist[metabolite_index, control_cols],
control_cols, stat_filter
))
} else {
# Calculate mean, sd and number of remaining controls, remove outlier controls by using grubbs test
intensities_without_outliers <- remove_outliers_grubbs(as.numeric(outlist[metabolite_index, control_cols]), stat_filter)
intensities_without_outliers <- remove_outliers_grubbs(
as.numeric(outlist[metabolite_index, control_cols]),
stat_filter
)
outlist$avg_ctrls[metabolite_index] <- mean(intensities_without_outliers)
outlist$sd_ctrls[metabolite_index] <- sd(intensities_without_outliers)
outlist$nr_ctrls[metabolite_index] <- length(intensities_without_outliers)
outlist$sd_ctrls[metabolite_index] <- sd(intensities_without_outliers)
outlist$nr_ctrls[metabolite_index] <- length(intensities_without_outliers)
}
}
}
Expand All @@ -61,7 +68,7 @@ calculate_zscores <- function(outlist, zscore_type, control_cols, stat_filter, i
})
outlist <- cbind(outlist, outlist_zscores)
colnames(outlist)[startcol:ncol(outlist)] <- paste0(colnames(outlist)[intensity_col_ids], zscore_type)

return(outlist)
}

Expand All @@ -75,7 +82,7 @@ robust_scaler <- function(control_intensities, control_col_ids, perc = 5) {
#' @return trimmed_control_intensities: Intensities trimmed for outliers
nr_to_remove <- ceiling(length(control_col_ids) * perc / 100)
sorted_control_intensities <- sort(as.numeric(control_intensities))
trimmed_control_intensities <- sorted_control_intensities[(nr_to_remove + 1) :
trimmed_control_intensities <- sorted_control_intensities[(nr_to_remove + 1):
(length(sorted_control_intensities) - nr_to_remove)]
return(trimmed_control_intensities)
}
Expand Down Expand Up @@ -129,3 +136,64 @@ set_row_height_col_width_wb <- function(wb, sheetname, num_rows_df, num_cols_df,
}
return(wb)
}

#' Transform a dataframe with intensities to long format
#'
#' Get intensities of controls and patient for the selected metabolite,
#' pivot to long format, arrange Samples nummerically, change Sample names, get group size and
#' set Intensities to numeric.
#'
#' @param intensities_plots_df: a dataframe with HMDB_key column and intensities for all samples
#'
#' @returns intensities_plots_df_long: a dataframe with on each row a sample and their intensity
transform_ints_df_plots <- function(intensities_plots_df, row_index) {
intensities_plots_df_long <- intensities_plots_df %>%
slice(row_index) %>%
select(-HMDB_key) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "Samples", values_to = "Intensities") %>%
arrange(nchar(Samples)) %>%
mutate(
Samples = gsub("\\..*", "", Samples),
Samples = gsub("(C).*", "\\1", Samples),
Intensities = as.numeric(Intensities),
type = ifelse(Samples == "C", "Control", "Patients")
) %>%
group_by(Samples) %>%
mutate(group_size = n()) %>%
ungroup()

return(intensities_plots_df_long)
}


#' Create a plot of intensities of samples for Excel
#' Use boxplot if group size is above 2, otherwise use a dash/line
#'
#' @param intensities_plots_df_long: a dataframe with on each row a sample and their intensity
#' @param hmdb_id: HMDB ID of the selected metabolite
#'
#' @returns boxplot_excel: ggplot2 object containing the plot of intensities
create_boxplot_excel <- function(intensities_plots_df_long, hmdb_id) {
boxplot_excel <- ggplot(intensities_plots_df_long, aes(Samples, Intensities)) +
geom_boxplot(data = subset(intensities_plots_df_long, group_size > 2), aes(fill = type)) +
geom_point(
data = subset(intensities_plots_df_long, group_size <= 2),
shape = "-",
size = 10,
aes(colour = type, fill = type)
) +
scale_fill_manual(values = c("Control" = "green", "Patients" = "#b20000")) +
scale_color_manual(values = c("Control" = "black", "Patients" = "#b20000")) +
theme(
legend.position = "none",
axis.text = element_text(size = 12, face = "bold"),
axis.text.x = element_text(angle = 90, hjust = 1),
axis.title = element_blank(),
plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
panel.background = element_rect(fill = "white", colour = "black")
) +
ggtitle(hmdb_id)

return(boxplot_excel)
}
81 changes: 81 additions & 0 deletions DIMS/tests/testthat/_snaps/generate_excel/create-boxplot-excel.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Loading