diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..b826a9c Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index b1401c9..500d4a0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ # History files .Rhistory .Rapp.history +.result_for_antismash.json +.data_all.csv # Session Data files .RData diff --git a/DESCRIPTION b/DESCRIPTION index adc53f7..a695e05 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,15 @@ Package: BGCViz Title: BGCViz -Version: 0.0.0.9000 -Authors@R: person('Pavlo', 'Hrab', email = 'pavlo.hrab@gmail.com', role = c('cre', 'aut')) -Description: BGCViz is a shiny package, that aims to integrate BGC result from different softwares for same genome sequence. +Version: 0.99.0 +Authors@R: person('Lev', 'Tsarin', email = 'Tsarinlev@gmail.com', role = c('cre', 'aut')) +Description: BGCViz is a Shiny package that aims to integrate BGC results + from different software tools for the same genome sequence. In addition to + filtering GECCO and DeepBGC data and choosing optimal thresholds for them, + the package forms groups of semi-identical BGCs, allowing better + annotation and boundary resolution of each one. The output of BGCViz can + serve as input to the clinker software.
License: MIT + file LICENSE -biocViews: +biocViews: Visualization, MultipleComparison, FunctionalGenomics, ShinyApps Imports: BioCircos, config (>= 0.3.1), @@ -28,10 +33,10 @@ Imports: sortable, stringi, stringr, - tidyr + tidyr, + ggtree Encoding: UTF-8 -LazyData: true -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 URL: https://github.com/ostash-group/BGCViz BugReports: https://github.com/ostash-group/BGCViz/issues Suggests: @@ -45,4 +50,5 @@ Config/testthat/edition: 3 Language: en-US VignetteBuilder: knitr Depends: - R (>= 2.10) + R (>= 4.2.0) +LazyData: true diff --git a/NAMESPACE b/NAMESPACE index 51c9b83..9f5db87 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(antismash_to_csv) export(arts_to_csv) export(get_defaults) +export(get_dissect_example) export(prism_to_csv) export(run_app) export(sempi_to_csv) diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..f8499d4 --- /dev/null +++ b/NEWS @@ -0,0 +1,2 @@ +Changes in version 0.99.0 (2022-06-06) ++ Submitted to Bioconductor \ No newline at end of file diff --git a/R/app_config.R b/R/app_config.R index e6c06d0..1239bfb 100644 --- a/R/app_config.R +++ b/R/app_config.R @@ -2,7 +2,7 @@ #' #' NOTE: If you manually change your package name in the DESCRIPTION, #' don't forget to change it here too, and in the config file. -#' For a safer name change mechanism, use the `golem::set_golem_name()` +#' For a safer name change mechanism, use the `golem::set_golem_name()` #' function. #' #' @param ... character vectors, specifying subdirectory and file(s) @@ -10,7 +10,7 @@ #' #' @noRd app_sys <- function(...) { - system.file(..., package = "BGCViz") + system.file(..., package = "BGCViz") } @@ -24,18 +24,18 @@ app_sys <- function(...) 
{ #' @noRd get_golem_config <- function(value, config = Sys.getenv( - "GOLEM_CONFIG_ACTIVE", - Sys.getenv( - "R_CONFIG_ACTIVE", - "default" - ) + "GOLEM_CONFIG_ACTIVE", + Sys.getenv( + "R_CONFIG_ACTIVE", + "default" + ) ), use_parent = TRUE) { - config::get( - value = value, - config = config, - # Modify this if your config file is somewhere else: - file = app_sys("golem-config.yml"), - use_parent = use_parent - ) + config::get( + value = value, + config = config, + # Modify this if your config file is somewhere else: + file = app_sys("golem-config.yml"), + use_parent = use_parent + ) } diff --git a/R/app_server.R b/R/app_server.R index 657ea39..18b608c 100644 --- a/R/app_server.R +++ b/R/app_server.R @@ -5,1603 +5,1968 @@ #' @import shiny #' @noRd app_server <- function(input, output, session) { - # Your application server logic - # Silence R CMD note - Start <- Stop <- Core <- Chr <- NULL - ## --------------------------------------------------------------- - ## Some lists of reactive values to listen later - - ## --------------------------------------------------------------- - check_to_rename <- shiny::reactive({ - list( - input$sempi_data, input$anti_data, input$prism_data, - input$sempi_sco, input$anti_sco, input$prism_sco + # Your application server logic + # Silence R CMD note + Start <- Stop <- Core <- Chr <- NULL + ## --------------------------------------------------------------- + ## Some lists of reactive values to listen later - + ## --------------------------------------------------------------- + check_to_rename <- shiny::reactive({ + list( + input$sempi_data, input$anti_data, input$prism_data, + input$sempi_sco, input$anti_sco, input$prism_sco, input$compare_sco, + input$ripp_sco, input$ripp_data, input$emerald_data, input$compare_data + ) + }) + biocircos_listen <- shiny::reactive({ + list( + input$biocircos_color, vals$need_filter, input$label_color, input$label_color_class, + input$ref_col_biocircos, vals$inters_filtered, 
input$prism_supp_data_input_width, vals$prism_supp_data_input, + input$arts_width, input$sempi_width, input$rre_width, vals$anti_data, vals$sempi_data, vals$prism_data, + vals$coloring_datatable, + vals$ripp_data, vals$emerald_data, vals$compare_data + ) + }) + inputData <- shiny::reactive({ + list( + vals$sempi_data_input, vals$rre_data_input, vals$anti_data_input, vals$prism_data_input, + vals$prism_supp_data_input, vals$deep_data_input, vals$gecco_data_input, vals$arts_data_input, + vals$ripp_data_input, vals$emerald_data_input, vals$compare_data_input + ) + }) + dynamicInput <- shiny::reactive({ + list(input$dup_choice, vals$need_filter, input$prism_supp, input$phylo_file) + }) + deep_reference <- shiny::reactive({ + list( + vals$inters_filtered, vals$rre_more, input$ref, input$arts_width, input$sempi_width, input$rre_width, + input$prism_supp_data_input_width, vals$anti_data, vals$prism_data, vals$sempi_data, vals$arts_data, + vals$ripp_data, vals$arts_tree_data, vals$emerald_data, vals$compare_data + ) + }) + + to_debounce <- shiny::reactive({ + list( + vals$cluster_type, vals$gene_filter, vals$biodomain_filter, vals$score_c, vals$score_d, + vals$score_a, vals$score_average_gecco, vals$score_cluster_gecco, vals$domains_filter_gecco, + vals$prot_filter_gecco + ) + }) %>% shiny::debounce(500) + + # Some dataframes that are used throughout the app + some vectors of intercepted values + vals <- shiny::reactiveValues( + deep_data = NULL, anti_data = NULL, rre_data = NULL, prism_data = NULL, chr_len = NULL, fullness_deep = NULL, + biocircos_deep = NULL, deep_data_input = FALSE, tracklist = NULL, json_for_anti = NULL, chromosomes = NULL, fullness_gecco = NULL, + anti_data_input = FALSE, rre_data_input = FALSE, prism_data_input = FALSE, seg_df_ref_a = NULL, + seg_df_ref_d = NULL, seg_df_ref_r = NULL, seg_df_ref_p = NULL, deep_data_chromo = NULL, + data_upload_count = 0, anti_type = NULL, prism_type = NULL, sempi_data = NULL, sempi_data_input = FALSE, + sempi_type = 
NULL, biocircos_color = NULL, rename_data = NULL, group_by_data = NULL, + rre_interact = NULL, anti_interact = NULL, prism_interact = NULL, deep_interact = NULL, + sempi_interact = NULL, df_a = NULL, df_d = NULL, df_p = NULL, df_r = NULL, prism_supp = NULL, + prism_json = FALSE, df_s = NULL, prism_supp_interact = NULL, known_data = NULL, dup_data = NULL, + known_data_input = FALSE, dup_data_input = FALSE, arts_data = NULL, arts_tree_data = NULL, arts_data_input = FALSE, seg_df_ref_ar = NULL, + df_ps = NULL, arts_interact = NULL, rre_more = FALSE, gecco_data = NULL, gecco_data_input = FALSE, + gecco_data_filtered = NULL, seg_df_ref_g = NULL, prism_supp_data_input = FALSE, computed = NULL, + need_filter = FALSE, filter_data = FALSE, choices = list(ref = NULL, group_by = NULL, ref_col_biocircos = NULL, ref_comparison_gecco = NULL, ref_comparison = NULL), + renamed = NULL, renaming_notification = list(), rename_y_axis = list(), can_plot_deep_ref_2 = FALSE, can_plot_deep_ref = FALSE, + can_plot_biocircos = FALSE, can_plot_barplot_rank = FALSE, can_plot_group_table = FALSE, prism_supp_plot = FALSE, + ripp_data = NULL, ripp_data_input = FALSE, ripp_type = NULL, ripp_interact = NULL, seg_df_ref_ri = NULL, + emerald_data = NULL, emerald_data_input = FALSE, emerald_type = NULL, emerald_interact = NULL, seg_df_ref_emer = NULL, + compare_data = NULL, compare_data_input = FALSE, compare_type = NULL, compare_interact = NULL, seg_df_ref_compare = NULL + ) + + vals$computed <- list( + anti = FALSE, deep = FALSE, gecco = FALSE, arts = FALSE, + prism = FALSE, sempi = FALSE, prism_supp = FALSE, rre = FALSE, + ripp = FALSE, emerald = FALSE, compare = FALSE ) - }) - biocircos_listen <- shiny::reactive({ - list( - input$biocircos_color, vals$need_filter, input$label_color, input$label_color_class, - input$ref_col_biocircos, vals$inters_filtered, input$prism_supp_data_input_width, vals$prism_supp_data_input, - input$arts_width, input$sempi_width, input$rre_width, vals$anti_data, 
vals$sempi_data, vals$prism_data, - vals$coloring_datatable + # Making coloring datatable + rename_file <- system.file("extdata", "rename.csv", package = "BGCViz") + vals$rename_data <- utils::read.csv(rename_file) + rename_data <- utils::read.csv(rename_file) + coloring_datatable <- data.frame(tidyr::drop_na(data.frame(cbind(as.character(rename_data$Group_color), as.character(rename_data$Color), rename_data$Hierarchy)))) + coloring_datatable <- coloring_datatable[!apply(coloring_datatable == "", 1, all), ] + colnames(coloring_datatable) <- c("Name", "Color", "Hierarchy") + vals$coloring_datatable <- DT::datatable(coloring_datatable, rownames = FALSE, editable = "column", options = list(dom = "t", ordering = FALSE)) + # Variables that hold the data-upload booleans (i.e. whether the data is present or not) + data_uploads <- c( + "anti_data_input", "sempi_data_input", "prism_data_input", "prism_supp_data_input", + "arts_data_input", "deep_data_input", "gecco_data_input", "rre_data_input", + "ripp_data_input","emerald_data_input","compare_data_input" ) - }) - inputData <- shiny::reactive({ - list( - vals$sempi_data_input, vals$rre_data_input, vals$anti_data_input, vals$prism_data_input, - vals$prism_supp_data_input, vals$deep_data_input, vals$gecco_data_input, vals$arts_data_input + data_uploads_inter <- c( + "anti_data_input", "sempi_data_input", "prism_data_input", "prism_json", + "arts_data_input", "deep_data_input", "gecco_data_input", "rre_data_input", + "ripp_data_input","emerald_data_input","compare_data_input" ) - }) - dynamicInput <- shiny::reactive({ - list(input$dup_choice, vals$need_filter, input$prism_supp) - }) - deep_reference <- shiny::reactive({ - list( - vals$inters_filtered, vals$rre_more, input$ref, input$arts_width, input$sempi_width, input$rre_width, - input$prism_supp_data_input_width, vals$anti_data, vals$prism_data, vals$sempi_data, vals$arts_data + # Universal prefixes for variables, used in the app for different data + soft_names <- c("anti", "sempi", 
"prism", "prism_supp", "arts", "deep", "gecco", "rre", "ripp","emerald","compare") + # The Namings, meaning how to label the data on the plots + soft_namings <- c("Antismash", "SEMPI", "PRISM", "PRISM-Supp", + "ARTS", "DeepBGC", "GECCO", "RRE-Finder", + "RippMiner","Emerald/SanntiS","Compare") + # Dataframes undes vals$list, that stored the data + data_to_use <- c("anti_data", "sempi_data", "prism_data", "prism_supp_data", + "arts_data_filtered", "deep_data_filtered", "gecco_data_filtered", + "rre_data","ripp_data","emerald_data","compare_data") + # Used in barplot on summarise tab + Annotation on chromosome plots + abbr <- c("A", "S", "P", "P-Supp", "AR", "D", "G", "RRE", "Ripp",'Emer',"C") + # Used for deep reference 2 plot + soft_datafr <- c( + "seg_df_ref_a", "seg_df_ref_s", "seg_df_ref_p", "seg_df_ref_p_s", "seg_df_ref_ar", "seg_df_ref_d", + "seg_df_ref_g", "seg_df_ref_r", "seg_df_ref_ri","seg_df_ref_emer","seg_df_ref_compare" ) - }) - to_debounce <- shiny::reactive({ - list( - vals$cluster_type, vals$gene_filter, vals$biodomain_filter, vals$score_c, vals$score_d, - vals$score_a, vals$score_average_gecco, vals$score_cluster_gecco, vals$domains_filter_gecco, - vals$prot_filter_gecco - ) - }) %>% shiny::debounce(500) - - # Some dataframes that are used through the app + some vectors of untercepted values - vals <- shiny::reactiveValues( - deep_data = NULL, anti_data = NULL, rre_data = NULL, prism_data = NULL, chr_len = NULL, fullness_deep = NULL, - biocircos_deep = NULL, deep_data_input = FALSE, tracklist = NULL, chromosomes = NULL, fullness_gecco = NULL, - anti_data_input = FALSE, rre_data_input = FALSE, prism_data_input = FALSE, seg_df_ref_a = NULL, - seg_df_ref_d = NULL, seg_df_ref_r = NULL, seg_df_ref_p = NULL, deep_data_chromo = NULL, - data_upload_count = 0, anti_type = NULL, prism_type = NULL, sempi_data = NULL, sempi_data_input = FALSE, - sempi_type = NULL, biocircos_color = NULL, rename_data = NULL, group_by_data = NULL, - rre_interact = NULL, 
anti_interact = NULL, prism_interact = NULL, deep_interact = NULL, - sempi_interact = NULL, df_a = NULL, df_d = NULL, df_p = NULL, df_r = NULL, prism_supp = NULL, - prism_json = FALSE, df_s = NULL, prism_supp_interact = NULL, known_data = NULL, dup_data = NULL, - known_data_input = F, dup_data_input = F, arts_data = NULL, arts_data_input = F, seg_df_ref_ar = NULL, - df_ps = NULL, arts_interact = NULL, rre_more = FALSE, gecco_data = NULL, gecco_data_input = FALSE, - gecco_data_filtered = NULL, seg_df_ref_g = NULL, prism_supp_data_input = F, computed = NULL, - need_filter = F, filter_data = F, choices = list(ref = NULL, group_by = NULL, ref_col_biocircos = NULL, ref_comparison_gecco = NULL, ref_comparison = NULL), - renamed = NULL, renaming_notification = list(), rename_y_axis = list(), can_plot_deep_ref_2 = F, can_plot_deep_ref = F, - can_plot_biocircos = F, can_plot_barplot_rank = F, can_plot_group_table = F, prism_supp_plot = F - ) - - vals$computed <- list( - anti = F, deep = F, gecco = F, arts = F, prism = F, sempi = F, prism_supp = F, rre = F - ) - # Making coloring datatable - rename_file <- system.file("extdata", "rename.csv", package = "BGCViz") - vals$rename_data <- utils::read.csv(rename_file) - rename_data <- utils::read.csv(rename_file) - coloring_datatable <- data.frame(tidyr::drop_na(data.frame(cbind(as.character(rename_data$Group_color), as.character(rename_data$Color), rename_data$Hierarchy)))) - coloring_datatable <- coloring_datatable[!apply(coloring_datatable == "", 1, all), ] - colnames(coloring_datatable) <- c("Name", "Color", "Hierarchy") - vals$coloring_datatable <- DT::datatable(coloring_datatable, rownames = F, editable = "column", options = list(dom = "t", ordering = F)) - # Variables, that holds data uploads boolean (so if data is present or not) - data_uploads <- c( - "anti_data_input", "sempi_data_input", "prism_data_input", "prism_supp_data_input", - "arts_data_input", "deep_data_input", "gecco_data_input", "rre_data_input" - ) - 
data_uploads_inter <- c( - "anti_data_input", "sempi_data_input", "prism_data_input", "prism_json", - "arts_data_input", "deep_data_input", "gecco_data_input", "rre_data_input" - ) - # Universal beginings for variables, used in the app for different data - soft_names <- c("anti", "sempi", "prism", "prism_supp", "arts", "deep", "gecco", "rre") - # The Namings, meaning how to label the data on the plots - soft_namings <- c("Antismash", "SEMPI", "PRISM", "PRISM-Supp", "ARTS", "DeepBGC", "GECCO", "RRE-Finder") - # Dataframes undes vals$list, that stored the data - data_to_use <- c("anti_data", "sempi_data", "prism_data", "prism_supp_data", "arts_data_filtered", "deep_data_filtered", "gecco_data_filtered", "rre_data") - # Used in barplot on summarise tab + Annotation on chromosome plots - abbr <- c("A", "S", "P", "P-Supp", "AR", "D", "G", "RRE") - # Used for deep reference 2 plot - soft_datafr <- c( - "seg_df_ref_a", "seg_df_ref_s", "seg_df_ref_p", "seg_df_ref_p_s", "seg_df_ref_ar", "seg_df_ref_d", - "seg_df_ref_g", "seg_df_ref_r" - ) - - vals$score_a <- 50 - vals$score_d <- 50 - vals$score_c <- 50 - vals$domains_filter <- 5 - vals$biodomain_filter <- 1 - vals$gene_filter <- 1 - vals$cluster_type <- 50 - vals$score_average_gecco <- 50 - vals$score_cluster_gecco <- 50 - vals$domains_filter_gecco <- 1 - vals$prot_filter_gecco <- 1 - vals$gecco_sidebar <- FALSE - vals$deep_sidebar <- FALSE - vals$deep_global <- FALSE - vals$gecco_global <- FALSE - ## ---------------------------------------------------------------- - ## Helper functions - - ## ---------------------------------------------------------------- - # Need to get them to a tidyr::separate file later - # TODO - files_in_dir <- list.files() - # Iterate over those files and if found "_biocircos.csv" add remove them - for (file_names in files_in_dir) { - if (grepl("_biocircos.csv", file_names, fixed = TRUE)) { - file.remove(file_names) - } - } - options(shiny.maxRequestSize = 100 * 1024^2) - - disable_event_logic <- 
function() { - vals$can_plot_deep_ref <- F - vals$can_plot_biocircos <- F - vals$can_plot_barplot_rank <- F - vals$can_plot_group_table <- F - } - enable_event_logic <- function() { - vals$can_plot_deep_ref <- T - vals$can_plot_biocircos <- T - vals$can_plot_barplot_rank <- T - vals$can_plot_group_table <- T - } - - ########################################################################### - ########################################################################### - ### ### - ### DATA INPUT PROCESSING ### - ### ### - ########################################################################### - ########################################################################### - # TODO Make tidyr::separate functions for different data types. - # For now you just have duplicated the code. Specifically for ARTS! - # Reading functions: - - process_antismash <- function(data, example_data = F) { - if (example_data == T) { - anti_data <- data - } else { - anti_data <- read_anti(data) - } - vals$anti_type <- anti_data$Type2 - vals$anti_data <- anti_data - vals$anti_data_input <- TRUE - vals$data_upload_count <- vals$data_upload_count + 1 - vals$choices$ref <- c(vals$choices$ref, "Antismash" = "Antismash") - vals$choices$group_by <- c(vals$choices$group_by, "Antismash" = "Antismash") - vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "Antismash" = "Antismash") - vals$choices$ref_comparison_gecco <- c(vals$choices$ref_comparison_gecco, "Antismash" = "Antismash") - vals$choices$ref_comparison <- c(vals$choices$ref_comparison, "Antismash" = "Antismash") - update_ui_with_data() - disable_event_logic() - if (vals$data_upload_count == 1) { - shiny::updateSelectInput(session, "ref", - selected = "Antismash" - ) - shiny::updateSelectInput(session, "group_table_ui_1-group_by", - selected = "Antismash" - ) - shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", - selected = "Antismash" - ) - shiny::updateSelectInput(session, "ref_col_biocircos", - 
selected = "Antismash" - ) - shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", - selected = "Antismash" - ) - } - } - process_gecco <- function(data, example_data = F) { - if (example_data == T) { - gecco_data <- data - } else { - gecco_data <- read_gecco(data) - } - vals$gecco_data <- gecco_data - vals$gecco_data_filtered <- filter_gecco(vals$gecco_data, vals$score_cluster_gecco, vals$score_average_gecco, vals$domains_filter_gecco, vals$prot_filter_gecco) - vals$gecco_data_input <- TRUE - vals$data_upload_count <- vals$data_upload_count + 1 - vals$choices$ref <- c(vals$choices$ref, "GECCO" = "GECCO") - vals$choices$group_by <- c(vals$choices$group_by, "GECCO" = "GECCO") - vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "GECCO" = "GECCO") - update_ui_with_data() - disable_event_logic() - if (vals$data_upload_count == 1) { - shiny::updateSelectInput(session, "ref", - selected = "GECCO" - ) - shiny::updateSelectInput(session, "group_table_ui_1-group_by", - selected = "GECCO" - ) - shiny::updateSelectInput(session, "ref_col_biocircos", - selected = "GECCO" - ) + vals$score_a <- 50 + vals$score_d <- 50 + vals$score_c <- 50 + vals$domains_filter <- 5 + vals$biodomain_filter <- 1 + vals$gene_filter <- 1 + vals$cluster_type <- 50 + vals$score_average_gecco <- 50 + vals$score_cluster_gecco <- 50 + vals$domains_filter_gecco <- 1 + vals$prot_filter_gecco <- 1 + vals$gecco_sidebar <- FALSE + vals$deep_sidebar <- FALSE + vals$deep_global <- FALSE + vals$gecco_global <- FALSE + ## ---------------------------------------------------------------- + ## Helper functions - + ## ---------------------------------------------------------------- + # Need to move them to a separate file later + # TODO + files_in_dir <- list.files() + # Iterate over those files and remove any whose name contains "_biocircos.csv" + for (file_names in files_in_dir) { + if (grepl("_biocircos.csv", file_names, fixed = TRUE)) { + file.remove(file_names) + } + if 
(file_names == "all_data.csv") { + file.remove(file_names) + } } - } - process_prism <- function(data, json = T, example_data = F) { - if (example_data == T) { - prism_data <- data - prism_data$Type <- stringr::str_trim(tolower(prism_data$Type)) - prism_data["Type2"] <- stringr::str_trim(tolower(prism_data$Type)) - vals$prism_supp_data_input <- T - vals$prism_supp <- BGCViz:::prism_supp_data - vals$prism_supp_data <- BGCViz:::prism_supp_data - vals$prism_supp_plot <- T - vals$prism_json <- T - shiny::updateCheckboxInput(inputId = "prism_supp", value = T) - } else { - if (json == T) { - processed <- read_prism(data, json = T) - prism_data <- processed[[1]] - vals$prism_supp_data_input <- T - vals$prism_supp <- processed[[2]] - vals$prism_supp_data <- processed[[2]] - vals$prism_json <- T - vals$prism_supp_plot <- T - shiny::updateCheckboxInput(inputId = "prism_supp", value = T) + options(shiny.maxRequestSize = 100 * 1024^2) + + disable_event_logic <- function() { + vals$can_plot_deep_ref <- FALSE + vals$can_plot_biocircos <- FALSE + vals$can_plot_barplot_rank <- FALSE + vals$can_plot_group_table <- FALSE + } + enable_event_logic <- function() { + vals$can_plot_deep_ref <- TRUE + vals$can_plot_biocircos <- TRUE + vals$can_plot_barplot_rank <- TRUE + vals$can_plot_group_table <- TRUE + } + ########################################################################### + ########################################################################### + ### ### + ### DATA INPUT PROCESSING ### + ### ### + ########################################################################### + ########################################################################### + # TODO Make separate functions for different data types. + # For now the code is just duplicated, specifically for ARTS!
+ # Reading functions: + process_compare <- function(data, example_data = FALSE) { + if (example_data == TRUE) { + compare_data <- data } else { - processed <- read_prism(data, json = F) - prism_data <- processed[[1]] + compare_data<- read_compare(data) + } + vals$compare_type <- compare_data$Type2 + vals$compare_data <- compare_data + vals$compare_data$chromosome <- rep("C", length(vals$compare_data$Cluster)) + + vals$compare_data_input <- TRUE + vals$data_upload_count <- vals$data_upload_count + 1 + vals$choices$ref <- c(vals$choices$ref, "Compare" = "Compare") + vals$choices$group_by <- c(vals$choices$group_by, "Compare" = "Compare") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "Compare" = "Compare") + vals$choices$ref_comparison_gecco <- c(vals$choices$ref_comparison_gecco, "Compare" = "Compare") + vals$choices$ref_comparison <- c(vals$choices$ref_comparison, "Compare" = "Compare") + update_ui_with_data() + disable_event_logic() + if (vals$data_upload_count == 1) { + shiny::updateSelectInput(session, "ref", + selected = "Compare" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "Compare" + ) + shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", + selected = "Compare" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + selected = "Compare" + ) + shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", + selected = "Compare" + ) } } - vals$choices$ref <- c(vals$choices$ref, "PRISM-Supp" = "PRISM-Supp") - vals$choices$group_by <- c(vals$choices$group_by, "PRISM-Supp" = "PRISM-Supp") - vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "PRISM-Supp" = "PRISM-Supp") - update_ui_with_data() - vals$prism_data <- prism_data - vals$prism_type <- prism_data$Type2 - - # Add chromosome info column - vals$prism_data$chromosome <- rep("P", length(vals$prism_data$Cluster)) - # Add ID column (same as Cluster) - vals$prism_data$ID <- vals$prism_data$Cluster - 
vals$prism_data_input <- TRUE - vals$data_upload_count <- vals$data_upload_count + 1 - vals$choices$ref <- c(vals$choices$ref, "PRISM" = "PRISM") - vals$choices$group_by <- c(vals$choices$group_by, "PRISM" = "PRISM") - vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "PRISM" = "PRISM") - vals$choices$ref_comparison_gecco <- c(vals$choices$ref_comparison_gecco, "PRISM" = "PRISM") - vals$choices$ref_comparison <- c(vals$choices$ref_comparison, "PRISM" = "PRISM") - update_ui_with_data() - disable_event_logic() - if (vals$data_upload_count == 1) { - shiny::updateSelectInput(session, "ref", - selected = "PRISM" - ) - shiny::updateSelectInput(session, "group_table_ui_1-group_by", - selected = "PRISM" - ) - shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", - selected = "PRISM" - ) - shiny::updateSelectInput(session, "ref_col_biocircos", - selected = "PRISM" - ) - shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", - selected = "PRISM" - ) - } - } - process_sempi <- function(data, zip = T, example_data = F) { - if (example_data == T) { - sempi_data <- data - } else { - if (zip == T) { - sempi_data <- read_sempi(data, zip = T) + + process_emerald <- function(data, example_data = FALSE) { + if (example_data == TRUE) { + emerald_data <- data } else { - sempi_data <- read_sempi(data, zip = F) + emerald_data <- read_emerald(data) + } + vals$emerald_type <- emerald_data$Type2 + vals$emerald_data <- emerald_data + vals$emerald_data_input <- TRUE + vals$data_upload_count <- vals$data_upload_count + 1 + vals$choices$ref <- c(vals$choices$ref, "Emerald/SanntiS" = "Emerald/SanntiS") + vals$choices$group_by <- c(vals$choices$group_by, "Emerald/SanntiS" = "Emerald/SanntiS") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "Emerald/SanntiS" = "Emerald/SanntiS") + vals$choices$ref_comparison_gecco <- c(vals$choices$ref_comparison_gecco, "Emerald/SanntiS" = "Emerald/SanntiS") + vals$choices$ref_comparison 
<- c(vals$choices$ref_comparison, "Emerald/SanntiS" = "Emerald/SanntiS") + update_ui_with_data() + disable_event_logic() + if (vals$data_upload_count == 1) { + shiny::updateSelectInput(session, "ref", + selected = "Emerald/SanntiS" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "Emerald/SanntiS" + ) + shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", + selected = "Emerald/SanntiS" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + selected = "Emerald/SanntiS" + ) + shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", + selected = "Emerald/SanntiS" + ) } } - vals$sempi_type <- sempi_data$Type2 - vals$sempi_data <- sempi_data - # Add chromosome info column - vals$sempi_data$chromosome <- rep("S", length(vals$sempi_data$Cluster)) - # Add ID column (same as Cluster) - vals$sempi_data$ID <- vals$sempi_data$Cluster - vals$sempi_data_input <- TRUE - vals$data_upload_count <- vals$data_upload_count + 1 - vals$choices$ref <- c(vals$choices$ref, "SEMPI" = "SEMPI") - vals$choices$group_by <- c(vals$choices$group_by, "SEMPI" = "SEMPI") - vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "SEMPI" = "SEMPI") - vals$choices$ref_comparison_gecco <- c(vals$choices$ref_comparison_gecco, "SEMPI" = "SEMPI") - vals$choices$ref_comparison <- c(vals$choices$ref_comparison, "SEMPI" = "SEMPI") - update_ui_with_data() - disable_event_logic() - if (vals$data_upload_count == 1) { - shiny::updateSelectInput(session, "ref", - selected = "SEMPI" - ) - shiny::updateSelectInput(session, "group_table_ui_1-group_by", - selected = "SEMPI" - ) - shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", - selected = "SEMPI" - ) - shiny::updateSelectInput(session, "ref_col_biocircos", - selected = "SEMPI" - ) - shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", - selected = "SEMPI" - ) - } - } - process_arts_archive <- function(archive, zip = T, example_data = F) 
{ - if (example_data == T) { - arts_data <- BGCViz:::arts_data - } else { - if (zip == T) { - arts_data <- read_arts_archive(archive, zip = T) + + process_rippminer <- function(data, example_data = FALSE) { + if (example_data == TRUE) { + ripp_data <- data } else { - arts_data <- utils::read.csv(archive) + ripp_data <- read_ripp(data) + } + vals$ripp_type <- ripp_data$Type2 + vals$ripp_data <- ripp_data + vals$ripp_data_input <- TRUE + vals$data_upload_count <- vals$data_upload_count + 1 + vals$choices$ref <- c(vals$choices$ref, "RippMiner" = "RippMiner") + vals$choices$group_by <- c(vals$choices$group_by, "RippMiner" = "RippMiner") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "RippMiner" = "RippMiner") + vals$choices$ref_comparison_gecco <- c(vals$choices$ref_comparison_gecco, "RippMiner" = "RippMiner") + vals$choices$ref_comparison <- c(vals$choices$ref_comparison, "RippMiner" = "RippMiner") + update_ui_with_data() + disable_event_logic() + if (vals$data_upload_count == 1) { + shiny::updateSelectInput(session, "ref", + selected = "RippMiner" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "RippMiner" + ) + shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", + selected = "RippMiner" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + selected = "RippMiner" + ) + shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", + selected = "RippMiner" + ) } } - vals$arts_data <- arts_data - vals$choices$ref <- c(vals$choices$ref, "ARTS" = "ARTS") - vals$choices$group_by <- c(vals$choices$group_by, "ARTS" = "ARTS") - vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "ARTS" = "ARTS") - update_ui_with_data() - vals$data_upload_count <- vals$data_upload_count + 1 - vals$arts_data_input <- T - dup_table_id <- vals$arts_data %>% - dplyr::filter(Core != "Not_core") - shiny::updateSelectInput(session, "dup_choice", - choices = c("All", paste0("ID:", 
dup_table_id$ID, " ,Core:", dup_table_id$Core)), - selected = "All" - ) - if (vals$data_upload_count == 1) { - shiny::updateSelectInput(session, "ref", - selected = "ARTS" - ) - shiny::updateSelectInput(session, "group_table_ui_1-group_by", - selected = "ARTS" - ) - shiny::updateSelectInput(session, "ref_col_biocircos", - selected = "ARTS" - ) + + process_antismash <- function(data, example_data = FALSE) { + if (example_data == TRUE) { + anti_data <- data + } else { + anti_data <- read_anti(data) + } + vals$anti_type <- anti_data$Type2 + vals$anti_data <- anti_data + vals$anti_data_input <- TRUE + vals$data_upload_count <- vals$data_upload_count + 1 + vals$choices$ref <- c(vals$choices$ref, "Antismash" = "Antismash") + vals$choices$group_by <- c(vals$choices$group_by, "Antismash" = "Antismash") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "Antismash" = "Antismash") + vals$choices$ref_comparison_gecco <- c(vals$choices$ref_comparison_gecco, "Antismash" = "Antismash") + vals$choices$ref_comparison <- c(vals$choices$ref_comparison, "Antismash" = "Antismash") + update_ui_with_data() + disable_event_logic() + if (vals$data_upload_count == 1) { + shiny::updateSelectInput(session, "ref", + selected = "Antismash" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "Antismash" + ) + shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", + selected = "Antismash" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + selected = "Antismash" + ) + shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", + selected = "Antismash" + ) + } } - } - process_deep <- function(data, example_data = F) { - if (example_data == T) { - deep_data <- data - } else { - deep_data <- read_deep(data) + + + process_gecco <- function(data, example_data = FALSE) { + if (example_data == TRUE) { + gecco_data <- data + } else { + gecco_data <- read_gecco(data) + } + vals$gecco_data <- gecco_data + 
vals$gecco_data_filtered <- filter_gecco(vals$gecco_data, vals$score_cluster_gecco, vals$score_average_gecco, vals$domains_filter_gecco, vals$prot_filter_gecco) + vals$gecco_data_input <- TRUE + vals$data_upload_count <- vals$data_upload_count + 1 + vals$choices$ref <- c(vals$choices$ref, "GECCO" = "GECCO") + vals$choices$group_by <- c(vals$choices$group_by, "GECCO" = "GECCO") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "GECCO" = "GECCO") + update_ui_with_data() + disable_event_logic() + if (vals$data_upload_count == 1) { + shiny::updateSelectInput(session, "ref", + selected = "GECCO" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "GECCO" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + selected = "GECCO" + ) + } } - vals$deep_data <- deep_data - vals$deep_data$chromosome <- rep("D", length(vals$deep_data$bgc_candidate_id)) - vals$deep_data$Start <- vals$deep_data$nucl_start - vals$deep_data$Stop <- vals$deep_data$nucl_end - # Add ID column as number seuquence of dataframe length - vals$deep_data$ID <- seq(1:length(vals$deep_data$bgc_candidate_id)) - vals$deep_data$Cluster <- vals$deep_data$ID - vals$deep_data_input <- TRUE - vals$data_upload_count <- vals$data_upload_count + 1 - vals$deep_data_filtered <- filter_deepbgc(vals$deep_data, vals$cluster_type, vals$score_a, vals$score_c, vals$score_d, vals$domains_filter, vals$biodomain_filter, vals$gene_filter) - vals$choices$ref <- c(vals$choices$ref, "DeepBGC" = "DeepBGC") - vals$choices$group_by <- c(vals$choices$group_by, "DeepBGC" = "DeepBGC") - vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "DeepBGC" = "DeepBGC") - update_ui_with_data() - disable_event_logic() - if (vals$data_upload_count == 1) { - shiny::updateSelectInput(session, "ref", - selected = "DeepBGC" - ) - shiny::updateSelectInput(session, "group_table_ui_1-group_by", - selected = "DeepBGC" - ) - shiny::updateSelectInput(session, "ref_col_biocircos", - 
choices = "DeepBGC", - selected = "DeepBGC" - ) + process_prism <- function(data, json = TRUE, example_data = FALSE) { + if (example_data == TRUE) { + prism_data <- data + prism_data$Type <- stringr::str_trim(tolower(prism_data$Type)) + prism_data["Type2"] <- stringr::str_trim(tolower(prism_data$Type)) + vals$prism_supp_data_input <- TRUE + vals$prism_supp <- BGCViz:::prism_supp_data + vals$prism_supp_data <- BGCViz:::prism_supp_data + vals$prism_supp_plot <- TRUE + vals$prism_json <- TRUE + shiny::updateCheckboxInput(inputId = "prism_supp", value = TRUE) + } else { + if (json == TRUE) { + processed <- read_prism(data, json = TRUE) + prism_data <- processed[[1]] + vals$prism_supp_data_input <- TRUE + vals$prism_supp <- processed[[2]] + vals$prism_supp_data <- processed[[2]] + vals$prism_json <- TRUE + vals$prism_supp_plot <- TRUE + shiny::updateCheckboxInput(inputId = "prism_supp", value = TRUE) + } else { + processed <- read_prism(data, json = FALSE) + prism_data <- processed[[1]] + } + } + vals$choices$ref <- c(vals$choices$ref, "PRISM-Supp" = "PRISM-Supp") + vals$choices$group_by <- c(vals$choices$group_by, "PRISM-Supp" = "PRISM-Supp") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "PRISM-Supp" = "PRISM-Supp") + update_ui_with_data() + vals$prism_data <- prism_data + vals$prism_type <- prism_data$Type2 + + # Add chromosome info column + vals$prism_data$chromosome <- rep("P", length(vals$prism_data$Cluster)) + # Add ID column (same as Cluster) + vals$prism_data$ID <- vals$prism_data$Cluster + vals$prism_data_input <- TRUE + vals$data_upload_count <- vals$data_upload_count + 1 + vals$choices$ref <- c(vals$choices$ref, "PRISM" = "PRISM") + vals$choices$group_by <- c(vals$choices$group_by, "PRISM" = "PRISM") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "PRISM" = "PRISM") + vals$choices$ref_comparison_gecco <- c(vals$choices$ref_comparison_gecco, "PRISM" = "PRISM") + vals$choices$ref_comparison <- 
c(vals$choices$ref_comparison, "PRISM" = "PRISM") + update_ui_with_data() + disable_event_logic() + if (vals$data_upload_count == 1) { + shiny::updateSelectInput(session, "ref", + selected = "PRISM" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "PRISM" + ) + shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", + selected = "PRISM" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + selected = "PRISM" + ) + shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", + selected = "PRISM" + ) + } } - } - process_rre <- function(data, example_data = F) { - if (example_data == T) { - rre_data <- data - } else { - rre_data <- read_rre(data) + process_sempi <- function(data, zip = TRUE, example_data = FALSE) { + if (example_data == TRUE) { + sempi_data <- data + } else { + if (zip == TRUE) { + sempi_data <- read_sempi(data, zip = TRUE) + } else { + sempi_data <- read_sempi(data, zip = FALSE) + } + } + vals$sempi_type <- sempi_data$Type2 + vals$sempi_data <- sempi_data + # Add chromosome info column + vals$sempi_data$chromosome <- rep("S", length(vals$sempi_data$Cluster)) + # Add ID column (same as Cluster) + vals$sempi_data$ID <- vals$sempi_data$Cluster + vals$sempi_data_input <- TRUE + vals$data_upload_count <- vals$data_upload_count + 1 + vals$choices$ref <- c(vals$choices$ref, "SEMPI" = "SEMPI") + vals$choices$group_by <- c(vals$choices$group_by, "SEMPI" = "SEMPI") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "SEMPI" = "SEMPI") + vals$choices$ref_comparison_gecco <- c(vals$choices$ref_comparison_gecco, "SEMPI" = "SEMPI") + vals$choices$ref_comparison <- c(vals$choices$ref_comparison, "SEMPI" = "SEMPI") + update_ui_with_data() + disable_event_logic() + if (vals$data_upload_count == 1) { + shiny::updateSelectInput(session, "ref", + selected = "SEMPI" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "SEMPI" + ) + 
shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", + selected = "SEMPI" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + selected = "SEMPI" + ) + shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", + selected = "SEMPI" + ) + } } - vals$rre_data <- rre_data - # write.csv(vals$rre_data, "rre_data.csv", row.names = F) - - vals$rre_data_input <- TRUE - vals$data_upload_count <- vals$data_upload_count + 1 - vals$choices$ref <- c(vals$choices$ref, "RRE-Finder" = "RRE-Finder") - vals$choices$group_by <- c(vals$choices$group_by, "RRE-Finder" = "RRE-Finder") - vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "RRE-Finder" = "RRE-Finder") - update_ui_with_data() - disable_event_logic() - if (vals$data_upload_count == 1) { - shiny::updateSelectInput(session, "ref", - selected = "RRE-Finder" - ) - shiny::updateSelectInput(session, "group_table_ui_1-group_by", - selected = "RRE-Finder" - ) - shiny::updateSelectInput(session, "ref_col_biocircos", - selected = "RRE-Finder" - ) + process_arts_archive <- function(archive, zip = TRUE, example_data = FALSE) { + if (example_data == TRUE) { + arts_data <- BGCViz:::arts_data + } else { + if (zip == TRUE) { + arts_data <- read_arts_archive(archive, zip = TRUE) + } else { + arts_data <- utils::read.csv(archive) + } + } + vals$arts_tree_data <- arts_data + vals$arts_data <- arts_data[,!(names(arts_data) %in% c("Trees", "TreesFiles"))] + vals$choices$ref <- c(vals$choices$ref, "ARTS" = "ARTS") + vals$choices$group_by <- c(vals$choices$group_by, "ARTS" = "ARTS") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "ARTS" = "ARTS") + update_ui_with_data() + vals$data_upload_count <- vals$data_upload_count + 1 + vals$arts_data_input <- TRUE + dup_table_id <- vals$arts_data %>% + dplyr::filter(Core != "Not_core") + shiny::updateSelectInput(session, "dup_choice", + choices = c("All", paste0("ID:", dup_table_id$ID, " ,Core:", dup_table_id$Core)), + selected 
= "All" + ) + + + + if (vals$data_upload_count == 1) { + shiny::updateSelectInput(session, "ref", + selected = "ARTS" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "ARTS" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + selected = "ARTS" + ) + } } - if (!is.null(vals$rre_data$Probability)) { - vals$rre_more <- T - } else { - vals$rre_more <- F + process_deep <- function(data, example_data = FALSE) { + if (example_data == TRUE) { + deep_data <- data + } else { + deep_data <- read_deep(data) + } + vals$deep_data <- deep_data + vals$deep_data$chromosome <- rep("D", length(vals$deep_data$bgc_candidate_id)) + vals$deep_data$Start <- vals$deep_data$nucl_start + vals$deep_data$Stop <- vals$deep_data$nucl_end + # Add ID column as number seuquence of dataframe length + vals$deep_data$ID <- seq(1:length(vals$deep_data$bgc_candidate_id)) + vals$deep_data$Cluster <- vals$deep_data$ID + vals$deep_data_input <- TRUE + vals$data_upload_count <- vals$data_upload_count + 1 + vals$deep_data_filtered <- filter_deepbgc(vals$deep_data, vals$cluster_type, vals$score_a, vals$score_c, vals$score_d, vals$domains_filter, vals$biodomain_filter, vals$gene_filter) + vals$choices$ref <- c(vals$choices$ref, "DeepBGC" = "DeepBGC") + vals$choices$group_by <- c(vals$choices$group_by, "DeepBGC" = "DeepBGC") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "DeepBGC" = "DeepBGC") + update_ui_with_data() + disable_event_logic() + if (vals$data_upload_count == 1) { + shiny::updateSelectInput(session, "ref", + selected = "DeepBGC" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "DeepBGC" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + choices = "DeepBGC", + selected = "DeepBGC" + ) + } } - } - - - #---------------------------------------------------------------- - ## Loading and processing of example data - - ## ---------------------------------------------------------------- - 
shiny::observeEvent(input$anti_sco, { - process_antismash(BGCViz:::anti_data, example_data = T) - }) - - shiny::observeEvent(input$gecco_sco, { - process_gecco(BGCViz:::gecco_data, example_data = T) - }) - - shiny::observeEvent(input$prism_sco, { - process_prism(BGCViz:::prism_data, example_data = T) - }) - - shiny::observeEvent(input$sempi_sco, { - process_sempi(BGCViz:::sempi_data, example_data = T) - }) - - shiny::observeEvent(input$arts_sco, { - process_arts_archive(BGCViz:::arts_data, example_data = T) - }) - - shiny::observeEvent(input$deep_sco, { - process_deep(BGCViz:::deep_data, example_data = T) - }) - - shiny::observeEvent(input$rre_sco, { - process_rre(BGCViz:::rre_data, example_data = T) - }) - - ## ---------------------------------------------------------------- - ## Loading and processing user data - - ## ---------------------------------------------------------------- - shiny::observeEvent(input$anti_data, { - disable_event_logic() - # Read data - if (input$anti_data$type == "text/csv") { - anti_data <- utils::read.csv(input$anti_data$datapath) - } else { - data <- rjson::fromJSON(file = input$anti_data$datapath) - types <- sapply(data$records, function(y) { - lapply(y$features, function(x) { - if (unlist(x$type == "region")) { - tolower(x$qualifiers$product) - } - }) - }) + process_rre <- function(data, example_data = FALSE) { + if (example_data == TRUE) { + rre_data <- data + } else { + rre_data <- read_rre(data) + } + vals$rre_data <- rre_data + # write.csv(vals$rre_data, "rre_data.csv", row.names = FALSE) + + vals$rre_data_input <- TRUE + vals$data_upload_count <- vals$data_upload_count + 1 + vals$choices$ref <- c(vals$choices$ref, "RRE-Finder" = "RRE-Finder") + vals$choices$group_by <- c(vals$choices$group_by, "RRE-Finder" = "RRE-Finder") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "RRE-Finder" = "RRE-Finder") + update_ui_with_data() + disable_event_logic() + if (vals$data_upload_count == 1) { + 
shiny::updateSelectInput(session, "ref", + selected = "RRE-Finder" + ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + selected = "RRE-Finder" + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + selected = "RRE-Finder" + ) + } + if (!is.null(vals$rre_data$Probability)) { + vals$rre_more <- TRUE + } else { + vals$rre_more <- FALSE + } + } + + + #---------------------------------------------------------------- + ## Loading and processing of example data - + ## ---------------------------------------------------------------- + + # shiny::observeEvent(input$emerald_sco, { + # process_emerald(BGCViz:::emerald_data, example_data = TRUE) + # }) + + shiny::observeEvent(input$ripp_sco, { + process_rippminer(BGCViz:::ripp_data, example_data = TRUE) + }) + + shiny::observeEvent(input$anti_sco, { + process_antismash(BGCViz:::anti_data, example_data = TRUE) + }) + + shiny::observeEvent(input$gecco_sco, { + process_gecco(BGCViz:::gecco_data, example_data = TRUE) + }) + + shiny::observeEvent(input$prism_sco, { + process_prism(BGCViz:::prism_data, example_data = TRUE) + }) + + shiny::observeEvent(input$sempi_sco, { + process_sempi(BGCViz:::sempi_data, example_data = TRUE) + }) + + shiny::observeEvent(input$arts_sco, { + process_arts_archive(BGCViz:::arts_data, example_data = TRUE) + }) + + shiny::observeEvent(input$deep_sco, { + process_deep(BGCViz:::deep_data, example_data = TRUE) + }) - types <- Filter(Negate(is.null), types) + shiny::observeEvent(input$rre_sco, { + process_rre(BGCViz:::rre_data, example_data = TRUE) + }) - types <- sapply(types, function(x) { - if (length(unlist(x)) > 1) { - tmp <- stringr::str_trim(paste0(unlist(x), collapse = "", sep = " ")) - gsub(" ", "__", tmp) + ## ---------------------------------------------------------------- + ## Loading and processing user data - + ## ---------------------------------------------------------------- + + shiny::observeEvent(input$compare_data, { + 
process_compare(input$compare_data$datapath) + }) + + shiny::observeEvent(input$emerald_data, { + process_emerald(input$emerald_data$datapath) + }) + + shiny::observeEvent(input$ripp_data, { + process_rippminer(input$ripp_data$datapath) + }) + + shiny::observeEvent(input$anti_data, { + disable_event_logic() + # Read data + if (input$anti_data$type == "text/csv") { + anti_data <- utils::read.csv(input$anti_data$datapath) } else { - x - } - }) - - location <- sapply(data$records, function(y) { - unlist(sapply(y$features, function(x) { - if (unlist(x$type == "region")) { - unlist(x$location) - } - })) - }) - - - location <- gsub("\\[", "", location) - location <- gsub("\\]", "", location) - location <- gsub("<", "", location) - location <- gsub(">", "", location) - location <- data.frame(location) - colnames(location) <- "split" - anti_data <- location %>% - tidyr::separate(split, c("Start", "Stop")) %>% - dplyr::transmute(ID = rownames(location), Start, Stop) - - anti_data <- cbind(anti_data, types) - colnames(anti_data) <- c("Cluster", "Start", "Stop", "Type") - anti_data$Cluster <- as.numeric(anti_data$Cluster) - anti_data$Start <- as.numeric(anti_data$Start) - anti_data$Stop <- as.numeric(anti_data$Stop) - } + data <- rjson::fromJSON(file = input$anti_data$datapath) + types <- sapply(data$records, function(y) { + lapply(y$features, function(x) { + if (unlist(x$type == "region")) { + tolower(x$qualifiers$product) + } + }) + }) - process_antismash(anti_data) - }) + types <- Filter(Negate(is.null), types) - shiny::observeEvent(input$sempi_data, { - if (input$sempi_data$type == "text/csv") { - sempi_data <- utils::read.csv(input$sempi_data$datapath) - process_sempi(sempi_data, zip = F) - } else { - process_sempi(input$sempi_data$datapath, zip = T) - } - }) - - shiny::observeEvent(input$gecco_data, { - gecco_data <- utils::read.delim(input$gecco_data$datapath) - process_gecco(gecco_data) - }) - - # These are for ARTS data processing - # input$known_data and 
inoput$dup_data - - shiny::observeEvent(input$arts_data, { - disable_event_logic() - if (input$arts_data$type == "text/csv") { - process_arts_archive(input$arts_data$datapath, zip = F) - } else { - process_arts_archive(input$arts_data$datapath, zip = T) - } - }) + types <- sapply(types, function(x) { + if (length(unlist(x)) > 1) { + tmp <- stringr::str_trim(paste0(unlist(x), collapse = "", sep = " ")) + gsub(" ", "__", tmp) + } else { + x + } + }) + location <- sapply(data$records, function(y) { + unlist(sapply(y$features, function(x) { + if (unlist(x$type == "region")) { + unlist(x$location) + } + })) + }) - shiny::observeEvent(input$prism_data, { - # Read data - if (input$prism_data$type == "text/csv") { - prism_data <- utils::read.csv(input$prism_data$datapath) - process_prism(prism_data, json = F) - } else { - data <- rjson::fromJSON(file = input$prism_data$datapath) - process_prism(data) - } - }) - - shiny::observeEvent(input$deep_data, { - data <- utils::read.delim(input$deep_data$datapath) - process_deep(data) - }) - - shiny::observeEvent(input$rre_data, { - - # Read data - rre_data <- utils::read.delim(input$rre_data$datapath) - process_rre(rre_data) - }) - - ############################################################################ - ############################################################################ - ### ### - ### INTERFACE LOGIC: WHAT TO SHOW AND WHEN ### - ### ### - ############################################################################ - ############################################################################ - # Update choices - update_ui_with_data <- function() { - shiny::updateSelectInput(session, "ref", - choices = vals$choices$ref - ) - shiny::updateSelectInput(session, "group_table_ui_1-group_by", - choices = vals$choices$group_by - ) - shiny::updateSelectInput(session, "ref_col_biocircos", - choices = vals$choices$ref_col_biocircos - ) - shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", - choices = 
vals$choices$ref_comparison_gecco - ) - shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", - choices = vals$choices$ref_comparison - ) - } - # Observe input of chromosome length - shiny::observeEvent(input$chr_len, { - vals$chr_len <- input$chr_len - }) - ## ---------------------------------------------------------------- - ## Simple options showing/hiding logic for every data input - - ## ---------------------------------------------------------------- - # SHOW rre_width parameter if data is available - # and hide_viz == F - shiny::observeEvent(vals$rre_data_input, { - if (vals$rre_data_input == T) { - shinyjs::showElement(selector = "#rre_width") - } else { - shinyjs::hideElement(selector = "#rre_width") - } - }) - # Show anti_hybrid option if data is available - # And checkbox is unchecked - shiny::observeEvent(vals$anti_data_input, { - if (vals$anti_data_input == T) { - shinyjs::showElement(selector = "#anti_hybrid") - } else { - shinyjs::hideElement(selector = "#anti_hybrid") - } - }) - # Show prism options if data is available - # If hide anti is F (checkbox), then show them - # Only if prism_json file, then show Prism-Supp - # And if hide_viz == F, and prism_json, then - # show width - shiny::observeEvent(vals$prism_data_input, { - if (vals$prism_data_input == T) { - shinyjs::showElement(selector = "#prism_hybrid") - if (vals$prism_json == T) { - shinyjs::showElement(selector = "#prism_supp") - } - if (vals$prism_json == T) { - shinyjs::showElement(selector = "#prism_supp_data_input_width") - } - } else { - shinyjs::hideElement(selector = "#prism_header") - shinyjs::hideElement(selector = "#prism_hybrid") - shinyjs::hideElement(selector = "#prism_supp") - shinyjs::hideElement(selector = "#prism_supp_data_input_width") - } - }) - # Show SEMPI elements on data upload - shiny::observeEvent(vals$sempi_data_input, { - if (vals$sempi_data_input == T) { - shinyjs::showElement(selector = "#sempi_hybrid") - shinyjs::showElement(selector = 
"#sempi_width") - } else { - shinyjs::hideElement(selector = "#sempi_hybrid") - shinyjs::hideElement(selector = "#sempi_width") - } - }) - # Ahow ARTS data options, if data is available - shiny::observeEvent(vals$arts_data_input, { - if (vals$arts_data_input == T) { - shinyjs::showElement(selector = "#dup_choice") - shinyjs::showElement(selector = "#arts_width") - } else { - shinyjs::hideElement(selector = "#dup_choice") - shinyjs::hideElement(selector = "#arts_width") - } - }) + location <- gsub("\\[", "", location) + location <- gsub("\\]", "", location) + location <- gsub("<", "", location) + location <- gsub(">", "", location) + location <- data.frame(location) + colnames(location) <- "split" + anti_data <- location %>% + tidyr::separate(split, c("Start", "Stop")) %>% + dplyr::transmute(ID = rownames(location), Start, Stop) + + anti_data <- cbind(anti_data, types) + colnames(anti_data) <- c("Cluster", "Start", "Stop", "Type") + anti_data$Cluster <- as.numeric(anti_data$Cluster) + anti_data$Start <- as.numeric(anti_data$Start) + anti_data$Stop <- as.numeric(anti_data$Stop) + } - shiny::observeEvent(vals$data_upload_count, { - if ((vals$arts_data_input == T) || (vals$sempi_data_input == T) || (vals$prism_supp_data_input == T) || (vals$rre_data_input == T)) { - shinyjs::showElement(selector = "#improve_visualization_box") - } else { - shinyjs::hideElement(selector = "#improve_visualization_box") - } - }) - shiny::observeEvent(vals$data_upload_count, { - if ((vals$arts_data_input == T) || (vals$prism_json == T)) { - shinyjs::showElement(selector = "#prism_supplement_arts_box") - } else { - shinyjs::hideElement(selector = "#prism_supplement_arts_box") - } - }) - ## --------------------------------------------------------------- - ## Data processing options show/hide - - ## --------------------------------------------------------------- - # Count data uploads, to show tabs and corresponding - # options - - output$deep_sidemenu_out <- shinydashboard::renderMenu({ - if 
(vals$data_upload_count >= 2) { - if ((vals$deep_data_input == T) & ((vals$anti_data_input == T) | (vals$prism_data_input == T) | (vals$sempi_data_input == T))) { - shinydashboard::menuItem("Compare data with DeepBGC", - tabName = "deep_sidemenu", icon = shiny::icon("dyalog"), - shinydashboard::menuItem("Compare with DeepBGC plots", tabName = "deep_sidemenu", icon = shiny::icon("chart-pie")), - shinydashboard::menuItem("Filtering options", - tabName = "deep_filter", icon = shiny::icon("filter"), - shiny::uiOutput("deep_filter_UI_sidemenu") - ) + process_antismash(anti_data) + }) + + shiny::observeEvent(input$sempi_data, { + if (input$sempi_data$type == "text/csv") { + sempi_data <- utils::read.csv(input$sempi_data$datapath) + process_sempi(sempi_data, zip = FALSE) + } else { + process_sempi(input$sempi_data$datapath, zip = TRUE) + } + }) + + shiny::observeEvent(input$gecco_data, { + gecco_data <- utils::read.delim(input$gecco_data$datapath) + process_gecco(gecco_data) + }) + + # These are for ARTS data processing + # input$known_data and inoput$dup_data + + shiny::observeEvent(input$arts_data, { + disable_event_logic() + if (input$arts_data$type == "text/csv") { + process_arts_archive(input$arts_data$datapath, zip = FALSE) + } else { + process_arts_archive(input$arts_data$datapath, zip = TRUE) + } + }) + + + shiny::observeEvent(input$prism_data, { + + # Read data + if (input$prism_data$type == "text/csv") { + prism_data <- utils::read.csv(input$prism_data$datapath) + process_prism(prism_data, json = FALSE) + } else { + data <- rjson::fromJSON(file = input$prism_data$datapath) + process_prism(data) + } + }) + + shiny::observeEvent(input$deep_data, { + data <- utils::read.delim(input$deep_data$datapath) + process_deep(data) + }) + + shiny::observeEvent(input$rre_data, { + + # Read data + rre_data <- utils::read.delim(input$rre_data$datapath) + process_rre(rre_data) + }) + + ############################################################################ + 
############################################################################ + ### ### + ### INTERFACE LOGIC: WHAT TO SHOW AND WHEN ### + ### ### + ############################################################################ + ############################################################################ + # Update choices + update_ui_with_data <- function() { + shiny::updateSelectInput(session, "ref", + choices = vals$choices$ref ) - } - } - }) - output$gecco_sidemenu_out <- shinydashboard::renderMenu({ - if (vals$data_upload_count >= 2) { - if ((vals$gecco_data_input == T) & ((vals$anti_data_input == T) | (vals$prism_data_input == T) | (vals$sempi_data_input == T))) { - shinydashboard::menuItem("Compare data with GECCO", - tabName = "gecco", icon = icon("fas fa-dragon"), - shinydashboard::menuItem("Compare with GECCO plots", tabName = "gecco_sidemenu", icon = shiny::icon("chart-pie")), - shinydashboard::menuItem("Filtering options", - tabName = "gecco_filter", icon = shiny::icon("filter"), - shiny::uiOutput("gecco_filter_UI_sidemenu") - ) + shiny::updateSelectInput(session, "group_table_ui_1-group_by", + choices = vals$choices$group_by + ) + shiny::updateSelectInput(session, "ref_col_biocircos", + choices = vals$choices$ref_col_biocircos + ) + shiny::updateSelectInput(session, "gecco_plots_ui_1-ref_comparison_gecco", + choices = vals$choices$ref_comparison_gecco + ) + shiny::updateSelectInput(session, "deep_barplot_ui_1-ref_comparison", + choices = vals$choices$ref_comparison ) - } - } - }) - output$anno_sidemenu_out <- shinydashboard::renderMenu({ - if (vals$data_upload_count >= 1) { - shinydashboard::menuItem("Annotation visualization and comparison", tabName = "anno_sidemenu", icon = icon("fas fa-project-diagram")) - } - }) - output$biocircos_sidemenu_out <- shinydashboard::renderMenu({ - if (vals$data_upload_count >= 2) { - shinydashboard::menuItem("Biocircos plot", tabName = "biocircos_sidemenu", icon = icon("fas fa-circle-notch")) - } - }) - 
output$summarize_sidemenu_out <- shinydashboard::renderMenu({ - if (vals$data_upload_count >= 2) { - shinydashboard::menuItem("Summarize interception", tabName = "summarize_sidemenu", icon = icon("fas fa-chart-bar")) - } - }) - - output$deep_filter_box <- shiny::renderUI({ - if (vals$deep_data_input == T) { - vals$deep_global <- T - shinydashboardPlus::box( - title = "DeepBGC filtering", - id = "deep_filtering_box", - collapsible = TRUE, - closable = TRUE, - width = NULL, - shiny::sliderInput("score_a", "Activity score threshold for DeepBGC data", min = 0, max = 100, value = 50), - shiny::sliderInput("score_d", "DeepBGC score threshold for DeepBGC data", min = 0, max = 100, value = 50), - shiny::sliderInput("score_c", "Cluster_type score threshold for DeepBGC data", min = 0, max = 100, value = 50), - # Domains, biodomains and proteins dplyr::filter. Remain >= of set threshold - shiny::sliderInput("domains_filter", "Domain number threshold for DeepBGC data", min = 0, max = 100, value = 5), - shiny::sliderInput("biodomain_filter", "Biodomain number threshold for DeepBGC data", min = 0, max = 100, value = 1), - shiny::sliderInput("gene_filter", "Protein number threshold for DeepBGC data", min = 0, max = 100, value = 1), - shiny::sliderInput("cluster_type", "Choose threshold to assign cluster type for DeepBGC data ", min = 0, max = 100, value = 50) - ) - } - }) - output$gecco_filter_box <- shiny::renderUI({ - if (vals$gecco_data_input == T) { - vals$gecco_global <- T - shinydashboardPlus::box( - title = "GECCO filtering", - id = "gecco_filtering_box", - collapsible = TRUE, - closable = TRUE, - width = NULL, - shiny::sliderInput("score_average_gecco", "Average p-value threshold for Gecco data (%, mapped from 0 to 1)", min = 0, max = 100, value = 50), - shiny::sliderInput("score_cluster_gecco", "Cluster type threshold for Gecco data (%, mapped from 0 to 1)", min = 0, max = 100, value = 50), - shiny::sliderInput("domains_filter_gecco", "Domain number threshold for Gecco 
data", min = 0, max = 100, value = 1), - shiny::sliderInput("prot_filter_gecco", "Protein number threshold for Gecco data", min = 0, max = 100, value = 1) - ) } - }) - - output$deep_filter_UI_sidemenu <- shiny::renderUI({ - vals$deep_sidebar <- T - shiny::tagList( - shiny::sliderInput("score_a_sidemenu", "Activity score threshold for DeepBGC data", min = 0, max = 100, value = 50), - shiny::sliderInput("score_d_sidemenu", "DeepBGC score threshold for DeepBGC data", min = 0, max = 100, value = 50), - shiny::sliderInput("score_c_sidemenu", "Cluster_type score threshold for DeepBGC data", min = 0, max = 100, value = 50), - # Domains, biodomains and proteins dplyr::filter. Remain >= of set threshold - shiny::sliderInput("domains_filter_sidemenu", "Domain number threshold for DeepBGC data", min = 0, max = 100, value = 5), - shiny::sliderInput("biodomain_filter_sidemenu", "Biodomain number threshold for DeepBGC data", min = 0, max = 100, value = 1), - shiny::sliderInput("gene_filter_sidemenu", "Protein number threshold for DeepBGC data", min = 0, max = 100, value = 1), - shiny::sliderInput("cluster_type_sidemenu", "Choose threshold to assign cluster type for DeepBGC data ", min = 0, max = 100, value = 50) - ) - }) - output$gecco_filter_UI_sidemenu <- shiny::renderUI({ - vals$gecco_sidebar <- T - shiny::tagList( - shiny::sliderInput("score_average_gecco_sidemenu", "Average p-value threshold for Gecco data (%, mapped from 0 to 1)", min = 0, max = 100, value = 50), - shiny::sliderInput("score_cluster_gecco_sidemenu", "Cluster type threshold for Gecco data (%, mapped from 0 to 1)", min = 0, max = 100, value = 50), - shiny::sliderInput("domains_filter_gecco_sidemenu", "Domain number threshold for Gecco data", min = 0, max = 100, value = 1), - shiny::sliderInput("prot_filter_gecco_sidemenu", "Protein number threshold for Gecco data", min = 0, max = 100, value = 1) - ) - }) - - update_filter_values <- function(listening_value, comparing_values, updating_value, rendering_check) { 
- if ((as.numeric(listening_value) != comparing_values) && (rendering_check == F)) { - shiny::updateSliderInput(session, updating_value, NULL, listening_value) - return(list(as.numeric(listening_value), F)) - } else { - if (grepl("sidemenu", updating_value) == T) { - shiny::updateSliderInput(session, stringr::str_split(updating_value, "_sidemenu")[[1]][1], NULL, comparing_values) + # Observe input of chromosome length + shiny::observeEvent(input$chr_len, { + vals$chr_len <- input$chr_len + }) + ## ---------------------------------------------------------------- + ## Simple options showing/hiding logic for every data input - + ## ---------------------------------------------------------------- + # SHOW rre_width parameter if data is available + # and hide_viz == FALSE + shiny::observeEvent(vals$rre_data_input, { + if (vals$rre_data_input == TRUE) { + shinyjs::showElement(selector = "#rre_width") + } else { + shinyjs::hideElement(selector = "#rre_width") + } + }) + # Show anti_hybrid option if data is available + # And checkbox is unchecked + shiny::observeEvent(vals$anti_data_input, { + if (vals$anti_data_input == TRUE) { + shinyjs::showElement(selector = "#anti_hybrid") + } else { + shinyjs::hideElement(selector = "#anti_hybrid") + } + }) + + # Show ripp_hybrid options + shiny::observeEvent(vals$ripp_data_input, { + if (vals$ripp_data_input == TRUE){ + shinyjs::showElement(selector = "#ripp_hybrid") } else { - shiny::updateSliderInput(session, paste0(updating_value, "_sidemenu")[[1]][1], NULL, comparing_values) + shinyjs::hideElement(selector = "#ripp_hybrid") + } + }) + + shiny::observeEvent(vals$emerald_data_input, { + if (vals$emerald_data_input == TRUE){ + shinyjs::showElement(selector = "#emerald_hybrid") + } else { + shinyjs::hideElement(selector = "#emerald_hybrid") + } + }) + + shiny::observeEvent(vals$compare_data_input, { + if (vals$compare_data_input == TRUE){ + shinyjs::showElement(selector = "#compare_hybrid") + } else { + shinyjs::hideElement(selector 
= "#compare_hybrid") + } + }) + # Show prism options if data is available + # If hide anti is FALSE (checkbox), then show them + # Only if prism_json file, then show Prism-Supp + # And if hide_viz == FALSE, and prism_json, then + # show width + shiny::observeEvent(vals$prism_data_input, { + if (vals$prism_data_input == TRUE) { + shinyjs::showElement(selector = "#prism_hybrid") + if (vals$prism_json == TRUE) { + shinyjs::showElement(selector = "#prism_supp") + } + if (vals$prism_json == TRUE) { + shinyjs::showElement(selector = "#prism_supp_data_input_width") + } + } else { + shinyjs::hideElement(selector = "#prism_header") + shinyjs::hideElement(selector = "#prism_hybrid") + shinyjs::hideElement(selector = "#prism_supp") + shinyjs::hideElement(selector = "#prism_supp_data_input_width") + } + }) + # Show SEMPI elements on data upload + shiny::observeEvent(vals$sempi_data_input, { + if (vals$sempi_data_input == TRUE) { + shinyjs::showElement(selector = "#sempi_hybrid") + shinyjs::showElement(selector = "#sempi_width") + } else { + shinyjs::hideElement(selector = "#sempi_hybrid") + shinyjs::hideElement(selector = "#sempi_width") + } + }) + # Ahow ARTS data options, if data is available + shiny::observeEvent(vals$arts_data_input, { + if (vals$arts_data_input == TRUE) { + shinyjs::showElement(selector = "#dup_choice") + shinyjs::showElement(selector = "#arts_width") + shinyjs::showElement(selector = "#phylo_file") + } else { + shinyjs::hideElement(selector = "#dup_choice") + shinyjs::hideElement(selector = "#arts_width") + shinyjs::hideElement(selector = "#phylo_file") + } + }) + + shiny::observeEvent(vals$data_upload_count, { + if ((vals$arts_data_input == TRUE) || (vals$sempi_data_input == TRUE) || (vals$prism_supp_data_input == TRUE) || (vals$rre_data_input == TRUE)) { + shinyjs::showElement(selector = "#improve_visualization_box") + } else { + shinyjs::hideElement(selector = "#improve_visualization_box") + } + }) + shiny::observeEvent(vals$data_upload_count, { + if 
((vals$arts_data_input == TRUE) || (vals$prism_json == TRUE)) { + shinyjs::showElement(selector = "#prism_supplement_arts_box") + } else { + shinyjs::hideElement(selector = "#prism_supplement_arts_box") + } + }) + ## --------------------------------------------------------------- + ## Data processing options show/hide - + ## --------------------------------------------------------------- + # Count data uploads, to show tabs and corresponding + # options + + output$deep_sidemenu_out <- shinydashboard::renderMenu({ + if (vals$data_upload_count >= 2) { + if ((vals$deep_data_input == TRUE) & ((vals$anti_data_input == TRUE) | (vals$prism_data_input == TRUE) | (vals$sempi_data_input == TRUE))) { + shinydashboard::menuItem("Compare data with DeepBGC", + tabName = "deep_sidemenu", icon = shiny::icon("dyalog"), + shinydashboard::menuItem("Compare with DeepBGC plots", tabName = "deep_sidemenu", icon = shiny::icon("chart-pie")), + shinydashboard::menuItem("Filtering options", + tabName = "deep_filter", icon = shiny::icon("filter"), + shiny::uiOutput("deep_filter_UI_sidemenu") + ) + ) + } + } + }) + output$gecco_sidemenu_out <- shinydashboard::renderMenu({ + if (vals$data_upload_count >= 2) { + if ((vals$gecco_data_input == TRUE) & ((vals$anti_data_input == TRUE) | (vals$prism_data_input == TRUE) | (vals$sempi_data_input == TRUE))) { + shinydashboard::menuItem("Compare data with GECCO", + tabName = "gecco", icon = icon("fas fa-dragon"), + shinydashboard::menuItem("Compare with GECCO plots", tabName = "gecco_sidemenu", icon = shiny::icon("chart-pie")), + shinydashboard::menuItem("Filtering options", + tabName = "gecco_filter", icon = shiny::icon("filter"), + shiny::uiOutput("gecco_filter_UI_sidemenu") + ) + ) + } + } + }) + output$anno_sidemenu_out <- shinydashboard::renderMenu({ + if (vals$data_upload_count >= 1) { + shinydashboard::menuItem("Annotation visualization and comparison", tabName = "anno_sidemenu", icon = icon("fas fa-project-diagram")) + } + }) + 
output$biocircos_sidemenu_out <- shinydashboard::renderMenu({ + if (vals$data_upload_count >= 2) { + shinydashboard::menuItem("Biocircos plot", tabName = "biocircos_sidemenu", icon = icon("fas fa-circle-notch")) + } + }) + output$summarize_sidemenu_out <- shinydashboard::renderMenu({ + if (vals$data_upload_count >= 2) { + shinydashboard::menuItem("Summarize interception", tabName = "summarize_sidemenu", icon = icon("fas fa-chart-bar")) + } + }) + output$arts_tree_sidemenu_out <- shinydashboard::renderMenu( + { + if (vals$arts_data_input == TRUE){ + shinydashboard::menuItem('ARTS phylogeny', tabName = "arts_tree_sidemenu", icon = icon("tree")) + } } - return(list(comparing_values, F)) - } - } - - - shiny::observeEvent(input$score_a, { - res <- update_filter_values(input$score_a, vals$score_a, "score_a_sidemenu", vals$deep_sidebar) - vals$score_a <- res[[1]] - vals$deep_sidebar <- res[[2]] - }) - shiny::observeEvent(input$score_d, { - res <- update_filter_values(input$score_d, vals$score_d, "score_d_sidemenu", vals$deep_sidebar) - vals$score_d <- res[[1]] - vals$deep_sidebar <- res[[2]] - }) - shiny::observeEvent(input$score_c, { - res <- update_filter_values(input$score_c, vals$score_c, "score_c_sidemenu", vals$deep_sidebar) - vals$score_c <- res[[1]] - vals$deep_sidebar <- res[[2]] - }) - shiny::observeEvent(input$domains_filter, { - res <- update_filter_values(input$domains_filter, vals$domains_filter, "domains_filter_sidemenu", vals$deep_sidebar) - vals$domains_filter <- res[[1]] - vals$deep_sidebar <- res[[2]] - }) - shiny::observeEvent(input$biodomain_filter, { - res <- update_filter_values(input$biodomain_filter, vals$biodomain_filter, "biodomain_filter_sidemenu", vals$deep_sidebar) - vals$biodomain_filter <- res[[1]] - vals$deep_sidebar <- res[[2]] - }) - shiny::observeEvent(input$gene_filter, { - res <- update_filter_values(input$gene_filter, vals$gene_filter, "gene_filter_sidemenu", vals$deep_sidebar) - vals$gene_filter <- res[[1]] - vals$deep_sidebar <- 
res[[2]] - }) - shiny::observeEvent(input$cluster_type, { - res <- update_filter_values(input$cluster_type, vals$cluster_type, "cluster_type_sidemenu", vals$deep_sidebar) - vals$cluster_type <- res[[1]] - vals$deep_sidebar <- res[[2]] - }) - shiny::observeEvent(input$score_a_sidemenu, { - res <- update_filter_values(input$score_a_sidemenu, vals$score_a, "score_a", vals$deep_global) - vals$score_a <- res[[1]] - vals$deep_global <- res[[2]] - }) - shiny::observeEvent(input$score_d_sidemenu, { - res <- update_filter_values(input$score_d_sidemenu, vals$score_d, "score_d", vals$deep_global) - vals$score_d <- res[[1]] - vals$deep_global <- res[[2]] - }) - shiny::observeEvent(input$score_c_sidemenu, { - res <- update_filter_values(input$score_c_sidemenu, vals$score_c, "score_c", vals$deep_global) - vals$score_c <- res[[1]] - vals$deep_global <- res[[2]] - }) - shiny::observeEvent(input$domains_filter_sidemenu, { - res <- update_filter_values(input$domains_filter_sidemenu, vals$domains_filter, "domains_filter", vals$deep_global) - vals$domains_filter <- res[[1]] - vals$deep_global <- res[[2]] - }) - shiny::observeEvent(input$biodomain_filter_sidemenu, { - res <- update_filter_values(input$biodomain_filter_sidemenu, vals$biodomain_filter, "biodomain_filter", vals$deep_global) - vals$biodomain_filter <- res[[1]] - vals$deep_global <- res[[2]] - }) - shiny::observeEvent(input$gene_filter_sidemenu, { - res <- update_filter_values(input$gene_filter_sidemenu, vals$gene_filter, "gene_filter", vals$deep_global) - vals$gene_filter <- res[[1]] - vals$deep_global <- res[[2]] - }) - shiny::observeEvent(input$cluster_type_sidemenu, { - res <- update_filter_values(input$cluster_type_sidemenu, vals$cluster_type, "cluster_type", vals$deep_global) - vals$cluster_type <- res[[1]] - vals$deep_global <- res[[2]] - }) - - - - shiny::observeEvent(input$score_average_gecco, { - res <- update_filter_values(input$score_average_gecco, vals$score_average_gecco, "score_average_gecco_sidemenu", 
vals$gecco_sidebar) - vals$score_average_gecco <- res[[1]] - vals$gecco_sidebar <- res[[2]] - }) - shiny::observeEvent(input$score_cluster_gecco, { - res <- update_filter_values(input$score_cluster_gecco, vals$score_cluster_gecco, "score_cluster_gecco_sidemenu", vals$gecco_sidebar) - vals$score_cluster_gecco <- res[[1]] - vals$gecco_sidebar <- res[[2]] - }) - shiny::observeEvent(input$domains_filter_gecco, { - res <- update_filter_values(input$domains_filter_gecco, vals$domains_filter_gecco, "domains_filter_gecco_sidemenu", vals$gecco_sidebar) - vals$domains_filter_gecco <- res[[1]] - vals$gecco_sidebar <- res[[2]] - }) - shiny::observeEvent(input$prot_filter_gecco, { - res <- update_filter_values(input$prot_filter_gecco, vals$prot_filter_gecco, "prot_filter_gecco_sidemenu", vals$gecco_sidebar) - vals$prot_filter_gecco <- res[[1]] - vals$gecco_sidebar <- res[[2]] - }) - shiny::observeEvent(input$score_average_gecco_sidemenu, { - res <- update_filter_values(input$score_average_gecco_sidemenu, vals$score_average_gecco, "score_average_gecco", vals$gecco_global) - vals$score_average_gecco <- res[[1]] - vals$gecco_global <- res[[2]] - }) - shiny::observeEvent(input$score_cluster_gecco_sidemenu, { - res <- update_filter_values(input$score_cluster_gecco_sidemenu, vals$score_cluster_gecco, "score_cluster_gecco", vals$gecco_global) - vals$score_cluster_gecco <- res[[1]] - vals$gecco_global <- res[[2]] - }) - shiny::observeEvent(input$domains_filter_gecco_sidemenu, { - res <- update_filter_values(input$domains_filter_gecco_sidemenu, vals$domains_filter_gecco, "domains_filter_gecco", vals$gecco_global) - vals$domains_filter_gecco <- res[[1]] - vals$gecco_global <- res[[2]] - }) - shiny::observeEvent(input$prot_filter_gecco_sidemenu, { - res <- update_filter_values(input$prot_filter_gecco_sidemenu, vals$prot_filter_gecco, "prot_filter_gecco", vals$gecco_global) - vals$prot_filter_gecco <- res[[1]] - vals$gecco_global <- res[[2]] - }) - - shiny::observeEvent(input$restore_box, 
{ - box_ids <- c( - "deep_comparison_box", "deep_rate_box", "deep_comparison_controls_box", "gecco_comparison_box", - "gecco_rate_box", "gecco_comparison_controls_box", "annotation_reference_box", "annotation_reference_comparison_box", - "annotation_reference_comparison_controls_box", "biocircos_plot_box", "biocircos_controls_box", - "ranking_barplot_box", "group_table_box", "upload_anti_box", "upload_prism_box", - "upload_sempi_box", "upload_deep_box", "upload_gecco_box", "upload_rre_box", "upload_arts_box", - "use_example_data_box", "rename_box", "prism_supplement_arts_box", "improve_visualization_box", - "download_data_box", "gecco_filtering_box", "deep_filtering_box" ) - for (id in box_ids) { - shinydashboardPlus::updateBox(id, action = "restore") - } - }) + output$deep_filter_box <- shiny::renderUI({ + if (vals$deep_data_input == TRUE) { + vals$deep_global <- TRUE + shinydashboardPlus::box( + title = "DeepBGC filtering", + id = "deep_filtering_box", + collapsible = TRUE, + closable = TRUE, + width = NULL, + shiny::sliderInput("score_a", "Activity score threshold for DeepBGC data", min = 0, max = 100, value = 50), + shiny::sliderInput("score_d", "DeepBGC score threshold for DeepBGC data", min = 0, max = 100, value = 50), + shiny::sliderInput("score_c", "Cluster_type score threshold for DeepBGC data", min = 0, max = 100, value = 50), + # Domains, biodomains and proteins dplyr::filter. 
Remain >= of set threshold + shiny::sliderInput("domains_filter", "Domain number threshold for DeepBGC data", min = 0, max = 100, value = 5), + shiny::sliderInput("biodomain_filter", "Biodomain number threshold for DeepBGC data", min = 0, max = 100, value = 1), + shiny::sliderInput("gene_filter", "Protein number threshold for DeepBGC data", min = 0, max = 100, value = 1), + shiny::sliderInput("cluster_type", "Choose threshold to assign cluster type for DeepBGC data ", min = 0, max = 100, value = 50) + ) + } + }) + output$gecco_filter_box <- shiny::renderUI({ + if (vals$gecco_data_input == TRUE) { + vals$gecco_global <- TRUE + shinydashboardPlus::box( + title = "GECCO filtering", + id = "gecco_filtering_box", + collapsible = TRUE, + closable = TRUE, + width = NULL, + shiny::sliderInput("score_average_gecco", "Average p-value threshold for Gecco data (%, mapped from 0 to 1)", min = 0, max = 100, value = 50), + shiny::sliderInput("score_cluster_gecco", "Cluster type threshold for Gecco data (%, mapped from 0 to 1)", min = 0, max = 100, value = 50), + shiny::sliderInput("domains_filter_gecco", "Domain number threshold for Gecco data", min = 0, max = 100, value = 1), + shiny::sliderInput("prot_filter_gecco", "Protein number threshold for Gecco data", min = 0, max = 100, value = 1) + ) + } + }) - # Logic show/hide selectinput in Link coloring in - # Biocircos - shiny::observeEvent(input$label_color_class, { - if (input$label_color_class == "R") { - shinyjs::showElement(selector = "#ref_col_biocircos") - } else { - shinyjs::hideElement(selector = "#ref_col_biocircos") - } - }) - # Make hybrids from the data, if checkbox is checked - # TODO Put the function to the root. 
- # Tou have duplicated code - shiny::observeEvent(input$anti_hybrid, ignoreInit = T, { - if (input$anti_hybrid == T) { - vals$anti_data$Type2 <- hybrid_col(vals$anti_data) - } else { - vals$anti_data$Type2 <- vals$anti_type - } - }) - shiny::observeEvent(input$prism_hybrid, ignoreInit = T, { - if (input$prism_hybrid == T) { - vals$prism_data$Type2 <- hybrid_col(vals$prism_data) - } else { - vals$prism_data$Type2 <- vals$prism_type - } - }) - shiny::observeEvent(input$sempi_hybrid, ignoreInit = T, { - if (input$sempi_hybrid == T) { - vals$sempi_data$Type2 <- hybrid_col(vals$sempi_data) - } else { - vals$sempi_data$Type2 <- vals$sempi_type - } - }) - # Rename the data, if button is clicked - shiny::observeEvent(input$rename, { - rename_data <- vals$rename_data - if (vals$anti_data_input == T) { - anti_data <- vals$anti_data - res <- rename_vector(anti_data, rename_data, vals$renaming_notification) - vals$anti_type <- res[[1]] - vals$renaming_notification <- res[[2]] - anti_data["Type2"] <- vals$anti_type - vals$anti_data <- anti_data - } + output$deep_filter_UI_sidemenu <- shiny::renderUI({ + vals$deep_sidebar <- TRUE + shiny::tagList( + shiny::sliderInput("score_a_sidemenu", "Activity score threshold for DeepBGC data", min = 0, max = 100, value = 50), + shiny::sliderInput("score_d_sidemenu", "DeepBGC score threshold for DeepBGC data", min = 0, max = 100, value = 50), + shiny::sliderInput("score_c_sidemenu", "Cluster_type score threshold for DeepBGC data", min = 0, max = 100, value = 50), + # Domains, biodomains and proteins dplyr::filter. 
Remain >= of set threshold + shiny::sliderInput("domains_filter_sidemenu", "Domain number threshold for DeepBGC data", min = 0, max = 100, value = 5), + shiny::sliderInput("biodomain_filter_sidemenu", "Biodomain number threshold for DeepBGC data", min = 0, max = 100, value = 1), + shiny::sliderInput("gene_filter_sidemenu", "Protein number threshold for DeepBGC data", min = 0, max = 100, value = 1), + shiny::sliderInput("cluster_type_sidemenu", "Choose threshold to assign cluster type for DeepBGC data ", min = 0, max = 100, value = 50) + ) + }) + output$gecco_filter_UI_sidemenu <- shiny::renderUI({ + vals$gecco_sidebar <- TRUE + shiny::tagList( + shiny::sliderInput("score_average_gecco_sidemenu", "Average p-value threshold for Gecco data (%, mapped from 0 to 1)", min = 0, max = 100, value = 50), + shiny::sliderInput("score_cluster_gecco_sidemenu", "Cluster type threshold for Gecco data (%, mapped from 0 to 1)", min = 0, max = 100, value = 50), + shiny::sliderInput("domains_filter_gecco_sidemenu", "Domain number threshold for Gecco data", min = 0, max = 100, value = 1), + shiny::sliderInput("prot_filter_gecco_sidemenu", "Protein number threshold for Gecco data", min = 0, max = 100, value = 1) + ) + }) - if (vals$sempi_data_input == T) { - sempi_data <- vals$sempi_data - res <- rename_vector(sempi_data, rename_data, vals$renaming_notification) - vals$sempi_type <- res[[1]] - vals$renaming_notification <- res[[2]] - sempi_data["Type2"] <- vals$sempi_type - vals$sempi_data <- sempi_data + update_filter_values <- function(listening_value, comparing_values, updating_value, rendering_check) { + if ((as.numeric(listening_value) != comparing_values) && (rendering_check == FALSE)) { + shiny::updateSliderInput(session, updating_value, NULL, listening_value) + return(list(as.numeric(listening_value), FALSE)) + } else { + if (grepl("sidemenu", updating_value) == TRUE) { + shiny::updateSliderInput(session, stringr::str_split(updating_value, "_sidemenu")[[1]][1], NULL, 
comparing_values) + } else { + shiny::updateSliderInput(session, paste0(updating_value, "_sidemenu")[[1]][1], NULL, comparing_values) + } + return(list(comparing_values, FALSE)) + } } - if (vals$prism_data_input == T) { - prism_data <- vals$prism_data - res <- rename_vector(prism_data, rename_data, vals$renaming_notification) - vals$prism_type <- res[[1]] - vals$renaming_notification <- res[[2]] - prism_data["Type2"] <- vals$prism_type - vals$prism_data <- prism_data - } - shinyjs::showElement(selector = "#reset_name") - shinyjs::hideElement(selector = "#rename") - vals$renamed <- T - shiny::showNotification(paste("Please note: SEMPI, PRISM and Antismash input data will be renamed on upload"), type = "warning", duration = 10) - }) - # When the new data is uploaded and renamed - # is T, then rename data on upload - shiny::observeEvent(check_to_rename(), { - shiny::req(vals$renamed == T) - - rename_data <- vals$rename_data - if (vals$anti_data_input == T) { - anti_data <- vals$anti_data - res <- rename_vector(anti_data, rename_data, vals$renaming_notification) - vals$anti_type <- res[[1]] - vals$renaming_notification <- res[[2]] - anti_data["Type2"] <- vals$anti_type - vals$anti_data <- anti_data - } - if (vals$sempi_data_input == T) { - sempi_data <- vals$sempi_data - res <- rename_vector(sempi_data, rename_data, vals$renaming_notification) - vals$sempi_type <- res[[1]] - vals$renaming_notification <- res[[2]] - sempi_data["Type2"] <- vals$sempi_type - vals$sempi_data <- sempi_data - } + shiny::observeEvent(input$score_a, { + res <- update_filter_values(input$score_a, vals$score_a, "score_a_sidemenu", vals$deep_sidebar) + vals$score_a <- res[[1]] + vals$deep_sidebar <- res[[2]] + }) + shiny::observeEvent(input$score_d, { + res <- update_filter_values(input$score_d, vals$score_d, "score_d_sidemenu", vals$deep_sidebar) + vals$score_d <- res[[1]] + vals$deep_sidebar <- res[[2]] + }) + shiny::observeEvent(input$score_c, { + res <- update_filter_values(input$score_c, 
vals$score_c, "score_c_sidemenu", vals$deep_sidebar) + vals$score_c <- res[[1]] + vals$deep_sidebar <- res[[2]] + }) + shiny::observeEvent(input$domains_filter, { + res <- update_filter_values(input$domains_filter, vals$domains_filter, "domains_filter_sidemenu", vals$deep_sidebar) + vals$domains_filter <- res[[1]] + vals$deep_sidebar <- res[[2]] + }) + shiny::observeEvent(input$biodomain_filter, { + res <- update_filter_values(input$biodomain_filter, vals$biodomain_filter, "biodomain_filter_sidemenu", vals$deep_sidebar) + vals$biodomain_filter <- res[[1]] + vals$deep_sidebar <- res[[2]] + }) + shiny::observeEvent(input$gene_filter, { + res <- update_filter_values(input$gene_filter, vals$gene_filter, "gene_filter_sidemenu", vals$deep_sidebar) + vals$gene_filter <- res[[1]] + vals$deep_sidebar <- res[[2]] + }) + shiny::observeEvent(input$cluster_type, { + res <- update_filter_values(input$cluster_type, vals$cluster_type, "cluster_type_sidemenu", vals$deep_sidebar) + vals$cluster_type <- res[[1]] + vals$deep_sidebar <- res[[2]] + }) + shiny::observeEvent(input$score_a_sidemenu, { + res <- update_filter_values(input$score_a_sidemenu, vals$score_a, "score_a", vals$deep_global) + vals$score_a <- res[[1]] + vals$deep_global <- res[[2]] + }) + shiny::observeEvent(input$score_d_sidemenu, { + res <- update_filter_values(input$score_d_sidemenu, vals$score_d, "score_d", vals$deep_global) + vals$score_d <- res[[1]] + vals$deep_global <- res[[2]] + }) + shiny::observeEvent(input$score_c_sidemenu, { + res <- update_filter_values(input$score_c_sidemenu, vals$score_c, "score_c", vals$deep_global) + vals$score_c <- res[[1]] + vals$deep_global <- res[[2]] + }) + shiny::observeEvent(input$domains_filter_sidemenu, { + res <- update_filter_values(input$domains_filter_sidemenu, vals$domains_filter, "domains_filter", vals$deep_global) + vals$domains_filter <- res[[1]] + vals$deep_global <- res[[2]] + }) + shiny::observeEvent(input$biodomain_filter_sidemenu, { + res <- 
update_filter_values(input$biodomain_filter_sidemenu, vals$biodomain_filter, "biodomain_filter", vals$deep_global) + vals$biodomain_filter <- res[[1]] + vals$deep_global <- res[[2]] + }) + shiny::observeEvent(input$gene_filter_sidemenu, { + res <- update_filter_values(input$gene_filter_sidemenu, vals$gene_filter, "gene_filter", vals$deep_global) + vals$gene_filter <- res[[1]] + vals$deep_global <- res[[2]] + }) + shiny::observeEvent(input$cluster_type_sidemenu, { + res <- update_filter_values(input$cluster_type_sidemenu, vals$cluster_type, "cluster_type", vals$deep_global) + vals$cluster_type <- res[[1]] + vals$deep_global <- res[[2]] + }) - if (vals$prism_data_input == T) { - prism_data <- vals$prism_data - res <- rename_vector(prism_data, rename_data, vals$renaming_notification) - vals$prism_type <- res[[1]] - vals$renaming_notification <- res[[2]] - prism_data["Type2"] <- vals$prism_type - vals$prism_data <- prism_data - } - }) - # Reset the renaming. Uncheck the hybrid checkboxes - shiny::observeEvent(input$reset_name, { - vals$anti_data["Type2"] <- vals$anti_data["Type"] - vals$sempi_data["Type2"] <- vals$sempi_data["Type"] - vals$ prism_data["Type2"] <- vals$ prism_data["Type"] - if (input$anti_hybrid == T) { - shiny::showNotification(paste("Antismash cluster types are NOT visualized as hybrid anymore. You should check the option one more time"), type = "warning", duration = 10) - shiny::updateCheckboxInput(inputId = "anti_hybrid", value = F) - } - if (input$prism_hybrid == T) { - shiny::showNotification(paste("PRISM cluster types are NOT visualized as hybrid anymore. You should check the option one more time"), type = "warning", duration = 10) - shiny::updateCheckboxInput(inputId = "prism_hybrid", value = F) - } - if (input$sempi_hybrid == T) { - shiny::showNotification(paste("SEMPI cluster types are NOT visualized as hybrid anymore. 
You should check the option one more time"), type = "warning", duration = 10) - shiny::updateCheckboxInput(inputId = "sempi_hybrid", value = F) - } - shinyjs::showElement(selector = "#rename") - shinyjs::hideElement(selector = "#reset_name") - vals$renamed <- F - }) - # Read the uploaded renaming scheme csv - shiny::observeEvent(input$rename_data, { - rename_data <- utils::read.csv(input$rename_data$datapath) - vals$rename_data <- rename_data - coloring_datatable <- data.frame(tidyr::drop_na(data.frame(cbind(as.character(rename_data$Group_color), as.character(rename_data$Color), rename_data$Hierarchy)))) - coloring_datatable <- coloring_datatable[!apply(coloring_datatable == "", 1, all), ] - colnames(coloring_datatable) <- c("Name", "Color", "Hierarchy") - vals$coloring_datatable <- DT::datatable(coloring_datatable, rownames = F, editable = "column") - }) - - - # What to do, if hide DeepBGC comparison options scheme is triggered - - - ############################################################################ - ############################################################################ - ### ### - ### COMPUTATIONS ### - ### ### - ############################################################################ - ############################################################################ - shiny::observeEvent(input$prism_supp, ignoreInit = T, priority = 3, { - if (input$prism_supp == T) { - vals$need_filter <- T - vals$prism_supp_data_input <- T - vals$prism_supp_plot <- T - if (!("PRISM-Supp" %in% names(vals$choices$ref))) { - vals$choices$ref <- c(vals$choices$ref, "PRISM-Supp" = "PRISM-Supp") - vals$choices$group_by <- c(vals$choices$group_by, "PRISM-Supp" = "PRISM-Supp") - vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "PRISM-Supp" = "PRISM-Supp") - update_ui_with_data() - } - } else { - vals$prism_supp_data_input <- F - vals$need_filter <- T - vals$prism_supp_plot <- F - vals$choices$ref <- vals$choices$ref[!(names(vals$choices$ref) %in% 
c("PRISM-Supp"))] - vals$choices$group_by <- vals$choices$group_by[!(names(vals$choices$group_by) %in% c("PRISM-Supp"))] - vals$choices$ref_col_biocircos <- vals$choices$ref_col_biocircos[!(names(vals$choices$ref_col_biocircos) %in% c("PRISM-Supp"))] - update_ui_with_data() - } - }) - - # Compute all interceptions on data upload. - # dplyr::filter while ploting then. - shiny::observeEvent(inputData(), ignoreInit = T, priority = 5, { - # GENERATE DATA - if (vals$anti_data_input == TRUE) { - anti_data <- vals$anti_data - anti_inter <- vals$anti_data %>% - dplyr::select(Start, Stop) - anti_inter$seqnames <- "chr" - } - if (vals$deep_data_input == TRUE) { - deep_data <- vals$deep_data - deep_inter <- vals$deep_data %>% - dplyr::select(Start, Stop) - deep_inter$seqnames <- "chr" - } - if (vals$rre_data_input == TRUE) { - # Convert numeric columns in a dataframe as a numeric - vals$rre_data$Start <- as.numeric(vals$rre_data$Start) - vals$rre_data$Stop <- as.numeric(vals$rre_data$Stop) - # Store rre data into local variable - rre_data <- data.frame(vals$rre_data) - # Start/Stop columns from rre data as matrix - rre_inter <- rre_data %>% - dplyr::select(Start, Stop) - rre_inter$seqnames <- "chr" - } - if (vals$prism_data_input == TRUE) { - # Store master prism data in local variable - prism_data <- vals$prism_data - # Start/Stop columns from prism data as matrix - prism_inter <- prism_data %>% - dplyr::select(Start, Stop) - prism_inter$seqnames <- "chr" - } - if (vals$sempi_data_input == TRUE) { - # Store master prism data in local variable - sempi_data <- vals$sempi_data - # Start/Stop columns from prism data as matrix - sempi_inter <- vals$sempi_data %>% - dplyr::select(Start, Stop) - sempi_inter$seqnames <- "chr" - } - if (vals$prism_json == T) { - prism_supp_data <- vals$prism_supp_data - prism_supp_inter <- vals$prism_supp_data %>% - dplyr::select(Start, Stop) - prism_supp_inter$seqnames <- "chr" - } - if (vals$arts_data_input == T) { - arts_data <- vals$arts_data - 
arts_inter <- vals$arts_data %>% - dplyr::select(Start, Stop) - arts_inter$seqnames <- "chr" - } - if (vals$gecco_data_input == TRUE) { - gecco_data <- vals$gecco_data - # Start/Stop columns from prism data as matrix - gecco_inter <- vals$gecco_data %>% - dplyr::select(Start, Stop) - gecco_inter$seqnames <- "chr" - } - get_inter <- function(inter1, inter2) { - query <- GenomicRanges::makeGRangesFromDataFrame(inter2) - subject <- GenomicRanges::makeGRangesFromDataFrame(inter1) - interseption <- GenomicRanges::findOverlaps(query, subject) - inter_from <- interseption@from - inter_to <- interseption@to - return(list(from = inter_from, to = inter_to)) - } + shiny::observeEvent(input$score_average_gecco, { + res <- update_filter_values(input$score_average_gecco, vals$score_average_gecco, "score_average_gecco_sidemenu", vals$gecco_sidebar) + vals$score_average_gecco <- res[[1]] + vals$gecco_sidebar <- res[[2]] + }) + shiny::observeEvent(input$score_cluster_gecco, { + res <- update_filter_values(input$score_cluster_gecco, vals$score_cluster_gecco, "score_cluster_gecco_sidemenu", vals$gecco_sidebar) + vals$score_cluster_gecco <- res[[1]] + vals$gecco_sidebar <- res[[2]] + }) + shiny::observeEvent(input$domains_filter_gecco, { + res <- update_filter_values(input$domains_filter_gecco, vals$domains_filter_gecco, "domains_filter_gecco_sidemenu", vals$gecco_sidebar) + vals$domains_filter_gecco <- res[[1]] + vals$gecco_sidebar <- res[[2]] + }) + shiny::observeEvent(input$prot_filter_gecco, { + res <- update_filter_values(input$prot_filter_gecco, vals$prot_filter_gecco, "prot_filter_gecco_sidemenu", vals$gecco_sidebar) + vals$prot_filter_gecco <- res[[1]] + vals$gecco_sidebar <- res[[2]] + }) + shiny::observeEvent(input$score_average_gecco_sidemenu, { + res <- update_filter_values(input$score_average_gecco_sidemenu, vals$score_average_gecco, "score_average_gecco", vals$gecco_global) + vals$score_average_gecco <- res[[1]] + vals$gecco_global <- res[[2]] + }) + 
shiny::observeEvent(input$score_cluster_gecco_sidemenu, { + res <- update_filter_values(input$score_cluster_gecco_sidemenu, vals$score_cluster_gecco, "score_cluster_gecco", vals$gecco_global) + vals$score_cluster_gecco <- res[[1]] + vals$gecco_global <- res[[2]] + }) + shiny::observeEvent(input$domains_filter_gecco_sidemenu, { + res <- update_filter_values(input$domains_filter_gecco_sidemenu, vals$domains_filter_gecco, "domains_filter_gecco", vals$gecco_global) + vals$domains_filter_gecco <- res[[1]] + vals$gecco_global <- res[[2]] + }) + shiny::observeEvent(input$prot_filter_gecco_sidemenu, { + res <- update_filter_values(input$prot_filter_gecco_sidemenu, vals$prot_filter_gecco, "prot_filter_gecco", vals$gecco_global) + vals$prot_filter_gecco <- res[[1]] + vals$gecco_global <- res[[2]] + }) - inters <- vals$inters - index <- 1 - for (i in data_uploads_inter) { - index_2 <- 1 - j <- soft_names[index] - for (p in data_uploads_inter) { - x <- soft_names[index_2] - if ((vals[[i]] == TRUE) & (vals$computed[[j]] == F) & (j != x)) { - if ((vals[[p]] == TRUE) & (j != soft_names[index_2])) { - res <- get_inter(eval(as.name(paste(j, "_inter", sep = ""))), eval(as.name(paste(x, "_inter", sep = "")))) - new_res <- list() - new_res$from <- eval(as.name(paste(x, "_data", sep = "")))[res$from, ]$Cluster - new_res$to <- eval(as.name(paste(j, "_data", sep = "")))[res$to, ]$Cluster - inters[[j]][[x]] <- new_res - inters[[x]][[j]] <- list(from = new_res$to, to = new_res$from) - } - } - index_2 <- index_2 + 1 + shiny::observeEvent(input$restore_box, { + box_ids <- c( + "deep_comparison_box", "deep_rate_box", "deep_comparison_controls_box", "gecco_comparison_box", + "gecco_rate_box", "gecco_comparison_controls_box", "annotation_reference_box", "annotation_reference_comparison_box", + "annotation_reference_comparison_controls_box", "biocircos_plot_box", "biocircos_controls_box", + "ranking_barplot_box", "group_table_box", "upload_anti_box","upload_ripp_box", "upload_emerald_box", 
"upload_compare_box", + "upload_prism_box","upload_sempi_box", "upload_deep_box", "upload_gecco_box", "upload_rre_box", "upload_arts_box", + "use_example_data_box", "rename_box", "prism_supplement_arts_box", "improve_visualization_box", + "download_data_box", "gecco_filtering_box", "deep_filtering_box", "arts_tree_box" + ) + for (id in box_ids) { + shinydashboardPlus::updateBox(id, action = "restore") + } + }) + + shiny::observeEvent(mod_download_ui('download_anti_data_2'), { +## display something on screen + if (!is.null(vals$tracklist)) { + shiny::showNotification("Downloading data...", duration = 5, type = "message") + } + }) + + + # Logic show/hide selectinput in Link coloring in + # Biocircos + shiny::observeEvent(input$label_color_class, { + if (input$label_color_class == "R") { + shinyjs::showElement(selector = "#ref_col_biocircos") + } else { + shinyjs::hideElement(selector = "#ref_col_biocircos") + } + }) + # Make hybrids from the data, if checkbox is checked + # TODO Put the function to the root. 
+ # Tou have duplicated code + shiny::observeEvent(input$compare_hybrid, ignoreInit = TRUE, { + if (input$compare_hybrid == TRUE) { + vals$compare_data$Type2 <- hybrid_col(vals$compare_data) + } else { + vals$compare_data$Type2 <- vals$compare_type } - if (vals[[i]] == TRUE) { - vals$computed[[j]] <- TRUE + }) + + shiny::observeEvent(input$emerald_hybrid, ignoreInit = TRUE, { + if (input$emerald_hybrid == TRUE) { + vals$emerald_data$Type2 <- hybrid_col(vals$emerald_data) + } else { + vals$emerald_data$Type2 <- vals$emerald_type } - index <- index + 1 - } - - vals$inters <- inters - if ((vals$deep_data_input == F) & (vals$gecco_data_input == F) & (vals$arts_data_input == F)) { - vals$inters_filtered <- inters - enable_event_logic() - } else { - vals$need_filter <- T - vals$filter_data <- T - } - }) - # dplyr::filter ARTS, DeepBGC, GECCO interception data - # and general dataframes to plot, if data filtering - # options are triggered - shiny::observeEvent( - { - dynamicInput() - to_debounce() - }, - ignoreInit = T, - priority = 4, - { - shiny::req(vals$data_upload_count >= 1) - inters <- vals$inters - if (vals$deep_data_input == TRUE) { - if (vals$need_filter == F) { - biocircos_deep <- filter_deepbgc(vals$deep_data, vals$cluster_type, vals$score_a, vals$score_c, vals$score_d, vals$domains_filter, vals$biodomain_filter, vals$gene_filter) - vals$deep_data_filtered <- biocircos_deep + }) + shiny::observeEvent(input$ripp_hybrid, ignoreInit = TRUE, { + if (input$ripp_hybrid == TRUE) { + vals$ripp_data$Type2 <- hybrid_col(vals$ripp_data) + } else { + vals$ripp_data$Type2 <- vals$ripp_type + } + }) + shiny::observeEvent(input$anti_hybrid, ignoreInit = TRUE, { + if (input$anti_hybrid == TRUE) { + vals$anti_data$Type2 <- hybrid_col(vals$anti_data) } else { - biocircos_deep <- vals$deep_data_filtered - } - if (vals$data_upload_count != 1) { - new_deep <- lapply(inters$deep, function(x) { - new_to <- x$to[x$to %in% biocircos_deep$Cluster] - new_from <- x$from[x$to %in% 
biocircos_deep$Cluster] - list(from = new_from, to = new_to) - }) - new_inters <- inters - update_list <- names(inters$deep) - for (b in seq(1:length(update_list))) { - new_inters[[update_list[b]]]$deep$to <- new_deep[[update_list[b]]]$from - new_inters[[update_list[b]]]$deep$from <- new_deep[[update_list[b]]]$to - } - new_inters$deep <- new_deep - vals$inters_filtered <- new_inters - inters <- new_inters + vals$anti_data$Type2 <- vals$anti_type } - } - if (vals$gecco_data_input == TRUE) { - if (vals$need_filter == F) { - gecco_data <- filter_gecco(vals$gecco_data, vals$score_cluster_gecco, vals$score_average_gecco, vals$domains_filter_gecco, vals$prot_filter_gecco) - vals$gecco_data_filtered <- gecco_data + }) + shiny::observeEvent(input$prism_hybrid, ignoreInit = TRUE, { + if (input$prism_hybrid == TRUE) { + vals$prism_data$Type2 <- hybrid_col(vals$prism_data) } else { - gecco_data <- vals$gecco_data_filtered - } - if (vals$data_upload_count != 1) { - new_gecco <- lapply(inters$gecco, function(x) { - new_to <- x$to[x$to %in% gecco_data$Cluster] - new_from <- x$from[x$to %in% gecco_data$Cluster] - list(from = new_from, to = new_to) - }) - new_inters <- inters - update_list <- names(inters$gecco) - for (b in seq(1:length(update_list))) { - new_inters[[update_list[b]]]$gecco$to <- new_gecco[[update_list[b]]]$from - new_inters[[update_list[b]]]$gecco$from <- new_gecco[[update_list[b]]]$to - } - new_inters$gecco <- new_gecco - vals$inters_filtered <- new_inters - inters <- new_inters + vals$prism_data$Type2 <- vals$prism_type } - } - if (vals$arts_data_input == TRUE) { - if (input$dup_choice != "All") { - vals$arts_data_filtered <- data.frame(vals$arts_data) %>% - dplyr::filter(Core == stringr::str_split(stringr::str_split(input$dup_choice, " ,")[[1]][[2]], "Core:")[[1]][[2]] | Core == "Not_core") - if (vals$data_upload_count != 1) { - new_arts <- lapply(inters$arts, function(x) { - new_to <- x$to[x$to %in% vals$arts_data_filtered$Cluster] - new_from <- x$from[x$to 
%in% vals$arts_data_filtered$Cluster] - list(from = new_from, to = new_to) - }) - new_inters <- inters - update_list <- names(inters$arts) - for (b in seq(1:length(update_list))) { - new_inters[[update_list[b]]]$arts$to <- new_arts[[update_list[b]]]$from - new_inters[[update_list[b]]]$arts$from <- new_arts[[update_list[b]]]$to - } - new_inters$arts <- new_arts - vals$inters_filtered <- new_inters - inters <- new_inters - } + }) + shiny::observeEvent(input$sempi_hybrid, ignoreInit = TRUE, { + if (input$sempi_hybrid == TRUE) { + vals$sempi_data$Type2 <- hybrid_col(vals$sempi_data) } else { - vals$arts_data_filtered <- vals$arts_data - vals$inters_filtered <- inters + vals$sempi_data$Type2 <- vals$sempi_type } - } - if (input$prism_supp == FALSE) { - inters$prism_supp <- NULL - for (name in names(inters)) { - inters[[name]][which(names(inters[[name]]) %in% c("prism_supp"))] <- NULL + }) + + # Rename the data, if button is clicked + shiny::observeEvent(input$rename, { + rename_data <- vals$rename_data + if (vals$anti_data_input == TRUE) { + anti_data <- vals$anti_data + res <- rename_vector(anti_data, rename_data, vals$renaming_notification) + vals$anti_type <- res[[1]] + vals$renaming_notification <- res[[2]] + anti_data["Type2"] <- vals$anti_type + vals$anti_data <- anti_data } - } - if ((vals$gecco_data_input == F) & (vals$deep_data_input == F) & (vals$arts_data_input == F)) { - vals$inters_filtered <- inters - } - vals$need_filter <- F - vals$filter_data <- F - vals$can_plot_deep_ref <- T - enable_event_logic() - } - ) - # Compute the Biociros plot. Store information to plot later - shiny::observeEvent(biocircos_listen(), ignoreInit = T, priority = 3, { - shiny::req(vals$data_upload_count >= 2) - shiny::req(vals$need_filter == F) - shiny::req(vals$can_plot_biocircos == T) - ## source("src/biocircos_functions.R") - # BioCircos! 
- Biocircos_chromosomes <- list() - arcs_chromosomes <- c() - arcs_begin <- c() - arcs_end <- c() - arc_labels <- c() - arc_col <- c() - - if (is.null(vals$inters_filtered)) { - inters <- vals$inters - } else { - inters <- vals$inters_filtered - } + if (vals$sempi_data_input == TRUE) { + sempi_data <- vals$sempi_data + res <- rename_vector(sempi_data, rename_data, vals$renaming_notification) + vals$sempi_type <- res[[1]] + vals$renaming_notification <- res[[2]] + sempi_data["Type2"] <- vals$sempi_type + vals$sempi_data <- sempi_data + } - rename_data <- vals$rename_data - coloring_datatable <- vals$coloring_datatable - - index <- 1 - # browser() - for (upload in data_uploads) { - if (vals[[upload]] == T) { - # Store data in local variable - corrected_data <- correct_width(vals[[data_to_use[index]]], soft_namings[index], input$sempi_width, input$prism_supp_data_input_width, input$arts_width, input$rre_width) - init_data <- initialize_biocircos(corrected_data, soft_namings[index], Biocircos_chromosomes, arcs_chromosomes, arcs_begin, arcs_end, arc_labels, arc_col, rename_data, vals$chr_len, input$biocircos_color, coloring_datatable) - # Make chromosome list for Biocircos plot. Use chr_len as an input - Biocircos_chromosomes <- init_data[[1]] - # Add arcs. Quantity of arcs is length of dataframes - arcs_chromosomes <- init_data[[2]] - # Add arcs begin positions. (Start column) - arcs_begin <- init_data[[3]] - # Stop position of arcs. - arcs_end <- init_data[[4]] - # Add Arcs labels. Can add only one label... 
- arc_labels <- init_data[[5]] - - arc_col <- init_data[[6]] - } - index <- index + 1 + if (vals$prism_data_input == TRUE) { + prism_data <- vals$prism_data + res <- rename_vector(prism_data, rename_data, vals$renaming_notification) + vals$prism_type <- res[[1]] + vals$renaming_notification <- res[[2]] + prism_data["Type2"] <- vals$prism_type + vals$prism_data <- prism_data + } + if (vals$ripp_data_input == TRUE) { + ripp_data <- vals$ripp_data + res <- rename_vector(ripp_data, rename_data, vals$renaming_notification) + vals$ripp_type <- res[[1]] + vals$renaming_notification <-res[[2]] + ripp_data["Type2"] <- vals$ripp_data + vals$ripp_data <- ripp_data + } + if (vals$emerald_data_input == TRUE) { + emerald_data <- vals$emerald_data + res <- rename_vector(emerald_data, rename_data, vals$renaming_notification) + vals$emerald_type <- res[[1]] + vals$renaming_notification <-res[[2]] + emerald_data["Type2"] <- vals$emerald_data + vals$emerald_data <- emerald_data + } + if (vals$compare_data_input == TRUE) { + compare_data <- vals$compare_data + res <- rename_vector(compare_data, rename_data, vals$renaming_notification) + vals$compare_type <- res[[1]] + vals$renaming_notification <-res[[2]] + compare_data["Type2"] <- vals$compare_data + vals$compare_data <- compare_data + } + shinyjs::showElement(selector = "#reset_name") + shinyjs::hideElement(selector = "#rename") + vals$renamed <- TRUE + shiny::showNotification(paste("Please note: SEMPI, PRISM and Antismash input data will be renamed on upload"), type = "warning", duration = 10) + }) + # When the new data is uploaded and renamed + # is TRUE, then rename data on upload + shiny::observeEvent(check_to_rename(), { + shiny::req(vals$renamed == TRUE) + + rename_data <- vals$rename_data + if (vals$anti_data_input == TRUE) { + anti_data <- vals$anti_data + res <- rename_vector(anti_data, rename_data, vals$renaming_notification) + vals$anti_type <- res[[1]] + vals$renaming_notification <- res[[2]] + anti_data["Type2"] <- 
vals$anti_type + vals$anti_data <- anti_data + } + + if (vals$sempi_data_input == TRUE) { + sempi_data <- vals$sempi_data + res <- rename_vector(sempi_data, rename_data, vals$renaming_notification) + vals$sempi_type <- res[[1]] + vals$renaming_notification <- res[[2]] + sempi_data["Type2"] <- vals$sempi_type + vals$sempi_data <- sempi_data + } + + if (vals$prism_data_input == TRUE) { + prism_data <- vals$prism_data + res <- rename_vector(prism_data, rename_data, vals$renaming_notification) + vals$prism_type <- res[[1]] + vals$renaming_notification <- res[[2]] + prism_data["Type2"] <- vals$prism_type + vals$prism_data <- prism_data + } + if (vals$ripp_data_input == TRUE) { + ripp_data <- vals$ripp_data + res <- rename_vector(ripp_data, rename_data, vals$renaming_notification) + vals$ripp_type <- res[[1]] + vals$renaming_notification <-res[[2]] + ripp_data["Type2"] <- vals$ripp_data + vals$ripp_data <- ripp_data + } + if (vals$emerald_data_input == TRUE) { + emerald_data <- vals$emerald_data + res <- rename_vector(emerald_data, rename_data, vals$renaming_notification) + vals$emerald_type <- res[[1]] + vals$renaming_notification <-res[[2]] + emerald_data["Type2"] <- vals$emerald_data + vals$emerald_data <- emerald_data + } + if (vals$compare_data_input == TRUE) { + compare_data <- vals$compare_data + res <- rename_vector(compare_data, rename_data, vals$renaming_notification) + vals$compare_type <- res[[1]] + vals$renaming_notification <-res[[2]] + compare_data["Type2"] <- vals$compare_data + vals$compare_data <- compare_data + } } - # Add to tracklist. Then it can be populated with links - tracklist <- BioCircos::BioCircosArcTrack("myArcTrack", arcs_chromosomes, arcs_begin, arcs_end, - minRadius = 0.90, maxRadius = 0.97, labels = arc_labels, colors = arc_col + ) - # Function to get interception between two matrices. Returns a list of two elements - IDs from first matrix and - # from second one. 
IDs are duplicated, if intercepted more than one time - - chromosomes_start <- c() - chromosomes_end <- c() - link_pos_start <- c() - link_pos_start_1 <- c() - link_pos_end <- c() - link_pos_end_2 <- c() - label_1 <- c() - label_2 <- c() - label_color <- c() - - # CALCULATIONS - # ----------------------------------------- - - - data_uploads_2 <- data_uploads - soft_2 <- soft_namings - soft_names_2 <- soft_names - data_to_use_2 <- data_to_use - index <- 1 - for (upload in data_uploads) { - data_uploads_2 <- data_uploads_2[-1] - soft_2 <- soft_2[-1] - soft_names_2 <- soft_names_2[-1] - data_to_use_2 <- data_to_use_2[-1] - index2 <- 1 - if (vals[[upload]] == T) { - for (upload2 in data_uploads_2) { - if ((vals[[upload2]] == T) & (length(data_uploads_2) > 0) & (soft_namings[index] != soft_2[index2])) { - output <- add_biocircos_data(inters[[soft_names[index]]][[soft_names_2[index2]]]$from, inters[[soft_names[index]]][[soft_names_2[index2]]]$to, vals[[data_to_use_2[index2]]], vals[[data_to_use[index]]], soft_2[index2], soft_namings[index], rename_data, input$label_color_class, input$ref_col_biocircos, coloring_datatable) - chromosomes_start <- c(chromosomes_start, output[[3]]) - # Add link end. Just populate second output from the vectors, used above. - chromosomes_end <- c(chromosomes_end, output[[4]]) - # Add links start positions as a start from dataframe. This vector is for chromosome start - link_pos_start <- as.numeric(c(link_pos_start, output[[5]])) - # Add links start positions as a start from dataframe. 
For chromosome start variable - link_pos_start_1 <- as.numeric(c(link_pos_start_1, output[[6]])) - # Add links start position for a chromosome stop variable - link_pos_end <- as.numeric(c(link_pos_end, output[[7]])) - # Add links start position for a chromosome stop position - link_pos_end_2 <- as.numeric(c(link_pos_end_2, output[[8]])) - label_1 <- c(label_1, output[[9]]) - label_2 <- c(label_2, output[[10]]) - label_color <- c(label_color, output[[11]]) - } - index2 <- index2 + 1 - } - utils::write.csv(vals[[data_to_use[index]]], paste0(soft_names[index], "_biocircos.csv"), row.names = F) - } - index <- index + 1 - } + # Reset the renaming. Uncheck the hybrid checkboxes + shiny::observeEvent(input$reset_name, { + vals$anti_data["Type2"] <- vals$anti_data["Type"] + vals$sempi_data["Type2"] <- vals$sempi_data["Type"] + vals$prism_data["Type2"] <- vals$prism_data["Type"] + vals$ripp_data["Type2"] <- vals$ripp_data["Type"] + vals$emerald_data["Type2"] <- vals$emerald_data["Type"] + vals$compare_data["Type2"] <- vals$compare_data["Type"] + if (input$compare_hybrid == TRUE) { + shiny::showNotification(paste("Reference cluster types are NOT visualized as hybrid anymore. You should check the option one more time"), type = "warning", duration = 10 ) + shiny::showNotification(inputId ="compare_hybrid", value = FALSE) + } + if (input$emerald_hybrid == TRUE) { + shiny::showNotification(paste("Emerald/SanntiS cluster types are NOT visualized as hybrid anymore. You should check the option one more time"), type = "warning", duration = 10 ) + shiny::showNotification(inputId ="emerald_hybrid", value = FALSE) + } + if (input$ripp_hybrid == TRUE) { + shiny::showNotification(paste("RippMiner cluster types are NOT visualized as hybrid anymore. 
You should check the option one more time"), type = "warning", duration = 10 ) + shiny::showNotification(inputId ="ripp_hybrid", value = FALSE) + } + if (input$anti_hybrid == TRUE) { + shiny::showNotification(paste("Antismash cluster types are NOT visualized as hybrid anymore. You should check the option one more time"), type = "warning", duration = 10) + shiny::updateCheckboxInput(inputId = "anti_hybrid", value = FALSE) + } + if (input$prism_hybrid == TRUE) { + shiny::showNotification(paste("PRISM cluster types are NOT visualized as hybrid anymore. You should check the option one more time"), type = "warning", duration = 10) + shiny::updateCheckboxInput(inputId = "prism_hybrid", value = FALSE) + } + if (input$sempi_hybrid == TRUE) { + shiny::showNotification(paste("SEMPI cluster types are NOT visualized as hybrid anymore. You should check the option one more time"), type = "warning", duration = 10) + shiny::updateCheckboxInput(inputId = "sempi_hybrid", value = FALSE) + } + shinyjs::showElement(selector = "#rename") + shinyjs::hideElement(selector = "#reset_name") + vals$renamed <- FALSE + }) + # Read the uploaded renaming scheme csv + shiny::observeEvent(input$rename_data, { + rename_data <- utils::read.csv(input$rename_data$datapath) + vals$rename_data <- rename_data + coloring_datatable <- data.frame(tidyr::drop_na(data.frame(cbind(as.character(rename_data$Group_color), as.character(rename_data$Color), rename_data$Hierarchy)))) + coloring_datatable <- coloring_datatable[!apply(coloring_datatable == "", 1, all), ] + colnames(coloring_datatable) <- c("Name", "Color", "Hierarchy") + vals$coloring_datatable <- DT::datatable(coloring_datatable, rownames = FALSE, editable = "column") + }) + # What to do, if hide DeepBGC comparison options scheme is triggered + + + ############################################################################ + ############################################################################ + ### ### + ### COMPUTATIONS ### + ### ### + 
############################################################################ + ############################################################################ + shiny::observeEvent(input$prism_supp, ignoreInit = TRUE, priority = 3, { + if (input$prism_supp == TRUE) { + vals$need_filter <- TRUE + vals$prism_supp_data_input <- TRUE + vals$prism_supp_plot <- TRUE + if (!("PRISM-Supp" %in% names(vals$choices$ref))) { + vals$choices$ref <- c(vals$choices$ref, "PRISM-Supp" = "PRISM-Supp") + vals$choices$group_by <- c(vals$choices$group_by, "PRISM-Supp" = "PRISM-Supp") + vals$choices$ref_col_biocircos <- c(vals$choices$ref_col_biocircos, "PRISM-Supp" = "PRISM-Supp") + update_ui_with_data() + } + } else { + vals$prism_supp_data_input <- FALSE + vals$need_filter <- TRUE + vals$prism_supp_plot <- FALSE + vals$choices$ref <- vals$choices$ref[!(names(vals$choices$ref) %in% c("PRISM-Supp"))] + vals$choices$group_by <- vals$choices$group_by[!(names(vals$choices$group_by) %in% c("PRISM-Supp"))] + vals$choices$ref_col_biocircos <- vals$choices$ref_col_biocircos[!(names(vals$choices$ref_col_biocircos) %in% c("PRISM-Supp"))] + update_ui_with_data() + } + }) + # Compute all interceptions on data upload. + # dplyr::filter while ploting then. 
+ shiny::observeEvent(inputData(), ignoreInit = TRUE, priority = 5, { + # GENERATE DATA + if (vals$compare_data_input == TRUE) { + compare_data <- vals$compare_data + compare_inter <- vals$compare_data %>% + dplyr::select(Start, Stop) + compare_inter$seqnames <- "chr" + } + if (vals$emerald_data_input == TRUE) { + emerald_data <- vals$emerald_data + emerald_inter <- vals$emerald_data %>% + dplyr::select(Start, Stop) + emerald_inter$seqnames <- "chr" + } + if (vals$ripp_data_input == TRUE) { + ripp_data <- vals$ripp_data + ripp_inter <- vals$ripp_data %>% + dplyr::select(Start, Stop) + ripp_inter$seqnames <- "chr" + } + if (vals$anti_data_input == TRUE) { + anti_data <- vals$anti_data + anti_inter <- vals$anti_data %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <- "chr" + } + if (vals$deep_data_input == TRUE) { + deep_data <- vals$deep_data + deep_inter <- vals$deep_data %>% + dplyr::select(Start, Stop) + deep_inter$seqnames <- "chr" + } + if (vals$rre_data_input == TRUE) { + # Convert numeric columns in a dataframe as a numeric + vals$rre_data$Start <- as.numeric(vals$rre_data$Start) + vals$rre_data$Stop <- as.numeric(vals$rre_data$Stop) + # Store rre data into local variable + rre_data <- data.frame(vals$rre_data) + # Start/Stop columns from rre data as matrix + rre_inter <- rre_data %>% + dplyr::select(Start, Stop) + rre_inter$seqnames <- "chr" + } + if (vals$prism_data_input == TRUE) { + # Store master prism data in local variable + prism_data <- vals$prism_data + # Start/Stop columns from prism data as matrix + prism_inter <- prism_data %>% + dplyr::select(Start, Stop) + prism_inter$seqnames <- "chr" + } + if (vals$sempi_data_input == TRUE) { + # Store master prism data in local variable + sempi_data <- vals$sempi_data + # Start/Stop columns from prism data as matrix + sempi_inter <- vals$sempi_data %>% + dplyr::select(Start, Stop) + sempi_inter$seqnames <- "chr" + } + if (vals$prism_json == TRUE) { + prism_supp_data <- vals$prism_supp_data + 
prism_supp_inter <- vals$prism_supp_data %>% + dplyr::select(Start, Stop) + prism_supp_inter$seqnames <- "chr" + } + if (vals$arts_data_input == TRUE) { + arts_data <- vals$arts_data + arts_inter <- vals$arts_data %>% + dplyr::select(Start, Stop) + arts_inter$seqnames <- "chr" + } + if (vals$gecco_data_input == TRUE) { + gecco_data <- vals$gecco_data + # Start/Stop columns from prism data as matrix + gecco_inter <- vals$gecco_data %>% + dplyr::select(Start, Stop) + gecco_inter$seqnames <- "chr" + } - # Combine labels with mapply to one list - link_labels <- mapply(function(x, y) paste(x, y, sep = " | "), label_1, label_2) + get_inter <- function(inter1, inter2) { + query <- GenomicRanges::makeGRangesFromDataFrame(inter2) + subject <- GenomicRanges::makeGRangesFromDataFrame(inter1) + interseption <- GenomicRanges::findOverlaps(query, subject) + inter_from <- interseption@from + inter_to <- interseption@to + return(list(from = inter_from, to = inter_to)) + } - # Add links and labels to the track list for subsequent visualization - if ((input$label_color == T) & (length(chromosomes_start) > 0)) { - group_colors <- plyr::count(unlist(label_color)) - for (i in seq(1:dim(group_colors)[1])) { - subset <- unname(which(label_color %in% group_colors$x[i])) - tracklist <- tracklist + BioCircos::BioCircosLinkTrack(as.character(i), chromosomes_start[subset], link_pos_start[subset], - link_pos_start_1[subset], chromosomes_end[subset], link_pos_end[subset], - link_pos_end_2[subset], - maxRadius = 0.85, labels = link_labels[subset], - displayLabel = FALSE, color = group_colors$x[i] - ) - } - } else if ((input$label_color == F) & (length(chromosomes_start) > 0)) { - tracklist <- tracklist + BioCircos::BioCircosLinkTrack("myLinkTrack_master", chromosomes_start, link_pos_start, - link_pos_start_1, chromosomes_end, link_pos_end, - link_pos_end_2, - maxRadius = 0.85, labels = link_labels, - displayLabel = FALSE, color = coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == 
"base"] - ) - } else { - shiny::showNotification(paste("No interceptions are being made in the Biocircos plot. Please provide data with clusters that do have intercepting borders"), type = "warning", duration = NULL) - } + inters <- vals$inters + index <- 1 + for (i in data_uploads_inter) { + index_2 <- 1 + j <- soft_names[index] + for (p in data_uploads_inter) { + x <- soft_names[index_2] + if ((vals[[i]] == TRUE) & (vals$computed[[j]] == FALSE) & (j != x)) { + if ((vals[[p]] == TRUE) & (j != soft_names[index_2])) { + res <- get_inter(eval(as.name(paste(j, "_inter", sep = ""))), eval(as.name(paste(x, "_inter", sep = "")))) + new_res <- list() + new_res$from <- eval(as.name(paste(x, "_data", sep = "")))[res$from, ]$Cluster + new_res$to <- eval(as.name(paste(j, "_data", sep = "")))[res$to, ]$Cluster + inters[[j]][[x]] <- new_res + inters[[x]][[j]] <- list(from = new_res$to, to = new_res$from) + } + } + index_2 <- index_2 + 1 + } + if (vals[[i]] == TRUE) { + vals$computed[[j]] <- TRUE + } + index <- index + 1 + } - vals$tracklist <- tracklist - vals$Biocircos_chromosomes <- Biocircos_chromosomes - }) - - shiny::observeEvent(deep_reference(), ignoreInit = T, { - shiny::req(vals$data_upload_count >= 1) - shiny::req(vals$need_filter == F) - shiny::req(vals$can_plot_deep_ref == T) - shiny::req(input$ref != "") - shiny::req(vals$data_upload_count >= 1) - - if (is.null(vals$inters_filtered)) { - inters <- vals$inters - } else { - inters <- vals$inters_filtered - } - ## source("src/deep_reference_functions.R") - # GENERATE DATA - index <- 1 - for (upload in data_uploads) { - if (vals[[upload]] == T) { - data <- vals[[data_to_use[index]]] - assign(paste0(soft_names[index], "_data"), correct_width(data, soft_namings[index], input$sempi_width, input$prism_supp_data_input_width, input$arts_width, input$rre_width)) - } - index <- index + 1 - } + vals$inters <- inters + if ((vals$deep_data_input == FALSE) & (vals$gecco_data_input == FALSE) & (vals$arts_data_input == FALSE)) { + 
vals$inters_filtered <- inters + enable_event_logic() + } else { + vals$need_filter <- TRUE + vals$filter_data <- TRUE + } + }) + # dplyr::filter ARTS, DeepBGC, GECCO interception data + # and general dataframes to plot, if data filtering + # options are triggered + shiny::observeEvent( + { + dynamicInput() + to_debounce() + }, + ignoreInit = TRUE, + priority = 4, + { + shiny::req(vals$data_upload_count >= 1) + inters <- vals$inters + if (vals$deep_data_input == TRUE) { + if (vals$need_filter == FALSE) { + biocircos_deep <- filter_deepbgc(vals$deep_data, vals$cluster_type, vals$score_a, vals$score_c, vals$score_d, vals$domains_filter, vals$biodomain_filter, vals$gene_filter) + vals$deep_data_filtered <- biocircos_deep + } else { + biocircos_deep <- vals$deep_data_filtered + } + if (vals$data_upload_count != 1) { + new_deep <- lapply(inters$deep, function(x) { + new_to <- x$to[x$to %in% biocircos_deep$Cluster] + new_from <- x$from[x$to %in% biocircos_deep$Cluster] + list(from = new_from, to = new_to) + }) + new_inters <- inters + update_list <- names(inters$deep) + for (b in seq(1:length(update_list))) { + new_inters[[update_list[b]]]$deep$to <- new_deep[[update_list[b]]]$from + new_inters[[update_list[b]]]$deep$from <- new_deep[[update_list[b]]]$to + } + new_inters$deep <- new_deep + vals$inters_filtered <- new_inters + inters <- new_inters + } + } + if (vals$gecco_data_input == TRUE) { + if (vals$need_filter == FALSE) { + gecco_data <- filter_gecco(vals$gecco_data, vals$score_cluster_gecco, vals$score_average_gecco, vals$domains_filter_gecco, vals$prot_filter_gecco) + vals$gecco_data_filtered <- gecco_data + } else { + gecco_data <- vals$gecco_data_filtered + } + if (vals$data_upload_count != 1) { + new_gecco <- lapply(inters$gecco, function(x) { + new_to <- x$to[x$to %in% gecco_data$Cluster] + new_from <- x$from[x$to %in% gecco_data$Cluster] + list(from = new_from, to = new_to) + }) + new_inters <- inters + update_list <- names(inters$gecco) + for (b in 
seq(1:length(update_list))) { + new_inters[[update_list[b]]]$gecco$to <- new_gecco[[update_list[b]]]$from + new_inters[[update_list[b]]]$gecco$from <- new_gecco[[update_list[b]]]$to + } + new_inters$gecco <- new_gecco + vals$inters_filtered <- new_inters + inters <- new_inters + } + } + if (vals$arts_data_input == TRUE) { + if (input$dup_choice != "All") { + vals$arts_data_filtered <- data.frame(vals$arts_data) %>% + dplyr::filter(Core == stringr::str_split(stringr::str_split(input$dup_choice, " ,")[[1]][[2]], "Core:")[[1]][[2]] | Core == "Not_core") + if (vals$data_upload_count != 1) { + new_arts <- lapply(inters$arts, function(x) { + new_to <- x$to[x$to %in% vals$arts_data_filtered$Cluster] + new_from <- x$from[x$to %in% vals$arts_data_filtered$Cluster] + list(from = new_from, to = new_to) + }) + new_inters <- inters + update_list <- names(inters$arts) + for (b in seq(1:length(update_list))) { + new_inters[[update_list[b]]]$arts$to <- new_arts[[update_list[b]]]$from + new_inters[[update_list[b]]]$arts$from <- new_arts[[update_list[b]]]$to + } + new_inters$arts <- new_arts + vals$inters_filtered <- new_inters + inters <- new_inters + } + } else { + vals$arts_data_filtered <- vals$arts_data + vals$inters_filtered <- inters + } + } + if (input$prism_supp == FALSE) { + inters$prism_supp <- NULL + for (name in names(inters)) { + inters[[name]][which(names(inters[[name]]) %in% c("prism_supp"))] <- NULL + } + } + if ((vals$gecco_data_input == FALSE) & (vals$deep_data_input == FALSE) & (vals$arts_data_input == FALSE)) { + vals$inters_filtered <- inters + } + vals$need_filter <- FALSE + vals$filter_data <- FALSE + vals$can_plot_deep_ref <- TRUE + enable_event_logic() + } + ) + # Compute the Biociros plot. 
Store information to plot later + shiny::observeEvent(biocircos_listen(), ignoreInit = TRUE, priority = 3, { + shiny::req(vals$data_upload_count >= 2) + shiny::req(vals$need_filter == FALSE) + shiny::req(vals$can_plot_biocircos == TRUE) + ## source("src/biocircos_functions.R") + # BioCircos! + Biocircos_chromosomes <- list() + arcs_chromosomes <- c() + arcs_begin <- c() + arcs_end <- c() + arc_labels <- c() + arc_col <- c() + + if (is.null(vals$inters_filtered)) { + inters <- vals$inters + } else { + inters <- vals$inters_filtered + } - lett <- rev(LETTERS)[1:9] + rename_data <- vals$rename_data + coloring_datatable <- vals$coloring_datatable + + index <- 1 + # browser() + for (upload in data_uploads) { + if (vals[[upload]] == TRUE) { + # Store data in local variable + corrected_data <- correct_width(vals[[data_to_use[index]]], soft_namings[index], input$sempi_width, input$prism_supp_data_input_width, input$arts_width, input$rre_width) + init_data <- initialize_biocircos(corrected_data, soft_namings[index], Biocircos_chromosomes, arcs_chromosomes, arcs_begin, arcs_end, arc_labels, arc_col, rename_data, vals$chr_len, input$biocircos_color, coloring_datatable) + # Make chromosome list for Biocircos plot. Use chr_len as an input + Biocircos_chromosomes <- init_data[[1]] + # Add arcs. Quantity of arcs is length of dataframes + arcs_chromosomes <- init_data[[2]] + # Add arcs begin positions. (Start column) + arcs_begin <- init_data[[3]] + # Stop position of arcs. + arcs_end <- init_data[[4]] + # Add Arcs labels. Can add only one label... + arc_labels <- init_data[[5]] + + arc_col <- init_data[[6]] + } + index <- index + 1 + } + # Add to tracklist. Then it can be populated with links + tracklist <- BioCircos::BioCircosArcTrack("myArcTrack", arcs_chromosomes, arcs_begin, arcs_end, + minRadius = 0.90, maxRadius = 0.97, labels = arc_labels, colors = arc_col + ) + # Function to get interception between two matrices. 
Returns a list of two elements - IDs from first matrix and + # from second one. IDs are duplicated, if intercepted more than one time + + chromosomes_start <- c() + chromosomes_end <- c() + link_pos_start <- c() + link_pos_start_1 <- c() + link_pos_end <- c() + link_pos_end_2 <- c() + label_1 <- c() + label_2 <- c() + label_color <- c() + + + # CALCULATIONS + # ----------------------------------------- + + + data_uploads_2 <- data_uploads + soft_2 <- soft_namings + soft_names_2 <- soft_names + data_to_use_2 <- data_to_use + index <- 1 + for (upload in data_uploads) { + data_uploads_2 <- data_uploads_2[-1] + soft_2 <- soft_2[-1] + soft_names_2 <- soft_names_2[-1] + data_to_use_2 <- data_to_use_2[-1] + index2 <- 1 + if (vals[[upload]] == TRUE) { + for (upload2 in data_uploads_2) { + if ((vals[[upload2]] == TRUE) & (length(data_uploads_2) > 0) & (soft_namings[index] != soft_2[index2])) { + output <- add_biocircos_data(inters[[soft_names[index]]][[soft_names_2[index2]]]$from, inters[[soft_names[index]]][[soft_names_2[index2]]]$to, vals[[data_to_use_2[index2]]], vals[[data_to_use[index]]], soft_2[index2], soft_namings[index], rename_data, input$label_color_class, input$ref_col_biocircos, coloring_datatable) + chromosomes_start <- c(chromosomes_start, output[[3]]) + # Add link end. Just populate second output from the vectors, used above. + chromosomes_end <- c(chromosomes_end, output[[4]]) + # Add links start positions as a start from dataframe. This vector is for chromosome start + link_pos_start <- as.numeric(c(link_pos_start, output[[5]])) + # Add links start positions as a start from dataframe. 
For chromosome start variable + link_pos_start_1 <- as.numeric(c(link_pos_start_1, output[[6]])) + # Add links start position for a chromosome stop variable + link_pos_end <- as.numeric(c(link_pos_end, output[[7]])) + # Add links start position for a chromosome stop position + link_pos_end_2 <- as.numeric(c(link_pos_end_2, output[[8]])) + label_1 <- c(label_1, output[[9]]) + label_2 <- c(label_2, output[[10]]) + label_color <- c(label_color, output[[11]]) + + + } + index2 <- index2 + 1 + } + utils::write.csv(vals[[data_to_use[index]]], paste0(soft_names[index], "_biocircos.csv"), row.names = FALSE) + + } - tooltip <- c( - "Software", "ID", "Start", "Stop", "Type", "num_domains", "deepbgc_score", "activity", "Score", "E_value", - "P_value", "RRE_start", "RRE_stop", "Probability", "Name", "Full_name", "Hit", "Core", "Count", "Bitscore", "Model", - "Num_domains", "Num_proteins", "Average_p", "Max_p" - ) + index <- index + 1 + } + + + #### made for json antismash #### + # ----------------------------------------- + json_start = c() + json_end = c() + json_lable = c() + json_description = c() + + ### list dir for appropriate files of biocircos to create json + + files_in_dir <- list.files() + # Iterate over those files and if found "_biocircos.csv" add remove them + for (file_names in files_in_dir) { + if (grepl("_biocircos.csv", file_names, fixed = TRUE)) { + df_read <- read.csv(file_names) + if (file_names != "arts_biocircos.csv" & file_names != "prism_supp_biocircos.csv"){ + df_read <- read.csv(file_names) + df_read$Description <- strsplit(file_names, '_', fixed = TRUE)[[1]][1] + json_start <- c(json_start, df_read$Start) + json_end <- c(json_end, df_read$Stop) + json_lable <- c(json_lable, df_read$Type) + json_description <- c(json_description, df_read$Description) + } + # json_description <- c(json_description, df_read$Description) + }} + + vals$json_for_anti$start <- json_start + vals$json_for_anti$end <- json_end + vals$json_for_anti$label <- json_lable + 
vals$json_for_anti$description <- json_description + utils::write.csv(vals$json_for_anti, "data_all.csv", row.names = FALSE) + + + + mod_download_anti_server('download_anti_ui_1') + + + # THE END OF JSON ANTISMASH + # ----------------------------------------- + + + # Combine labels with mapply to one list + link_labels <- mapply(function(x, y) paste(x, y, sep = " | "), label_1, label_2) + + # Add links and labels to the track list for subsequent visualization + if ((input$label_color == TRUE) & (length(chromosomes_start) > 0)) { + group_colors <- plyr::count(unlist(label_color)) + for (i in seq(1:dim(group_colors)[1])) { + subset <- unname(which(label_color %in% group_colors$x[i])) + tracklist <- tracklist + BioCircos::BioCircosLinkTrack(as.character(i), chromosomes_start[subset], link_pos_start[subset], + link_pos_start_1[subset], chromosomes_end[subset], link_pos_end[subset], + link_pos_end_2[subset], + maxRadius = 0.85, labels = link_labels[subset], + displayLabel = FALSE, color = group_colors$x[i] + ) + } + } else if ((input$label_color == FALSE) & (length(chromosomes_start) > 0)) { + tracklist <- tracklist + BioCircos::BioCircosLinkTrack("myLinkTrack_master", chromosomes_start, link_pos_start, + link_pos_start_1, chromosomes_end, link_pos_end, + link_pos_end_2, + maxRadius = 0.85, labels = link_labels, + displayLabel = FALSE, color = coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"] + ) + } else { + shiny::showNotification(paste("No interceptions are being made in the Biocircos plot. 
Please provide data with clusters that do have intercepting borders"), type = "warning", duration = NULL) + } + vals$tracklist <- tracklist + vals$Biocircos_chromosomes <- Biocircos_chromosomes + }) + shiny::observeEvent(deep_reference(), ignoreInit = TRUE, { + shiny::req(vals$data_upload_count >= 1) + shiny::req(vals$need_filter == FALSE) + shiny::req(vals$can_plot_deep_ref == TRUE) + shiny::req(input$ref != "") + shiny::req(vals$data_upload_count >= 1) - # MAKE COMPUTATIONS - sup_index <- 1 - soft_lttrs <- lett - rename_y_axis <- vals$rename_y_axis - rename_y_axis <- lapply(1:(length(soft_lttrs) - 1), function(x) { - soft_lttrs[x] <- soft_namings[x] - }) - names(rename_y_axis) <- soft_lttrs[-length(soft_lttrs)] - for (upload in data_uploads) { - soft_lttr <- soft_lttrs[1] - soft_lttrs <- soft_lttrs[-1] - if (vals[[upload]] == T) { - soft_major <- soft_names[sup_index] - seg_ref_g <- simple_seg(eval(as.name(paste(soft_names[sup_index], "_data", sep = ""))), "Z", soft_namings[sup_index], soft_names[sup_index], soft_major, inter = F, inters) - seg_ref_g <- define_spec_seg_df(soft_names, sup_index, seg_ref_g, soft_major, eval(as.name(paste(soft_names[sup_index], "_data", sep = ""))), inter = F, vals$rre_more, inters) - seg_ref <- seg_ref_g - - if (input$ref == soft_namings[sup_index]) { - shiny::validate(need(nrow(eval(as.name(paste(soft_names[sup_index], "_data", sep = "")))) > 0, "Reference data is empty, and so, insufficient for plotting. 
Please select another one")) - plot <- ggplot2::ggplot(eval(as.name(paste(soft_names[sup_index], "_data", sep = ""))), ggplot2::aes(x = vals$chr_len, y = Chr)) + - suppressWarnings(eval(as.name(paste0("geom_", soft_names[sup_index])))(seg_ref, vals$rre_more)) - soft_let <- abbr[sup_index] - lettrs <- lett[2:length(lett)] - labels_1 <- list() - index <- 1 - for (i in data_uploads) { - if ((vals[[i]] == T) & (soft_names[index] != soft_major)) { - df <- eval(as.name(paste(soft_names[index], "_data", sep = ""))) - seg_df <- simple_seg(df, lettrs[index], soft_namings[index], soft_names[index], soft_major, inter = T, inters) - seg_df <- define_spec_seg_df(soft_names, index, seg_df, soft_major, df, inter = T, vals$rre_more, inters) - labels_1[[lettrs[index]]] <- (paste(abbr[index], "_vs_", soft_let, sep = "")) - plot <- suppressWarnings(add_more_annot(seg_df, plot, soft_names, index, vals$rre_more)) + if (is.null(vals$inters_filtered)) { + inters <- vals$inters + } else { + inters <- vals$inters_filtered + } + ## source("src/deep_reference_functions.R") + # GENERATE DATA + index <- 1 + for (upload in data_uploads) { + if (vals[[upload]] == TRUE) { + data <- vals[[data_to_use[index]]] + assign(paste0(soft_names[index], "_data"), correct_width(data, soft_namings[index], input$sempi_width, input$prism_supp_data_input_width, input$arts_width, input$rre_width)) } index <- index + 1 - } - plot <- plot + - ggplot2::scale_y_discrete(labels = c("Z" = input$ref, unlist(labels_1))) + - ggplot2::theme(axis.text.y = ggplot2::element_text(size = 10)) + - ggplot2::ylab("") + - ggplot2::xlab("Chromosome length") + - ggplot2::theme(legend.title = ggplot2::element_blank()) + - ggplot2::ggtitle("Annotations' comparison to the reference") - to_plot <- plotly::ggplotly(plot, tooltip = tooltip) - to_plot <- to_plot %>% - plotly::layout(legend = list( - font = list( - family = "sans-serif", - size = 12, - color = "#000" - ), - bordercolor = "#FFFFFF", - borderwidth = 2, - title = list(text = " 
Cluster Types ") - )) - } - seg_ref$yend <- rep(soft_lttr, length(eval(as.name(paste(soft_names[sup_index], "_data", sep = "")))$Cluster)) - seg_ref$y <- rep(soft_lttr, length(eval(as.name(paste(soft_names[sup_index], "_data", sep = "")))$Cluster)) - vals[[soft_datafr[sup_index]]] <- seg_ref - } - sup_index <- sup_index + 1 - } - vals$rename_y_axis <- rename_y_axis - vals$deep_reference_to_plot <- to_plot - }) - - - ############################################################################ - ############################################################################ - ### ### - ### OUTPUT PLOTS ### - ### ### - ############################################################################ - ############################################################################ - - ## ---------------------------------------------------------------- - ## DeepBGC Comparison tab - - ## ---------------------------------------------------------------- - # Render barplot - mod_deepbgc_plots_server("deep_barplot_ui_1", vals = vals, score_a = vals$score_a, score_d = vals$score_d, score_c = vals$score_c) - - # Render interactive plot with plotly for rates of DeepBGC data in regards with antismash data - - ## ---------------------------------------------------------------- - ## GECCO Comparison tab - - ## ---------------------------------------------------------------- - # Render barplot - mod_gecco_plots_server("gecco_plots_ui_1", - vals = vals, score_average_gecco = vals$score_average_gecco, - score_cluster_gecco = vals$score_cluster_gecco - ) - ## --------------------------------------------------------------- - ## Annotation on chromosome plots' tab - - ## --------------------------------------------------------------- - - # Render interactive plot, which shows bgcs of antismash, intercepted with chosen app. Also all app bgs. 
On hover shows all available information - # For antismash and PRISM data showed only ID, Start, Stop, Type - mod_deep_reference_server("deep_reference_ui_1", vals = vals) - - mod_deep_reference_2_server("deep_reference_2_ui_1", vals = vals, data_uploads = data_uploads, data_to_use = data_to_use) - ## ---------------------------------------------------------------- - ## Biocircos plot tab - - ## --------------------------------------------------------------- - # Render Biocircos Plot for all-vs-all comparison - mod_biocircos_server("biocircos_ui_1", vals = vals) - ## --------------------------------------------------------------- - ## Summarize tab - - ## --------------------------------------------------------------- - # Render barplot with number plyr::count of interception for BGC IDs - mod_barplot_rank_server("barplot_rank_ui_1", vals = vals, data_uploads = data_uploads, soft_names = soft_names, soft_namings = soft_namings, data_to_use = data_to_use, abbr = abbr) - - - # Render table with data - mod_group_table_server("group_table_ui_1", vals = vals, data_uploads = data_uploads, soft_names = soft_names, soft_namings = soft_namings, data_to_use = data_to_use, abbr = abbr) - - # Download used datasets (as for BioCircos) - mod_download_server("download_ui_1") - - shiny::onSessionEnded(function() { - # List files in directory - files_in_dir <- list.files() - # Iterate over those files and if found "_biocircos.csv" add to the flst vector - for (file_names in files_in_dir) { - if (grepl("_biocircos.csv", file_names, fixed = TRUE)) { - file.remove(file_names) - } else if (grepl("group_by.csv", file_names, fixed = TRUE)) { - file.remove(file_names) - } - } + } + + lett <- rev(LETTERS)[1:(length(data_uploads)+1)] + + - shiny::stopApp() - }) + tooltip <- c( + "Software", "ID", "Start", "Stop", "Type", "num_domains", "deepbgc_score", "activity", "Score", "E_value", + "P_value", "RRE_start", "RRE_stop", "Probability", "Name", "Full_name", "Hit", "Core", "Count", 
"Bitscore", "Model", + "Num_domains", "Num_proteins", "Average_p", "Max_p" + ) + + # MAKE COMPUTATIONS + sup_index <- 1 + soft_lttrs <- lett + rename_y_axis <- vals$rename_y_axis + rename_y_axis <- lapply(1:(length(soft_lttrs) - 1), function(x) { + soft_lttrs[x] <- soft_namings[x] + }) + names(rename_y_axis) <- soft_lttrs[-length(soft_lttrs)] + for (upload in data_uploads) { + soft_lttr <- soft_lttrs[1] + soft_lttrs <- soft_lttrs[-1] + if (vals[[upload]] == TRUE) { + soft_major <- soft_names[sup_index] + seg_ref_g <- simple_seg(eval(as.name(paste(soft_names[sup_index], "_data", sep = ""))), "Z", soft_namings[sup_index], soft_names[sup_index], soft_major, inter = FALSE, inters) + seg_ref_g <- define_spec_seg_df(soft_names, sup_index, seg_ref_g, soft_major, eval(as.name(paste(soft_names[sup_index], "_data", sep = ""))), inter = FALSE, vals$rre_more, inters) + seg_ref <- seg_ref_g + if (input$ref == soft_namings[sup_index]) { + shiny::validate(need(nrow(eval(as.name(paste(soft_names[sup_index], "_data", sep = "")))) > 0, "Reference data is empty, and so, insufficient for plotting. 
Please select another one")) + + plot <- ggplot2::ggplot(eval(as.name(paste(soft_names[sup_index], "_data", sep = "")))) + + suppressWarnings(eval(as.name(paste0("geom_", soft_names[sup_index])))(seg_ref, vals$rre_more)) + soft_let <- abbr[sup_index] + lettrs <- lett[2:length(lett)] + labels_1 <- list() + index <- 1 + + for (i in data_uploads) { + + if ((vals[[i]] == TRUE) & (soft_names[index] != soft_major)) { + + df <- eval(as.name(paste(soft_names[index], "_data", sep = ""))) + seg_df <- simple_seg(df, lettrs[index], soft_namings[index], soft_names[index], soft_major, inter = TRUE, inters) + seg_df <- define_spec_seg_df(soft_names, index, seg_df, soft_major, df, inter = TRUE, vals$rre_more, inters) + labels_1[[lettrs[index]]] <- (paste(abbr[index], "_vs_", soft_let, sep = "")) + plot <- suppressWarnings(add_more_annot(seg_df, plot, soft_names, index, vals$rre_more)) + } + index <- index + 1 + } + plot <- plot + + ggplot2::scale_y_discrete(labels = c("Z" = input$ref, unlist(labels_1))) + + ggplot2::theme(axis.text.y = ggplot2::element_text(size = 10)) + + ggplot2::ylab("") + + ggplot2::xlab("Chromosome length") + + ggplot2::theme(legend.title = ggplot2::element_blank()) + + ggplot2::ggtitle("Annotations' comparison to the reference") + to_plot <- plotly::ggplotly(plot, tooltip = tooltip) + to_plot <- to_plot %>% + plotly::layout(legend = list( + font = list( + family = "sans-serif", + size = 12, + color = "#000" + ), + bordercolor = "#FFFFFF", + borderwidth = 2, + title = list(text = " Cluster Types ") + )) + } + + seg_ref$yend <- rep(soft_lttr, length(eval(as.name(paste(soft_names[sup_index], "_data", sep = "")))$Cluster)) + seg_ref$y <- rep(soft_lttr, length(eval(as.name(paste(soft_names[sup_index], "_data", sep = "")))$Cluster)) + vals[[soft_datafr[sup_index]]] <- seg_ref + + + } + sup_index <- sup_index + 1 + } + vals$rename_y_axis <- rename_y_axis + vals$deep_reference_to_plot <- to_plot + }) + + + + + + + + + + + + 
############################################################################ + ############################################################################ + ### ### + ### OUTPUT PLOTS ### + ### ### + ############################################################################ + ############################################################################ + + ## ---------------------------------------------------------------- + ## ARTS phylogenetic tree - + ## ---------------------------------------------------------------- + # Plot tree + + mod_arts_tree_server("arts_tree_1",vals = vals) + + ## ---------------------------------------------------------------- + ## DeepBGC Comparison tab - + ## ---------------------------------------------------------------- + # Render barplot + mod_deepbgc_plots_server("deep_barplot_ui_1", vals = vals, score_a = vals$score_a, score_d = vals$score_d, score_c = vals$score_c) + + # Render interactive plot with plotly for rates of DeepBGC data in regards with antismash data + + ## ---------------------------------------------------------------- + ## GECCO Comparison tab - + ## ---------------------------------------------------------------- + # Render barplot + mod_gecco_plots_server("gecco_plots_ui_1", + vals = vals, score_average_gecco = vals$score_average_gecco, + score_cluster_gecco = vals$score_cluster_gecco + ) + ## --------------------------------------------------------------- + ## Annotation on chromosome plots' tab - + ## --------------------------------------------------------------- + + # Render interactive plot, which shows bgcs of antismash, intercepted with chosen app. Also all app bgs. 
On hover shows all available information + # For antismash and PRISM data showed only ID, Start, Stop, Type + mod_deep_reference_server("deep_reference_ui_1", vals = vals) + + mod_deep_reference_2_server("deep_reference_2_ui_1", vals = vals, data_uploads = data_uploads, data_to_use = data_to_use) + ## ---------------------------------------------------------------- + ## Biocircos plot tab - + ## --------------------------------------------------------------- + # Render Biocircos Plot for all-vs-all comparison + mod_biocircos_server("biocircos_ui_1", vals = vals) + ## --------------------------------------------------------------- + ## Summarize tab - + ## --------------------------------------------------------------- + # Render barplot with number plyr::count of interception for BGC IDs + mod_barplot_rank_server("barplot_rank_ui_1", vals = vals, data_uploads = data_uploads, soft_names = soft_names, soft_namings = soft_namings, data_to_use = data_to_use, abbr = abbr) + + + # Render table with data + mod_group_table_server("group_table_ui_1", vals = vals, data_uploads = data_uploads, soft_names = soft_names, soft_namings = soft_namings, data_to_use = data_to_use, abbr = abbr) + + # Download used datasets (as for BioCircos) + mod_download_server("download_ui_1") + mod_download_server("download_anti_ui_1") + + shiny::onSessionEnded(function() { + # List files in directory + files_in_dir <- list.files() + # Iterate over those files and if found "_biocircos.csv" add to the flst vector + for (file_names in files_in_dir) { + if (grepl("_biocircos.csv", file_names, fixed = TRUE)) { + file.remove(file_names) + } else if (grepl("group_by.csv", file_names, fixed = TRUE)) { + file.remove(file_names) + } + } + + shiny::stopApp() + }) } diff --git a/R/app_ui.R b/R/app_ui.R index 1b555e2..7868a9e 100644 --- a/R/app_ui.R +++ b/R/app_ui.R @@ -5,296 +5,353 @@ #' @import shiny #' @noRd app_ui <- function(request) { - tagList( - # Leave this function for adding external resources - 
golem_add_external_resources(), - # Your application UI logic - shinydashboardPlus::dashboardPage( - shinydashboardPlus::dashboardHeader(title = "BGCViz"), - shinydashboardPlus::dashboardSidebar( - width = 350, - shinydashboard::sidebarMenu( - id = "menu_items", - style = "white-space: normal;", - shinydashboard::menuItem("Upload data", tabName = "uploaddata_sidemenu", icon = icon("fas fa-upload")), - shinydashboard::menuItem("Global options", tabName = "options_sidemenu", icon = icon("fas fa-cogs")), - shinydashboard::menuItemOutput("deep_sidemenu_out"), - shinydashboard::menuItemOutput("gecco_sidemenu_out"), - shinydashboard::menuItemOutput("anno_sidemenu_out"), - shinydashboard::menuItemOutput("biocircos_sidemenu_out"), - shinydashboard::menuItemOutput("summarize_sidemenu_out"), - shinydashboard::menuItem( - tabName = "restore_boxes", - actionButton("restore_box", "Restore all boxes", class = "bg-success") - ) - ) - ), - shinydashboard::dashboardBody( - tags$head( - tags$style(HTML(".main-sidebar { font-size: 15px; }")) # change the font size to 20 - ), - shinyjs::useShinyjs(), - shinydisconnect::disconnectMessage( - text = "An error occurred. Please refresh the page and try again. 
Also, if error persists, then you are welcome to create an issue at https://github.com/ostash-group/BGCViz/issues (:", - refresh = "Refresh", - background = "#FFFFFF", - colour = "#444444", - refreshColour = "#337AB7", - overlayColour = "#000000", - overlayOpacity = 0.6, - width = 450, - top = 50, - size = 22, - css = "" - ), - shinydashboard::tabItems( - shinydashboard::tabItem( - tabName = "deep_sidemenu", - mod_deepbgc_plots_ui("deep_barplot_ui_1"), - sortable::sortable_js("deep_data1", options = sortable::sortable_options(swap = TRUE, group = "deep_data")), - sortable::sortable_js("deep_data2", options = sortable::sortable_options(swap = TRUE, group = "deep_data")) - ), - shinydashboard::tabItem( - tabName = "gecco_sidemenu", - mod_gecco_plots_ui("gecco_plots_ui_1"), - sortable::sortable_js("gecco_data1", options = sortable::sortable_options(swap = TRUE, group = "gecco_data")), - sortable::sortable_js("gecco_data2", options = sortable::sortable_options(swap = TRUE, group = "gecco_data")) - ), - shinydashboard::tabItem( - tabName = "anno_sidemenu", - shiny::fluidRow( - tags$div( - id = "anno_data1", - shiny::column( - width = 12, - mod_deep_reference_2_ui("deep_reference_2_ui_1"), - mod_deep_reference_ui("deep_reference_ui_1") + tagList( + # Leave this function for adding external resources + golem_add_external_resources(), + # Your application UI logic + shinydashboardPlus::dashboardPage( + shinydashboardPlus::dashboardHeader(title = "BGCViz"), + shinydashboardPlus::dashboardSidebar( + width = 350, + shinydashboard::sidebarMenu( + id = "menu_items", + style = "white-space: normal;", + shinydashboard::menuItem("Upload data", tabName = "uploaddata_sidemenu", icon = icon("fas fa-upload")), + shinydashboard::menuItem("Global options", tabName = "options_sidemenu", icon = icon("fas fa-cogs")), + shinydashboard::menuItemOutput("deep_sidemenu_out"), + shinydashboard::menuItemOutput("gecco_sidemenu_out"), + shinydashboard::menuItemOutput("anno_sidemenu_out"), + 
shinydashboard::menuItemOutput("biocircos_sidemenu_out"), + shinydashboard::menuItemOutput("summarize_sidemenu_out"), + shinydashboard::menuItemOutput("arts_tree_sidemenu_out"), + shinydashboard::menuItem( + tabName = "restore_boxes", + actionButton("restore_box", "Restore all boxes", class = "bg-success") + ), + mod_download_anti_ui("download_anti_ui_1") ) - ) - ), - sortable::sortable_js("anno_data1", options = sortable::sortable_options(swap = TRUE, group = "anno_data")), - sortable::sortable_js("anno_data2", options = sortable::sortable_options(swap = TRUE, group = "anno_data")) - ), - shinydashboard::tabItem( - tabName = "biocircos_sidemenu", - mod_biocircos_ui("biocircos_ui_1"), - sortable::sortable_js("biocircos_data1", options = sortable::sortable_options(swap = TRUE, group = "biocircos_data")), - sortable::sortable_js("biocircos_data2", options = sortable::sortable_options(swap = TRUE, group = "biocircos_data")) - ), - shinydashboard::tabItem( - tabName = "summarize_sidemenu", - shiny::fluidRow( - tags$div( - id = "summarize_data1", - mod_barplot_rank_ui("barplot_rank_ui_1"), - mod_group_table_ui("group_table_ui_1") - ) ), - sortable::sortable_js("summarize_data1", options = sortable::sortable_options(swap = TRUE)) - ), - shinydashboard::tabItem( - tabName = "uploaddata_sidemenu", - shiny::fluidRow( - tags$div( - id = "upload_data1", - div( - id = "id1", - shinydashboardPlus::box( - title = "Upload Antismash data", - id = "upload_anti_box", - collapsible = TRUE, - closable = TRUE, - shiny::fileInput("anti_data", - "Upload Antismash data", - accept = list(".csv", ".json") - ) - ) - ), - div( - id = "id2", - shinydashboardPlus::box( - title = "Upload PRISM data", - id = "upload_prism_box", - collapsible = TRUE, - closable = TRUE, - shiny::fileInput("prism_data", - "Upload PRISM data", - accept = list(".csv", ".json") - ) - ) - ), - div( - id = "id3", - shinydashboardPlus::box( - title = "Upload SEMPI 2.0 data", - id = "upload_sempi_box", - collapsible = 
TRUE, - closable = TRUE, - shiny::fileInput("sempi_data", - "Upload SEMPI 2.0 data", - accept = list(".csv", ".zip") - ) - ) + shinydashboard::dashboardBody( + tags$head( + tags$style(HTML(".main-sidebar { font-size: 15px; }")) # change the font size to 20 ), - div( - id = "id4", - shinydashboardPlus::box( - title = "Upload DeepBGC data", - id = "upload_deep_box", - collapsible = TRUE, - closable = TRUE, - shiny::fileInput("deep_data", - "Upload DeepBGC data", - accept = ".tsv" - ) - ) - ) - ) - ), - shiny::fluidRow( - tags$div( - id = "upload_data2", - div( - id = "id1", - shinydashboardPlus::box( - title = "Upload Gecco data", - id = "upload_gecco_box", - collapsible = TRUE, - closable = TRUE, - shiny::fileInput("gecco_data", - "Upload Gecco data", - accept = ".tsv" - ) - ) - ), - div( - id = "id2", - shinydashboardPlus::box( - title = "Upload RRE-Finder data", - id = "upload_rre_box", - collapsible = TRUE, - closable = TRUE, - shiny::fileInput( - "rre_data", - "Upload RRE-Finder data" - ) - ) + shinyjs::useShinyjs(), + shinydisconnect::disconnectMessage( + text = "An error occurred. Please refresh the page and try again. 
Also, if error persists, then you are welcome to create an issue at https://github.com/ostash-group/BGCViz/issues (:", + refresh = "Refresh", + background = "#FFFFFF", + colour = "#444444", + refreshColour = "#337AB7", + overlayColour = "#000000", + overlayOpacity = 0.6, + width = 450, + top = 50, + size = 22, + css = "" ), - div( - id = "id3", - shinydashboardPlus::box( - title = "Upload ARTS data", - id = "upload_arts_box", - collapsible = TRUE, - closable = TRUE, - shiny::fileInput("arts_data", - "Upload ARTS data", - accept = list(".csv", ".zip") - ) - ) - ), - div( - id = "id4", - shinydashboardPlus::box( - title = "Use Example data", - id = "use_example_data_box", - collapsible = TRUE, - closable = TRUE, - shiny::actionButton("anti_sco", "Use Antismash example data from S.coelicolor"), - shiny::actionButton("prism_sco", "Use PRISM example data from S.coelicolor"), - shiny::actionButton("sempi_sco", "Use SEMPI example data from S.coelicolor"), - shiny::actionButton("deep_sco", "Use DeepBGC example data from S.coelicolor"), - shiny::actionButton("gecco_sco", "Use Gecco example data from S.coelicolor"), - shiny::actionButton("rre_sco", "Use RRE-Finder example data from S.coelicolor"), - shiny::actionButton("arts_sco", "Use ARTS example data from S.coelicolor"), - shiny::numericInput("chr_len", "Please type chr len of an organism", value = 10000000) - ) - ) - ) - ), - sortable::sortable_js("upload_data1", options = sortable::sortable_options(swap = TRUE, group = "upload_data")), - sortable::sortable_js("upload_data2", options = sortable::sortable_options(swap = TRUE, group = "upload_data")) - ), - shinydashboard::tabItem( - tabName = "options_sidemenu", - shiny::fluidRow( - shiny::column( - width = 6, - tags$div( - id = "options_data1", - div( - id = "id1", - shinydashboardPlus::box( - title = "Rename", - id = "rename_box", - collapsible = TRUE, - closable = TRUE, - width = NULL, - shiny::checkboxInput("anti_hybrid", "Visualize AntiSMASH BGC with several types as 
'Hybrid'"), - shiny::checkboxInput("prism_hybrid", "Visualize PRISM BGC with several types as 'Hybrid'"), - shiny::checkboxInput("sempi_hybrid", "Visualize SEMPI BGC with several types as 'Hybrid'"), - shiny::fileInput("rename_data", - "Upload renaming and coloring scheme", - accept = ".csv" + shinydashboard::tabItems( + shinydashboard::tabItem( + tabName = "deep_sidemenu", + mod_deepbgc_plots_ui("deep_barplot_ui_1"), + sortable::sortable_js("deep_data1", options = sortable::sortable_options(swap = TRUE, group = "deep_data")), + ), + shinydashboard::tabItem( + tabName = "gecco_sidemenu", + mod_gecco_plots_ui("gecco_plots_ui_1"), + sortable::sortable_js("gecco_data1", options = sortable::sortable_options(swap = TRUE, group = "gecco_data")), + sortable::sortable_js("gecco_data2", options = sortable::sortable_options(swap = TRUE, group = "gecco_data")) + ), + shinydashboard::tabItem( + tabName = "anno_sidemenu", + shiny::fluidRow( + tags$div( + id = "anno_data1", + shiny::column( + width = 12, + mod_deep_reference_2_ui("deep_reference_2_ui_1"), + mod_deep_reference_ui("deep_reference_ui_1") + ) + ) + ), + sortable::sortable_js("anno_data1", options = sortable::sortable_options(swap = TRUE, group = "anno_data")), + sortable::sortable_js("anno_data2", options = sortable::sortable_options(swap = TRUE, group = "anno_data")) + ), + shinydashboard::tabItem( + tabName = "biocircos_sidemenu", + mod_biocircos_ui("biocircos_ui_1"), + sortable::sortable_js("biocircos_data1", options = sortable::sortable_options(swap = TRUE, group = "biocircos_data")), + sortable::sortable_js("biocircos_data2", options = sortable::sortable_options(swap = TRUE, group = "biocircos_data")) + ), + shinydashboard::tabItem( + tabName = "arts_tree_sidemenu", + shiny::fluidRow( + tags$div( + id = "arts_tree", + shiny::column( + width = 12, + mod_arts_tree_ui("arts_tree_1"), + ) + ) ), - shiny::actionButton("rename", "Rename"), - shiny::actionButton("reset_name", "Reset") - ) - ), - div( - id = "id2", - 
shiny::uiOutput("deep_filter_box") - ) - ) - ), - shiny::column( - width = 6, - tags$div( - id = "options_data2", - div( - id = "id3", - shiny::uiOutput("gecco_filter_box") - ), - div( - id = "id5", - shinydashboardPlus::box( - title = "Improve global visualization", - id = "improve_visualization_box", - collapsible = TRUE, - closable = TRUE, - width = NULL, - shiny::checkboxInput("rre_width", "Add thickness to RRE results visualization"), - shiny::checkboxInput("prism_supp_data_input_width", "Add thickness to PRISM resistance + regulatory genes results visualization"), - shiny::checkboxInput("arts_width", "Add thickness to ARTS results visualization"), - shiny::checkboxInput("sempi_width", "Add thickness to SEMPI results visualization") - ) - ), - div( - id = "id4", - shinydashboardPlus::box( - title = "Prism supplement + ARTS options", - id = "prism_supplement_arts_box", - collapsible = TRUE, - closable = TRUE, - width = NULL, - shiny::checkboxInput("prism_supp", "Visualize PRISM resistance and regulatory genes"), - shiny::selectInput("dup_choice", "Choose duplicated core gene to plot only it", - choices = c("All"), - selected = "All" - ) + sortable::sortable_js("arts_tree_data1", options = sortable::sortable_options(swap = TRUE, group = "arts_tree_data")), + sortable::sortable_js("arts_tree_data2", options = sortable::sortable_options(swap = TRUE, group = "arts_tree_data")), + + ), + shinydashboard::tabItem( + tabName = "summarize_sidemenu", + shiny::fluidRow( + tags$div( + id = "summarize_data1", + mod_barplot_rank_ui("barplot_rank_ui_1"), + mod_group_table_ui("group_table_ui_1") + ) + ), + sortable::sortable_js("summarize_data1", options = sortable::sortable_options(swap = TRUE)) + ), + shinydashboard::tabItem( + tabName = "uploaddata_sidemenu", + shiny::fluidRow( + tags$div( + id = "upload_data1", + div( + id = "id1", + shinydashboardPlus::box( + title = "Upload Antismash data", + id = "upload_anti_box", + collapsible = TRUE, + closable = TRUE, + 
shiny::fileInput("anti_data", + "Upload Antismash data", + accept = list(".csv", ".json") + ) + ) + ), + div( + id = "id2", + shinydashboardPlus::box( + title = "Upload PRISM data", + id = "upload_prism_box", + collapsible = TRUE, + closable = TRUE, + shiny::fileInput("prism_data", + "Upload PRISM data", + accept = list(".csv", ".json") + ) + ) + ), + div( + id = "id3", + shinydashboardPlus::box( + title = "Upload SEMPI 2.0 data", + id = "upload_sempi_box", + collapsible = TRUE, + closable = TRUE, + shiny::fileInput("sempi_data", + "Upload SEMPI 2.0 data", + accept = list(".csv", ".zip") + ) + ) + ), + div( + id = "id4", + shinydashboardPlus::box( + title = "Upload DeepBGC data", + id = "upload_deep_box", + collapsible = TRUE, + closable = TRUE, + shiny::fileInput("deep_data", + "Upload DeepBGC data", + accept = ".tsv" + ) + ) + ), + div( + id = "id6", + shinydashboardPlus::box( + title = "Upload reference data", + id = "upload_compare_box", + collapsible = TRUE, + closable = TRUE, + shiny::fileInput("compare_data", + "Upload reference data", + accept = ".csv" + ) + ) + ), + div( + id = "id5", + shinydashboardPlus::box( + title = "Upload RippMiner-Genome data", + id = "upload_ripp_box", + collapsible = TRUE, + closable = TRUE, + shiny::fileInput("ripp_data", + "Upload RippMiner-Genome data", + accept = ".txt" + ) + ) + ) + ) + ), + shiny::fluidRow( + tags$div( + id = "upload_data2", + div( + id = "id1", + shinydashboardPlus::box( + title = "Upload Gecco data", + id = "upload_gecco_box", + collapsible = TRUE, + closable = TRUE, + shiny::fileInput("gecco_data", + "Upload Gecco data", + accept = ".tsv" + ) + ) + ), + div( + id = "id2", + shinydashboardPlus::box( + title = "Upload RRE-Finder data", + id = "upload_rre_box", + collapsible = TRUE, + closable = TRUE, + shiny::fileInput( + "rre_data", + "Upload RRE-Finder data" + ) + ) + ), + div( + id = "id3", + shinydashboardPlus::box( + title = "Upload ARTS data", + id = "upload_arts_box", + collapsible = TRUE, + 
closable = TRUE, + shiny::fileInput("arts_data", + "Upload ARTS data", + accept = list(".csv", ".zip") + ) + ) + ), + div( + id = "id5", + shinydashboardPlus::box( + title = "Upload Emerald/SanntiS data", + id = "upload_emerald_box", + collapsible = TRUE, + closable = TRUE, + shiny::fileInput("emerald_data", + "Upload Emerald/SanntiS data", + accept = list(".gff", ".gff3") + ) + ) + ), + div( + id = "id4", + shinydashboardPlus::box( + title = "Use Example data", + id = "use_example_data_box", + collapsible = TRUE, + closable = TRUE, + shiny::actionButton("anti_sco", "Use Antismash example data from S.coelicolor"), + shiny::actionButton("prism_sco", "Use PRISM example data from S.coelicolor"), + shiny::actionButton("sempi_sco", "Use SEMPI example data from S.coelicolor"), + shiny::actionButton("deep_sco", "Use DeepBGC example data from S.coelicolor"), + shiny::actionButton("gecco_sco", "Use Gecco example data from S.coelicolor"), + shiny::actionButton("rre_sco", "Use RRE-Finder example data from S.coelicolor"), + shiny::actionButton("arts_sco", "Use ARTS example data from S.coelicolor"), + shiny::actionButton("ripp_sco","Use RippMiner-Genome data example data from S.coelicolor"), + + shiny::numericInput("chr_len", "Please type chr len of an organism", value = 10000000) + ) + ) + ) + ), + sortable::sortable_js("upload_data1", options = sortable::sortable_options(swap = TRUE, group = "upload_data")), + sortable::sortable_js("upload_data2", options = sortable::sortable_options(swap = TRUE, group = "upload_data")) + ), + shinydashboard::tabItem( + tabName = "options_sidemenu", + shiny::fluidRow( + shiny::column( + width = 6, + tags$div( + id = "options_data1", + div( + id = "id1", + shinydashboardPlus::box( + title = "Rename", + id = "rename_box", + collapsible = TRUE, + closable = TRUE, + width = NULL, + shiny::checkboxInput("anti_hybrid", "Visualize AntiSMASH BGC with several types as 'Hybrid'"), + shiny::checkboxInput("prism_hybrid", "Visualize PRISM BGC with several 
types as 'Hybrid'"), + shiny::checkboxInput("sempi_hybrid", "Visualize SEMPI BGC with several types as 'Hybrid'"), + shiny::fileInput("rename_data", + "Upload renaming and coloring scheme", + accept = ".csv" + ), + shiny::actionButton("rename", "Rename"), + shiny::actionButton("reset_name", "Reset") + ) + ), + div( + id = "id2", + shiny::uiOutput("deep_filter_box") + ) + ) + ), + shiny::column( + width = 6, + tags$div( + id = "options_data2", + div( + id = "id3", + shiny::uiOutput("gecco_filter_box") + ), + div( + id = "id5", + shinydashboardPlus::box( + title = "Improve global visualization", + id = "improve_visualization_box", + collapsible = TRUE, + closable = TRUE, + width = NULL, + shiny::checkboxInput("rre_width", "Add thickness to RRE results visualization"), + shiny::checkboxInput("prism_supp_data_input_width", "Add thickness to PRISM resistance + regulatory genes results visualization"), + shiny::checkboxInput("arts_width", "Add thickness to ARTS results visualization"), + shiny::checkboxInput("sempi_width", "Add thickness to SEMPI results visualization") + ) + ), + div( + id = "id4", + shinydashboardPlus::box( + title = "Prism supplement + ARTS options", + id = "prism_supplement_arts_box", + collapsible = TRUE, + closable = TRUE, + width = NULL, + shiny::checkboxInput("prism_supp", "Visualize PRISM resistance and regulatory genes"), + shiny::selectInput("dup_choice", "Choose duplicated core gene to plot only it", + choices = c("All"), + selected = "All" + ) + ) + ), + mod_download_ui("download_ui_1") + ) + ) + ), + sortable::sortable_js("options_data1", options = sortable::sortable_options(swap = TRUE, group = "options_data")), + sortable::sortable_js("options_data2", options = sortable::sortable_options(swap = TRUE, group = "options_data")) ) - ), - mod_download_ui("download_ui_1") ) - ) - ), - sortable::sortable_js("options_data1", options = sortable::sortable_options(swap = TRUE, group = "options_data")), - sortable::sortable_js("options_data2", 
options = sortable::sortable_options(swap = TRUE, group = "options_data")) - ) + ) ) - ) ) - ) } #' Add external Resources to the Application @@ -306,17 +363,17 @@ app_ui <- function(request) { #' @importFrom golem add_resource_path activate_js favicon bundle_resources #' @noRd golem_add_external_resources <- function() { - add_resource_path( - "www", app_sys("app/www") - ) + add_resource_path( + "www", app_sys("app/www") + ) - tags$head( - favicon(), - bundle_resources( - path = app_sys("app/www"), - app_title = "BGCViz" + tags$head( + favicon(), + bundle_resources( + path = app_sys("app/www"), + app_title = "BGCViz" + ) + # Add here other external resources + # for example, you can add shinyalert::useShinyalert() ) - # Add here other external resources - # for example, you can add shinyalert::useShinyalert() - ) } diff --git a/R/fct_biocircos.R b/R/fct_biocircos.R index 4bad000..629bd6d 100644 --- a/R/fct_biocircos.R +++ b/R/fct_biocircos.R @@ -6,29 +6,29 @@ #' #' @noRd initialize_biocircos <- function(biocircos_anti, name, Biocircos_chromosomes, arcs_chromosomes, arcs_begin, arcs_end, arc_labels, arc_col, rename_data, chr_len, biocircos_color, coloring_datatable) { - # Make chromosome list for Biocircos plot. Use chr_len as an input - Biocircos_chromosomes[[name]] <- chr_len - # Add arcs. Quantity of arcs is length of dataframes - arcs_chromosomes <- c(arcs_chromosomes, rep(name, length(biocircos_anti$Cluster))) - # Add arcs begin positions. (Start column) - arcs_begin <- c(arcs_begin, biocircos_anti$Start) - # Stop position of arcs. - arcs_end <- c(arcs_end, biocircos_anti$Stop) - # Add Arcs labels. Can add only one label... 
- arc_labels <- c(arc_labels, biocircos_anti$Type) - if ((biocircos_color == T)) { - arc_colors <- sapply(biocircos_anti$Type2, function(x) { - if (x %in% coloring_datatable$x$data$Name) { - coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x] - } else { - coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"] - } - }) - } else { - arc_colors <- coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"] - } - arc_col <- c(arc_col, as.character(arc_colors)) - return(list(Biocircos_chromosomes, arcs_chromosomes, arcs_begin, arcs_end, arc_labels, arc_col)) + # Make chromosome list for Biocircos plot. Use chr_len as an input + Biocircos_chromosomes[[name]] <- chr_len + # Add arcs. Quantity of arcs is length of dataframes + arcs_chromosomes <- c(arcs_chromosomes, rep(name, length(biocircos_anti$Cluster))) + # Add arcs begin positions. (Start column) + arcs_begin <- c(arcs_begin, biocircos_anti$Start) + # Stop position of arcs. + arcs_end <- c(arcs_end, biocircos_anti$Stop) + # Add Arcs labels. Can add only one label... 
+ arc_labels <- c(arc_labels, biocircos_anti$Type) + if ((biocircos_color == TRUE)) { + arc_colors <- sapply(biocircos_anti$Type2, function(x) { + if (x %in% coloring_datatable$x$data$Name) { + coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x] + } else { + coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"] + } + }) + } else { + arc_colors <- coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"] + } + arc_col <- c(arc_col, as.character(arc_colors)) + return(list(Biocircos_chromosomes, arcs_chromosomes, arcs_begin, arcs_end, arc_labels, arc_col)) } #' add_biocircos_data @@ -40,90 +40,90 @@ initialize_biocircos <- function(biocircos_anti, name, Biocircos_chromosomes, ar #' @noRd add_biocircos_data <- function(data1_inter, data2_inter, data1, data2, data1_label, data2_label, rename_data, class, ref_col_biocircos, coloring_datatable) { - inter_s_rre_n <- data1_inter - inter_rre_s <- data2_inter - # Add link start. Just populate certain chromosome name times the lenght of interception - chromosomes_start <- c(rep(data2_label, length(inter_rre_s))) - # Add link end. Just populate second output from the vectors, used above. - chromosomes_end <- c(rep(data1_label, length(inter_s_rre_n))) - # Add links start positions as a start from dataframe. This vector is for chromosome start - link_pos_start <- as.numeric(c(data2$Start[match(inter_rre_s, data2$Cluster)])) - # Add links start positions as a start from dataframe. 
For chromosome start variable - link_pos_start_1 <- as.numeric(c(data2$Stop[match(inter_rre_s, data2$Cluster)])) - # Add links start position for a chromosome stop variable - link_pos_end <- as.numeric(c(data1$Start[match(inter_s_rre_n, data1$Cluster)])) - # Add links start position for a chromosome stop position - link_pos_end_2 <- as.numeric(c(data1$Stop[match(inter_s_rre_n, data1$Cluster)])) - label_1 <- c(sapply(inter_rre_s, function(x) { - x <- paste(paste0(data2_label, ":"), x, ",", data2$Type[data2$Cluster == x]) - })) - label_2 <- c(sapply(inter_s_rre_n, function(x) { - x <- paste(paste0(data1_label, ":"), x, ",", data1$Type[data1$Cluster == x]) - })) - # browser() - if (!is.null(inter_rre_s)) { - if (class == "P") { - subset_vec <- data2$Type2[match(inter_rre_s, data2$Cluster)] == data1$Type2[match(inter_s_rre_n, data1$Cluster)] - label_color <- as.character(c(sapply(data2$Type2[match(inter_rre_s, data2$Cluster)], function(x) { - if (x %in% coloring_datatable$x$data$Name) { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) - } else { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) - } - }))) - if (length(label_color) != 0) { - for (t in seq(1:length(label_color))) { - if (!is.null(subset_vec[t])) { - if (subset_vec[t] == F) { - label_color[t] <- as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) + inter_s_rre_n <- data1_inter + inter_rre_s <- data2_inter + # Add link start. Just populate certain chromosome name times the lenght of interception + chromosomes_start <- c(rep(data2_label, length(inter_rre_s))) + # Add link end. Just populate second output from the vectors, used above. + chromosomes_end <- c(rep(data1_label, length(inter_s_rre_n))) + # Add links start positions as a start from dataframe. 
This vector is for chromosome start + link_pos_start <- as.numeric(c(data2$Start[match(inter_rre_s, data2$Cluster)])) + # Add links start positions as a start from dataframe. For chromosome start variable + link_pos_start_1 <- as.numeric(c(data2$Stop[match(inter_rre_s, data2$Cluster)])) + # Add links start position for a chromosome stop variable + link_pos_end <- as.numeric(c(data1$Start[match(inter_s_rre_n, data1$Cluster)])) + # Add links start position for a chromosome stop position + link_pos_end_2 <- as.numeric(c(data1$Stop[match(inter_s_rre_n, data1$Cluster)])) + label_1 <- c(sapply(inter_rre_s, function(x) { + x <- paste(paste0(data2_label, ":"), x, ",", data2$Type[data2$Cluster == x]) + })) + label_2 <- c(sapply(inter_s_rre_n, function(x) { + x <- paste(paste0(data1_label, ":"), x, ",", data1$Type[data1$Cluster == x]) + })) + # browser() + if (!is.null(inter_rre_s)) { + if (class == "P") { + subset_vec <- data2$Type2[match(inter_rre_s, data2$Cluster)] == data1$Type2[match(inter_s_rre_n, data1$Cluster)] + label_color <- as.character(c(sapply(data2$Type2[match(inter_rre_s, data2$Cluster)], function(x) { + if (x %in% coloring_datatable$x$data$Name) { + as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) + } else { + as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) + } + }))) + if (length(label_color) != 0) { + for (t in seq(1:length(label_color))) { + if (!is.null(subset_vec[t])) { + if (subset_vec[t] == FALSE) { + label_color[t] <- as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) + } + } + } } - } + } else if (class == "H") { + if (grep(paste0("^", data1_label, "$"), coloring_datatable$x$data$Hierarchy) < (grep(paste0("^", data2_label, "$"), coloring_datatable$x$data$Hierarchy))) { + label_color <- as.character(c(sapply(data1$Type2[match(inter_s_rre_n, data1$Cluster)], function(x) { + if (x %in% coloring_datatable$x$data$Name) { + 
as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) + } else { + as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) + } + }))) + } else { + label_color <- as.character(c(sapply(data2$Type2[match(inter_rre_s, data2$Cluster)], function(x) { + if (x %in% coloring_datatable$x$data$Name) { + as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) + } else { + as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) + } + }))) + } + } else if (class == "R") { + if (data2_label == ref_col_biocircos) { + label_color <- as.character(c(sapply(data1$Type2[match(inter_s_rre_n, data1$Cluster)], function(x) { + if (x %in% coloring_datatable$x$data$Name) { + as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) + } else { + as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) + } + }))) + } else if (data1_label == ref_col_biocircos) { + label_color <- as.character(c(sapply(data2$Type2[match(inter_rre_s, data2$Cluster)], function(x) { + if (x %in% coloring_datatable$x$data$Name) { + as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) + } else { + as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) + } + }))) + } else { + label_color <- as.character(rep(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"], length(chromosomes_start))) + } + } else { + label_color <- as.character(rep(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"], length(chromosomes_start))) } - } - } else if (class == "H") { - if (grep(paste0("^", data1_label, "$"), coloring_datatable$x$data$Hierarchy) < (grep(paste0("^", data2_label, "$"), coloring_datatable$x$data$Hierarchy))) { - label_color <- as.character(c(sapply(data1$Type2[match(inter_s_rre_n, data1$Cluster)], function(x) { - if (x %in% 
coloring_datatable$x$data$Name) { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) - } else { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) - } - }))) - } else { - label_color <- as.character(c(sapply(data2$Type2[match(inter_rre_s, data2$Cluster)], function(x) { - if (x %in% coloring_datatable$x$data$Name) { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) - } else { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) - } - }))) - } - } else if (class == "R") { - if (data2_label == ref_col_biocircos) { - label_color <- as.character(c(sapply(data1$Type2[match(inter_s_rre_n, data1$Cluster)], function(x) { - if (x %in% coloring_datatable$x$data$Name) { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) - } else { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) - } - }))) - } else if (data1_label == ref_col_biocircos) { - label_color <- as.character(c(sapply(data2$Type2[match(inter_rre_s, data2$Cluster)], function(x) { - if (x %in% coloring_datatable$x$data$Name) { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == x]) - } else { - as.character(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"]) - } - }))) - } else { - label_color <- as.character(rep(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"], length(chromosomes_start))) - } - } else { - label_color <- as.character(rep(coloring_datatable$x$data$Color[coloring_datatable$x$data$Name == "base"], length(chromosomes_start))) } - } - return(list( - inter_s_rre_n, inter_s_rre_n, chromosomes_start, chromosomes_end, link_pos_start, link_pos_start_1, link_pos_end, - link_pos_end_2, label_1, label_2, label_color - )) + return(list( + inter_s_rre_n, inter_s_rre_n, chromosomes_start, chromosomes_end, link_pos_start, 
link_pos_start_1, link_pos_end, + link_pos_end_2, label_1, label_2, label_color, data2 + )) } diff --git a/R/fct_deep_reference.R b/R/fct_deep_reference.R index ed5a0f9..ed1190e 100644 --- a/R/fct_deep_reference.R +++ b/R/fct_deep_reference.R @@ -7,26 +7,26 @@ #' used for coloring and legend), Software, ID (unique number), Start, Stop (of a cluster, to show on mouse hover) #' #' @noRd -simple_seg <- function(df, letter, software, soft_name, soft_namings, inter = T, inters) { - if (inter == T) { - data <- df[df$Cluster %in% inters[[soft_namings]][[soft_name]]$from, ] - } else { - data <- df - } +simple_seg <- function(df, letter, software, soft_name, soft_namings, inter = TRUE, inters) { + if (inter == TRUE) { + data <- df[df$Cluster %in% inters[[soft_namings]][[soft_name]]$from, ] + } else { + data <- df + } - seg_df <- data.frame( - x = as.numeric(data$Start), - y = rep(letter, length(data$Cluster)), - xend = as.numeric(data$Stop), - yend = rep(letter, length(data$Cluster)), - Type = as.factor(data$Type), - Type2 = as.factor(data$Type2), - Software = rep(software, length(data$Cluster)), - ID = data$Cluster, - Start = data$Start, - Stop = data$Stop - ) - return(seg_df) + seg_df <- data.frame( + x = as.numeric(data$Start), + y = rep(letter, length(data$Cluster)), + xend = as.numeric(data$Stop), + yend = rep(letter, length(data$Cluster)), + Type = as.factor(data$Type), + Type2 = as.factor(data$Type2), + Software = rep(software, length(data$Cluster)), + ID = data$Cluster, + Start = data$Start, + Stop = data$Stop + ) + return(seg_df) } @@ -37,20 +37,20 @@ simple_seg <- function(df, letter, software, soft_name, soft_namings, inter = T, #' @return dataframe with the fields, specified in simple_seg() + added Hit, Core, Count, E_value, Bitscore, Model. 
#' #' @noRd -add_arts <- function(seg_df, soft_namings, df, inter = T, inters) { - if (inter == T) { - subset_df <- df[df$Cluster %in% inters[[soft_namings]]$arts$from, ] - } else { - subset_df <- df - } - seg_df$Hit <- subset_df$Hit - seg_df$xend <- as.numeric(subset_df$Stop) - seg_df$Core <- subset_df$Core - seg_df$Count <- subset_df$Count - seg_df$E_value <- subset_df$Evalue - seg_df$Bitscore <- subset_df$Bitscore - seg_df$Model <- subset_df$Model - return(seg_df) +add_arts <- function(seg_df, soft_namings, df, inter = TRUE, inters) { + if (inter == TRUE) { + subset_df <- df[df$Cluster %in% inters[[soft_namings]]$arts$from, ] + } else { + subset_df <- df + } + seg_df$Hit <- subset_df$Hit + seg_df$xend <- as.numeric(subset_df$Stop) + seg_df$Core <- subset_df$Core + seg_df$Count <- subset_df$Count + seg_df$E_value <- subset_df$Evalue + seg_df$Bitscore <- subset_df$Bitscore + seg_df$Model <- subset_df$Model + return(seg_df) } #' add_prism_supp #' @@ -59,17 +59,17 @@ add_arts <- function(seg_df, soft_namings, df, inter = T, inters) { #' @return dataframe with the fields, specified in simple_seg() + added Score, Name and Full_Name #' #' @noRd -add_prism_supp <- function(seg_df, soft_namings, df, inter = T, inters) { - if (inter == T) { - subset_df <- df[df$Cluster %in% inters[[soft_namings]]$prism_supp$from, ] - } else { - subset_df <- df - } - seg_df$xend <- as.numeric(subset_df$Stop) - seg_df$Score <- subset_df$Score - seg_df$Name <- subset_df$Name - seg_df$Full_name <- subset_df$Full_name - return(seg_df) +add_prism_supp <- function(seg_df, soft_namings, df, inter = TRUE, inters) { + if (inter == TRUE) { + subset_df <- df[df$Cluster %in% inters[[soft_namings]]$prism_supp$from, ] + } else { + subset_df <- df + } + seg_df$xend <- as.numeric(subset_df$Stop) + seg_df$Score <- subset_df$Score + seg_df$Name <- subset_df$Name + seg_df$Full_name <- subset_df$Full_name + return(seg_df) } #' add_deep #' @@ -78,16 +78,16 @@ add_prism_supp <- function(seg_df, soft_namings, 
df, inter = T, inters) { #' @return dataframe with the fields, specified in simpl_seg() + added Num_domains, deepbgc_score, activity #' #' @noRd -add_deep <- function(seg_df, soft_namings, df, inter = T, inters) { - if (inter == T) { - subset_df <- df[df$Cluster %in% inters[[soft_namings]]$deep$from, ] - } else { - subset_df <- df - } - seg_df$num_domains <- subset_df$num_domains - seg_df$deepbgc_score <- subset_df$deepbgc_score - seg_df$activity <- subset_df$product_activity - return(seg_df) +add_deep <- function(seg_df, soft_namings, df, inter = TRUE, inters) { + if (inter == TRUE) { + subset_df <- df[df$Cluster %in% inters[[soft_namings]]$deep$from, ] + } else { + subset_df <- df + } + seg_df$num_domains <- subset_df$num_domains + seg_df$deepbgc_score <- subset_df$deepbgc_score + seg_df$activity <- subset_df$product_activity + return(seg_df) } #' add_rre #' @@ -98,27 +98,27 @@ add_deep <- function(seg_df, soft_namings, df, inter = T, inters) { #' P_value, RRE_Start, RRE_stop and Probability in long format #' #' @noRd -add_rre <- function(seg_df, soft_namings, df, inter = T, rre_more, inters) { - if (inter == T) { - subset_df <- df[df$Cluster %in% inters[[soft_namings]]$rre$from, ] - } else { - subset_df <- df - } - if (rre_more == T) { - seg_df$xend <- as.numeric(subset_df$Stop) - seg_df$Score <- subset_df$Score - seg_df$Stop <- subset_df$Stop - seg_df$E_value <- subset_df$E.value - seg_df$P_value <- subset_df$P.value - seg_df$RRE_start <- subset_df$RRE.start - seg_df$RRE_stop <- subset_df$RRE.end - seg_df$Probability <- subset_df$Probability - } else { - seg_df$xend <- subset_df$Stop - seg_df$E_value <- subset_df$E.value - } +add_rre <- function(seg_df, soft_namings, df, inter = TRUE, rre_more, inters) { + if (inter == TRUE) { + subset_df <- df[df$Cluster %in% inters[[soft_namings]]$rre$from, ] + } else { + subset_df <- df + } + if (rre_more == TRUE) { + seg_df$xend <- as.numeric(subset_df$Stop) + seg_df$Score <- subset_df$Score + seg_df$Stop <- subset_df$Stop 
+ seg_df$E_value <- subset_df$E.value + seg_df$P_value <- subset_df$P.value + seg_df$RRE_start <- subset_df$RRE.start + seg_df$RRE_stop <- subset_df$RRE.end + seg_df$Probability <- subset_df$Probability + } else { + seg_df$xend <- subset_df$Stop + seg_df$E_value <- subset_df$E.value + } - return(seg_df) + return(seg_df) } #' add_gecco #' @@ -127,17 +127,17 @@ add_rre <- function(seg_df, soft_namings, df, inter = T, rre_more, inters) { #' @return dataframe with the fields, specified in simple_seg() + Num_proteins, Num_domains, Average_p, Max_p #' #' @noRd -add_gecco <- function(seg_df, soft_namings, df, inter = T, inters) { - if (inter == T) { - subset_df <- df[df$Cluster %in% inters[[soft_namings]]$gecco$from, ] - } else { - subset_df <- df - } - seg_df$Num_proteins <- subset_df$num_prot - seg_df$Num_domains <- subset_df$num_domains - seg_df$Average_p <- subset_df$average_p - seg_df$Max_p <- subset_df$max_p - return(seg_df) +add_gecco <- function(seg_df, soft_namings, df, inter = TRUE, inters) { + if (inter == TRUE) { + subset_df <- df[df$Cluster %in% inters[[soft_namings]]$gecco$from, ] + } else { + subset_df <- df + } + seg_df$Num_proteins <- subset_df$num_prot + seg_df$Num_domains <- subset_df$num_domains + seg_df$Average_p <- subset_df$average_p + seg_df$Max_p <- subset_df$max_p + return(seg_df) } #' define_spec_seg_df @@ -147,20 +147,20 @@ add_gecco <- function(seg_df, soft_namings, df, inter = T, inters) { #' @return dataframe with the fields, specified in simple_seg() + specific to the software. 
#' #' @noRd -define_spec_seg_df <- function(soft_names, index, seg_df, soft_major, df, inter = T, rre_more, inters) { - if (inter == F) { - soft_major <- "Not applicable" - } - if ((soft_names[index] == "prism_supp") & (soft_names[index] != soft_major)) { - seg_df <- add_prism_supp(seg_df, soft_major, df, inter, inters) - } else if ((soft_names[index] == "arts") & (soft_names[index] != soft_major)) { - seg_df <- add_arts(seg_df, soft_major, df, inter, inters) - } else if ((soft_names[index] == "deep") & (soft_names[index] != soft_major)) { - seg_df <- add_deep(seg_df, soft_major, df, inter, inters) - } else if ((soft_names[index] == "gecco") & (soft_names[index] != soft_major)) { - seg_df <- add_gecco(seg_df, soft_major, df, inter, inters) - } else if ((soft_names[index] == "rre") & (soft_names[index] != soft_major)) { - seg_df <- add_rre(seg_df, soft_major, df, inter, rre_more, inters) - } - return(seg_df) +define_spec_seg_df <- function(soft_names, index, seg_df, soft_major, df, inter = TRUE, rre_more, inters) { + if (inter == FALSE) { + soft_major <- "Not applicable" + } + if ((soft_names[index] == "prism_supp") & (soft_names[index] != soft_major)) { + seg_df <- add_prism_supp(seg_df, soft_major, df, inter, inters) + } else if ((soft_names[index] == "arts") & (soft_names[index] != soft_major)) { + seg_df <- add_arts(seg_df, soft_major, df, inter, inters) + } else if ((soft_names[index] == "deep") & (soft_names[index] != soft_major)) { + seg_df <- add_deep(seg_df, soft_major, df, inter, inters) + } else if ((soft_names[index] == "gecco") & (soft_names[index] != soft_major)) { + seg_df <- add_gecco(seg_df, soft_major, df, inter, inters) + } else if ((soft_names[index] == "rre") & (soft_names[index] != soft_major)) { + seg_df <- add_rre(seg_df, soft_major, df, inter, rre_more, inters) + } + return(seg_df) } diff --git a/R/fct_filtering.R b/R/fct_filtering.R index a2ab10b..cbcd0cc 100644 --- a/R/fct_filtering.R +++ b/R/fct_filtering.R @@ -6,57 +6,57 @@ #' #' @noRd 
filter_deepbgc <- function(deep_data, cluster_type, score_a_input, score_c_input, score_d_input, domains_filter, biodomain_filter, gene_filter) { - # Silence R CMD note - alkaloid <- nrps <- other <- - pks <- ripp <- saccharide <- - terpene <- score <- Cluster_type <- - num_domains <- num_bio_domains <- - num_proteins <- NULL - score_a <- apply(deep_data %>% dplyr::select(c("antibacterial", "cytotoxic", "inhibitor", "antifungal")), 1, function(x) max(x)) - score_d <- apply(deep_data %>% dplyr::select(c("deepbgc_score")), 1, function(x) max(x)) - score_c <- apply(deep_data %>% dplyr::select(c("alkaloid", "nrps", "other", "pks", "ripp", "saccharide", "terpene")), 1, function(x) max(x)) - if (is.null(cluster_type)) { - deep_data_chromo <- deep_data %>% - dplyr::mutate(score = apply(deep_data %>% - dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, function(x) max(x))) - # Cluster_type column. Here extract colnames, and assign max value to a new column - deep_data_chromo$Cluster_type <- colnames(deep_data_chromo %>% dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene))[apply(deep_data_chromo %>% dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, which.max)] - # If max score is under_threshold, print "under_threshold" - deep_data_chromo <- deep_data_chromo %>% - dplyr::mutate(Cluster_type = ifelse(score > 50 / 100, Cluster_type, "under_threshold")) - # Finally store deepbgc data in plotting variable. 
Do final scores processing - biocircos_deep <- deep_data_chromo %>% - dplyr::mutate(product_class = Cluster_type, score_a = score_a, score_d = score_d, score_c = score_c) %>% - dplyr::filter( - score_a >= 50 / 100, score_c >= 50 / 100, - score_d >= 50 / 100, num_domains >= 5, - num_bio_domains >= 1, num_proteins >= 1 - ) - } else { - deep_data_chromo <- deep_data %>% - dplyr::mutate(score = apply(deep_data %>% - dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, function(x) max(x))) - # Cluster_type column. Here extract colnames, and assign max value to a new column - deep_data_chromo$Cluster_type <- colnames(deep_data_chromo %>% dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene))[apply(deep_data_chromo %>% dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, which.max)] - # If max score is under_threshold, print "under_threshold" - deep_data_chromo <- deep_data_chromo %>% - dplyr::mutate(Cluster_type = ifelse(score > as.numeric(cluster_type) / 100, Cluster_type, "under_threshold")) - # Finally store deepbgc data in plotting variable. 
Do final scores processing - biocircos_deep <- deep_data_chromo %>% - dplyr::mutate(product_class = Cluster_type, score_a = score_a, score_d = score_d, score_c = score_c) %>% - dplyr::filter( - score_a >= as.numeric(score_a_input) / 100, score_c >= as.numeric(score_c_input) / 100, - score_d >= as.numeric(score_d_input) / 100, num_domains >= domains_filter, - num_bio_domains >= biodomain_filter, num_proteins >= gene_filter - ) - } + # Silence R CMD note + alkaloid <- nrps <- other <- + pks <- ripp <- saccharide <- + terpene <- score <- Cluster_type <- + num_domains <- num_bio_domains <- + num_proteins <- NULL + score_a <- apply(deep_data %>% dplyr::select(c("antibacterial", "cytotoxic", "inhibitor", "antifungal")), 1, function(x) max(x)) + score_d <- apply(deep_data %>% dplyr::select(c("deepbgc_score")), 1, function(x) max(x)) + score_c <- apply(deep_data %>% dplyr::select(c("alkaloid", "nrps", "other", "pks", "ripp", "saccharide", "terpene")), 1, function(x) max(x)) + if (is.null(cluster_type)) { + deep_data_chromo <- deep_data %>% + dplyr::mutate(score = apply(deep_data %>% + dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, function(x) max(x))) + # Cluster_type column. Here extract colnames, and assign max value to a new column + deep_data_chromo$Cluster_type <- colnames(deep_data_chromo %>% dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene))[apply(deep_data_chromo %>% dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, which.max)] + # If max score is under_threshold, print "under_threshold" + deep_data_chromo <- deep_data_chromo %>% + dplyr::mutate(Cluster_type = ifelse(score > 50 / 100, Cluster_type, "under_threshold")) + # Finally store deepbgc data in plotting variable. 
Do final scores processing + biocircos_deep <- deep_data_chromo %>% + dplyr::mutate(product_class = Cluster_type, score_a = score_a, score_d = score_d, score_c = score_c) %>% + dplyr::filter( + score_a >= 50 / 100, score_c >= 50 / 100, + score_d >= 50 / 100, num_domains >= 5, + num_bio_domains >= 1, num_proteins >= 1 + ) + } else { + deep_data_chromo <- deep_data %>% + dplyr::mutate(score = apply(deep_data %>% + dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, function(x) max(x))) + # Cluster_type column. Here extract colnames, and assign max value to a new column + deep_data_chromo$Cluster_type <- colnames(deep_data_chromo %>% dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene))[apply(deep_data_chromo %>% dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, which.max)] + # If max score is under_threshold, print "under_threshold" + deep_data_chromo <- deep_data_chromo %>% + dplyr::mutate(Cluster_type = ifelse(score > as.numeric(cluster_type) / 100, Cluster_type, "under_threshold")) + # Finally store deepbgc data in plotting variable. 
Do final scores processing + biocircos_deep <- deep_data_chromo %>% + dplyr::mutate(product_class = Cluster_type, score_a = score_a, score_d = score_d, score_c = score_c) %>% + dplyr::filter( + score_a >= as.numeric(score_a_input) / 100, score_c >= as.numeric(score_c_input) / 100, + score_d >= as.numeric(score_d_input) / 100, num_domains >= domains_filter, + num_bio_domains >= biodomain_filter, num_proteins >= gene_filter + ) + } - biocircos_deep["Start"] <- biocircos_deep$nucl_start - biocircos_deep["Stop"] <- biocircos_deep$nucl_end - biocircos_deep["Type"] <- biocircos_deep$product_class - biocircos_deep["Type2"] <- biocircos_deep$product_class - biocircos_deep["Cluster"] <- biocircos_deep$ID - return(biocircos_deep) + biocircos_deep["Start"] <- biocircos_deep$nucl_start + biocircos_deep["Stop"] <- biocircos_deep$nucl_end + biocircos_deep["Type"] <- biocircos_deep$product_class + biocircos_deep["Type2"] <- biocircos_deep$product_class + biocircos_deep["Cluster"] <- biocircos_deep$ID + return(biocircos_deep) } #' filter_gecco #' @@ -66,34 +66,34 @@ filter_deepbgc <- function(deep_data, cluster_type, score_a_input, score_c_input #' #' @noRd filter_gecco <- function(gecco_data, score_cluster_gecco, score_average_gecco, domains_filter_gecco, prot_filter_gecco) { - # Silence R CMD note - alkaloid <- nrps <- other <- - pks <- ripp <- saccharide <- - terpene <- score <- Type2 <- - Cluster_type <- score_a <- score_c <- - num_domains <- num_prot <- NULL - score_a_gecco <- apply(gecco_data %>% dplyr::select(c("average_p")), 1, function(x) max(x)) - score_c_gecco <- apply(gecco_data %>% dplyr::select(c("alkaloid", "nrps", "other", "pks", "ripp", "saccharide", "terpene")), 1, function(x) max(x)) - if (is.null(score_cluster_gecco)) { - gecco_data <- gecco_data %>% - dplyr::mutate(score = apply(gecco_data %>% - dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, function(x) max(x))) %>% - dplyr::mutate(Cluster_type = ifelse(score > 50 / 100, Type2, 
"under_threshold")) %>% - dplyr::mutate(Type2 = Cluster_type, score_a = score_a_gecco, score_c = score_c_gecco) %>% - dplyr::filter( - score_a >= 50 / 100, score_c >= 50 / 100, - num_domains >= 1, num_prot >= 1 - ) - } else { - gecco_data <- gecco_data %>% - dplyr::mutate(score = apply(gecco_data %>% - dplyr::select(alkaloid, nrps, other, pks, ripp, saccharide, terpene), 1, function(x) max(x))) %>% - dplyr::mutate(Cluster_type = ifelse(score > as.numeric(score_cluster_gecco) / 100, Type2, "under_threshold")) %>% - dplyr::mutate(Type2 = Cluster_type, score_a = score_a_gecco, score_c = score_c_gecco) %>% - dplyr::filter( - score_a >= as.numeric(score_average_gecco) / 100, score_c >= as.numeric(score_cluster_gecco) / 100, - num_domains >= domains_filter_gecco, num_prot >= prot_filter_gecco - ) - } - return(gecco_data) + # Silence R CMD note + alkaloid <- nrps <- + pks <- ripp <- saccharide <- + terpene <- score <- Type2 <- + Cluster_type <- score_a <- score_c <- + num_domains <- num_prot <- NULL + score_a_gecco <- apply(gecco_data %>% dplyr::select(c("average_p")), 1, function(x) max(x)) + score_c_gecco <- apply(gecco_data %>% dplyr::select(c("alkaloid", "nrps", "pks", "ripp", "saccharide", "terpene")), 1, function(x) max(x)) + if (is.null(score_cluster_gecco)) { + gecco_data <- gecco_data %>% + dplyr::mutate(score = apply(gecco_data %>% + dplyr::select(alkaloid, nrps, pks, ripp, saccharide, terpene), 1, function(x) max(x))) %>% + dplyr::mutate(Cluster_type = ifelse(score > 50 / 100, Type2, "under_threshold")) %>% + dplyr::mutate(Type2 = Cluster_type, score_a = score_a_gecco, score_c = score_c_gecco) %>% + dplyr::filter( + score_a >= 50 / 100, score_c >= 50 / 100, + num_domains >= 1, num_prot >= 1 + ) + } else { + gecco_data <- gecco_data %>% + dplyr::mutate(score = apply(gecco_data %>% + dplyr::select(alkaloid, nrps, pks, ripp, saccharide, terpene), 1, function(x) max(x))) %>% + dplyr::mutate(Cluster_type = ifelse(score > as.numeric(score_cluster_gecco) / 100, Type2, 
"under_threshold")) %>% + dplyr::mutate(Type2 = Cluster_type, score_a = score_a_gecco, score_c = score_c_gecco) %>% + dplyr::filter( + score_a >= as.numeric(score_average_gecco) / 100, score_c >= as.numeric(score_cluster_gecco) / 100, + num_domains >= domains_filter_gecco, num_prot >= prot_filter_gecco + ) + } + return(gecco_data) } diff --git a/R/fct_format_transformation.R b/R/fct_format_transformation.R index 366ea5a..de1571e 100644 --- a/R/fct_format_transformation.R +++ b/R/fct_format_transformation.R @@ -7,33 +7,39 @@ #' @param write_to - path where to write generated csv file #' #' @return csv file in specified location -#' +#' @examples +#' \dontrun{ +#' sempi_to_csv() +#' } #' @export -sempi_to_csv <- function(project_archive, write_to = getwd()) { - trackid <- NULL # Silence R CMD note - utils::unzip(project_archive, files = "genome_browser/main/Tracks.db", exdir = paste0(write_to, "/SEMPI_TracksDB"), junkpaths = T) - fl <- paste0(stringr::str_extract(write_to, ".*/"), "/SEMPI_TracksDB/Tracks.db") - conn <- RSQLite::dbConnect(RSQLite::SQLite(), fl) - - data <- RSQLite::dbGetQuery(conn, "SELECT * FROM tbl_segments") - RSQLite::dbDisconnect(conn) - unlink(paste0(stringr::str_extract(write_to, ".*/"), "/SEMPI_TracksDB"), recursive = T) - data <- data %>% - dplyr::filter(trackid == 6) +#' - types <- sapply(data$name, function(x) { - tmp <- stringr::str_trim(x) - tmp <- gsub(", ", "", tmp) - gsub(" ", "__", tmp) - }) - sempi_data <- data.frame(cbind(seq(1:length(data$trackid)), data$start, data$end, as.character(types))) - colnames(sempi_data) <- c("Cluster", "Start", "Stop", "Type") - sempi_data$Cluster <- as.numeric(sempi_data$Cluster) - sempi_data$Start <- as.numeric(sempi_data$Start) - sempi_data$Stop <- as.numeric(sempi_data$Stop) - sempi_data$Type <- stringr::str_trim(tolower(sempi_data$Type)) - utils::write.csv(sempi_data, paste0(write_to, "/sempi.csv"), row.names = FALSE) +sempi_to_csv <- function(project_archive, write_to = getwd()) { + trackid <- 
NULL # Silence R CMD note + utils::unzip(project_archive, files = "genome_browser/main/Tracks.db", exdir = paste0(write_to, "/SEMPI_TracksDB"), junkpaths = TRUE) + fl <- paste0(stringr::str_extract(write_to, ".*/"), "/SEMPI_TracksDB/Tracks.db") + conn <- RSQLite::dbConnect(RSQLite::SQLite(), fl) + + data <- RSQLite::dbGetQuery(conn, "SELECT * FROM tbl_segments") + RSQLite::dbDisconnect(conn) + unlink(paste0(stringr::str_extract(write_to, ".*/"), "/SEMPI_TracksDB"), recursive = TRUE) + data <- data %>% + dplyr::filter(trackid == 6) + + types <- sapply(data$name, function(x) { + tmp <- stringr::str_trim(x) + tmp <- gsub(", ", "", tmp) + gsub(" ", "__", tmp) + }) + + sempi_data <- data.frame(cbind(seq(1:length(data$trackid)), data$start, data$end, as.character(types))) + colnames(sempi_data) <- c("Cluster", "Start", "Stop", "Type") + sempi_data$Cluster <- as.numeric(sempi_data$Cluster) + sempi_data$Start <- as.numeric(sempi_data$Start) + sempi_data$Stop <- as.numeric(sempi_data$Stop) + sempi_data$Type <- stringr::str_trim(tolower(sempi_data$Type)) + utils::write.csv(sempi_data, paste0(write_to, "/sempi.csv"), row.names = FALSE) } #' prism_to_csv @@ -44,36 +50,39 @@ sempi_to_csv <- function(project_archive, write_to = getwd()) { #' @param write_to - path where to write generated csv file #' #' @return csv file in specified location -#' +#' @examples +#' \dontrun{ +#' prism_to_csv() +#' } #' @export prism_to_csv <- function(file, write_to = getwd()) { - data <- rjson::fromJSON(file = file) - - - types <- sapply(data$prism_results$clusters, function(x) { - tolower(x$type) - }) - - types <- sapply(types, function(x) { - if (length(unlist(x)) > 1) { - tmp <- stringr::str_trim(paste0(unlist(x), collapse = "", sep = " ")) - gsub(" ", "__", tmp) - } else { - x - } - }) - - start <- sapply(data$prism_results$clusters, function(x) { - x$start - }) - end <- sapply(data$prism_results$clusters, function(x) { - x$end - }) - - prism_data <- data.frame(cbind(start, end, types)) - 
prism_data <- prism_data %>% - dplyr::transmute(Cluster = as.numeric(rownames(prism_data)), Start = as.numeric(start), Stop = as.numeric(end), Type = types) - utils::write.csv(prism_data, paste0(write_to, "/prism.csv"), row.names = FALSE) + data <- rjson::fromJSON(file = file) + + + types <- sapply(data$prism_results$clusters, function(x) { + tolower(x$type) + }) + + types <- sapply(types, function(x) { + if (length(unlist(x)) > 1) { + tmp <- stringr::str_trim(paste0(unlist(x), collapse = "", sep = " ")) + gsub(" ", "__", tmp) + } else { + x + } + }) + + start <- sapply(data$prism_results$clusters, function(x) { + x$start + }) + end <- sapply(data$prism_results$clusters, function(x) { + x$end + }) + + prism_data <- data.frame(cbind(start, end, types)) + prism_data <- prism_data %>% + dplyr::transmute(Cluster = as.numeric(rownames(prism_data)), Start = as.numeric(start), Stop = as.numeric(end), Type = types) + utils::write.csv(prism_data, paste0(write_to, "/prism.csv"), row.names = FALSE) } #' antismash_to_csv @@ -84,53 +93,56 @@ prism_to_csv <- function(file, write_to = getwd()) { #' @param write_to - path where to write generated csv file #' #' @return csv file in specified location -#' +#' @examples +#' \dontrun{ +#' antismash_to_csv() +#' } #' @export antismash_to_csv <- function(file, write_to = getwd()) { - Start <- Stop <- NULL # To silence R CMD notes - data <- rjson::fromJSON(file = file) - types <- sapply(data$records, function(y) { - lapply(y$features, function(x) { - if (unlist(x$type == "region")) { - tolower(x$qualifiers$product) - } + Start <- Stop <- NULL # To silence R CMD notes + data <- rjson::fromJSON(file = file) + types <- sapply(data$records, function(y) { + lapply(y$features, function(x) { + if (unlist(x$type == "region")) { + tolower(x$qualifiers$product) + } + }) }) - }) - - types <- Filter(Negate(is.null), types) - - types <- sapply(types, function(x) { - if (length(unlist(x)) > 1) { - tmp <- stringr::str_trim(paste0(unlist(x), collapse = 
"", sep = " ")) - gsub(" ", "__", tmp) - } else { - x - } - }) - - location <- sapply(data$records, function(y) { - unlist(sapply(y$features, function(x) { - if (unlist(x$type == "region")) { - unlist(x$location) + + types <- Filter(Negate(is.null), types) + + types <- sapply(types, function(x) { + if (length(unlist(x)) > 1) { + tmp <- stringr::str_trim(paste0(unlist(x), collapse = "", sep = " ")) + gsub(" ", "__", tmp) + } else { + x } - })) - }) - - - location <- gsub("\\[", "", location) - location <- gsub("\\]", "", location) - location <- data.frame(location) - colnames(location) <- "split" - anti_data <- location %>% - tidyr::separate(split, c("Start", "Stop")) %>% - dplyr::transmute(ID = rownames(location), Start, Stop) - - anti_data <- cbind(anti_data, types) - colnames(anti_data) <- c("Cluster", "Start", "Stop", "Type") - anti_data$Cluster <- as.numeric(anti_data$Cluster) - anti_data$Start <- as.numeric(anti_data$Start) - anti_data$Stop <- as.numeric(anti_data$Stop) - utils::write.csv(anti_data, paste0(write_to, "/antismash.csv"), row.names = FALSE) + }) + + location <- sapply(data$records, function(y) { + unlist(sapply(y$features, function(x) { + if (unlist(x$type == "region")) { + unlist(x$location) + } + })) + }) + + + location <- gsub("\\[", "", location) + location <- gsub("\\]", "", location) + location <- data.frame(location) + colnames(location) <- "split" + anti_data <- location %>% + tidyr::separate(split, c("Start", "Stop")) %>% + dplyr::transmute(ID = rownames(location), Start, Stop) + + anti_data <- cbind(anti_data, types) + colnames(anti_data) <- c("Cluster", "Start", "Stop", "Type") + anti_data$Cluster <- as.numeric(anti_data$Cluster) + anti_data$Start <- as.numeric(anti_data$Start) + anti_data$Stop <- as.numeric(anti_data$Stop) + utils::write.csv(anti_data, paste0(write_to, "/antismash.csv"), row.names = FALSE) } #' arts_to_csv @@ -141,82 +153,181 @@ antismash_to_csv <- function(file, write_to = getwd()) { #' @param write_to - path where to 
write generated csv file #' #' @return csv file in specified location +#' @examples +#' \dontrun{ +#' arts_to_csv() +#' } #' #' @export arts_to_csv <- function(project_archive, write_to = getwd()) { - Start <- NULL # Silence R CMD note - utils::unzip(project_archive, files = c("tables/duptable.tsv", "tables/knownhits.tsv"), exdir = paste0(write_to, "/ARTS_tables"), junkpaths = T) - known_hits <- utils::read.delim(paste0(stringr::str_extract(write_to, ".*/"), "/ARTS_tables/knownhits.tsv")) - dupl_table <- utils::read.delim(paste0(stringr::str_extract(write_to, ".*/"), "/ARTS_tables/duptable.tsv")) - locations <- sapply(known_hits$Sequence.description, function(x) { - utils::tail(stringr::str_split(x, "\\|")[[1]], 1) - }) - - start <- sapply(locations, function(x) { - stringr::str_split(x, "_")[[1]][1] - }) - stop <- sapply(locations, function(x) { - stringr::str_split(x, "_")[[1]][2] - }) - # Parse known_hits data - known_table <- data.frame(cbind(start, stop)) - colnames(known_table) <- c("Start", "Stop") - rownames(known_table) <- seq(1:dim(known_table)[1]) - known_table$Start <- as.numeric(known_table$Start) - known_table$Stop <- as.numeric(known_table$Stop) - known_table$Description <- known_hits$Description - known_table$Model <- known_hits$X.Model - known_table$Evalue <- known_hits$evalue - known_table$Bitscore <- known_hits$bitscore - known_table$ID <- seq(1:dim(known_table)[1]) - known_table$Cluster <- known_table$ID - known_table$Type <- "resistance" - known_table$Type2 <- known_table$Type - known_table$Hit <- NA - known_table$Core <- "Not_core" - known_table$Count <- 1 - # Parse duplication data - get_location_duptable <- function(x, y) { - test <- stringr::str_split(x, ";") - test2 <- sub(".*loc\\|", "", test[[1]]) - test3 <- stringr::str_split(test2, " ") - res <- list() - for (i in seq(1:length(test3))) { - id <- paste("hit", as.character(i), sep = "_") - start <- test3[[i]][1] - stop <- test3[[i]][2] - res_1 <- list(id, start, stop) - res <- 
append(res, list(res_1)) + Start <- NULL # Silence R CMD note + utils::unzip(project_archive, files = c("tables/duptable.tsv", "tables/knownhits.tsv"), exdir = paste0(write_to, "/ARTS_tables"), junkpaths = TRUE) + known_hits <- utils::read.delim(paste0(stringr::str_extract(write_to, ".*/"), "/ARTS_tables/knownhits.tsv")) + dupl_table <- utils::read.delim(paste0(stringr::str_extract(write_to, ".*/"), "/ARTS_tables/duptable.tsv")) + locations <- sapply(known_hits$Sequence.description, function(x) { + utils::tail(stringr::str_split(x, "\\|")[[1]], 1) + }) + + start <- sapply(locations, function(x) { + stringr::str_split(x, "_")[[1]][1] + }) + stop <- sapply(locations, function(x) { + stringr::str_split(x, "_")[[1]][2] + }) + # Parse known_hits data + known_table <- data.frame(cbind(start, stop)) + colnames(known_table) <- c("Start", "Stop") + rownames(known_table) <- seq(1:dim(known_table)[1]) + known_table$Start <- as.numeric(known_table$Start) + known_table$Stop <- as.numeric(known_table$Stop) + known_table$Description <- known_hits$Description + known_table$Model <- known_hits$X.Model + known_table$Evalue <- known_hits$evalue + known_table$Bitscore <- known_hits$bitscore + known_table$ID <- seq(1:dim(known_table)[1]) + known_table$Cluster <- known_table$ID + known_table$Type <- "resistance" + known_table$Type2 <- known_table$Type + known_table$Hit <- NA + known_table$Core <- "Not_core" + known_table$Count <- 1 + # Parse duplication data + get_location_duptable <- function(x, y) { + test <- stringr::str_split(x, ";") + test2 <- sub(".*loc\\|", "", test[[1]]) + test3 <- stringr::str_split(test2, " ") + res <- list() + for (i in seq(1:length(test3))) { + id <- paste("hit", as.character(i), sep = "_") + start <- test3[[i]][1] + stop <- test3[[i]][2] + res_1 <- list(id, start, stop) + res <- append(res, list(res_1)) + } + return(res) } - return(res) - } + + dup_table <- data.frame() + for (i in seq(1:dim(dupl_table)[1])) { + lst <- 
get_location_duptable(dupl_table$X.Hits_listed.[i]) + fin_data <- data.frame(do.call("rbind", lst)) + fin_data$Core_gene <- dupl_table$X.Core_gene[i] + fin_data$Description <- dupl_table$Description[i] + fin_data$Count <- dupl_table$Count[i] + colnames(fin_data) <- c("Hit", "Start", "Stop", "Core", "Description", "Count") + dup_table <- rbind(dup_table, fin_data) + } + dup_table$Hit <- unlist(dup_table$Hit) + dup_table$Start <- unlist(dup_table$Start) + dup_table$Stop <- unlist(dup_table$Stop) + dup_table$Start <- as.numeric(dup_table$Start) + dup_table$Stop <- as.numeric(dup_table$Stop) + dup_table$ID <- seq(1:dim(dup_table)[1]) + dup_table$Cluster <- dup_table$ID + dup_table$Type <- "core" + dup_table$Type2 <- dup_table$Type + dup_table$Evalue <- NA + dup_table$Bitscore <- NA + dup_table$Model <- "Core" + arts_data <- rbind(dup_table, known_table) + arts_data <- arts_data %>% + dplyr::arrange(Start) + arts_data$ID <- seq(1:dim(arts_data)[1]) + arts_data$Cluster <- arts_data$ID + utils::write.csv(arts_data, paste0(write_to, "/arts.csv"), row.names = FALSE) +} - dup_table <- data.frame() - for (i in seq(1:dim(dupl_table)[1])) { - lst <- get_location_duptable(dupl_table$X.Hits_listed.[i]) - fin_data <- data.frame(do.call("rbind", lst)) - fin_data$Core_gene <- dupl_table$X.Core_gene[i] - fin_data$Description <- dupl_table$Description[i] - fin_data$Count <- dupl_table$Count[i] - colnames(fin_data) <- c("Hit", "Start", "Stop", "Core", "Description", "Count") - dup_table <- rbind(dup_table, fin_data) +arts_to_nevik <- function(file, write_to = getwd()) +{ + utils::unzip(project_archive, files = c("trees/*"), exdir = paste0(write_to, "/ARTS_tables"), junkpaths = TRUE) + lists <- list.files("/ARTS_tables", full.names = TRUE, recursive = TRUE) + return(lists) +} + + +#' all data to json +#' +#' @description Function, function that takes csv file and converts it to json +#' +#' @param csv_file path to csv +#' +#' @return json + + +data_to_json <- function(csv_file) { + + # 
Read CSV file + data <- utils::read.csv(csv_file) + + # Group by label + grouped_data <- dplyr::group_by(data, label) + + # Build records list + records <- list() + + # Iterate through each label + for (name in unique(grouped_data$label)) { + + # Truncate the name to a specific length (e.g., 20 characters) + truncated_name <- strtrim(name, width = 20) + + # Filter rows for this label + group <- dplyr::filter(grouped_data, label == name) + + # Create features + subregions <- lapply(1:nrow(group), function(i) { + list( + start = group$start[i], + end = group$end[i], + label = truncated_name, # Use truncated name + details = list( + score = "group" # use actual score + ) + ) + }) + + protoclusters <- lapply(1:nrow(group), function(i) { + list( + core_start = group$start[i], + core_end = group$end[i], + product = "bht", + details = list( + some_detail = "value" # add actual details + ) + ) + }) + + # Build record + record <- list( + name = truncated_name, # Use truncated name + subregions = subregions, + protoclusters = protoclusters + ) + + # Append + records <- base::c(records, list(record)) + } - dup_table$Hit <- unlist(dup_table$Hit) - dup_table$Start <- unlist(dup_table$Start) - dup_table$Stop <- unlist(dup_table$Stop) - dup_table$Start <- as.numeric(dup_table$Start) - dup_table$Stop <- as.numeric(dup_table$Stop) - dup_table$ID <- seq(1:dim(dup_table)[1]) - dup_table$Cluster <- dup_table$ID - dup_table$Type <- "core" - dup_table$Type2 <- dup_table$Type - dup_table$Evalue <- NA - dup_table$Bitscore <- NA - dup_table$Model <- "Core" - arts_data <- rbind(dup_table, known_table) - arts_data <- arts_data %>% - dplyr::arrange(Start) - arts_data$ID <- seq(1:dim(arts_data)[1]) - arts_data$Cluster <- arts_data$ID - utils::write.csv(arts_data, paste0(write_to, "/arts.csv"), row.names = FALSE) + + # Build top level + # Build top level + result <- list( + tool = list( + name = as.character("Example tool"), + version = as.character("1.2.3"), + description = 
as.character("Example of external result sideloading in antiSMASH"), + configuration = list( + verbose = as.character("true"), + multisetting = c("first", "second") + ) + ), + records = records + ) + + # Convert to JSON + json <- jsonlite::toJSON(result, pretty = TRUE, auto_unbox = TRUE) + writeLines(json,"result_for_antismash.json") + + return(json) } + diff --git a/R/fct_group_table.R b/R/fct_group_table.R index 1a4f12a..494d164 100644 --- a/R/fct_group_table.R +++ b/R/fct_group_table.R @@ -6,17 +6,17 @@ #' #' @noRd refine_unique <- function(data) { - n <- utils::tail(data, n = 1) - data <- utils::head(data, -1) - n_list <- stringr::str_split(n, ",") - out <- sapply(n_list[[1]], function(x) { - x %in% unlist(stringr::str_split(data, ",")) - }) - res <- sapply(out, function(x) { - if (x == F) { - x - } - }) + n <- utils::tail(data, n = 1) + data <- utils::head(data, -1) + n_list <- stringr::str_split(n, ",") + out <- sapply(n_list[[1]], function(x) { + x %in% unlist(stringr::str_split(data, ",")) + }) + res <- sapply(out, function(x) { + if (x == FALSE) { + x + } + }) - return(paste(names(Filter(Negate(is.null), res)), collapse = ",")) + return(paste(names(Filter(Negate(is.null), res)), collapse = ",")) } diff --git a/R/fct_helpers.R b/R/fct_helpers.R index 830b407..8548c9b 100644 --- a/R/fct_helpers.R +++ b/R/fct_helpers.R @@ -1,4 +1,4 @@ -#' rename_vector + #' rename_vector #' #' @description Function, that given the dataframe, and renaming dataframe, returns renamed vector. 
#' @@ -46,13 +46,13 @@ rename_vector <- function(data, renamed_dataframe, renaming_notification) { #' #' @noRd correct_width <- function(data, label, sempi_width, prism_supp_data_input_width, arts_width, rre_width) { - if ((label == "SEMPI") & (sempi_width == T)) { + if ((label == "SEMPI") & (sempi_width == TRUE)) { data$Stop <- data$Stop + 30000 - } else if ((label == "PRISM-Supp") & (prism_supp_data_input_width == T)) { + } else if ((label == "PRISM-Supp") & (prism_supp_data_input_width == TRUE)) { data$Stop <- data$Stop + 20000 - } else if ((label == "ARTS") & (arts_width == T)) { + } else if ((label == "ARTS") & (arts_width == TRUE)) { data$Stop <- data$Stop + 30000 - } else if ((label == "RRE-Finder") & (rre_width == T)) { + } else if ((label == "RRE-Finder") & (rre_width == TRUE)) { data$Stop <- data$Stop + 50000 } return(data) @@ -85,11 +85,14 @@ hybrid_col <- function(data) { #' #' @return csv file in specified location #' +#' @examples +#' get_defaults() +#' #' @export get_defaults <- function(write_to = getwd()) { - rename_file <- system.file("extdata", "rename.csv", package = "BGCViz") - option_data <- utils::read.csv(rename_file) - utils::write.csv(option_data, paste0(write_to, "/BGCViz_options.csv"), row.names = FALSE) + rename_file <- system.file("extdata", "rename.csv", package = "BGCViz") + option_data <- utils::read.csv(rename_file) + utils::write.csv(option_data, paste0(write_to, "/BGCViz_options.csv"), row.names = FALSE) } #' set_defaults #' @@ -98,9 +101,36 @@ get_defaults <- function(write_to = getwd()) { #' #' @param csv_file - path to csv file with default options. 
#' +#' @return csv file, written to package settings +#' +#' @examples +#' \dontrun{ +#' set_defaults() +#' } +#' #' @export set_defaults <- function(csv_file) { - rename_file <- system.file("extdata", "rename.csv", package = "BGCViz") - option_data <- utils::read.csv(csv_file) - utils::write.csv(option_data, rename_file, row.names = FALSE) + rename_file <- system.file("extdata", "rename.csv", package = "BGCViz") + option_data <- utils::read.csv(csv_file) + utils::write.csv(option_data, rename_file, row.names = FALSE) +} +#' get_dissect_example +#' +#' @description Function, which downloads a csv file for dissect.py into specified path. +#' Use to separate regions into separate clusters +#' +#' @param write_to - path to write csv file to. +#' +#' @return csv file, written to package settings +#' +#' @examples +#' \dontrun{ +#' get_dissect_example(write_to) +#' } +#' +#' @export +get_dissect_example <- function(write_to = getwd()) { + rename_file <- system.file("extdata", "dissect.csv", package = "BGCViz") + option_data <- utils::read.csv(rename_file) + utils::write.csv(option_data, paste0(write_to, "/dissect.csv"), row.names = FALSE) } diff --git a/R/fct_reading_functions.R b/R/fct_reading_functions.R index 4c443fb..32e642c 100644 --- a/R/fct_reading_functions.R +++ b/R/fct_reading_functions.R @@ -1,25 +1,110 @@ + +#' #' @description A function, that reads RiPPMiner-Genome file txt +#' #' +#' #' @return csv file +#' #' +#' #' @noRd + + +read_compare <- function(data){ + compare_data <- read.csv(data) + compare_data <- subset(compare_data, select = c("Type", "Cluster", "Start", "Stop")) + compare_data$chromosome <- rep("C", nrow(compare_data)) # Use nrow for clarity + # Type magic + compare_data$Type <- stringr::str_trim(tolower(compare_data$Type)) + compare_data$Type2 <- stringr::str_trim(tolower(compare_data$Type)) + # Mutate NAs + compare_data <- dplyr::mutate(compare_data, Cluster = 1:nrow(compare_data)) + + # Convert "Start" and "Stop" to integers + 
compare_data$Cluster <- 1:length(compare_data$Cluster) + + compare_data$Start <- as.integer(compare_data$Start) + compare_data$Stop <- as.integer(compare_data$Stop) + 25000 + + return(compare_data) +} + + +read_emerald <- function(data) { + # get rid off unneeded rows + all <- readLines(data) + filtered_lines <- all[!grepl("^#|^$", all)] + emerald_data <- paste(filtered_lines, collapse = "\n") + data_connection <- textConnection(emerald_data) + + # create dataframe + emerald_data <- read.table(data_connection, header = FALSE, sep = "\t", col.names = c("seqname", "source", "Cluster", "Start", + "Stop", "score", "strand", "frame", + "Type")) + close(data_connection) + emerald_data$chromosome <- rep("EM",length(emerald_data$Cluster)) + pattern <- "nearest_MiBIG_class=([^;]+)" + emerald_data$Type <- sapply(emerald_data$Type, function(x) { + substring <- regmatches(x, regexec(pattern, x))[[1]][2] + return(substring) + }) + emerald_data$Type <- stringr::str_trim(tolower(emerald_data$Type)) + emerald_data$Type2 <- emerald_data$Type + emerald_data$Cluster <- 1:length(emerald_data$Cluster) + return (emerald_data) +} + + + + + +read_ripp <- function(data) { + all <- readLines(data) + filtered_lines <- all[!grepl("^#|^$", all)] + data <- paste(filtered_lines, collapse = "\n") + data_connection <- textConnection(data) + ripp_data <- read.table(data_connection, header = FALSE, sep = "\t", col.names = c("Cluster", "Type", "Start", "Stop")) + close(data_connection) + #Validation of input + res_validation <- validate_basic_input(ripp_data) + if (!(res_validation[[1]])) { + ripp_data <- NULL + return(NULL) + } else { + ripp_data <- res_validation[[2]] + } + #ADDING CHROMOSOME COLUMN + ripp_data$chromosome <- rep("GF", length(ripp_data$Cluster)) + #Type magic + ripp_data$Type <- stringr::str_trim(tolower(ripp_data$Type)) + ripp_data["Type2"] <- stringr::str_trim(tolower(ripp_data$Type)) + #Mutate NAs + ripp_data <- dplyr::mutate(ripp_data, Cluster = 1:length(ripp_data$Type)) + + 
return(ripp_data) + +} + + + #' read_anti #' -#' @description A function, that reads antismash file +#' @description A function, that reads RRE-finder file #' #' @return csv file #' #' @noRd read_anti <- function(data) { - anti_data <- data - res_validation <- validate_basic_input(anti_data) - if (!(res_validation[[1]])) { - anti_data <- NULL - return(NULL) - } else { - anti_data <- res_validation[[2]] - } - # Add chromosome column - anti_data$chromosome <- rep("A", length(anti_data$Cluster)) - # Type magic - anti_data$Type <- stringr::str_trim(tolower(anti_data$Type)) - anti_data["Type2"] <- stringr::str_trim(tolower(anti_data$Type)) - return(anti_data) + anti_data <- data + res_validation <- validate_basic_input(anti_data) + if (!(res_validation[[1]])) { + anti_data <- NULL + return(NULL) + } else { + anti_data <- res_validation[[2]] + } + # Add chromosome column + anti_data$chromosome <- rep("A", length(anti_data$Cluster)) + # Type magic + anti_data$Type <- stringr::str_trim(tolower(anti_data$Type)) + anti_data["Type2"] <- stringr::str_trim(tolower(anti_data$Type)) + return(anti_data) } #' read_anti #' @@ -29,223 +114,240 @@ read_anti <- function(data) { #' #' @noRd read_gecco <- function(data) { - # Silence R CMD note - polyketide_probability <- other_probability <- - nrp_probability <- alkaloid_probability <- - terpene_probability <- saccharide_probability <- - ripp_probability <- NULL - # Add chromosome column - gecco_data <- data - - gecco_data$chromosome <- rep("G", length(gecco_data$type)) - # Type magic - gecco_data$Cluster <- seq(1:length(gecco_data$chromosome)) - gecco_data$ID <- gecco_data$Cluster - gecco_data$Type <- stringr::str_trim(tolower(gecco_data$type)) - gecco_data$Type <- gsub("polyketide", "pks", gecco_data$Type) - gecco_data$Type <- gsub("nrp", "nrps", gecco_data$Type) - gecco_data$Type <- gsub("unknown", "under_threshold", gecco_data$Type) - gecco_data["Type2"] <- stringr::str_trim(tolower(gecco_data$Type)) - drop_cols <- c( - 
"alkaloid_probability", "polyketide_probability", "ripp_probability", "saccharide_probability", - "terpene_probability", "nrp_probability", "other_probability" - ) - # Read data - gecco_data <- gecco_data %>% - dplyr::mutate( - pks = polyketide_probability, other = other_probability, nrps = nrp_probability, alkaloid = alkaloid_probability, - terpene = terpene_probability, saccharide = saccharide_probability, ripp = ripp_probability - ) %>% - dplyr::select(-dplyr::one_of(drop_cols)) - gecco_data$num_prot <- sapply(stringr::str_split(as.character(gecco_data$proteins), ";"), length) - gecco_data$num_domains <- sapply(stringr::str_split(as.character(gecco_data$domains), ";"), length) - names(gecco_data)[names(gecco_data) == "start"] <- "Start" - names(gecco_data)[names(gecco_data) == "end"] <- "Stop" - return(gecco_data) -} -read_prism <- function(data, json = T) { - if (json == T) { - processed_data <- process_prism_json_suppl(data) - prism_data <- processed_data[[1]] - prism_supp_data <- processed_data[[2]] - } else { - prism_data <- data - prism_supp_data <- NULL - } - res_validation <- validate_basic_input(prism_data) - if (!(res_validation[[1]])) { - prism_data <- NULL - return(NULL) - } else { - prism_data <- res_validation[[2]] - } - prism_data$Type <- stringr::str_trim(tolower(prism_data$Type)) - prism_data["Type2"] <- stringr::str_trim(tolower(prism_data$Type)) - return(list(prism_data, prism_supp_data)) + # Silence R CMD note + polyketide_probability <- other_probability <- + nrp_probability <- alkaloid_probability <- + terpene_probability <- saccharide_probability <- + ripp_probability <- NULL + # Add chromosome column + gecco_data <- data + + gecco_data$chromosome <- rep("G", length(gecco_data$type)) + # Type magic + gecco_data$Cluster <- seq(1:length(gecco_data$chromosome)) + gecco_data$ID <- gecco_data$Cluster + gecco_data$Type <- stringr::str_trim(tolower(gecco_data$type)) + gecco_data$Type <- gsub("polyketide", "pks", gecco_data$Type) + gecco_data$Type 
<- gsub("nrp", "nrps", gecco_data$Type) + gecco_data$Type <- gsub("unknown", "under_threshold", gecco_data$Type) + gecco_data["Type2"] <- stringr::str_trim(tolower(gecco_data$Type)) + drop_cols <- c( + "alkaloid_probability", "polyketide_probability", "ripp_probability", "saccharide_probability", + "terpene_probability", "nrp_probability" + ) + # Read data + gecco_data <- gecco_data %>% + dplyr::mutate( + pks = polyketide_probability, nrps = nrp_probability, alkaloid = alkaloid_probability, + terpene = terpene_probability, saccharide = saccharide_probability, ripp = ripp_probability + ) %>% + dplyr::select(-dplyr::one_of(drop_cols)) + gecco_data$num_prot <- sapply(stringr::str_split(as.character(gecco_data$proteins), ";"), length) + gecco_data$num_domains <- sapply(stringr::str_split(as.character(gecco_data$domains), ";"), length) + names(gecco_data)[names(gecco_data) == "start"] <- "Start" + names(gecco_data)[names(gecco_data) == "end"] <- "Stop" + return(gecco_data) } -read_sempi <- function(data, zip = T) { - # Silence R CMD note - trackid <- NULL - if (zip == T) { - utils::unzip(data, files = "genome_browser/main/Tracks.db", exdir = "./SEMPI_TracksDB", junkpaths = T) - fl <- "./SEMPI_TracksDB/Tracks.db" - conn <- RSQLite::dbConnect(RSQLite::SQLite(), fl) - - data <- RSQLite::dbGetQuery(conn, "SELECT * FROM tbl_segments") - RSQLite::dbDisconnect(conn) - unlink("./SEMPI_TracksDB", recursive = T) - data <- data %>% - dplyr::filter(trackid == 6) - - types <- sapply(data$name, function(x) { - tmp <- stringr::str_trim(x) - tmp <- gsub(", ", "", tmp) - gsub(" ", "__", tmp) - }) - - sempi_data <- data.frame(cbind(seq(1:length(data$trackid)), data$start, data$end, as.character(types))) - colnames(sempi_data) <- c("Cluster", "Start", "Stop", "Type") - sempi_data$Cluster <- as.numeric(sempi_data$Cluster) - sempi_data$Start <- as.numeric(sempi_data$Start) - sempi_data$Stop <- as.numeric(sempi_data$Stop) - sempi_data$Type <- stringr::str_trim(tolower(sempi_data$Type)) - } 
else { - sempi_data <- data - } - res_validation <- validate_basic_input(sempi_data) - if (!(res_validation[[1]])) { - sempi_data <- NULL - return(NULL) - } else { - sempi_data <- res_validation[[2]] - } - sempi_data["Type2"] <- stringr::str_trim(tolower(sempi_data$Type)) - return(sempi_data) +read_prism <- function(data, json = TRUE) { + if (json == TRUE) { + processed_data <- process_prism_json_suppl(data) + prism_data <- processed_data[[1]] + prism_supp_data <- processed_data[[2]] + } else { + prism_data <- data + prism_supp_data <- NULL + } + res_validation <- validate_basic_input(prism_data) + if (!(res_validation[[1]])) { + prism_data <- NULL + return(NULL) + } else { + prism_data <- res_validation[[2]] + } + prism_data$Type <- stringr::str_trim(tolower(prism_data$Type)) + prism_data["Type2"] <- stringr::str_trim(tolower(prism_data$Type)) + return(list(prism_data, prism_supp_data)) } -read_arts_archive <- function(archive, zip = T) { - # Silence R CMD note - Start <- Core <- NULL - if (zip == T) { - utils::unzip(archive, files = c("tables/duptable.tsv", "tables/knownhits.tsv"), exdir = "./ARTS_tables", junkpaths = T) - known_hits <- utils::read.delim("./ARTS_tables/knownhits.tsv") - dupl_table <- utils::read.delim("./ARTS_tables/duptable.tsv") - unlink("./ARTS_tables", recursive = T) - locations <- sapply(known_hits$Sequence.description, function(x) { - utils::tail(stringr::str_split(x, "\\|")[[1]], 1) - }) - - start <- sapply(locations, function(x) { - stringr::str_split(x, "_")[[1]][1] - }) - stop <- sapply(locations, function(x) { - stringr::str_split(x, "_")[[1]][2] - }) - # Parse known_hits data - known_table <- data.frame(cbind(start, stop)) - colnames(known_table) <- c("Start", "Stop") - rownames(known_table) <- seq(1:dim(known_table)[1]) - known_table$Start <- as.numeric(known_table$Start) - known_table$Stop <- as.numeric(known_table$Stop) - known_table$Description <- known_hits$Description - known_table$Model <- known_hits$X.Model - 
known_table$Evalue <- known_hits$evalue - known_table$Bitscore <- known_hits$bitscore - known_table$ID <- seq(1:dim(known_table)[1]) - known_table$Cluster <- known_table$ID - known_table$Type <- "resistance" - known_table$Type2 <- known_table$Type - known_table$Hit <- NA - known_table$Core <- "Not_core" - known_table$Count <- 1 - # Parse duplication data - get_location_duptable <- function(x, y) { - test <- stringr::str_split(x, ";") - test2 <- sub(".*loc\\|", "", test[[1]]) - test3 <- stringr::str_split(test2, " ") - res <- list() - for (i in seq(1:length(test3))) { - id <- paste("hit", as.character(i), sep = "_") - start <- test3[[i]][1] - stop <- test3[[i]][2] - res_1 <- list(id, start, stop) - res <- append(res, list(res_1)) - } - return(res) +read_sempi <- function(data, zip = TRUE) { + # Silence R CMD note + trackid <- NULL + if (zip == TRUE) { + utils::unzip(data, files = "genome_browser/main/Tracks.db", exdir = "./SEMPI_TracksDB", junkpaths = TRUE) + fl <- "./SEMPI_TracksDB/Tracks.db" + conn <- RSQLite::dbConnect(RSQLite::SQLite(), fl) + + data <- RSQLite::dbGetQuery(conn, "SELECT * FROM tbl_segments") + RSQLite::dbDisconnect(conn) + unlink("./SEMPI_TracksDB", recursive = TRUE) + data <- data %>% + dplyr::filter(trackid == 6) + + types <- sapply(data$name, function(x) { + tmp <- stringr::str_trim(x) + tmp <- gsub(", ", "", tmp) + gsub(" ", "__", tmp) + }) + + sempi_data <- data.frame(cbind(seq(1:length(data$trackid)), data$start, data$end, as.character(types))) + colnames(sempi_data) <- c("Cluster", "Start", "Stop", "Type") + sempi_data$Cluster <- as.numeric(sempi_data$Cluster) + sempi_data$Start <- as.numeric(sempi_data$Start) + sempi_data$Stop <- as.numeric(sempi_data$Stop) + sempi_data$Type <- stringr::str_trim(tolower(sempi_data$Type)) + } else { + sempi_data <- data + } + res_validation <- validate_basic_input(sempi_data) + if (!(res_validation[[1]])) { + sempi_data <- NULL + return(NULL) + } else { + sempi_data <- res_validation[[2]] } + 
sempi_data["Type2"] <- stringr::str_trim(tolower(sempi_data$Type)) + return(sempi_data) +} +read_arts_archive <- function(archive, zip = TRUE) { + # Silence R CMD note + Start <- Core <- NULL + if (zip == TRUE) { + utils::unzip(archive, files = c("tables/duptable.tsv", "tables/knownhits.tsv", "alltrees.zip"), exdir = "./ARTS_tables", junkpaths = TRUE) + known_hits <- utils::read.delim("./ARTS_tables/knownhits.tsv") + dupl_table <- utils::read.delim("./ARTS_tables/duptable.tsv") + utils::unzip("./ARTS_tables/alltrees.zip",exdir = "./ARTS_tables/trees", junkpaths = TRUE) + trees_id_list <- list.files("./ARTS_tables/trees/") + locations <- sapply(known_hits$Sequence.description, function(x) { + utils::tail(stringr::str_split(x, "\\|")[[1]], 1) + }) + + start <- sapply(locations, function(x) { + stringr::str_split(x, "_")[[1]][1] + }) + stop <- sapply(locations, function(x) { + stringr::str_split(x, "_")[[1]][2] + }) + # Parse known_hits data + known_table <- data.frame(cbind(start, stop)) + colnames(known_table) <- c("Start", "Stop") + rownames(known_table) <- seq(1:dim(known_table)[1]) + known_table$Start <- as.numeric(known_table$Start) + known_table$Stop <- as.numeric(known_table$Stop) + known_table$Description <- known_hits$Description + known_table$Model <- known_hits$X.Model + known_table$Evalue <- known_hits$evalue + known_table$Bitscore <- known_hits$bitscore + known_table$ID <- seq(1:dim(known_table)[1]) + known_table$Cluster <- known_table$ID + known_table$Type <- "resistance" + known_table$Type2 <- known_table$Type + known_table$Hit <- NA + known_table$Core <- "Not_core" + known_table$Count <- 1 + # Parse duplication data + get_location_duptable <- function(x, y) { + test <- stringr::str_split(x, ";") + test2 <- sub(".*loc\\|", "", test[[1]]) + test3 <- stringr::str_split(test2, " ") + res <- list() + for (i in seq(1:length(test3))) { + id <- paste("hit", as.character(i), sep = "_") + start <- test3[[i]][1] + stop <- test3[[i]][2] + res_1 <- list(id, start, 
stop) + res <- append(res, list(res_1)) + } + return(res) + } - dup_table <- data.frame() - for (i in seq(1:dim(dupl_table)[1])) { - lst <- get_location_duptable(dupl_table$X.Hits_listed.[i]) - fin_data <- data.frame(do.call("rbind", lst)) - fin_data$Core_gene <- dupl_table$X.Core_gene[i] - fin_data$Description <- dupl_table$Description[i] - fin_data$Count <- dupl_table$Count[i] - colnames(fin_data) <- c("Hit", "Start", "Stop", "Core", "Description", "Count") - dup_table <- rbind(dup_table, fin_data) + dup_table <- data.frame() + for (i in seq(1:dim(dupl_table)[1])) { + lst <- get_location_duptable(dupl_table$X.Hits_listed.[i]) + fin_data <- data.frame(do.call("rbind", lst)) + fin_data$Core_gene <- dupl_table$X.Core_gene[i] + fin_data$Description <- dupl_table$Description[i] + fin_data$Count <- dupl_table$Count[i] + colnames(fin_data) <- c("Hit", "Start", "Stop", "Core", "Description", "Count") + dup_table <- rbind(dup_table, fin_data) + } + dup_table$Hit <- unlist(dup_table$Hit) + dup_table$Start <- unlist(dup_table$Start) + dup_table$Stop <- unlist(dup_table$Stop) + dup_table$Start <- as.numeric(dup_table$Start) + dup_table$Stop <- as.numeric(dup_table$Stop) + dup_table$ID <- seq(1:dim(dup_table)[1]) + dup_table$Cluster <- dup_table$ID + dup_table$Type <- "core" + dup_table$Type2 <- dup_table$Type + dup_table$Evalue <- NA + dup_table$Bitscore <- NA + dup_table$Model <- "Core" + arts_data <- rbind(dup_table, known_table) + arts_data <- arts_data %>% + dplyr::arrange(Start) + arts_data$ID <- seq(1:dim(arts_data)[1]) + arts_data$Cluster <- arts_data$ID + num_rows <- nrow(arts_data) + extended_trees_list <- lapply(seq_len(num_rows), function(i) { + trees_id_list[i %% length(trees_id_list) + 1] + }) + arts_data$TreesFiles <- unlist(extended_trees_list) + # unsure about the efficacy of this, since unlinking was crucial here... 
+ actual_trees_list = list() + for (tree in arts_data$TreesFiles){ + tree <- ggtree::read.tree(file = paste0("./ARTS_tables/trees/", tree)) + actual_trees_list <- append(actual_trees_list,list(tree)) + } + unlink("./ARTS_tables", recursive = TRUE) + + arts_data$Trees <- actual_trees_list + + + } else { + arts_data <- archive } - dup_table$Hit <- unlist(dup_table$Hit) - dup_table$Start <- unlist(dup_table$Start) - dup_table$Stop <- unlist(dup_table$Stop) - dup_table$Start <- as.numeric(dup_table$Start) - dup_table$Stop <- as.numeric(dup_table$Stop) - dup_table$ID <- seq(1:dim(dup_table)[1]) - dup_table$Cluster <- dup_table$ID - dup_table$Type <- "core" - dup_table$Type2 <- dup_table$Type - dup_table$Evalue <- NA - dup_table$Bitscore <- NA - dup_table$Model <- "Core" - arts_data <- rbind(dup_table, known_table) - arts_data <- arts_data %>% - dplyr::arrange(Start) - arts_data$ID <- seq(1:dim(arts_data)[1]) - arts_data$Cluster <- arts_data$ID - } else { - arts_data <- archive - } - return(arts_data) + return(arts_data) } read_deep <- function(data) { - polyketide <- nrp <- NULL # Silence R CMD error - # Fix colnames in deepbgc data - colnames(data) <- stringr::str_to_lower(colnames(data)) - res_validation <- validate_deep_input(data) - if (!(res_validation[[1]])) { - deep_data <- NULL - return(NULL) - } else { - deep_data <- res_validation[[2]] - } - drop_cols <- c("nrp", "polyketide") - # Read data - deep_data <- deep_data %>% - dplyr::mutate(pks = polyketide, nrps = nrp) %>% - dplyr::select(-dplyr::one_of(drop_cols)) - return(deep_data) + polyketide <- nrp <- NULL # Silence R CMD error + # Fix colnames in deepbgc data + colnames(data) <- stringr::str_to_lower(colnames(data)) + res_validation <- validate_deep_input(data) + if (!(res_validation[[1]])) { + deep_data <- NULL + return(NULL) + } else { + deep_data <- res_validation[[2]] + } + drop_cols <- c("nrp", "polyketide") + # Read data + deep_data <- deep_data %>% + dplyr::mutate(pks = polyketide, nrps = nrp) %>% + 
dplyr::select(-dplyr::one_of(drop_cols)) + return(deep_data) } read_rre <- function(data) { - Gene.name <- Coordinates <- NULL # Silence R CMD error - res_validation <- validate_rre_input(data) - if (!(res_validation[[1]])) { - data <- NULL - return(NULL) - } else { - data <- res_validation[[2]] - } - # Clean RRE data. Extract coordinates and Locus tag with double underscore delimiter (__) - rre_data <- data %>% - tidyr::separate(Gene.name, c("Sequence", "Coordinates", "Locus_tag"), sep = "__") %>% - tidyr::separate(Coordinates, c("Start", "Stop"), sep = "-") - # Add chromosome info column - rre_data$chromosome <- rep("RRE", length(rre_data$Sequence)) - # Add ID column - rre_data$ID <- seq(1:length(rre_data$Sequence)) - rre_data$Cluster <- rre_data$ID - rre_data <- data.frame(rre_data) - rre_data["Type"] <- "ripp" - rre_data["Type2"] <- "ripp" - rre_data$Start <- as.numeric(rre_data$Start) - rre_data$Stop <- as.numeric(rre_data$Stop) - # Store rre data into local variable - rre_data <- data.frame(rre_data) + Gene.name <- Coordinates <- NULL # Silence R CMD error + res_validation <- validate_rre_input(data) + if (!(res_validation[[1]])) { + data <- NULL + return(NULL) + } else { + data <- res_validation[[2]] + } + # Clean RRE data. 
Extract coordinates and Locus tag with double underscore delimiter (__) + rre_data <- data %>% + tidyr::separate(Gene.name, c("Sequence", "Coordinates", "Locus_tag"), sep = "__") %>% + tidyr::separate(Coordinates, c("Start", "Stop"), sep = "-") + # Add chromosome info column + rre_data$chromosome <- rep("RRE", length(rre_data$Sequence)) + # Add ID column + rre_data$ID <- seq(1:length(rre_data$Sequence)) + rre_data$Cluster <- rre_data$ID + rre_data <- data.frame(rre_data) + rre_data["Type"] <- "ripp" + rre_data["Type2"] <- "ripp" + rre_data$Start <- as.numeric(rre_data$Start) + rre_data$Stop <- as.numeric(rre_data$Stop) + # Store rre data into local variable + rre_data <- data.frame(rre_data) } diff --git a/R/fct_reading_processing.R b/R/fct_reading_processing.R index dd178e4..a49d232 100644 --- a/R/fct_reading_processing.R +++ b/R/fct_reading_processing.R @@ -7,42 +7,42 @@ #' #' @noRd fix_duplicates <- function(test_score, order_vec, regul_genes_orfs, test_name) { - dupl_names <- regul_genes_orfs[duplicated(regul_genes_orfs)] - duplicated_values <- which(duplicated(regul_genes_orfs[order_vec])) - test_score <- test_score[order_vec] - to_add <- test_score[(which(duplicated(regul_genes_orfs[order_vec])))] - test_score <- test_score[-(which(duplicated(regul_genes_orfs[order_vec])))] - iterate_one_more_time <- c() - should_iterate <- F - for (i in seq(1:length(test_name))) { - if (length(dupl_names) == 0) { - should_iterate <- F - break + dupl_names <- regul_genes_orfs[duplicated(regul_genes_orfs)] + duplicated_values <- which(duplicated(regul_genes_orfs[order_vec])) + test_score <- test_score[order_vec] + to_add <- test_score[(which(duplicated(regul_genes_orfs[order_vec])))] + test_score <- test_score[-(which(duplicated(regul_genes_orfs[order_vec])))] + iterate_one_more_time <- c() + should_iterate <- FALSE + for (i in seq(1:length(test_name))) { + if (length(dupl_names) == 0) { + should_iterate <- FALSE + break + } + if (test_name[i] == dupl_names[1]) { + dupl_names 
<- dupl_names[-1] + test_score[i] <- paste0(test_score[i], "/", to_add[1]) + to_add <- to_add[-1] + iterate_one_more_time <- c(iterate_one_more_time, i) + } } - if (test_name[i] == dupl_names[1]) { - dupl_names <- dupl_names[-1] - test_score[i] <- paste0(test_score[i], "/", to_add[1]) - to_add <- to_add[-1] - iterate_one_more_time <- c(iterate_one_more_time, i) + if ((length(iterate_one_more_time) > 1) && (length(dupl_names) != 0)) { + should_iterate <- TRUE } - } - if ((length(iterate_one_more_time) > 1) && (length(dupl_names) != 0)) { - should_iterate <- T - } - while (should_iterate == T) { - for (i in iterate_one_more_time) { - if (test_name[i] == dupl_names[1]) { - dupl_names <- dupl_names[-1] - test_score[i] <- paste0(test_score[i], "/", to_add[1]) - to_add <- to_add[-1] - } - if (length(dupl_names) == 0) { - should_iterate <- F - break - } + while (should_iterate == TRUE) { + for (i in iterate_one_more_time) { + if (test_name[i] == dupl_names[1]) { + dupl_names <- dupl_names[-1] + test_score[i] <- paste0(test_score[i], "/", to_add[1]) + to_add <- to_add[-1] + } + if (length(dupl_names) == 0) { + should_iterate <- FALSE + break + } + } } - } - return(test_score) + return(test_score) } #' process_prism_json_suppl #' @@ -52,98 +52,98 @@ fix_duplicates <- function(test_score, order_vec, regul_genes_orfs, test_name) { #' #' @noRd process_prism_json_suppl <- function(data) { - Start <- NULL # Silence R CMD note - types <- sapply(data$prism_results$clusters, function(x) { - tolower(x$type) - }) - - types <- sapply(types, function(x) { - if (length(unlist(x)) > 1) { - tmp <- stringr::str_trim(paste0(unlist(x), collapse = "", sep = " ")) - gsub(" ", "__", tmp) - } else { - x - } - }) - - start <- sapply(data$prism_results$clusters, function(x) { - x$start - }) - end <- sapply(data$prism_results$clusters, function(x) { - x$end - }) - - - prism_data <- data.frame(Cluster = as.numeric(seq(1:length(start))), Start = as.numeric(start), Stop = as.numeric(end), Type = 
types) - - regul_genes_orfs <- sapply(data$prism_results$regulatory_genes, function(x) { - x$orf - }) - - names <- sapply(data$prism_results$orfs[[1]]$orfs, function(y) { - y$name - }) - coordinates <- sapply(data$prism_results$orfs[[1]]$orfs, function(y) { - y$coordinates - }) - - test_coords <- as.matrix(coordinates[, names %in% regul_genes_orfs]) - - - reg_genes <- data.frame(t(test_coords)) - colnames(reg_genes) <- c("Start", "Stop") - reg_genes$Type <- "regulatory" - reg_genes$Type2 <- reg_genes$Type - - test_name <- names[names %in% regul_genes_orfs] - ref_names <- test_name - order_vec <- order(match(regul_genes_orfs, test_name)) - - - test_score <- sapply(data$prism_results$regulatory_genes, function(x) { - x$score - }) - reg_genes$Score <- fix_duplicates(test_score, order_vec, regul_genes_orfs, ref_names) - test_name <- sapply(data$prism_results$regulatory_genes, function(x) { - x$name - }) - reg_genes$Name <- fix_duplicates(test_name, order_vec, regul_genes_orfs, ref_names) - test_full_name <- sapply(data$prism_results$regulatory_genes, function(x) { - x$full_name - }) - reg_genes$Full_name <- fix_duplicates(test_full_name, order_vec, regul_genes_orfs, ref_names) - resist_genes_orfs <- sapply(data$prism_results$resistance_genes, function(x) { - x$orf - }) - - test_coords_res <- as.matrix(coordinates[, names %in% resist_genes_orfs]) - - - res_genes <- data.frame(t(test_coords_res)) - - colnames(res_genes) <- c("Start", "Stop") - res_genes$Type <- "resistance" - res_genes$Type2 <- res_genes$Type - test_name <- names[names %in% resist_genes_orfs] - order_vec <- order(match(resist_genes_orfs, test_name)) - - - test_score <- sapply(data$prism_results$resistance_genes, function(x) { - x$score - }) - res_genes$Score <- fix_duplicates(test_score, order_vec, resist_genes_orfs, ref_names) - test_name <- sapply(data$prism_results$resistance_genes, function(x) { - x$name - }) - res_genes$Name <- fix_duplicates(test_name, order_vec, resist_genes_orfs, ref_names) - 
test_full_name <- sapply(data$prism_results$resistance_genes, function(x) { - x$full_name - }) - res_genes$Full_name <- fix_duplicates(test_full_name, order_vec, resist_genes_orfs, ref_names) - - final_reg <- rbind(res_genes, reg_genes) %>% dplyr::arrange(Start) - final_reg$ID <- seq(1:dim(final_reg)[1]) - final_reg$Cluster <- final_reg$ID - rownames(final_reg) <- as.numeric(seq(1:dim(final_reg)[1])) - return(list(prism_data, final_reg)) + Start <- NULL # Silence R CMD note + types <- sapply(data$prism_results$clusters, function(x) { + tolower(x$type) + }) + + types <- sapply(types, function(x) { + if (length(unlist(x)) > 1) { + tmp <- stringr::str_trim(paste0(unlist(x), collapse = "", sep = " ")) + gsub(" ", "__", tmp) + } else { + x + } + }) + + start <- sapply(data$prism_results$clusters, function(x) { + x$start + }) + end <- sapply(data$prism_results$clusters, function(x) { + x$end + }) + + + prism_data <- data.frame(Cluster = as.numeric(seq(1:length(start))), Start = as.numeric(start), Stop = as.numeric(end), Type = types) + + regul_genes_orfs <- sapply(data$prism_results$regulatory_genes, function(x) { + x$orf + }) + + names <- sapply(data$prism_results$orfs[[1]]$orfs, function(y) { + y$name + }) + coordinates <- sapply(data$prism_results$orfs[[1]]$orfs, function(y) { + y$coordinates + }) + + test_coords <- as.matrix(coordinates[, names %in% regul_genes_orfs]) + + + reg_genes <- data.frame(t(test_coords)) + colnames(reg_genes) <- c("Start", "Stop") + reg_genes$Type <- "regulatory" + reg_genes$Type2 <- reg_genes$Type + + test_name <- names[names %in% regul_genes_orfs] + ref_names <- test_name + order_vec <- order(match(regul_genes_orfs, test_name)) + + + test_score <- sapply(data$prism_results$regulatory_genes, function(x) { + x$score + }) + reg_genes$Score <- fix_duplicates(test_score, order_vec, regul_genes_orfs, ref_names) + test_name <- sapply(data$prism_results$regulatory_genes, function(x) { + x$name + }) + reg_genes$Name <- fix_duplicates(test_name, 
order_vec, regul_genes_orfs, ref_names) + test_full_name <- sapply(data$prism_results$regulatory_genes, function(x) { + x$full_name + }) + reg_genes$Full_name <- fix_duplicates(test_full_name, order_vec, regul_genes_orfs, ref_names) + resist_genes_orfs <- sapply(data$prism_results$resistance_genes, function(x) { + x$orf + }) + + test_coords_res <- as.matrix(coordinates[, names %in% resist_genes_orfs]) + + + res_genes <- data.frame(t(test_coords_res)) + + colnames(res_genes) <- c("Start", "Stop") + res_genes$Type <- "resistance" + res_genes$Type2 <- res_genes$Type + test_name <- names[names %in% resist_genes_orfs] + order_vec <- order(match(resist_genes_orfs, test_name)) + + + test_score <- sapply(data$prism_results$resistance_genes, function(x) { + x$score + }) + res_genes$Score <- fix_duplicates(test_score, order_vec, resist_genes_orfs, ref_names) + test_name <- sapply(data$prism_results$resistance_genes, function(x) { + x$name + }) + res_genes$Name <- fix_duplicates(test_name, order_vec, resist_genes_orfs, ref_names) + test_full_name <- sapply(data$prism_results$resistance_genes, function(x) { + x$full_name + }) + res_genes$Full_name <- fix_duplicates(test_full_name, order_vec, resist_genes_orfs, ref_names) + + final_reg <- rbind(res_genes, reg_genes) %>% dplyr::arrange(Start) + final_reg$ID <- seq(1:dim(final_reg)[1]) + final_reg$Cluster <- final_reg$ID + rownames(final_reg) <- as.numeric(seq(1:dim(final_reg)[1])) + return(list(prism_data, final_reg)) } diff --git a/R/fct_validation.R b/R/fct_validation.R index f4c2f08..1570774 100644 --- a/R/fct_validation.R +++ b/R/fct_validation.R @@ -6,12 +6,12 @@ #' #' @noRd check_if_column_exists <- function(data_names, column_name) { - if (column_name %in% stringr::str_to_lower(data_names)) { - return(TRUE) - } else { - shiny::showNotification(paste0(column_name, " column does not exist. Data was not integrated into analysis. 
Please recheck your data and try one more time"), type = "warning") - return(FALSE) - } + if (column_name %in% stringr::str_to_lower(data_names)) { + return(TRUE) + } else { + shiny::showNotification(paste0(column_name, " column does not exist. Data was not integrated into analysis. Please recheck your data and try one more time"), type = "warning") + return(FALSE) + } } #' validate_basic_input #' @@ -22,46 +22,46 @@ check_if_column_exists <- function(data_names, column_name) { #' #' @noRd validate_basic_input <- function(data) { - data_names <- names(data) - if (!(check_if_column_exists(data_names, "cluster"))) { - shiny::showNotification(paste0("Cluster columns was created on the fly."), type = "message") - data$Cluster <- seq(1:dim(data)[1]) - } - if (!(check_if_column_exists(data_names, "start"))) { - return(FALSE) - } - if (!(check_if_column_exists(data_names, "stop"))) { - return(FALSE) - } - if (!(check_if_column_exists(data_names, "type"))) { - return(FALSE) - } - if (length(unique(data$Cluster)) != length(data$Cluster)) { - shiny::showNotification(paste0("Cluster columns contains non unique values. It was regenerated"), type = "message") - data$Cluster <- seq(1:dim(data)[1]) - } - if ((T %in% is.na(data$Start)) | (T %in% is.na(data$Stop))) { - shiny::showNotification(paste0(" Start or Stop columns contain missing values. Please fix this and redownload dataframe"), type = "error") - return(FALSE) - } - if ((T %in% is.na(data$Type)) | ("" %in% data$Type)) { - shiny::showNotification(paste0("Type column contain empty data. 
It was populated with 'unknown' "), type = "warning") - data$Type[is.na(data$Type)] <- "unknown" - data$Type["" %in% data$Type] <- "unknown" - } - if (!(is.numeric(data$Cluster))) { - data$Cluster <- as.numeric(data$Cluster) - } - if (!(is.numeric(data$Start))) { - data$Start <- as.numeric(data$Start) - } - if (!(is.numeric(data$Stop))) { - data$Stop <- as.numeric(data$Stop) - } - if (!(is.character(data$Type))) { - data$Type <- as.character(data$Type) - } - return(list(TRUE, data)) + data_names <- names(data) + if (!(check_if_column_exists(data_names, "cluster"))) { + shiny::showNotification(paste0("Cluster columns was created on the fly."), type = "message") + data$Cluster <- seq(1:dim(data)[1]) + } + if (!(check_if_column_exists(data_names, "start"))) { + return(FALSE) + } + if (!(check_if_column_exists(data_names, "stop"))) { + return(FALSE) + } + if (!(check_if_column_exists(data_names, "type"))) { + return(FALSE) + } + if (length(unique(data$Cluster)) != length(data$Cluster)) { + shiny::showNotification(paste0("Cluster columns contains non unique values. It was regenerated"), type = "message") + data$Cluster <- seq(1:dim(data)[1]) + } + if ((TRUE %in% is.na(data$Start)) | (TRUE %in% is.na(data$Stop))) { + shiny::showNotification(paste0(" Start or Stop columns contain missing values. Please fix this and redownload dataframe"), type = "error") + return(FALSE) + } + if ((TRUE %in% is.na(data$Type)) | ("" %in% data$Type)) { + shiny::showNotification(paste0("Type column contain empty data. 
It was populated with 'unknown' "), type = "warning") + data$Type[is.na(data$Type)] <- "unknown" + data$Type["" %in% data$Type] <- "unknown" + } + if (!(is.numeric(data$Cluster))) { + data$Cluster <- as.numeric(data$Cluster) + } + if (!(is.numeric(data$Start))) { + data$Start <- as.numeric(data$Start) + } + if (!(is.numeric(data$Stop))) { + data$Stop <- as.numeric(data$Stop) + } + if (!(is.character(data$Type))) { + data$Type <- as.character(data$Type) + } + return(list(TRUE, data)) } #' validate_rre_input #' @@ -71,46 +71,46 @@ validate_basic_input <- function(data) { #' #' @noRd validate_rre_input <- function(data) { - data_names <- names(data) - if (!(check_if_column_exists(data_names, "gene.name"))) { - return(FALSE) - } - if (F %in% grepl("__", data$Gene.name)) { - return(FALSE) - } - if (!(check_if_column_exists(data_names, "e.value"))) { - return(FALSE) - } else { - data$E.value <- as.numeric(data$E.value) - } - if (!is.null(data$Probability)) { - if (!(check_if_column_exists(data_names, "score"))) { - return(FALSE) - } else { - data$Score <- as.numeric(data$Score) + data_names <- names(data) + if (!(check_if_column_exists(data_names, "gene.name"))) { + return(FALSE) } - if (!(check_if_column_exists(data_names, "p.value"))) { - return(FALSE) - } else { - data$P.value <- as.numeric(data$P.value) + if (FALSE %in% grepl("__", data$Gene.name)) { + return(FALSE) } - if (!(check_if_column_exists(data_names, "rre.start"))) { - return(FALSE) + if (!(check_if_column_exists(data_names, "e.value"))) { + return(FALSE) } else { - data$RRE.start <- as.numeric(data$RRE.start) + data$E.value <- as.numeric(data$E.value) } - if (!(check_if_column_exists(data_names, "rre.end"))) { - return(FALSE) - } else { - data$RRE.end <- as.numeric(data$RRE.end) + if (!is.null(data$Probability)) { + if (!(check_if_column_exists(data_names, "score"))) { + return(FALSE) + } else { + data$Score <- as.numeric(data$Score) + } + if (!(check_if_column_exists(data_names, "p.value"))) { + 
return(FALSE) + } else { + data$P.value <- as.numeric(data$P.value) + } + if (!(check_if_column_exists(data_names, "rre.start"))) { + return(FALSE) + } else { + data$RRE.start <- as.numeric(data$RRE.start) + } + if (!(check_if_column_exists(data_names, "rre.end"))) { + return(FALSE) + } else { + data$RRE.end <- as.numeric(data$RRE.end) + } + if (!(check_if_column_exists(data_names, "probability"))) { + return(FALSE) + } else { + data$Probability <- as.numeric(data$Probability) + } } - if (!(check_if_column_exists(data_names, "probability"))) { - return(FALSE) - } else { - data$Probability <- as.numeric(data$Probability) - } - } - return(list(TRUE, data)) + return(list(TRUE, data)) } #' validate_deep_input #' @@ -121,35 +121,35 @@ validate_rre_input <- function(data) { #' #' @noRd validate_deep_input <- function(data) { - data_names <- names(data) - col_names <- c( - "nucl_start", "nucl_end", "num_proteins", "num_domains", "num_bio_domains", "deepbgc_score", "antibacterial", - "cytotoxic", "inhibitor", "antifungal", "alkaloid", "nrp", "other", "polyketide", "ripp", "saccharide", "terpene", - "bgc_candidate_id", "sequence_id" - ) - num_columns <- c( - "nucl_start", "nucl_end", "num_proteins", "num_domains", "num_bio_domains", "deepbgc_score", "antibacterial", - "cytotoxic", "inhibitor", "antifungal", "alkaloid", "nrp", "other", "polyketide", "ripp", "saccharide", "terpene" - ) - if (!("cluster" %in% stringr::str_to_lower(data_names))) { - data$Cluster <- seq(1:dim(data)[1]) - } - for (column_name in col_names) { - if (!(check_if_column_exists(data_names, column_name))) { - return(FALSE) - } - if (T %in% is.na(data[[column_name]])) { - return(FALSE) - } - if ("" %in% data[[column_name]]) { - return(FALSE) - } - if (column_name %in% num_columns) { - names(data)[stringi::stri_trans_tolower(names(data)) == column_name] <- column_name - data[[column_name]] <- as.numeric(data[[column_name]]) - } - } - return(list(TRUE, data)) + data_names <- names(data) + col_names <- c( + 
"nucl_start", "nucl_end", "num_proteins", "num_domains", "num_bio_domains", "deepbgc_score", "antibacterial", + "cytotoxic", "inhibitor", "antifungal", "alkaloid", "nrp", "other", "polyketide", "ripp", "saccharide", "terpene", + "bgc_candidate_id", "sequence_id" + ) + num_columns <- c( + "nucl_start", "nucl_end", "num_proteins", "num_domains", "num_bio_domains", "deepbgc_score", "antibacterial", + "cytotoxic", "inhibitor", "antifungal", "alkaloid", "nrp", "other", "polyketide", "ripp", "saccharide", "terpene" + ) + if (!("cluster" %in% stringr::str_to_lower(data_names))) { + data$Cluster <- seq(1:dim(data)[1]) + } + for (column_name in col_names) { + if (!(check_if_column_exists(data_names, column_name))) { + return(FALSE) + } + if (TRUE %in% is.na(data[[column_name]])) { + return(FALSE) + } + if ("" %in% data[[column_name]]) { + return(FALSE) + } + if (column_name %in% num_columns) { + names(data)[stringi::stri_trans_tolower(names(data)) == column_name] <- column_name + data[[column_name]] <- as.numeric(data[[column_name]]) + } + } + return(list(TRUE, data)) } #' validate_gecco_input #' @@ -160,33 +160,33 @@ validate_deep_input <- function(data) { #' #' @noRd validate_gecco_input <- function(data) { - data_names <- names(data) - col_names <- c( - "start", "end", "average_p", "max_p", "type", "alkaloid_probability", "polyketide_probability", - "ripp_probability", "saccharide_probability", "terpene_probability", "nrp_probability", "other_probability", - "proteins", "domains" - ) - num_columns <- c( - "start", "end", "average_p", "max_p", "alkaloid_probability", "polyketide_probability", - "ripp_probability", "saccharide_probability", "terpene_probability", "nrp_probability", "other_probability" - ) - if (!("cluster" %in% stringr::str_to_lower(data_names))) { - data$Cluster <- seq(1:dim(data)[1]) - } - for (column_name in col_names) { - if (!(check_if_column_exists(data_names, column_name))) { - return(FALSE) - } - if (T %in% is.na(data[[column_name]])) { - 
return(FALSE) - } - if ("" %in% data[[column_name]]) { - return(FALSE) - } - if (column_name %in% num_columns) { - names(data)[stringi::stri_trans_tolower(names(data)) == column_name] <- column_name - data[[column_name]] <- as.numeric(data[[column_name]]) - } - } - return(list(TRUE, data)) + data_names <- names(data) + col_names <- c( + "start", "end", "average_p", "max_p", "type", "alkaloid_probability", "polyketide_probability", + "ripp_probability", "saccharide_probability", "terpene_probability", "nrp_probability", + "proteins", "domains" + ) + num_columns <- c( + "start", "end", "average_p", "max_p", "alkaloid_probability", "polyketide_probability", + "ripp_probability", "saccharide_probability", "terpene_probability", "nrp_probability" + ) + if (!("cluster" %in% stringr::str_to_lower(data_names))) { + data$Cluster <- seq(1:dim(data)[1]) + } + for (column_name in col_names) { + if (!(check_if_column_exists(data_names, column_name))) { + return(FALSE) + } + if (TRUE %in% is.na(data[[column_name]])) { + return(FALSE) + } + if ("" %in% data[[column_name]]) { + return(FALSE) + } + if (column_name %in% num_columns) { + names(data)[stringi::stri_trans_tolower(names(data)) == column_name] <- column_name + data[[column_name]] <- as.numeric(data[[column_name]]) + } + } + return(list(TRUE, data)) } diff --git a/R/mod_arts_tree.R b/R/mod_arts_tree.R new file mode 100644 index 0000000..46d9c2e --- /dev/null +++ b/R/mod_arts_tree.R @@ -0,0 +1,70 @@ +#' ARTS tree UI functions +#' + +mod_arts_tree_ui <- function(id) { + ns <- NS(id) + tagList( + shiny::fluidRow( + tags$div( + id = "arts_tree_data1", + div( + id = "id1", + shinydashboardPlus::box( + title = "Phylogenetic tree", + id = "arts_tree_box", + collapsible = TRUE, + closable = TRUE, + width = 12, + shiny::selectInput(ns("phylo_file"), "Choose a file to build a tree", choices = c(), selected = ""), + div( + style = "height: 600px; overflow-y: scroll; overflow-x: scroll", # Adjust height as needed, may be needed indeed 
+ shiny::plotOutput(ns("arts_tree"), height = "2000px",width = "1500px") %>% + shinycssloaders::withSpinner() + ) + ) + ) + ) + ) + ) +} + +#'arts_tree server function +#' + +mod_arts_tree_server <- function(id, vals) { + moduleServer(id, function(input, output, session) { + ns <- session$ns + + # observing changes of input for tree plot + observe({ + shiny::updateSelectInput( + session, + 'phylo_file', + choices = paste0(vals$arts_tree_data$TreesFiles), + selected = vals$arts_tree_data$TreesFiles[1] + ) + }) + + # Define a reactive expression for the tree + tree_data <- reactive({ + tree <- list() + # Create the tree object + tree$core <- vals$arts_tree_data$Trees[vals$arts_tree_data$TreesFiles == input$phylo_file][[1]] + tree$type <- "rectangular" + return(tree) + }) + + # Render the plot directly within renderPlot + output$arts_tree <- renderPlot(res = 90,{ + req(vals$arts_data_input == TRUE) + + # Create and render the plot + tree_plot <- ggtree::ggtree(tree_data()$core, layout = tree_data()$type) + + ggtree::geom_tree() + + ggtree::theme_tree() + + ggtree::geom_tiplab(size = 2.2, color = 'firebrick') + + return(tree_plot) + }) + }) +} diff --git a/R/mod_barplot_rank.R b/R/mod_barplot_rank.R index 2260062..9c6a117 100644 --- a/R/mod_barplot_rank.R +++ b/R/mod_barplot_rank.R @@ -8,98 +8,101 @@ #' #' @importFrom shiny NS tagList mod_barplot_rank_ui <- function(id) { - ns <- NS(id) - tagList( - div( - id = "id1", - shinyjqui::jqui_resizable( - shinydashboardPlus::box( - title = "Ranking barplot", - id = "ranking_barplot_box", - collapsible = TRUE, - closable = TRUE, - height = "100%", - plotly::plotlyOutput(ns("barplot_rank"), height = "600px") %>% - shinycssloaders::withSpinner() - ), - options = list(handles = "w,e") - ) + ns <- NS(id) + tagList( + div( + id = "id1", + shinyjqui::jqui_resizable( + shinydashboardPlus::box( + title = "Ranking barplot", + id = "ranking_barplot_box", + collapsible = TRUE, + closable = TRUE, + height = "100%", + 
plotly::plotlyOutput(ns("barplot_rank"), height = "600px") %>% + shinycssloaders::withSpinner() + ), + options = list(handles = "w,e") + ) + ) ) - ) } #' barplot_rank Server Functions #' #' @noRd mod_barplot_rank_server <- function(id, vals, data_uploads, soft_names, soft_namings, data_to_use, abbr) { - moduleServer(id, function(input, output, session) { - ns <- session$ns - # Silence R CMD note - Cluster <- Count <- Type <- - Start <- Start <- Stop <- - Label <- NULL - output$barplot_rank <- plotly::renderPlotly({ - shiny::req(vals$data_upload_count > 1) - shiny::req(vals$need_filter == F) - shiny::req(vals$can_plot_barplot_rank == T) + moduleServer(id, function(input, output, session) { + ns <- session$ns + # Silence R CMD note + Cluster <- Count <- Type <- + Start <- Start <- Stop <- + Label <- NULL + output$barplot_rank <- plotly::renderPlotly({ + shiny::req(vals$data_upload_count > 1) + shiny::req(vals$need_filter == FALSE) + shiny::req(vals$can_plot_barplot_rank == TRUE) - antismash_count <- NULL - prism_count <- NULL - deep_count <- NULL - rre_count <- NULL - sempi_count <- NULL - prism_supp_count <- NULL - arts_count <- NULL - gecco_count <- NULL + antismash_count <- NULL + compare_count <- NULL + emerald_count <- NULL + ripp_count <- NULL + prism_count <- NULL + deep_count <- NULL + rre_count <- NULL + sempi_count <- NULL + prism_supp_count <- NULL + arts_count <- NULL + gecco_count <- NULL - if (is.null(vals$inters_filtered)) { - inters <- vals$inters - } else { - inters <- vals$inters_filtered - } - index <- 1 - ranking_data <- NULL - for (upload in data_uploads) { - if (vals[[upload]] == T) { - counts_var <- plyr::count(as.factor(unlist(sapply(inters[[soft_names[index]]], function(x) { - x$to - })))) - # Check if ID is in dataframe and if it is - extract all information about to the local dataframe - anot_var <- vals[[data_to_use[index]]][vals[[data_to_use[index]]]$Cluster %in% as.numeric(levels(counts_var$x)), ] - # Add prefices to the ID to plot for a 
barplot. - counts_var$x <- sapply(counts_var$x, function(x) paste0(abbr[index], ": ", x)) - # Add label column to the dataframe, from which we will plot - counts_var$label <- rep(soft_namings[index], length(counts_var$x)) - # Add type to the dataframe, from which we would plot (from annotation dataframe) - counts_var$Type <- anot_var$Type - # Add Start positions (to visualize on hover) - counts_var$Start <- anot_var$Start - # Add Stop positions (to visualize on hover) - counts_var$Stop <- anot_var$Stop - if (is.null(ranking_data)) { - ranking_data <- counts_var - } else { - ranking_data <- rbind(ranking_data, counts_var) - } - } - index <- index + 1 - } + if (is.null(vals$inters_filtered)) { + inters <- vals$inters + } else { + inters <- vals$inters_filtered + } + index <- 1 + ranking_data <- NULL + for (upload in data_uploads) { + if (vals[[upload]] == TRUE) { + counts_var <- plyr::count(as.factor(unlist(sapply(inters[[soft_names[index]]], function(x) { + x$to + })))) + # Check if ID is in dataframe and if it is - extract all information about to the local dataframe + anot_var <- vals[[data_to_use[index]]][vals[[data_to_use[index]]]$Cluster %in% as.numeric(levels(counts_var$x)), ] + # Add prefices to the ID to plot for a barplot. 
+ counts_var$x <- sapply(counts_var$x, function(x) paste0(abbr[index], ": ", x)) + # Add label column to the dataframe, from which we will plot + counts_var$label <- rep(soft_namings[index], length(counts_var$x)) + # Add type to the dataframe, from which we would plot (from annotation dataframe) + counts_var$Type <- anot_var$Type + # Add Start positions (to visualize on hover) + counts_var$Start <- anot_var$Start + # Add Stop positions (to visualize on hover) + counts_var$Stop <- anot_var$Stop + if (is.null(ranking_data)) { + ranking_data <- counts_var + } else { + ranking_data <- rbind(ranking_data, counts_var) + } + } + index <- index + 1 + } - # Fix column names in the master dataframe - colnames(ranking_data) <- c("Cluster", "Count", "Label", "Type", "Start", "Stop") - # Plot - plotly::ggplotly(ggplot2::ggplot(ranking_data, ggplot2::aes(x = Cluster, y = Count, Type = Type, Start = Start, Stop = Stop)) + - ggplot2::geom_bar(stat = "identity", ggplot2::aes(fill = Label)) + - ggplot2::theme( - axis.text.x = ggplot2::element_text(angle = 60, hjust = 1, size = 10), - axis.text.y = ggplot2::element_text(size = 14) - ) + - ggplot2::ggtitle("Number of times cluster is annotated with other tool"), - tooltip = c("Type", "Start", "Stop") - ) + # Fix column names in the master dataframe + colnames(ranking_data) <- c("Cluster", "Count", "Label", "Type", "Start", "Stop") + # Plot + plotly::ggplotly(ggplot2::ggplot(ranking_data, ggplot2::aes(x = Cluster, y = Count, Type = Type, Start = Start, Stop = Stop)) + + ggplot2::geom_bar(stat = "identity", ggplot2::aes(fill = Label)) + + ggplot2::theme( + axis.text.x = ggplot2::element_text(angle = 60, hjust = 1, size = 10), + axis.text.y = ggplot2::element_text(size = 14) + ) + + ggplot2::ggtitle("Number of times cluster is annotated with other tool"), + tooltip = c("Type", "Start", "Stop") + ) + }) }) - }) } ## To be copied in the UI diff --git a/R/mod_biocircos.R b/R/mod_biocircos.R index 9982d2a..ed3edb0 100644 --- 
a/R/mod_biocircos.R +++ b/R/mod_biocircos.R @@ -8,100 +8,100 @@ #' #' @importFrom shiny NS tagList mod_biocircos_ui <- function(id) { - ns <- NS(id) - tagList( - shiny::fluidRow( - tags$div( - id = "biocircos_data1", - div( - id = "id1", - shinydashboardPlus::box( - title = "Biocircos plot", - id = "biocircos_plot_box", - collapsible = TRUE, - closable = TRUE, - width = 12, - shiny::checkboxInput(ns("ShowBiocircosColoring"), "Show Biocircos coloring scheme"), - sidebar = shinydashboardPlus::boxSidebar( - id = "biocircos_box_sidebar", - width = 25, - shiny::checkboxInput("biocircos_color", "Make arcs in biocircos colorful, based on the class"), - shiny::checkboxInput("label_color", "Make links in biocircos colorful, based on the class"), - shiny::selectInput("label_color_class", "Choose the mode to color the links", - choices = c( - "Hierarchical-based" = "H", - "Purity-based" = "P", - "Reference column-based" = "R" - ), - selected = "H" - ), - shiny::selectInput("ref_col_biocircos", "Choose reference column to color the links", choices = c(""), selected = "") - ), - BioCircos::BioCircosOutput(ns("biocircos"), height = "900px") %>% - shinycssloaders::withSpinner() - ) + ns <- NS(id) + tagList( + shiny::fluidRow( + tags$div( + id = "biocircos_data1", + div( + id = "id1", + shinydashboardPlus::box( + title = "Biocircos plot", + id = "biocircos_plot_box", + collapsible = TRUE, + closable = TRUE, + width = 12, + shiny::checkboxInput(ns("ShowBiocircosColoring"), "Show Biocircos coloring scheme"), + sidebar = shinydashboardPlus::boxSidebar( + id = "biocircos_box_sidebar", + width = 25, + shiny::checkboxInput("biocircos_color", "Make arcs in biocircos colorful, based on the class"), + shiny::checkboxInput("label_color", "Make links in biocircos colorful, based on the class"), + shiny::selectInput("label_color_class", "Choose the mode to color the links", + choices = c( + "Hierarchical-based" = "H", + "Purity-based" = "P", + "Reference column-based" = "R" + ), + selected = 
"H" + ), + shiny::selectInput("ref_col_biocircos", "Choose reference column to color the links", choices = c(""), selected = "") + ), + BioCircos::BioCircosOutput(ns("biocircos"), height = "900px") %>% + shinycssloaders::withSpinner() + ) + ) + ) + ), + shiny::fluidRow( + tags$div( + id = "biocircos_data2", + div( + id = "id1", + shiny::uiOutput(ns("biocircos_coloring")) + ) + ) ) - ) - ), - shiny::fluidRow( - tags$div( - id = "biocircos_data2", - div( - id = "id1", - shiny::uiOutput(ns("biocircos_coloring")) - ) - ) ) - ) } #' biocircos Server Functions #' #' @noRd mod_biocircos_server <- function(id, vals) { - moduleServer(id, function(input, output, session) { - ns <- session$ns + moduleServer(id, function(input, output, session) { + ns <- session$ns - output$biocircos <- BioCircos::renderBioCircos({ - shiny::req(vals$data_upload_count > 1) + output$biocircos <- BioCircos::renderBioCircos({ + shiny::req(vals$data_upload_count > 1) - # Plot BioCircos - BioCircos::BioCircos(vals$tracklist, genome = vals$Biocircos_chromosomes, genomeTicksScale = 1e+6) - }) + # Plot BioCircos + BioCircos::BioCircos(vals$tracklist, genome = vals$Biocircos_chromosomes, genomeTicksScale = 1e+6) + }) - output$biocircos_legend <- DT::renderDataTable({ - shiny::req(vals$data_upload_count >= 1) - rownames <- FALSE - new_data <- vals$coloring_datatable - color_vec <- new_data$x$data$Color - options(DT.options = list(pageLength = 50)) - new_data %>% DT::formatStyle("Color", backgroundColor = DT::styleEqual(color_vec, color_vec)) - }) + output$biocircos_legend <- DT::renderDataTable({ + shiny::req(vals$data_upload_count >= 1) + rownames <- FALSE + new_data <- vals$coloring_datatable + color_vec <- new_data$x$data$Color + options(DT.options = list(pageLength = 50)) + new_data %>% DT::formatStyle("Color", backgroundColor = DT::styleEqual(color_vec, color_vec)) + }) - output$biocircos_coloring <- shiny::renderUI({ - if (input$ShowBiocircosColoring == T) { - shinydashboardPlus::box( - title = 
"Biocircos coloring scheme", - closable = TRUE, - collapsible = TRUE, - DT::dataTableOutput(ns("biocircos_legend")) %>% - shinycssloaders::withSpinner() - ) - } - }) + output$biocircos_coloring <- shiny::renderUI({ + if (input$ShowBiocircosColoring == TRUE) { + shinydashboardPlus::box( + title = "Biocircos coloring scheme", + closable = TRUE, + collapsible = TRUE, + DT::dataTableOutput(ns("biocircos_legend")) %>% + shinycssloaders::withSpinner() + ) + } + }) - # Updating values in Datatable on edit - shiny::observeEvent(input$biocircos_legend_cell_edit, { - if (input$biocircos_legend_cell_edit$col[1] == 0) { - vals$coloring_datatable$x$data$Name <- input$biocircos_legend_cell_edit$value - } else if (input$biocircos_legend_cell_edit$col[1] == 1) { - vals$coloring_datatable$x$data$Color <- input$biocircos_legend_cell_edit$value - } else if (input$biocircos_legend_cell_edit$col[1] == 2) { - vals$coloring_datatable$x$data$Hierarchy <- input$biocircos_legend_cell_edit$value - } + # Updating values in Datatable on edit + shiny::observeEvent(input$biocircos_legend_cell_edit, { + if (input$biocircos_legend_cell_edit$col[1] == 0) { + vals$coloring_datatable$x$data$Name <- input$biocircos_legend_cell_edit$value + } else if (input$biocircos_legend_cell_edit$col[1] == 1) { + vals$coloring_datatable$x$data$Color <- input$biocircos_legend_cell_edit$value + } else if (input$biocircos_legend_cell_edit$col[1] == 2) { + vals$coloring_datatable$x$data$Hierarchy <- input$biocircos_legend_cell_edit$value + } + }) }) - }) } ## To be copied in the UI diff --git a/R/mod_deep_reference.R b/R/mod_deep_reference.R index 15aa6b0..dc2a768 100644 --- a/R/mod_deep_reference.R +++ b/R/mod_deep_reference.R @@ -8,44 +8,44 @@ #' #' @importFrom shiny NS tagList mod_deep_reference_ui <- function(id) { - ns <- NS(id) - shiny::tagList( - div( - id = "id2", - shinyjqui::jqui_resizable( - shinydashboardPlus::box( - title = "Annotation comparison to the reference", - id = 
"annotation_reference_comparison_box", - collapsible = TRUE, - closable = TRUE, - width = NULL, - height = "100%", - shiny::selectInput("ref", "Choose reference data", - choices = c(""), - selected = "" - ), - plotly::plotlyOutput(ns("deep_reference")) %>% - shinycssloaders::withSpinner() - ), - options = list(handles = "w,e") - ) + ns <- NS(id) + shiny::tagList( + div( + id = "id2", + shinyjqui::jqui_resizable( + shinydashboardPlus::box( + title = "Annotation comparison to the reference", + id = "annotation_reference_comparison_box", + collapsible = TRUE, + closable = TRUE, + width = NULL, + height = "100%", + shiny::selectInput("ref", "Choose reference data", + choices = c(""), + selected = "" + ), + plotly::plotlyOutput(ns("deep_reference")) %>% + shinycssloaders::withSpinner() + ), + options = list(handles = "w,e") + ) + ) ) - ) } #' deep_reference Server Functions #' #' @noRd mod_deep_reference_server <- function(id, vals) { - shiny::moduleServer(id, function(input, output, session) { - ns <- session$ns + shiny::moduleServer(id, function(input, output, session) { + ns <- session$ns - output$deep_reference <- plotly::renderPlotly({ - shiny::req(vals$deep_reference_to_plot) - vals$can_plot_deep_ref_2 <- T - vals$deep_reference_to_plot + output$deep_reference <- plotly::renderPlotly({ + shiny::req(vals$deep_reference_to_plot) + vals$can_plot_deep_ref_2 <- TRUE + vals$deep_reference_to_plot + }) }) - }) } ## To be copied in the UI diff --git a/R/mod_deep_reference_2.R b/R/mod_deep_reference_2.R index 685c599..f9c80dc 100644 --- a/R/mod_deep_reference_2.R +++ b/R/mod_deep_reference_2.R @@ -8,162 +8,184 @@ #' #' @importFrom shiny NS tagList mod_deep_reference_2_ui <- function(id) { - ns <- NS(id) - tagList( - div( - id = "anno_div_1", - shinyjqui::jqui_resizable( - shinydashboardPlus::box( - title = "Annotations reference", - id = "annotation_reference_box", - height = "100%", - width = NULL, - collapsible = TRUE, - closable = TRUE, - 
plotly::plotlyOutput(ns("deep_reference_2")) %>% - shinycssloaders::withSpinner() - ), - options = list(handles = "w,e") - ) + ns <- NS(id) + tagList( + div( + id = "anno_div_1", + shinyjqui::jqui_resizable( + shinydashboardPlus::box( + title = "Annotations reference", + id = "annotation_reference_box", + height = "100%", + width = NULL, + collapsible = TRUE, + closable = TRUE, + plotly::plotlyOutput(ns("deep_reference_2")) %>% + shinycssloaders::withSpinner() + ), + options = list(handles = "w,e") + ) + ) ) - ) } #' deep_reference_2 Server Functions #' #' @noRd mod_deep_reference_2_server <- function(id, vals, data_uploads, data_to_use) { - moduleServer(id, function(input, output, session) { - ns <- session$ns - # Silence R CMD note - x <- y <- yend <- xend <- - ID <- Software <- Chr <- - Type2 <- Start <- Stop <- - Type <- num_domains <- deepbgc_score <- - activity <- Score <- E_value <- P_value <- - RRE_start <- RRE_stop <- Probability <- - Name <- Full_name <- Hit <- Core <- - Bitscore <- Count <- Model <- Num_proteins <- - Num_domains <- Average_p <- Max_p <- NULL - output$deep_reference_2 <- plotly::renderPlotly({ - shiny::req(vals$can_plot_deep_ref_2 == T) - vals$can_plot_deep_ref_2 == F - rename_y_axis <- shiny::isolate(vals$rename_y_axis) - data <- NULL + moduleServer(id, function(input, output, session) { + ns <- session$ns + # Silence R CMD note + x <- y <- yend <- xend <- + ID <- Software <- Chr <- + Type2 <- Start <- Stop <- + Type <- num_domains <- deepbgc_score <- + activity <- Score <- E_value <- P_value <- + RRE_start <- RRE_stop <- Probability <- + Name <- Full_name <- Hit <- Core <- + Bitscore <- Count <- Model <- Num_proteins <- + Num_domains <- Average_p <- Max_p <- NULL + output$deep_reference_2 <- plotly::renderPlotly({ + shiny::req(vals$can_plot_deep_ref_2 == TRUE) + vals$can_plot_deep_ref_2 == FALSE + rename_y_axis <- shiny::isolate(vals$rename_y_axis) + data <- NULL - index <- 1 - for (upload in data_uploads) { - if (is.null(data)) { - if 
(vals[[upload]] == T) { - if (dim(vals[[data_to_use[index]]])[1] != 0) { - data <- vals[[data_to_use[index]]] + index <- 1 + for (upload in data_uploads) { + if (is.null(data)) { + if (vals[[upload]] == TRUE) { + if (dim(vals[[data_to_use[index]]])[1] != 0) { + data <- vals[[data_to_use[index]]] + } + } + } + index <- index + 1 } - } - } - index <- index + 1 - } - tooltip <- c( - "Software", "ID", "Start", "Stop", "Type", "num_domains", "deepbgc_score", "activity", "Score", "E_value", - "P_value", "RRE_start", "RRE_stop", "Probability", "Name", "Full_name", "Hit", "Core", "Count", "Bitscore", "Model", - "Num_domains", "Num_proteins", "Average_p", "Max_p" - ) + tooltip <- c( + "Software", "ID", "Start", "Stop", "Type", "num_domains", "deepbgc_score", "activity", "Score", "E_value", + "P_value", "RRE_start", "RRE_stop", "Probability", "Name", "Full_name", "Hit", "Core", "Count", "Bitscore", "Model", + "Num_domains", "Num_proteins", "Average_p", "Max_p" + ) - plot <- ggplot2::ggplot(data, ggplot2::aes(x = vals$chr_len, y = Chr)) - if (vals$anti_data_input == TRUE) { - plot <- plot + - suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_a, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type - ), size = 3)) - } - if (vals$deep_data_input == TRUE) { - if (dim(vals$seg_df_ref_d)[1] > 0) { - plot <- plot + - suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_d, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type, num_domains = num_domains, - deepbgc_score = deepbgc_score, activity = activity - ), size = 3)) - } - } - if (vals$rre_data_input == TRUE) { - if (vals$rre_more == T) { - plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_r, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Score = Score, Software = Software, - ID = ID, Start = Start, Stop = 
Stop, Type = Type, E_value = E_value, - P_value = P_value, RRE_start = RRE_start, RRE_stop = RRE_stop, - Probability = Probability - ), size = 3)) - } else { - plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_r, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type, E_value = E_value - ), size = 3)) - } - } - if (vals$prism_data_input == TRUE) { - plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_p, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type - ), size = 3)) - } - if (vals$sempi_data_input == TRUE) { - plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_s, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type - ), size = 3)) - } - if (vals$prism_supp_plot == TRUE) { - plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_p_s, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, ID = ID, - Start = Start, Stop = Stop, Type = Type, Name = Name, Full_name = Full_name, - Score = Score - ), size = 3)) - } - if (vals$arts_data_input == TRUE) { - plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_ar, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type, Hit = Hit, - Core = Core, E_value = E_value, Bitscore = Bitscore, Count = Count, - Model = Model - ), size = 3)) - } - if (vals$gecco_data_input == TRUE) { - if (dim(vals$seg_df_ref_g)[1] > 0) { - plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_g, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type, Num_proteins = Num_proteins, - 
Num_domains = Num_domains, Average_p = Average_p, Max_p = Max_p - ), size = 3)) - } - } - to_plot <- plotly::ggplotly(plot + - ggplot2::scale_y_discrete(labels = rename_y_axis) + - ggplot2::theme(axis.text.y = ggplot2::element_text(size = 10)) + - ggplot2::ylab("") + - ggplot2::xlab("Chromosome length") + - ggplot2::theme(legend.title = ggplot2::element_blank()) + - ggplot2::ggtitle("All annotations"), - # What actually to visualize in tooltip - tooltip = tooltip - ) - to_plot %>% plotly::layout( - legend = list( - font = list( - family = "sans-serif", - size = 12, - color = "#000" - ), - bordercolor = "#FFFFFF", - borderwidth = 2, - title = list(text = " Cluster Types ") - ), - autosize = TRUE - ) - }) # %>% shiny::debounce(200) - }) + plot <- ggplot2::ggplot(data, ggplot2::aes(x = vals$chr_len, y = Chr)) + + if (vals$emerald_data_input == TRUE){ + plot <- plot + + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_emer, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type + ), size = 3)) + } + if (vals$compare_data_input == TRUE){ + plot <- plot + + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_compare, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type + ), size = 3)) + } + if (vals$ripp_data_input == TRUE){ + plot <- plot + + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_ri, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type + ), size = 3)) + } + if (vals$anti_data_input == TRUE) { + plot <- plot + + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_a, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type + ), size = 3)) + } + if (vals$deep_data_input == TRUE) { + if 
(dim(vals$seg_df_ref_d)[1] > 0) { + plot <- plot + + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_d, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, num_domains = num_domains, + deepbgc_score = deepbgc_score, activity = activity + ), size = 3)) + } + } + if (vals$rre_data_input == TRUE) { + if (vals$rre_more == TRUE) { + plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_r, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Score = Score, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, E_value = E_value, + P_value = P_value, RRE_start = RRE_start, RRE_stop = RRE_stop, + Probability = Probability + ), size = 3)) + } else { + plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_r, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, E_value = E_value + ), size = 3)) + } + } + if (vals$prism_data_input == TRUE) { + plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_p, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type + ), size = 3)) + } + if (vals$sempi_data_input == TRUE) { + plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_s, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type + ), size = 3)) + } + if (vals$prism_supp_plot == TRUE) { + plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_p_s, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, ID = ID, + Start = Start, Stop = Stop, Type = Type, Name = Name, Full_name = Full_name, + Score = Score + ), size = 3)) + } + if (vals$arts_data_input == TRUE) { + plot <- plot + 
suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_ar, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, Hit = Hit, + Core = Core, E_value = E_value, Bitscore = Bitscore, Count = Count, + Model = Model + ), size = 3)) + } + if (vals$gecco_data_input == TRUE) { + if (dim(vals$seg_df_ref_g)[1] > 0) { + plot <- plot + suppressWarnings(ggplot2::geom_segment(data = vals$seg_df_ref_g, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, Num_proteins = Num_proteins, + Num_domains = Num_domains, Average_p = Average_p, Max_p = Max_p + ), size = 3)) + } + } + to_plot <- plotly::ggplotly(plot + + ggplot2::scale_y_discrete(labels = rename_y_axis) + + ggplot2::theme(axis.text.y = ggplot2::element_text(size = 10)) + + ggplot2::ylab("") + + ggplot2::xlab("Chromosome length") + + ggplot2::theme(legend.title = ggplot2::element_blank()) + + ggplot2::ggtitle("All annotations"), + # What actually to visualize in tooltip + tooltip = tooltip + ) + to_plot %>% plotly::layout( + legend = list( + font = list( + family = "sans-serif", + size = 12, + color = "#000" + ), + bordercolor = "#FFFFFF", + borderwidth = 2, + title = list(text = " Cluster Types ") + ), + autosize = TRUE + ) + }) # %>% shiny::debounce(200) + }) } ## To be copied in the UI diff --git a/R/mod_deepbgc_plots.R b/R/mod_deepbgc_plots.R index 53b0422..4518433 100644 --- a/R/mod_deepbgc_plots.R +++ b/R/mod_deepbgc_plots.R @@ -8,240 +8,293 @@ #' #' @importFrom shiny NS tagList mod_deepbgc_plots_ui <- function(id) { - ns <- NS(id) - tagList( - shiny::fluidRow( - tags$div( - id = "deep_data1", - div( - id = "id1", - shinyjqui::jqui_resizable(shinydashboardPlus::box( - title = "DeepBGC comparison", - id = "deep_comparison_box", - collapsible = TRUE, - closable = TRUE, - height = "100%", - shiny::plotOutput(ns("deep_barplot"), height = "500px", ) %>% - 
shinycssloaders::withSpinner() - ), options = list(handles = "w,e")) + ns <- NS(id) + tagList( + shiny::fluidRow( + tags$div( + id = "deep_data1", + div( + id = "id1", + shinyjqui::jqui_resizable(shinydashboardPlus::box( + title = "DeepBGC comparison", + id = "deep_comparison_box", + collapsible = TRUE, + closable = TRUE, + height = "100%", + shiny::plotOutput(ns("deep_barplot"), height = "500px", ) %>% + shinycssloaders::withSpinner() + ), options = list(handles = "w,e")) + ), + div( + id = "id2", + shinyjqui::jqui_resizable(shinydashboardPlus::box( + title = "DeepBGC comparison controls", + id = "deep_comparison_controls_box", + collapsible = TRUE, + closable = TRUE, + shiny::selectInput(ns("ref_comparison"), "Choose data for comparison with DeepBGC", choices = c(""), selected = ""), + # Score to use for thresholds + shiny::selectInput(ns("score_type"), "Choose score type to set threshold", + choices = c( + "Activity score" = "Activity", + "Cluster_type score" = "Cluster_Type", + "DeepBGC score" = "DeepBGC" + ), + selected = "Activity score" + ), + # Chose step for barplot (as a threshold to draw a bar) + shiny::sliderInput(ns("plot_step"), "Choose step for plots(barplot)", min = 1, max = 50, value = 10), + shiny::sliderInput(ns("plot_start"), "Chose plot start point(barplot)", min = 0, max = 99, value = 0) + ), options = list(handles = "w,e")) + ) + ) ), - div( - id = "id2", - shinyjqui::jqui_resizable(shinydashboardPlus::box( - title = "DeepBGC comparison controls", - id = "deep_comparison_controls_box", - collapsible = TRUE, - closable = TRUE, - shiny::selectInput(ns("ref_comparison"), "Choose data for comparison with DeepBGC", choices = c(""), selected = ""), - # Score to use for thresholds - shiny::selectInput(ns("score_type"), "Choose score type to set threshold", - choices = c( - "Activity score" = "Activity", - "Cluster_type score" = "Cluster_Type", - "DeepBGC score" = "DeepBGC" - ), - selected = "Activity score" - ), - # Chose step for barplot (as a 
threshold to draw a bar) - shiny::sliderInput(ns("plot_step"), "Choose step for plots(barplot)", min = 1, max = 50, value = 10), - shiny::sliderInput(ns("plot_start"), "Chose plot start point(barplot)", min = 0, max = 99, value = 0) - ), options = list(handles = "w,e")) + shiny::fluidRow( + tags$div( + id = "deep_data2", + div( + id = "id2", + shinyjqui::jqui_resizable(shinydashboardPlus::box( + title = "DeepBGC rate", + id = "deep_rate_box", + collapsible = TRUE, + height = "100%", + plotly::plotlyOutput(ns("deep_rate"), height = "500px", ) %>% + shinycssloaders::withSpinner() + ), options = list(handles = "w,e")) + ) + ) ) - ) - ), - shiny::fluidRow( - tags$div( - id = "deep_data2", - div( - id = "id2", - shinyjqui::jqui_resizable(shinydashboardPlus::box( - title = "DeepBGC rate", - id = "deep_rate_box", - collapsible = TRUE, - height = "100%", - plotly::plotlyOutput(ns("deep_rate"), height = "500px", ) %>% - shinycssloaders::withSpinner() - ), options = list(handles = "w,e")) - ) - ) ) - ) } #' deep_barplot Server Functions #' #' @noRd mod_deepbgc_plots_server <- function(id, vals, score_a, score_d, score_c) { - moduleServer(id, function(input, output, session) { - ns <- session$ns - # Silence R CMD note - Start <- Stop <- Source <- - Quantity <- Score <- Novelty_rate <- - Annotation_rate <- Skip_rate <- Rates_data <- - Rates <- NULL - output$deep_barplot <- shiny::renderPlot({ - shiny::req((vals$deep_data_input == T) & ((vals$anti_data_input == T) | (vals$prism_data_input == T) | (vals$sempi_data_input == T))) - - - # Create empty dataframe to populate later - fullnes_of_annotation <- data.frame(NA, NA, NA) - colnames(fullnes_of_annotation) <- c("Score", "Source", "Quantity") - fullnes_of_annotation <- tidyr::drop_na(fullnes_of_annotation) - - deep_inter_1 <- vals$deep_data_filtered - # Decide which score to use for basic thresholds on x axis - if (input$score_type == "Activity") { - score <- "score_a" - } else if (input$score_type == "DeepBGC") { - score <- 
"score_d" - } else if (input$score_type == "Cluster_Type") { - score <- "score_c" - } - deep_inter_1$score <- deep_inter_1[[score]] - # Loop over thresholds with given step. Get the interception of antismash data with DeepBGC one at given x axis thresholds with additionsl ones - for (dataframe_1 in seq(input$plot_start, 99, input$plot_step)) { - deep_inter <- deep_inter_1 %>% - dplyr::filter(score >= dataframe_1 / 100) %>% - dplyr::select(Start, Stop) - if (length(deep_inter$Start) > 0) { - deep_inter$seqnames <- "chr" - } - - - # Store antismash bgc start amd atop values as matrix - if (input$ref_comparison == "Antismash") { - anti_inter <- shiny::isolate(vals$anti_data) %>% - dplyr::select(Start, Stop) - anti_inter$seqnames <- "chr" - } else if (input$ref_comparison == "PRISM") { - anti_inter <- shiny::isolate(vals$prism_data) %>% - dplyr::select(Start, Stop) - anti_inter$seqnames <- "chr" - } else if (input$ref_comparison == "SEMPI") { - anti_inter <- shiny::isolate(vals$sempi_data) %>% - dplyr::select(Start, Stop) - anti_inter$seqnames <- "chr" - } - - - - # Get the interception of two matrices - if (length(deep_inter$Start) > 0) { - query <- GenomicRanges::makeGRangesFromDataFrame(deep_inter) - subject <- GenomicRanges::makeGRangesFromDataFrame(anti_inter) - interseption <- GenomicRanges::findOverlaps(query, subject) - inter_bgc <- length(interseption@from) - len_new <- length(deep_inter$seqnames) - inter_bgc - } else { - inter_bgc <- 0 - len_new <- 0 - } - - if (input$ref_comparison == "Antismash") { - used_antismash <- length(shiny::isolate(vals$anti_data$Cluster)) - inter_bgc - cols <- c("Only Antismash", "DeepBGC+Antismash", "Only DeepBGC") - title <- ggplot2::ggtitle("Comparison of Antismash and DeepBGC annotations at given score threshold") - } else if (input$ref_comparison == "PRISM") { - used_antismash <- length(shiny::isolate(vals$prism_data$Cluster)) - inter_bgc - cols <- c("Only PRISM", "DeepBGC+PRISM", "Only DeepBGC") - title <- 
ggplot2::ggtitle("Comparison of PRISM and DeepBGC annotations at given score threshold") - } else if (input$ref_comparison == "SEMPI") { - used_antismash <- length(shiny::isolate(vals$sempi_data$Cluster)) - inter_bgc - cols <- c("Only SEMPI", "DeepBGC+SEMPI", "Only DeepBGC") - title <- ggplot2::ggtitle("Comparison of SEMPI and DeepBGC annotations at given score threshold") - } - - # Combine all vectors into one dataframe - fullnes_of_annotation_1 <- data.frame( - c(rep(c(as.character(dataframe_1)), 3)), - cols, c(used_antismash, inter_bgc, len_new) - ) - colnames(fullnes_of_annotation_1) <- c("Score", "Source", "Quantity") - # Combine previously created empty dataframe with this one to store results - fullnes_of_annotation <- rbind(fullnes_of_annotation, fullnes_of_annotation_1) - } - - # Store dataframe in reactive value for later use. - vals$fullness_deep <- data.frame(fullnes_of_annotation) - # write.csv(fullnes_of_annotation, "fullness.csv", row.names = F) - - # Make text to show on a barplot to point on additional scores' thresholds - annotateText <- paste("Applied additional thresholds", paste("Activity score:", as.character(score_a)), - paste("DeepBGC score:", as.character(score_d)), - paste("Cluster type score:", as.character(score_c)), - sep = "\n" - ) - - # Plot the barplot - ggplot2::ggplot(fullnes_of_annotation, ggplot2::aes(fill = Source, y = Quantity, x = Score)) + - ggplot2::geom_bar(position = "dodge", stat = "identity") + - ggplot2::geom_text(ggplot2::aes(label = Quantity), position = ggplot2::position_dodge(width = 0.9), vjust = -0.25) + - ggplot2::xlab(paste(input$score_type, "Score")) + - title + - ggplot2::geom_label(ggplot2::aes(x = Inf, y = Inf, hjust = 1, vjust = 1, label = annotateText), show.legend = F) - }) + moduleServer(id, function(input, output, session) { + ns <- session$ns + # Silence R CMD note + Start <- Stop <- Source <- + Quantity <- Score <- Novelty_rate <- + Annotation_rate <- Skip_rate <- Rates_data <- + Rates <- NULL + 
output$deep_barplot <- shiny::renderPlot({ + shiny::req((vals$deep_data_input == TRUE) & ((vals$anti_data_input == TRUE) | (vals$prism_data_input == TRUE) | (vals$sempi_data_input == TRUE) | (vals$ripp_data_input == TRUE) | (vals$emerald_data_input == TRUE) | (vals$compare_data_input == TRUE) )) + + + # Create empty dataframe to populate later + fullnes_of_annotation <- data.frame(NA, NA, NA) + colnames(fullnes_of_annotation) <- c("Score", "Source", "Quantity") + fullnes_of_annotation <- tidyr::drop_na(fullnes_of_annotation) + + deep_inter_1 <- vals$deep_data_filtered + # Decide which score to use for basic thresholds on x axis + if (input$score_type == "Activity") { + deep_inter_1$score <- deep_inter_1$score_a + } else if (input$score_type == "DeepBGC") { + deep_inter_1$score <- deep_inter_1$score_d + } else if (input$score_type == "Cluster_Type") { + deep_inter_1$score <- deep_inter_1$score_c + } + # Loop over thresholds with given step. Get the interception of antismash data with DeepBGC one at given x axis thresholds with additionsl ones + for (dataframe_1 in seq(input$plot_start, 99, input$plot_step)) { + deep_inter <- deep_inter_1 %>% + dplyr::filter(score >= dataframe_1 / 100) %>% + dplyr::select(Start, Stop) + if (length(deep_inter$Start) > 0) { + deep_inter$seqnames <- "chr" + } + + + # Store antismash bgc start amd atop values as matrix + if (input$ref_comparison == "Antismash") { + anti_inter <- shiny::isolate(vals$anti_data) %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <- "chr" + } else if (input$ref_comparison == "PRISM") { + anti_inter <- shiny::isolate(vals$prism_data) %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <- "chr" + } else if (input$ref_comparison == "SEMPI") { + anti_inter <- shiny::isolate(vals$sempi_data) %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <- "chr" + } else if (input$ref_comparison == "RippMiner"){ + anti_inter <- shiny::isolate(vals$ripp_data) %>% + dplyr::select(Start, Stop) + anti_inter$seqnames 
<- "chr" + } else if (input$ref_comparison == "Emerald/SanntiS"){ + anti_inter <- shiny::isolate(vals$emerald_data) %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <- "chr" + } else if (input$ref_comparison == "Compare"){ + anti_inter <- shiny::isolate(vals$compare_data) %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <- "chr" + } + + # Get the interception of two matrices + if (length(deep_inter$Start) > 0) { + query <- GenomicRanges::makeGRangesFromDataFrame(deep_inter) + subject <- GenomicRanges::makeGRangesFromDataFrame(anti_inter) + interseption <- GenomicRanges::findOverlaps(query, subject) + inter_bgc <- length(unique(interseption@from)) + len_new <- length(deep_inter$seqnames) - inter_bgc + } else { + inter_bgc <- 0 + len_new <- 0 + } + if (input$ref_comparison == "Antismash") { + used_antismash <- length(shiny::isolate(vals$anti_data$Cluster)) - inter_bgc + cols <- c("Only Antismash", "DeepBGC+Antismash", "Only DeepBGC") + title <- ggplot2::ggtitle("Comparison of Antismash and DeepBGC annotations at given score threshold") + } else if (input$ref_comparison == "PRISM") { + used_antismash <- length(shiny::isolate(vals$prism_data$Cluster)) - inter_bgc + cols <- c("Only PRISM", "DeepBGC+PRISM", "Only DeepBGC") + title <- ggplot2::ggtitle("Comparison of PRISM and DeepBGC annotations at given score threshold") + } else if (input$ref_comparison == "SEMPI") { + used_antismash <- length(shiny::isolate(vals$sempi_data$Cluster)) - inter_bgc + cols <- c("Only SEMPI", "DeepBGC+SEMPI", "Only DeepBGC") + title <- ggplot2::ggtitle("Comparison of SEMPI and DeepBGC annotations at given score threshold") + } else if (input$ref_comparison == "RipMinner"){ + used_antismash <- length(shiny::isolate(vals$ripp_data$Cluster)) - inter_bgc + cols <-c("Only RippMiner", "DeepBGC+RippMiner", "Only DeepBGC") + title <- ggplot2::ggtitle("Comparsion of RippMiner and DeepBGC annotations at given score threshold") + } else if (input$ref_comparison == "Emerald/SanntiS") { + 
used_antismash <- length(shiny::isolate(vals$emerald_data$Cluster)) - inter_bgc + cols <- c("Only Emerald/SanntiS", "GECCO+Emerald/SanntiS", "Only Emerald/SanntiS") + title <- ggplot2::ggtitle("Comparsion of Emerald/SanntiS and GECCO annotations at given score threshold") + } else if (input$ref_comparison == "Compare") { + used_antismash <- length(shiny::isolate(vals$compare_data$Cluster)) - inter_bgc + cols <- c("Only compare", "GECCO+compare", "Only compare") + title <- ggplot2::ggtitle("Comparsion of RippMiner-genome and GECCO annotations at given score threshold") + } + + # Combine all vectors into one dataframe + fullnes_of_annotation_1 <- data.frame( + c(rep(c(as.character(dataframe_1)), 3)), + cols, c(used_antismash, inter_bgc, len_new) + ) + colnames(fullnes_of_annotation_1) <- c("Score", "Source", "Quantity") + # Combine previously created empty dataframe with this one to store results + fullnes_of_annotation <- rbind(fullnes_of_annotation, fullnes_of_annotation_1) + } + + # Store dataframe in reactive value for later use. 
+ vals$fullness_deep <- data.frame(fullnes_of_annotation) + + # Make text to show on a barplot to point on additional scores' thresholds + annotateText <- paste("Applied additional thresholds", paste("Activity score:", shiny::isolate(as.character(vals$score_a))), + paste("DeepBGC score:", shiny::isolate(as.character(vals$score_d))), + paste("Cluster type score:", shiny::isolate(as.character(vals$score_c))), + sep = "\n" + ) + + # Plot the barplot + ggplot2::ggplot(fullnes_of_annotation, ggplot2::aes(fill = Source, y = Quantity, x = Score)) + + ggplot2::geom_bar(position = "dodge", stat = "identity") + + ggplot2::geom_text(ggplot2::aes(label = Quantity), position = ggplot2::position_dodge(width = 0.9), vjust = -0.25) + + ggplot2::xlab(paste(input$score_type, "Score")) + + title + + ggplot2::geom_label(ggplot2::aes(x = Inf, y = Inf, hjust = 1, vjust = 1, label = annotateText), show.legend = FALSE) + }) + + output$deep_rate <- plotly::renderPlotly({ + shiny::req(!is.null(vals$fullness_deep)) + + + # Reuse stored dataframe from previous plot + # This dataframe stores data for number of intercepted/non intercepted clusters for DeepBGC and antismash data + # For more information please see previous shiny::renderPlot + fullnes_of_annotation <- data.frame(vals$fullness_deep) + + # Store dataframe into variable. Widen it to calculate rates + test <- fullnes_of_annotation %>% + tidyr::pivot_wider(names_from = Source, values_from = Quantity) + if (input$ref_comparison == "Antismash") { + data <- vals$anti_data + title <- ggplot2::ggtitle("Rates of DeepBGC/Antismash data annotation") + test <- test %>% + # Calculate rates. 
Novelty is nummber of clusters annotated only by deepbgc/ all clusters annotated by antismash + (antismash + deepbgc) + dplyr::mutate( + Novelty_rate = test$`Only DeepBGC` / (test$`DeepBGC+Antismash` + test$`Only Antismash`), + # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) + Annotation_rate = test$`DeepBGC+Antismash` / length(data$Cluster), + # Skip rate = clusters, annotated only by antismash/ all antismash clusters. Points to how much clusters DeepBGC missed + Skip_rate = test$`Only Antismash` / length(data$Cluster) + ) + } else if (input$ref_comparison == "PRISM") { + data <- vals$prism_data + title <- ggplot2::ggtitle("Rates of DeepBGC/PRISM data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only DeepBGC` / (test$`DeepBGC+PRISM` + test$`Only PRISM`), + # Annotation rate = clusters, annotated by PRISM+deepBGC/ clusters annotated only by prism (We assume that prism annotation is full and reference) + Annotation_rate = test$`DeepBGC+PRISM` / length(data$Cluster), + # Skip rate = clusters, annotated only by PRISM/ all prism clusters. Points to how much clusters DeepBGC missed + Skip_rate = test$`Only PRISM` / length(data$Cluster) + ) + } else if (input$ref_comparison == "SEMPI") { + data <- vals$sempi_data + title <- ggplot2::ggtitle("Rates of DeepBGC/SEMPI data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only DeepBGC` / (test$`DeepBGC+SEMPI` + test$`Only SEMPI`), + # Annotation rate = clusters, annotated by SEMPI+deepBGC/ clusters annotated only by sempi (We assume that sempi annotation is full and reference) + Annotation_rate = test$`DeepBGC+SEMPI` / length(data$Cluster), + # Skip rate = clusters, annotated only by SEMPI/ all sempi clusters. 
Points to how much clusters DeepBGC missed + Skip_rate = test$`Only SEMPI` / length(data$Cluster) + ) + } else if (input$ref_comparsion == "RippMiner"){ + data <- vals$ripp_data + title <- ggplot2::ggtitle("Rates of DeepBGC/RippMiner data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only DeepBGC` / (test$`DeepBGC+RippMiner` + test$`Only RippMiner`), + + Annotation_rate = test$`DeepBGC+RippMiner` / length(data$Cluster), + + Skip_rate = test$`Only RippMiner`/ length(data$Cluster) + ) + } else if (input$ref_comparsion == "Emerald/SanntiS"){ + data <- vals$emerald_data + title <- ggplot2::ggtitle("Rates of DeepBGC/Emerald-SanntiS data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only DeepBGC` / (test$`DeepBGC+Emerald/SanntiS` + test$`Only Emerald/SanntiS`), + + Annotation_rate = test$`DeepBGC+Emerald/SanntiS` / length(data$Cluster), + + Skip_rate = test$`Only Emerald/SanntiS`/ length(data$Cluster) + ) + } else if (input$ref_comparsion == "Compare"){ + data <- vals$compare_data + title <- ggplot2::ggtitle("Rates of DeepBGC/Compare data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only DeepBGC` / (test$`DeepBGC+Compare` + test$`Only Compare`), + + Annotation_rate = test$`DeepBGC+Compare` / length(data$Cluster), + + Skip_rate = test$`Only Compare`/ length(data$Cluster) + ) + } + - output$deep_rate <- plotly::renderPlotly({ - shiny::req(!is.null(vals$fullness_deep)) - - - # Reuse stored dataframe from previous plot - # This dataframe stores data for number of intercepted/non intercepted clusters for DeepBGC and antismash data - # For more information please see previous shiny::renderPlot - fullnes_of_annotation <- data.frame(vals$fullness_deep) - - # Store dataframe into variable. 
Widen it to calculate rates - test <- fullnes_of_annotation %>% - tidyr::pivot_wider(names_from = Source, values_from = Quantity) - if (input$ref_comparison == "Antismash") { - data <- vals$anti_data - title <- ggplot2::ggtitle("Rates of DeepBGC/Antismash data annotation") - test <- test %>% - # Calculate rates. Novelty is nummber of clusters annotated only by deepbgc/ all clusters annotated by antismash + (antismash + deepbgc) - dplyr::mutate( - Novelty_rate = test$`Only DeepBGC` / (test$`DeepBGC+Antismash` + test$`Only Antismash`), - # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) - Annotation_rate = test$`DeepBGC+Antismash` / length(data$Cluster), - # Skip rate = clusters, annotated only by antismash/ all antismash clusters. Points to how much clusters DeepBGC missed - Skip_rate = test$`Only Antismash` / length(data$Cluster) - ) - } else if (input$ref_comparison == "PRISM") { - data <- vals$prism_data - title <- ggplot2::ggtitle("Rates of DeepBGC/PRISM data annotation") - test <- test %>% - dplyr::mutate( - Novelty_rate = test$`Only DeepBGC` / (test$`DeepBGC+PRISM` + test$`Only PRISM`), - # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) - Annotation_rate = test$`DeepBGC+PRISM` / length(data$Cluster), - # Skip rate = clusters, annotated only by antismash/ all antismash clusters. 
Points to how much clusters DeepBGC missed - Skip_rate = test$`Only PRISM` / length(data$Cluster) - ) - } else if (input$ref_comparison == "SEMPI") { - data <- vals$sempi_data - title <- ggplot2::ggtitle("Rates of DeepBGC/SEMPI data annotation") - test <- test %>% - dplyr::mutate( - Novelty_rate = test$`Only DeepBGC` / (test$`DeepBGC+SEMPI` + test$`Only SEMPI`), - # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) - Annotation_rate = test$`DeepBGC+SEMPI` / length(data$Cluster), - # Skip rate = clusters, annotated only by antismash/ all antismash clusters. Points to how much clusters DeepBGC missed - Skip_rate = test$`Only SEMPI` / length(data$Cluster) - ) - } - - # Calculate rates and plot interactive plot with plotly - plotly::ggplotly(test %>% - tidyr::pivot_longer(cols = c(Novelty_rate, Annotation_rate, Skip_rate), names_to = "Rates", values_to = "Rates_data") %>% - ggplot2::ggplot(ggplot2::aes(x = as.numeric(Score), y = as.numeric(Rates_data), Rate = as.numeric(Rates_data))) + - ggplot2::geom_line(ggplot2::aes(color = Rates)) + - ggplot2::geom_point(ggplot2::aes(shape = Rates), alpha = .4, size = 3) + - title + - ggplot2::ylab("Rate") + - ggplot2::xlab(paste(input$score_type, "Score threshold")), - tooltip = c("Rate") - ) + # Calculate rates and plot interactive plot with plotly + plotly::ggplotly(test %>% + tidyr::pivot_longer(cols = c(Novelty_rate, Annotation_rate, Skip_rate), names_to = "Rates", values_to = "Rates_data") %>% + ggplot2::ggplot(ggplot2::aes(x = as.numeric(Score), y = as.numeric(Rates_data), Rate = as.numeric(Rates_data))) + + ggplot2::geom_line(ggplot2::aes(color = Rates)) + + ggplot2::geom_point(ggplot2::aes(shape = Rates), alpha = .4, size = 3) + + title + + ggplot2::ylab("Rate") + + ggplot2::xlab(paste(input$score_type, "Score threshold")), + tooltip = c("Rate") + ) + }) }) - }) } ## To be copied in the UI # 
mod_deepbgc_plots_ui("deep_barplot_ui_1") diff --git a/R/mod_download.R b/R/mod_download.R index 19d5542..0e9ad97 100644 --- a/R/mod_download.R +++ b/R/mod_download.R @@ -8,54 +8,88 @@ #' #' @importFrom shiny NS tagList mod_download_ui <- function(id) { - ns <- NS(id) - tagList( - div( - id = "id6", - shinydashboardPlus::box( - title = "Download data", - id = "download_data_box", - collapsible = TRUE, - closable = TRUE, - width = NULL, - shiny::downloadButton(ns("download"), "Download currently used datasets (as for Biocircos plot)") - ) + ns <- NS(id) + tagList( + div( + id = "id6", + shinydashboardPlus::box( + title = "Download data", + id = "download_data_box", + collapsible = TRUE, + closable = TRUE, + width = NULL, + shiny::downloadButton(ns("download"), "Download currently used datasets (as for Biocircos plot)") + ) + ) ) - ) } -#' download Server Functions -#' -#' @noRd -mod_download_server <- function(id) { +mod_download_anti_ui <- function(id) { + ns <- NS(id) + tagList( + div( + style = "text-align: center", + shinydashboard::menuItem( + tabName = "download_data_anti", + shiny::downloadButton(ns("download_data_anti"), "Download JSON for AntiSMASH") + ) + ) + ) + +} + +mod_download_anti_server <- function(id){ moduleServer(id, function(input, output, session) { ns <- session$ns - output$download <- shiny::downloadHandler( - filename = function() { - paste("datasets.zip") - }, + if (!file.exists('data_all.csv')) { + shiny::showNotification("No data to download", type = "warning")} + + output$download_data_anti <- shiny::downloadHandler( + filename ='antiSMASH_data.json', content = function(file) { - flst <- c() - # List files in directory - files_in_dir <- list.files() - # Iterate over those files and if found "_biocircos.csv" add to the flst vector - for (file_names in files_in_dir) { - if (grepl("_biocircos.csv", file_names, fixed = TRUE)) { - flst <- c(flst, file_names) - } else if (grepl("group_by.csv", file_names, fixed = TRUE)) { - flst <- c(flst, 
file_names) - } - } - # create the zip file from flst vector - group_by_script <- system.file("scripts", "group.py", package = "BGCViz") - flst <- c(flst, group_by_script) - utils::zip(file, flst) - }, - contentType = "application/zip" + if (file.exists('data_all.csv')) { + json <- data_to_json('data_all.csv') + write(json, file)} + } ) }) + +} + +#' download Server Functions +#' +#' @noRd +mod_download_server <- function(id) { + moduleServer(id, function(input, output, session) { + ns <- session$ns + output$download <- shiny::downloadHandler( + filename = function() { + paste("datasets.zip") + }, + content = function(file) { + flst <- c() + # List files in directory + files_in_dir <- list.files() + # Iterate over those files and if found "_biocircos.csv" add to the flst vector + for (file_names in files_in_dir) { + if (grepl("_biocircos.csv", file_names, fixed = TRUE)) { + flst <- c(flst, file_names) + } else if (grepl("group_by.csv", file_names, fixed = TRUE)) { + flst <- c(flst, file_names) + } + } + # create the zip file from flst vector + group_by_script <- system.file("scripts", "group.py", package = "BGCViz") + dissect_script <- system.file("scripts", "dissect.py", package = "BGCViz") + flst <- c(flst, group_by_script, dissect_script) + utils::zip(file, flst, flags = '-r9Xj') + }, + contentType = "application/zip" + ) + }) } + ## To be copied in the UI # mod_download_ui("download_ui_1") diff --git a/R/mod_gecco_plots.R b/R/mod_gecco_plots.R index 01f2b72..c47e71e 100644 --- a/R/mod_gecco_plots.R +++ b/R/mod_gecco_plots.R @@ -8,239 +8,292 @@ #' #' @importFrom shiny NS tagList mod_gecco_plots_ui <- function(id) { - ns <- NS(id) - tagList( - shiny::fluidRow( - tags$div( - id = "gecco_data1", - div( - id = "id1", - shinyjqui::jqui_resizable(shinydashboardPlus::box( - title = "GECCO comparison", - id = "gecco_comparison_box", - collapsible = TRUE, - closable = TRUE, - height = "100%", - shiny::plotOutput(ns("gecco_barplot"), height = "500px") %>% - 
shinycssloaders::withSpinner() - ), options = list(handles = "w,e")) + ns <- NS(id) + tagList( + shiny::fluidRow( + tags$div( + id = "gecco_data1", + div( + id = "id1", + shinyjqui::jqui_resizable(shinydashboardPlus::box( + title = "GECCO comparison", + id = "gecco_comparison_box", + collapsible = TRUE, + closable = TRUE, + height = "100%", + shiny::plotOutput(ns("gecco_barplot"), height = "500px") %>% + shinycssloaders::withSpinner() + ), options = list(handles = "w,e")) + ), + div( + id = "id2", + shinyjqui::jqui_resizable(shinydashboardPlus::box( + title = "GECCO rate", + id = "gecco_rate_box", + collapsible = TRUE, + closable = TRUE, + height = "100%", + plotly::plotlyOutput(ns("gecco_rate"), height = "500px", ) %>% + shinycssloaders::withSpinner() + ), options = list(handles = "w,e")) + ), + ) ), - div( - id = "id2", - shinyjqui::jqui_resizable(shinydashboardPlus::box( - title = "GECCO rate", - id = "gecco_rate_box", - collapsible = TRUE, - closable = TRUE, - height = "100%", - plotly::plotlyOutput(ns("gecco_rate"), height = "500px", ) %>% - shinycssloaders::withSpinner() - ), options = list(handles = "w,e")) - ), - ) - ), - shiny::fluidRow( - tags$div( - id = "gecco_data2", - div( - id = "id1", - shinyjqui::jqui_resizable(shinydashboardPlus::box( - title = "GECCO comparison controls", - id = "gecco_comparison_controls_box", - collapsible = TRUE, - closable = TRUE, - shiny::selectInput(ns("ref_comparison_gecco"), "Choose data for comparison with Gecco", choices = c(""), selected = ""), - shiny::selectInput(ns("score_type_gecco"), "Choose score type to set threshold", - choices = c( - "Average p-value" = "avg_p", - "Cluster_type score" = "Cluster_Type" - ), - selected = "avg_p" - ), - shiny::sliderInput(ns("plot_step_gecco"), "Choose step for plots(barplot)", min = 1, max = 50, value = 10), - shiny::sliderInput(ns("plot_start_gecco"), "Chose plot start point(barplot)", min = 0, max = 99, value = 0) - ), options = list(handles = "w,e")) + shiny::fluidRow( + 
tags$div( + id = "gecco_data2", + div( + id = "id1", + shinyjqui::jqui_resizable(shinydashboardPlus::box( + title = "GECCO comparison controls", + id = "gecco_comparison_controls_box", + collapsible = TRUE, + closable = TRUE, + shiny::selectInput(ns("ref_comparison_gecco"), "Choose data for comparison with Gecco", choices = c(""), selected = ""), + shiny::selectInput(ns("score_type_gecco"), "Choose score type to set threshold", + choices = c( + "Average p-value" = "avg_p", + "Cluster_type score" = "Cluster_Type" + ), + selected = "avg_p" + ), + shiny::sliderInput(ns("plot_step_gecco"), "Choose step for plots(barplot)", min = 1, max = 50, value = 10), + shiny::sliderInput(ns("plot_start_gecco"), "Chose plot start point(barplot)", min = 0, max = 99, value = 0) + ), options = list(handles = "w,e")) + ) + ) ) - ) ) - ) } #' gecco_plots Server Functions #' #' @noRd mod_gecco_plots_server <- function(id, vals, score_average_gecco, score_cluster_gecco) { - moduleServer(id, function(input, output, session) { - ns <- session$ns - # Silence R CMD note - Start <- Stop <- Source <- Quantity <- - Score <- Novelty_rate <- Annotation_rate <- - Skip_rate <- Skip_rate <- Rates_data <- - Rates <- NULL - output$gecco_barplot <- shiny::renderPlot({ - shiny::req((vals$gecco_data_input == T) & ((vals$anti_data_input == T) | (vals$prism_data_input == T) | (vals$sempi_data_input == T))) - - # Create empty dataframe to populate later - fullnes_of_annotation <- data.frame(NA, NA, NA) - colnames(fullnes_of_annotation) <- c("Score", "Source", "Quantity") - fullnes_of_annotation <- tidyr::drop_na(fullnes_of_annotation) - - gecco_inter_1 <- vals$gecco_data_filtered - # Decide which score to use for basic thresholds on x axis - if (input$score_type_gecco == "avg_p") { - score <- "score_a" - } else if (input$score_type_gecco == "Cluster_Type") { - score <- "score_c" - } - gecco_inter_1$score <- gecco_inter_1[[score]] - - # Loop over thresholds with given step. 
Get the interception of antismash data with DeepBGC one at given x axis thresholds with additionsl ones - for (dataframe_1 in seq(input$plot_start_gecco, 99, input$plot_step_gecco)) { - - # dplyr::filter dataframe. Get only rows, which >= of a given thresholds. dplyr::select only start and stop of those rows as a matrix - gecco_inter <- gecco_inter_1 %>% - dplyr::filter(score >= dataframe_1 / 100) %>% - dplyr::select(Start, Stop) - if (length(gecco_inter$Start) > 0) { - gecco_inter$seqnames <- "chr" - } - - - # Store antismash bgc start amd atop values as matrix - if (input$ref_comparison_gecco == "Antismash") { - anti_inter <- vals$anti_data %>% - dplyr::select(Start, Stop) - anti_inter$seqnames <- "chr" - } else if (input$ref_comparison_gecco == "PRISM") { - anti_inter <- vals$prism_data %>% - dplyr::select(Start, Stop) - anti_inter$seqnames <- "chr" - } else if (input$ref_comparison_gecco == "SEMPI") { - anti_inter <- vals$sempi_data %>% - dplyr::select(Start, Stop) - anti_inter$seqnames <- "chr" - } - - - - - # Get the interception of two matrices - if (length(gecco_inter$Start) > 0) { - query <- GenomicRanges::makeGRangesFromDataFrame(gecco_inter) - subject <- GenomicRanges::makeGRangesFromDataFrame(anti_inter) - interseption <- GenomicRanges::findOverlaps(query, subject) - inter_bgc <- length(interseption@from) - len_new <- length(gecco_inter$seqnames) - inter_bgc - } else { - inter_bgc <- 0 - len_new <- 0 - } - - - if (input$ref_comparison_gecco == "Antismash") { - used_antismash <- length(vals$anti_data$Cluster) - inter_bgc - cols <- c("Only Antismash", "GECCO+Antismash", "Only GECCO") - title <- ggplot2::ggtitle("Comparison of Antismash and GECCO annotations at given score threshold") - } else if (input$ref_comparison_gecco == "PRISM") { - used_antismash <- length(vals$prism_data$Cluster) - inter_bgc - cols <- c("Only PRISM", "GECCO+PRISM", "Only GECCO") - title <- ggplot2::ggtitle("Comparison of PRISM and GECCO annotations at given score threshold") - } 
else if (input$ref_comparison_gecco == "SEMPI") { - used_antismash <- length(vals$sempi_data$Cluster) - inter_bgc - cols <- c("Only SEMPI", "GECCO+SEMPI", "Only GECCO") - title <- ggplot2::ggtitle("Comparison of SEMPI and GECCO annotations at given score threshold") - } - - # Combine all vectors into one dataframe - fullnes_of_annotation_1 <- data.frame( - c(rep(c(as.character(dataframe_1)), 3)), - cols, c(used_antismash, inter_bgc, len_new) - ) - colnames(fullnes_of_annotation_1) <- c("Score", "Source", "Quantity") - # Combine previously created empty dataframe with this one to store results - fullnes_of_annotation <- rbind(fullnes_of_annotation, fullnes_of_annotation_1) - } - - # Store dataframe in reactive value for later use. - vals$fullness_gecco <- data.frame(fullnes_of_annotation) - # write.csv(fullnes_of_annotation, "fullness.csv", row.names = F) - - # Make text to show on a barplot to point on additional scores' thresholds - annotateText <- paste("Applied additional thresholds", paste("Average p-value:", as.character(score_average_gecco)), - paste("Cluster type score:", as.character(score_cluster_gecco)), - sep = "\n" - ) - - # Plot the barplot - ggplot2::ggplot(fullnes_of_annotation, ggplot2::aes(fill = Source, y = Quantity, x = Score)) + - ggplot2::geom_bar(position = "dodge", stat = "identity") + - ggplot2::geom_text(ggplot2::aes(label = Quantity), position = ggplot2::position_dodge(width = 0.9), vjust = -0.25) + - ggplot2::xlab(paste(input$score_type, "Score")) + - title + - ggplot2::geom_label(ggplot2::aes(x = Inf, y = Inf, hjust = 1, vjust = 1, label = annotateText), show.legend = F) - }) + moduleServer(id, function(input, output, session) { + ns <- session$ns + # Silence R CMD note + Start <- Stop <- Source <- Quantity <- + Score <- Novelty_rate <- Annotation_rate <- + Skip_rate <- Skip_rate <- Rates_data <- + Rates <- NULL + output$gecco_barplot <- shiny::renderPlot({ + shiny::req((vals$gecco_data_input == TRUE) & ((vals$anti_data_input == TRUE) | 
(vals$compare_data_input == TRUE) | (vals$ripp_data_input == TRUE) | (vals$emerald_data_input == TRUE) | (vals$prism_data_input == TRUE) | (vals$sempi_data_input == TRUE))) + + # Create empty dataframe to populate later + fullnes_of_annotation <- data.frame(NA, NA, NA) + colnames(fullnes_of_annotation) <- c("Score", "Source", "Quantity") + fullnes_of_annotation <- tidyr::drop_na(fullnes_of_annotation) + + gecco_inter_1 <- vals$gecco_data_filtered + # Decide which score to use for basic thresholds on x axis + if (input$score_type_gecco == "avg_p") { + gecco_inter_1$score <- gecco_inter_1$score_a + } else if (input$score_type_gecco == "Cluster_Type") { + gecco_inter_1$score <- gecco_inter_1$score_c + } + + # Loop over thresholds with given step. Get the interception of antismash data with DeepBGC one at given x axis thresholds with additionsl ones + for (dataframe_1 in seq(input$plot_start_gecco, 99, input$plot_step_gecco)) { + + # dplyr::filter dataframe. Get only rows, which >= of a given thresholds. 
dplyr::select only start and stop of those rows as a matrix + gecco_inter <- gecco_inter_1 %>% + dplyr::filter(score >= dataframe_1 / 100) %>% + dplyr::select(Start, Stop) + if (length(gecco_inter$Start) > 0) { + gecco_inter$seqnames <- "chr" + } + + + # Store antismash bgc start amd atop values as matrix + if (input$ref_comparison_gecco == "Antismash") { + anti_inter <- vals$anti_data %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <- "chr" + } else if (input$ref_comparison_gecco == "PRISM") { + anti_inter <- vals$prism_data %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <- "chr" + } else if (input$ref_comparison_gecco == "SEMPI") { + anti_inter <- vals$sempi_data %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <- "chr" + } else if(input$ref_comparison_gecco == "RippMiner") { + anti_inter <- vals$ripp_data %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <-"chr" + } else if(input$ref_comparison_gecco == "Emerald/SanntiS") { + anti_inter <- vals$emerald_data %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <-"chr" + } else if(input$ref_comparison_gecco == "Compare") { + anti_inter <- vals$compare_data %>% + dplyr::select(Start, Stop) + anti_inter$seqnames <-"chr" + } + + + + + # Get the interception of two matrices + if (length(gecco_inter$Start) > 0) { + query <- GenomicRanges::makeGRangesFromDataFrame(gecco_inter) + subject <- GenomicRanges::makeGRangesFromDataFrame(anti_inter) + interseption <- GenomicRanges::findOverlaps(query, subject) + inter_bgc <- length(unique(interseption@from)) + len_new <- length(gecco_inter$seqnames) - inter_bgc + } else { + inter_bgc <- 0 + len_new <- 0 + } + + if (input$ref_comparison_gecco == "Antismash") { + used_antismash <- length(vals$anti_data$Cluster) - inter_bgc + cols <- c("Only Antismash", "GECCO+Antismash", "Only GECCO") + title <- ggplot2::ggtitle("Comparison of Antismash and GECCO annotations at given score threshold") + } else if (input$ref_comparison_gecco == "PRISM") { + 
used_antismash <- length(vals$prism_data$Cluster) - inter_bgc + cols <- c("Only PRISM", "GECCO+PRISM", "Only GECCO") + title <- ggplot2::ggtitle("Comparison of PRISM and GECCO annotations at given score threshold") + } else if (input$ref_comparison_gecco == "SEMPI") { + used_antismash <- length(vals$sempi_data$Cluster) - inter_bgc + cols <- c("Only SEMPI", "GECCO+SEMPI", "Only GECCO") + title <- ggplot2::ggtitle("Comparison of SEMPI and GECCO annotations at given score threshold") + } else if (input$ref_comparison_gecco == "RippMiner") { + used_antismash <- length(vals$ripp_data$Cluster) - inter_bgc + cols <- c("Only RippMiner", "GECCO+RippMiner", "Only RippMiner") + title <- ggplot2::ggtitle("Comparsion of RippMiner-genome and GECCO annotations at given score threshold") + } else if (input$ref_comparison_gecco == "Emerald/SanntiS") { + used_antismash <- length(vals$emerald_data$Cluster) - inter_bgc + cols <- c("Only Emerald/SanntiS", "GECCO+Emerald/SanntiS", "Only Emerald/SanntiS") + title <- ggplot2::ggtitle("Comparsion of Emerald/SanntiS and GECCO annotations at given score threshold") + } else if (input$ref_comparison_gecco == "Compare") { + used_antismash <- length(vals$compare_data$Cluster) - inter_bgc + cols <- c("Only compare", "GECCO+compare", "Only compare") + title <- ggplot2::ggtitle("Comparsion of compare and GECCO annotations at given score threshold") + } + + # Combine all vectors into one dataframe + fullnes_of_annotation_1 <- data.frame( + c(rep(c(as.character(dataframe_1)), 3)), + cols, c(used_antismash, inter_bgc, len_new) + ) + colnames(fullnes_of_annotation_1) <- c("Score", "Source", "Quantity") + # Combine previously created empty dataframe with this one to store results + fullnes_of_annotation <- rbind(fullnes_of_annotation, fullnes_of_annotation_1) + } + + # Store dataframe in reactive value for later use. 
+ vals$fullness_gecco <- data.frame(fullnes_of_annotation) + # Make text to show on a barplot to point on additional scores' thresholds + annotateText <- paste("Applied additional thresholds", paste("Average p-value:", shiny::isolate(as.character(vals$score_average_gecco))), + paste("Cluster type score:", shiny::isolate(as.character(vals$score_cluster_gecco))), + sep = "\n" + ) + + # Plot the barplot + ggplot2::ggplot(fullnes_of_annotation, ggplot2::aes(fill = Source, y = Quantity, x = Score)) + + ggplot2::geom_bar(position = "dodge", stat = "identity") + + ggplot2::geom_text(ggplot2::aes(label = Quantity), position = ggplot2::position_dodge(width = 0.9), vjust = -0.25) + + ggplot2::xlab(paste(input$score_type, "Score")) + + title + + ggplot2::geom_label(ggplot2::aes(x = Inf, y = Inf, hjust = 1, vjust = 1, label = annotateText), show.legend = FALSE) + }) + + # Render interactive plot with plotly for rates of DeepBGC data in regards with antismash data + output$gecco_rate <- plotly::renderPlotly({ + shiny::req(!is.null(vals$fullness_gecco)) + + # Reuse stored dataframe from previous plot + # This dataframe stores data for number of intercepted/non intercepted clusters for DeepBGC and antismash data + # For more information please see previous shiny::renderPlot + fullnes_of_annotation <- data.frame(vals$fullness_gecco) + + # Store dataframe into variable. Widen it to calculate rates + test <- fullnes_of_annotation %>% + tidyr::pivot_wider(names_from = Source, values_from = Quantity) + if (input$ref_comparison_gecco == "Antismash") { + data <- vals$anti_data + title <- ggplot2::ggtitle("Rates of GECCO/Antismash data annotation") + test <- test %>% + # Calculate rates. 
Novelty is nummber of clusters annotated only by deepbgc/ all clusters annotated by antismash + (antismash + deepbgc) + dplyr::mutate( + Novelty_rate = test$`Only GECCO` / (test$`GECCO+Antismash` + test$`Only Antismash`), + # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) + Annotation_rate = test$`GECCO+Antismash` / length(data$Cluster), + # Skip rate = clusters, annotated only by antismash/ all antismash clusters. Points to how much clusters DeepBGC missed + Skip_rate = test$`Only Antismash` / length(data$Cluster) + ) + } else if (input$ref_comparison_gecco == "PRISM") { + data <- vals$prism_data + title <- ggplot2::ggtitle("Rates of GECCO/PRISM data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only GECCO` / (test$`GECCO+PRISM` + test$`Only PRISM`), + # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) + Annotation_rate = test$`GECCO+PRISM` / length(data$Cluster), + # Skip rate = clusters, annotated only by antismash/ all antismash clusters. Points to how much clusters DeepBGC missed + Skip_rate = test$`Only PRISM` / length(data$Cluster) + ) + } else if (input$ref_comparison_gecco == "SEMPI") { + data <- vals$sempi_data + title <- ggplot2::ggtitle("Rates of GECCO/SEMPI data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only GECCO` / (test$`GECCO+SEMPI` + test$`Only SEMPI`), + # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) + Annotation_rate = test$`GECCO+SEMPI` / length(data$Cluster), + # Skip rate = clusters, annotated only by antismash/ all antismash clusters. 
Points to how much clusters DeepBGC missed + Skip_rate = test$`Only SEMPI` / length(data$Cluster) + ) + } else if (input$ref_comparison_gecco == "RippMiner"){ + data <- vals$ripp_data + title <- ggplot2::ggtitle("Rates of GECCO/RippMiner data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only GECCO`/ (test$`GECCO+RippMiner` + test$`Only RippMiner`), + # + Annotation_rate = test$`GECCO+RippMiner`/ length(data$Cluster), + # + Skip_rate = test$`Only RippMiner`/length(data$Cluster) + ) + } else if (input$ref_comparison_gecco == "Emerald/SanntiS"){ + data <- vals$emerald_data + title <- ggplot2::ggtitle("Rates of GECCO/Emerald-SanntiS data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only GECCO`/ (test$`GECCO+Emerald/SanntiS` + test$`Only Emerald/SanntiS`), + # + Annotation_rate = test$`GECCO+Emerald/SanntiS`/ length(data$Cluster), + # + Skip_rate = test$`Only Emerald/SanntiS`/length(data$Cluster) + ) + } else if (input$ref_comparison_gecco == "Compare"){ + data <- vals$compare_data + title <- ggplot2::ggtitle("Rates of GECCO/compare data annotation") + test <- test %>% + dplyr::mutate( + Novelty_rate = test$`Only Compare`/ (test$`GECCO+Compare` + test$`Only Compare`), + # + Annotation_rate = test$`GECCO+Compare`/ length(data$Cluster), + # + Skip_rate = test$`Only Compare`/length(data$Cluster) + ) + } - # Render interactive plot with plotly for rates of DeepBGC data in regards with antismash data - output$gecco_rate <- plotly::renderPlotly({ - shiny::req(!is.null(vals$fullness_gecco)) - - # Reuse stored dataframe from previous plot - # This dataframe stores data for number of intercepted/non intercepted clusters for DeepBGC and antismash data - # For more information please see previous shiny::renderPlot - fullnes_of_annotation <- data.frame(vals$fullness_gecco) - - # Store dataframe into variable. 
Widen it to calculate rates - test <- fullnes_of_annotation %>% - tidyr::pivot_wider(names_from = Source, values_from = Quantity) - if (input$ref_comparison_gecco == "Antismash") { - data <- vals$anti_data - title <- ggplot2::ggtitle("Rates of GECCO/Antismash data annotation") - test <- test %>% - # Calculate rates. Novelty is nummber of clusters annotated only by deepbgc/ all clusters annotated by antismash + (antismash + deepbgc) - dplyr::mutate( - Novelty_rate = test$`Only GECCO` / (test$`GECCO+Antismash` + test$`Only Antismash`), - # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) - Annotation_rate = test$`GECCO+Antismash` / length(data$Cluster), - # Skip rate = clusters, annotated only by antismash/ all antismash clusters. Points to how much clusters DeepBGC missed - Skip_rate = test$`Only Antismash` / length(data$Cluster) - ) - } else if (input$ref_comparison_gecco == "PRISM") { - data <- vals$prism_data - title <- ggplot2::ggtitle("Rates of GECCO/PRISM data annotation") - test <- test %>% - dplyr::mutate( - Novelty_rate = test$`Only GECCO` / (test$`GECCO+PRISM` + test$`Only PRISM`), - # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) - Annotation_rate = test$`GECCO+PRISM` / length(data$Cluster), - # Skip rate = clusters, annotated only by antismash/ all antismash clusters. 
Points to how much clusters DeepBGC missed - Skip_rate = test$`Only PRISM` / length(data$Cluster) - ) - } else if (input$ref_comparison_gecco == "SEMPI") { - data <- vals$sempi_data - title <- ggplot2::ggtitle("Rates of GECCO/SEMPI data annotation") - test <- test %>% - dplyr::mutate( - Novelty_rate = test$`Only GECCO` / (test$`GECCO+SEMPI` + test$`Only SEMPI`), - # Annotation rate = clusters, annotated by antismash+deepBGC/ clusters annotated only by antismash (We assume that antismash annotation is full and reference) - Annotation_rate = test$`GECCO+SEMPI` / length(data$Cluster), - # Skip rate = clusters, annotated only by antismash/ all antismash clusters. Points to how much clusters DeepBGC missed - Skip_rate = test$`Only SEMPI` / length(data$Cluster) - ) - } - - # Calculate rates and plot interactive plot with plotly - plotly::ggplotly(test %>% - tidyr::pivot_longer(cols = c(Novelty_rate, Annotation_rate, Skip_rate), names_to = "Rates", values_to = "Rates_data") %>% - ggplot2::ggplot(ggplot2::aes(x = as.numeric(Score), y = as.numeric(Rates_data), Rate = as.numeric(Rates_data))) + - ggplot2::geom_line(ggplot2::aes(color = Rates)) + - ggplot2::geom_point(ggplot2::aes(shape = Rates), alpha = .4, size = 3) + - title + - ggplot2::ylab("Rate") + - ggplot2::xlab(paste(input$score_type, "Score threshold")), - tooltip = c("Rate") - ) + # Calculate rates and plot interactive plot with plotly + plotly::ggplotly(test %>% + tidyr::pivot_longer(cols = c(Novelty_rate, Annotation_rate, Skip_rate), names_to = "Rates", values_to = "Rates_data") %>% + ggplot2::ggplot(ggplot2::aes(x = as.numeric(Score), y = as.numeric(Rates_data), Rate = as.numeric(Rates_data))) + + ggplot2::geom_line(ggplot2::aes(color = Rates)) + + ggplot2::geom_point(ggplot2::aes(shape = Rates), alpha = .4, size = 3) + + title + + ggplot2::ylab("Rate") + + ggplot2::xlab(paste(input$score_type, "Score threshold")), + tooltip = c("Rate") + ) + }) }) - }) } ## To be copied in the UI diff --git 
a/R/mod_group_table.R b/R/mod_group_table.R index c029cfa..6a8bf42 100644 --- a/R/mod_group_table.R +++ b/R/mod_group_table.R @@ -1,4 +1,4 @@ -#' group_table UI Function +#'#' group_table UI Function #' #' @description A shiny Module. #' @@ -39,8 +39,8 @@ mod_group_table_server <- function(id, vals, data_uploads, soft_names, soft_nami ns <- session$ns output$group_table <- shiny::renderTable({ shiny::req(vals$data_upload_count > 1) - shiny::req(vals$need_filter == F) - shiny::req(vals$can_plot_group_table == T) + shiny::req(vals$need_filter == FALSE) + shiny::req(vals$can_plot_group_table == TRUE) if (is.null(vals$inters_filtered)) { inters <- vals$inters } else { @@ -50,7 +50,7 @@ mod_group_table_server <- function(id, vals, data_uploads, soft_names, soft_nami colnames(df_test) <- abbr added_inters <- c(soft_names[match(input$group_by, soft_namings)]) add_inters <- list() - if (input$count_all == F) { + if (input$count_all == FALSE) { df_test[nrow(df_test) + 1, ] <- NA } else { selected_dataframe <- data_to_use[match(input$group_by, soft_namings)] @@ -70,10 +70,10 @@ mod_group_table_server <- function(id, vals, data_uploads, soft_names, soft_nami for (h in seq(1:length(soft_n))) { if (name == soft_names[match(soft_n, soft_names)][h]) { colnames(df_tmp) <- c(abbr[i], abbr[match(soft_n, soft_names)][h]) - df_test <- merge(df_test, df_tmp, all = T) + df_test <- merge(df_test, df_tmp, all = TRUE) } } - + index <- index + 1 } excluded_names <- abbr[abbr != as.name(abbr[i])] @@ -86,7 +86,10 @@ mod_group_table_server <- function(id, vals, data_uploads, soft_names, soft_nami d = paste(eval(as.name(excluded_names[4])), collapse = ","), e = paste(eval(as.name(excluded_names[5])), collapse = ","), f = paste(eval(as.name(excluded_names[6])), collapse = ","), - g = paste(eval(as.name(excluded_names[7])), collapse = ",") + g = paste(eval(as.name(excluded_names[7])), collapse = ","), + h = paste(eval(as.name(excluded_names[8])), collapse = ","), + i = 
paste(eval(as.name(excluded_names[9])), collapse = ","), + j = paste(eval(as.name(excluded_names[10])), collapse = ",") ) colnames(data) <- c(abbr[i], excluded_names) for (p in abbr) { @@ -104,7 +107,7 @@ mod_group_table_server <- function(id, vals, data_uploads, soft_names, soft_nami } else { if (!(soft_names[i] %in% added_inters)) { matched_v <- match(added_inters, names(inters[[soft_names[i]]])) - soft_n <- soft_names[ -(matched_v[!is.na(matched_v)])] + soft_n <- soft_names[-(matched_v[!is.na(matched_v)])] for (inter in names(inters[[soft_names[i]]])) { if (!(inter %in% added_inters)) { add_inters[[soft_names[i]]] <- c(add_inters[[soft_names[i]]], inters[[soft_names[i]]][[inter]]$to) @@ -115,14 +118,14 @@ mod_group_table_server <- function(id, vals, data_uploads, soft_names, soft_nami } } } - + for (name in names(add_inters)) { data_to_add <- sort(unique(add_inters[[name]])) - data[nrow(data), soft_namings[match(name, soft_names)]] <- - paste(data_to_add[!(data_to_add %in% + data[nrow(data), soft_namings[match(name, soft_names)]] <- + paste(data_to_add[!(data_to_add %in% unique(unlist(c(data[soft_namings[match(name, soft_names)]]))))], collapse = ",") } - utils::write.csv(data, "group_by.csv", row.names = F) + utils::write.csv(data, "group_by.csv", row.names = FALSE) data }) }) @@ -132,5 +135,5 @@ mod_group_table_server <- function(id, vals, data_uploads, soft_names, soft_nami # mod_group_table_ui("group_table_ui_1") ## To be copied in the server -# mod_group_table_server("group_table_ui_1", vals=vals, data_uploads = data_uploads, -#soft_names = soft_names, soft_namings = soft_namings, data_to_use = data_to_use, abbr = abbr) +# mod_group_table_server("group_table_ui_1", vals=vals, data_uploads = data_uploads, +# soft_names = soft_names, soft_namings = soft_namings, data_to_use = data_to_use, abbr = abbr) diff --git a/R/run_app.R b/R/run_app.R index ad9915c..4b3476b 100644 --- a/R/run_app.R +++ b/R/run_app.R @@ -3,6 +3,12 @@ #' @param ... 
arguments to pass to golem_opts. #' See `?golem::get_golem_options` for more details. #' @inheritParams shiny::shinyApp +#' @return running shiny app +#' +#' @examples +#' \dontrun{ +#' run_app() +#' } #' #' @export #' @importFrom shiny shinyApp @@ -12,15 +18,15 @@ run_app <- function(onStart = NULL, enableBookmarking = NULL, uiPattern = "/", ...) { - with_golem_options( - app = shinyApp( - ui = app_ui, - server = app_server, - onStart = onStart, - options = options, - enableBookmarking = enableBookmarking, - uiPattern = uiPattern - ), - golem_opts = list(...) - ) + with_golem_options( + app = shinyApp( + ui = app_ui, + server = app_server, + onStart = onStart, + options = options, + enableBookmarking = enableBookmarking, + uiPattern = uiPattern + ), + golem_opts = list(...) + ) } diff --git a/R/sysdata.rda b/R/sysdata.rda index 319f0ee..2f541fd 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/utils_deep_reference.R b/R/utils_deep_reference.R index b07cae1..1f33810 100644 --- a/R/utils_deep_reference.R +++ b/R/utils_deep_reference.R @@ -1,83 +1,133 @@ #' geom_anti #' -#' @description A function, that returns antismash geom with the legend, +#' @description A function, that returns antismash geom with the legend, #' specific to this software (to show on mouse hover). 
#' #' @return geom_segment with specific fields #' #' @noRd geom_anti <- function(data, rre_more) { + # Silence R CMD note + x <- y <- xend <- yend <- Type2 <- + Software <- ID <- Start <- Stop <- Type <- NULL + ggplot2::geom_segment(data = data, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type + ), size = 3) +} +geom_compare <- function(data, rre_more) { # Silence R CMD note x <- y <- xend <- yend <- Type2 <- Software <- ID <- Start <- Stop <- Type <- NULL ggplot2::geom_segment(data = data, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type ), size = 3) } -#' geom_prism +#' geom_anti #' -#' @description A function, that returns prism geom with the legend, specific to this software (to show on mouse hover). +#'#' @description A function, that return Emerald/SanntiS geom with the legend, +#' specific to this software (to show on mouse hover). 
#' #' @return geom_segment with specific fields #' #' @noRd -geom_prism <- function(data, rre_more) { +geom_emerald <- function(data, rre_more) { # Silence R CMD note x <- y <- xend <- yend <- Type2 <- Software <- ID <- Start <- Stop <- Type <- NULL ggplot2::geom_segment(data = data, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type ), size = 3) } -#' geom_deep -#' -#' @description A function, that returns deepbgc geom with the legend, +geom_ref <- function(data, rre_more) { + # Silence R CMD note + x <- y <- xend <- yend <- Type2 <- + Software <- ID <- Start <- Stop <- Type <- NULL + ggplot2::geom_segment(data = data, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type + ), size = 3) +} +#' @description A function, that returns rippminer-genome geom with the legend, #' specific to this software (to show on mouse hover). 
#' #' @return geom_segment with specific fields #' #' @noRd -geom_deep <- function(data, rre_more) { +geom_ripp <- function(data, rre_more) { # Silence R CMD note - x <- y <- xend <- yend <- Type <- - Software <- ID <- Start <- Stop <- - Type <- num_domains <- deepbgc_score <- - activity <- NULL + x <- y <- xend <- yend <- Type2 <- + Software <- ID <- Start <- Stop <- Type <- NULL ggplot2::geom_segment(data = data, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type, num_domains = num_domains, - deepbgc_score = deepbgc_score, activity = activity + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type ), size = 3) } -#' geom_rre +#' geom_prism #' -#' @description A function, that returns RRE-Finder geom with the legend, -#' specific to this software (to show on mouse hover). +#' @description A function, that returns prism geom with the legend, specific to this software (to show on mouse hover). 
#' #' @return geom_segment with specific fields #' #' @noRd -geom_rre <- function(data, rre_more) { - # Silence R CMD note - x <- y <- xend <- yend <- Type <- - Score <- Software <- ID <- Start <- - Stop <- Type <- E_value <- P_value <- RRE_start <- - RRE_stop <- Probability <- NULL - if (rre_more == T) { +geom_prism <- function(data, rre_more) { + # Silence R CMD note + x <- y <- xend <- yend <- Type2 <- + Software <- ID <- Start <- Stop <- Type <- NULL ggplot2::geom_segment(data = data, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type, Score = Score, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type, E_value = E_value, - P_value = P_value, RRE_start = RRE_start, RRE_stop = RRE_stop, - Probability = Probability + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type ), size = 3) - } else { +} +#' geom_deep +#' +#' @description A function, that returns deepbgc geom with the legend, +#' specific to this software (to show on mouse hover). +#' +#' @return geom_segment with specific fields +#' +#' @noRd +geom_deep <- function(data, rre_more) { + # Silence R CMD note + x <- y <- xend <- yend <- Type <- + Software <- ID <- Start <- Stop <- + Type <- num_domains <- deepbgc_score <- + activity <- NULL ggplot2::geom_segment(data = data, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type, E_value = E_value + xend = xend, yend = yend, color = Type, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, num_domains = num_domains, + deepbgc_score = deepbgc_score, activity = activity ), size = 3) - } +} +#' geom_rre +#' +#' @description A function, that returns RRE-Finder geom with the legend, +#' specific to this software (to show on mouse hover). 
+#' +#' @return geom_segment with specific fields +#' +#' @noRd +geom_rre <- function(data, rre_more) { + # Silence R CMD note + x <- y <- xend <- yend <- Type <- + Score <- Software <- ID <- Start <- + Stop <- Type <- E_value <- P_value <- RRE_start <- + RRE_stop <- Probability <- NULL + if (rre_more == TRUE) { + ggplot2::geom_segment(data = data, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type, Score = Score, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, E_value = E_value, + P_value = P_value, RRE_start = RRE_start, RRE_stop = RRE_stop, + Probability = Probability + ), size = 3) + } else { + ggplot2::geom_segment(data = data, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, E_value = E_value + ), size = 3) + } } #' geom_sempi #' @@ -87,53 +137,53 @@ geom_rre <- function(data, rre_more) { #' #' @noRd geom_sempi <- function(data, rre_more) { - # Silence R CMD note - x <- y <- xend <- yend <- Type2 <- - Software <- ID <- Start <- Stop <- - Type <- NULL - ggplot2::geom_segment(data = data, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type - ), size = 3) + # Silence R CMD note + x <- y <- xend <- yend <- Type2 <- + Software <- ID <- Start <- Stop <- + Type <- NULL + ggplot2::geom_segment(data = data, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type + ), size = 3) } #' deep_reference #' -#' @description A function, that returns Prism-Supplement geom with the legend, +#' @description A function, that returns Prism-Supplement geom with the legend, #' specific to this software (to show on mouse hover). 
#' #' @return geom_segment with specific fields #' #' @noRd geom_prism_supp <- function(data, rre_more) { - # Silence R CMD note - x <- y <- xend <- yend <- Type2 <- - Software <- ID <- Start <- Stop <- Type <- Name <- - Full_name <- Score <- NULL - ggplot2::geom_segment(data = data, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, ID = ID, - Start = Start, Stop = Stop, Type = Type, Name = Name, Full_name = Full_name, - Score = Score - ), size = 3) + # Silence R CMD note + x <- y <- xend <- yend <- Type2 <- + Software <- ID <- Start <- Stop <- Type <- Name <- + Full_name <- Score <- NULL + ggplot2::geom_segment(data = data, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, ID = ID, + Start = Start, Stop = Stop, Type = Type, Name = Name, Full_name = Full_name, + Score = Score + ), size = 3) } #' geom_arts #' -#' @description A function, that returns ARTS geom with the legend, +#' @description A function, that returns ARTS geom with the legend, #' specific to this software (to show on mouse hover). 
#' #' @return geom_segment with specific fields #' #' @noRd geom_arts <- function(data, rre_more) { - # Silence R CMD error - x <- y <- xend <- yend <- Type2 <- - Start <- Stop <- Type <- ID <- Hit <- - Software <- Core <- E_value <- - Bitscore <- Count <- Model <- NULL - ggplot2::geom_segment(data = data, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type, Hit = Hit, - Core = Core, E_value = E_value, Bitscore = Bitscore, Count = Count, Model = Model - ), size = 3) + # Silence R CMD error + x <- y <- xend <- yend <- Type2 <- + Start <- Stop <- Type <- ID <- Hit <- + Software <- Core <- E_value <- + Bitscore <- Count <- Model <- NULL + ggplot2::geom_segment(data = data, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, Hit = Hit, + Core = Core, E_value = E_value, Bitscore = Bitscore, Count = Count, Model = Model + ), size = 3) } #' geom_gecco #' @@ -143,15 +193,15 @@ geom_arts <- function(data, rre_more) { #' #' @noRd geom_gecco <- function(data, rre_more) { - # Silence R CMD note - x <- y <- xend <- yend <- Type2 <- Software <- - ID <- Start <- Stop <- Type <- Num_proteins <- - Num_domains <- Average_p <- Max_p <- NULL - ggplot2::geom_segment(data = data, ggplot2::aes(x, y, - xend = xend, yend = yend, color = Type2, Software = Software, - ID = ID, Start = Start, Stop = Stop, Type = Type, Num_proteins = Num_proteins, - Num_domains = Num_domains, Average_p = Average_p, Max_p = Max_p - ), size = 3) + # Silence R CMD note + x <- y <- xend <- yend <- Type2 <- Software <- + ID <- Start <- Stop <- Type <- Num_proteins <- + Num_domains <- Average_p <- Max_p <- NULL + ggplot2::geom_segment(data = data, ggplot2::aes(x, y, + xend = xend, yend = yend, color = Type2, Software = Software, + ID = ID, Start = Start, Stop = Stop, Type = Type, Num_proteins = Num_proteins, + Num_domains = Num_domains, Average_p = 
Average_p, Max_p = Max_p + ), size = 3) } #' add_more_annot #' @@ -161,26 +211,32 @@ geom_gecco <- function(data, rre_more) { #' #' @noRd add_more_annot <- function(seg_df, plot, soft_names, index, rre_more) { - if (dim(seg_df)[1] > 0) { - if (soft_names[index] == "anti") { - plot <- plot + geom_anti(seg_df) - } else if (soft_names[index] == "sempi") { - plot <- plot + geom_sempi(seg_df) - } else if (soft_names[index] == "prism") { - plot <- plot + geom_prism(seg_df) - } else if (soft_names[index] == "prism_supp") { - plot <- plot + geom_prism_supp(seg_df) - } else if (soft_names[index] == "arts") { - plot <- plot + geom_arts(seg_df) - } else if (soft_names[index] == "deep") { - plot <- plot + geom_deep(seg_df) - } else if (soft_names[index] == "rre") { - plot <- plot + geom_rre(seg_df, rre_more) - } else if (soft_names[index] == "gecco") { - plot <- plot + geom_gecco(seg_df) + if (dim(seg_df)[1] > 0) { + if (soft_names[index] == "anti") { + plot <- plot + geom_anti(seg_df) + } else if (soft_names[index] == "sempi") { + plot <- plot + geom_sempi(seg_df) + } else if (soft_names[index] == "prism") { + plot <- plot + geom_prism(seg_df) + } else if (soft_names[index] == "prism_supp") { + plot <- plot + geom_prism_supp(seg_df) + } else if (soft_names[index] == "arts") { + plot <- plot + geom_arts(seg_df) + } else if (soft_names[index] == "deep") { + plot <- plot + geom_deep(seg_df) + } else if (soft_names[index] == "rre") { + plot <- plot + geom_rre(seg_df, rre_more) + } else if (soft_names[index] == "gecco") { + plot <- plot + geom_gecco(seg_df) + } else if (soft_names[index] == "ripp"){ + plot <- plot + geom_gecco(seg_df) + } else if (soft_names[index] == "emerald/sanntis"){ + plot <- plot + geom_gecco(seg_df) + } else if (soft_names[index] == "compare"){ + plot <- plot + geom_gecco(seg_df) + } + return(plot) + } else { + return(plot) } - return(plot) - } else { - return(plot) - } } diff --git a/README.Rmd b/README.Rmd index fa139a8..99cfdd1 100644 --- a/README.Rmd 
+++ b/README.Rmd @@ -60,6 +60,10 @@ Currently, the app supports annotations from such sources: 6. ARTS (raw tsv files) +7. Gecco () + +7. RiPPMiner-genome (txt file) + # Contributing diff --git a/README.md b/README.md index bd8bdec..b917867 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ Currently, the app supports annotations from such sources: 6. ARTS (raw tsv files) +7. RiPPMiner-genome (txt file) + # Contributing There are no contributing guidelines yet. But feel free to resolve any diff --git a/anti_biocircos.csv b/anti_biocircos.csv new file mode 100644 index 0000000..0410951 --- /dev/null +++ b/anti_biocircos.csv @@ -0,0 +1,28 @@ +"Cluster","Start","Stop","Type","chromosome","Type2" +1,86693,139654,"hgle-ks__t1pks","A","hgle-ks__t1pks" +2,166700,191657,"terpene","A","terpene" +3,246867,270397,"lanthipeptide-class-i","A","lanthipeptide-class-i" +4,494259,544087,"nrps","A","nrps" +5,791700,799942,"ripp-like","A","ripp-like" +6,1258217,1297040,"t3pks","A","t3pks" +7,1995499,2005898,"ectoine","A","ectoine" +8,2939305,2949875,"melanin","A","melanin" +9,3034430,3045603,"siderophore","A","siderophore" +10,3524827,3603907,"nrps","A","nrps" +11,5496473,5567376,"t2pks","A","t2pks" +12,5671274,5691836,"terpene","A","terpene" +13,5751944,5824487,"t2pks","A","t2pks" +14,6336090,6346443,"siderophore","A","siderophore" +15,6429574,6475291,"nrps-like__t1pks__prodigiosin","A","nrps-like__t1pks__prodigiosin" +16,6632342,6643659,"ripp-like","A","ripp-like" +17,6656903,6676224,"terpene","A","terpene" +18,6842314,6855522,"siderophore","A","siderophore" +19,6881334,6951537,"t1pks__butyrolactone","A","t1pks__butyrolactone" +20,7088263,7142447,"thioamide-nrp__nrps","A","thioamide-nrp__nrps" +21,7409741,7432456,"lanthipeptide-class-iii","A","lanthipeptide-class-iii" +22,7506307,7532117,"terpene","A","terpene" +23,7570411,7618555,"pks-like__t1pks","A","pks-like__t1pks" +24,7682906,7709360,"lanthipeptide-class-i","A","lanthipeptide-class-i" 
+25,7973469,8047403,"other__t3pks","A","other__t3pks" +26,8269636,8290764,"indole","A","indole" +27,8475101,8548352,"t3pks__nrps__terpene","A","t3pks__nrps__terpene" diff --git a/arts_biocircos.csv b/arts_biocircos.csv new file mode 100644 index 0000000..a015ee2 --- /dev/null +++ b/arts_biocircos.csv @@ -0,0 +1,162 @@ +"Hit","Start","Stop","Core","Description","Count","ID","Cluster","Type","Type2","Evalue","Bitscore","Model" +"hit_1",161236,162553,"TIGR01513","NAPRTase_put: nicotinate phosphoribosyltransferase",2,1,1,"core","core",NA,NA,"Core" +"hit_1",197220,199917,"TIGR01828","pyru_phos_dikin: pyruvate, phosphate dikinase",2,2,2,"core","core",NA,NA,"Core" +"hit_1",457483,457654,"TIGR01031","rpmF_bact: ribosomal protein bL32",2,3,3,"core","core",NA,NA,"Core" +"hit_1",544193,545717,"TIGR01311","glycerol_kin: glycerol kinase",2,4,4,"core","core",NA,NA,"Core" +NA,583522,586897,"Not_core","Biotin_lipoyl",1,5,5,"resistance","resistance",4.7e-15,54,"PF00364.17" +"hit_1",612155,612320,"TIGR01023","rpmG_bact: ribosomal protein bL33",3,6,6,"core","core",NA,NA,"Core" +"hit_1",620366,620879,"TIGR02133","RPI_actino: ribose 5-phosphate isomerase",2,7,7,"core","core",NA,NA,"Core" +NA,740313,742083,"Not_core","ABC_efflux",1,8,8,"resistance","resistance",3.9e-74,248.9,"RF0007" +NA,796976,797723,"Not_core","Biotin_lipoyl",1,9,9,"resistance","resistance",3.4e-06,25.7,"PF00364.17" +"hit_1",1035207,1036800,"TIGR01344","malate_syn_A: malate synthase A",2,10,10,"core","core",NA,NA,"Core" +"hit_1",1116446,1117826,"TIGR03356","BGL: beta-galactosidase",5,11,11,"core","core",NA,NA,"Core" +NA,1202479,1204282,"Not_core","ABC_efflux",1,12,12,"resistance","resistance",8.4e-81,270.9,"RF0007" +"hit_1",1210321,1210594,"TIGR00105","L31: ribosomal protein bL31",3,13,13,"core","core",NA,NA,"Core" +"hit_1",1282581,1283235,"TIGR00079","pept_deformyl: peptide deformylase",3,14,14,"core","core",NA,NA,"Core" 
+NA,1338247,1339366,"Not_core","Biotin_lipoyl",1,15,15,"resistance","resistance",7.5e-09,34.2,"PF00364.17" +"hit_1",1396237,1397416,"TIGR00485","EF-Tu: translation elongation factor Tu",2,16,16,"core","core",NA,NA,"Core" +"hit_1",1401206,1402082,"TIGR00753","undec_PP_bacA: undecaprenyl-diphosphatase UppP",2,17,17,"core","core",NA,NA,"Core" +NA,1494023,1495871,"Not_core","HSP90",1,18,18,"resistance","resistance",3.6e-07,28,"PF00183.13" +NA,1590406,1591387,"Not_core","OTCace",1,19,19,"resistance","resistance",4.9e-37,126.3,"PF00185.19" +"hit_1",1636965,1638942,"TIGR00418","thrS: threonine--tRNA ligase",2,20,20,"core","core",NA,NA,"Core" +"hit_1",1662891,1666530,"TIGR03160","cobT_DBIPRT: nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase",2,21,21,"core","core",NA,NA,"Core" +NA,1757648,1758413,"Not_core","Proteasome",1,22,22,"resistance","resistance",1.9e-26,91.6,"PF00227.21" +NA,1758474,1759320,"Not_core","Proteasome",1,23,23,"resistance","resistance",2.6e-38,130.3,"PF00227.21" +"hit_2",1778725,1780264,"TIGR01311","glycerol_kin: glycerol kinase",2,24,24,"core","core",NA,NA,"Core" +"hit_1",1923749,1924289,"TIGR00481","TIGR00481: Raf kinase inhibitor-like protein, YbhB/YbcL family",2,25,25,"core","core",NA,NA,"Core" +"hit_1",2048175,2049129,"TIGR00674","dapA: 4-hydroxy-tetrahydrodipicolinate synthase",2,26,26,"core","core",NA,NA,"Core" +NA,2059303,2060092,"Not_core","AAC3",1,27,27,"resistance","resistance",3.5e-54,182.5,"RF0002" +"hit_1",2068834,2069953,"TIGR00876","tal_mycobact: transaldolase",2,28,28,"core","core",NA,NA,"Core" +"hit_1",2069956,2071480,"TIGR00871","zwf: glucose-6-phosphate dehydrogenase",2,29,29,"core","core",NA,NA,"Core" +"hit_1",2071476,2072532,"TIGR00534","OpcA: glucose-6-phosphate dehydrogenase assembly protein OpcA",2,30,30,"core","core",NA,NA,"Core" +"hit_1",2080142,2081153,"TIGR01534","GAPDH-I: glyceraldehyde-3-phosphate dehydrogenase, type I",3,31,31,"core","core",NA,NA,"Core" 
+NA,2080142,2081153,"Not_core","Gp_dh_N",1,32,32,"resistance","resistance",2e-61,205.4,"PF00044.19" +"hit_1",2156336,2157773,"TIGR01064","pyruv_kin: pyruvate kinase",2,33,33,"core","core",NA,NA,"Core" +"hit_1",2162745,2165253,"TIGR02412","pepN_strep_liv: aminopeptidase N",3,34,34,"core","core",NA,NA,"Core" +"hit_1",2185227,2186259,"TIGR00544","lgt: prolipoprotein diacylglyceryl transferase",2,35,35,"core","core",NA,NA,"Core" +"hit_1",2308850,2309915,"TIGR01245","trpD: anthranilate phosphoribosyltransferase",2,36,36,"core","core",NA,NA,"Core" +"hit_1",2316757,2318494,"TIGR02891","CtaD_CoxA: cytochrome c oxidase, subunit I",2,37,37,"core","core",NA,NA,"Core" +"hit_2",2338273,2339347,"TIGR03160","cobT_DBIPRT: nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase",2,38,38,"core","core",NA,NA,"Core" +NA,2345795,2347568,"Not_core","Biotin_lipoyl",1,39,39,"resistance","resistance",8.7e-46,152.5,"PF00364.17" +"hit_1",2348739,2351439,"TIGR00759","aceE: pyruvate dehydrogenase (acetyl-transferring), homodimeric type",3,40,40,"core","core",NA,NA,"Core" +"hit_1",2445704,2446967,"TIGR01412","tat_substr_1: Tat-translocated enzyme",2,41,41,"core","core",NA,NA,"Core" +"hit_2",2539575,2542332,"TIGR00759","aceE: pyruvate dehydrogenase (acetyl-transferring), homodimeric type",3,42,42,"core","core",NA,NA,"Core" +NA,2624025,2625402,"Not_core","Carboxyl_trans",1,43,43,"resistance","resistance",2.6e-27,94.4,"PF01039.17" +NA,2651102,2653031,"Not_core","ABC_efflux",1,44,44,"resistance","resistance",4.6e-95,318,"RF0007" +"hit_2",2684903,2687633,"TIGR01828","pyru_phos_dikin: pyruvate, phosphate dikinase",2,45,45,"core","core",NA,NA,"Core" +"hit_2",2728979,2730326,"TIGR03356","BGL: beta-galactosidase",5,46,46,"core","core",NA,NA,"Core" +"hit_1",2745001,2746024,"TIGR01430","aden_deam: adenosine deaminase",5,47,47,"core","core",NA,NA,"Core" +"hit_1",2753869,2755006,"TIGR02349","DnaJ_bact: chaperone protein DnaJ",2,48,48,"core","core",NA,NA,"Core" 
+"hit_2",2854111,2854591,"TIGR02133","RPI_actino: ribose 5-phosphate isomerase",2,49,49,"core","core",NA,NA,"Core" +"hit_2",2861701,2864203,"TIGR02412","pepN_strep_liv: aminopeptidase N",3,50,50,"core","core",NA,NA,"Core" +"hit_3",2873769,2876343,"TIGR02412","pepN_strep_liv: aminopeptidase N",3,51,51,"core","core",NA,NA,"Core" +NA,3010976,3014708,"Not_core","ABC_efflux",1,52,52,"resistance","resistance",4.6e-170,565.4,"RF0007" +NA,3027513,3029130,"Not_core","Carboxyl_trans",1,53,53,"resistance","resistance",3.2e-164,546,"PF01039.17" +NA,3029168,3031109,"Not_core","Biotin_lipoyl",1,54,54,"resistance","resistance",7.8e-20,69.4,"PF00364.17" +"hit_1",3043785,3045603,"TIGR01135","glmS: glutamine-fructose-6-phosphate transaminase (isomerizing)",2,55,55,"core","core",NA,NA,"Core" +"hit_3",3054669,3056109,"TIGR03356","BGL: beta-galactosidase",5,56,56,"core","core",NA,NA,"Core" +"hit_1",3153869,3154226,"Cpn10","PF00166.17: Chaperonin 10 Kd subunit",2,57,57,"core","core",NA,NA,"Core" +"hit_2",3166795,3168142,"TIGR01513","NAPRTase_put: nicotinate phosphoribosyltransferase",2,58,58,"core","core",NA,NA,"Core" +"hit_1",3390540,3391821,"TIGR01060","eno: phosphopyruvate hydratase",2,59,59,"core","core",NA,NA,"Core" +"hit_2",3521672,3522680,"TIGR01245","trpD: anthranilate phosphoribosyltransferase",2,60,60,"core","core",NA,NA,"Core" +"hit_1",3691373,3692384,"TIGR00233","trpS: tryptophan--tRNA ligase",2,61,61,"core","core",NA,NA,"Core" +"hit_1",3789247,3789487,"TIGR00165","S18: ribosomal protein bS18",2,62,62,"core","core",NA,NA,"Core" +"hit_2",3790677,3790932,"TIGR00105","L31: ribosomal protein bL31",3,63,63,"core","core",NA,NA,"Core" +"hit_2",3790938,3791103,"TIGR01023","rpmG_bact: ribosomal protein bL33",3,64,64,"core","core",NA,NA,"Core" +"hit_1",3791190,3791412,"TIGR00009","L28: ribosomal protein bL28",2,65,65,"core","core",NA,NA,"Core" +"hit_1",3791411,3791717,"Ribosomal_S14","PF00253.17: Ribosomal protein S14p/S29e",2,66,66,"core","core",NA,NA,"Core" 
+"hit_1",3804031,3804376,"TIGR02200","GlrX_actino: glutaredoxin-like protein",2,67,67,"core","core",NA,NA,"Core" +NA,3966834,3967929,"Not_core","vanS",1,68,68,"resistance","resistance",2.9e-88,295.1,"RF0155" +NA,3967921,3968617,"Not_core","vanR",1,69,69,"resistance","resistance",5.8e-82,273.4,"RF0154" +NA,3973353,3973962,"Not_core","vanX",1,70,70,"resistance","resistance",3.8e-98,325.8,"RF0158" +"hit_2",4048920,4050120,"TIGR02349","DnaJ_bact: chaperone protein DnaJ",2,71,71,"core","core",NA,NA,"Core" +"hit_2",4153149,4154442,"TIGR00418","thrS: threonine--tRNA ligase",2,72,72,"core","core",NA,NA,"Core" +NA,4193531,4194941,"Not_core","Biotin_lipoyl",1,73,73,"resistance","resistance",3.7e-23,80,"PF00364.17" +NA,4210795,4212271,"Not_core","Biotin_lipoyl",1,74,74,"resistance","resistance",5.5e-18,63.4,"PF00364.17" +NA,4260740,4263314,"Not_core","DNA_topoisoIV",1,75,75,"resistance","resistance",2.8e-148,493.3,"PF00521.15" +NA,4263377,4265438,"Not_core","DNA_gyraseB",1,76,76,"resistance","resistance",2.3e-61,205.1,"PF00204.20" +NA,4268712,4269843,"Not_core","TIGR00663",1,77,77,"resistance","resistance",8.7e-90,300.1,"TIGR00663" +"hit_2",4304257,4304494,"TIGR00165","S18: ribosomal protein bS18",2,78,78,"core","core",NA,NA,"Core" +"hit_2",4363347,4364685,"TIGR01412","tat_substr_1: Tat-translocated enzyme",2,79,79,"core","core",NA,NA,"Core" +"hit_2",4413741,4414275,"TIGR00481","TIGR00481: Raf kinase inhibitor-like protein, YbhB/YbcL family",2,80,80,"core","core",NA,NA,"Core" +NA,4684158,4684947,"Not_core","Pentapeptide_4",1,81,81,"resistance","resistance",3.4e-18,64.3,"PF13599.1" +NA,4795084,4796683,"Not_core","Carboxyl_trans",1,82,82,"resistance","resistance",6.3e-139,462.5,"PF01039.17" +NA,4796696,4798541,"Not_core","Biotin_lipoyl",1,83,83,"resistance","resistance",1.6e-14,52.4,"PF00364.17" +"hit_2",4977868,4978495,"TIGR00079","pept_deformyl: peptide deformylase",3,84,84,"core","core",NA,NA,"Core" +"hit_3",5061598,5061763,"TIGR01023","rpmG_bact: ribosomal protein 
bL33",3,85,85,"core","core",NA,NA,"Core" +"hit_2",5068591,5069623,"TIGR01430","aden_deam: adenosine deaminase",5,86,86,"core","core",NA,NA,"Core" +NA,5078060,5081546,"Not_core","TIGR02013",1,87,87,"resistance","resistance",0,1814.6,"TIGR02013" +"hit_1",5089829,5091956,"TIGR00484","EF-G: translation elongation factor G",2,88,88,"core","core",NA,NA,"Core" +"hit_2",5092129,5093323,"TIGR00485","EF-Tu: translation elongation factor Tu",2,89,89,"core","core",NA,NA,"Core" +"hit_2",5134359,5134545,"Ribosomal_S14","PF00253.17: Ribosomal protein S14p/S29e",2,90,90,"core","core",NA,NA,"Core" +"hit_2",5153333,5155181,"TIGR01135","glmS: glutamine-fructose-6-phosphate transaminase (isomerizing)",2,91,91,"core","core",NA,NA,"Core" +"hit_2",5172736,5173045,"Cpn10","PF00166.17: Chaperonin 10 Kd subunit",2,92,92,"core","core",NA,NA,"Core" +"hit_1",5269285,5270731,"SHMT","PF00464.15: Serine hydroxymethyltransferase",4,93,93,"core","core",NA,NA,"Core" +"hit_2",5271518,5272538,"TIGR00233","trpS: tryptophan--tRNA ligase",2,94,94,"core","core",NA,NA,"Core" +"hit_3",5334545,5335736,"TIGR01430","aden_deam: adenosine deaminase",5,95,95,"core","core",NA,NA,"Core" +NA,5355282,5357055,"Not_core","Biotin_lipoyl",1,96,96,"resistance","resistance",7.1e-17,59.9,"PF00364.17" +NA,5359017,5360610,"Not_core","Carboxyl_trans",1,97,97,"resistance","resistance",1.2e-199,662.9,"PF01039.17" +NA,5533652,5534546,"Not_core","ClassB",1,98,98,"resistance","resistance",7.1e-26,89.8,"RF0054" +"hit_2",5644428,5644671,"TIGR02200","GlrX_actino: glutaredoxin-like protein",2,99,99,"core","core",NA,NA,"Core" +"hit_1",5670248,5671262,"TIGR00157","TIGR00157: ribosome small subunit-dependent GTPase A",2,100,100,"core","core",NA,NA,"Core" +"hit_1",5671274,5672591,"TIGR01356","aroA: 3-phosphoshikimate 1-carboxyvinyltransferase",2,101,101,"core","core",NA,NA,"Core" +"hit_3",5680106,5680757,"TIGR00079","pept_deformyl: peptide deformylase",3,102,102,"core","core",NA,NA,"Core" +"hit_3",5830019,5830244,"TIGR00105","L31: 
ribosomal protein bL31",3,103,103,"core","core",NA,NA,"Core" +"hit_1",5832393,5833041,"TIGR00057","TIGR00057: tRNA threonylcarbamoyl adenosine modification protein, Sua5/YciO/YrdC/YwlC family",2,104,104,"core","core",NA,NA,"Core" +"hit_2",5833805,5835062,"SHMT","PF00464.15: Serine hydroxymethyltransferase",4,105,105,"core","core",NA,NA,"Core" +"hit_1",5880227,5880572,"TIGR03954","integ_memb_HG: integral membrane protein",3,106,106,"core","core",NA,NA,"Core" +"hit_2",5883332,5883662,"TIGR03954","integ_memb_HG: integral membrane protein",3,107,107,"core","core",NA,NA,"Core" +"hit_2",5892974,5894405,"TIGR01064","pyruv_kin: pyruvate kinase",2,108,108,"core","core",NA,NA,"Core" +"hit_3",5955946,5957209,"SHMT","PF00464.15: Serine hydroxymethyltransferase",4,109,109,"core","core",NA,NA,"Core" +NA,5957223,5957601,"Not_core","Biotin_lipoyl",1,110,110,"resistance","resistance",2.1e-05,23.2,"PF00364.17" +"hit_1",5978784,5980992,"TIGR00575","dnlj: DNA ligase, NAD-dependent",2,111,111,"core","core",NA,NA,"Core" +"hit_1",6005642,6006641,"TIGR00465","ilvC: ketol-acid reductoisomerase",2,112,112,"core","core",NA,NA,"Core" +NA,6031664,6033248,"Not_core","Carboxyl_trans",1,113,113,"resistance","resistance",1.5e-201,669,"PF01039.17" +"hit_2",6062058,6062244,"TIGR00009","L28: ribosomal protein bL28",2,114,114,"core","core",NA,NA,"Core" +"hit_2",6069793,6069967,"TIGR01031","rpmF_bact: ribosomal protein bL32",2,115,115,"core","core",NA,NA,"Core" +"hit_1",6099895,6100675,"TIGR02227","sigpep_I_bact: signal peptidase I",4,116,116,"core","core",NA,NA,"Core" +"hit_2",6100667,6102008,"TIGR02227","sigpep_I_bact: signal peptidase I",4,117,117,"core","core",NA,NA,"Core" +"hit_3",6101640,6102651,"TIGR02227","sigpep_I_bact: signal peptidase I",4,118,118,"core","core",NA,NA,"Core" +"hit_4",6102755,6103532,"TIGR02227","sigpep_I_bact: signal peptidase I",4,119,119,"core","core",NA,NA,"Core" +"hit_4",6161551,6162715,"TIGR01430","aden_deam: adenosine deaminase",5,120,120,"core","core",NA,NA,"Core" 
+"hit_1",6206674,6207832,"TIGR00612","ispG_gcpE: 4-hydroxy-3-methylbut-2-en-1-yl diphosphate synthase",2,121,121,"core","core",NA,NA,"Core" +"hit_2",6266752,6267652,"TIGR00674","dapA: 4-hydroxy-tetrahydrodipicolinate synthase",2,122,122,"core","core",NA,NA,"Core" +NA,6371005,6373129,"Not_core","DNA_gyraseB",1,123,123,"resistance","resistance",1.2e-50,170.1,"PF00204.20" +NA,6386423,6388880,"Not_core","DNA_topoisoIV",1,124,124,"resistance","resistance",7.1e-147,488.7,"PF00521.15" +"hit_3",6470908,6473452,"TIGR03954","integ_memb_HG: integral membrane protein",3,125,125,"core","core",NA,NA,"Core" +NA,6550398,6551406,"Not_core","OTCace",1,126,126,"resistance","resistance",3.7e-47,159.2,"PF00185.19" +"hit_1",6594925,6596854,"TIGR00204","dxs: 1-deoxy-D-xylulose-5-phosphate synthase",2,127,127,"core","core",NA,NA,"Core" +"hit_2",6750130,6751231,"TIGR00157","TIGR00157: ribosome small subunit-dependent GTPase A",2,128,128,"core","core",NA,NA,"Core" +"hit_2",6865735,6867358,"TIGR01344","malate_syn_A: malate synthase A",2,129,129,"core","core",NA,NA,"Core" +NA,6897088,6898861,"Not_core","Biotin_lipoyl",1,130,130,"resistance","resistance",7.1e-17,59.9,"PF00364.17" +NA,6943672,6945265,"Not_core","Carboxyl_trans",1,131,131,"resistance","resistance",1.9e-191,635.7,"PF01039.17" +"hit_2",7301062,7303123,"TIGR00484","EF-G: translation elongation factor G",2,132,132,"core","core",NA,NA,"Core" +"hit_4",7324762,7326169,"TIGR03356","BGL: beta-galactosidase",5,133,133,"core","core",NA,NA,"Core" +"hit_2",7396675,7397611,"TIGR00534","OpcA: glucose-6-phosphate dehydrogenase assembly protein OpcA",2,134,134,"core","core",NA,NA,"Core" +"hit_2",7397607,7399386,"TIGR00871","zwf: glucose-6-phosphate dehydrogenase",2,135,135,"core","core",NA,NA,"Core" +"hit_2",7399382,7400528,"TIGR00876","tal_mycobact: transaldolase",2,136,136,"core","core",NA,NA,"Core" +"hit_2",7524432,7525587,"TIGR00612","ispG_gcpE: 4-hydroxy-3-methylbut-2-en-1-yl diphosphate synthase",2,137,137,"core","core",NA,NA,"Core" 
+"hit_2",7525612,7527583,"TIGR00204","dxs: 1-deoxy-D-xylulose-5-phosphate synthase",2,138,138,"core","core",NA,NA,"Core" +"hit_4",7560697,7561891,"SHMT","PF00464.15: Serine hydroxymethyltransferase",4,139,139,"core","core",NA,NA,"Core" +"hit_2",7580931,7582254,"TIGR01356","aroA: 3-phosphoshikimate 1-carboxyvinyltransferase",2,140,140,"core","core",NA,NA,"Core" +NA,7782660,7784535,"Not_core","ABC_efflux",1,141,141,"resistance","resistance",1.9e-80,269.8,"RF0007" +"hit_2",7832789,7834235,"TIGR01534","GAPDH-I: glyceraldehyde-3-phosphate dehydrogenase, type I",3,142,142,"core","core",NA,NA,"Core" +NA,7832789,7834235,"Not_core","Gp_dh_N",1,143,143,"resistance","resistance",4e-39,133,"PF00044.19" +"hit_1",7835927,7836392,"TIGR00494","crcB: protein CrcB",2,144,144,"core","core",NA,NA,"Core" +"hit_2",7836729,7837104,"TIGR00494","crcB: protein CrcB",2,145,145,"core","core",NA,NA,"Core" +"hit_2",7837143,7837980,"TIGR00753","undec_PP_bacA: undecaprenyl-diphosphatase UppP",2,146,146,"core","core",NA,NA,"Core" +NA,7913373,7914627,"Not_core","Biotin_lipoyl",1,147,147,"resistance","resistance",8.2e-23,78.9,"PF00364.17" +"hit_3",7914736,7917424,"TIGR00759","aceE: pyruvate dehydrogenase (acetyl-transferring), homodimeric type",3,148,148,"core","core",NA,NA,"Core" +"hit_2",7947902,7948901,"TIGR00465","ilvC: ketol-acid reductoisomerase",2,149,149,"core","core",NA,NA,"Core" +"hit_2",8041720,8043442,"TIGR02891","CtaD_CoxA: cytochrome c oxidase, subunit I",2,150,150,"core","core",NA,NA,"Core" +"hit_5",8077470,8078550,"TIGR01430","aden_deam: adenosine deaminase",5,151,151,"core","core",NA,NA,"Core" +"hit_2",8173118,8173721,"TIGR00057","TIGR00057: tRNA threonylcarbamoyl adenosine modification protein, Sua5/YciO/YrdC/YwlC family",2,152,152,"core","core",NA,NA,"Core" +"hit_3",8325356,8326355,"TIGR01534","GAPDH-I: glyceraldehyde-3-phosphate dehydrogenase, type I",3,153,153,"core","core",NA,NA,"Core" 
+NA,8325356,8326355,"Not_core","Gp_dh_N",1,154,154,"resistance","resistance",1.5e-53,179.9,"PF00044.19" +NA,8329971,8331888,"Not_core","HSP90",1,155,155,"resistance","resistance",9.9e-117,389.6,"PF00183.13" +"hit_2",8337388,8339560,"TIGR00575","dnlj: DNA ligase, NAD-dependent",2,156,156,"core","core",NA,NA,"Core" +NA,8345073,8346285,"Not_core","Chlor_Efflux_Pump",1,157,157,"resistance","resistance",1e-109,366.1,"RF0051" +"hit_5",8380678,8382118,"TIGR03356","BGL: beta-galactosidase",5,158,158,"core","core",NA,NA,"Core" +"hit_2",8466853,8468158,"TIGR01060","eno: phosphopyruvate hydratase",2,159,159,"core","core",NA,NA,"Core" +NA,8488517,8489696,"Not_core","Chlor_Efflux_Pump",1,160,160,"resistance","resistance",1.2e-124,415.2,"RF0051" +"hit_2",8641684,8642638,"TIGR00544","lgt: prolipoprotein diacylglyceryl transferase",2,161,161,"core","core",NA,NA,"Core" diff --git a/compare_biocircos.csv b/compare_biocircos.csv new file mode 100644 index 0000000..85ba2d0 --- /dev/null +++ b/compare_biocircos.csv @@ -0,0 +1,31 @@ +"Type","Cluster","Start","Stop","chromosome","Type2" +"eicosapentaenoic acid",1,103627,129988,"C","eicosapentaenoic acid" +"isorenieratene (12)",2,173767,199945,"C","isorenieratene (12)" +"lantibiotic",3,255310,281086,"C","lantibiotic" +"deoxysugar",4,395938,422413,"C","deoxysugar" +"coelichelin (5)",5,511034,535822,"C","coelichelin (5)" +"bacteriocin",6,796583,821798,"C","bacteriocin" +"thn (6), flaviolin (7)",7,1277624,1303748,"C","thn (6), flaviolin (7)" +"aromatic polyketide",8,1336563,1360694,"C","aromatic polyketide" +"5-hydroxyectoine (14)",9,1998467,2023979,"C","5-hydroxyectoine (14)" +"melanin",10,2944321,2968455,"C","melanin" +"desferrioxamine (13)",11,3035640,3062082,"C","desferrioxamine (13)" +"calcium-dep. antibiotic (3)",12,3520902,3544448,"C","calcium-dep. 
antibiotic (3)" +"actinorhodin (1)",13,5514248,5538808,"C","actinorhodin (1)" +"albaflavenone (11)",14,5681015,5707100,"C","albaflavenone (11)" +"gray spore pigment",15,5786087,5810752,"C","gray spore pigment" +"siderophore",16,6338948,6365546,"C","siderophore" +"prodiginine (2)",17,6432565,6458617,"C","prodiginine (2)" +"geosmin (9)",18,6666218,6693398,"C","geosmin (9)" +"γ-butyrolactone scb1 (15)",19,6891292,6917236,"C","γ-butyrolactone scb1 (15)" +"hexaketide",20,6907355,6925897,"C","hexaketide" +"dipeptide",21,7104901,7131289,"C","dipeptide" +"lanthionine-cont. peptide sapb",22,7419663,7447455,"C","lanthionine-cont. peptide sapb" +"hopene (4)",23,7516016,7541927,"C","hopene (4)" +"polyketide",24,7591451,7615411,"C","polyketide" +"lantibiotic",25,7692904,7716780,"C","lantibiotic" +"germicidin (8)",26,8027474,8051305,"C","germicidin (8)" +"aromatic polyketide",27,8494579,8518548,"C","aromatic polyketide" +"coelibactin",28,8506121,8529460,"C","coelibactin" +"2-methylisoborneol (10)",29,8537029,8563351,"C","2-methylisoborneol (10)" +"methylenomycin (18), 2-alkyl-4- (hydroxymethyl)furan-3- carboxylic acids (17)",30,232199,256675,"C","methylenomycin (18), 2-alkyl-4- (hydroxymethyl)furan-3- carboxylic acids (17)" diff --git a/data-raw/anti_data.R b/data-raw/anti_data.R index fc5246c..ad55b45 100644 --- a/data-raw/anti_data.R +++ b/data-raw/anti_data.R @@ -1,8 +1,9 @@ ## code to prepare `anti_data` dataset goes here -anti_data <- utils::read.csv("https://raw.githubusercontent.com/pavlohrab/BGCViz-datasets/main/example_data/sco_antismash.csv") +anti_data <- utils::read.csv("https://raw.githubusercontent.com/ostash-group/BGCViz-datasets/main/example_data/sco_antismash.csv") # Add chromosome column anti_data$chromosome <- rep("A", length(anti_data$Cluster)) # Type magic anti_data$Type <- stringr::str_trim(tolower(anti_data$Type)) anti_data["Type2"] <- stringr::str_trim(tolower(anti_data$Type)) -usethis::use_data(anti_data, overwrite = TRUE) +# usethis::use_data(anti_data, 
overwrite = TRUE) +## Look at use_data_internally.R file! diff --git a/data-raw/arts_data.R b/data-raw/arts_data.R index 2ed21ac..f3fe78c 100644 --- a/data-raw/arts_data.R +++ b/data-raw/arts_data.R @@ -1,3 +1,5 @@ ## code to prepare `arts_data` dataset goes here -arts_data <- utils::read.csv("https://raw.githubusercontent.com/pavlohrab/BGCViz-datasets/main/example_data/sco_arts.csv") -usethis::use_data(arts_data, overwrite = TRUE) +arts_data <- utils::read.csv("https://raw.githubusercontent.com/ostash-group/BGCViz-datasets/main/example_data/sco_arts.csv") +# usethis::use_data(arts_data, overwrite = TRUE) +## Look at use_data_internally.R file! + diff --git a/data-raw/deep_data.R b/data-raw/deep_data.R index fce0b17..0a7d175 100644 --- a/data-raw/deep_data.R +++ b/data-raw/deep_data.R @@ -1,10 +1,11 @@ ## code to prepare `deep_data` dataset goes here -deep_data <- utils::read.delim("https://raw.githubusercontent.com/pavlohrab/BGCViz-datasets/main/example_data/sco_deep.tsv") +deep_data <- utils::read.delim("https://raw.githubusercontent.com/ostash-group/BGCViz-datasets/main/example_data/sco_deep.tsv") polyketide <- nrp <- NULL # Silence R CMD error drop_cols <- c("nrp", "polyketide") colnames(deep_data) <- stringr::str_to_lower(colnames(deep_data)) # Read data deep_data <- deep_data %>% - dplyr::mutate(pks = polyketide, nrps = nrp) %>% - dplyr::select(-dplyr::one_of(drop_cols)) -usethis::use_data(deep_data, overwrite = TRUE) + dplyr::mutate(pks = polyketide, nrps = nrp) %>% + dplyr::select(-dplyr::one_of(drop_cols)) +# usethis::use_data(deep_data, overwrite = TRUE) +## Look at use_data_internally.R file! 
\ No newline at end of file diff --git a/data-raw/gecco_data.R b/data-raw/gecco_data.R index 44381e3..143ad36 100644 --- a/data-raw/gecco_data.R +++ b/data-raw/gecco_data.R @@ -1,10 +1,10 @@ ## code to prepare `gecco_data` dataset goes here -gecco_data <- utils::read.delim("https://raw.githubusercontent.com/pavlohrab/BGCViz-datasets/main/example_data/sco_gecco.tsv") +gecco_data <- utils::read.delim("https://raw.githubusercontent.com/ostash-group/BGCViz-datasets/main/example_data/sco_gecco.tsv") # Silence R CMD note polyketide_probability <- other_probability <- - nrp_probability <- alkaloid_probability <- - terpene_probability <- saccharide_probability <- - ripp_probability <- NULL + nrp_probability <- alkaloid_probability <- + terpene_probability <- saccharide_probability <- + ripp_probability <- NULL gecco_data$chromosome <- rep("G", length(gecco_data$type)) # Type magic gecco_data$Cluster <- seq(1:length(gecco_data$chromosome)) @@ -15,18 +15,19 @@ gecco_data$Type <- gsub("nrp", "nrps", gecco_data$Type) gecco_data$Type <- gsub("unknown", "under_threshold", gecco_data$Type) gecco_data["Type2"] <- stringr::str_trim(tolower(gecco_data$Type)) drop_cols <- c( - "alkaloid_probability", "polyketide_probability", "ripp_probability", "saccharide_probability", - "terpene_probability", "nrp_probability", "other_probability" + "alkaloid_probability", "polyketide_probability", "ripp_probability", "saccharide_probability", + "terpene_probability", "nrp_probability", "other_probability" ) # Read data gecco_data <- gecco_data %>% - dplyr::mutate( - pks = polyketide_probability, other = other_probability, nrps = nrp_probability, alkaloid = alkaloid_probability, - terpene = terpene_probability, saccharide = saccharide_probability, ripp = ripp_probability - ) %>% - dplyr::select(-dplyr::one_of(drop_cols)) + dplyr::mutate( + pks = polyketide_probability, other = other_probability, nrps = nrp_probability, alkaloid = alkaloid_probability, + terpene = terpene_probability, saccharide = 
saccharide_probability, ripp = ripp_probability + ) %>% + dplyr::select(-dplyr::one_of(drop_cols)) gecco_data$num_prot <- sapply(stringr::str_split(as.character(gecco_data$proteins), ";"), length) gecco_data$num_domains <- sapply(stringr::str_split(as.character(gecco_data$domains), ";"), length) names(gecco_data)[names(gecco_data) == "start"] <- "Start" names(gecco_data)[names(gecco_data) == "end"] <- "Stop" -usethis::use_data(gecco_data, overwrite = TRUE) +# usethis::use_data(gecco_data, overwrite = TRUE) +## Look at use_data_internally.R file! \ No newline at end of file diff --git a/data-raw/prism_data.R b/data-raw/prism_data.R index a20e8b0..63b3c25 100644 --- a/data-raw/prism_data.R +++ b/data-raw/prism_data.R @@ -2,5 +2,5 @@ # Function ro parse prism_json library(magrittr) source("R/fct_reading_processing.R") -prism_data <- process_prism_json_suppl(rjson::fromJSON(file = "https://raw.githubusercontent.com/pavlohrab/BGCViz-datasets/main/example_data/sco_prism.json"))[[1]] -usethis::use_data(prism_data, overwrite = TRUE, internal = TRUE) +prism_data <- process_prism_json_suppl(rjson::fromJSON(file = "https://raw.githubusercontent.com/ostash-group/BGCViz-datasets/main/example_data/sco_prism.json"))[[1]] +# usethis::use_data(prism_data, overwrite = TRUE, internal = TRUE) diff --git a/data-raw/prism_supp_data.R b/data-raw/prism_supp_data.R index c216686..e691141 100644 --- a/data-raw/prism_supp_data.R +++ b/data-raw/prism_supp_data.R @@ -2,4 +2,6 @@ library(magrittr) source("R/fct_reading_processing.R") prism_supp_data <- process_prism_json_suppl(rjson::fromJSON(file = "https://raw.githubusercontent.com/pavlohrab/BGCViz-datasets/main/example_data/sco_prism.json"))[[2]] -usethis::use_data(prism_supp_data, overwrite = TRUE) +# usethis::use_data(prism_supp_data, overwrite = TRUE) +## Look at use_data_internally.R file! 
+ diff --git a/data-raw/ripp_data.R b/data-raw/ripp_data.R new file mode 100644 index 0000000..96ef933 --- /dev/null +++ b/data-raw/ripp_data.R @@ -0,0 +1,19 @@ +## code to prepare `ripp_data` dataset goes here +ripp_data <- utils::read.table("https://raw.githubusercontent.com/2061Tsarin/BGCViz-datasets/main/example_data/sco_ripp.txt") + +colnames(ripp_data) <-c("Cluster", "Type", "Start", "Stop") + +#ADDING CHROMOSOME COLUMN +ripp_data$chromosome <- rep("GF", length(ripp_data$Cluster)) +#Type magic +ripp_data$Type <- stringr::str_trim(tolower(ripp_data$Type)) +ripp_data["Type2"] <- stringr::str_trim(tolower(ripp_data$Type)) +#Mutate NAs +ripp_data <- dplyr::mutate(ripp_data, Cluster = 1:length(ripp_data$Type)) + + +# usethis::use_data(ripp_data, overwrite = TRUE) +## Look at use_data_internally.R file! + + + diff --git a/data-raw/rre_data.R b/data-raw/rre_data.R index 10624cf..64e1025 100644 --- a/data-raw/rre_data.R +++ b/data-raw/rre_data.R @@ -1,11 +1,11 @@ ## code to prepare `rre_data` dataset goes here library(magrittr) Gene.name <- Coordinates <- NULL # Silence R CMD error -rre_data <- utils::read.delim("https://raw.githubusercontent.com/pavlohrab/BGCViz-datasets/main/example_data/sco_rre.txt") +rre_data <- utils::read.delim("https://raw.githubusercontent.com/ostash-group/BGCViz-datasets/main/example_data/sco_rre.txt") # Clean RRE data. 
Extract coordinates and Locus tag with double underscore delimiter (__) rre_data <- rre_data %>% - tidyr::separate(Gene.name, c("Sequence", "Coordinates", "Locus_tag"), sep = "__") %>% - tidyr::separate(Coordinates, c("Start", "Stop"), sep = "-") + tidyr::separate(Gene.name, c("Sequence", "Coordinates", "Locus_tag"), sep = "__") %>% + tidyr::separate(Coordinates, c("Start", "Stop"), sep = "-") # Add chromosome info column rre_data$chromosome <- rep("RRE", length(rre_data$Sequence)) # Add ID column @@ -18,4 +18,6 @@ rre_data$Start <- as.numeric(rre_data$Start) rre_data$Stop <- as.numeric(rre_data$Stop) # Store rre data into local variable rre_data <- data.frame(rre_data) -usethis::use_data(rre_data, overwrite = TRUE) +# usethis::use_data(rre_data, overwrite = TRUE) +## Look at use_data_internally.R file! + diff --git a/data-raw/sempi_data.R b/data-raw/sempi_data.R index 17a0afc..e4e469f 100644 --- a/data-raw/sempi_data.R +++ b/data-raw/sempi_data.R @@ -1,4 +1,6 @@ ## code to prepare `sempi_data` dataset goes here -sempi_data <- utils::read.csv("https://raw.githubusercontent.com/pavlohrab/BGCViz-datasets/main/example_data/sco_sempi.csv") +sempi_data <- utils::read.csv("https://raw.githubusercontent.com/ostash-group/BGCViz-datasets/main/example_data/sco_sempi.csv") sempi_data["Type2"] <- stringr::str_trim(tolower(sempi_data$Type)) -usethis::use_data(sempi_data, overwrite = TRUE) +# usethis::use_data(sempi_data, overwrite = TRUE) +## Look at use_data_internally.R file! 
+ diff --git a/data-raw/use_data_internally.R b/data-raw/use_data_internally.R index dcb36fc..ac80220 100644 --- a/data-raw/use_data_internally.R +++ b/data-raw/use_data_internally.R @@ -1 +1 @@ -usethis::use_data(anti_data, arts_data, deep_data, gecco_data, prism_data, prism_supp_data, rre_data, sempi_data, overwrite = TRUE, internal = TRUE) +usethis::use_data(anti_data, arts_data, deep_data, gecco_data, prism_data, prism_supp_data, rre_data, sempi_data, ripp_data, emerald_data, overwrite = TRUE, internal = TRUE) diff --git a/data_all.csv b/data_all.csv new file mode 100644 index 0000000..fcc4912 --- /dev/null +++ b/data_all.csv @@ -0,0 +1,195 @@ +"start","end","label","description" +86693,139654,"hgle-ks__t1pks","anti" +166700,191657,"terpene","anti" +246867,270397,"lanthipeptide-class-i","anti" +494259,544087,"nrps","anti" +791700,799942,"ripp-like","anti" +1258217,1297040,"t3pks","anti" +1995499,2005898,"ectoine","anti" +2939305,2949875,"melanin","anti" +3034430,3045603,"siderophore","anti" +3524827,3603907,"nrps","anti" +5496473,5567376,"t2pks","anti" +5671274,5691836,"terpene","anti" +5751944,5824487,"t2pks","anti" +6336090,6346443,"siderophore","anti" +6429574,6475291,"nrps-like__t1pks__prodigiosin","anti" +6632342,6643659,"ripp-like","anti" +6656903,6676224,"terpene","anti" +6842314,6855522,"siderophore","anti" +6881334,6951537,"t1pks__butyrolactone","anti" +7088263,7142447,"thioamide-nrp__nrps","anti" +7409741,7432456,"lanthipeptide-class-iii","anti" +7506307,7532117,"terpene","anti" +7570411,7618555,"pks-like__t1pks","anti" +7682906,7709360,"lanthipeptide-class-i","anti" +7973469,8047403,"other__t3pks","anti" +8269636,8290764,"indole","anti" +8475101,8548352,"t3pks__nrps__terpene","anti" +103627,129988,"eicosapentaenoic acid","compare" +173767,199945,"isorenieratene (12)","compare" +255310,281086,"lantibiotic","compare" +395938,422413,"deoxysugar","compare" +511034,535822,"coelichelin (5)","compare" +796583,821798,"bacteriocin","compare" 
+1277624,1303748,"thn (6), flaviolin (7)","compare" +1336563,1360694,"aromatic polyketide","compare" +1998467,2023979,"5-hydroxyectoine (14)","compare" +2944321,2968455,"melanin","compare" +3035640,3062082,"desferrioxamine (13)","compare" +3520902,3544448,"calcium-dep. antibiotic (3)","compare" +5514248,5538808,"actinorhodin (1)","compare" +5681015,5707100,"albaflavenone (11)","compare" +5786087,5810752,"gray spore pigment","compare" +6338948,6365546,"siderophore","compare" +6432565,6458617,"prodiginine (2)","compare" +6666218,6693398,"geosmin (9)","compare" +6891292,6917236,"γ-butyrolactone scb1 (15)","compare" +6907355,6925897,"hexaketide","compare" +7104901,7131289,"dipeptide","compare" +7419663,7447455,"lanthionine-cont. peptide sapb","compare" +7516016,7541927,"hopene (4)","compare" +7591451,7615411,"polyketide","compare" +7692904,7716780,"lantibiotic","compare" +8027474,8051305,"germicidin (8)","compare" +8494579,8518548,"aromatic polyketide","compare" +8506121,8529460,"coelibactin","compare" +8537029,8563351,"2-methylisoborneol (10)","compare" +232199,256675,"methylenomycin (18), 2-alkyl-4- (hydroxymethyl)furan-3- carboxylic acids (17)","compare" +170910,191657,"terpene","deep" +211747,220764,"pks","deep" +251763,262319,"ripp","deep" +320109,322255,"pks","deep" +332310,341318,"pks","deep" +370065,372478,"pks","deep" +431961,437224,"pks","deep" +586986,589546,"pks","deep" +815933,824058,"pks","deep" +1050594,1054583,"ripp","deep" +1275829,1279960,"terpene","deep" +1327990,1363153,"pks","deep" +1405030,1412625,"pks","deep" +2549403,2569910,"pks","deep" +2934113,2967589,"saccharide","deep" +3526136,3603907,"nrps","deep" +3782902,3788943,"ripp","deep" +4115027,4137513,"ripp","deep" +5055827,5059681,"pks","deep" +5510515,5536994,"pks","deep" +5765662,5797479,"pks","deep" +6552934,6557688,"pks","deep" +6889623,6949758,"pks","deep" +7078584,7118257,"nrps","deep" +7385659,7388675,"saccharide","deep" +7623284,7715706,"ripp","deep" +8228870,8235982,"terpene","deep" 
+8288250,8293976,"pks","deep" +8468384,8563058,"nrps","deep" +8610787,8635487,"pks","deep" +31460,41750,"pks","deep" +12285,133520,"polyketide","emerald" +172170,184797,"terpene","emerald" +202863,266263,"alkaloid","emerald" +395065,425157,"saccharide","emerald" +497375,536408,"nrp","emerald" +583522,601051,"nrp polyketide","emerald" +846084,853212,"ripp","emerald" +1241590,1282541,"nrp","emerald" +1332873,1367440,"polyketide","emerald" +1994053,2009395,"other","emerald" +2539582,2565877,"polyketide","emerald" +2615131,2639798,"ripp","emerald" +2792392,2812253,"saccharide","emerald" +2927749,2964702,"other","emerald" +3004801,3043723,"other","emerald" +3382380,3399072,"other","emerald" +3513073,3606342,"nrp","emerald" +4006935,4028808,"polyketide","emerald" +5502704,5539733,"polyketide","emerald" +5680218,5684576,"terpene","emerald" +5776741,5798684,"polyketide","emerald" +5927800,5941379,"ripp","emerald" +6334614,6343370,"other","emerald" +6433764,6470988,"nrp polyketide","emerald" +6830272,6877184,"polyketide","emerald" +6884252,6984001,"polyketide","emerald" +7084064,7146243,"nrp","emerald" +7508709,7540017,"terpene","emerald" +7581046,7606355,"polyketide","emerald" +7655814,7715820,"other","emerald" +8022244,8030491,"polyketide","emerald" +8490911,8620226,"polyketide","emerald" +103781,121371,"pks","gecco" +168705,177496,"terpene","gecco" +498683,533448,"nrps","gecco" +1335695,1361419,"pks","gecco" +2931776,2950345,"saccharide","gecco" +3524828,3602320,"nrps","gecco" +5515246,5536994,"pks","gecco" +5785753,5791297,"pks","gecco" +6432593,6467702,"pks","gecco" +6847315,6863275,"pks","gecco" +6889594,6948414,"pks","gecco" +7106284,7117874,"nrps","gecco" +7516017,7523399,"terpene","gecco" +7586410,7601569,"pks","gecco" +8258333,8283623,"ripp","gecco" +8493549,8542072,"nrps","gecco" +103780,128290,"pks","prism" +255310,261084,"class_i_lantipeptide","prism" +513988,533448,"nrps","prism" +586986,589546,"pks","prism" +2000499,2000898,"ectoine","prism" 
+2559339,2563090,"pks","prism" +2938688,2944875,"melanin","prism" +3038894,3040682,"nis_synthase","prism" +3513615,3585724,"nrps","prism" +5511270,5534546,"benzoisochromanequinone","prism" +5785752,5795837,"pentangular_polyphenol","prism" +6432592,6458318,"pks__nrps","prism" +6890527,6947423,"butyrolactone__pks","prism" +7108263,7120497,"nrps","prism" +7414295,7422622,"class_iii_iv_lantipeptide","prism" +7586409,7598555,"pks","prism" +7695015,7699626,"class_i_lantipeptide","prism" +8504460,8523749,"nrps","prism" +231675,251017,"butyrolactone__furan","prism" +246868,272469,"lanthipeptidea","ripp" +4886990,4940861,"linaridin","ripp" +7407798,7459926,"lanthipeptidec_d","ripp" +7681180,7705273,"lanthipeptidea","ripp" +256525,259687,"ripp","rre" +7696261,7699360,"ripp","rre" +106723,110434,"pks","sempi" +307598,308816,"nrps","sempi" +513997,524488,"nrps","sempi" +1405117,1405750,"nrps","sempi" +1477976,1479275,"nrps","sempi" +2290771,2292100,"nrps","sempi" +2559357,2561673,"pks","sempi" +2622623,2623751,"nrps","sempi" +2762807,2764220,"nrps","sempi" +2963180,2964527,"nrps","sempi" +3328396,3329098,"nrps","sempi" +3543364,3584771,"nrps","sempi" +3797561,3798752,"nrps","sempi" +3941523,3942822,"nrps","sempi" +4398632,4399859,"nrps","sempi" +4799973,4801182,"nrps","sempi" +4923207,4924557,"nrps","sempi" +5531212,5532682,"pks","sempi" +5786632,5788168,"pks","sempi" +6447994,6454855,"nrps-pks","sempi" +6558978,6560136,"nrps","sempi" +6804366,6807194,"nrps","sempi" +6901254,6931905,"pks","sempi" +7004464,7004725,"nrps","sempi" +7108290,7116050,"nrps","sempi" +7249252,7250602,"nrps","sempi" +7549538,7550897,"nrps","sempi" +7591514,7598516,"pks","sempi" +7734541,7735729,"nrps","sempi" +8053551,8054721,"nrps","sempi" +8140076,8141438,"nrps","sempi" +8282564,8283500,"nrps","sempi" +8504751,8523677,"nrps","sempi" diff --git a/deep_biocircos.csv b/deep_biocircos.csv new file mode 100644 index 0000000..a0e8e4f --- /dev/null +++ b/deep_biocircos.csv @@ -0,0 +1,32 @@ 
+"sequence_id","detector","detector_version","detector_label","bgc_candidate_id","nucl_start","nucl_end","nucl_length","num_proteins","num_domains","num_bio_domains","deepbgc_score","product_activity","antibacterial","cytotoxic","inhibitor","antifungal","product_class","alkaloid","other","ripp","saccharide","terpene","protein_ids","bio_pfam_ids","pfam_ids","pks","nrps","chromosome","Start","Stop","ID","Cluster","score","Cluster_type","score_a","score_d","score_c","Type","Type2" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_170910-191657.1",170910,191657,20747,19,75,11,0.66526,"cytotoxic",0.23,0.63,0.12,0.08,"terpene",0.09,0.05,0.01,0.09,0.71,"NC_003888.3_NC_003888.3_149;NC_003888.3_NC_003888.3_150;NC_003888.3_NC_003888.3_151;NC_003888.3_NC_003888.3_152;NC_003888.3_NC_003888.3_153;NC_003888.3_NC_003888.3_154;NC_003888.3_NC_003888.3_155;NC_003888.3_NC_003888.3_156;NC_003888.3_NC_003888.3_157;NC_003888.3_NC_003888.3_158;NC_003888.3_NC_003888.3_159;NC_003888.3_NC_003888.3_160;NC_003888.3_NC_003888.3_161;NC_003888.3_NC_003888.3_162;NC_003888.3_NC_003888.3_163;NC_003888.3_NC_003888.3_164;NC_003888.3_NC_003888.3_165;NC_003888.3_NC_003888.3_166;NC_003888.3_NC_003888.3_167","PF01266;PF08242;PF00494;PF08240;PF01494;PF08241;PF00348;PF02737;PF00107;PF00355;PF01593","PF11066;PF03441;PF04244;PF00348;PF13738;PF01266;PF01946;PF03486;PF00070;PF07992;PF12831;PF00890;PF13450;PF01593;PF00494;PF00355;PF01593;PF01266;PF13450;PF12831;PF00890;PF03486;PF00070;PF01946;PF07992;PF01262;PF01134;PF02737;PF01494;PF01209;PF13489;PF13847;PF13578;PF08241;PF08242;PF13649;PF05175;PF05834;PF07992;PF01266;PF00890;PF13450;PF01593;PF01266;PF13450;PF01946;PF12831;PF03486;PF00890;PF05834;PF07992;PF01494;PF01134;PF02310;PF02607;PF13411;PF12728;PF13518;PF00376;PF13384;PF01527;PF04542;PF08281;PF04545;PF13412;PF14344;PF10099;PF12900;PF00582;PF13602;PF00107;PF01262;PF08240;PF00582;PF07681",0.11,0.02,"D",170910,191657,6,6,0.71,"terpene",0.63,0.66526,0.71,"terpene","terpene" 
+"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_211747-220764.1",211747,220764,9017,10,16,4,0.74452,"antibacterial",0.67,0.13,0.25,0.24,"pks",0.02,0.12,0.05,0.04,0.16,"NC_003888.3_NC_003888.3_183;NC_003888.3_NC_003888.3_184;NC_003888.3_NC_003888.3_185;NC_003888.3_NC_003888.3_186;NC_003888.3_NC_003888.3_187;NC_003888.3_NC_003888.3_188;NC_003888.3_NC_003888.3_189;NC_003888.3_NC_003888.3_190;NC_003888.3_NC_003888.3_191;NC_003888.3_NC_003888.3_192","PF00583;PF00106;PF01370;PF08659","PF02613;PF02665;PF12680;PF14534;PF07366;PF13586;PF13302;PF00583;PF14027;PF02909;PF08659;PF01370;PF00106;PF13561;PF13460;PF00440",0.61,0.05,"D",211747,220764,8,8,0.61,"pks",0.67,0.74452,0.61,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_251763-262319.1",251763,262319,10556,9,20,6,0.71258,"antibacterial",0.78,0.11,0.08,0.15,"ripp",0.01,0.12,0.53,0.04,0.01,"NC_003888.3_NC_003888.3_224;NC_003888.3_NC_003888.3_225;NC_003888.3_NC_003888.3_226;NC_003888.3_NC_003888.3_227;NC_003888.3_NC_003888.3_228;NC_003888.3_NC_003888.3_229;NC_003888.3_NC_003888.3_230;NC_003888.3_NC_003888.3_231;NC_003888.3_NC_003888.3_232","PF00903;PF00975;PF01408;PF05147;PF00561;PF04738","PF01408;PF01978;PF12802;PF01047;PF13412;PF09339;PF00392;PF13545;PF00903;PF12833;PF00561;PF00975;PF12697;PF12146;PF12695;PF07819;PF04738;PF14028;PF05147;PF00528",0.17,0.15,"D",251763,262319,11,11,0.53,"ripp",0.78,0.71258,0.53,"ripp","ripp" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_320109-322255.1",320109,322255,2146,2,5,1,0.56163,"antibacterial",0.54,0.1,0.38,0.14,"pks",0.04,0.12,0.12,0,0.08,"NC_003888.3_NC_003888.3_279;NC_003888.3_NC_003888.3_280","PF00107","PF00107;PF13602;PF00135;PF07859;PF00326",0.57,0.14,"D",320109,322255,13,13,0.57,"pks",0.54,0.56163,0.57,"pks","pks" 
+"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_332310-341318.1",332310,341318,9008,7,15,3,0.5871,"antibacterial",0.81,0.07,0.15,0.33,"pks",0.01,0.06,0.02,0,0.14,"NC_003888.3_NC_003888.3_290;NC_003888.3_NC_003888.3_291;NC_003888.3_NC_003888.3_292;NC_003888.3_NC_003888.3_293;NC_003888.3_NC_003888.3_294;NC_003888.3_NC_003888.3_295;NC_003888.3_NC_003888.3_296","PF00106;PF01370;PF08659","PF00106;PF08659;PF01370;PF13561;PF13460;PF00440;PF13022;PF16859;PF03055;PF10518;PF14200;PF00754;PF03435;PF13460;PF00440",0.76,0.03,"D",332310,341318,15,15,0.76,"pks",0.81,0.5871,0.76,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_370065-372478.1",370065,372478,2413,3,9,4,0.57281,"antibacterial",0.76,0.04,0.19,0.36,"pks",0.01,0.01,0.02,0.03,0.02,"NC_003888.3_NC_003888.3_323;NC_003888.3_NC_003888.3_324;NC_003888.3_NC_003888.3_325","PF02737;PF00106;PF01370;PF08659","PF00106;PF01370;PF08659;PF03435;PF05368;PF02737;PF13561;PF14486;PF06912",0.92,0.03,"D",370065,372478,20,20,0.92,"pks",0.76,0.57281,0.92,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_431961-437224.1",431961,437224,5263,7,11,2,0.66524,"inhibitor",0.31,0.09,0.53,0.1,"pks",0,0.17,0.02,0,0.01,"NC_003888.3_NC_003888.3_371;NC_003888.3_NC_003888.3_372;NC_003888.3_NC_003888.3_373;NC_003888.3_NC_003888.3_374;NC_003888.3_NC_003888.3_375;NC_003888.3_NC_003888.3_376;NC_003888.3_NC_003888.3_377","PF00107;PF08240","PF05719;PF12680;PF14534;PF08281;PF04542;PF06197;PF08894;PF01872;PF08240;PF00107;PF13602",0.69,0.18,"D",431961,437224,22,22,0.69,"pks",0.53,0.66524,0.69,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_586986-589546.1",586986,589546,2560,3,5,4,0.65291,"antibacterial",0.6,0.07,0.26,0.2,"pks",0,0.03,0,0,0.17,"NC_003888.3_NC_003888.3_506;NC_003888.3_NC_003888.3_507;NC_003888.3_NC_003888.3_508","PF00698;PF02801;PF00108;PF00109","PF00698;PF02801;PF00109;PF00108;PF00550",0.77,0.35,"D",586986,589546,30,30,0.77,"pks",0.6,0.65291,0.77,"pks","pks" 
+"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_815933-824058.1",815933,824058,8125,9,20,2,0.76996,"antibacterial",0.66,0.05,0.06,0.08,"pks",0.12,0.12,0.1,0.02,0.31,"NC_003888.3_NC_003888.3_728;NC_003888.3_NC_003888.3_729;NC_003888.3_NC_003888.3_730;NC_003888.3_NC_003888.3_731;NC_003888.3_NC_003888.3_732;NC_003888.3_NC_003888.3_733;NC_003888.3_NC_003888.3_734;NC_003888.3_NC_003888.3_735;NC_003888.3_NC_003888.3_736","PF00067;PF00248","PF00989;PF13188;PF08448;PF13426;PF13185;PF01590;PF07228;PF13581;PF14016;PF00248;PF02441;PF07080;PF12680;PF14534;PF00440;PF06902;PF13459;PF13370;PF00067;PF04075",0.51,0,"D",815933,824058,38,38,0.51,"pks",0.66,0.76996,0.51,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_1050594-1054583.1",1050594,1054583,3989,4,26,2,0.68095,"antibacterial",0.54,0.42,0.15,0.02,"ripp",0.01,0.15,0.66,0.01,0.04,"NC_003888.3_NC_003888.3_956;NC_003888.3_NC_003888.3_957;NC_003888.3_NC_003888.3_958;NC_003888.3_NC_003888.3_959","PF08241;PF08242","PF02475;PF05401;PF02353;PF13489;PF05724;PF01135;PF01728;PF05175;PF01209;PF13847;PF03848;PF00398;PF13649;PF08241;PF08242;PF01497;PF01032;PF00950;PF00005;PF13604;PF13191;PF13555;PF02463;PF13175;PF13304;PF13476",0.07,0.07,"D",1050594,1054583,42,42,0.66,"ripp",0.54,0.68095,0.66,"ripp","ripp" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_1275829-1279960.1",1275829,1279960,4131,4,7,4,0.63375,"antibacterial",0.61,0.02,0.04,0.21,"terpene",0,0.15,0,0.01,0.77,"NC_003888.3_NC_003888.3_1162;NC_003888.3_NC_003888.3_1163;NC_003888.3_NC_003888.3_1164;NC_003888.3_NC_003888.3_1165","PF08545;PF00171;PF00067;PF08541","PF00171;PF00195;PF08392;PF08545;PF02797;PF08541;PF00067",0.1,0.04,"D",1275829,1279960,45,45,0.77,"terpene",0.61,0.63375,0.77,"terpene","terpene" 
+"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_1327990-1363153.1",1327990,1363153,35163,34,86,17,0.839,"antibacterial",0.66,0.4,0.12,0.04,"pks",0,0.12,0.09,0.1,0.05,"NC_003888.3_NC_003888.3_1216;NC_003888.3_NC_003888.3_1217;NC_003888.3_NC_003888.3_1218;NC_003888.3_NC_003888.3_1219;NC_003888.3_NC_003888.3_1220;NC_003888.3_NC_003888.3_1221;NC_003888.3_NC_003888.3_1222;NC_003888.3_NC_003888.3_1223;NC_003888.3_NC_003888.3_1224;NC_003888.3_NC_003888.3_1225;NC_003888.3_NC_003888.3_1226;NC_003888.3_NC_003888.3_1227;NC_003888.3_NC_003888.3_1228;NC_003888.3_NC_003888.3_1229;NC_003888.3_NC_003888.3_1230;NC_003888.3_NC_003888.3_1231;NC_003888.3_NC_003888.3_1232;NC_003888.3_NC_003888.3_1233;NC_003888.3_NC_003888.3_1234;NC_003888.3_NC_003888.3_1235;NC_003888.3_NC_003888.3_1236;NC_003888.3_NC_003888.3_1237;NC_003888.3_NC_003888.3_1238;NC_003888.3_NC_003888.3_1239;NC_003888.3_NC_003888.3_1240;NC_003888.3_NC_003888.3_1241;NC_003888.3_NC_003888.3_1242;NC_003888.3_NC_003888.3_1243;NC_003888.3_NC_003888.3_1244;NC_003888.3_NC_003888.3_1245;NC_003888.3_NC_003888.3_1246;NC_003888.3_NC_003888.3_1247;NC_003888.3_NC_003888.3_1248;NC_003888.3_NC_003888.3_1249","PF01266;PF02801;PF01636;PF08545;PF00202;PF04321;PF02770;PF00155;PF01494;PF00108;PF08541;PF00109;PF07993;PF04820;PF01370;PF03364;PF00561","PF12679;PF12730;PF13304;PF00005;PF13555;PF07730;PF00072;PF04545;PF08281;PF00196;PF01047;PF13463;PF12802;PF09339;PF01978;PF00480;PF07702;PF00392;PF13545;PF12680;PF14534;PF08281;PF04545;PF04542;PF00561;PF12697;PF12146;PF12697;PF12146;PF01674;PF00561;PF05057;PF02801;PF00109;PF00108;PF00550;PF00198;PF00364;PF02780;PF02779;PF00676;PF00456;PF02775;PF13292;PF08541;PF08545;PF00108;PF10501;PF00550;PF07993;PF01370;PF01966;PF07992;PF01494;PF13450;PF04820;PF01266;PF04542;PF08281;PF03109;PF01636;PF01163;PF04072;PF03007;PF01266;PF00890;PF01946;PF13450;PF13454;PF05834;PF07992;PF01613;PF10604;PF03364;PF00202;PF07993;PF01370;PF13460;PF04321;PF02770;PF09924;PF14079;PF00392;PF00155;PF16655;PF09423",0.57,0.23,
"D",1327990,1363153,50,50,0.57,"pks",0.66,0.839,0.57,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_1405030-1412625.1",1405030,1412625,7595,6,13,4,0.56512,"antibacterial",0.61,0.11,0.19,0.16,"pks",0.01,0.22,0.04,0.01,0.03,"NC_003888.3_NC_003888.3_1290;NC_003888.3_NC_003888.3_1291;NC_003888.3_NC_003888.3_1292;NC_003888.3_NC_003888.3_1293;NC_003888.3_NC_003888.3_1294;NC_003888.3_NC_003888.3_1295","PF00106;PF00501;PF01370;PF08659","PF00501;PF13191;PF00931;PF13401;PF00196;PF08281;PF08281;PF10101;PF10935;PF13561;PF00106;PF08659;PF01370",0.6,0.13,"D",1405030,1412625,51,51,0.6,"pks",0.61,0.56512,0.6,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_2549403-2569910.1",2549403,2569910,20507,19,36,8,0.83648,"antibacterial",0.52,0.24,0.19,0.25,"pks",0,0.14,0.02,0.01,0.09,"NC_003888.3_NC_003888.3_2337;NC_003888.3_NC_003888.3_2338;NC_003888.3_NC_003888.3_2339;NC_003888.3_NC_003888.3_2340;NC_003888.3_NC_003888.3_2341;NC_003888.3_NC_003888.3_2342;NC_003888.3_NC_003888.3_2343;NC_003888.3_NC_003888.3_2344;NC_003888.3_NC_003888.3_2345;NC_003888.3_NC_003888.3_2346;NC_003888.3_NC_003888.3_2347;NC_003888.3_NC_003888.3_2348;NC_003888.3_NC_003888.3_2349;NC_003888.3_NC_003888.3_2350;NC_003888.3_NC_003888.3_2351;NC_003888.3_NC_003888.3_2352;NC_003888.3_NC_003888.3_2353;NC_003888.3_NC_003888.3_2354;NC_003888.3_NC_003888.3_2355","PF02801;PF08545;PF00108;PF08541;PF00107;PF00698;PF00109;PF00583","PF13302;PF00583;PF13508;PF13527;PF08445;PF13673;PF00144;PF13560;PF04149;PF00652;PF14200;PF03498;PF14440;PF02678;PF07883;PF13556;PF00698;PF00195;PF00108;PF08392;PF08545;PF08541;PF00550;PF14573;PF00109;PF00108;PF02801;PF11343;PF00657;PF13472;PF01263;PF07264;PF02566;PF13602;PF00107;PF16884",0.69,0.23,"D",2549403,2569910,64,64,0.69,"pks",0.52,0.83648,0.69,"pks","pks" 
+"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_2934113-2967589.1",2934113,2967589,33476,32,50,6,0.94003,"antibacterial",0.55,0.04,0.18,0.17,"saccharide",0.02,0.17,0.04,0.66,0.01,"NC_003888.3_NC_003888.3_2652;NC_003888.3_NC_003888.3_2653;NC_003888.3_NC_003888.3_2654;NC_003888.3_NC_003888.3_2655;NC_003888.3_NC_003888.3_2656;NC_003888.3_NC_003888.3_2657;NC_003888.3_NC_003888.3_2658;NC_003888.3_NC_003888.3_2659;NC_003888.3_NC_003888.3_2660;NC_003888.3_NC_003888.3_2661;NC_003888.3_NC_003888.3_2662;NC_003888.3_NC_003888.3_2663;NC_003888.3_NC_003888.3_2664;NC_003888.3_NC_003888.3_2665;NC_003888.3_NC_003888.3_2666;NC_003888.3_NC_003888.3_2667;NC_003888.3_NC_003888.3_2668;NC_003888.3_NC_003888.3_2669;NC_003888.3_NC_003888.3_2670;NC_003888.3_NC_003888.3_2671;NC_003888.3_NC_003888.3_2672;NC_003888.3_NC_003888.3_2673;NC_003888.3_NC_003888.3_2674;NC_003888.3_NC_003888.3_2675;NC_003888.3_NC_003888.3_2676;NC_003888.3_NC_003888.3_2677;NC_003888.3_NC_003888.3_2678;NC_003888.3_NC_003888.3_2679;NC_003888.3_NC_003888.3_2680;NC_003888.3_NC_003888.3_2681;NC_003888.3_NC_003888.3_2682;NC_003888.3_NC_003888.3_2683","PF02706;PF04932;PF02397;PF02668;PF00534;PF00501","PF07690;PF12832;PF06813;PF13360;PF00400;PF12894;PF02668;PF03781;PF00389;PF02826;PF03446;PF09860;PF08843;PF03777;PF00264;PF06236;PF03205;PF03777;PF13439;PF13579;PF13477;PF00534;PF13692;PF13524;PF13727;PF02397;PF04932;PF03023;PF01554;PF14667;PF01522;PF13439;PF13579;PF13477;PF00534;PF13692;PF02706;PF13480;PF02156;PF09849;PF03777;PF03777;PF00501;PF12034;PF00092;PF13768;PF13519;PF12450;PF12679;PF12730",0.02,0.08,"D",2934113,2967589,66,66,0.66,"saccharide",0.55,0.94003,0.66,"saccharide","saccharide" 
+"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_3526136-3603907.1",3526136,3603907,77771,35,87,19,0.94701,"antibacterial",0.74,0.25,0.08,0.1,"nrps",0,0.04,0,0,0.01,"NC_003888.3_NC_003888.3_3169;NC_003888.3_NC_003888.3_3170;NC_003888.3_NC_003888.3_3171;NC_003888.3_NC_003888.3_3172;NC_003888.3_NC_003888.3_3173;NC_003888.3_NC_003888.3_3174;NC_003888.3_NC_003888.3_3175;NC_003888.3_NC_003888.3_3176;NC_003888.3_NC_003888.3_3177;NC_003888.3_NC_003888.3_3178;NC_003888.3_NC_003888.3_3179;NC_003888.3_NC_003888.3_3180;NC_003888.3_NC_003888.3_3181;NC_003888.3_NC_003888.3_3182;NC_003888.3_NC_003888.3_3183;NC_003888.3_NC_003888.3_3184;NC_003888.3_NC_003888.3_3185;NC_003888.3_NC_003888.3_3186;NC_003888.3_NC_003888.3_3187;NC_003888.3_NC_003888.3_3188;NC_003888.3_NC_003888.3_3189;NC_003888.3_NC_003888.3_3190;NC_003888.3_NC_003888.3_3191;NC_003888.3_NC_003888.3_3192;NC_003888.3_NC_003888.3_3193;NC_003888.3_NC_003888.3_3194;NC_003888.3_NC_003888.3_3195;NC_003888.3_NC_003888.3_3196;NC_003888.3_NC_003888.3_3197;NC_003888.3_NC_003888.3_3198;NC_003888.3_NC_003888.3_3199;NC_003888.3_NC_003888.3_3200;NC_003888.3_NC_003888.3_3201;NC_003888.3_NC_003888.3_3202;NC_003888.3_NC_003888.3_3203","PF02770;PF00441;PF00501;PF00155;PF08545;PF08028;PF00903;PF00975;PF01494;PF00668;PF01266;PF01636;PF02801;PF00702;PF08541;PF02668;PF00109;PF04820;PF00561","PF00122;PF00702;PF08282;PF00689;PF00486;PF03704;PF13191;PF00931;PF13401;PF03621;PF03403;PF02153;PF01266;PF09056;PF12679;PF12730;PF13304;PF00005;PF13476;PF13555;PF07730;PF00072;PF00196;PF08281;PF14493;PF00155;PF12897;PF01070;PF01645;PF00977;PF00478;PF03060;PF00903;PF13669;PF14696;PF00501;PF13193;PF00550;PF00668;PF00668;PF00501;PF13193;PF00550;PF00668;PF00501;PF13193;PF00550;PF00975;PF12697;PF00561;PF00561;PF12146;PF12697;PF01636;PF02463;PF00005;PF13555;PF00664;PF02668;PF01663;PF01261;PF02126;PF01026;PF01261;PF01040;PF07994;PF01658;PF01494;PF05834;PF04820;PF00070;PF07992;PF00890;PF01266;PF12831;PF13450;PF01134;PF08541;PF08545;PF00441;PF08028;PF02770;
PF02801;PF00109;PF00550;PF00589;PF14659",0.14,0.9,"D",3526136,3603907,70,70,0.9,"nrps",0.74,0.94701,0.9,"nrps","nrps" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_3782902-3788943.1",3782902,3788943,6041,7,14,1,0.63217,"antibacterial",0.55,0.17,0.2,0.16,"ripp",0,0.03,0.92,0,0,"NC_003888.3_NC_003888.3_3369;NC_003888.3_NC_003888.3_3370;NC_003888.3_NC_003888.3_3371;NC_003888.3_NC_003888.3_3372;NC_003888.3_NC_003888.3_3373;NC_003888.3_NC_003888.3_3374;NC_003888.3_NC_003888.3_3375","PF00171","PF12698;PF01061;PF13732;PF02702;PF13304;PF00005;PF13191;PF13555;PF03551;PF00171;PF13560;PF12844;PF13581;PF04149",0.04,0.01,"D",3782902,3788943,72,72,0.92,"ripp",0.55,0.63217,0.92,"ripp","ripp" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_4115027-4137513.1",4115027,4137513,22486,20,36,3,0.90015,"antibacterial",0.62,0.15,0.09,0.11,"ripp",0,0.14,0.58,0.02,0.09,"NC_003888.3_NC_003888.3_3686;NC_003888.3_NC_003888.3_3687;NC_003888.3_NC_003888.3_3688;NC_003888.3_NC_003888.3_3689;NC_003888.3_NC_003888.3_3690;NC_003888.3_NC_003888.3_3691;NC_003888.3_NC_003888.3_3692;NC_003888.3_NC_003888.3_3693;NC_003888.3_NC_003888.3_3694;NC_003888.3_NC_003888.3_3695;NC_003888.3_NC_003888.3_3696;NC_003888.3_NC_003888.3_3697;NC_003888.3_NC_003888.3_3698;NC_003888.3_NC_003888.3_3699;NC_003888.3_NC_003888.3_3700;NC_003888.3_NC_003888.3_3701;NC_003888.3_NC_003888.3_3702;NC_003888.3_NC_003888.3_3703;NC_003888.3_NC_003888.3_3704;NC_003888.3_NC_003888.3_3705","PF08241;PF08242;PF01757","PF05175;PF13489;PF07021;PF13847;PF13649;PF08241;PF08242;PF05724;PF00174;PF13231;PF00313;PF02518;PF07730;PF13796;PF00005;PF13555;PF13191;PF13401;PF13304;PF02687;PF12704;PF02687;PF12704;PF00005;PF13304;PF13555;PF00196;PF08281;PF04545;PF00072;PF07730;PF00487;PF01757;PF06259;PF10756;PF02342",0.14,0.08,"D",4115027,4137513,78,78,0.58,"ripp",0.62,0.90015,0.58,"ripp","ripp" 
+"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_5055827-5059681.1",5055827,5059681,3854,2,10,2,0.6628,"antibacterial",0.59,0.18,0.32,0.32,"pks",0.01,0.2,0.05,0.02,0.04,"NC_003888.3_NC_003888.3_4566;NC_003888.3_NC_003888.3_4567","PF00107;PF01370","PF13676;PF00931;PF13401;PF13424;PF13374;PF13460;PF05368;PF01073;PF01370;PF00107",0.53,0.36,"D",5055827,5059681,94,94,0.53,"pks",0.59,0.6628,0.53,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_5510515-5536994.1",5510515,5536994,26479,28,70,17,0.86345,"antibacterial",0.56,0.41,0.15,0.2,"pks",0.02,0.05,0,0.05,0.01,"NC_003888.3_NC_003888.3_4999;NC_003888.3_NC_003888.3_5000;NC_003888.3_NC_003888.3_5001;NC_003888.3_NC_003888.3_5002;NC_003888.3_NC_003888.3_5003;NC_003888.3_NC_003888.3_5004;NC_003888.3_NC_003888.3_5005;NC_003888.3_NC_003888.3_5006;NC_003888.3_NC_003888.3_5007;NC_003888.3_NC_003888.3_5008;NC_003888.3_NC_003888.3_5009;NC_003888.3_NC_003888.3_5010;NC_003888.3_NC_003888.3_5011;NC_003888.3_NC_003888.3_5012;NC_003888.3_NC_003888.3_5013;NC_003888.3_NC_003888.3_5014;NC_003888.3_NC_003888.3_5015;NC_003888.3_NC_003888.3_5016;NC_003888.3_NC_003888.3_5017;NC_003888.3_NC_003888.3_5018;NC_003888.3_NC_003888.3_5019;NC_003888.3_NC_003888.3_5020;NC_003888.3_NC_003888.3_5021;NC_003888.3_NC_003888.3_5022;NC_003888.3_NC_003888.3_5023;NC_003888.3_NC_003888.3_5024;NC_003888.3_NC_003888.3_5025;NC_003888.3_NC_003888.3_5026","PF02801;PF08242;PF00106;PF02770;PF08240;PF00725;PF08659;PF08028;PF00109;PF00441;PF08241;PF01243;PF03992;PF02737;PF00107;PF01370;PF03364","PF00440;PF16884;PF08240;PF00107;PF13602;PF00246;PF13577;PF12680;PF02737;PF00725;PF08240;PF00107;PF13602;PF08240;PF00107;PF13602;PF07690;PF00083;PF05977;PF13347;PF01243;PF12900;PF05368;PF01370;PF16363;PF13460;PF02770;PF08028;PF00441;PF03992;PF02909;PF00440;PF07690;PF06779;PF00083;PF03176;PF12349;PF00486;PF03704;PF13561;PF00106;PF08659;PF01370;PF00109;PF02801;PF00109;PF02801;PF00550;PF10604;PF03364;PF00753;PF01613;PF04978;PF12867;PF13489;PF13847;PF01209;PF
05175;PF08241;PF13578;PF08242;PF13649;PF03848;PF05724;PF13560;PF12844;PF13443;PF13413;PF01381;PF07883",0.9,0.09,"D",5510515,5536994,97,97,0.9,"pks",0.56,0.86345,0.9,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_5765662-5797479.1",5765662,5797479,31817,37,60,9,0.74897,"antibacterial",0.58,0.46,0.18,0.14,"pks",0.02,0.07,0.04,0.02,0.02,"NC_003888.3_NC_003888.3_5224;NC_003888.3_NC_003888.3_5225;NC_003888.3_NC_003888.3_5226;NC_003888.3_NC_003888.3_5227;NC_003888.3_NC_003888.3_5228;NC_003888.3_NC_003888.3_5229;NC_003888.3_NC_003888.3_5230;NC_003888.3_NC_003888.3_5231;NC_003888.3_NC_003888.3_5232;NC_003888.3_NC_003888.3_5233;NC_003888.3_NC_003888.3_5234;NC_003888.3_NC_003888.3_5235;NC_003888.3_NC_003888.3_5236;NC_003888.3_NC_003888.3_5237;NC_003888.3_NC_003888.3_5238;NC_003888.3_NC_003888.3_5239;NC_003888.3_NC_003888.3_5240;NC_003888.3_NC_003888.3_5241;NC_003888.3_NC_003888.3_5242;NC_003888.3_NC_003888.3_5243;NC_003888.3_NC_003888.3_5244;NC_003888.3_NC_003888.3_5245;NC_003888.3_NC_003888.3_5246;NC_003888.3_NC_003888.3_5247;NC_003888.3_NC_003888.3_5248;NC_003888.3_NC_003888.3_5249;NC_003888.3_NC_003888.3_5250;NC_003888.3_NC_003888.3_5251;NC_003888.3_NC_003888.3_5252;NC_003888.3_NC_003888.3_5253;NC_003888.3_NC_003888.3_5254;NC_003888.3_NC_003888.3_5255;NC_003888.3_NC_003888.3_5256;NC_003888.3_NC_003888.3_5257;NC_003888.3_NC_003888.3_5258;NC_003888.3_NC_003888.3_5259;NC_003888.3_NC_003888.3_5260","PF01266;PF02801;PF01494;PF00108;PF00109;PF01243;PF03992;PF01050;PF03364","PF05331;PF03029;PF07992;PF01494;PF00070;PF00877;PF07813;PF13560;PF00440;PF09339;PF13340;PF01609;PF13359;PF13612;PF13586;PF11583;PF11695;PF00905;PF01098;PF08239;PF02518;PF00512;PF00672;PF00782;PF05706;PF08378;PF02735;PF13628;PF01243;PF16242;PF11350;PF04673;PF10604;PF03364;PF00550;PF02801;PF00109;PF02801;PF00109;PF00108;PF12852;PF11699;PF07883;PF01050;PF02311;PF04486;PF03992;PF01494;PF01266;PF01734;PF01037;PF13412;PF12802;PF09339;PF13404;PF00881;PF13613;PF04545;PF01609;PF13359",0.84,0.07,"D
",5765662,5797479,100,100,0.84,"pks",0.58,0.74897,0.84,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_6552934-6557688.1",6552934,6557688,4754,4,9,2,0.58364,"antibacterial",0.67,0.16,0.2,0.24,"pks",0.01,0.3,0.06,0.01,0.03,"NC_003888.3_NC_003888.3_5907;NC_003888.3_NC_003888.3_5908;NC_003888.3_NC_003888.3_5909;NC_003888.3_NC_003888.3_5910","PF00378;PF01494","PF13581;PF00378;PF16113;PF01494;PF07992;PF13450;PF00724;PF00685;PF13469",0.61,0.02,"D",6552934,6557688,108,108,0.61,"pks",0.67,0.58364,0.61,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_6889623-6949758.1",6889623,6949758,60135,26,107,23,0.84593,"antibacterial",0.67,0.3,0.06,0.09,"pks",0,0.01,0.02,0.04,0.01,"NC_003888.3_NC_003888.3_6196;NC_003888.3_NC_003888.3_6197;NC_003888.3_NC_003888.3_6198;NC_003888.3_NC_003888.3_6199;NC_003888.3_NC_003888.3_6200;NC_003888.3_NC_003888.3_6201;NC_003888.3_NC_003888.3_6202;NC_003888.3_NC_003888.3_6203;NC_003888.3_NC_003888.3_6204;NC_003888.3_NC_003888.3_6205;NC_003888.3_NC_003888.3_6206;NC_003888.3_NC_003888.3_6207;NC_003888.3_NC_003888.3_6208;NC_003888.3_NC_003888.3_6209;NC_003888.3_NC_003888.3_6210;NC_003888.3_NC_003888.3_6211;NC_003888.3_NC_003888.3_6212;NC_003888.3_NC_003888.3_6213;NC_003888.3_NC_003888.3_6214;NC_003888.3_NC_003888.3_6215;NC_003888.3_NC_003888.3_6216;NC_003888.3_NC_003888.3_6217;NC_003888.3_NC_003888.3_6218;NC_003888.3_NC_003888.3_6219;NC_003888.3_NC_003888.3_6220;NC_003888.3_NC_003888.3_6221","PF00106;PF02786;PF00108;PF00289;PF00698;PF08990;PF01370;PF00202;PF03756;PF01039;PF07993;PF02719;PF00975;PF01494;PF02737;PF02785;PF04321;PF01266;PF02801;PF08659;PF00109;PF04820;PF00561","PF00106;PF08659;PF13561;PF00440;PF03756;PF05368;PF03435;PF01370;PF13460;PF02518;PF13581;PF07730;PF02775;PF17147;PF01855;PF02776;PF01558;PF00364;PF13533;PF02785;PF02786;PF15632;PF02222;PF07478;PF00289;PF10518;PF01565;PF08031;PF07993;PF01370;PF16363;PF04321;PF01073;PF00550;PF08659;PF00106;PF13561;PF14765;PF00698;PF16197;PF02801;PF00109;PF00108;PF08
990;PF00698;PF00109;PF00108;PF00550;PF08659;PF00106;PF01370;PF02719;PF07993;PF14765;PF16197;PF02801;PF08990;PF00550;PF08659;PF01370;PF02719;PF07993;PF14765;PF16197;PF02801;PF00109;PF00108;PF00106;PF00698;PF01494;PF12831;PF00890;PF00561;PF12697;PF12146;PF07690;PF12832;PF03209;PF05977;PF00083;PF13347;PF00202;PF00486;PF03704;PF10518;PF01565;PF08031;PF13561;PF00106;PF08659;PF13460;PF05368;PF01039;PF13822;PF00440;PF00975;PF12697;PF03959;PF00486;PF03704;PF01266;PF01262;PF07992;PF02737;PF00890;PF13450;PF04820",0.97,0.08,"D",6889623,6949758,117,117,0.97,"pks",0.67,0.84593,0.97,"pks","pks" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_7078584-7118257.1",7078584,7118257,39673,25,58,6,0.86082,"antibacterial",0.66,0.24,0.15,0.14,"nrps",0.02,0.14,0.01,0.03,0,"NC_003888.3_NC_003888.3_6336;NC_003888.3_NC_003888.3_6337;NC_003888.3_NC_003888.3_6338;NC_003888.3_NC_003888.3_6339;NC_003888.3_NC_003888.3_6340;NC_003888.3_NC_003888.3_6341;NC_003888.3_NC_003888.3_6342;NC_003888.3_NC_003888.3_6343;NC_003888.3_NC_003888.3_6344;NC_003888.3_NC_003888.3_6345;NC_003888.3_NC_003888.3_6346;NC_003888.3_NC_003888.3_6347;NC_003888.3_NC_003888.3_6348;NC_003888.3_NC_003888.3_6349;NC_003888.3_NC_003888.3_6350;NC_003888.3_NC_003888.3_6351;NC_003888.3_NC_003888.3_6352;NC_003888.3_NC_003888.3_6353;NC_003888.3_NC_003888.3_6354;NC_003888.3_NC_003888.3_6355;NC_003888.3_NC_003888.3_6356;NC_003888.3_NC_003888.3_6357;NC_003888.3_NC_003888.3_6358;NC_003888.3_NC_003888.3_6359;NC_003888.3_NC_003888.3_6360","PF00975;PF06339;PF00668;PF01050;PF00501;PF00202","PF00202;PF07905;PF13556;PF00795;PF01979;PF07969;PF00296;PF02133;PF13229;PF05048;PF12708;PF12218;PF12079;PF02627;PF07730;PF13581;PF00072;PF08281;PF04545;PF00196;PF03099;PF13581;PF07730;PF00672;PF12833;PF00165;PF05977;PF07690;PF01569;PF00041;PF03422;PF07971;PF14403;PF04174;PF05977;PF07690;PF00501;PF13193;PF00550;PF00668;PF00668;PF00501;PF13193;PF00550;PF00975;PF05899;PF07883;PF13434;PF07992;PF07883;PF06339;PF01050;PF11699;PF00190;PF02311;PF05899;PF12852;
PF14525",0.05,0.81,"D",7078584,7118257,119,119,0.81,"nrps",0.66,0.86082,0.81,"nrps","nrps" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_7385659-7388675.1",7385659,7388675,3016,3,7,1,0.64353,"antibacterial",0.51,0.16,0.12,0.15,"saccharide",0,0.02,0.21,0.71,0.04,"NC_003888.3_NC_003888.3_6571;NC_003888.3_NC_003888.3_6572;NC_003888.3_NC_003888.3_6573","PF00534","PF01242;PF13579;PF13439;PF00534;PF13692;PF13847;PF13649",0.01,0.01,"D",7385659,7388675,122,122,0.71,"saccharide",0.51,0.64353,0.71,"saccharide","saccharide" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_7623284-7715706.1",7623284,7715706,92422,100,87,10,0.84839,"antibacterial",0.52,0.14,0.13,0.2,"ripp",0,0.26,0.51,0.1,0.01,"NC_003888.3_NC_003888.3_6768;NC_003888.3_NC_003888.3_6769;NC_003888.3_NC_003888.3_6770;NC_003888.3_NC_003888.3_6771;NC_003888.3_NC_003888.3_6772;NC_003888.3_NC_003888.3_6773;NC_003888.3_NC_003888.3_6774;NC_003888.3_NC_003888.3_6775;NC_003888.3_NC_003888.3_6776;NC_003888.3_NC_003888.3_6777;NC_003888.3_NC_003888.3_6778;NC_003888.3_NC_003888.3_6779;NC_003888.3_NC_003888.3_6780;NC_003888.3_NC_003888.3_6781;NC_003888.3_NC_003888.3_6782;NC_003888.3_NC_003888.3_6783;NC_003888.3_NC_003888.3_6784;NC_003888.3_NC_003888.3_6785;NC_003888.3_NC_003888.3_6786;NC_003888.3_NC_003888.3_6787;NC_003888.3_NC_003888.3_6788;NC_003888.3_NC_003888.3_6789;NC_003888.3_NC_003888.3_6790;NC_003888.3_NC_003888.3_6791;NC_003888.3_NC_003888.3_6792;NC_003888.3_NC_003888.3_6793;NC_003888.3_NC_003888.3_6794;NC_003888.3_NC_003888.3_6795;NC_003888.3_NC_003888.3_6796;NC_003888.3_NC_003888.3_6797;NC_003888.3_NC_003888.3_6798;NC_003888.3_NC_003888.3_6799;NC_003888.3_NC_003888.3_6800;NC_003888.3_NC_003888.3_6801;NC_003888.3_NC_003888.3_6802;NC_003888.3_NC_003888.3_6803;NC_003888.3_NC_003888.3_6804;NC_003888.3_NC_003888.3_6805;NC_003888.3_NC_003888.3_6806;NC_003888.3_NC_003888.3_6807;NC_003888.3_NC_003888.3_6808;NC_003888.3_NC_003888.3_6809;NC_003888.3_NC_003888.3_6810;NC_003888.3_NC_003888.3_6811;NC_003888.3_NC_0
03888.3_6812;NC_003888.3_NC_003888.3_6813;NC_003888.3_NC_003888.3_6814;NC_003888.3_NC_003888.3_6815;NC_003888.3_NC_003888.3_6816;NC_003888.3_NC_003888.3_6817;NC_003888.3_NC_003888.3_6818;NC_003888.3_NC_003888.3_6819;NC_003888.3_NC_003888.3_6820;NC_003888.3_NC_003888.3_6821;NC_003888.3_NC_003888.3_6822;NC_003888.3_NC_003888.3_6823;NC_003888.3_NC_003888.3_6824;NC_003888.3_NC_003888.3_6825;NC_003888.3_NC_003888.3_6826;NC_003888.3_NC_003888.3_6827;NC_003888.3_NC_003888.3_6828;NC_003888.3_NC_003888.3_6829;NC_003888.3_NC_003888.3_6830;NC_003888.3_NC_003888.3_6831;NC_003888.3_NC_003888.3_6832;NC_003888.3_NC_003888.3_6833;NC_003888.3_NC_003888.3_6834;NC_003888.3_NC_003888.3_6835;NC_003888.3_NC_003888.3_6836;NC_003888.3_NC_003888.3_6837;NC_003888.3_NC_003888.3_6838;NC_003888.3_NC_003888.3_6839;NC_003888.3_NC_003888.3_6840;NC_003888.3_NC_003888.3_6841;NC_003888.3_NC_003888.3_6842;NC_003888.3_NC_003888.3_6843;NC_003888.3_NC_003888.3_6844;NC_003888.3_NC_003888.3_6845;NC_003888.3_NC_003888.3_6846;NC_003888.3_NC_003888.3_6847;NC_003888.3_NC_003888.3_6848;NC_003888.3_NC_003888.3_6849;NC_003888.3_NC_003888.3_6850;NC_003888.3_NC_003888.3_6851;NC_003888.3_NC_003888.3_6852;NC_003888.3_NC_003888.3_6853;NC_003888.3_NC_003888.3_6854;NC_003888.3_NC_003888.3_6855;NC_003888.3_NC_003888.3_6856;NC_003888.3_NC_003888.3_6857;NC_003888.3_NC_003888.3_6858;NC_003888.3_NC_003888.3_6859;NC_003888.3_NC_003888.3_6860;NC_003888.3_NC_003888.3_6861;NC_003888.3_NC_003888.3_6862;NC_003888.3_NC_003888.3_6863;NC_003888.3_NC_003888.3_6864;NC_003888.3_NC_003888.3_6865;NC_003888.3_NC_003888.3_6866;NC_003888.3_NC_003888.3_6867","PF01636;PF02770;PF02786;PF00702;PF08028;PF00441;PF02771;PF05147;PF00535;PF04738","PF03771;PF03771;PF03771;PF00069;PF07714;PF01636;PF01163;PF00656;PF05762;PF13519;PF05496;PF07728;PF07726;PF00004;PF13191;PF00535;PF02195;PF12846;PF11203;PF00877;PF01555;PF16259;PF08239;PF01638;PF07690;PF15632;PF01071;PF13549;PF02786;PF02655;PF02222;PF07478;PF13535;PF07690;PF13419;PF00702;PF13242;PF03771;PF03
771;PF13620;PF14686;PF13634;PF01068;PF13586;PF01609;PF13340;PF13340;PF01609;PF13586;PF03771;PF03771;PF03432;PF05713;PF02467;PF13730;PF13560;PF01135;PF13649;PF01209;PF00398;PF05175;PF13847;PF14028;PF05147;PF14028;PF04738;PF00196;PF13556;PF08279;PF08281;PF00441;PF08028;PF02770;PF02771;PF01590;PF03029;PF00009;PF05331;PF03259;PF02518;PF13581;PF00723;PF13424;PF01980;PF13581;PF01636;PF03881",0.1,0.07,"D",7623284,7715706,131,131,0.51,"ripp",0.52,0.84839,0.51,"ripp","ripp" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_8228870-8235982.1",8228870,8235982,7112,8,11,3,0.67244,"antibacterial",0.8,0.06,0.19,0.16,"terpene",0.1,0.04,0.14,0.01,0.55,"NC_003888.3_NC_003888.3_7322;NC_003888.3_NC_003888.3_7323;NC_003888.3_NC_003888.3_7324;NC_003888.3_NC_003888.3_7325;NC_003888.3_NC_003888.3_7326;NC_003888.3_NC_003888.3_7327;NC_003888.3_NC_003888.3_7328;NC_003888.3_NC_003888.3_7329","PF00106;PF00067;PF08659","PF00106;PF08659;PF13561;PF13460;PF03631;PF02746;PF13378;PF00067;PF03029;PF13479;PF05331",0.2,0.02,"D",8228870,8235982,154,154,0.55,"terpene",0.8,0.67244,0.55,"terpene","terpene" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_8288250-8293976.1",8288250,8293976,5726,5,6,1,0.6163,"antibacterial",0.66,0.04,0.13,0.4,"pks",0.01,0.17,0.05,0.06,0.03,"NC_003888.3_NC_003888.3_7385;NC_003888.3_NC_003888.3_7386;NC_003888.3_NC_003888.3_7387;NC_003888.3_NC_003888.3_7388;NC_003888.3_NC_003888.3_7389","PF01636","PF05426;PF01636;PF05977;PF09835;PF07690;PF11706",0.69,0,"D",8288250,8293976,157,157,0.69,"pks",0.66,0.6163,0.69,"pks","pks" 
+"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_8468384-8563058.1",8468384,8563058,94674,88,209,23,0.91964,"cytotoxic",0.48,0.54,0.21,0.15,"nrps",0.01,0.08,0,0.04,0.02,"NC_003888.3_NC_003888.3_7547;NC_003888.3_NC_003888.3_7548;NC_003888.3_NC_003888.3_7549;NC_003888.3_NC_003888.3_7550;NC_003888.3_NC_003888.3_7551;NC_003888.3_NC_003888.3_7552;NC_003888.3_NC_003888.3_7553;NC_003888.3_NC_003888.3_7554;NC_003888.3_NC_003888.3_7555;NC_003888.3_NC_003888.3_7556;NC_003888.3_NC_003888.3_7557;NC_003888.3_NC_003888.3_7558;NC_003888.3_NC_003888.3_7559;NC_003888.3_NC_003888.3_7560;NC_003888.3_NC_003888.3_7561;NC_003888.3_NC_003888.3_7562;NC_003888.3_NC_003888.3_7563;NC_003888.3_NC_003888.3_7564;NC_003888.3_NC_003888.3_7565;NC_003888.3_NC_003888.3_7566;NC_003888.3_NC_003888.3_7567;NC_003888.3_NC_003888.3_7568;NC_003888.3_NC_003888.3_7569;NC_003888.3_NC_003888.3_7570;NC_003888.3_NC_003888.3_7571;NC_003888.3_NC_003888.3_7572;NC_003888.3_NC_003888.3_7573;NC_003888.3_NC_003888.3_7574;NC_003888.3_NC_003888.3_7575;NC_003888.3_NC_003888.3_7576;NC_003888.3_NC_003888.3_7577;NC_003888.3_NC_003888.3_7578;NC_003888.3_NC_003888.3_7579;NC_003888.3_NC_003888.3_7580;NC_003888.3_NC_003888.3_7581;NC_003888.3_NC_003888.3_7582;NC_003888.3_NC_003888.3_7583;NC_003888.3_NC_003888.3_7584;NC_003888.3_NC_003888.3_7585;NC_003888.3_NC_003888.3_7586;NC_003888.3_NC_003888.3_7587;NC_003888.3_NC_003888.3_7588;NC_003888.3_NC_003888.3_7589;NC_003888.3_NC_003888.3_7590;NC_003888.3_NC_003888.3_7591;NC_003888.3_NC_003888.3_7592;NC_003888.3_NC_003888.3_7593;NC_003888.3_NC_003888.3_7594;NC_003888.3_NC_003888.3_7595;NC_003888.3_NC_003888.3_7596;NC_003888.3_NC_003888.3_7597;NC_003888.3_NC_003888.3_7598;NC_003888.3_NC_003888.3_7599;NC_003888.3_NC_003888.3_7600;NC_003888.3_NC_003888.3_7601;NC_003888.3_NC_003888.3_7602;NC_003888.3_NC_003888.3_7603;NC_003888.3_NC_003888.3_7604;NC_003888.3_NC_003888.3_7605;NC_003888.3_NC_003888.3_7606;NC_003888.3_NC_003888.3_7607;NC_003888.3_NC_003888.3_7608;NC_003888.3_NC_003888.3_7
609;NC_003888.3_NC_003888.3_7610;NC_003888.3_NC_003888.3_7611;NC_003888.3_NC_003888.3_7612;NC_003888.3_NC_003888.3_7613;NC_003888.3_NC_003888.3_7614;NC_003888.3_NC_003888.3_7615;NC_003888.3_NC_003888.3_7616;NC_003888.3_NC_003888.3_7617;NC_003888.3_NC_003888.3_7618;NC_003888.3_NC_003888.3_7619;NC_003888.3_NC_003888.3_7620;NC_003888.3_NC_003888.3_7621;NC_003888.3_NC_003888.3_7622;NC_003888.3_NC_003888.3_7623;NC_003888.3_NC_003888.3_7624;NC_003888.3_NC_003888.3_7625;NC_003888.3_NC_003888.3_7626;NC_003888.3_NC_003888.3_7627;NC_003888.3_NC_003888.3_7628;NC_003888.3_NC_003888.3_7629;NC_003888.3_NC_003888.3_7630;NC_003888.3_NC_003888.3_7631;NC_003888.3_NC_003888.3_7632;NC_003888.3_NC_003888.3_7633;NC_003888.3_NC_003888.3_7634","PF08242;PF00106;PF00891;PF00107;PF00583;PF00501;PF01370;PF03364;PF01408;PF00903;PF00975;PF01494;PF00248;PF08241;PF00668;PF00067;PF01266;PF01636;PF00857;PF08240;PF08541;PF08659;PF00561","PF12802;PF01047;PF13463;PF12840;PF03466;PF00126;PF03446;PF12893;PF00903;PF08922;PF16653;PF03435;PF01113;PF02254;PF00440;PF02909;PF13828;PF13499;PF13833;PF13405;PF00036;PF13202;PF00196;PF08281;PF00072;PF07730;PF00561;PF08386;PF13977;PF16859;PF00440;PF13302;PF13420;PF00583;PF00903;PF00248;PF00248;PF13563;PF01186;PF01494;PF01266;PF13450;PF07885;PF00520;PF01872;PF07690;PF06779;PF01035;PF04343;PF11160;PF08240;PF00107;PF13602;PF01966;PF01494;PF05834;PF03486;PF07992;PF01946;PF01266;PF12831;PF00890;PF13450;PF01134;PF13738;PF04191;PF04140;PF08541;PF02797;PF00195;PF08392;PF01040;PF03640;PF13473;PF00127;PF06902;PF13459;PF13370;PF00496;PF00528;PF00528;PF13304;PF00005;PF13481;PF13191;PF13401;PF00004;PF02463;PF13193;PF00501;PF00550;PF00668;PF00501;PF13193;PF00668;PF00501;PF13489;PF01209;PF13847;PF13649;PF08242;PF08241;PF13193;PF00550;PF12697;PF00975;PF00561;PF01738;PF03435;PF01370;PF13460;PF01408;PF00891;PF13489;PF13649;PF08241;PF08242;PF00067;PF00975;PF12697;PF12146;PF13489;PF05219;PF13649;PF08242;PF08241;PF00664;PF00005;PF13555;PF13191;PF13401;PF13479;PF02463;PF00664;PF00005;PF1
3555;PF13191;PF02463;PF00425;PF13561;PF00106;PF01370;PF08659;PF00440;PF00903;PF12802;PF01047;PF09339;PF13412;PF13545;PF02333;PF01209;PF13649;PF08241;PF08242;PF09278;PF13411;PF00376;PF00027;PF02353;PF08241;PF13649;PF00392;PF07729;PF07690;PF00440;PF02909;PF07992;PF13450;PF01494;PF13560;PF01381;PF11512;PF13483;PF12706;PF12802;PF13463;PF01047;PF12840;PF13412;PF13545;PF01636;PF07730;PF00072;PF08281;PF04545;PF00196;PF13412;PF13302;PF00583;PF04672;PF08327;PF10604;PF03364;PF00440;PF13384;PF11716;PF12867;PF07398;PF04978;PF12867;PF13560;PF09339;PF13404;PF00857;PF01978;PF13463;PF12802;PF01047",0.42,0.64,"D",8468384,8563058,161,161,0.64,"nrps",0.54,0.91964,0.64,"nrps","nrps" +"NC_003888.3","deepbgc","0.1.0","deepbgc","NC_003888.3_8610787-8635487.1",8610787,8635487,24700,30,42,5,0.83777,"antibacterial",0.54,0.26,0.2,0.31,"pks",0.01,0.12,0.08,0.03,0.08,"NC_003888.3_NC_003888.3_7682;NC_003888.3_NC_003888.3_7683;NC_003888.3_NC_003888.3_7684;NC_003888.3_NC_003888.3_7685;NC_003888.3_NC_003888.3_7686;NC_003888.3_NC_003888.3_7687;NC_003888.3_NC_003888.3_7688;NC_003888.3_NC_003888.3_7689;NC_003888.3_NC_003888.3_7690;NC_003888.3_NC_003888.3_7691;NC_003888.3_NC_003888.3_7692;NC_003888.3_NC_003888.3_7693;NC_003888.3_NC_003888.3_7694;NC_003888.3_NC_003888.3_7695;NC_003888.3_NC_003888.3_7696;NC_003888.3_NC_003888.3_7697;NC_003888.3_NC_003888.3_7698;NC_003888.3_NC_003888.3_7699;NC_003888.3_NC_003888.3_7700;NC_003888.3_NC_003888.3_7701;NC_003888.3_NC_003888.3_7702;NC_003888.3_NC_003888.3_7703;NC_003888.3_NC_003888.3_7704;NC_003888.3_NC_003888.3_7705;NC_003888.3_NC_003888.3_7706;NC_003888.3_NC_003888.3_7707;NC_003888.3_NC_003888.3_7708;NC_003888.3_NC_003888.3_7709;NC_003888.3_NC_003888.3_7710;NC_003888.3_NC_003888.3_7711","PF00106;PF01494;PF08659;PF00107;PF01370","PF00881;PF02909;PF13358;PF09076;PF01494;PF07992;PF00890;PF01494;PF00890;PF07992;PF00107;PF00440;PF02909;PF13586;PF01609;PF10756;PF13011;PF01527;PF13518;PF13384;PF13551;PF13565;PF00665;PF13683;PF13333;PF14145;PF01548;PF13022;PF00440;PF
00356;PF00106;PF08659;PF01370;PF13561;PF06197;PF13359;PF01609;PF13613;PF04072;PF13561;PF00106;PF00440",0.68,0.08,"D",8610787,8635487,166,166,0.68,"pks",0.54,0.83777,0.68,"pks","pks" +"NC_003903.1","deepbgc","0.1.0","deepbgc","NC_003903.1_31460-41750.1",31460,41750,10290,13,23,4,0.61532,"antibacterial",0.76,0.13,0.12,0.31,"pks",0.02,0.15,0.01,0.08,0.12,"NC_003903.1_NC_003903.1_34;NC_003903.1_NC_003903.1_35;NC_003903.1_NC_003903.1_36;NC_003903.1_NC_003903.1_37;NC_003903.1_NC_003903.1_38;NC_003903.1_NC_003903.1_39;NC_003903.1_NC_003903.1_40;NC_003903.1_NC_003903.1_41;NC_003903.1_NC_003903.1_42;NC_003903.1_NC_003903.1_43;NC_003903.1_NC_003903.1_44;NC_003903.1_NC_003903.1_45;NC_003903.1_NC_003903.1_46","PF02585;PF00106;PF01370;PF08659","PF07228;PF02585;PF12833;PF00165;PF12852;PF00106;PF01370;PF08659;PF13561;PF13460;PF13586;PF01609;PF13340;PF07228;PF16859;PF16925;PF00440;PF00106;PF08659;PF01370;PF16363;PF13460;PF13561",0.58,0.07,"D",31460,41750,168,168,0.58,"pks",0.76,0.61532,0.58,"pks","pks" diff --git a/dev/02_dev.R b/dev/02_dev.R index 5d69697..1506e91 100644 --- a/dev/02_dev.R +++ b/dev/02_dev.R @@ -80,6 +80,10 @@ usethis::use_data_raw( name = "sempi_data", open = FALSE ) usethis::use_data_raw( name = "arts_data", open = FALSE ) usethis::use_data_raw( name = "deep_data", open = FALSE ) usethis::use_data_raw( name = "rre_data", open = FALSE ) +usethis::use_data_raw( name = "ripp_data", open = FALSE) +usethis::use_data_raw( name = "emerald_data", open = FALSE) +# usethis::use_data_raw( name = "reference_data", open = FALSE) + ## Tests ---- ## Add one line by test you want to create @@ -98,7 +102,6 @@ devtools::build_vignettes() usethis::use_coverage() # Create a summary readme for the testthat subdirectory -covrpage::covrpage() ## CI ---- ## Use this part of the script if you need to set up a CI diff --git a/dev/run_dev.R b/dev/run_dev.R index 6729c32..4cafd2d 100644 --- a/dev/run_dev.R +++ b/dev/run_dev.R @@ -10,3 +10,6 @@ golem::document_and_reload() # Run the 
application run_app() + + + diff --git a/docs/Additional_analysis.md b/docs/Additional_analysis.md index 58a1e41..e1081c3 100644 --- a/docs/Additional_analysis.md +++ b/docs/Additional_analysis.md @@ -44,6 +44,7 @@ pip install biopython pandas clinker Pip should be already installed if you have [Python](https://www.python.org) installed in your system # Step 1. Group the GenBank records + ## Inputs There is one input -> genome sequence, which was used for BCG annotations in GenBank format. @@ -52,11 +53,19 @@ There is one input -> genome sequence, which was used for BCG annotations in Gen The usage is pretty straightforward - you need to specify only one input - master GenBank file : ```bash -python group.py +python group.py -i ``` +Or if you would like to run clinker as well: + +```bash +python group.py -i -cl +``` +The latter command will run clinker automatically. + +**Note:** It can take a while for clinker to run. Also, all results will be saved into the `clinker_plots` folder as `.html` files. ## Results -The script is working rather slow. The grouping can take up to 20-30 min. The result of the grouping is several folders, which are named as "group_1", "group_2", etc. These folders hold extracted records in GenBank format. +The result of the grouping is several folders, which are named as "group_1", "group_2", etc. These folders hold extracted records in GenBank format. # Step 2. Run clinker @@ -74,10 +83,78 @@ clinker --plot Then, after the analysis is done, the default browser will open with the interactive visualization. For example, for the default S.coelicolor data, results for group 3 (grouped by antismash data) can be viewed as follows (while using antismash results GenBank file) : + ![clinker](/images/clinker_example.png) + This is a result of running clinker with the following command: ```bash clinker group_3/*.gb --plot -``` \ No newline at end of file +``` + +**You can also generate clinker plots automatically in the group.py script for all groups. 
See Step 1** + +# Group_by.py options +Script options: +```bash + $ python group.py -h +usage: group.py [-h] -i INPUT [--force | --no-force] [--quiet | --no-quiet] + [-cl | --run_clinker | --no-run_clinker] [-j JOBS] + +Small helper script for BGCViz + +options: + -h, --help show this help message and exit + --force, --no-force Force overwrite calculated results. [default = False] + --quiet, --no-quiet Run silently. Clinker will run as usual. [default = False] + -cl, --run_clinker, --no-run_clinker + Automatically runs clinker on groups. Results are stored in 'clinker_plots' + folder. [default = False] + -j JOBS, --jobs JOBS Number of threads for clinker analysis (0=all). [default = 0] + +Required arguments: + -i INPUT, --input INPUT + Input .gb/.gbk/.gbff file. One record per file will be used (as one genome) +``` + + +# Step 3. Separate Antismash regions into separate clusters + +Often, antiSMASH annotates closely related clusters as one region. Therefore, if you would like to re-analyse those regions as separate clusters, you can split them into separate GenBank files using our `dissect.py` script. + +## Input +The script takes as input a csv file with the following format: + +group|separate_before|by_software +| ----------- | ----------- | ----------- | +1|locus_tag_1 (will separate cluster into 2, locus_tag_1 will go into second half)|antismash, gecco, deepbgc, prism, sempi (select one of these) +3|locus_tag_1, locus_tag_2 (will separate cluster into 3, the specified genes go to the next cluster (2nd and 3rd in this example))|antismash + +This exact example can be downloaded as a csv file, using the `BGCViz::get_dissect_example()` command, which takes one argument - where to write the csv file. + +## Usage + +The script should be run after `group_by.py` (See [Step # 1](#step-1-group-the-genbank-records)) in the folder it was downloaded to. + +``` +python dissect.py -i dissect.csv +``` +## Output + +The script outputs GenBank files into the `dessected` folder. 
+ +## Dissect.py options + +```bash +usage: dissect.py [-h] -i INPUT + +Small helper script for BGCViz + +options: + -h, --help show this help message and exit + +Required arguments: + -i INPUT, --input INPUT + .csv file with clusters to separate +``` diff --git a/docs/BGCViz_renaming_and_coloring_options.md b/docs/BGCViz_renaming_and_coloring_options.md index 1555ffc..ba49cf8 100644 --- a/docs/BGCViz_renaming_and_coloring_options.md +++ b/docs/BGCViz_renaming_and_coloring_options.md @@ -25,8 +25,10 @@ aminocoumarin|other|||| The data used for renaming is stored under "Code" and "Group" columns. The default file is available is the BGCViz directory or in the [Glossary](Glossary.md). The one is free to use any renaming scheme, but the whole csv file should be uploaded in the file input in BGCViz: + ![rename](/images/rename.png) + **After the renaming, "Rename" button will dissapear, and only "Reset" will be available. After reseting, "Rename" button will exange it. This is made to indicate if the used is already renamed, or not. ** **Please note: After "Rename" button is triggered, SEMPI, Antismash and PRISM data will be renamed prior to plotting automatically** @@ -46,10 +48,13 @@ The "Hierarchy" column defines the order of the link coloring in the 'Hierarchy- The colors for arcs and links can be changes for single session while program is running. The current coloring scheme is situated in "Biocircos plot" sidemenu. To see it first check the checkbox above Biocircos plot and then then scroll down: + ![bio_check](/images/biocircos_colot_check.png) + ![bio_scheme](/images/biocircos_dt.png) + To edit the cell, just double click it. WHen you finish editing, press Ctrl+Enter. 
**Programs in Hierarchy column are written the same as on Biocircos chromosomes** diff --git a/docs/Input_files_options.md b/docs/Input_files_options.md index dcb10ce..7d2911d 100644 --- a/docs/Input_files_options.md +++ b/docs/Input_files_options.md @@ -45,8 +45,10 @@ BGCViz::sempi_to_csv(project_archive="project.zip") ``` SEMPI project archive can be downloaded from the site with "Project" button: + ![sempi_res_export](/images/sempi_res_web.png) + **This field supports the csv file upload. It can be any csv file, as long as the format is the satisfied. Therefore you can upload any results in this field, in place of SEMPI, but the result plot will label these as "SEMPI"** # DeepBGC We are expecting the default DeepBGC tsv output file. Please see the example in inst/extdata folder @@ -79,6 +81,9 @@ BGCViz::arts_to_csv(project_archive="arts.zip") ``` Zip archive should be downloaded from ARTS Export tab (Zip all files): + ![arts_res_server.png](/images/arts_res_server.png) + + # GECCO We are expecting the default GECCO tsv output file. Please see the example [here](https://github.com/pavlohrab/BGCViz-datasets/tree/main/example_data). \ No newline at end of file diff --git a/docs/Logic_of_the_output.md b/docs/Logic_of_the_output.md index 48bb593..06dde85 100644 --- a/docs/Logic_of_the_output.md +++ b/docs/Logic_of_the_output.md @@ -12,20 +12,28 @@ The "Compare to DeepBGC" sidemenu is available only after DeepBGC data is upload # Boxes Each plot or option group is encapsulated in a box which can be closed or be hidden. Also boxes can be resized, so the plots will be resized accordingly. For example: + ![no_resize](/images/no_resize.png) + ![resize](/images/with_resize.png) + Closed boxes can be restored afterwards with option on sidebar: + ![restore](/images/restore_box.png) + Also, boxes can be **dragged and dropped** to change their order. 
For example: + ![old_order](/images/old_order.png) + ![new_order](/images/new_order.png) + # Select menus The select menus are populated after certain data was uploaded. Therefore you will not all the possible options right away. This is made to avoid possible errors. All the options for data are: @@ -40,8 +48,10 @@ All the options for data are: Select menus looks like this: + ![select_menu](/images/select_menu.png) + On DeepBGC and GECCO comparison tabs the following data is available for analysis: - Antismash - PRISM @@ -52,8 +62,10 @@ If you have uploaded your custom data, but prepared it according to the [guide]( # Upload Data Data upload is done with the corresponding file upload menus. Detailed description of the input files is [here](Input_files_options.md). + ![upload](/images/data_upload.png) + After the files are uploaded data will be extracted and added to the plots. It is possible, that a short period of unresponsiveness of BGCViz occurs after data upload. This indicates that data is being processed and plots are being rerendered. @@ -64,8 +76,10 @@ After the files are uploaded data will be extracted and added to the plots. It i ## Rename The renaming of a data is going to be applied only for antiSMASH, PRISM and SEMPI datasets. The field looks look like this: + ![rename_2](/images/rename_options.png) + The default renaming will be applied, if the "Rename" button is pressed. If you want to change the scheme, you should upload new dataset. For more in depth explanation please refer [here](BGCViz_renaming_and_coloring_options.md). Renaming options are used for better link and arcs coloring in the biocircos plot. The hybrid checkboxes for antiSMASH, PRISM and SEMPI when checked, causing to clusters to be visualized as 'Hybrid', when they have multiple products. @@ -74,13 +88,18 @@ The hybrid checkboxes for antiSMASH, PRISM and SEMPI when checked, causing to cl This menu is adding some width to result clusters. 
When checked, the overlap of the clusters will be based on old data, but on the plots they will be visualized thicker. + ![improve_viz](/images/improve_viz.png) - For example for ARTS data: - ![arts_narrow](/images/arts_thin.png) +For example for ARTS data: + + +![arts_narrow](/images/arts_thin.png) + + +![arts_wide](/images/arts_thick.png) - ![arts_wide](/images/arts_thick.png) ## PRISM supplement and ARTS options @@ -88,14 +107,17 @@ This box contain two options: 1. Plot or not the PRISM supplementary genes. 2. Only plot selected core model hit for ARTS data. This option makes it easier to see paralogs og the hit and where then land on a chromosome. + ![pr_arts](/images/pr_arts.png) + ARTS data controls are used for better understanding of the core genes duplication. If the ARTS core gene is intercepted with other BGC, then the one can plot only this core gene paralog to see the location of the duplicated gene and if it is intercepted with other clusters. **Note: ARTS core gene duplication data can have more than two genes. Therefore all paralogs will be visualized**. ## Download results for further analysis Downloads filtered and grouped results as a csv file. See [here](Quick_start.md) for workflow and [here](Additional_analysis.md) for an example of an additional analysis on the output. + ![download](/images/download.png) @@ -103,12 +125,16 @@ Downloads filtered and grouped results as a csv file. See [here](Quick_start.md) The thresholds here are applied globally to all plots and the data cleaning options for DeepBGC are the columns of the .tsv output file. They all are described in the DeepBGC paper. + ![deep_filt](/images/deepbgc_filters.png) + Also, for convenience same Filtering options are available on the sidemenu sidebar: + ![deep_sidebar](/images/deep_sidebar.png) + ## GECCO filtering options Identical to DeepBGC filtering box, but with different options. For in-depth explanation of the metrics see GECCO paper. 
@@ -116,22 +142,30 @@ Identical to DeepBGC filtering box, but with different options. For in-depth exp # Compare data with DeepBGC This sidemenu is generated after the upload of DeepBGC data if this data was not the only input. Sidemenu can also be divided into plots and filtering options. + ![deep_side](/images/deep_sidebar.png) + Plot part have two plots with their respective options and delivers a purpose to compare DeepBGC data with several filters to the chosen reference one. After the comparison, one can choose the optimal threshold of DeepBGC data, preserving the balance between novel and already annotated clusters. The choice between reference annotation for comparison is Antismash, SEMPI and PRISM. + ![deep_overview](/images/deep_overview.png) + The first plot in this tab visualizes several clusters, which are annotated solely by DeepBGC, only by chosen reference program, or by both. + ![deep_da](/images/deep_anti_comp.png) + On the x-axis, there are different thresholds for a chosen DeepBGC score, on the y-axis - number of clusters (divided into three groups, described above), which are preserved on a given score threshold. Any additional thresholds can be also applied to this plot and will be written on the upper right corner The second plot can be thought of as a mirror of the first one. It contains the same data, but in form of rate, in place of counts. + ![deep_rates](/images/deep_anti_rates.png) + The rates are: - Novelty rate = "# of BGC annotated only by deepbgc"/("# clusters annotated with only by antismash" + "# clusters annotated with antismash and deepbgc"). This rate points to how many clusters are annotated only by DeepBGC. @@ -146,6 +180,7 @@ The remaining third box is for plot controls. They include: - Choice of a step for a barplot. - Choice of the starting point of a barplot + ![deep_cpm](/images/deep_comparison.png) @@ -156,13 +191,17 @@ The same as DeepBGC option above. The plots on this tab can be thought to be "Genes on chromosome". 
Here are two plots available: - The general "Genes on chromosome" plots, which contain all the annotations from all tools. On mouse hover over BGC, information, particular to the chosen tool, is shown. + ![all_annot](/images/all_annotations.png) + - The second plot visualizes the data from uploaded apps, which is intercepted to the app of a choice. Default reference app is the first uploaded and all the clusters are plotted for it. **Note: if there is no interception available between the uploaded apps and reference one, then this plot will remain empty. The good example is the choice of SEMPI data as a reference. Then the RRE-Finder data is absent from the plot (usually) because no interceptions are made between them ** + ![anot_inter](/images/intercepted_annotations.png) + The second plot has pretty simple controls, which consist of only one option -> the choice of reference data. The type, which is used to color the BGCs, is a subject of change through renaming and combining data as hybrids. More is [here](BGCViz_renaming_and_coloring_options.md) @@ -173,18 +212,24 @@ The type, which is used to color the BGCs, is a subject of change through renami The biocircos tab is become available after uploading two or more datasets. By default, the links (lines that connect different chromosomes) and arcs (the boxes under the chromosomes, that corresponds to the BGC) are grey. This plot is also affected by the chromosome length, which is a mandatory input. - The first plot is a circos plot. It is reactive, which means, the one can make it bigger or smaller with the help of mouse wheel scroll. + ![biocircos](/images/biocircos.png) + On the cursor hover onto a link, the cluster IDs and their types will be shown, as well as, linked software names. The type here is unaffected by renaming and is shown as is. On the cursor hover onto arcs, the type of the cluster and their coordinates is shown. 
- The second plot (Become available by checking "Show Biocircos coloring scheme" checkbox above biocircos plot) is just a legend for applied colors for biocircos plot. These are a subject to change through coloring dataframe or manually per session (more [here](BGCViz_renaming_and_coloring_options.md)). At default, all data is visualized in grey color. + ![biocircos_legend](/images/biocircos_dt.png) + The controls of biocircos plots are hidden under "More" button on a box on top right: + ![color_biocircos](/images/biocircos_color.png) + After uploading data, these are available right away, however, there is small use in them prior to renaming. The coloring is based on the groups, which are stated in a legend. If the no color is specified for a chosen BGC type, then the base color is used. Therefore before renaming, when the coloring options are ticked, the majority of the links and arcs will remain grey. The coloring options for the links includes three modes. These modes illustrate different logic of dealing with the linking clusters of different types (because one color for a single link must be specified). The modes of coloring: @@ -198,12 +243,16 @@ The coloring options for the links includes three modes. These modes illustrate This tab is being shown after more than two datasets are uploaded. To logic behind this tab is to summarize the links between different BGCs. To support than purpose a barplot and a table is generated: 1. Barplot, which counts to how much other BGCs the chosen one is linked. On the x-axis, the BGCs are plotted and on the y-axis the link count. This plot is useful for basic prioritization of BGCs which are annotated by a chosen app. + ![summarize](/images/summarize_plot.png) + 2. "Group by" table. This table shows the intercepted clusters of a chosen data with the other tools. Therefore this table is similar to the second plot on the Annotation visualization and comparison tab, but the data is in form of a table. 
The last row indicates cluster, that are inerecepted outside of a chosen data. It means, that they are annotated more that with one tool, but not with the chosen one. + ![group_by_t](/images/summarize_options.png) + The options for data summary includes: - Option to choose the program, by which the interception will be summarized. - Checkbox to visualize all the BGC for a chosen data. By default, only BGC, which are intercepted with any other ones are printed in the first column of the table. But if the purpose of analysis in to emphasise the BGCs, that are only found by the chosen tool, then all the clusters will be printed, if the checkbox is ticked. diff --git a/docs/Quick_start.md b/docs/Quick_start.md index bef11e5..0b3c7ee 100644 --- a/docs/Quick_start.md +++ b/docs/Quick_start.md @@ -21,9 +21,11 @@ The upload of the files is pretty straightforward - use the properly named file After the upload of the first input: + ![anti_upload](/images/anti_upload.png) + **Please also enter the length of the contig in the corresponding field under uploads. This length is used for correct representation of Biocircos plot** @@ -92,6 +94,7 @@ It is a good practice, to rename the type of clusters after uploading. Renaming To rename the dataset you can press "Rename" button under  "Improve visualization" menu.  
+ ![rename](/images/rename.png) diff --git a/docs/images/rconnect.png b/docs/images/rconnect.png new file mode 100644 index 0000000..04d57e9 Binary files /dev/null and b/docs/images/rconnect.png differ diff --git a/emerald_biocircos.csv b/emerald_biocircos.csv new file mode 100644 index 0000000..cdd55e6 --- /dev/null +++ b/emerald_biocircos.csv @@ -0,0 +1,33 @@ +"seqname","source","Cluster","Start","Stop","score","strand","frame","Type","chromosome","Type2" +"NZ_CP042324.1","SanntiSv0.9.3.3",1,12285,133520,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",2,172170,184797,".",".",".","terpene","EM","terpene" +"NZ_CP042324.1","SanntiSv0.9.3.3",3,202863,266263,".",".",".","alkaloid","EM","alkaloid" +"NZ_CP042324.1","SanntiSv0.9.3.3",4,395065,425157,".",".",".","saccharide","EM","saccharide" +"NZ_CP042324.1","SanntiSv0.9.3.3",5,497375,536408,".",".",".","nrp","EM","nrp" +"NZ_CP042324.1","SanntiSv0.9.3.3",6,583522,601051,".",".",".","nrp polyketide","EM","nrp polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",7,846084,853212,".",".",".","ripp","EM","ripp" +"NZ_CP042324.1","SanntiSv0.9.3.3",8,1241590,1282541,".",".",".","nrp","EM","nrp" +"NZ_CP042324.1","SanntiSv0.9.3.3",9,1332873,1367440,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",10,1994053,2009395,".",".",".","other","EM","other" +"NZ_CP042324.1","SanntiSv0.9.3.3",11,2539582,2565877,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",12,2615131,2639798,".",".",".","ripp","EM","ripp" +"NZ_CP042324.1","SanntiSv0.9.3.3",13,2792392,2812253,".",".",".","saccharide","EM","saccharide" +"NZ_CP042324.1","SanntiSv0.9.3.3",14,2927749,2964702,".",".",".","other","EM","other" +"NZ_CP042324.1","SanntiSv0.9.3.3",15,3004801,3043723,".",".",".","other","EM","other" +"NZ_CP042324.1","SanntiSv0.9.3.3",16,3382380,3399072,".",".",".","other","EM","other" +"NZ_CP042324.1","SanntiSv0.9.3.3",17,3513073,3606342,".",".",".","nrp","EM","nrp" 
+"NZ_CP042324.1","SanntiSv0.9.3.3",18,4006935,4028808,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",19,5502704,5539733,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",20,5680218,5684576,".",".",".","terpene","EM","terpene" +"NZ_CP042324.1","SanntiSv0.9.3.3",21,5776741,5798684,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",22,5927800,5941379,".",".",".","ripp","EM","ripp" +"NZ_CP042324.1","SanntiSv0.9.3.3",23,6334614,6343370,".",".",".","other","EM","other" +"NZ_CP042324.1","SanntiSv0.9.3.3",24,6433764,6470988,".",".",".","nrp polyketide","EM","nrp polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",25,6830272,6877184,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",26,6884252,6984001,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",27,7084064,7146243,".",".",".","nrp","EM","nrp" +"NZ_CP042324.1","SanntiSv0.9.3.3",28,7508709,7540017,".",".",".","terpene","EM","terpene" +"NZ_CP042324.1","SanntiSv0.9.3.3",29,7581046,7606355,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",30,7655814,7715820,".",".",".","other","EM","other" +"NZ_CP042324.1","SanntiSv0.9.3.3",31,8022244,8030491,".",".",".","polyketide","EM","polyketide" +"NZ_CP042324.1","SanntiSv0.9.3.3",32,8490911,8620226,".",".",".","polyketide","EM","polyketide" diff --git a/gecco_biocircos.csv b/gecco_biocircos.csv new file mode 100644 index 0000000..4682bce --- /dev/null +++ b/gecco_biocircos.csv @@ -0,0 +1,17 @@ +"sequence_id","bgc_id","Start","Stop","average_p","max_p","type","proteins","domains","chromosome","Cluster","ID","Type","Type2","pks","other","nrps","alkaloid","terpene","saccharide","ripp","num_prot","num_domains","score","Cluster_type","score_a","score_c" 
+"AL645882.2","AL645882.2_cluster_1",103781,121371,0.819288729972613,0.999420718360737,"Polyketide","AL645882.2_97;AL645882.2_98;AL645882.2_99;AL645882.2_100;AL645882.2_101;AL645882.2_102","PF00106;PF00109;PF00550;PF00698;PF00753;PF01370;PF02801;PF07977;PF07993;PF08659;PF13561;PF14765;PF16197;TIGR00128;TIGR01181;TIGR01746;TIGR01829;TIGR01830;TIGR01831;TIGR02813;TIGR02814;TIGR02816;TIGR03131;TIGR03150;TIGR03443;TIGR03466;TIGR03971","G",1,1,"pks","pks",0.65,0.32,0.08,0.01,0.01,0.03,0,6,27,0.65,"pks",0.819288729972613,0.65 +"AL645882.2","AL645882.2_cluster_2",168705,177496,0.659158938092802,0.85012823839182,"Terpene","AL645882.2_148;AL645882.2_149;AL645882.2_150;AL645882.2_151;AL645882.2_152;AL645882.2_153;AL645882.2_154;AL645882.2_155","PF00348;PF00494;PF00582;PF00890;PF01266;PF01593;PF11066;PF13450;TIGR02730;TIGR02733;TIGR02734;TIGR02748;TIGR02749;TIGR03464;TIGR03465;TIGR03467","G",2,2,"terpene","terpene",0.09,0.04,0,0.01,0.87,0,0,8,16,0.87,"terpene",0.659158938092802,0.87 +"AL645882.2","AL645882.2_cluster_4",498683,533448,0.769374704262298,0.99996968309163,"NRP","AL645882.2_440;AL645882.2_441;AL645882.2_442;AL645882.2_443;AL645882.2_444;AL645882.2_445;AL645882.2_446;AL645882.2_447;AL645882.2_448;AL645882.2_449;AL645882.2_450;AL645882.2_451;AL645882.2_452;AL645882.2_453;AL645882.2_454;AL645882.2_455;AL645882.2_456;AL645882.2_457;AL645882.2_458;AL645882.2_459;AL645882.2_460;AL645882.2_461","PF00005;PF00440;PF00501;PF00550;PF00664;PF00668;PF01266;PF01494;PF03621;PF07992;PF09339;PF12802;PF12902;PF13191;PF13193;PF13304;PF13434;PF13450;TIGR00630;TIGR00655;TIGR00954;TIGR00955;TIGR00956;TIGR00957;TIGR00958;TIGR00968;TIGR00972;TIGR01166;TIGR01184;TIGR01186;TIGR01187;TIGR01188;TIGR01189;TIGR01192;TIGR01193;TIGR01194;TIGR01217;TIGR01257;TIGR01271;TIGR01277;TIGR01288;TIGR01318;TIGR01720;TIGR01733;TIGR01734;TIGR01842;TIGR01846;TIGR01923;TIGR01978;TIGR01984;TIGR01988;TIGR02142;TIGR02188;TIGR02203;TIGR02204;TIGR02211;TIGR02262;TIGR02275;TIGR02314;TIGR02315;TIGR02316;TIGR02323;TIGR
02324;TIGR02372;TIGR02633;TIGR02673;TIGR02769;TIGR02770;TIGR02857;TIGR02868;TIGR02982;TIGR03005;TIGR03098;TIGR03205;TIGR03208;TIGR03219;TIGR03258;TIGR03265;TIGR03269;TIGR03375;TIGR03384;TIGR03410;TIGR03411;TIGR03415;TIGR03443;TIGR03522;TIGR03608;TIGR03613;TIGR03719;TIGR03740;TIGR03771;TIGR03796;TIGR03797;TIGR03864;TIGR03873;TIGR03968;TIGR04406;TIGR04439;TIGR04520;TIGR04521","G",4,4,"nrps","nrps",0.11,0.04,0.7,0,0.02,0.03,0.12,22,100,0.7,"nrps",0.769374704262298,0.7 +"AL645882.2","AL645882.2_cluster_6",1335695,1361419,0.847326425018297,0.902810689697336,"Polyketide","AL645882.2_1225;AL645882.2_1226;AL645882.2_1227;AL645882.2_1228;AL645882.2_1229;AL645882.2_1230;AL645882.2_1231;AL645882.2_1232;AL645882.2_1233;AL645882.2_1234;AL645882.2_1235;AL645882.2_1236;AL645882.2_1237;AL645882.2_1238;AL645882.2_1239;AL645882.2_1240;AL645882.2_1241;AL645882.2_1242;AL645882.2_1243;AL645882.2_1244;AL645882.2_1245;AL645882.2_1246;AL645882.2_1247;AL645882.2_1248;AL645882.2_1249","PF00109;PF00155;PF00202;PF00364;PF00392;PF00550;PF00561;PF00676;PF00890;PF01266;PF01370;PF01494;PF01613;PF01674;PF01946;PF02089;PF02770;PF02775;PF02801;PF03109;PF03364;PF04072;PF04321;PF04820;PF07993;PF08281;PF08541;PF08545;PF10604;PF12697;PF13292;PF13450;PF13460;TIGR00027;TIGR00292;TIGR00508;TIGR00517;TIGR00699;TIGR00700;TIGR00707;TIGR00709;TIGR00713;TIGR00747;TIGR00748;TIGR01214;TIGR01265;TIGR01347;TIGR01348;TIGR01746;TIGR01835;TIGR01885;TIGR01982;TIGR02018;TIGR02032;TIGR02296;TIGR02325;TIGR02352;TIGR02404;TIGR02407;TIGR02813;TIGR02937;TIGR02946;TIGR02959;TIGR03150;TIGR03181;TIGR03182;TIGR03197;TIGR03246;TIGR03251;TIGR03338;TIGR03364;TIGR03372;TIGR03443;TIGR03466;TIGR03615;TIGR03695;TIGR03947;TIGR04022;TIGR04544","G",5,5,"pks","pks",0.53,0.28,0.15,0.05,0.02,0.02,0.03,25,79,0.53,"pks",0.847326425018297,0.53 
+"AL645882.2","AL645882.2_cluster_8",2931776,2950345,0.795181197366316,0.999357302051581,"Saccharide","AL645882.2_2651;AL645882.2_2652;AL645882.2_2653;AL645882.2_2654;AL645882.2_2655;AL645882.2_2656;AL645882.2_2657;AL645882.2_2658;AL645882.2_2659;AL645882.2_2660;AL645882.2_2661;AL645882.2_2662;AL645882.2_2663;AL645882.2_2664;AL645882.2_2665;AL645882.2_2666;AL645882.2_2667;AL645882.2_2668;AL645882.2_2669","PF00264;PF00383;PF00389;PF00400;PF00534;PF00535;PF02397;PF02668;PF02826;PF03777;PF06236;PF07690;PF12832;PF12894;PF13439;PF13477;PF13506;PF13524;PF13579;PF13632;PF13641;PF13692;PF13727;PF14437;TIGR00227;TIGR00326;TIGR00900;TIGR01327;TIGR01508;TIGR02095;TIGR02149;TIGR03013;TIGR03022;TIGR03023;TIGR03025;TIGR03030;TIGR03087;TIGR03088;TIGR03449;TIGR03469;TIGR03937;TIGR03946;TIGR03965;TIGR03999;TIGR04047;TIGR04063;TIGR04157","G",7,7,"saccharide","saccharide",0.02,0.18,0.05,0,0.02,0.74,0,19,47,0.74,"saccharide",0.795181197366316,0.74 +"AL645882.2","AL645882.2_cluster_10",3524828,3602320,0.752421552194178,0.999999999825827,"NRP","AL645882.2_3170;AL645882.2_3171;AL645882.2_3172;AL645882.2_3173;AL645882.2_3174;AL645882.2_3175;AL645882.2_3176;AL645882.2_3177;AL645882.2_3178;AL645882.2_3179;AL645882.2_3180;AL645882.2_3181;AL645882.2_3182;AL645882.2_3183;AL645882.2_3184;AL645882.2_3185;AL645882.2_3186;AL645882.2_3187;AL645882.2_3188;AL645882.2_3189;AL645882.2_3190;AL645882.2_3191;AL645882.2_3192;AL645882.2_3193;AL645882.2_3194;AL645882.2_3195;AL645882.2_3196;AL645882.2_3197;AL645882.2_3198;AL645882.2_3199;AL645882.2_3200;AL645882.2_3201;AL645882.2_3202;AL645882.2_3203;AL645882.2_3204","PF00005;PF00072;PF00109;PF00122;PF00155;PF00196;PF00441;PF00486;PF00501;PF00550;PF00561;PF00664;PF00668;PF00702;PF00903;PF00931;PF00975;PF01026;PF01040;PF01070;PF01261;PF01266;PF01494;PF01636;PF01663;PF02153;PF02668;PF02770;PF02801;PF03403;PF03621;PF03704;PF07730;PF07994;PF08241;PF08541;PF08545;PF09056;PF12146;PF12679;PF12697;PF12730;PF12897;PF13193;PF13304;PF13450;PF13489;PF13649;PF13669;PF14696
;TIGR00010;TIGR00517;TIGR00747;TIGR00748;TIGR00954;TIGR00955;TIGR00956;TIGR00957;TIGR00958;TIGR00968;TIGR00972;TIGR01106;TIGR01116;TIGR01140;TIGR01166;TIGR01184;TIGR01186;TIGR01187;TIGR01188;TIGR01189;TIGR01192;TIGR01193;TIGR01194;TIGR01217;TIGR01249;TIGR01257;TIGR01263;TIGR01264;TIGR01265;TIGR01271;TIGR01277;TIGR01288;TIGR01387;TIGR01473;TIGR01474;TIGR01476;TIGR01494;TIGR01497;TIGR01511;TIGR01512;TIGR01517;TIGR01522;TIGR01523;TIGR01524;TIGR01525;TIGR01647;TIGR01657;TIGR01720;TIGR01733;TIGR01734;TIGR01738;TIGR01818;TIGR01835;TIGR01842;TIGR01846;TIGR01923;TIGR01934;TIGR01978;TIGR01984;TIGR01988;TIGR02032;TIGR02056;TIGR02142;TIGR02151;TIGR02154;TIGR02188;TIGR02203;TIGR02204;TIGR02211;TIGR02262;TIGR02275;TIGR02314;TIGR02315;TIGR02316;TIGR02323;TIGR02324;TIGR02335;TIGR02372;TIGR02427;TIGR02633;TIGR02673;TIGR02708;TIGR02769;TIGR02770;TIGR02813;TIGR02857;TIGR02868;TIGR02875;TIGR02915;TIGR02956;TIGR02982;TIGR03005;TIGR03020;TIGR03056;TIGR03081;TIGR03098;TIGR03150;TIGR03197;TIGR03205;TIGR03208;TIGR03219;TIGR03258;TIGR03265;TIGR03269;TIGR03343;TIGR03375;TIGR03410;TIGR03411;TIGR03415;TIGR03443;TIGR03467;TIGR03522;TIGR03540;TIGR03541;TIGR03608;TIGR03611;TIGR03719;TIGR03740;TIGR03771;TIGR03796;TIGR03797;TIGR03864;TIGR03873;TIGR03946;TIGR03947;TIGR03966;TIGR04406;TIGR04520;TIGR04521;TIGR04544","G",9,9,"nrps","nrps",0.195,0,0.97,0,0,0.01,0,35,180,0.97,"nrps",0.752421552194178,0.97 
+"AL645882.2","AL645882.2_cluster_11",5515246,5536994,0.713019114810484,0.868896297725067,"Polyketide","AL645882.2_5009;AL645882.2_5010;AL645882.2_5011;AL645882.2_5012;AL645882.2_5013;AL645882.2_5014;AL645882.2_5015;AL645882.2_5016;AL645882.2_5017;AL645882.2_5018;AL645882.2_5019;AL645882.2_5020;AL645882.2_5021;AL645882.2_5022;AL645882.2_5023;AL645882.2_5024;AL645882.2_5025;AL645882.2_5026;AL645882.2_5027;AL645882.2_5028;AL645882.2_5029;AL645882.2_5030;AL645882.2_5031","PF00106;PF00107;PF00109;PF00440;PF00486;PF00550;PF00753;PF01243;PF01370;PF01381;PF01613;PF02801;PF02909;PF03364;PF03704;PF03848;PF03992;PF04978;PF05175;PF05368;PF05724;PF07690;PF07883;PF08028;PF08240;PF08241;PF08242;PF08659;PF10604;PF12349;PF12844;PF13443;PF13460;PF13489;PF13561;PF13602;PF13649;PF13847;TIGR00477;TIGR00517;TIGR00692;TIGR00710;TIGR00711;TIGR00833;TIGR00880;TIGR00881;TIGR00891;TIGR00893;TIGR00894;TIGR00895;TIGR00916;TIGR01500;TIGR01751;TIGR01829;TIGR01830;TIGR01831;TIGR01832;TIGR01963;TIGR02296;TIGR02415;TIGR02632;TIGR02685;TIGR02813;TIGR02817;TIGR02818;TIGR02823;TIGR02824;TIGR02825;TIGR03070;TIGR03150;TIGR03201;TIGR03206;TIGR03325;TIGR03366;TIGR03451;TIGR03466;TIGR03480;TIGR03534;TIGR03615;TIGR03618;TIGR03668;TIGR03971;TIGR03989;TIGR04022;TIGR04023;TIGR04316;TIGR04504;TIGR04558","G",10,10,"pks","pks",0.89,0.05,0.02,0.02,0.02,0.08,0.03,23,88,0.89,"pks",0.713019114810484,0.89 +"AL645882.2","AL645882.2_cluster_12",5785753,5791297,0.57649202361676,0.618435119958616,"Polyketide","AL645882.2_5252;AL645882.2_5253;AL645882.2_5254;AL645882.2_5255;AL645882.2_5256;AL645882.2_5257;AL645882.2_5258","PF00109;PF00550;PF02311;PF02801;PF03364;PF03992;PF04486;PF04673;PF07883;PF10604;TIGR01930;TIGR02813;TIGR03150;TIGR03214","G",11,11,"pks","pks",0.91,0.06,0.01,0,0.02,0,0,7,14,0.91,"pks",0.57649202361676,0.91 
+"AL645882.2","AL645882.2_cluster_13",6432593,6467702,0.909289567375217,0.999999958443177,"Polyketide","AL645882.2_5815;AL645882.2_5816;AL645882.2_5817;AL645882.2_5818;AL645882.2_5819;AL645882.2_5820;AL645882.2_5821;AL645882.2_5822;AL645882.2_5823;AL645882.2_5824;AL645882.2_5825;AL645882.2_5826;AL645882.2_5827;AL645882.2_5828;AL645882.2_5829;AL645882.2_5830;AL645882.2_5831;AL645882.2_5832;AL645882.2_5833;AL645882.2_5834;AL645882.2_5835;AL645882.2_5836;AL645882.2_5837;AL645882.2_5838;AL645882.2_5839","PF00108;PF00109;PF00155;PF00196;PF00355;PF00441;PF00486;PF00501;PF00550;PF00698;PF00891;PF00975;PF01053;PF01073;PF01222;PF01326;PF01370;PF02353;PF02544;PF02770;PF02771;PF02801;PF03704;PF04191;PF04321;PF05368;PF05401;PF07993;PF08028;PF08241;PF08242;PF08281;PF08541;PF08545;PF11639;PF12697;PF13193;PF13460;PF13489;PF13520;PF13649;PF13847;PF16197;PF16363;PF17837;TIGR00128;TIGR00517;TIGR00536;TIGR00747;TIGR00748;TIGR00858;TIGR01179;TIGR01181;TIGR01214;TIGR01733;TIGR01734;TIGR01746;TIGR01777;TIGR01821;TIGR01822;TIGR01825;TIGR01923;TIGR01930;TIGR01983;TIGR02072;TIGR02188;TIGR02197;TIGR02262;TIGR02275;TIGR02316;TIGR02372;TIGR02813;TIGR02937;TIGR03020;TIGR03056;TIGR03098;TIGR03131;TIGR03150;TIGR03203;TIGR03204;TIGR03205;TIGR03207;TIGR03208;TIGR03443;TIGR03466;TIGR03533;TIGR03534;TIGR03541;TIGR03589;TIGR04022;TIGR04180;TIGR04543","G",12,12,"pks","pks",0.76,0.11,0.46,0.02,0,0.01,0.02,25,92,0.76,"pks",0.909289567375217,0.76 
+"AL645882.2","AL645882.2_cluster_14",6847315,6863275,0.739858389200434,0.958970163028131,"Polyketide","AL645882.2_6167;AL645882.2_6168;AL645882.2_6169;AL645882.2_6170;AL645882.2_6171;AL645882.2_6172;AL645882.2_6173;AL645882.2_6174;AL645882.2_6175;AL645882.2_6176;AL645882.2_6177;AL645882.2_6178;AL645882.2_6179;AL645882.2_6180","PF00150;PF00356;PF00528;PF00532;PF01381;PF01547;PF04183;PF04542;PF06276;PF07398;PF08281;PF11716;PF12680;PF13377;PF13416;PF13581;PF19054;TIGR00969;TIGR01481;TIGR01581;TIGR02139;TIGR02140;TIGR02141;TIGR02405;TIGR02417;TIGR02937;TIGR02939;TIGR02947;TIGR02948;TIGR02957;TIGR03070;TIGR03083;TIGR03086;TIGR03226;TIGR03262;TIGR03850","G",13,13,"pks","pks",0.62,0.22,0.03,0,0.03,0.02,0.1,14,36,0.62,"pks",0.739858389200434,0.62 +"AL645882.2","AL645882.2_cluster_15",6889594,6948414,0.873977476188822,0.999999999840931,"Polyketide","AL645882.2_6205;AL645882.2_6206;AL645882.2_6207;AL645882.2_6208;AL645882.2_6209;AL645882.2_6210;AL645882.2_6211;AL645882.2_6212;AL645882.2_6213;AL645882.2_6214;AL645882.2_6215;AL645882.2_6216;AL645882.2_6217;AL645882.2_6218;AL645882.2_6219;AL645882.2_6220;AL645882.2_6221;AL645882.2_6222;AL645882.2_6223;AL645882.2_6224;AL645882.2_6225;AL645882.2_6226;AL645882.2_6227;AL645882.2_6228;AL645882.2_6229","PF00106;PF00108;PF00109;PF00202;PF00364;PF00440;PF00486;PF00550;PF00561;PF00698;PF00975;PF01039;PF01370;PF01494;PF01565;PF02775;PF02786;PF02801;PF03704;PF03756;PF05368;PF07690;PF07730;PF07993;PF08031;PF08659;PF08990;PF12697;PF12831;PF13460;PF13561;PF13581;PF14765;PF16197;PF17147;TIGR00128;TIGR00508;TIGR00513;TIGR00515;TIGR00699;TIGR00700;TIGR00707;TIGR00709;TIGR00710;TIGR00711;TIGR00713;TIGR00880;TIGR00881;TIGR00893;TIGR00895;TIGR00898;TIGR00900;TIGR01108;TIGR01117;TIGR01249;TIGR01500;TIGR01738;TIGR01746;TIGR01829;TIGR01830;TIGR01831;TIGR01832;TIGR01885;TIGR01930;TIGR01963;TIGR02407;TIGR02415;TIGR02427;TIGR02430;TIGR02632;TIGR02685;TIGR02813;TIGR03056;TIGR03131;TIGR03133;TIGR03134;TIGR03150;TIGR03206;TIGR03246;TIGR03251;TIGR03297;TIGR
03325;TIGR03343;TIGR03372;TIGR03384;TIGR03443;TIGR03466;TIGR03611;TIGR03613;TIGR03649;TIGR03695;TIGR03968;TIGR03971;TIGR04316;TIGR04504","G",14,14,"pks","pks",0.99,0,0.06,0,0,0.04,0.01,25,95,0.99,"pks",0.873977476188822,0.99 +"AL645882.2","AL645882.2_cluster_16",7106284,7117874,0.783762333451763,0.999856305267705,"NRP","AL645882.2_6364;AL645882.2_6365;AL645882.2_6366;AL645882.2_6367;AL645882.2_6368","PF00501;PF00550;PF00668;PF00975;PF05899;PF05977;PF07690;PF12697;PF13193;PF13434;TIGR00900;TIGR01733;TIGR01734;TIGR01738;TIGR01923;TIGR02188;TIGR02262;TIGR02275;TIGR02316;TIGR02372;TIGR03098;TIGR03205;TIGR03208;TIGR03443;TIGR04439","G",15,15,"nrps","nrps",0.03,0.06,0.92,0.01,0,0.01,0,5,25,0.92,"nrps",0.783762333451763,0.92 +"AL645882.2","AL645882.2_cluster_17",7516017,7523399,0.939298768070374,0.999999744466846,"Terpene","AL645882.2_6688;AL645882.2_6689;AL645882.2_6690;AL645882.2_6691;AL645882.2_6692;AL645882.2_6693;AL645882.2_6694","PF00348;PF00432;PF00494;PF00890;PF01048;PF01266;PF01593;PF07992;PF12831;PF13243;PF13249;PF13450;TIGR01507;TIGR01559;TIGR01787;TIGR02731;TIGR02732;TIGR02733;TIGR02734;TIGR02748;TIGR02749;TIGR03197;TIGR03463;TIGR03464;TIGR03465;TIGR03467;TIGR03468;TIGR04277","G",16,16,"terpene","terpene",0.03,0.05,0,0.01,0.9,0,0.01,7,28,0.9,"terpene",0.939298768070374,0.9 
+"AL645882.2","AL645882.2_cluster_18",7586410,7601569,0.832626210768822,0.998326356077945,"Polyketide","AL645882.2_6750;AL645882.2_6751;AL645882.2_6752;AL645882.2_6753;AL645882.2_6754;AL645882.2_6755;AL645882.2_6756;AL645882.2_6757","PF00083;PF00106;PF00107;PF00109;PF00550;PF00698;PF01370;PF02775;PF02801;PF05977;PF07690;PF07992;PF08240;PF08541;PF08659;PF09335;PF12832;PF12840;PF13460;PF13561;PF13602;PF13738;PF14765;PF16197;TIGR00128;TIGR00710;TIGR00711;TIGR00747;TIGR00879;TIGR00880;TIGR00881;TIGR00891;TIGR00893;TIGR00894;TIGR00895;TIGR00898;TIGR00900;TIGR01289;TIGR01299;TIGR01500;TIGR01751;TIGR01829;TIGR01830;TIGR01831;TIGR01832;TIGR01930;TIGR01963;TIGR02415;TIGR02632;TIGR02685;TIGR02813;TIGR02817;TIGR02823;TIGR02824;TIGR02825;TIGR03131;TIGR03150;TIGR03206;TIGR03297;TIGR03325;TIGR03845;TIGR03971;TIGR04316;TIGR04504","G",17,17,"pks","pks",0.92,0.05,0.03,0,0.02,0,0,8,64,0.92,"pks",0.832626210768822,0.92 +"AL645882.2","AL645882.2_cluster_20",8258333,8283623,0.835037750982841,0.897403579012686,"RiPP","AL645882.2_7365;AL645882.2_7366;AL645882.2_7367;AL645882.2_7368;AL645882.2_7369;AL645882.2_7370;AL645882.2_7371;AL645882.2_7372;AL645882.2_7373;AL645882.2_7374;AL645882.2_7375;AL645882.2_7376;AL645882.2_7377;AL645882.2_7378;AL645882.2_7379;AL645882.2_7380;AL645882.2_7381;AL645882.2_7382;AL645882.2_7383;AL645882.2_7384;AL645882.2_7385;AL645882.2_7386;AL645882.2_7387;AL645882.2_7388;AL645882.2_7389;AL645882.2_7390","PF00005;PF00009;PF00067;PF00089;PF00501;PF00583;PF00743;PF00857;PF00891;PF00989;PF01925;PF03029;PF03259;PF03992;PF04203;PF04486;PF05175;PF05331;PF07992;PF08241;PF08242;PF08448;PF11991;PF13426;PF13434;PF13450;PF13489;PF13581;PF13649;PF13738;PF13847;PF14535;PF16864;TIGR00229;TIGR00231;TIGR00955;TIGR00956;TIGR00958;TIGR00968;TIGR00972;TIGR01076;TIGR01166;TIGR01184;TIGR01186;TIGR01187;TIGR01188;TIGR01189;TIGR01192;TIGR01193;TIGR01257;TIGR01271;TIGR01277;TIGR01288;TIGR01842;TIGR01846;TIGR01978;TIGR01983;TIGR02142;TIGR02155;TIGR02203;TIGR02204;TIGR02211;TIGR02314;TIGR02
315;TIGR02323;TIGR02324;TIGR02633;TIGR02673;TIGR02716;TIGR02769;TIGR02770;TIGR02857;TIGR02868;TIGR02982;TIGR03005;TIGR03258;TIGR03265;TIGR03269;TIGR03335;TIGR03375;TIGR03410;TIGR03411;TIGR03415;TIGR03429;TIGR03522;TIGR03608;TIGR03614;TIGR03719;TIGR03740;TIGR03771;TIGR03796;TIGR03797;TIGR03864;TIGR03873;TIGR04018;TIGR04046;TIGR04406;TIGR04520;TIGR04521;TIGR04538;TIGR04543","G",19,19,"ripp","ripp",0.07,0.19,0.05,0.02,0.03,0.11,0.55,26,101,0.55,"ripp",0.835037750982841,0.55 +"AL645882.2","AL645882.2_cluster_21",8493549,8542072,0.912955287850247,0.999999999881635,"NRP","AL645882.2_7588;AL645882.2_7589;AL645882.2_7590;AL645882.2_7591;AL645882.2_7592;AL645882.2_7593;AL645882.2_7594;AL645882.2_7595;AL645882.2_7596;AL645882.2_7597;AL645882.2_7598;AL645882.2_7599;AL645882.2_7600;AL645882.2_7601;AL645882.2_7602;AL645882.2_7603;AL645882.2_7604;AL645882.2_7605;AL645882.2_7606;AL645882.2_7607;AL645882.2_7608;AL645882.2_7609;AL645882.2_7610;AL645882.2_7611;AL645882.2_7612;AL645882.2_7613;AL645882.2_7614;AL645882.2_7615;AL645882.2_7616;AL645882.2_7617;AL645882.2_7618;AL645882.2_7619;AL645882.2_7620;AL645882.2_7621;AL645882.2_7622;AL645882.2_7623","PF00005;PF00027;PF00067;PF00106;PF00195;PF00376;PF00392;PF00425;PF00440;PF00496;PF00501;PF00528;PF00550;PF00664;PF00668;PF00890;PF00903;PF00975;PF01040;PF01266;PF01370;PF01408;PF01494;PF02353;PF02463;PF02797;PF02909;PF03435;PF04140;PF04191;PF05834;PF06902;PF07690;PF07729;PF07992;PF08241;PF08242;PF08392;PF08541;PF12697;PF12802;PF12831;PF13191;PF13193;PF13370;PF13401;PF13411;PF13412;PF13459;PF13460;PF13489;PF13561;PF13649;PF13847;PF18563;PF19086;TIGR00543;TIGR00553;TIGR00564;TIGR00565;TIGR00630;TIGR00747;TIGR00896;TIGR00954;TIGR00955;TIGR00956;TIGR00957;TIGR00958;TIGR00968;TIGR00972;TIGR01166;TIGR01184;TIGR01186;TIGR01187;TIGR01188;TIGR01189;TIGR01192;TIGR01193;TIGR01194;TIGR01217;TIGR01257;TIGR01271;TIGR01277;TIGR01288;TIGR01289;TIGR01292;TIGR01733;TIGR01734;TIGR01761;TIGR01815;TIGR01820;TIGR01823;TIGR01824;TIGR01829;TIGR01830;TIGR01831;T
IGR01832;TIGR01842;TIGR01846;TIGR01923;TIGR01963;TIGR01978;TIGR01988;TIGR02018;TIGR02023;TIGR02032;TIGR02043;TIGR02044;TIGR02047;TIGR02051;TIGR02054;TIGR02142;TIGR02188;TIGR02203;TIGR02204;TIGR02211;TIGR02262;TIGR02275;TIGR02294;TIGR02314;TIGR02315;TIGR02316;TIGR02323;TIGR02324;TIGR02325;TIGR02372;TIGR02404;TIGR02415;TIGR02469;TIGR02632;TIGR02633;TIGR02673;TIGR02769;TIGR02770;TIGR02789;TIGR02790;TIGR02812;TIGR02857;TIGR02868;TIGR02982;TIGR03005;TIGR03098;TIGR03205;TIGR03206;TIGR03208;TIGR03258;TIGR03265;TIGR03269;TIGR03338;TIGR03375;TIGR03410;TIGR03411;TIGR03415;TIGR03443;TIGR03494;TIGR03522;TIGR03608;TIGR03613;TIGR03719;TIGR03740;TIGR03771;TIGR03796;TIGR03797;TIGR03864;TIGR03873;TIGR03971;TIGR04028;TIGR04316;TIGR04406;TIGR04458;TIGR04515;TIGR04520;TIGR04521;TIGR04538","G",20,20,"nrps","nrps",0.15,0.1,0.74,0.01,0.04,0.02,0.07,36,174,0.74,"nrps",0.912955287850247,0.74 diff --git a/group_by.csv b/group_by.csv new file mode 100644 index 0000000..9b1379a --- /dev/null +++ b/group_by.csv @@ -0,0 +1,32 @@ +"Compare","Antismash","SEMPI","PRISM","PRISM-Supp","ARTS","DeepBGC","GECCO","RRE-Finder","RippMiner","Emerald/SanntiS","Group" +"1","1","1","1","NA","NA","NA","1","NA","NA","1","group_1" +"2","2","NA","NA","2,3","2","6","2","NA","NA","2","group_2" +"3","3","NA","2","NA","NA","11","NA","1","1","3","group_3" +"4","NA","NA","NA","NA","NA","NA","NA","NA","NA","4","group_4" +"5","4","3","3","NA","NA","NA","4","NA","NA","5","group_5" +"6","5","NA","NA","NA","9","38","NA","NA","NA","NA","group_6" +"7","6","NA","NA","NA","14","45","NA","NA","NA","8","group_7" +"8","NA","NA","NA","NA","15","50","5","NA","NA","9","group_8" +"9","7","NA","5","NA","NA","NA","NA","NA","NA","10","group_9" +"10","8","10","7","NA","NA","66","7","NA","NA","14","group_10" +"11","9","NA","8","NA","55,56","NA","NA","NA","NA","15","group_11" +"12","10","12","9","51,52,53","60","70","9","NA","NA","17","group_12" +"13","11","18","10","89,90,91,92","98","97","10","NA","NA","19","group_13" 
+"14","12","NA","NA","NA","NA","NA","NA","NA","NA","20","group_14" +"15","13","19","11","NA","NA","100","11","NA","NA","21","group_15" +"16","14","NA","NA","100","NA","NA","NA","NA","NA","23","group_16" +"17","15","20","12","103,104","NA","NA","12","NA","NA","24","group_17" +"18","17","NA","NA","107","NA","NA","NA","NA","NA","NA","group_18" +"19","19","23","13","110","130","117","14","NA","NA","26","group_19" +"20","19","23","13","NA","NA","117","14","NA","NA","26","group_20" +"21","20","25","14","NA","NA","119","15","NA","NA","27","group_21" +"22","21","NA","15","NA","NA","NA","NA","NA","3","NA","group_22" +"23","22","NA","NA","NA","137,138","NA","16","NA","NA","28","group_23" +"24","23","28","16","NA","NA","NA","17","NA","NA","29","group_24" +"25","24","NA","17","NA","NA","131","NA","2","4","30","group_25" +"26","25","NA","NA","NA","150","NA","NA","NA","NA","31","group_26" +"27","27","33","18","NA","NA","161","20","NA","NA","32","group_27" +"28","27","33","18","NA","NA","161","20","NA","NA","32","group_28" +"29","27","NA","NA","125","NA","161","20","NA","NA","32","group_29" +"30","3","NA","2,19","4","NA","11","NA","1","1","3","group_30" +"NA","18,26","4,7,8,17,32","4,6,19","1,7,8,15,17,18,19,20,28,34,35,36,43,50,51,52,53,54,61,62,63,65,66,67,70,85,89,90,91,92,95,103,104,109,111,112,113,114,119,121,123,124","5,8,12,20,27,42,43,44,52,53,54,55,59,68,69,70,77,89,101,102,125,129,131,137,138,140,141,157,160","8,13,15,30,51,64,157,166,168","13,19","","2","6,11,12,16,18,22,25","group_31" diff --git a/inst/.DS_Store b/inst/.DS_Store new file mode 100644 index 0000000..565b9eb Binary files /dev/null and b/inst/.DS_Store differ diff --git a/inst/extdata/dissect.csv b/inst/extdata/dissect.csv new file mode 100644 index 0000000..f8185cf --- /dev/null +++ b/inst/extdata/dissect.csv @@ -0,0 +1,3 @@ +group,separate_before,by_software +1,"locus_tag_1 (will separate cluster into 2, locus_tag_1 will go into second half)","antismash, gecco, deepbgc, prism, sempi (select one of the 
following)" +3,"locus_tag_1, locus_tag_2 (will separate cluster into 3, specified genes goes to next cluster (2nd and 3rd in this example))",antismash diff --git a/inst/extdata/rename.csv b/inst/extdata/rename.csv index 50148ff..f1273f3 100644 --- a/inst/extdata/rename.csv +++ b/inst/extdata/rename.csv @@ -7,9 +7,9 @@ pentangular_polyphenol,other,melanin,#fb9a99,,GECCO nrps-independent_siderophore_synthase,other,other,#BB34ED,,RRE-Finder angucycline-type,pks,terpene,#fdbf6f,,PRISM-Supp angucycline,pks,alkaloid,#ff7f00,,ARTS -butyrolactone,ripp,hybrid,#cab2d6,, -class_i_lantipeptide,ripp,core,#6a3d9a,, -lasso_peptide,ripp,regulatory,#ffff99,, +butyrolactone,ripp,hybrid,#cab2d6,,RippMiner +class_i_lantipeptide,ripp,core,#6a3d9a,,Emerald/SanntiS +lasso_peptide,ripp,regulatory,#ffff99,,Compare nis_synthase,other,resistance,#b15928,, acyl_amino_acids,other,base,#d4ced6,, aminocoumarin,other,,,, diff --git a/inst/scripts/dissect.py b/inst/scripts/dissect.py new file mode 100644 index 0000000..e442a89 --- /dev/null +++ b/inst/scripts/dissect.py @@ -0,0 +1,109 @@ +from Bio import SeqIO +import sys +import os +import re +import pandas as pd +import argparse + + + +def convert_gbff(seq_file): + if seq_file.split('.')[-1] == 'gbff': + if os.path.exists(os.path.splitext(os.path.basename(seq_file))[0]+'.gbk'): + converted = os.path.splitext(os.path.basename(seq_file))[0]+'.gbk' + else: + file_name = os.path.basename(seq_file) + with open(seq_file, "r") as f: + for index, record in enumerate(SeqIO.parse(f, "genbank")): + if index > 0: + break + else: + SeqIO.write(record, os.path.splitext(os.path.basename(seq_file))[0]+'.gbk', "genbank") + converted = os.path.splitext(os.path.basename(seq_file))[0]+'.gbk' + else: + converted = seq_file + return converted + +def solve_incomplete_CDS(start_new, end_new, loci): + for feat in loci: + if feat.location.start.position <= start_new and feat.location.end.position > start_new: + start_new = feat.location._start.position + if 
feat.location.start.position <= end_new and feat.location.end.position > end_new: + end_new = feat.location.end.position + return start_new, end_new + +def write_gbs(data,seq_file, fl_name): + counter = 0 + fl = convert_gbff(seq_file) + with open(fl, "r") as handle: + record = SeqIO.read(handle, "genbank") + loci = [feat for feat in record.features if feat.type == "CDS"] + locus_to_separate=data['separate_before'].split(',') + loci_len = len(locus_to_separate) + for to_split in range(loci_len): + for loc in range(len(loci)): + if loci[loc].qualifiers['locus_tag'][0] == locus_to_separate[to_split]: + if to_split == 0: + start = 0 + stop = loci[loc-1].location._end.position + subrecord = record[start:stop] + SeqIO.write(subrecord, "dissected/"+os.path.splitext(fl_name)[0]+"_subcluster_"+str(counter+1)+".gb", "genbank") + counter +=1 + if to_split == (loci_len-1): + start = loci[loc].location._start.position + stop = loci[len(loci)-1].location._end.position + else: + start = loci[loc].location._start.position + stop = [loci[loc-1].location._end.position for loc in range(len(loci)) if loci[loc].qualifiers['locus_tag'][0] == locus_to_separate[to_split+1]] + subrecord = record[start:stop] + SeqIO.write(subrecord, "dissected/"+os.path.splitext(fl_name)[0]+"_subcluster_"+str(counter+1)+".gb", "genbank") + counter +=1 + + + +def split_gb_files(csv_file): + data_to_search = { + "antismash" : "Antismash", + "deepbgc" : "DeepBGC" , + "prism" : "PRISM", + "sempi" : "SEMPI", + "gecco" : "GECCO" + } + data = pd.read_csv(csv_file) + for index, row in data.iterrows(): + if pd.isna(row['group']) : continue + group_name = str('group_' + str(int(row['group']))) + if os.path.isdir(group_name): + pass + else: + print("Didn't found group: "+ group_name) + try: + file_to_open = data_to_search[row['by_software']] + except KeyError(): + print("Could not find software for "+str(group_name)+" : "+str(row['by_software'])) + r = re.compile(file_to_open+".*") + hit = list(filter(r.match, 
os.listdir(group_name)))[0] + file_to_open=group_name+"/"+hit + write_gbs(row, file_to_open, hit) + + +def main(): + if not os.path.isdir('dissected'): + os.mkdir('dissected') + # Parsing arguments + parser = argparse.ArgumentParser(description='Small helper script for BGCViz') + required = parser.add_argument_group('Required arguments') + required.add_argument("-i", "--input", help=".csv file with clusters to separate", required = True) + args = parser.parse_args() + + # Run grouping for gb files + split_gb_files(args.input) + # Bye message + print("Separation of clusters finished successfuly!") + print("Please find genbank files in 'dissected' folder") + + +if __name__ == "__main__": + main() + + diff --git a/inst/scripts/group.py b/inst/scripts/group.py index f702aa7..bf03e7e 100644 --- a/inst/scripts/group.py +++ b/inst/scripts/group.py @@ -2,94 +2,129 @@ import sys import os import pandas as pd +import argparse -def write_gbs(group_by, data, label, seq_file): - for index, row in pd.DataFrame(group_by[label].dropna()).iterrows(): - start = [] - stop = [] - list_l = row[label].split(",") - for i in range(len(list_l)): - list_l[i] = int(list_l[i]) - start.append(data[list_l[i] == data['Cluster']].Start.item()) - stop.append(data[list_l[i] == data['Cluster']].Stop.item()) - group = group_by.Group[index] - if os.path.isdir(group): - pass - else: - os.mkdir(group) - file = SeqIO.parse(open(seq_file), "genbank") - print("Working on: "+ label+"_"+"cluster_"+str(list_l[i])+"_"+str(group)) - for record in file: - loci = [feat for feat in record.features if feat.type == "CDS"] - start_new = int(start[i]) - end_new = int(stop[i]) - subrecord = record[start_new:end_new] - annotation={"molecule_type":"DNA"} - subrecord.annotations = annotation - SeqIO.write(subrecord, group+"/"+label+"_"+"cluster_"+str(list_l[i])+"_"+str(group)+".gb", "genbank") - + +def convert_gbff(seq_file): + if seq_file.split('.')[-1] == 'gbff': + if 
os.path.exists(os.path.splitext(os.path.basename(seq_file))[0]+'.gbk'): + converted = os.path.splitext(os.path.basename(seq_file))[0]+'.gbk' + else: + file_name = os.path.basename(seq_file) + with open(seq_file, "r") as f: + for index, record in enumerate(SeqIO.parse(f, "genbank")): + if index > 0: + break + else: + SeqIO.write(record, os.path.splitext(os.path.basename(seq_file))[0]+'.gbk', "genbank") + converted = os.path.splitext(os.path.basename(seq_file))[0]+'.gbk' + else: + converted = seq_file + return converted + +def solve_incomplete_CDS(start_new, end_new, loci): + for feat in loci: + if feat.location.start.position <= start_new and feat.location.end.position > start_new: + start_new = feat.location._start.position + if feat.location.start.position <= end_new and feat.location.end.position > end_new: + end_new = feat.location.end.position + return start_new, end_new + +def write_gbs(group_by, data, label, seq_file, args): + counter = 0 + fl = convert_gbff(seq_file) + with open(fl, "r") as handle: + record = SeqIO.read(handle, "genbank") + loci = [feat for feat in record.features if feat.type == "CDS"] + for index, row in pd.DataFrame(group_by[label].dropna()).iterrows(): + counter += 1 + if (counter >= len(pd.DataFrame(group_by[label].dropna()).index)): + break + else: + start = [] + stop = [] + list_l = row[label].split(",") + for i in range(len(list_l)): + list_l[i] = int(list_l[i]) + start.append(data[list_l[i] == data['Cluster']].Start.item()) + stop.append(data[list_l[i] == data['Cluster']].Stop.item()) + group = group_by.Group[index] + if os.path.isdir(group): + pass + else: + os.mkdir(group) + if not args.quiet: + print("Working on: "+ label+"_"+"cluster_"+str(list_l[i])+"_"+str(group)) + if os.path.exists(group+"/"+label+"_"+"cluster_"+str(list_l[i])+"_"+str(group)+".gb") and not args.force: + if counter <=1: + print("Files exist! 
Please use --force option to override them") + continue + start_new, end_new = solve_incomplete_CDS(int(start[i]), int(stop[i]), loci) + subrecord = record[start_new:end_new] + annotation={"molecule_type":"DNA"} + subrecord.annotations = annotation + SeqIO.write(subrecord, group+"/"+label+"_"+"cluster_"+str(list_l[i])+"_"+str(group)+".gb", "genbank") + + +def group_gb_files(group_by, seq_file, args): + data_to_search = { + "antiSMASH" : ("anti_biocircos.csv", "Antismash"), + "DeepBGC" : ("deep_biocircos.csv","DeepBGC" ), + "PRISM" : ("prism_biocircos.csv", "PRISM"), + "RREfinder" : ("rre_biocircos.csv", "RRE-Finder"), + "SEMPI" : ("sempi_biocircos.csv", "SEMPI"), + "ARTS" : ("arts_biocircos.csv", "ARTS"), + "PRISM supplement" : ("prism_supp_biocircos.csv", "PRISM-Supp"), + "GECCO" : ("gecco_biocircos.csv", "GECCO") + } + for k,v in data_to_search.items(): + if not args.quiet: + print("Searching for "+str(k)+" files...") + if os.path.exists(v[0]): + if not args.quiet: + print("Found!") + data = pd.read_csv(v[0]) + label = v[1] + write_gbs(group_by, data, label, seq_file, args) + + +def run_clinker(group_by, args): + for index, row in pd.DataFrame(group_by["Group"]).iterrows(): + group = group_by.Group[index] + if os.path.isdir("clinker_plots"): + pass + else: + os.mkdir("clinker_plots") + os.system(f"clinker {group} --plot clinker_plots/{group}.html -i 0.9 -j {args.jobs}") + def main(): - group_by = pd.read_csv("group_by.csv", dtype = str) - seq_file = sys.argv[1] - - print("Searching for antismash files...") - if os.path.exists("antismash_biocircos.csv"): - print("Found!") - data = pd.read_csv("antismash_biocircos.csv") - label = "Antismash" - write_gbs(group_by, data, label, seq_file) - - print("Searching for deepbcg files...") - if os.path.exists("deepbgc_biocircos.csv"): - print("Found!") - data = pd.read_csv("deepbgc_biocircos.csv") - label = "DeepBGC" - write_gbs(group_by, data, label, seq_file) - - print("Searching for prism files...") - if 
os.path.exists("prism_biocircos.csv"): - print("Found!") - data = pd.read_csv("prism_biocircos.csv") - label = "PRISM" - write_gbs(group_by, data, label, seq_file) - - print("Searching for rre-finder files...") - if os.path.exists("rre_biocircos.csv"): - print("Found!") - data = pd.read_csv("rre_biocircos.csv") - label = "RRE-Finder" - write_gbs(group_by, data, label, seq_file) - - print("Searching for sempi files...") - if os.path.exists("sempi_biocircos.csv"): - print("Found!") - data = pd.read_csv("sempi_biocircos.csv") - label = "SEMPI" - write_gbs(group_by, data, label, seq_file) - - print("Searching for ARTS files...") - if os.path.exists("arts_biocircos.csv"): - print("Found!") - data = pd.read_csv("arts_biocircos.csv") - label = "ARTS" - write_gbs(group_by, data, label, seq_file) - - print("Searching for PRISM supplement files...") - if os.path.exists("prism_supp_biocircos.csv"): - print("Found!") - data = pd.read_csv("prism_supp_biocircos.csv") - label = "PRISM-supp" - write_gbs(group_by, data, label, seq_file) - - print("Searching for GECCO files...") - if os.path.exists("gecco_biocircos.csv"): - print("Found!") - data = pd.read_csv("gecco_biocircos.csv") - label = "GECCO" - write_gbs(group_by, data, label, seq_file) + # Reading data + group_by = pd.read_csv("group_by.csv", dtype = str) + + # Parsing arguments + parser = argparse.ArgumentParser(description='Small helper script for BGCViz') + required = parser.add_argument_group('Required arguments') + required.add_argument("-i", "--input", help="Input .gb/.gbk/.gbff file. One record per file will be used (as one genome)", required = True) + parser.add_argument("--force", help="Force overwrite calculated results. [default = False]",action=argparse.BooleanOptionalAction) + parser.add_argument("--quiet", help="Run silently. Clinker will run as usual. [default = False]",action=argparse.BooleanOptionalAction) + parser.add_argument("-cl", "--run_clinker", help="Automatically runs clinker on groups. 
Results are stored in 'clinker_plots' folder. [default = False]", + action=argparse.BooleanOptionalAction) + parser.add_argument("-j", "--jobs", help="Number of threads for clinker analysis (0=all). [default = 0] ", default=0) + args = parser.parse_args() + + # Run grouping for gb files + group_gb_files(group_by, args.input, args) + #Run clinker + if args.run_clinker: + run_clinker(group_by, args) + + # Bye message + print("Analysis finished successfuly!") if __name__ == "__main__": - main() \ No newline at end of file + main() + + diff --git a/man/antismash_to_csv.Rd b/man/antismash_to_csv.Rd index 7495d97..95d57d5 100644 --- a/man/antismash_to_csv.Rd +++ b/man/antismash_to_csv.Rd @@ -17,3 +17,8 @@ csv file in specified location \description{ Function, that returns dataframe, out of supplied antismash json file } +\examples{ +\dontrun{ +antismash_to_csv() +} +} diff --git a/man/arts_to_csv.Rd b/man/arts_to_csv.Rd index cc8bf8a..43a28a2 100644 --- a/man/arts_to_csv.Rd +++ b/man/arts_to_csv.Rd @@ -17,3 +17,9 @@ csv file in specified location \description{ Function, which extracts tables from arts result zip archive and transforms them into BGCViz input } +\examples{ +\dontrun{ +arts_to_csv() +} + +} diff --git a/man/data_to_json.Rd b/man/data_to_json.Rd new file mode 100644 index 0000000..187dd3e --- /dev/null +++ b/man/data_to_json.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fct_format_transformation.R +\name{data_to_json} +\alias{data_to_json} +\title{all data to json} +\usage{ +data_to_json(csv_file) +} +\arguments{ +\item{csv_file}{path to csv} +} +\value{ +json +} +\description{ +Function, function that takes csv file and converts it to json +} diff --git a/man/get_defaults.Rd b/man/get_defaults.Rd index 2bea393..d1601aa 100644 --- a/man/get_defaults.Rd +++ b/man/get_defaults.Rd @@ -16,3 +16,7 @@ csv file in specified location Function, which downloads default options csv file into provided location. 
Then can be used with `set_defaults` function to change the default behaviour } +\examples{ +get_defaults() + +} diff --git a/man/get_dissect_example.Rd b/man/get_dissect_example.Rd new file mode 100644 index 0000000..57c1b9b --- /dev/null +++ b/man/get_dissect_example.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fct_helpers.R +\name{get_dissect_example} +\alias{get_dissect_example} +\title{get_dissect_example} +\usage{ +get_dissect_example(write_to = getwd()) +} +\arguments{ +\item{write_to}{- path to write csv file to.} +} +\value{ +csv file, written to package settings +} +\description{ +Function, which downloads a csv file for dissect.py into specified path. +Use to separate regions into separate clusters +} +\examples{ +\dontrun{ +get_dissect_example(write_to) +} + +} diff --git a/man/mod_arts_tree_server.Rd b/man/mod_arts_tree_server.Rd new file mode 100644 index 0000000..8c54961 --- /dev/null +++ b/man/mod_arts_tree_server.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mod_arts_tree.R +\name{mod_arts_tree_server} +\alias{mod_arts_tree_server} +\title{arts_tree server function} +\usage{ +mod_arts_tree_server(id, vals) +} +\description{ +arts_tree server function +} diff --git a/man/mod_arts_tree_ui.Rd b/man/mod_arts_tree_ui.Rd new file mode 100644 index 0000000..5bb2c2c --- /dev/null +++ b/man/mod_arts_tree_ui.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mod_arts_tree.R +\name{mod_arts_tree_ui} +\alias{mod_arts_tree_ui} +\title{ARTS tree UI functions} +\usage{ +mod_arts_tree_ui(id) +} +\description{ +ARTS tree UI functions +} diff --git a/man/prism_to_csv.Rd b/man/prism_to_csv.Rd index 340bf0f..c6dd417 100644 --- a/man/prism_to_csv.Rd +++ b/man/prism_to_csv.Rd @@ -17,3 +17,8 @@ csv file in specified location \description{ Function, that transforms prism json object into dataframe, which could be 
written to the csv file } +\examples{ +\dontrun{ +prism_to_csv() +} +} diff --git a/man/read_compare.Rd b/man/read_compare.Rd new file mode 100644 index 0000000..ae66ee3 --- /dev/null +++ b/man/read_compare.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fct_reading_functions.R +\name{read_compare} +\alias{read_compare} +\title{#' @description A function, that reads RiPPMiner-Genome file txt +#' +#' @return csv file +#' +#' @noRd} +\usage{ +read_compare(data) +} +\description{ +#' @description A function, that reads RiPPMiner-Genome file txt +#' +#' @return csv file +#' +#' @noRd +} diff --git a/man/run_app.Rd b/man/run_app.Rd index c6c36f7..b891232 100644 --- a/man/run_app.Rd +++ b/man/run_app.Rd @@ -36,6 +36,15 @@ expression in order for the match to be considered successful.} \item{...}{arguments to pass to golem_opts. See `?golem::get_golem_options` for more details.} } +\value{ +running shiny app +} \description{ Run the Shiny Application } +\examples{ +\dontrun{ +run_app() +} + +} diff --git a/man/sempi_to_csv.Rd b/man/sempi_to_csv.Rd index f2d40c3..6660d83 100644 --- a/man/sempi_to_csv.Rd +++ b/man/sempi_to_csv.Rd @@ -18,3 +18,8 @@ csv file in specified location Function, which transforms Track.db file into dataframe, which could be then written to csv. Download project folder from SEMPI and supply as `project_archive` argument to a function } +\examples{ +\dontrun{ +sempi_to_csv() +} +} diff --git a/man/set_defaults.Rd b/man/set_defaults.Rd index b174e52..14a2b62 100644 --- a/man/set_defaults.Rd +++ b/man/set_defaults.Rd @@ -9,7 +9,16 @@ set_defaults(csv_file) \arguments{ \item{csv_file}{- path to csv file with default options.} } +\value{ +csv file, written to package settings +} \description{ Function, which uploads default options csv file to the package. Use with `get_defaults` function to download currently used default options. 
} +\examples{ +\dontrun{ +set_defaults() +} + +} diff --git a/prism_biocircos.csv b/prism_biocircos.csv new file mode 100644 index 0000000..042b8dc --- /dev/null +++ b/prism_biocircos.csv @@ -0,0 +1,20 @@ +"Cluster","Start","Stop","Type","Type2","chromosome","ID" +1,103780,128290,"pks","pks","P",1 +2,255310,261084,"class_i_lantipeptide","class_i_lantipeptide","P",2 +3,513988,533448,"nrps","nrps","P",3 +4,586986,589546,"pks","pks","P",4 +5,2000499,2000898,"ectoine","ectoine","P",5 +6,2559339,2563090,"pks","pks","P",6 +7,2938688,2944875,"melanin","melanin","P",7 +8,3038894,3040682,"nis_synthase","nis_synthase","P",8 +9,3513615,3585724,"nrps","nrps","P",9 +10,5511270,5534546,"benzoisochromanequinone","benzoisochromanequinone","P",10 +11,5785752,5795837,"pentangular_polyphenol","pentangular_polyphenol","P",11 +12,6432592,6458318,"pks__nrps","pks__nrps","P",12 +13,6890527,6947423,"butyrolactone__pks","butyrolactone__pks","P",13 +14,7108263,7120497,"nrps","nrps","P",14 +15,7414295,7422622,"class_iii_iv_lantipeptide","class_iii_iv_lantipeptide","P",15 +16,7586409,7598555,"pks","pks","P",16 +17,7695015,7699626,"class_i_lantipeptide","class_i_lantipeptide","P",17 +18,8504460,8523749,"nrps","nrps","P",18 +19,231675,251017,"butyrolactone__furan","butyrolactone__furan","P",19 diff --git a/prism_supp_biocircos.csv b/prism_supp_biocircos.csv new file mode 100644 index 0000000..dc0d60c --- /dev/null +++ b/prism_supp_biocircos.csv @@ -0,0 +1,126 @@ +"Start","Stop","Type","Type2","Score","Name","Full_name","ID","Cluster" +45266,46073,"resistance","resistance","247.3","AMR116","Puromycin MFS transporter",1,1 +192879,194637,"regulatory","regulatory","1326.1","SCO0203","Two-component
Sensor kinase in S. coelicolor. ",2,2 +194759,195461,"regulatory","regulatory","524.5","SCO0204","Two-component
Response regulator in S. coelicolor. Negative regulator of ACT: SCO0203",3,3 +242202,242766,"regulatory","regulatory","433.5","SCO0253","TetR-type regulator
Regulator of SCO0252 of Streptomyces coelicolor: Tetracycline",4,4 +288986,290456,"resistance","resistance","278.5","AMR261","EmrB: drug resistance MFS transporter",5,5 +291314,292592,"regulatory","regulatory","459.1","MmyR","TetR-type regulator
Represses methylenomycin through inhibition of mmyB in S. coelicolor: (+) riggered by either of two environmental signals: alanine growth-rate-limiting conditions and/or an acidic pH shock.",6,6 +322243,323938,"resistance","resistance","965","AMR139","Tylosin ABC transporters",7,7 +333528,334149,"regulatory","regulatory","478","SCO0332","TetR-type regulator
Regulator of SCO0330 of Streptomyces coelicolor",8,8 +373044,374001,"resistance","resistance","247.3","AMR116","Puromycin MFS transporter",9,9 +389530,391123,"resistance","resistance","207.4","AMR116","Puromycin MFS transporter",10,10 +662372,663014,"regulatory","regulatory","250.1","EsmT4","TetR-type regulator
Located in the esmeraldin biosynthesis cluster of Streptomyces antibioticus Tu 2706",11,11 +663066,664686,"resistance","resistance","244.9","AMR224","Multidrug: subunit of efflux pump conferring antibiotic resistance",12,12 +677226,678861,"resistance","resistance","1018.9","AMR297","Lukacidin ABC transporter",13,13 +721494,722964,"resistance","resistance","235.3","AMR116","Puromycin MFS transporter",14,14 +740313,742083,"resistance","resistance","245.8","AMR201","Multidrug ABC antibiotic efflux pump",15,15 +1193397,1193988,"regulatory","regulatory","446.6","XdhR","TetR-type regulator
Regulator of xanthine dehydrogenase of Streptomyces coelicolor",16,16 +1202479,1204282,"resistance","resistance","270.4","AMR201","Multidrug ABC antibiotic efflux pump",17,17 +1262508,1264110,"resistance","resistance","614.3","AMR196","Tetracenomycin MFS transporter",18,18 +1265840,1267331,"resistance","resistance","291.5","AMR261","EmrB: drug resistance MFS transporter",19,19 +1636965,1638942,"resistance","resistance","764","AMR100","Borrelidin target isoform: threonyl-tRNA synthetase",20,20 +1677148,1678681,"resistance","resistance","375.6","AMR271","Multidrug MFS transporter",21,21 +1818350,1818878,"resistance","resistance","248.7","AMR264","Redox-sensitive transcriptional activator SoxR",22,22 +1834288,1834906,"regulatory","regulatory","460.6","SCO1712","TetR-type regulator
Regulator of antibiotic production of Streptomyces coelicolor",23,23 +1863881,1865048,"regulatory","regulatory","874.8","AbrA1","Two-component
Sensor kinase in S. coelicolor: Iron",24,24 +1865044,1865710,"regulatory","regulatory","490.5","AbrA2","Two-component
Response regulator of S. coelicolor. Negative regulator of RED, ACT and CDA: AbrA1",25,25 +1871316,1872921,"resistance","resistance","307.3","AMR271","Multidrug MFS transporter",26,26 +1967793,1969392,"resistance","resistance","262","AMR286","Oxazolidinone ABC transporter",27,27 +2059303,2060092,"resistance","resistance","261","AMR176","Aminoglycoside 3-N-acetyltransferase",28,28 +2326326,2329452,"resistance","resistance","707.4","AMR211","AcrB/AcrD/AcrF family",29,29 +2424358,2425390,"resistance","resistance","403.5","AMR263","Daunorubicin resistance ABC transporter",30,30 +2453706,2454993,"resistance","resistance","326.3","AMR260","Efflux Bcr CflA: drug resistance transporter",31,31 +2478171,2479632,"resistance","resistance","350.9","AMR116","Puromycin MFS transporter",32,32 +2512482,2514552,"resistance","resistance","442","AMR150","Tetracycline MFS",33,33 +2543006,2544617,"resistance","resistance","974.7","AMR196","Tetracenomycin MFS transporter",34,34 +2544732,2545416,"regulatory","regulatory","180","TcmR","TetR-type regulator
Located in the tetracenomycin C biosynthesis cluster of Streptomyces glaucescens",35,35 +2651102,2653031,"resistance","resistance","317.8","AMR201","Multidrug ABC antibiotic efflux pump",36,36 +2691804,2693250,"resistance","resistance","284.5","AMR261","EmrB: drug resistance MFS transporter",37,37 +2712739,2713435,"regulatory","regulatory","490.7","EcrA2","Two-component
Response regulator in S. coelicolor. Positive reg. in RED production.",38,38 +2713431,2714784,"regulatory","regulatory","1014.6","EcrA1","Two-component
Sensory kinase in S. coelicolor.",39,39 +2733654,2735148,"resistance","resistance","221.2","AMR116","Puromycin MFS transporter",40,40 +2738851,2740243,"resistance","resistance","176.4","AMR246","Multidrug MFS transporter",41,41 +2916747,2918412,"resistance","resistance","277.3","AMR286","Oxazolidinone ABC transporter",42,42 +3010976,3014708,"resistance","resistance","278.8","AMR201","Multidrug ABC antibiotic efflux pump",43,43 +3116520,3116931,"resistance","resistance","273.2","AMR298","Rifampin ADP ribosyl transferase",44,44 +3354680,3355955,"regulatory","regulatory","932.8","DraK","Two-component
Histidine kinase of S. coelicolor. ",45,45 +3355989,3356667,"regulatory","regulatory","517.3","DraR(CheY)","Two-component
Response regulator in S. coelicolor. Positive regulator of ACT, negative of RED and yCPK: DraK",46,46 +3467221,3469459,"resistance","resistance","1218.7","AMR311","Calcimycin ABC transporter",47,47 +3506389,3507907,"resistance","resistance","273.2","AMR116","Puromycin MFS transporter",48,48 +3509243,3509852,"regulatory","regulatory","452.7","SCO3201","TetR-type regulator
Regulator of antibiotic production of Streptomyces coelicolor",49,49 +3513615,3515160,"resistance","resistance","715.9","AMR116","Puromycin MFS transporter",50,50 +3529271,3531188,"regulatory","regulatory","1500.3","CdaR","Pathway-specific
CDA production in S. coelicolor",51,51 +3536944,3538660,"regulatory","regulatory","1331.7","AbsA1","Sensor kinase. Global negative regulation of Streptomyces coelicolor antibiotic synthesis",52,52 +3538678,3539347,"regulatory","regulatory","498.4","AbsA2","Response regulator. Global negative regulation of Streptomyces coelicolor antibiotic synthesis",53,53 +3584821,3585724,"resistance","resistance","680.4","AMR15","Calcium-dependent antibiotic phosphotransferase",54,54 +3704767,3705382,"regulatory","regulatory","105.4","Tsn22","TetR-type regulator
Located in the tetronasin biosynthesis cluster of Streptomyces longisporoflavus",55,55 +3723843,3725439,"resistance","resistance","272.5","AMR261","EmrB: drug resistance MFS transporter",56,56 +3725585,3726185,"regulatory","regulatory","213","SaqK","TetR-type regulator
Located in the saquayamycin Z biosynthesis cluster of Micromonospora sp. strain Tu 6368",57,57 +3751836,3752478,"resistance","resistance","247.5","AMR190","Tunicamycin resistance protein ",58,58 +3924250,3924592,"regulatory","regulatory","76.9","RsbV","Two-component
Positive regulator of sigma-B activity in Bacillus subtilis. Non-phosphorylated RsbV binds to RsbW, preventing its association with sigma-B: RsbW",59,59 +3950325,3951000,"regulatory","regulatory","490.9","Crp","DNA transcription regulator - cAMP Kinase in ACT, RED and CDA biosynthesis in S. coelicolor",60,60 +3966834,3967929,"resistance","resistance","295.4","AMR84","VanS: transcriptional regulator of glycopeptide resistance genes",61,61 +3967921,3968617,"resistance","resistance","273.2","AMR83","VanR: transcriptional activator regulating VanA",62,62 +3967921,3968617,"regulatory","regulatory","264.6","CutR","Two-component
Response regulator. neg. reg. of ACT production in S. lividans & coelicolor. Alters copper metabolism",63,63 +3971388,3972306,"resistance","resistance","574.9","AMR73","Glycopeptide Resistance: D-lactate dehydrogenase",64,64 +3972298,3973357,"resistance","resistance","660","AMR74","Glycopeptide: D-ala-D-lactate ligase",65,65 +3973353,3973962,"resistance","resistance","384.8","AMR72","Glycopeptide resistance: D-Ala-D-Ala dipeptidases",66,66 +4026216,4027671,"resistance","resistance","274.8","AMR261","EmrB: drug resistance MFS transporter",67,67 +4197501,4198176,"regulatory","regulatory","477.9","SCO3818","Two-component
Response regulator in S. coelicolor. Negative regulator of ACT: SCO0203",68,68 +4231927,4233400,"resistance","resistance","792","AMR25","Cephamycin penicillin-binding protein",69,69 +4268712,4269843,"resistance","resistance","764.3","AMR314","Griselimycin Resistance - DNA polymerase III Beta Subunit",70,70 +4305059,4306397,"resistance","resistance","367.6","AMR262","matE: MATE efflux family protein",71,71 +4421135,4422659,"resistance","resistance","807.5","AMR305","Virginamycin MFS transporter",72,72 +4422720,4423479,"regulatory","regulatory","374.7","Ecm10","TetR-type regulator
Located in the echinomycin biosynthesis cluster of Streptomyces lasaliensis",73,73 +4427068,4429648,"resistance","resistance","233.9","AMR224","Multidrug: subunit of efflux pump conferring antibiotic resistance",74,74 +4633204,4634479,"regulatory","regulatory","964.2","PhoRs","Two-component
Sensor kinase of S. coelicolor: Phosphate",75,75 +4634475,4635156,"regulatory","regulatory","516.9","PhoPs","Two-component
Response regulator of S. coelicolor. OmpR-type. Positively regulates RED and ACT through ofsS+ropZ: PhoR, Mg2+",76,76 +4679363,4680245,"resistance","resistance","326.5","AMR141","Streptomycin phosphotransferase",77,77 +4680294,4681728,"resistance","resistance","891.5","AMR116","Puromycin MFS transporter",78,78 +4842786,4845768,"regulatory","regulatory","2193.1","AsfR","DNA transcription regulator - LysR-type",79,79 +4872767,4874393,"resistance","resistance","209","AMR224","Multidrug: subunit of efflux pump conferring antibiotic resistance",80,80 +5019698,5020457,"regulatory","regulatory","494.5","AbrC3","Response regulator of S. coelicolor. Positive regulator of RED, CDA, ACT and morphological change.",81,81 +5020677,5022045,"regulatory","regulatory","995.1","AbrC2","Sensory kinase of S. coelicolor. Coexpressed with AbrC1",82,82 +5022158,5023382,"regulatory","regulatory","915.6","AbrC1","Sensory kinase of S. coelicolor. Coexpressed with AbrC2. Have negative affect on antibiotic production. Might have phosphatase activity. ",83,83 +5065113,5066586,"resistance","resistance","302.9","AMR261","EmrB: drug resistance MFS transporter",84,84 +5092129,5093323,"resistance","resistance","864","AMR110","Kirromycin target isoform: Ef-Tu Isoform",85,85 +5179006,5179345,"resistance","resistance","103.1","AMR267","Multidrug transcription factor WhiB",86,86 +5337920,5339528,"regulatory","regulatory","1204.4","AfsQ2","Histidine kinase in S. coelicolor",87,87 +5339524,5340349,"regulatory","regulatory","504","AfsQ1","Response regulator for S. coelicolor. 
Positive regulator of RED, ACT, CDA and yCPK",88,88 +5517893,5519495,"resistance","resistance","226","AMR116","Puromycin MFS transporter",89,89 +5523182,5523935,"regulatory","regulatory","573.7","ActR","TetR-like protein that represses the adjacent actA operon, encoding the ACT export system (Actinorhodin)",90,90 +5524072,5525809,"resistance","resistance","236.6","AMR116","Puromycin MFS transporter",91,91 +5528093,5528861,"regulatory","regulatory","593.8","ActII-orf4","Pathway specific activator of actinorhodin biosynthesis in Streptomyces coelicolor",92,92 +5875297,5875966,"regulatory","regulatory","491.2","RapA1","Two-component
Response regulator in S. coelicolor. Positive regulator in ACT and yCPK biosynthesis",93,93 +5875962,5877399,"regulatory","regulatory","1042.9","RapA2","Two-component
Sensory kinase in S. coelicolor.",94,94 +5940592,5941267,"regulatory","regulatory","354.6","SamR0468","Response regulator- LuxR-type (CheY superfamily)",95,95 +6008514,6010062,"resistance","resistance","360.4","AMR271","Multidrug MFS transporter",96,96 +6058384,6059554,"resistance","resistance","339.7","AMR77","D-alanine--D-alanine ligase",97,97 +6321290,6322367,"regulatory","regulatory","781.2","Sco5784","Histidine kinase",98,98 +6322366,6323026,"regulatory","regulatory","483.1","Sco5785","Response regulator- LuxR-type (CheY superfamily)",99,99 +6354011,6355559,"resistance","resistance","442","AMR116","Puromycin MFS transporter",100,100 +6419307,6419961,"regulatory","regulatory","493.6","CutR","Two-component
Response regulator. neg. reg. of ACT production in S. lividans & coelicolor. Alters copper metabolism",101,101 +6419967,6421212,"regulatory","regulatory","939.4","CutS","Two-component
Sensor kinase in S. lividans & coelicolor",102,102 +6432592,6433618,"regulatory","regulatory","794.1/297.4","RedD/RphD","Activator of undecylprodigiosin in Streptomyces coelicolor/Activator of prodigiosin biosynthesis in Streptomyces griseoviridis",103,103 +6438205,6438859,"regulatory","regulatory","489.2","RedZ","Regulator of undecylprodigiosin in Streptomyces coelicolor",104,104 +6518092,6519544,"resistance","resistance","277.5","AMR116","Puromycin MFS transporter",105,105 +6527870,6529472,"resistance","resistance","231.6","AMR116","Puromycin MFS transporter",106,106 +6688704,6689961,"resistance","resistance","851.4","AMR165","Macrolide glycosyltransferase: Inactivation enzyme",107,107 +6709908,6711471,"resistance","resistance","253.5","AMR137","Erythromycin esterase",108,108 +6890527,6891175,"regulatory","regulatory","260.9/502.6","FarA/ScbR","GBL receptor
Autoregulator in S. lavendulae showdomycin production/Regulator in RED prod. in S. coelicolor, SCB1",109,109 +6891292,6892237,"regulatory","regulatory","743.9","ScbA","A-factor biosynthesis enzyme",110,110 +6934506,6936138,"resistance","resistance","377.2","AMR271","Multidrug MFS transporter",111,111 +6945704,6946379,"regulatory","regulatory","151.3","SrrB","TetR-type regulator
Involved in the regulation of lankacidin and lankamycin biosynthesis along with SSRA, and C in S. rochei",112,112 +7090612,7091761,"regulatory","regulatory","762","EcrE1","Two-component
Senory kinase of S. coelicolor: (+) co-cultivation with Corallococcus coralloides, or in inculum of 2.5% (v/v) B. subtilis",113,113 +7091757,7092432,"regulatory","regulatory","469.1","EcrE2","Two-component
Response regulator. Positive reg. in RED prod. in S. coelicolor",114,114 +7172393,7174841,"resistance","resistance","272.9","AMR261","EmrB: drug resistance MFS transporter",115,115 +7250905,7252420,"resistance","resistance","237.1","AMR116","Puromycin MFS transporter",116,116 +7497717,7499361,"resistance","resistance","1121.7","AMR127","Hygromycin ABC transporter",117,117 +7499507,7501850,"resistance","resistance","256.1","AMR161","Tex-like protein N-terminal domain",118,118 +7586409,7587975,"resistance","resistance","336.4","AMR196","Tetracenomycin MFS transporter",119,119 +7736806,7738927,"resistance","resistance","254.8","AMR224","Multidrug: subunit of efflux pump conferring antibiotic resistance",120,120 +7782654,7784535,"resistance","resistance","263.5","AMR201","Multidrug ABC antibiotic efflux pump",121,121 +7919672,7921208,"resistance","resistance","236.3","AMR116","Puromycin MFS transporter",122,122 +8345037,8346285,"resistance","resistance","373.3","AMR123","Chloramphenicol efflux pump",123,123 +8488517,8489696,"resistance","resistance","415.1","AMR123","Chloramphenicol efflux pump",124,124 +8553427,8554087,"regulatory","regulatory","472.6","KijR","TetR-type regulator
Regulator of KijX expression and kijanimicin resistance of Streptomyces coelicolor: Kijanimicin, saccharocarcins A and B",125,125 diff --git a/ripp_biocircos.csv b/ripp_biocircos.csv new file mode 100644 index 0000000..72826c4 --- /dev/null +++ b/ripp_biocircos.csv @@ -0,0 +1,5 @@ +"Cluster","Type","Start","Stop","chromosome","Type2" +1,"lanthipeptidea",246868,272469,"GF","lanthipeptidea" +2,"linaridin",4886990,4940861,"GF","linaridin" +3,"lanthipeptidec_d",7407798,7459926,"GF","lanthipeptidec_d" +4,"lanthipeptidea",7681180,7705273,"GF","lanthipeptidea" diff --git a/rre_biocircos.csv b/rre_biocircos.csv new file mode 100644 index 0000000..788920e --- /dev/null +++ b/rre_biocircos.csv @@ -0,0 +1,3 @@ +"Sequence","Start","Stop","Locus_tag","BGC.ID","BGC.product","Domain.name","E.value","Bitscore","End","chromosome","ID","Cluster","Type","Type2" +"NC_003888.3",256525,259687,"SCO0269","N\A","N\A","Lanthipeptide_RRE",1.2e-11,43.9,240,"RRE",1,1,"ripp","ripp" +"NC_003888.3",7696261,7699360,"SCO6930","N\A","N\A","Lanthipeptide_RRE",2.7e-20,71.5,253,"RRE",2,2,"ripp","ripp" diff --git a/rsconnect/shinyapps.io/.DS_Store b/rsconnect/shinyapps.io/.DS_Store new file mode 100644 index 0000000..03db8a2 Binary files /dev/null and b/rsconnect/shinyapps.io/.DS_Store differ diff --git a/rsconnect/shinyapps.io/tsrnlv/bgcviz.dcf b/rsconnect/shinyapps.io/tsrnlv/bgcviz.dcf new file mode 100644 index 0000000..fb731dd --- /dev/null +++ b/rsconnect/shinyapps.io/tsrnlv/bgcviz.dcf @@ -0,0 +1,10 @@ +name: bgcviz +title: +username: tsrnlv +account: tsrnlv +server: shinyapps.io +hostUrl: https://api.shinyapps.io/v1 +appId: 10154357 +bundleId: 7787758 +url: https://tsrnlv.shinyapps.io/bgcviz/ +version: 1 diff --git a/sempi_biocircos.csv b/sempi_biocircos.csv new file mode 100644 index 0000000..18b6aca --- /dev/null +++ b/sempi_biocircos.csv @@ -0,0 +1,34 @@ +"Cluster","Start","Stop","Type","Type2","chromosome","ID" +1,106723,110434,"pks","pks","S",1 +2,307598,308816,"nrps","nrps","S",2 
+3,513997,524488,"nrps","nrps","S",3 +4,1405117,1405750,"nrps","nrps","S",4 +5,1477976,1479275,"nrps","nrps","S",5 +6,2290771,2292100,"nrps","nrps","S",6 +7,2559357,2561673,"pks","pks","S",7 +8,2622623,2623751,"nrps","nrps","S",8 +9,2762807,2764220,"nrps","nrps","S",9 +10,2963180,2964527,"nrps","nrps","S",10 +11,3328396,3329098,"nrps","nrps","S",11 +12,3543364,3584771,"nrps","nrps","S",12 +13,3797561,3798752,"nrps","nrps","S",13 +14,3941523,3942822,"nrps","nrps","S",14 +15,4398632,4399859,"nrps","nrps","S",15 +16,4799973,4801182,"nrps","nrps","S",16 +17,4923207,4924557,"nrps","nrps","S",17 +18,5531212,5532682,"pks","pks","S",18 +19,5786632,5788168,"pks","pks","S",19 +20,6447994,6454855,"nrps-pks","nrps-pks","S",20 +21,6558978,6560136,"nrps","nrps","S",21 +22,6804366,6807194,"nrps","nrps","S",22 +23,6901254,6931905,"pks","pks","S",23 +24,7004464,7004725,"nrps","nrps","S",24 +25,7108290,7116050,"nrps","nrps","S",25 +26,7249252,7250602,"nrps","nrps","S",26 +27,7549538,7550897,"nrps","nrps","S",27 +28,7591514,7598516,"pks","pks","S",28 +29,7734541,7735729,"nrps","nrps","S",29 +30,8053551,8054721,"nrps","nrps","S",30 +31,8140076,8141438,"nrps","nrps","S",31 +32,8282564,8283500,"nrps","nrps","S",32 +33,8504751,8523677,"nrps","nrps","S",33 diff --git a/tests/spelling.R b/tests/spelling.R index 13f77d9..2f02e7b 100644 --- a/tests/spelling.R +++ b/tests/spelling.R @@ -1,6 +1,6 @@ if (requireNamespace("spelling", quietly = TRUE)) { - spelling::spell_check_test( - vignettes = TRUE, error = FALSE, - skip_on_cran = TRUE - ) + spelling::spell_check_test( + vignettes = TRUE, error = FALSE, + skip_on_cran = TRUE + ) } diff --git a/tests/testthat/test-app.R b/tests/testthat/test-app.R index 8849056..94b301e 100644 --- a/tests/testthat/test-app.R +++ b/tests/testthat/test-app.R @@ -1,3 +1,3 @@ test_that("multiplication works", { - expect_equal(2 * 2, 4) + expect_equal(2 * 2, 4) }) diff --git a/tests/testthat/test-golem-recommended.R b/tests/testthat/test-golem-recommended.R index 
facc6b4..c41621c 100644 --- a/tests/testthat/test-golem-recommended.R +++ b/tests/testthat/test-golem-recommended.R @@ -1,26 +1,27 @@ test_that("app ui", { - ui <- app_ui() - golem::expect_shinytaglist(ui) - # Check that formals have not been removed - fmls <- formals(app_ui) - for (i in c("request")) { - expect_true(i %in% names(fmls)) - } + ui <- app_ui() + golem::expect_shinytaglist(ui) + # Check that formals have not been removed + fmls <- formals(app_ui) + for (i in c("request")) { + expect_true(i %in% names(fmls)) + } }) test_that("app server", { - server <- app_server - expect_is(server, "function") - # Check that formals have not been removed - fmls <- formals(app_server) - for (i in c("input", "output", "session")) { - expect_true(i %in% names(fmls)) - } + server <- app_server + expect_is(server, "function") + # Check that formals have not been removed + fmls <- formals(app_server) + for (i in c("input", "output", "session")) { + expect_true(i %in% names(fmls)) + } }) # Configure this test to fit your need test_that( - "app launches", { - golem::expect_running(sleep = 5) - } + "app launches", + { + golem::expect_running(sleep = 5) + } ) diff --git a/vignettes/BGCViz.Rmd b/vignettes/BGCViz.Rmd new file mode 100644 index 0000000..95cafa9 --- /dev/null +++ b/vignettes/BGCViz.Rmd @@ -0,0 +1,293 @@ +--- +title: "BGCViz" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{BGCViz} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Quick start guide + + + +**Note:** The guide is divided into "steps", but no "pipeline" or sequential steps exist. Rather after files upload (Step 1 and 2) you are free to use the analysis according to your needs. This guide is used mostly for a quick introduction to the BGCViz capabilities   + +**BGCViz is intended to be run on one sequence. 
Therefore, it is not possible to analyze genomes with several contigs. One contig at a time only** + + + +# Step 1. Getting the input files + +A detailed description of every possible input file is [here](https://ostash-group.github.io/BGCViz/#/Input_files_options). In short, prior to use, antismash, prism, sempi and ARTS results should be downloaded. If they are downloaded in the correct formats, they can be used right away. + + +# Step 2. Uploading input files + +The upload of the files is pretty straightforward - use the properly named file upload section. Prior to upload, no UI toggles and elements are shown. + +### Note: You can use example data from S.coelicolor by pressing the button in the "Use Example data" box + +After the upload of the first input: + + + +![anti_upload](anti_upload.png) + + + +**Please also enter the length of the contig in the corresponding field under uploads. This length is used for correct representation of the Biocircos plot** + +## Results with only one file upload + +1. The first two plots are available for the analysis on the "Annotation visualization and comparison" sidemenu. The first plot contains all the results for the antismash annotation. With more files uploaded, results for all annotations will appear here, with respect to the chromosome position. + + + +![anti_all](anti_all.png) + + + +2. The second plot shows the BGCs intercepted with the chosen one. As a rule, on the top, there is an output which is used as a reference. In our case, this output is antismash. After uploading more files, they will be visualized under the antismash one. Only clusters that are intercepted with the chosen one are visualized. (On the upper plot all the data is visualized). + + + +![anti_one](anti_one.png) + + + +For now, this plot is the same as the plot above, due to lack of data. + + + +### Controls + +The "Visualize AntiSMASH BGC with several types as 'Hybrid'" check box is available under the "Global options" sidemenu option.
This will rename the multiple type regions as "hybrid". + + + +**Tip: We suggest using this option after renaming the clusters (step 3 here). A cluster with several products can become monotype after renaming, so more information about its type will be preserved.** + + + +## Results with multiple files upload + +Let's proceed and upload PRISM and SEMPI results + +### The example S.coelicolor data is available upon button press under PRISM and SEMPI input fields. + +The plots in the previous sidemenu changed. Also, two additional sidemenus appeared: + +1. Biocircos plot sidemenu contains a dynamic circos plot for all-vs-all interception results. More details in [step 4](#step-4-color-the-links-in-biocircos-plot) + +2. Summarize interception sidemenu contains data that can help to prioritize clusters, based on how many times each one is annotated by another tool. More details in [step 5](#step-5-change-the-data-to-group-by) + +Because we uploaded the json PRISM file (from PRISM 4.4.5), which also contains resistance and regulatory genes which can be mapped to the genomic coordinates, a new "chromosome" is visualized with these genes, named "P-supp". More details are available [here](https://ostash-group.github.io/BGCViz/#/Input_files_options#PRISM). The genes are invisible on the plots due to plot scale. To make them more visible, you can tick the "Add thickness to PRISM resistance + regulatory genes results visualization" checkbox in Global options. + + + +![p-supp.png](p-supp.png) + + + +There are more controls for data manipulation available, as well as more plots. They all will be discussed in the following steps. + + + +# Step 3. Rename the input clusters + +## Step 3.1 Rename the clusters + +It is good practice to rename the type of clusters after uploading. Renaming the clusters will hugely affect the coloring schema. In particular, it will decrease the clutter in the legend and improve high-level visual comparison.
Also, renaming is somewhat essential for coloring the Biocircos links and arcs. + + +To rename the dataset you can press the "Rename" button under the "Improve visualization" menu. + + +![rename](rename.png) + + + +The legend on the plots changes, but the "Type" field upon mouse cursor hovering on the cluster remains unchanged. Therefore, the initial products are available on mouse hover. Alternatively, the "Reset" button reverts the cluster types to the initial ones. + + + +![renamed](renamed.png) + + +So, the color indicates that the product is ripp (in the legend), but the Type field on hover indicates the true type (lanthipeptide-class-i). + +The renaming scheme is available in the [Glossary](https://ostash-group.github.io/BGCViz/#/Glossary). A guide on how to change it is available [here](https://ostash-group.github.io/BGCViz/#/BGCViz_renaming_and_coloring_options). + + + +## Step 3.2 If multi-type - make hybrid + +By default, all hybrids remain intact after renaming. However, it is possible to include a renaming rule for a particular hybrid (nrps__pks -> nrps). The last step in renaming is to tick the ".....rename as hybrid" checkboxes: + + + +![hybrids](hybrids.png) + + + + +# Step 4. Color the links in biocircos plot + +The other tab is "Biocircos plot". By default, no coloring is done: the data should be renamed or, at least, match the coloring scheme. More about changing colors is [here](https://ostash-group.github.io/BGCViz/#/BGCViz_renaming_and_coloring_options), and more about link coloring modes is [here](https://ostash-group.github.io/BGCViz/#/Logic_of_the_output#biocircos-plot). + + + +The first plot is a default circos plot with an all-vs-all interception. More information is available on hover. The visualized type information (in the links and arcs) is the default product naming, not the renamed version. + + + +![biocircos_one](biocircos_one.png) + + + +The second plot is actually a legend for the circos plot: + + + +![biocircos_legend](biocircos_dt.png) + + + +**Note: "base" color indicates the default color.
Please include it if changing the coloring scheme (more [here](https://ostash-group.github.io/BGCViz/#/BGCViz_renaming_and_coloring_options))** + + + +The coloring options for the biocircos plot are available under the "More" button at the top of the plot: + + + +![color_biocircos.png](biocircos_color.png) + + + +There are three coloring modes available for the links, which are discussed in much more detail in the appropriate [section](https://ostash-group.github.io/BGCViz/#/Logic_of_the_output#biocircos-plot). The problem with links is that they can connect clusters with separate types. The mentioned modes are just different ways to deal with this issue. + + + +# Step 5. Change the data to group by + + + +The last sidemenu is called "Summarize interception". It contains a count barplot, which shows how many times the chosen cluster was annotated by any other tool, and a "Group by" table, which shows which clusters are intercepted with the reference (first column). + +For our example, the barplot looks like this: + + + +![summarize_one.png](summarize_one.png) + + + +From the chart we can see that cluster #10 from antismash and #9 from prism have a pretty good amount of data supporting their annotation (let's take arbitrary clusters with high count). The group by table reveals the intercepted clusters: + + + +![group_by_one.png](group_by_one.png) + + + +We can conclude that cluster #10 from antismash, #9 from PRISM and #12 from SEMPI are the same one. Moreover, it contains 1 resistance gene (#54) and 3 regulatory genes (#51-53), as identified by PRISM. Types of clusters can be viewed in a pop-up on mouse hover: + +![hover](group_by_hover.png) + +The interception of all named clusters can be confirmed in the Biocircos plot, or by reviewing PRISM and SEMPI data as group_by columns: + + + +![change_summarize.png](change_summarize.png) + + + +By default, only intercepted regions are shown.
But if the purpose of the analysis is to show novel regions, annotated by just one program, then tick the "Show all BGC for the 'group by' method (+ individually annotated BGC)" checkbox. For example, if we group our data by Antismash and check the box, it becomes clear that many clusters are annotated only by this algorithm. + + + +**Tip: Check the last ("NA") row in the first column. It shows the clusters that are not intercepted with the reference data but are intercepted among other methods. For example, cluster #6 from PRISM is in the NA row when we group by Antismash. When we group by PRISM, it becomes clear that this cluster is annotated only by SEMPI (#7). A similar situation occurs with the PRISM regulatory gene (#4), which is not intercepted by Antismash. If we group the data by the PRISM column, it will land in the corresponding group** + + + +**Tip #2: You can view the group by column in the "Annotation visualization and comparison" tab for feature-rich visualization on hover. Just choose the same column in the select menu for that plot** + +# Step 6. Compare to the DeepBGC + +The clusters in the previous annotations can be thought of as "boolean": they are either annotated (and so exist in the data) or not. The DeepBGC data contains several scores alongside cluster information. This program uses a deep learning model which, unlike rule-based methods, can annotate novel clusters. The scores are deepbgc score, activity score and cluster type score. More about the model and the scores is available in the paper (DOI: [10.1093/nar/gkz654](https://doi.org/10.1093/nar/gkz654)). + +The output tsv file is the standard input for BGCViz. After uploading, several plots are available in the new "Compare the DeepBGC data" tab. + +1. The DeepBGC comparison to the reference annotation. The barplot shows how many clusters are annotated at every chosen score threshold only by DeepBGC, only by the chosen reference data (here Antismash) and by both.
In the upper right corner, the applied score thresholds are shown for the DeepBGC data (the defaults are 50%). This plot can help to choose the annotated/novelty rates for DeepBGC. + + + +![deep_one](deep_one.png) + + + +2. Connected scatter plot with Novelty, Annotation and Skip rates, where: + +- Novelty rate = "# of BGC annotated only by deepbgc"/("# clusters annotated only by antismash" + "# clusters annotated by both antismash and deepbgc"). This rate points to how many clusters are annotated only by DeepBGC. + +- Annotation rate = "# of BGC annotated by antismash and deepbgc"/"total number of antismash annotated BGC". This rate points to how many clusters DeepBGC annotated alongside antismash. + +- Skip rate = "# of BGC annotated only by antismash"/"total number of antismash clusters". This rate points to how many clusters DeepBGC missed, assuming that antismash is a reference annotation. + +These rates are just another way to visualize the bar plot + +![rates_one.png](rates_one.png) + +The controls for the comparison plots look like this: + + + +![deep_comparison](deep_comparison.png) + + + +Note that you can choose which data to compare the DeepBGC results against (PRISM, Antismash or SEMPI), as well as the score and plot step. + +The data cleaning options for DeepBGC are basically the columns of the .tsv output file and are the following: + + + +![deep_filters](deepbgc_filters.png) + +The filters are applied globally for DeepBGC data. For convenience you can use the filtering sliders on the sidebar, not the one under "Global options": + +![deep_sidebar](deep_sidebar.png) + +Therefore, BGCViz can be used not only as a tool for visualizing interceptions between annotated BGCs but also as a nice GUI for DeepBGC data cleaning. The results of the cleaning can then be downloaded for downstream analysis (Step 7) + + + +**Note: The tab and plots will appear after DeepBGC data upload.
Therefore, if no Antismash, PRISM or SEMPI data was supplied the plots will result in error** + + +### The same workflow is available for GECCO data + +# Step 7. Data download + + + +After the analysis, you can download all the data as csv files (under Global options). This is particularly valuable in the case of DeepBGC data cleaning. Alongside the datasets, group_by table will be downloaded (as visualized in the "Summarize interception" tab) and group_by script for downstream analysis with clinker. More about it [here](https://ostash-group.github.io/BGCViz/#/Additional_analysis) + + + +![download_button.png](download.png) + + +```{r} +sessionInfo() +``` + diff --git a/vignettes/anti_all.png b/vignettes/anti_all.png new file mode 100644 index 0000000..7c943f3 Binary files /dev/null and b/vignettes/anti_all.png differ diff --git a/vignettes/anti_one.png b/vignettes/anti_one.png new file mode 100644 index 0000000..181c5d9 Binary files /dev/null and b/vignettes/anti_one.png differ diff --git a/vignettes/anti_upload.png b/vignettes/anti_upload.png new file mode 100644 index 0000000..6f31304 Binary files /dev/null and b/vignettes/anti_upload.png differ diff --git a/vignettes/biocircos_color.png b/vignettes/biocircos_color.png new file mode 100644 index 0000000..f353fa3 Binary files /dev/null and b/vignettes/biocircos_color.png differ diff --git a/vignettes/biocircos_dt.png b/vignettes/biocircos_dt.png new file mode 100644 index 0000000..193471a Binary files /dev/null and b/vignettes/biocircos_dt.png differ diff --git a/vignettes/biocircos_one.png b/vignettes/biocircos_one.png new file mode 100644 index 0000000..610b680 Binary files /dev/null and b/vignettes/biocircos_one.png differ diff --git a/vignettes/change_summarize.png b/vignettes/change_summarize.png new file mode 100644 index 0000000..15b21a4 Binary files /dev/null and b/vignettes/change_summarize.png differ diff --git a/vignettes/deep_comparison.png b/vignettes/deep_comparison.png new file mode 100644 index 
0000000..297de4a Binary files /dev/null and b/vignettes/deep_comparison.png differ diff --git a/vignettes/deep_one.png b/vignettes/deep_one.png new file mode 100644 index 0000000..3c5423e Binary files /dev/null and b/vignettes/deep_one.png differ diff --git a/vignettes/deep_sidebar.png b/vignettes/deep_sidebar.png new file mode 100644 index 0000000..fe02831 Binary files /dev/null and b/vignettes/deep_sidebar.png differ diff --git a/vignettes/deepbgc_filters.png b/vignettes/deepbgc_filters.png new file mode 100644 index 0000000..5157427 Binary files /dev/null and b/vignettes/deepbgc_filters.png differ diff --git a/vignettes/download.png b/vignettes/download.png new file mode 100644 index 0000000..556ea98 Binary files /dev/null and b/vignettes/download.png differ diff --git a/vignettes/group_by_hover.png b/vignettes/group_by_hover.png new file mode 100644 index 0000000..a429de9 Binary files /dev/null and b/vignettes/group_by_hover.png differ diff --git a/vignettes/group_by_one.png b/vignettes/group_by_one.png new file mode 100644 index 0000000..c64cc29 Binary files /dev/null and b/vignettes/group_by_one.png differ diff --git a/vignettes/hybrids.png b/vignettes/hybrids.png new file mode 100644 index 0000000..2ab9d71 Binary files /dev/null and b/vignettes/hybrids.png differ diff --git a/vignettes/p-supp.png b/vignettes/p-supp.png new file mode 100644 index 0000000..e8505a4 Binary files /dev/null and b/vignettes/p-supp.png differ diff --git a/vignettes/rates_one.png b/vignettes/rates_one.png new file mode 100644 index 0000000..72a44e6 Binary files /dev/null and b/vignettes/rates_one.png differ diff --git a/vignettes/rename.png b/vignettes/rename.png new file mode 100644 index 0000000..187924a Binary files /dev/null and b/vignettes/rename.png differ diff --git a/vignettes/renamed.png b/vignettes/renamed.png new file mode 100644 index 0000000..227a60a Binary files /dev/null and b/vignettes/renamed.png differ diff --git a/vignettes/summarize_one.png 
b/vignettes/summarize_one.png new file mode 100644 index 0000000..985f9a6 Binary files /dev/null and b/vignettes/summarize_one.png differ