treeppl · maribraga · Dec 21, 2025 · Dec 21, 2025 · Dec 21, 2025 · Dec 22, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,7 +22,9 @@ Imports:
     utils,
     gh,
     curl,
-    cli
+    cli,
+    magrittr,
+    bnpsd
 Suggests:
     devtools,
     ggplot2,
@@ -37,8 +39,7 @@ Suggests:
     rmarkdown,
     pandoc,
     dplyr,
-    rlang,
-    magrittr
+    rlang
 VignetteBuilder: knitr
 Config/Needs/website: rmarkdown
 Remotes: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,20 +1,25 @@
 # Generated by roxygen2: do not edit by hand
 
-export(tp_TreePPL_json)
 export(tp_compile)
 export(tp_data)
 export(tp_fp_fetch)
+export(tp_json_to_phylo)
 export(tp_list)
+export(tp_map_tree)
 export(tp_model)
 export(tp_model_names)
 export(tp_parse)
 export(tp_parse_host_rep)
-export(tp_phylo_2_TreePPL)
+export(tp_phylo_to_tpjson)
 export(tp_run)
+export(tp_smc_convergence)
 export(tp_stored_compiled)
 export(tp_stored_data)
 export(tp_stored_model)
 export(tp_tempdir)
 export(tp_treeppl)
+export(tp_treeppl_json)
 export(tp_write)
+importFrom(bnpsd,tree_reorder)
+importFrom(magrittr,"%>%")
 importFrom(methods,is)
diff --git a/R/getter.R b/R/getter.R
@@ -66,10 +66,9 @@ tp_data <- function(data_input) {
   # If path exists, import data from file
   if (!is(res, "try-error") && res) {
     data <- tp_list(jsonlite::fromJSON(data_input))
-    # If path doens't exist
+    # If path doesn't exist
   } else if (assertthat::is.string(data_input)) {
-    res <-
-      try(get(data_input, treepplr::tp_model_names()), silent = TRUE)
+    res <- try(get(data_input, treepplr::tp_model_names()), silent = TRUE)
     # data_input has the name of a known model
     if (!is(res, "try-error")) {
       data <- tp_list(find_file(res, "json"))

diff --git a/R/post_treatment.R b/R/post_treatment.R
@@ -5,29 +5,40 @@
 #'
 #' @param treeppl_out a character vector giving the TreePPL json output
 #' produced by [tp_treeppl].
-#' @param n_runs a [base::integer] giving the number of runs (MCMC) or sweeps (SMC).
 #'
-#'
-#' @return A list (n = n_runs) of data frames with the output from inference
-#' in TreePPL.
+#' @return A data frame with the output from inference in TreePPL.
 #' @export
-tp_parse <- function(treeppl_out, n_runs = 1) {
-
-  if (n_runs == 1) {
-    treeppl_out <- list(treeppl_out)
-  }
+tp_parse <- function(treeppl_out) {
 
   result_df <- list()
 
-  for (index in seq_along(treeppl_out)) {
-    result_df <-
-      rbind(result_df, data.frame(samples=unlist(treeppl_out[[1]]$samples),
-                                  log_weight=unlist(treeppl_out[[1]]$weights)))
+  for (i in seq_along(treeppl_out)) {
+
+    samples_c <- unlist(treeppl_out[[i]]$samples)
+    log_weight_c <- unlist(treeppl_out[[i]]$weights)
+
+    if(is.null(names(samples_c))){
+      result_df <- rbind(result_df,
+                         data.frame(run = i,
+                                    samples = samples_c,
+                                    log_weight = log_weight_c,
+                                    norm_const = treeppl_out[[i]]$normConst)
+      )
+    } else {
+      result_df <- rbind(result_df,
+                         data.frame(run = i,
+                                    parameter = names(samples_c),
+                                    samples = samples_c,
+                                    log_weight = log_weight_c,
+                                    norm_const = treeppl_out[[i]]$normConst)
+      )
+    }
   }
 
   return(result_df)
 }
 
+
 #' Parse TreePPL json output for host repertoire model
 #'
 #' @description
@@ -37,17 +48,11 @@ tp_parse <- function(treeppl_out, n_runs = 1) {
 #' @param treeppl_out a character vector giving the TreePPL json output
 #' produced by [tp_treeppl].
 #'
-#' @param n_runs a [base::integer] giving the number of runs (MCMC) or sweeps (SMC).
-#'
-#'
 #' @return A list (n = n_runs) of data frames with the output from inference
 #' in TreePPL under the host repertoire evolution model.
 #' @export
 
-tp_parse_host_rep <- function(treeppl_out, n_runs = 1) {
-  if (n_runs == 1) {
-    treeppl_out <- list(treeppl_out)
-  }
+tp_parse_host_rep <- function(treeppl_out) {
 
   result_list <- list()
 
@@ -82,7 +87,8 @@ tp_parse_host_rep <- function(treeppl_out, n_runs = 1) {
       "child2_index"
     )
 
-    for (i in seq_along(output_trppl[1][[1]])) {
+    #for (i in seq_along(output_trppl[1][[1]])) {
+    for (i in seq_along(output_trppl$samples)) {
       res <- data.frame(matrix(ncol = nbr_col, nrow = 0))
       colnames(res) <- c(
         "iteration",
@@ -221,3 +227,98 @@ peel_tree <- function(subtree,
   }
   result
 }
+
+
+#' Check for convergence across multiple SMC sweeps/runs
+#'
+#' @param treeppl_out a character vector giving the TreePPL json output
+#' produced by [tp_treeppl].
+#'
+#' @returns Variance in the normalizing constants across SMC sweeps.
+#' @export
+#'
+tp_smc_convergence <- function(treeppl_out) {
+
+  output <- tp_parse(treeppl_out)
+  zs <- output %>%
+    dplyr::slice_head(n = 1, by = run) %>%
+    dplyr::pull(norm_const)
+
+  return(var(zs))
+}
+
+
+
+#' Find the Maximum A Posteriori (MAP) Tree from weighted samples
+#'
+#' @param trees_out The list returned by [treepplr::tp_json_to_phylo]
+#' (containing $trees and $weights)
+#'
+#' @returns The MAP tree as a phylo object
+#' @export
+#'
+tp_map_tree <- function(trees_out) {
+
+  trees <- trees_out$trees
+  weights <- trees_out$weights
+
+  # Handle Log-Weights (Optional Check)
+  # If weights are negative (log-scale), convert them to probabilities first
+  if (any(weights < 0)) {
+    message("Log-weights detected. converting to relative probabilities...")
+    # Subtract max to avoid underflow/overflow issues
+    weights <- exp(weights - max(weights))
+  }
+
+  # Identify unique topologies
+  trees_ready <- lapply(trees, function(tree) {
+
+    # normalize edge lengths for the tip reordering
+    tree$edge.length <- tree$edge.length/max(tree$edge.length)
+    # Ladderize to fix edge indices
+    tree_lad <- ladderize_tree(tree)
+    # Order tip labels as similarly as possible
+    tree_ord <- bnpsd::tree_reorder(tree_lad, sort(tree_lad$tip.label))
+    # Remove edge lengths to only focus on topology
+    tree_ord$edge.length <- NULL
+    return(tree_ord)
+
+  })
+
+  # This compresses the list into unique tree topologies
+  unique_topologies <- ape::unique.multiPhylo(trees_ready, use.edge.length = FALSE)
+
+  # Map every original tree to a unique topology index
+  #match_indices <- match(trees_ready, unique_topologies)
+  match_indices <- attr(unique_topologies, "old.index")
+
+  # Sum weights for each unique topology
+  # tapply splits the weights by the index and sums them
+  topology_probs <- tapply(weights, match_indices, sum)
+
+  # Identify the Best Topology
+  best_index <- as.numeric(names(which.max(topology_probs)))
+
+  # Calculate posterior probability of this MAP topology
+  map_prob <- max(topology_probs) / sum(topology_probs)
+
+  # Compute Mean Branch Lengths for the MAP Topology
+  # We take all samples that matched the MAP topology...
+  matching_indices <- which(match_indices == best_index)
+  matching_trees   <- trees[matching_indices]
+  matching_weights <- weights[matching_indices]
+
+  # ...and compute a consensus to average their branch lengths.
+  # Ideally, we should do a weighted average of the lengths,
+  # but ape::consensus uses simple mean. For most purposes, this is sufficient.
+  final_map <- map <- phangorn::allCompat(matching_trees, rooted=TRUE) |>
+    phangorn::add_edge_length(matching_trees,
+                              fun = function(x) weighted.mean(x, matching_weights))
+
+  print(paste("MAP Topology found"))
+  print(paste("Posterior Probability:", round(map_prob, 4)))
+  print(paste("Based on the topology of", length(matching_indices),
+              "samples out of", length(trees)))
+
+  return(final_map)
+}
diff --git a/R/runner.R b/R/runner.R
@@ -105,7 +105,7 @@
 #' and manual (sample only at manually defined resampling locations).
 #' Use 'NULL' to ignore.
 #'
-#' @return TreePPL output in JSON format.
+#' @return A list of TreePPL output in parsed JSON format.
 #' @export
 
 tp_treeppl <-
@@ -127,8 +127,11 @@ tp_treeppl <-
            prune = FALSE,
            subsample =  NULL,
            resample = NULL) {
+
     tp_write(model, model_file_name, data, data_file_name)
+
     if (compile_model) {
+
       tp_compile(
         model_file_name,
         samples,
@@ -145,6 +148,7 @@ tp_treeppl <-
         resample
       )
     }
+
     return(tp_run(model_file_name, data_file_name, n_runs))
   }
 
@@ -360,7 +364,7 @@ tp_compile <- function(model_file_name = "tmp_model_file",
 #'
 #' @param model_file_name a character vector giving a model name.
 #' @param data_file_name a character vector giving a data name.
-#' @param n_runs a [base::integer] giving the number of run (mcmc)/sweap (smc).
+#' @param n_runs a [base::integer] giving the number of runs (mcmc)/sweaps (smc).
 #'
 #' @details
 #'
@@ -372,27 +376,31 @@ tp_compile <- function(model_file_name = "tmp_model_file",
 #' a data name. Use a [treepplr::tp_stored_data] name if you have already write
 #' your data with [treepplr::tp_treeppl].
 #'
-#' `samples` : The number of samples (mcmc) / particules (smc) during inference.
-#'
 #' `n_runs` : The number of run (mcmc) / sweap (smc) used for the inference.
 #'
-#' @return TreePPL output in JSON format.
+#' @return A list of TreePPL output in parsed JSON format.
 #' @export
 
 tp_run <- function(model_file_name = "tmp_model_file",
                    data_file_name = "tmp_data_file",
-                   n_runs = "1") {
+                   n_runs = 1) {
+
+  n_runs_string <- paste0("--sweeps ", n_runs, " ")
+
   # if dir_path = NULL return temp_dir, if not return dir
   dir_path <- tp_tempdir()
 
   # n_runs
   # Empty LD_LIBRARY_PATH from R_env for this command specifically
   # due to conflict with internal env from treeppl self container
   command <- paste0("LD_LIBRARY_PATH= MCORE_LIBS= ", dir_path, model_file_name, ".exe ",
-                    dir_path, data_file_name, ".json ", n_runs,
+                    dir_path, data_file_name, ".json ", n_runs_string,
                     paste(">", paste0(dir_path, model_file_name, "_out.json")
                     ))
   system(command)
 
-  return(jsonlite::fromJSON(paste0(dir_path, model_file_name, "_out.json"), simplifyVector = FALSE))
+  json_out <- readLines(paste0(dir_path, model_file_name, "_out.json")) %>%
+    lapply(jsonlite::fromJSON, simplifyVector = FALSE)
+
+  return(json_out)
 }