Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ Imports:
utils,
gh,
curl,
cli
cli,
magrittr,
bnpsd
Suggests:
devtools,
ggplot2,
Expand All @@ -37,8 +39,7 @@ Suggests:
rmarkdown,
pandoc,
dplyr,
rlang,
magrittr
rlang
VignetteBuilder: knitr
Config/Needs/website: rmarkdown
Remotes:
Expand Down
9 changes: 7 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
# Generated by roxygen2: do not edit by hand

export(tp_TreePPL_json)
export(tp_compile)
export(tp_data)
export(tp_fp_fetch)
export(tp_json_to_phylo)
export(tp_list)
export(tp_map_tree)
export(tp_model)
export(tp_model_names)
export(tp_parse)
export(tp_parse_host_rep)
export(tp_phylo_2_TreePPL)
export(tp_phylo_to_tpjson)
export(tp_run)
export(tp_smc_convergence)
export(tp_stored_compiled)
export(tp_stored_data)
export(tp_stored_model)
export(tp_tempdir)
export(tp_treeppl)
export(tp_treeppl_json)
export(tp_write)
importFrom(bnpsd,tree_reorder)
importFrom(magrittr,"%>%")
importFrom(methods,is)
5 changes: 2 additions & 3 deletions R/getter.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,9 @@ tp_data <- function(data_input) {
# If path exists, import data from file
if (!is(res, "try-error") && res) {
data <- tp_list(jsonlite::fromJSON(data_input))
# If path doens't exist
# If path doesn't exist
} else if (assertthat::is.string(data_input)) {
res <-
try(get(data_input, treepplr::tp_model_names()), silent = TRUE)
res <- try(get(data_input, treepplr::tp_model_names()), silent = TRUE)
# data_input has the name of a known model
if (!is(res, "try-error")) {
data <- tp_list(find_file(res, "json"))
Expand Down
143 changes: 122 additions & 21 deletions R/post_treatment.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,40 @@
#'
#' @param treeppl_out a character vector giving the TreePPL json output
#' produced by [tp_treeppl].
#' @param n_runs a [base::integer] giving the number of runs (MCMC) or sweeps (SMC).
#'
#'
#' @return A list (n = n_runs) of data frames with the output from inference
#' in TreePPL.
#' @return A data frame with the output from inference in TreePPL.
#' @export
tp_parse <- function(treeppl_out, n_runs = 1) {

if (n_runs == 1) {
treeppl_out <- list(treeppl_out)
}
tp_parse <- function(treeppl_out) {

result_df <- list()

for (index in seq_along(treeppl_out)) {
result_df <-
rbind(result_df, data.frame(samples=unlist(treeppl_out[[1]]$samples),
log_weight=unlist(treeppl_out[[1]]$weights)))
for (i in seq_along(treeppl_out)) {

samples_c <- unlist(treeppl_out[[i]]$samples)
log_weight_c <- unlist(treeppl_out[[i]]$weights)

if(is.null(names(samples_c))){
result_df <- rbind(result_df,
data.frame(run = i,
samples = samples_c,
log_weight = log_weight_c,
norm_const = treeppl_out[[i]]$normConst)
)
} else {
result_df <- rbind(result_df,
data.frame(run = i,
parameter = names(samples_c),
samples = samples_c,
log_weight = log_weight_c,
norm_const = treeppl_out[[i]]$normConst)
)
}
}

return(result_df)
}


#' Parse TreePPL json output for host repertoire model
#'
#' @description
Expand All @@ -37,17 +48,11 @@ tp_parse <- function(treeppl_out, n_runs = 1) {
#' @param treeppl_out a character vector giving the TreePPL json output
#' produced by [tp_treeppl].
#'
#' @param n_runs a [base::integer] giving the number of runs (MCMC) or sweeps (SMC).
#'
#'
#' @return A list (n = n_runs) of data frames with the output from inference
#' in TreePPL under the host repertoire evolution model.
#' @export

tp_parse_host_rep <- function(treeppl_out, n_runs = 1) {
if (n_runs == 1) {
treeppl_out <- list(treeppl_out)
}
tp_parse_host_rep <- function(treeppl_out) {

result_list <- list()

Expand Down Expand Up @@ -82,7 +87,8 @@ tp_parse_host_rep <- function(treeppl_out, n_runs = 1) {
"child2_index"
)

for (i in seq_along(output_trppl[1][[1]])) {
#for (i in seq_along(output_trppl[1][[1]])) {
for (i in seq_along(output_trppl$samples)) {
res <- data.frame(matrix(ncol = nbr_col, nrow = 0))
colnames(res) <- c(
"iteration",
Expand Down Expand Up @@ -221,3 +227,98 @@ peel_tree <- function(subtree,
}
result
}


#' Check for convergence across multiple SMC sweeps/runs
#'
#' @param treeppl_out a character vector giving the TreePPL json output
#' produced by [tp_treeppl].
#'
#' @returns Variance in the normalizing constants across SMC sweeps.
#' @export
#'
tp_smc_convergence <- function(treeppl_out) {

output <- tp_parse(treeppl_out)
zs <- output %>%
dplyr::slice_head(n = 1, by = run) %>%
dplyr::pull(norm_const)

return(var(zs))
}



#' Find the Maximum A Posteriori (MAP) Tree from weighted samples
#'
#' @param trees_out The list returned by [treepplr::tp_json_to_phylo]
#' (containing $trees and $weights)
#'
#' @returns The MAP tree as a phylo object
#' @export
#'
tp_map_tree <- function(trees_out) {

trees <- trees_out$trees
weights <- trees_out$weights

# Handle Log-Weights (Optional Check)
# If weights are negative (log-scale), convert them to probabilities first
if (any(weights < 0)) {
message("Log-weights detected. converting to relative probabilities...")
# Subtract max to avoid underflow/overflow issues
weights <- exp(weights - max(weights))
}

# Identify unique topologies
trees_ready <- lapply(trees, function(tree) {

# normalize edge lengths for the tip reordering
tree$edge.length <- tree$edge.length/max(tree$edge.length)
# Ladderize to fix edge indices
tree_lad <- ladderize_tree(tree)
# Order tip labels as similarly as possible
tree_ord <- bnpsd::tree_reorder(tree_lad, sort(tree_lad$tip.label))
# Remove edge lengths to only focus on topology
tree_ord$edge.length <- NULL
return(tree_ord)

})

# This compresses the list into unique tree topologies
unique_topologies <- ape::unique.multiPhylo(trees_ready, use.edge.length = FALSE)

# Map every original tree to a unique topology index
#match_indices <- match(trees_ready, unique_topologies)
match_indices <- attr(unique_topologies, "old.index")

# Sum weights for each unique topology
# tapply splits the weights by the index and sums them
topology_probs <- tapply(weights, match_indices, sum)

# Identify the Best Topology
best_index <- as.numeric(names(which.max(topology_probs)))

# Calculate posterior probability of this MAP topology
map_prob <- max(topology_probs) / sum(topology_probs)

# Compute Mean Branch Lengths for the MAP Topology
# We take all samples that matched the MAP topology...
matching_indices <- which(match_indices == best_index)
matching_trees <- trees[matching_indices]
matching_weights <- weights[matching_indices]

# ...and compute a consensus to average their branch lengths.
# Ideally, we should do a weighted average of the lengths,
# but ape::consensus uses simple mean. For most purposes, this is sufficient.
final_map <- map <- phangorn::allCompat(matching_trees, rooted=TRUE) |>
phangorn::add_edge_length(matching_trees,
fun = function(x) weighted.mean(x, matching_weights))

print(paste("MAP Topology found"))
print(paste("Posterior Probability:", round(map_prob, 4)))
print(paste("Based on the topology of", length(matching_indices),
"samples out of", length(trees)))

return(final_map)
}
24 changes: 16 additions & 8 deletions R/runner.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
#' and manual (sample only at manually defined resampling locations).
#' Use 'NULL' to ignore.
#'
#' @return TreePPL output in JSON format.
#' @return A list of TreePPL output in parsed JSON format.
#' @export

tp_treeppl <-
Expand All @@ -127,8 +127,11 @@ tp_treeppl <-
prune = FALSE,
subsample = NULL,
resample = NULL) {

tp_write(model, model_file_name, data, data_file_name)

if (compile_model) {

tp_compile(
model_file_name,
samples,
Expand All @@ -145,6 +148,7 @@ tp_treeppl <-
resample
)
}

return(tp_run(model_file_name, data_file_name, n_runs))
}

Expand Down Expand Up @@ -360,7 +364,7 @@ tp_compile <- function(model_file_name = "tmp_model_file",
#'
#' @param model_file_name a character vector giving a model name.
#' @param data_file_name a character vector giving a data name.
#' @param n_runs a [base::integer] giving the number of run (mcmc)/sweap (smc).
#' @param n_runs a [base::integer] giving the number of runs (mcmc)/sweaps (smc).
#'
#' @details
#'
Expand All @@ -372,27 +376,31 @@ tp_compile <- function(model_file_name = "tmp_model_file",
#' a data name. Use a [treepplr::tp_stored_data] name if you have already write
#' your data with [treepplr::tp_treeppl].
#'
#' `samples` : The number of samples (mcmc) / particules (smc) during inference.
#'
#' `n_runs` : The number of run (mcmc) / sweap (smc) used for the inference.
#'
#' @return TreePPL output in JSON format.
#' @return A list of TreePPL output in parsed JSON format.
#' @export

tp_run <- function(model_file_name = "tmp_model_file",
data_file_name = "tmp_data_file",
n_runs = "1") {
n_runs = 1) {

n_runs_string <- paste0("--sweeps ", n_runs, " ")

# if dir_path = NULL return temp_dir, if not return dir
dir_path <- tp_tempdir()

# n_runs
# Empty LD_LIBRARY_PATH from R_env for this command specifically
# due to conflict with internal env from treeppl self container
command <- paste0("LD_LIBRARY_PATH= MCORE_LIBS= ", dir_path, model_file_name, ".exe ",
dir_path, data_file_name, ".json ", n_runs,
dir_path, data_file_name, ".json ", n_runs_string,
paste(">", paste0(dir_path, model_file_name, "_out.json")
))
system(command)

return(jsonlite::fromJSON(paste0(dir_path, model_file_name, "_out.json"), simplifyVector = FALSE))
json_out <- readLines(paste0(dir_path, model_file_name, "_out.json")) %>%
lapply(jsonlite::fromJSON, simplifyVector = FALSE)

return(json_out)
}
Loading