From d1398450c682453d02a801b09c9b9c2080304cf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Wed, 7 Aug 2024 18:36:47 +0200 Subject: [PATCH 01/11] Remove vignette html --- R/mapperHD.html | 875 ------------------------------------------------ 1 file changed, 875 deletions(-) delete mode 100644 R/mapperHD.html diff --git a/R/mapperHD.html b/R/mapperHD.html deleted file mode 100644 index 1bef47d..0000000 --- a/R/mapperHD.html +++ /dev/null @@ -1,875 +0,0 @@ - - - - - - - - - - - - - -Mapper HD - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - -
-

level set flat index to level set multi-index function

-
# We want to flatten out our indexing so that we have just one for loop.
-# For instance, if num_intervals = c(3,2), then we want a function that
-# will associate a flat index with a multi-index (i,j) as follows: 
-# 1 --> (1,1),  4 --> (1,2)
-# 2 --> (2,1),  5 --> (2,2)
-# 3 --> (3,1),  6 --> (3,2)
-# and generalize well to arbitrary length multi-indices.  Basically, we
-# want to generalize this using a function instead of a list:
-#    multi_index <- list( x=rep(1:3, times=2),
-#                         y=rep(1:2, each =3) )
-#    print(multi_index)
-
-
-# function from the level set flat index (lsfi) to the level set multi-index (lsmi)
-lsmi_from_lsfi <- function( lsfi, num_intervals ) {
-    # inputs:
-    # lsfi = an integer in the range 1:prod(v)
-    # num_intervals = c(i1,i1,...) a vector of numbers of intervals
-    # output:
-    # f+1 = a vector of multiindices with length filter_output_dim
-    j <- c(1,num_intervals) # put 1 in front to make indexing easier in the product prod(j[1:k])
-    f <- c()
-    for (k in 1:length(num_intervals)) {
-        # use lsfi-1 to shift from 1-based indexing to 0-based indexing
-        f[k] <- floor( (lsfi-1) / prod(j[1:k])) %% num_intervals[k]
-    }
-    #print(f+1)
-    # lsmi = f+1 = level set multi index
-    return(f+1) # shift from 0-based indexing back to 1-based indexing
-}
-
-# the inverse function
-lsfi_from_lsmi <- function( lsmi, num_intervals ) {
-    lsfi <- lsmi[1]
-    if (length(num_intervals) > 1) {
-        for (i in 2:length(num_intervals)) {
-            lsfi <- lsfi + prod(num_intervals[1:(i-1)]) * (lsmi[i]-1)
-        }
-    }
-    return(lsfi)
-}
-
-
-

lsmi_from_lsfi test case

-
v <- c(2,3,4)
-for (i in 1:prod(v)) {
-    m <- lsmi_from_lsfi(i,v)
-    print( i )
-    print( m )
-    print( lsfi_from_lsmi(m,v) )
-}
-
## [1] 1
-## [1] 1 1 1
-## [1] 1
-## [1] 2
-## [1] 2 1 1
-## [1] 2
-## [1] 3
-## [1] 1 2 1
-## [1] 3
-## [1] 4
-## [1] 2 2 1
-## [1] 4
-## [1] 5
-## [1] 1 3 1
-## [1] 5
-## [1] 6
-## [1] 2 3 1
-## [1] 6
-## [1] 7
-## [1] 1 1 2
-## [1] 7
-## [1] 8
-## [1] 2 1 2
-## [1] 8
-## [1] 9
-## [1] 1 2 2
-## [1] 9
-## [1] 10
-## [1] 2 2 2
-## [1] 10
-## [1] 11
-## [1] 1 3 2
-## [1] 11
-## [1] 12
-## [1] 2 3 2
-## [1] 12
-## [1] 13
-## [1] 1 1 3
-## [1] 13
-## [1] 14
-## [1] 2 1 3
-## [1] 14
-## [1] 15
-## [1] 1 2 3
-## [1] 15
-## [1] 16
-## [1] 2 2 3
-## [1] 16
-## [1] 17
-## [1] 1 3 3
-## [1] 17
-## [1] 18
-## [1] 2 3 3
-## [1] 18
-## [1] 19
-## [1] 1 1 4
-## [1] 19
-## [1] 20
-## [1] 2 1 4
-## [1] 20
-## [1] 21
-## [1] 1 2 4
-## [1] 21
-## [1] 22
-## [1] 2 2 4
-## [1] 22
-## [1] 23
-## [1] 1 3 4
-## [1] 23
-## [1] 24
-## [1] 2 3 4
-## [1] 24
-
-
-

cluster cutoff at first empty bin function

-
cluster_cutoff_at_first_empty_bin <- function(heights, diam, num_bins_when_clustering) {
-  
-  # if there are only two points (one height value), then we have a single cluster
-  if (length(heights) == 1) {
-    if (heights == diam) {
-      cutoff <- Inf
-      return(cutoff)
-    }
-  }
-  
-  bin_breaks <- seq(from=min(heights), to=diam, 
-                    by=(diam - min(heights))/num_bins_when_clustering)
-  myhist <- hist(c(heights,diam), breaks=bin_breaks, plot=FALSE)
-  z <- (myhist$counts == 0)
-  if (sum(z) == 0) {
-    cutoff <- Inf
-    return(cutoff)
-  } else {
-    #  which returns the indices of the logical vector (z == TRUE), min gives the smallest index
-    cutoff <- myhist$mids[ min(which(z == TRUE)) ]
-    return(cutoff)
-  }
-  
-}
-
-
-

mapper function

-
mapper <- function(dist_object, filter_values, num_intervals, percent_overlap, num_bins_when_clustering) {
-##### begin documentation ############
-# inputs
-# f : X \subset R^n \to R^k, a filter function on a data set with numpoints observations
-# filter_values = data.frame(y_1, y_2,..., y_k), where each y_i is a vector of length num_points
-# num_intervals = c(i_1, i_2,..., i_k), a vector of number of intervals for each variable y_i
-# percent_overlap = c(p_1, p_2,..., p_k), a vector of percent overlap for adjacent intervals within each variable y_i
-##### end documentation ###############
-
-    
-#     #filter_output_dim <- length(filter_values)
-#     if (length(num_intervals) == 1) {
-#         num_points <- length(filter_values)
-#         filter_output_dim <- 1
-#         num_levelsets <- num_intervals
-# 
-#         # define some vectors of length k = number of columns = number of variables
-#         filter_min <- min(filter_values)
-#         filter_max <- max(filter_values)
-#         interval_width <- (filter_max - filter_min) / num_intervals
-# 
-#         } else {
-# #    filter_values <- as.matrix(filter_values)
-#         num_points <- dim(filter_values)[1] # number of rows = number of observations 
-#         filter_output_dim <- dim(filter_values)[2] # number of columns = number of variables = length(num_intervals)
-#         num_levelsets <- prod(num_intervals)
-#         
-#         # define some vectors of length k = number of columns = number of variables
-#         filter_min <- as.vector(sapply(filter_values,min))
-#         filter_max <- as.vector(sapply(filter_values,max))
-#         interval_width <- (filter_max - filter_min) / num_intervals
-# 
-#    }
-    
-    # class(filter_values[,1]) = numeric, which has dim(filter_values[,1]) = NULL,
-    # so we coerce filter_values to a data.frame so that its dim is not NULL
-    filter_values <- data.frame(filter_values) 
-    num_points <- dim(filter_values)[1] # number of rows = number of observations 
-    filter_output_dim <- dim(filter_values)[2] # number of columns = number of variables = length(num_intervals)
-    num_levelsets <- prod(num_intervals)
-        
-    # define some vectors of length k = number of columns = number of variables
-    filter_min <- as.vector(sapply(filter_values,min))
-    filter_max <- as.vector(sapply(filter_values,max))
-    interval_width <- (filter_max - filter_min) / num_intervals
-    
-
-    # initialize variables    
-    vertex_index <- 0
-    level_of_vertex <- c()
-    points_in_vertex <- list()
-    points_in_level_set <- vector( "list", num_levelsets )
-    vertices_in_level_set <- vector( "list", num_levelsets )
-    # for future development
-    # cutree_in_level_set <- vector( "list", num_levelsets )
-
-    
-#### begin plot the filter function ##############
-#     # Reality check
-#     # Plot the filter values
-#     plot(filter_values[,1], filter_values[,2], type="n")
-#     # cex = font size as a proportion of default
-#     text(filter_values[,1], filter_values[,2], labels=1:num_points, cex=0.5) 
-#     # midpoint of overlapping intervals
-#     abline(v = filter_min[1]+interval_width[1]*(0:num_intervals[1]), 
-#            h = filter_min[2]+interval_width[2]*(0:num_intervals[2]), col="red")
-#     # left and right interval boundaries
-#     abline(v = filter_min[1]+interval_width[1]*(0:num_intervals[1])
-#            -0.5*interval_width[1]*percent_overlap[1]/100, col = "blue", lty = 3)
-#     abline(v = filter_min[1]+interval_width[1]*(0:num_intervals[1])
-#            +0.5*interval_width[1]*percent_overlap[1]/100, 
-#            col = "blue", lty = 3)
-#     # bottom and top interval boundaries
-#     abline(h = filter_min[2]+interval_width[2]*(0:num_intervals[2])
-#            -0.5*interval_width[2]*percent_overlap[2]/100, col = "blue", lty = 3)
-#     abline(h = filter_min[2]+interval_width[2]*(0:num_intervals[2])
-#            +0.5*interval_width[1]*percent_overlap[2]/100, 
-#            col = "blue", lty = 3)
-#### end plot the filter function ########## 
-    
-
-    # begin loop through all level sets
-    for (lsfi in 1:num_levelsets) {
-
-        ################################
-        # begin covering
-        
-        # level set flat index (lsfi), which is a number, has a corresponding 
-        # level set multi index (lsmi), which is a vector
-        lsmi <- lsmi_from_lsfi( lsfi, num_intervals )
-
-        lsfmin <- filter_min + (lsmi - 1) * interval_width - 0.5 * interval_width * percent_overlap/100
-        lsfmax <- lsfmin + interval_width + interval_width * percent_overlap/100
-
-        # begin loop through all the points and assign them to level sets
-        for (point_index in 1:num_points) {
-            # compare two logical vectors and get a logical vector, 
-            # then check if all entries are true
-            if ( all( lsfmin <= filter_values[point_index,] & 
-                      filter_values[point_index,] <= lsfmax ) ) {
-                points_in_level_set[[lsfi]] <- c( points_in_level_set[[lsfi]], 
-                                                  point_index )
-            }
-        } 
-        # end loop through all the points and assign them to level sets
-        
-        # end covering
-        ######################################
-
-        ######################################
-        # begin clustering
-        
-        points_in_this_level <- points_in_level_set[[lsfi]]
-        num_points_in_this_level <- length(points_in_level_set[[lsfi]])
-        
-        if (num_points_in_this_level == 0) {
-            num_vertices_in_this_level <- 0
-        }
-        
-        if (num_points_in_this_level == 1) {
-            #warning('Level set has only one point')
-            num_vertices_in_this_level <- 1
-            level_internal_indices <- c(1)
-            level_external_indices <- points_in_level_set[[lsfi]]
-        }
-        
-        if (num_points_in_this_level > 1) {
-            # heirarchical clustering
-            level_dist_object <- as.dist(
-                as.matrix(dist_object)[points_in_this_level,points_in_this_level])
-            level_max_dist <- max(level_dist_object)
-            level_hclust   <- hclust( level_dist_object, method="single" )
-            level_heights  <- level_hclust$height
-            
-            # cut the cluster tree
-            # internal indices refers to 1:num_points_in_this_level
-            # external indices refers to the row number of the original data point
-            level_cutoff   <- cluster_cutoff_at_first_empty_bin(level_heights, level_max_dist, num_bins_when_clustering)
-            level_external_indices <- points_in_this_level[level_hclust$order]
-            level_internal_indices <- as.vector(cutree(list(
-                merge = level_hclust$merge, 
-                height = level_hclust$height,
-                labels = level_external_indices), 
-                h=level_cutoff))
-            num_vertices_in_this_level <- max(level_internal_indices)
-            
-        }
-    
-        # end clustering
-        ######################################
-        
-        ######################################
-        # begin vertex construction
-        
-        # check admissibility condition
-        if (num_vertices_in_this_level > 0) { 
-        
-            vertices_in_level_set[[lsfi]] <- vertex_index + (1:num_vertices_in_this_level)
-        
-            for (j in 1:num_vertices_in_this_level) {
-                
-                vertex_index <- vertex_index + 1
-                level_of_vertex[vertex_index] <- lsfi
-                points_in_vertex[[vertex_index]] <- level_external_indices[level_internal_indices == j]
-            
-            }
-        }
-        
-        # end vertex construction
-        ######################################
-
-    } # end loop through all level sets
-    
-    
-    ########################################
-    #  begin simplicial complex
-    
-    # create empty adjacency matrix
-    adja <- mat.or.vec(vertex_index, vertex_index)
-    
-    # loop through all level sets
-    for (lsfi in 1:num_levelsets) {
-        
-        # get the level set multi-index from the level set flat index
-        lsmi <- lsmi_from_lsfi(lsfi,num_intervals)
-        
-        # Find adjacent level sets +1 of each entry in lsmi 
-        # (within bounds of num_intervals, of course).
-        # Need the inverse function lsfi_from_lsmi to do this easily.
-        for (k in 1:filter_output_dim) {
-            
-            # check admissibility condition is met
-            if (lsmi[k] < num_intervals[k]) {
-                lsmi_adjacent <- lsmi + diag(filter_output_dim)[,k]
-                lsfi_adjacent <- lsfi_from_lsmi(lsmi_adjacent, num_intervals)
-            } else { next }
-            
-            # check admissibility condition is met
-            if (length(vertices_in_level_set[[lsfi]]) < 1 |
-                length(vertices_in_level_set[[lsfi_adjacent]]) < 1) { next }
-            
-            # construct adjacency matrix
-            for (v1 in vertices_in_level_set[[lsfi]]) {
-                for (v2 in vertices_in_level_set[[lsfi_adjacent]]) {
-                    adja[v1,v2] <- (length(intersect(
-                        points_in_vertex[[v1]],
-                        points_in_vertex[[v2]])) > 0)
-                    adja[v2,v1] <- adja[v1,v2]
-                }
-            }
-
-        }
-        
-        
-    }
-    
-    #  end simplicial complex
-    #######################################
-    
-    mapperoutput <- list(adjacency = adja,
-                         num_vertices = vertex_index,
-                         level_of_vertex = level_of_vertex,
-                         points_in_vertex = points_in_vertex,
-                         points_in_level_set = points_in_level_set,
-                         vertices_in_level_set = vertices_in_level_set
-                         )
-
-    class(mapperoutput) <- "TDAmapper"
-    
-    return(mapperoutput)
-    
-    
-} # end mapper function
-
-
-
-
-#####################################
-#     filter_min <- c()
-#     filter_max <- c()
-#     interval_width <- c()
-#     for (j in 1:filter_output_dim) {
-#         filter_min[j] <- min(filter_values[,j])
-#         filter_max[j] <- max(filter_values[,j])
-#         interval_width[j] <- (filter_max[j] - filter_min[j]) / num_intervals[j]
-#         # adjacent_overlap_width[i] <- interval_width[i] * 0.5 * percent_overlap[i]/100
-#     }
-# 
-#     print("==============")
-#     print(filter_min)
-#     print(filter_max)
-#     print(interval_width)
-
-
-#         # construct the interval boundaries 
-#         lsfmin <- rep(NA,filter_output_dim)
-#         lsfmax <- rep(NA,filter_output_dim)
-#         for (j in 1:filter_output_dim) {
-#             lsfmin[j] <- filter_min[j] + (lsmi[j] - 1) * interval_width[j] - interval_width[j] * 0.5 * percent_overlap[j]/100
-#             lsfmax[j] <- filter_min[j] + lsmi[j] * interval_width[j] + interval_width[j] * 0.5 * percent_overlap[j]/100
-#             # print(paste(lsfmin[j], lsfmax[j]))
-#         }
-
-##############################
-
-
-

test case

-
X <- data.frame( x = 2*cos(2*pi*(1:100)/100), y = sin(2*pi*(1:100)/100) )
-#f <- list( X$x, X$y )
-f <- X
-
-#range(f[[1]])
-#range(f[[2]])
-m2 <- mapper(dist(X), f, c(3,2), c(50,50), 5)
-
-m2
-
## $adjacency
-##      [,1] [,2] [,3] [,4] [,5] [,6]
-## [1,]    0    1    0    1    0    0
-## [2,]    1    0    1    0    0    0
-## [3,]    0    1    0    0    0    1
-## [4,]    1    0    0    0    1    0
-## [5,]    0    0    0    1    0    1
-## [6,]    0    0    1    0    1    0
-## 
-## $num_vertices
-## [1] 6
-## 
-## $level_of_vertex
-## [1] 1 2 3 4 5 6
-## 
-## $points_in_vertex
-## $points_in_vertex[[1]]
-##  [1] 72 71 70 69 68 67 66 65 64 63 62 61 60 59 58 57 56 55 46 54 53 47 48
-## [24] 52 51 49 50
-## 
-## $points_in_vertex[[2]]
-##  [1] 74 73 72 71 70 69 67 68 75 76 77 78 79 80 81 82 83
-## 
-## $points_in_vertex[[3]]
-##  [1]  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94
-## [18]  95  96   4   3  97  98   2   1  99 100
-## 
-## $points_in_vertex[[4]]
-##  [1] 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 54 53 47 48
-## [24] 52 51 49 50
-## 
-## $points_in_vertex[[5]]
-##  [1] 24 23 22 21 20 19 17 18 25 26 27 28 29 30 31 32 33
-## 
-## $points_in_vertex[[6]]
-##  [1]  22  21  20  19  18  17  16  15  14  13  12  11  10   9   8   7   6
-## [18]   5  96   4   3  97  98   2   1  99 100
-## 
-## 
-## $points_in_level_set
-## $points_in_level_set[[1]]
-##  [1] 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
-## [24] 69 70 71 72
-## 
-## $points_in_level_set[[2]]
-##  [1] 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
-## 
-## $points_in_level_set[[3]]
-##  [1]   1   2   3   4  78  79  80  81  82  83  84  85  86  87  88  89  90
-## [18]  91  92  93  94  95  96  97  98  99 100
-## 
-## $points_in_level_set[[4]]
-##  [1] 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
-## [24] 51 52 53 54
-## 
-## $points_in_level_set[[5]]
-##  [1] 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
-## 
-## $points_in_level_set[[6]]
-##  [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
-## [18]  18  19  20  21  22  96  97  98  99 100
-## 
-## 
-## $vertices_in_level_set
-## $vertices_in_level_set[[1]]
-## [1] 1
-## 
-## $vertices_in_level_set[[2]]
-## [1] 2
-## 
-## $vertices_in_level_set[[3]]
-## [1] 3
-## 
-## $vertices_in_level_set[[4]]
-## [1] 4
-## 
-## $vertices_in_level_set[[5]]
-## [1] 5
-## 
-## $vertices_in_level_set[[6]]
-## [1] 6
-## 
-## 
-## attr(,"class")
-## [1] "TDAmapper"
-
library(igraph)
- 
-g2 <- graph.adjacency(m2$adjacency, mode="undirected")
-plot(g2, layout = layout.auto(g2) )
-

-
-
-

test case: a trefoil knot

-
# parametrize a trefoil knot
-n <- 100
-t <- 2*pi*(1:n)/n
-X <- data.frame(x = sin(t)+2*sin(2*t),
-                y = cos(t)-2*cos(2*t),
-                z = -sin(3*t))
-f <- X
-
-# library(rgl)
-# plot3d(X$x, X$y, X$z)
-# 
-
-#library(igraph)
-
-m1 <- mapper(dist(X), f[,1], 5, 50, 5)
-g1 <- graph.adjacency(m1$adjacency, mode="undirected")
-plot(g1, layout = layout.auto(g1) )
-

-
m1$points_in_vertex
-
## [[1]]
-##  [1] 95 94 93 84 85 92 86 87 91 88 89 90 83 82 81 80 79 77 78
-## 
-## [[2]]
-##  [1] 73 74 75 76 77 78 99 98 97 96 94 95 34 35 36 37 38 39 40 41 48 47 42
-## [24] 46 43 44 45 33 31 32
-## 
-## [[3]]
-##  [1]  49  48  46  47  50  51  52  53  54   1   2 100  98  99  33  32  31
-## [18]  30  29  28  26  27  67  68  69  70  71  72  73  74
-## 
-## [[4]]
-##  [1] 27 26 25 24 22 23  1  2  3  4  5  6 66 65 64 63 62 61 60 59 52 53 58
-## [24] 54 57 55 56 67 68 69
-## 
-## [[5]]
-##  [1]  5  6  7 16 15  8 14 13  9 12 10 11 17 18 19 20 21 22 23
-
m2 <- mapper(dist(X), f[,1:2], c(4,4), c(50,50), 10)
-g2 <- graph.adjacency(m2$adjacency, mode="undirected")
-plot(g2, layout = layout.auto(g2) )
-

-
m3 <- mapper(dist(X), f, c(3,3,3), c(30,30,30), 5)
-g3 <- graph.adjacency(m3$adjacency, mode="undirected")
-plot(g3, layout = layout.auto(g3) )
-

-
tkplot(g3)
-
## Loading required package: tcltk
-
## [1] 1
-
#m1
-#names(m1)
-#str(m1)
-#m1$points_in_level_set
-
-
-

interactive plot

-
library(networkD3)
-mapperVertices <- function(m, pt_labels) {
-
-    # Hovering over vertices gives the point labels:
-    # convert the list of vectors of point indices to a list of vectors of labels
-    labels_in_vertex <- lapply( m$points_in_vertex, FUN=function(v){ pt_labels[v] } )
-    nodename <- sapply( sapply(labels_in_vertex, as.character), paste0, collapse=", ")
-    nodename <- paste0("V", 1:m$num_vertices, ": ", nodename )
-    
-    # Hovering over vertices gives the point indices:
-    # list the points in each vertex
-    # nodename <- sapply( sapply(m$points_in_vertex, as.character), paste0, collapse=", ")
-    # concatenate the vertex number with the labels for the points in each vertex
-    #nodename <- paste0("V", 1:m$num_vertices, ": ", nodename )
-    
-    nodegroup <- m$level_of_vertex
-    nodesize <- sapply(m$points_in_vertex, length)
-    
-    return(data.frame( Nodename=nodename, 
-                       Nodegroup=nodegroup, 
-                       Nodesize=nodesize ))
-    
-}
-
-mapperEdges <- function(m) {
-    linksource <- c()
-    linktarget <- c()
-    linkvalue <- c()
-    k <- 1
-    for (i in 2:m$num_vertices) {
-        for (j in 1:(i-1)) {
-            if (m$adjacency[i,j] == 1) {
-                linksource[k] <- i-1
-                linktarget[k] <- j-1
-                linkvalue[k] <- 2
-                k <- k+1
-            }
-        }
-    }
-    return( data.frame( Linksource=linksource,
-                        Linktarget=linktarget, 
-                        Linkvalue=linkvalue ) )
-    
-}
-
-# create data frames for vertices and edges with the right variable names 
-MapperNodes <- mapperVertices(m3, 1:dim(f)[1] )
-MapperLinks <- mapperEdges(m3)
-
-# interactive plot
-forceNetwork(Nodes = MapperNodes, Links = MapperLinks, 
-            Source = "Linksource", Target = "Linktarget",
-            Value = "Linkvalue", NodeID = "Nodename",
-            Group = "Nodegroup", opacity = 0.8, 
-            linkDistance = 10, charge = -400)
-

-

-
-
-

dist and timing matrix subsetting

-
# From the dist documentation:
-# If d is an n x n distance matrix, the 
-# distance between row i and row j (for i < j <= n) is
-# has index = n*(i-1) - i*(i-1)/2 + j-i 
-# in the vector as.dist(d)
-#matrix_indexing_to_dist_indexing <- function(i,j,n) {
-m2d <- function(i,j,n) {
-    return( n*(i-1) - i*(i-1)/2 + j-i )   
-}
-
-# set.seed("1")
-# n <- 6
-# df <- data.frame( x = rnorm(n,0,1), y = rnorm(n,0,1) )
-# a <- dist(df)
-# class(a)
-# str(a)
-
-# m2d(1, 2:6, 6)
-# m2d(2, 3:6, 6)
-# m2d(3, 4:6, 6)
-# m2d(4, 5:6, 6)
-# m2d(5, 6, 6)
-
-# m2d(1, 2:3, 6)
-# 
-# m2d(2, 3, 6)
-# 
-# c( m2d(1,2,6), m2d(1,3,6), m2d(2,3,6) )
-# a[ c( m2d(1,2,6), m2d(1,3,6), m2d(2,3,6) ) ]
-# 
-# a[1:3]
-# 
-# b <- as.matrix( a )
-# class(b)
-# str(b)
-# 
-# b[1:3,1:3]
-# as.dist(b[1:3,1:3])
-# 
-# a
-# b
-
-m2d_rows1 <- function(m_ind,n) {
-    # m_ind = c(i_1, ..., i_k) = a vector of indices for k rows of the distance matrix
-    # n = number of observations = number of rows in n x n distance matrix
-    m_ind <- sort(m_ind) # make sure the vector is sorted!
-    k <- length(m_ind)
-    d_ind <- c() # dist object indices
-    for (i in 2:k) {
-        d_ind <- c(d_ind, m2d(m_ind[i-1], m_ind[i:k], n) )
-    }
-    return(d_ind)
-}
-
-m2d_rows2 <- function(m_ind,n) {
-    # m_ind = c(i_1, ..., i_k) = a vector of indices for k rows of the distance matrix
-    # n = number of observations = number of rows in n x n distance matrix
-    m_ind <- sort(m_ind) # make sure the vector is sorted!
-    k <- length(m_ind)
-    d_ind <- rep(NA, k*(k-1)/2 ) # dist object indices
-    for (i in 2:k) {
-        d_ind <- c(d_ind, m2d(m_ind[i-1], m_ind[i:k], n) )
-    }
-    return(na.omit(d_ind))
-}
-
-m2d_rows1(1:3,6)
-
## [1] 1 2 6
-
m2d_rows2(1:3,6)
-
## [1] 1 2 6
-## attr(,"na.action")
-## [1] 1 2 3
-## attr(,"class")
-## [1] "omit"
-
set.seed("1")
-n <- 1000 # sample size
-s <- 100 # subset size
-m_ind <- sample(1:n, s, replace=F) # indices of s matrix rows from among the n rows
-df <- data.frame(x=rnorm(n,0,1), y=rnorm(n,0,1))
-d <- dist(df)
-#m_ind <- 20*(1:50)
-# m_ind <- 100*(1:5)
-# as.dist(as.matrix( d )[m_ind,m_ind])
-# d[m2d_rows(m_ind,1000)]
-
-
-
-# library(microbenchmark)
-# 
-# op <- microbenchmark(
-#     OLD=as.dist(as.matrix( d )[m_ind,m_ind]),
-#     NEW1=d[m2d_rows1(m_ind,n)],
-#     NEW2=d[m2d_rows2(m_ind,n)],
-# times=100L)
-# 
-# print(op) #standard data frame of the output
-# boxplot(op) #boxplot of output
-# library(ggplot2) #nice log plot of the output
-# qplot(y=time, data=op, colour=expr) + scale_y_log10()
-
-# results of the microbenchmark suggest that with n=1000 points, 
-# the fastest method for a subset of fewer than 500 points is the
-# function NEW1, while for more than 500 points the fastest method
-# is OLD.
-
-dist_subset <- function(dist_vector, point_indices) {
-    # inputs:
-    # dist_vector = distance vector from dist(X) on the whole data set X
-    # point_indices = a vector of indices for points in the subset of X
-    # output:
-    # a subset of the dist_vector consisting of only those entries relevant
-    # for pairwise distances between the points indexed by point_indices
-    
-    numpoints <- (1 + sqrt(1+8*length(dist_vector)))/2
-    
-    if (length(point_indices) < 500) {
-        
-        m2d_rows1 <- function(m_ind,n) {
-            # m_ind = c(i_1, ..., i_k) = a vector of indices for matrix rows
-            # for k rows of the distance matrix
-            # n = number of observations = number of rows in distance matrix
-            m_ind <- sort(m_ind) # make sure the vector is sorted!
-            k <- length(m_ind)
-            d_ind <- c() # dist object indices
-            for (i in 2:k) {
-                d_ind <- c(d_ind, m2d(m_ind[i-1], m_ind[i:k], n) )
-            }
-            return(d_ind)
-        }
-
-        dist_vector[m2d_rows1(point_indices,numpoints)]
-        
-    } else {
-        
-        return( as.dist(as.matrix( dist_vector )[matrix_indices,matrix_indices]) )
-        
-    }
-    
-}
-
- - -
- - - - - - - - From 28955c9364a4ee7c99eb87c3aa942aaa9c402b5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Wed, 7 Aug 2024 18:37:14 +0200 Subject: [PATCH 02/11] Update vignette header --- R/mapperHD.Rmd | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/mapperHD.Rmd b/R/mapperHD.Rmd index b817a22..68313e9 100644 --- a/R/mapperHD.Rmd +++ b/R/mapperHD.Rmd @@ -1,8 +1,12 @@ --- title: "Mapper HD" author: "Paul Pearson" -date: "July 2, 2015" +date: "`r format(Sys.time(), '%d %B, %Y')`" output: html_document +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteIndexEntry{Mapper HD} + %\VignetteEncoding{UTF-8} --- # level set flat index to level set multi-index function From 47ff17dae0aa94fc601a355ba4317f5bba2bb417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Wed, 7 Aug 2024 18:38:40 +0200 Subject: [PATCH 03/11] Update DESCRIPTION --- DESCRIPTION | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2a6d735..66ab391 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,12 @@ Package: TDAmapper Title: Analyze High-Dimensional Data Using Discrete Morse Theory -Version: 1.0 -Date: 2015-05-26 -Authors@R: c(person("Paul", "Pearson", email = "pearsonp@hope.edu", - role = c("aut", "cre","trl")), person("Daniel", "Muellner", - role = c("aut","ctb")), person("Gurjeet","Singh", role = c("aut","ctb"))) -Author: Paul Pearson [aut, cre, trl], - Daniel Muellner [aut, ctb], - Gurjeet Singh [aut, ctb] -Maintainer: Paul Pearson +Version: 1.1 +Date: 2024-08-17 +Authors@R: c(person("Paul", "Pearson", email = "pearsonp@hope.edu", role = c("aut", "cre", "trl", "cph")), + person("Daniel", "Muellner", role = c("aut","ctb")), + person("Gurjeet","Singh", role = c("aut","ctb")), + person("Iago", "Giné-Vázquez", role = "ctb", email = "iago.gin-vaz@protonmail.com", comment = c(ORCID = "0000-0002-6725-2638")), +) Description: Topological Data Analysis using Mapper (discrete Morse theory). Generate a 1-dimensional simplicial complex from a filter function defined on the data: 1. Define a filter function (lens) on the @@ -19,8 +17,9 @@ Description: Topological Data Analysis using Mapper (discrete Morse theory). codomain R, while the the function mapper2D uses a filter function with codomain R^2. Depends: R (>= 3.1.2) -Suggests: fastcluster, igraph +Suggests: fastcluster, igraph, knitr License: GPL-3 -LazyData: true URL: https://github.com/paultpearson/TDAmapper/ BugReports: https://github.com/paultpearson/TDAmapper/issues +VignetteBuilder: knitr +Encoding: UTF-8 From b7582788bb989c239869841952be90b0af1cd968 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Wed, 7 Aug 2024 18:41:06 +0200 Subject: [PATCH 04/11] Move vignette to appropiate folder --- {R => vignettes}/mapperHD.Rmd | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {R => vignettes}/mapperHD.Rmd (100%) diff --git a/R/mapperHD.Rmd b/vignettes/mapperHD.Rmd similarity index 100% rename from R/mapperHD.Rmd rename to vignettes/mapperHD.Rmd From 3c707497a674d90d18cf5eb1a0fcb289404e72a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Wed, 7 Aug 2024 19:08:04 +0200 Subject: [PATCH 05/11] Remove CRAN check NOTE on non-imported functions --- NAMESPACE | 7 ++++++- R/cluster_cutoff_at_first_empty_bin.R | 4 +++- R/mapper1D.R | 2 ++ R/mapper2D.R | 2 ++ man/cluster_cutoff_at_first_empty_bin.Rd | 9 ++++----- man/mapper.Rd | 20 ++++++++++++-------- man/mapper1D.Rd | 21 ++++++++++++--------- man/mapper2D.Rd | 20 +++++++++++--------- man/mapperEdges.Rd | 17 ++++++++--------- man/mapperVertices.Rd | 23 +++++++++++------------ 10 files changed, 71 insertions(+), 54 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 80413cc..99b13ee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,12 @@ -# Generated by roxygen2 (4.1.1): do not edit by hand +# Generated by roxygen2: do not edit by hand export(mapper) export(mapper1D) export(mapper2D) export(mapperEdges) export(mapperVertices) +importFrom(graphics,hist) +importFrom(stats,as.dist) +importFrom(stats,cutree) +importFrom(stats,dist) +importFrom(stats,hclust) diff --git a/R/cluster_cutoff_at_first_empty_bin.R b/R/cluster_cutoff_at_first_empty_bin.R index 5337042..7e0b950 100644 --- a/R/cluster_cutoff_at_first_empty_bin.R +++ b/R/cluster_cutoff_at_first_empty_bin.R @@ -5,7 +5,9 @@ #' @param heights Height values in hierarchical clustering. #' @param diam Maximum distance between points in a level set. #' @param num_bins_when_clustering Controls how many bins there are in the histogram used to determine cutoff. values -#' +#' +#' @importFrom graphics hist +#' #' @return Numerical value for cutoff point of hierarchical cluster diagram. #' #' @author Paul Pearson, \email{pearsonp@@hope.edu} diff --git a/R/mapper1D.R b/R/mapper1D.R index 808ffcf..d7ff7d9 100644 --- a/R/mapper1D.R +++ b/R/mapper1D.R @@ -8,6 +8,8 @@ #' @param percent_overlap A number between 0 and 100 specifying how much adjacent intervals should overlap. #' @param num_bins_when_clustering A positive integer that controls whether points in the same level set end up in the same cluster. #' +#' @importFrom stats as.dist cutree dist hclust +#' #' @return An object of class \code{TDAmapper} which is a list of items named \code{adjacency} (adjacency matrix for the edges), \code{num_vertices} (integer number of vertices), \code{level_of_vertex} (vector with \code{level_of_vertex[i]} = index of the level set for vertex i), \code{points_in_vertex} (list with \code{points_in_vertex[[i]]} = vector of indices of points in vertex i), \code{points_in_level} (list with \code{points_in_level[[i]]} = vector of indices of points in level set i, and \code{vertices_in_level} (list with \code{vertices_in_level[[i]]} = vector of indices of vertices in level set i. #' #' @author Paul Pearson, \email{pearsonp@@hope.edu} diff --git a/R/mapper2D.R b/R/mapper2D.R index b86c938..e35f48f 100644 --- a/R/mapper2D.R +++ b/R/mapper2D.R @@ -8,6 +8,8 @@ #' @param percent_overlap a number between 0 and 100 specifying how much adjacent intervals should overlap #' @param num_bins_when_clustering a positive integer that controls whether points in the same level set end up in the same cluster #' +#' @importFrom stats as.dist cutree dist hclust +#' #' @return An object of class \code{TDAmapper} which is a list of items named \code{adjacency} (adjacency matrix for the edges), \code{num_vertices} (integer number of vertices), \code{level_of_vertex} (vector with \code{level_of_vertex[i]} = index of the level set for vertex i), \code{points_in_vertex} (list with \code{points_in_vertex[[i]]} = vector of indices of points in vertex i), \code{points_in_level} (list with \code{points_in_level[[i]]} = vector of indices of points in level set i, and \code{vertices_in_level} (list with \code{vertices_in_level[[i]]} = vector of indices of vertices in level set i. #' #' @author Paul Pearson, \email{pearsonp@@hope.edu} diff --git a/man/cluster_cutoff_at_first_empty_bin.Rd b/man/cluster_cutoff_at_first_empty_bin.Rd index 202070f..c9bdc1d 100644 --- a/man/cluster_cutoff_at_first_empty_bin.Rd +++ b/man/cluster_cutoff_at_first_empty_bin.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cluster_cutoff_at_first_empty_bin.R \name{cluster_cutoff_at_first_empty_bin} \alias{cluster_cutoff_at_first_empty_bin} @@ -19,13 +19,12 @@ Numerical value for cutoff point of hierarchical cluster diagram. \description{ This function decides where to cut the hierarchical clustering tree to define clusters within a level set. } -\author{ -Paul Pearson, \email{pearsonp@hope.edu} -} \references{ \url{https://github.com/paultpearson/TDAmapper} } \seealso{ \code{\link{mapper1D}}, \code{\link{mapper2D}} } - +\author{ +Paul Pearson, \email{pearsonp@hope.edu} +} diff --git a/man/mapper.Rd b/man/mapper.Rd index d30bc68..3e2a9bc 100644 --- a/man/mapper.Rd +++ b/man/mapper.Rd @@ -1,11 +1,16 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mapper.R \name{mapper} \alias{mapper} \title{mapper function} \usage{ -mapper(dist_object, filter_values, num_intervals, percent_overlap, - num_bins_when_clustering) +mapper( + dist_object, + filter_values, + num_intervals, + percent_overlap, + num_bins_when_clustering +) } \arguments{ \item{filter_values}{A n x m data frame of real numbers.} @@ -34,20 +39,19 @@ m1 <- mapper( percent_overlap = c(50,50), num_bins_when_clustering = 10) \dontrun{ -#install.packages("igraph") +#install.packages("igraph") library(igraph) g1 <- graph.adjacency(m1$adjacency, mode="undirected") plot(g1, layout = layout.auto(g1) ) } } -\author{ -Paul Pearson, \email{pearsonp@hope.edu} -} \references{ \url{https://github.com/paultpearson/TDAmapper} } \seealso{ \code{\link{mapper1D}}, \code{\link{mapper2D}} } +\author{ +Paul Pearson, \email{pearsonp@hope.edu} +} \keyword{mapper} - diff --git a/man/mapper1D.Rd b/man/mapper1D.Rd index f3ad666..f61ba16 100644 --- a/man/mapper1D.Rd +++ b/man/mapper1D.Rd @@ -1,12 +1,16 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mapper1D.R \name{mapper1D} \alias{mapper1D} \title{mapper1D function} \usage{ -mapper1D(distance_matrix = dist(data.frame(x = 2 * cos(0.5 * (1:100)), y = - sin(1:100))), filter_values = 2 * cos(0.5 * (1:100)), num_intervals = 10, - percent_overlap = 50, num_bins_when_clustering = 10) +mapper1D( + distance_matrix = dist(data.frame(x = 2 * cos(0.5 * (1:100)), y = sin(1:100))), + filter_values = 2 * cos(0.5 * (1:100)), + num_intervals = 10, + percent_overlap = 50, + num_bins_when_clustering = 10 +) } \arguments{ \item{distance_matrix}{An n x n matrix of pairwise dissimilarities.} @@ -33,20 +37,19 @@ m1 <- mapper1D( percent_overlap = 50, num_bins_when_clustering = 10) \dontrun{ -#install.packages("igraph") +#install.packages("igraph") library(igraph) g1 <- graph.adjacency(m1$adjacency, mode="undirected") plot(g1, layout = layout.auto(g1) ) } } -\author{ -Paul Pearson, \email{pearsonp@hope.edu} -} \references{ \url{https://github.com/paultpearson/TDAmapper} } \seealso{ \code{\link{mapper2D}} } +\author{ +Paul Pearson, \email{pearsonp@hope.edu} +} \keyword{mapper1D} - diff --git a/man/mapper2D.Rd b/man/mapper2D.Rd index c2a16e4..806f924 100644 --- a/man/mapper2D.Rd +++ b/man/mapper2D.Rd @@ -1,13 +1,16 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mapper2D.R \name{mapper2D} \alias{mapper2D} \title{mapper2D function} \usage{ -mapper2D(distance_matrix = dist(data.frame(x = 2 * cos(1:100), y = - sin(1:100))), filter_values = list(2 * cos(1:100), sin(1:100)), - num_intervals = c(5, 5), percent_overlap = 50, - num_bins_when_clustering = 10) +mapper2D( + distance_matrix = dist(data.frame(x = 2 * cos(1:100), y = sin(1:100))), + filter_values = list(2 * cos(1:100), sin(1:100)), + num_intervals = c(5, 5), + percent_overlap = 50, + num_bins_when_clustering = 10 +) } \arguments{ \item{distance_matrix}{an n x n matrix of pairwise dissimilarities} @@ -39,14 +42,13 @@ g2 <- graph.adjacency(m2$adjacency, mode="undirected") plot(g2, layout = layout.auto(g2) ) } } -\author{ -Paul Pearson, \email{pearsonp@hope.edu} -} \references{ \url{https://github.com/paultpearson/TDAmapper} } \seealso{ \code{\link{mapper1D}} } +\author{ +Paul Pearson, \email{pearsonp@hope.edu} +} \keyword{mapper2D} - diff --git a/man/mapperEdges.Rd b/man/mapperEdges.Rd index 997a991..faf35a8 100644 --- a/man/mapperEdges.Rd +++ b/man/mapperEdges.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mapperEdges.R \name{mapperEdges} \alias{mapperEdges} @@ -14,7 +14,7 @@ A data frame describing the edges in the graph of the mapper output. } \description{ The input to this function is a TDAmapper class object and the output -is a data frame of edges that can be used as input to the networkD3 +is a data frame of edges that can be used as input to the networkD3 plot utility. } \examples{ @@ -27,27 +27,26 @@ m1 <- mapper( num_intervals = 10, percent_overlap = 50, num_bins_when_clustering = 10) - + pt_labels <- 1:length(f) vertices <- mapperVertices(m1, pt_labels) edges <- mapperEdges(m1) # interactive plot -forceNetwork(Nodes = nodes, Links = links, +forceNetwork(Nodes = nodes, Links = links, Source = "Linksource", Target = "Linktarget", Value = "Linkvalue", NodeID = "Nodename", - Group = "Nodegroup", opacity = 0.8, + Group = "Nodegroup", opacity = 0.8, linkDistance = 10, charge = -400) } } -\author{ -Paul Pearson, \email{pearsonp@hope.edu} -} \references{ \url{https://github.com/paultpearson/TDAmapper} } \seealso{ \code{\link{mapperVertices}} } +\author{ +Paul Pearson, \email{pearsonp@hope.edu} +} \keyword{mapperEdges} - diff --git a/man/mapperVertices.Rd b/man/mapperVertices.Rd index 23e63ee..043c538 100644 --- a/man/mapperVertices.Rd +++ b/man/mapperVertices.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mapperVertices.R \name{mapperVertices} \alias{mapperVertices} @@ -7,17 +7,17 @@ mapperVertices(m, pt_labels) } \arguments{ -\item{m}{An object of class TDAmapper that is the output of the mapper +\item{m}{An object of class TDAmapper that is the output of the mapper function.} } \value{ -A data frame describing the vertices in the graph of the mapper -output and the point labels that will be displayed when the mouse +A data frame describing the vertices in the graph of the mapper +output and the point labels that will be displayed when the mouse hovers over a vertex in the graph. } \description{ The input to this function is a TDAmapper class object and the output -is a data frame of vertices that can be used as input to the networkD3 +is a data frame of vertices that can be used as input to the networkD3 plot utility. } \examples{ @@ -30,27 +30,26 @@ m1 <- mapper( num_intervals = 10, percent_overlap = 50, num_bins_when_clustering = 10) - + pt_labels <- 1:length(f) vertices <- mapperVertices(m1, pt_labels) edges <- mapperEdges(m1) # interactive plot -forceNetwork(Nodes = nodes, Links = links, +forceNetwork(Nodes = nodes, Links = links, Source = "Linksource", Target = "Linktarget", Value = "Linkvalue", NodeID = "Nodename", - Group = "Nodegroup", opacity = 0.8, + Group = "Nodegroup", opacity = 0.8, linkDistance = 10, charge = -400) } } -\author{ -Paul Pearson, \email{pearsonp@hope.edu} -} \references{ \url{https://github.com/paultpearson/TDAmapper} } \seealso{ \code{\link{mapperEdges}} } +\author{ +Paul Pearson, \email{pearsonp@hope.edu} +} \keyword{mapperVertices} - From 3d75465337cfbf1aad7eeb589f63f099a5d069a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Thu, 8 Aug 2024 01:59:08 +0200 Subject: [PATCH 06/11] Add NEWS file --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 NEWS.md diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..e6f05b6 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,4 @@ +# TDAmapper v1.1 + +* Minimal update solving warnings in [CRAN Check Results](https://cran-archive.r-project.org/web/checks/2022/2022-06-13_check_results_TDAmapper.html) with the goal of returning the package to [CRAN](https://cran.r-project.org). + From 9eda37115649081a927f30453bda58a6d4cf49c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Thu, 8 Aug 2024 02:00:26 +0200 Subject: [PATCH 07/11] Fix argument name inconsistence --- R/mapper.R | 4 ++-- vignettes/mapperHD.Rmd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/mapper.R b/R/mapper.R index 6967d4c..49c461b 100644 --- a/R/mapper.R +++ b/R/mapper.R @@ -34,7 +34,7 @@ #' -mapper <- function(dist_object, filter_values, num_intervals, percent_overlap, num_bins_when_clustering) { +mapper <- function(distance_matrix, filter_values, num_intervals, percent_overlap, num_bins_when_clustering) { ##### begin documentation ############ # inputs # f : X \subset R^n \to R^k, a filter function on a data set with numpoints observations @@ -164,7 +164,7 @@ mapper <- function(dist_object, filter_values, num_intervals, percent_overlap, n if (num_points_in_this_level > 1) { # heirarchical clustering level_dist_object <- as.dist( - as.matrix(dist_object)[points_in_this_level,points_in_this_level]) + as.matrix(distance_matrix)[points_in_this_level,points_in_this_level]) level_max_dist <- max(level_dist_object) level_hclust <- hclust( level_dist_object, method="single" ) level_heights <- level_hclust$height diff --git a/vignettes/mapperHD.Rmd b/vignettes/mapperHD.Rmd index 68313e9..b3107be 100644 --- a/vignettes/mapperHD.Rmd +++ b/vignettes/mapperHD.Rmd @@ -100,7 +100,7 @@ cluster_cutoff_at_first_empty_bin <- function(heights, diam, num_bins_when_clust # mapper function ```{r} -mapper <- function(dist_object, filter_values, num_intervals, percent_overlap, num_bins_when_clustering) { +mapper <- function(distance_matrix, filter_values, num_intervals, percent_overlap, num_bins_when_clustering) { ##### begin documentation ############ # inputs # f : X \subset R^n \to R^k, a filter function on a data set with numpoints observations @@ -230,7 +230,7 @@ mapper <- function(dist_object, filter_values, num_intervals, percent_overlap, n if (num_points_in_this_level > 1) { # heirarchical clustering level_dist_object <- as.dist( - as.matrix(dist_object)[points_in_this_level,points_in_this_level]) + as.matrix(distance_matrix)[points_in_this_level,points_in_this_level]) level_max_dist <- max(level_dist_object) level_hclust <- hclust( level_dist_object, method="single" ) level_heights <- level_hclust$height From f7941cbfc88ab7871f2de5fec572d3be9ef33c13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Thu, 8 Aug 2024 02:00:57 +0200 Subject: [PATCH 08/11] Document parameter --- R/mapperVertices.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/mapperVertices.R b/R/mapperVertices.R index 1762ad7..f0f65b5 100644 --- a/R/mapperVertices.R +++ b/R/mapperVertices.R @@ -6,6 +6,8 @@ #' #' @param m An object of class TDAmapper that is the output of the mapper #' function. +#' @param pt_labels Point labels for vertices. Character class or coercible +#' to character #' #' @return A data frame describing the vertices in the graph of the mapper #' output and the point labels that will be displayed when the mouse From 53a7a04b51cec3c567c59da6e8ec498823a38988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Thu, 8 Aug 2024 02:01:17 +0200 Subject: [PATCH 09/11] Roxygenize --- man/mapper.Rd | 6 +++--- man/mapperVertices.Rd | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/man/mapper.Rd b/man/mapper.Rd index 3e2a9bc..b5e3b49 100644 --- a/man/mapper.Rd +++ b/man/mapper.Rd @@ -5,7 +5,7 @@ \title{mapper function} \usage{ mapper( - dist_object, + distance_matrix, filter_values, num_intervals, percent_overlap, @@ -13,6 +13,8 @@ mapper( ) } \arguments{ +\item{distance_matrix}{An n x n matrix of pairwise dissimilarities.} + \item{filter_values}{A n x m data frame of real numbers.} \item{num_intervals}{A length m vector of positive integers.} @@ -20,8 +22,6 @@ mapper( \item{percent_overlap}{A length m vector of numbers between 0 and 100 specifying how much adjacent intervals should overlap.} \item{num_bins_when_clustering}{A positive integer that controls whether points in the same level set end up in the same cluster.} - -\item{distance_matrix}{An n x n matrix of pairwise dissimilarities.} } \value{ An object of class \code{TDAmapper} which is a list of items named \code{adjacency} (adjacency matrix for the edges), \code{num_vertices} (integer number of vertices), \code{level_of_vertex} (vector with \code{level_of_vertex[i]} = index of the level set for vertex i), \code{points_in_vertex} (list with \code{points_in_vertex[[i]]} = vector of indices of points in vertex i), \code{points_in_level} (list with \code{points_in_level[[i]]} = vector of indices of points in level set i, and \code{vertices_in_level} (list with \code{vertices_in_level[[i]]} = vector of indices of vertices in level set i. diff --git a/man/mapperVertices.Rd b/man/mapperVertices.Rd index 043c538..12450aa 100644 --- a/man/mapperVertices.Rd +++ b/man/mapperVertices.Rd @@ -9,6 +9,9 @@ mapperVertices(m, pt_labels) \arguments{ \item{m}{An object of class TDAmapper that is the output of the mapper function.} + +\item{pt_labels}{Point labels for vertices. Character class or coercible +to character} } \value{ A data frame describing the vertices in the graph of the mapper From 49f6141db1bc846913d493bec7bccdc66d63fb0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Thu, 8 Aug 2024 02:01:52 +0200 Subject: [PATCH 10/11] Update DESCRIPTION file --- DESCRIPTION | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 66ab391..0128eb6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,6 @@ Package: TDAmapper -Title: Analyze High-Dimensional Data Using Discrete Morse Theory Version: 1.1 -Date: 2024-08-17 -Authors@R: c(person("Paul", "Pearson", email = "pearsonp@hope.edu", role = c("aut", "cre", "trl", "cph")), - person("Daniel", "Muellner", role = c("aut","ctb")), - person("Gurjeet","Singh", role = c("aut","ctb")), - person("Iago", "Giné-Vázquez", role = "ctb", email = "iago.gin-vaz@protonmail.com", comment = c(ORCID = "0000-0002-6725-2638")), -) +License: GPL-3 Description: Topological Data Analysis using Mapper (discrete Morse theory). Generate a 1-dimensional simplicial complex from a filter function defined on the data: 1. Define a filter function (lens) on the @@ -16,10 +10,16 @@ Description: Topological Data Analysis using Mapper (discrete Morse theory). between vertices. The function mapper1D uses a filter function with codomain R, while the the function mapper2D uses a filter function with codomain R^2. +Title: Analyze High-Dimensional Data Using Discrete Morse Theory +Date: 2024-08-17 +Authors@R: c(person("Paul", "Pearson", email = "pearsonp@hope.edu", role = c("aut", "cre", "trl", "cph")), + person("Daniel", "Muellner", role = c("aut","ctb")), + person("Gurjeet","Singh", role = c("aut","ctb")), + person("Iago", "Giné-Vázquez", role = "ctb", email = "iago.gin-vaz@protonmail.com", comment = c(ORCID = "0000-0002-6725-2638"))) Depends: R (>= 3.1.2) -Suggests: fastcluster, igraph, knitr -License: GPL-3 +Suggests: fastcluster, igraph, networkD3, knitr URL: https://github.com/paultpearson/TDAmapper/ BugReports: https://github.com/paultpearson/TDAmapper/issues VignetteBuilder: knitr Encoding: UTF-8 +RoxygenNote: 7.3.2 From 7c06fc0cff58e59df9134f787e70f91e39ea54e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20Gin=C3=A9-V=C3=A1zquez?= Date: Wed, 18 Sep 2024 00:59:32 +0200 Subject: [PATCH 11/11] Update Date --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0128eb6..a85ed1a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ Description: Topological Data Analysis using Mapper (discrete Morse theory). codomain R, while the the function mapper2D uses a filter function with codomain R^2. Title: Analyze High-Dimensional Data Using Discrete Morse Theory -Date: 2024-08-17 +Date: 2024-09-24 Authors@R: c(person("Paul", "Pearson", email = "pearsonp@hope.edu", role = c("aut", "cre", "trl", "cph")), person("Daniel", "Muellner", role = c("aut","ctb")), person("Gurjeet","Singh", role = c("aut","ctb")),