From 8e3c0127fd187070817c01b756e22909dd3708de Mon Sep 17 00:00:00 2001
From: Hadley Wickham <h.wickham@gmail.com>
Date: Tue, 17 Sep 2024 14:14:02 -0500
Subject: [PATCH] Fix for dev profvis

For complicated reasons, we had to change the evaluation strategy in profvis, and it no longer modifies the environment that it's run in. However, I think `bench::mark()` is probably better suited to your use case anyway, especially since it comes with a convenient way to check that the outputs are equal.
---
 DESCRIPTION                |   8 +--
 vignettes/Introduction.Rmd | 123 ++++++++++++++++---------------------
 2 files changed, 58 insertions(+), 73 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 6c5b652..b44e0b1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: tidyft
 Title: Tidy Verbs for Fast Data Operations by Reference
 Version: 0.4.5
-Authors@R: 
+Authors@R:
     person(given = "Tian-Yuan",
            family = "Huang",
            role = c("aut", "cre"),
@@ -18,13 +18,13 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.1.0
-Imports: 
+Imports:
     data.table (>= 1.12.8),
     stringr (>= 1.4.0),
     fst (>= 0.9.0)
-Suggests: 
+Suggests:
+    bench,
     knitr,
     rmarkdown,
-    profvis,
     dplyr
 VignetteBuilder: knitr
diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd
index 27ecbbd..71ef733 100644
--- a/vignettes/Introduction.Rmd
+++ b/vignettes/Introduction.Rmd
@@ -42,7 +42,7 @@ class(b)
 
 # convert codes
 lapply(ls(),get) %>%
-  lapply(setDT) %>% 
+  lapply(setDT) %>%
   invisible()
 
 # after
@@ -80,7 +80,7 @@ ls() # only the ft exists
 The `as_fst` could save any data.frame as ".fst" file in temporary file and parse it back as fst table. Fst table is small in RAM, but if you want to get any part of the data.frame, you can get it in almost no time:
 
 ```{r}
-ft %>% 
+ft %>%
   slice_fst(5555:6666)  # get 5555 to 6666 row
 ```
 
@@ -91,17 +91,17 @@ Except for `slice_fst`, there are also other functions for subsetting the data,
 ```{r}
 
 sys_time_print({
-  res =  ft %>% 
-   select_fst(Species,Sepal.Length,Sepal.Width) %>% 
-   rename(group = Species,sl = Sepal.Length,sw = Sepal.Width) %>% 
-   arrange(group,sl) %>% 
-   filter(sl > 5) %>% 
-   distinct(sl,.keep_all = TRUE) %>% 
+  res =  ft %>%
+   select_fst(Species,Sepal.Length,Sepal.Width) %>%
+   rename(group = Species,sl = Sepal.Length,sw = Sepal.Width) %>%
+   arrange(group,sl) %>%
+   filter(sl > 5) %>%
+   distinct(sl,.keep_all = TRUE) %>%
    summarise(sw = max(sw),by = group)
 })
 
 res
-  
+
 ```
 
 This should be pretty fast. Becasue when we use the data in fst table, we never get them until using the "_fst" suffix functions, so the tidyft functions never modify the data in the fst file or fst table. That is to say, we do not have to worry about the modification by reference any more. No copies made, fastest ever.
@@ -113,7 +113,6 @@ The fst workflow could also be working with other tools, though less efficient.
 
 rm(list = ls())
 
-library(profvis)
 library(data.table)
 library(dplyr)
 library(dtplyr)
@@ -128,57 +127,53 @@ dim(dt)
 as_fst(dt) -> ft
 # remove the data.frame from RAM
 rm(dt)
-  
-
-profvis({
-  
-  res1 = ft %>% 
-    select_fst(Species,Sepal.Length,Sepal.Width,Petal.Length) %>% 
-    dplyr::select(-Petal.Length) %>% 
-    dplyr::rename(group = Species,sl = Sepal.Length,sw = Sepal.Width) %>% 
-    dplyr::arrange(group,sl) %>% 
-    dplyr::filter(sl > 5) %>% 
-    dplyr::distinct(sl,.keep_all = TRUE) %>% 
-    dplyr::group_by(group) %>% 
-    dplyr::summarise(sw = max(sw))
-  
-  res2 = ft %>% 
-    select_fst(Species,Sepal.Length,Sepal.Width,Petal.Length) %>% 
-    lazy_dt() %>% 
-    dplyr::select(-Petal.Length) %>% 
-    dplyr::rename(group = Species,sl = Sepal.Length,sw = Sepal.Width) %>% 
-    dplyr::arrange(group,sl) %>% 
-    dplyr::filter(sl > 5) %>% 
-    dplyr::distinct(sl,.keep_all = TRUE) %>% 
-    dplyr::group_by(group) %>% 
-    dplyr::summarise(sw = max(sw)) %>% 
-    as.data.table()
-  
-  res3 = ft[,c("Species","Sepal.Length","Sepal.Width","Petal.Length")] %>%  
+
+
+bench::mark(
+
+  dplyr = ft %>%
+    select_fst(Species,Sepal.Length,Sepal.Width,Petal.Length) %>%
+    dplyr::select(-Petal.Length) %>%
+    dplyr::rename(group = Species,sl = Sepal.Length,sw = Sepal.Width) %>%
+    dplyr::arrange(group,sl) %>%
+    dplyr::filter(sl > 5) %>%
+    dplyr::distinct(sl,.keep_all = TRUE) %>%
+    dplyr::group_by(group) %>%
+    dplyr::summarise(sw = max(sw)),
+
+  dtplyr = ft %>%
+    select_fst(Species,Sepal.Length,Sepal.Width,Petal.Length) %>%
+    lazy_dt() %>%
+    dplyr::select(-Petal.Length) %>%
+    dplyr::rename(group = Species,sl = Sepal.Length,sw = Sepal.Width) %>%
+    dplyr::arrange(group,sl) %>%
+    dplyr::filter(sl > 5) %>%
+    dplyr::distinct(sl,.keep_all = TRUE) %>%
+    dplyr::group_by(group) %>%
+    dplyr::summarise(sw = max(sw)) %>%
+    as.data.table(),
+
+  data.table = ft[,c("Species","Sepal.Length","Sepal.Width","Petal.Length")] %>%
     setDT() %>%
-    .[,.SD,.SDcols = -"Petal.Length"] %>% 
+    .[,.SD,.SDcols = -"Petal.Length"] %>%
     setnames(old =c("Species","Sepal.Length","Sepal.Width"),
-             new = c("group","sl","sw")) %>% 
-    setorder(group,sl) %>% 
-    .[sl>5] %>% unique(by = "sl") %>% 
-    .[,.(sw = max(sw)),by = group]
-  
-  
-  res4 =  ft %>% 
-    tidyft::select_fst(Species,Sepal.Length,Sepal.Width,Petal.Length) %>% 
-    tidyft::select(-Petal.Length) %>% 
-    tidyft::rename(group = Species,sl = Sepal.Length,sw = Sepal.Width) %>% 
-    tidyft::arrange(group,sl) %>% 
-    tidyft::filter(sl > 5) %>% 
-    tidyft::distinct(sl,.keep_all = TRUE) %>% 
-    tidyft::summarise(sw = max(sw),by = group)
-  
-  
-})
-
-setequal(res1,res2)
-setequal(res2,res3)
-setequal(res3,res4)
+             new = c("group","sl","sw")) %>%
+    setorder(group,sl) %>%
+    .[sl>5] %>% unique(by = "sl") %>%
+    .[,.(sw = max(sw)),by = group],
+
+
+  tidyft =  ft %>%
+    tidyft::select_fst(Species,Sepal.Length,Sepal.Width,Petal.Length) %>%
+    tidyft::select(-Petal.Length) %>%
+    tidyft::rename(group = Species,sl = Sepal.Length,sw = Sepal.Width) %>%
+    tidyft::arrange(group,sl) %>%
+    tidyft::filter(sl > 5) %>%
+    tidyft::distinct(sl,.keep_all = TRUE) %>%
+    tidyft::summarise(sw = max(sw),by = group),
+
+  check = setequal
+)
 
 ```
 
@@ -189,13 +184,3 @@ Because tidyft is based on data.table, therefore, if you always use data.table c
 ```{r}
 sessionInfo()
 ```
-
-
-
-
-
-
-
-
-
-