DorisAmoakohene / Researchwork_Rdata.table

0 stars 0 forks source link

data.table Issue #4200 #6

Closed DorisAmoakohene closed 11 months ago

DorisAmoakohene commented 12 months ago

@tdhock https://github.com/Rdatatable/data.table/issues/4200 -- Discusses Regression https://github.com/Rdatatable/data.table/issues/4200#issuecomment-578629820 https://github.com/Rdatatable/data.table/pull/4558 --- Fixes Regression

I am also trying to check out this regression, but it seems the branch has been closed (groupby with dogroups (R expression) performance regression #4200).

setup code from Branch:

# Reproduction script: build a one-million-row table and time the same grouped
# aggregation twice in a row.
library(data.table)

N <- 1e6L
set.seed(108)

# id3 shuffles seq.int(N * 0.9) together with N * 0.1 extra values resampled
# from the same range; v1 and v2 are draws from 1..5.
d <- data.table(
  id3 = sample(c(seq.int(N * 0.9), sample(N * 0.9, size = N * 0.1, replace = TRUE))), # 9e5 unq values
  v1 = sample(5L, size = N, replace = TRUE),
  v2 = sample(5L, size = N, replace = TRUE)
)

# First and second timing of the identical query.
system.time(d[, max(v1) - min(v2), by = id3])
system.time(d[, max(v1) - min(v2), by = id3])

atime code:

# Benchmark the grouped aggregation from data.table issue #4200 across three
# commits with atime. `tdir` is not defined in this snippet; it must already
# hold the path to a local git clone of data.table.
atime.list <- atime::atime_versions(
  pkg.path = tdir,
  # Rewrites the package name inside the copy at `new.pkg.path`, presumably so
  # that each commit can be installed under its own SHA-specific name.
  pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
    # Apply one regex find/replace to the files matching `glob` in the copy.
    pkg_find_replace <- function(glob, FIND, REPLACE) {
      atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE)
    }
    # Matches the package name written with or without "_" in place of ".".
    Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE)
    Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
    # Underscore form of the new name, used where the replacement is a C symbol.
    new.Package_ <- paste0(Package_, "_", sha)
    pkg_find_replace(
      "DESCRIPTION",
      paste0("Package:\\s+", old.Package),
      paste("Package:", new.Package))
    pkg_find_replace(
      file.path("src", "Makevars.*in"),
      Package_regex,
      new.Package_)
    pkg_find_replace(
      file.path("R", "onLoad.R"),
      Package_regex,
      new.Package_)
    pkg_find_replace(
      file.path("R", "onLoad.R"),
      sprintf('packageVersion\\("%s"\\)', old.Package),
      sprintf('packageVersion\\("%s"\\)', new.Package))
    pkg_find_replace(
      file.path("src", "init.c"),
      paste0("R_init_", Package_regex),
      paste0("R_init_", gsub("[.]", "_", new.Package_)))
    pkg_find_replace(
      "NAMESPACE",
      sprintf('useDynLib\\("?%s"?', Package_regex),
      paste0('useDynLib(', new.Package_))
  },
  # Data sizes to try: 1e3, 1e4, ..., 1e8.
  N = 10^seq(3, 8),
  setup = {
    set.seed(108)
    d <- data.table(
      id3 = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))),
      v1 = sample(5L, N, TRUE),
      v2 = sample(5L, N, TRUE))
  },
  # The empty second argument leaves `i` missing so the aggregation is matched
  # to `j`; this is the function-call form of d[, max(v1)-min(v2), by=id3].
  # Without it the expression would be matched positionally to `i`.
  expr = data.table:::`[.data.table`(d, , max(v1) - min(v2), by = id3),
  "Before" = "f750448a2efcd258b3aba57136ee6a95ce56b302", # https://github.com/Rdatatable/data.table/tree/f750448a2efcd258b3aba57136ee6a95ce56b302
  "Regression" = "cd48100a96882af025213ef3a0f82fb3dfdd493e", # https://github.com/Rdatatable/data.table/tree/cd48100a96882af025213ef3a0f82fb3dfdd493e
  "Fixed" = "20d485587d258f7d820e5e4cc0089dd6bda6a141") # https://github.com/Rdatatable/data.table/tree/20d485587d258f7d820e5e4cc0089dd6bda6a141

Error message:

Error in value[[3L]](cond) : 
  Error in revparse_single(object, branch): Error in 'git2r_revparse_single': Requested object could not be found

 when trying to checkout f750448a2efcd258b3aba57136ee6a95ce56b302

Traceback:

traceback()
9: stop(e, " when trying to checkout ", sha)
8: value[[3L]](cond)
7: tryCatchOne(expr, names, parentenv, handlers[[1L]])
6: tryCatchList(expr, classes, parentenv, handlers)
5: tryCatch(git2r::checkout(repo, branch = sha, force = TRUE), error = function(e) stop(e, 
       " when trying to checkout ", sha))
4: atime_versions_install(Package, pkg.path, new.Package.vec, SHA.vec, 
       verbose, pkg.edit.fun)
3: (function (pkg.path, expr, sha.vec = NULL, verbose = FALSE, pkg.edit.fun = pkg.edit.default, 
       ...) 
   {
       formal.names <- names(formals())
       mc.args <- as.list(match.call()[-1])
       dots.vec <- mc.args[!names(mc.args) %in% formal.names]
       SHA.vec <- c(dots.vec, sha.vec)
       if (length(SHA.vec) == 0) {
           stop("need to specify at least one git SHA, in either sha.vec, or ...")
       }
       pkg.DESC <- file.path(pkg.path, "DESCRIPTION")
       DESC.mat <- read.dcf(pkg.DESC)
       Package <- DESC.mat[, "Package"]
       new.Package.vec <- paste0(Package, ifelse(SHA.vec == "", 
           "", "."), SHA.vec)
       atime_versions_install(Package, pkg.path, new.Package.vec, 
           SHA.vec, verbose, pkg.edit.fun)
       a.args <- list()
       for (commit.i in seq_along(SHA.vec)) {
           sha <- SHA.vec[[commit.i]]
    ...
2: do.call(atime_versions_exprs, ver.args)
1: atime::atime_versions(pkg.path = tdir, pkg.edit.fun = function(old.Package, 
       new.Package, sha, new.pkg.path) {
       pkg_find_replace <- function(glob, FIND, REPLACE) {
           atime::glob_find_replace(file.path(new.pkg.path, glob), 
               FIND, REPLACE)
       }
       Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE)
       Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
       new.Package_ <- paste0(Package_, "_", sha)
       pkg_find_replace("DESCRIPTION", paste0("Package:\\s+", old.Package), 
           paste("Package:", new.Package))
       pkg_find_replace(file.path("src", "Makevars.*in"), Package_regex, 
           new.Package_)
       pkg_find_replace(file.path("R", "onLoad.R"), Package_regex, 
           new.Package_)
       pkg_find_replace(file.path("R", "onLoad.R"), sprintf("packageVersion\\(\"%s\"\\)", 
           old.Package), sprintf("packageVersion\\(\"%s\"\\)", new.Package))
       pkg_find_replace(file.path("src", "init.c"), paste0("R_init_", 
           Package_regex), paste0("R_init_", gsub("[.]", "_", new.Package_)))
       pkg_find_replace("NAMESPACE", sprintf("useDynLib\\(\"?%s\"?", 
    ...
tdhock commented 12 months ago

I restored the branch, so please try again.

DorisAmoakohene commented 12 months ago

Thank you @tdhock, it works now. This is the plot I made: atime list 4200

DorisAmoakohene commented 12 months ago

I think I can close this issue now? @tdhock

tdhock commented 11 months ago

great

tdhock commented 11 months ago
> 10^seq(1, 4)
[1]    10   100  1000 10000

> stop("SoMe error")
Error: SoMe error
> traceback()
1: stop("SoMe error")
>