DorisAmoakohene / Researchwork_Rdata.table

0 stars 0 forks source link

Error in atime for when collapse is actually fast #4

Open DorisAmoakohene opened 12 months ago

DorisAmoakohene commented 12 months ago

@tdhock

I am trying to run atime on the microbenchmark on when collapse is actually fast from the link below https://raw.githubusercontent.com/SebKrantz/collapse/master/misc/useR2022%20presentation/collapse_useR2022_final.pdf

library(data.table)
library(dplyr)
library(microbenchmark)
g <- sample.int(1e6, 1e7, replace = TRUE)
w <- abs(x)
x <- rnorm(1e7)
dt <- data.table(x = x, g = g)
dt <- dt[, .(x), by = g]
microbenchmark(clp = fmean(x, na.rm = FALSE), clp_g = fmean(x, g, use.g.names = FALSE, na.rm = FALSE),
clp_g_w = fmean(x, g, w, use.g.names = FALSE, na.rm = FALSE), dt = dt[, mean(x), keyby = g])

this is my atime code and Plot code

ml.colors <- c(
  clp = "#9970AB",
  clp_g = "#D6604D",
  clp_g_w = "#BF812D",
  dt = "#F1EB90"
)

x <- rnorm(1e7)
g <- sample.int(1e6, 1e7, replace = TRUE)
w <- abs(x)
dt <- setkey(data.table(x, g), g)
g <- GRP(g)

atime.operations <- atime::atime(
  setup = {
    library(data.table)
    x <- rnorm(1e7)
    g <- sample.int(1e6, 1e7, replace = TRUE)
    w <- abs(x)
    dt <- setkey(data.table(x, g), g)
    g <- GRP(g)
  },
  clp = {
    fmean(x, na.rm = FALSE)
  },
  clp_g = {
    fmean(x, g, use.g.names = FALSE, na.rm = FALSE)
  },
  clp_g_w = {
    fmean(x, g, w, use.g.names = FALSE, na.rm = FALSE)
  },
  dt = {
    dt[, mean(x), keyby = g]
  }
)
ml.operations.refs <- atime::references_best(atime.operations)
ml.operations.pred <- predict(ml.operations.refs)
ml.operations <- plot(ml.operations.pred) +
  theme(text = element_text(size = 20)) +
  ggtitle(sprintf("microbenchmarking, N times", n.rows)) +
  scale_x_log10("N = number of Mean, SD, Length to compute") +
  scale_y_log10("Computation time (seconds)\nmedian line, min/max band\nover 10 timings") +
  facet_null() +
  scale_fill_manual(values = ml.colors) +
  scale_color_manual(values = ml.colors)

Error Message I am getting

> ml.operations.pred <- predict(ml.operations.refs)
Error in `[.data.table`(data.table(unit = names(L)), , { : 
  seconds=0.01 is too large, please decrease to a value that intersects at least one of the empirical curves