Open DorisAmoakohene opened 4 weeks ago
I'm not aware of what you tried? Please share what you did, cause its not "we've" for what I tried.
I was simply trying to hack the plot method to filter out the time data (for which I sent you a snippet on Monday after our meeting, which you can work on?), and I think modifying this is the easiest way to go about it: https://github.com/tdhock/atime/blob/27bc2530bc86484996cbab365d50bd742e96b532/R/atime.R#L201
I can continue debugging those errors now (will post in another issue) or Toby can ship something out faster (which is why I told you to contact him since the start of this week) but I would suggest you to try something yourself in the meantime instead of just waiting.
just use atime.result$measurements as the data in a ggplot
Yes I did that in this code, But my problem is how to get the base, CRAN and head of the graph as well Can that also be found in the atime.result$measurements?
library(atime)
library(ggplot2)
library(data.table)
tdir <- tempfile()
dir.create(tdir)
git2r::clone("https://github.com/Rdatatable/data.table", tdir)
atime.result <- atime::atime_versions(
pkg.path=tdir,
pkg.edit.fun=function(old.Package, new.Package, sha, new.pkg.path){
pkg_find_replace <- function(glob, FIND, REPLACE){
atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE)
}
Package_regex <- gsub(".", "_?", old.Package, fixed=TRUE)
Package_ <- gsub(".", "_", old.Package, fixed=TRUE)
new.Package_ <- paste0(Package_, "_", sha)
pkg_find_replace(
"DESCRIPTION",
paste0("Package:\\s+", old.Package),
paste("Package:", new.Package))
pkg_find_replace(
file.path("src","Makevars.*in"),
Package_regex,
new.Package_)
pkg_find_replace(
file.path("R", "onLoad.R"),
Package_regex,
new.Package_)
pkg_find_replace(
file.path("R", "onLoad.R"),
sprintf('packageVersion\\("%s"\\)', old.Package),
sprintf('packageVersion\\("%s"\\)', new.Package))
pkg_find_replace(
file.path("src", "init.c"),
paste0("R_init_", Package_regex),
paste0("R_init_", gsub("[.]", "_", new.Package_)))
pkg_find_replace(
"NAMESPACE",
sprintf('useDynLib\\("?%s"?', Package_regex),
paste0('useDynLib(', new.Package_))
},
N=10^seq(1,7),
setup={
DT = replicate(N, 1, simplify = FALSE)
},
expr=data.table:::setDT(DT),
"slow"= "c4a2085e35689a108d67dacb2f8261e4964d7e12", #Parent of the first commit in the PR that fixes the issue(https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801),
"Fast"="1872f473b20fdcddc5c1b35d79fe9229cd9a1d15")#last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits)
png("new.gg.png",res = 200, width = 5, height = 3, unit = "in")
new.gg = ggplot()+
geom_line(aes(x = N, y = median, group = expr.name, colour = expr.name), data = atime.result$measurements)+
geom_ribbon(aes(x = N, ymin = min, ymax = max, fill = expr.name), data = atime.result$measurements, alpha = 0.5 )+
scale_x_log10("N = data size",limits = c(NA, 1e5))+
scale_y_log10("Computational Time (Seconds)")
directlabels::direct.label(new.gg, list(cex = 0.8, "right.polygons"))
dev.off()
you can add HEAD, etc as other versions, arguments to atime_versions
(along with fast and slow)
for CRAN version, use CRAN=""
empty string means use version from CRAN (not github)
yes, sure thanks, got it
make sure to do install.packages("data.table") to get most recent CRAN version, here are the docs from ?atime_versions
For convenience, versions can be specified either as code (...),
data (‘sha.vec’), or both. Each version should be either ‘""’ (to
use currently installed version of package, or if missing, install
most recent version from CRAN) or a SHA1 hash, which is passed as
branch arg to ‘git2r::checkout’; version names used to
identify/interpret the output/plots.
This is the new plot
library(atime)
library(ggplot2)
library(data.table)
tdir <- tempfile()
dir.create(tdir)
git2r::clone("https://github.com/Rdatatable/data.table", tdir)
atime.result <- atime::atime_versions(
pkg.path=tdir,
pkg.edit.fun=function(old.Package, new.Package, sha, new.pkg.path){
pkg_find_replace <- function(glob, FIND, REPLACE){
atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE)
}
Package_regex <- gsub(".", "_?", old.Package, fixed=TRUE)
Package_ <- gsub(".", "_", old.Package, fixed=TRUE)
new.Package_ <- paste0(Package_, "_", sha)
pkg_find_replace(
"DESCRIPTION",
paste0("Package:\\s+", old.Package),
paste("Package:", new.Package))
pkg_find_replace(
file.path("src","Makevars.*in"),
Package_regex,
new.Package_)
pkg_find_replace(
file.path("R", "onLoad.R"),
Package_regex,
new.Package_)
pkg_find_replace(
file.path("R", "onLoad.R"),
sprintf('packageVersion\\("%s"\\)', old.Package),
sprintf('packageVersion\\("%s"\\)', new.Package))
pkg_find_replace(
file.path("src", "init.c"),
paste0("R_init_", Package_regex),
paste0("R_init_", gsub("[.]", "_", new.Package_)))
pkg_find_replace(
"NAMESPACE",
sprintf('useDynLib\\("?%s"?', Package_regex),
paste0('useDynLib(', new.Package_))
},
N=10^seq(1,7),
setup={
DT = replicate(N, 1, simplify = FALSE)
},
expr=data.table:::setDT(DT),
"slow"= "c4a2085e35689a108d67dacb2f8261e4964d7e12", #Parent of the first commit in the PR that fixes the issue(https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801),#last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits)
"Fast"="1872f473b20fdcddc5c1b35d79fe9229cd9a1d15",#tag 1.15.4,https://github.com/Rdatatable/data.table/commit/aa75d79376478b3e8f80fd6f31dcf53be8bf3404
"Head"="ff900d1e6a8bcfaa0385bd1304af7b90657d3c4d", #lastest commit on the data.table page https://github.com/Rdatatable/data.table/commit/ff900d1e6a8bcfaa0385bd1304af7b90657d3c4d
"CRAN"="",
"BASE"="3e5d038ee4a800104b665ac39392ceed46b1189d" #1.14.6,https://github.com/Rdatatable/data.table/commit/3e5d038ee4a800104b665ac39392ceed46b1189d)
)
png("new.gg1.png", res = 600, width = 15, height = 10, unit = "in")
new.gg1 = ggplot() +
geom_line(aes(x = N, y = median, group = expr.name, colour = expr.name), data = atime.result$measurements) +
geom_ribbon(aes(x = N, ymin = min, ymax = max, fill = expr.name), data = atime.result$measurements, alpha = 0.7) +
labs(title = "setDT extremely slow for very wide input #5426") +
theme(
plot.title = element_text(size = 30),
text = element_text(size = 30)
) +
scale_x_log10("N = data size", limits = c(NA, 1e8)) +
scale_y_log10("Computational Time (Seconds)")
directlabels::direct.label(new.gg1, list(cex = 3, "right.polygons"))
dev.off()
This is the new plot
Looks good to me 👍🏻
For that test case (or this in your repository), you might want to update it to the code being used in data.table as currently only the CRAN version is aligned with the Slow label (the PR/5427 was merged to fix this so BASE at present should be with the Fast label)
Seems like you're running this with an older version of data.table
in your system.
looks good overall I would change "BASE" to "base=master" good to keep CRAN all caps though If please keep slow/fast consistently capitalized, either Slow/Fast or slow/fast but not slow/Fast please
HEAD should be all caps https://stackoverflow.com/questions/2304087/what-is-head-in-git why can't we see any geom_line for median?
I used alpha = 0.7, for geom_line and geom_ribbon
png("new.gg1.png", res = 600, width = 15, height = 10, unit = "in")
new.gg1 = ggplot() +
geom_line(aes(x = N, y = median, group = expr.name, colour = expr.name), data = atime.result$measurements, alpha = 0.7) +
geom_ribbon(aes(x = N, ymin = min, ymax = max, fill = expr.name), data = atime.result$measurements, alpha = 0.7) +
labs(title = "setDT extremely slow for very wide input #5426") +
theme(
plot.title = element_text(size = 30),
text = element_text(size = 30)
) +
scale_x_log10("N = data size", limits = c(NA, 1e8)) +
scale_y_log10("Computational Time (Seconds)")
directlabels::direct.label(new.gg1, list(cex = 3, "right.polygons"))
dev.off()
i usually use alpha=0.5 for ribbon and default alpha=1 for line
@tdhock, @Anirban166 and I have been trying to figure out how to create a plot for the gitHub action for only seconds and N, he run into a series of errors that he's been unable to resolve so far. @Anirban166, Kindly share some of the error messages so @tdhock can assist.
I will also share what did also