Closed mem48 closed 4 years ago
Does this help address https://github.com/ropensci/stplanr/issues/425 ? Which I expect is much slower?
Testing the updated version as follows:
# Reproducing the benchmark
reprex::reprex(si = TRUE, {
library(stplanr)
routes <- pct::get_pct_routes_fast("west-yorkshire", geography = "lsoa")
system.time({
rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
})
# and with the updated version
remotes::install_github("ropensci/stplanr", "overline-datatable")
reprex::reprex(si = TRUE, {
library(stplanr)
routes <- pct::get_pct_routes_fast("west-yorkshire", geography = "lsoa")
system.time({
rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
})
Test with the latest version:
library(stplanr)
routes <- pct::get_pct_routes_fast("west-yorkshire", geography = "lsoa")
system.time({
rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#> large data detected, using regionalisation, nrow = 181595
#> Error in FUN(X[[i]], ...): subscript out of bounds
#> Timing stopped at: 31.84 0 31.88
Created on 2020-09-07 by the reprex package (v0.3.0)
Test after installing this branch...
The result on the latest version also fails. I suspect the sf/dplyr versions are the culprit:
remotes::install_github("ropensci/stplanr", "overline-datatable")
#> Using github PAT from envvar GITHUB_PAT
#> Skipping install of 'stplanr' from a github remote, the SHA1 (bdf48365) has not changed since last install.
#> Use `force = TRUE` to force installation
library(stplanr)
routes <- pct::get_pct_routes_fast("west-yorkshire", geography = "lsoa")
system.time({
rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#> 2020-09-07 22:05:53 constructing segments
#> 2020-09-07 22:06:14 building geometry
#> 2020-09-07 22:06:20 simplifying geometry
#> large data detected, using regionalisation, nrow = 181595
#> Error in FUN(X[[i]], ...): subscript out of bounds
#> Timing stopped at: 32.74 0 32.74
Created on 2020-09-07 by the reprex package (v0.3.0)
Updated test...
remotes::install_cran("stplanr")
#> Skipping install of 'stplanr' from a cran remote, the SHA1 (0.7.2) has not changed since last install.
#> Use `force = TRUE` to force installation
library(stplanr)
region = "isle-of-wight"
routes <- pct::get_pct_routes_fast(region, geography = "lsoa")
system.time({
rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#> user system elapsed
#> 2.899 0.011 2.930
Created on 2020-09-10 by the reprex package (v0.3.0)
vs...
remotes::install_github("ropensci/stplanr", "overline-datatable")
#> Using github PAT from envvar GITHUB_PAT
#> Skipping install of 'stplanr' from a github remote, the SHA1 (bdf48365) has not changed since last install.
#> Use `force = TRUE` to force installation
library(stplanr)
region = "isle-of-wight"
routes <- pct::get_pct_routes_fast(region, geography = "lsoa")
system.time({
rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#> 2020-09-10 22:13:15 constructing segments
#> 2020-09-10 22:13:16 building geometry
#> 2020-09-10 22:13:16 simplifying geometry
#> 2020-09-10 22:13:16 aggregating flows
#> 2020-09-10 22:13:17 rejoining segments into linestrings
#> user system elapsed
#> 2.708 0.000 2.708
Created on 2020-09-10 by the reprex package (v0.3.0)
Definitely seems quicker.
Another test...
remotes::install_cran("stplanr")
#> Skipping install of 'stplanr' from a cran remote, the SHA1 (0.7.2) has not changed since last install.
#> Use `force = TRUE` to force installation
library(stplanr)
region = "isle-of-wight"
routes <- pct::get_pct_routes_fast(region, geography = "lsoa")
system.time({
rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#> user system elapsed
#> 3.345 0.006 3.372
remotes::install_github("ropensci/stplanr", "overline-datatable")
#> Using github PAT from envvar GITHUB_PAT
#> Downloading GitHub repo ropensci/stplanr@overline-datatable
#> sf (a70316281... -> b5b6fbcf1...) [GitHub]
#> sp (3a47130df... -> 67f6b9073...) [GitHub]
#> stringi (1.4.6 -> 1.5.3 ) [CRAN]
#> Installing 1 packages: stringi
#> Installing package into '/home/robin/R/x86_64-pc-linux-gnu-library/4.0'
#> (as 'lib' is unspecified)
#> Downloading GitHub repo r-spatial/sf@HEAD
#>
#> checking for file ‘/tmp/RtmpOsAI9P/remotes1d48870f6f05b/r-spatial-sf-b5b6fbc/DESCRIPTION’ ... ✔ checking for file ‘/tmp/RtmpOsAI9P/remotes1d48870f6f05b/r-spatial-sf-b5b6fbc/DESCRIPTION’
#> ─ preparing ‘sf’:
#> checking DESCRIPTION meta-information ... ✔ checking DESCRIPTION meta-information
#> ─ cleaning src
#> ─ running ‘cleanup’
#> ─ checking for LF line-endings in source and make files and shell scripts
#> ─ checking for empty or unneeded directories
#> ─ building ‘sf_0.9-6.tar.gz’
#>
#>
#> Installing package into '/home/robin/R/x86_64-pc-linux-gnu-library/4.0'
#> (as 'lib' is unspecified)
#> Downloading GitHub repo rsbivand/sp@HEAD
#> checking for file ‘/tmp/RtmpOsAI9P/remotes1d48843dad76c/rsbivand-sp-67f6b90/DESCRIPTION’ ... ✔ checking for file ‘/tmp/RtmpOsAI9P/remotes1d48843dad76c/rsbivand-sp-67f6b90/DESCRIPTION’
#> ─ preparing ‘sp’:
#> checking DESCRIPTION meta-information ... ✔ checking DESCRIPTION meta-information
#> ─ cleaning src
#> ─ checking for LF line-endings in source and make files and shell scripts
#> ─ checking for empty or unneeded directories
#> ─ looking to see if a ‘data/datalist’ file should be added
#> ─ building ‘sp_1.4-4.tar.gz’
#>
#>
#> Installing package into '/home/robin/R/x86_64-pc-linux-gnu-library/4.0'
#> (as 'lib' is unspecified)
#> checking for file ‘/tmp/RtmpOsAI9P/remotes1d48851bb9929/ropensci-stplanr-bdf4836/DESCRIPTION’ ... ✔ checking for file ‘/tmp/RtmpOsAI9P/remotes1d48851bb9929/ropensci-stplanr-bdf4836/DESCRIPTION’
#> ─ preparing ‘stplanr’:
#> checking DESCRIPTION meta-information ... ✔ checking DESCRIPTION meta-information
#> ─ cleaning src
#> ─ checking for LF line-endings in source and make files and shell scripts
#> ─ checking for empty or unneeded directories
#> ─ building ‘stplanr_0.7.2.tar.gz’
#>
#>
#> Installing package into '/home/robin/R/x86_64-pc-linux-gnu-library/4.0'
#> (as 'lib' is unspecified)
library(stplanr)
region = "isle-of-wight"
routes <- pct::get_pct_routes_fast(region, geography = "lsoa")
system.time({
rnet2 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#> user system elapsed
#> 2.881 0.004 2.885
identical(rnet1, rnet2)
#> [1] TRUE
Created on 2020-09-10 by the reprex package (v0.3.0)
Small tweaks to overline2 to reduce compute time using data.table and dplyr functions