ropensci / stplanr

Sustainable transport planning with R
https://docs.ropensci.org/stplanr
Other
422 stars 66 forks source link

speed boost to overline #429

Closed mem48 closed 4 years ago

mem48 commented 4 years ago

Small tweaks to overline2 to reduce compute time using data.table and dplyr functions

routes <- pct::get_pct_routes_fast("west-yorkshire", geography = "lsoa")
> system.time({
+   rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
+ })
   user  system elapsed 
  93.31    2.83   99.25 
> system.time({
+   rnet2 <- overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
+ })

   user  system elapsed 
  86.86    0.83   88.22 

> identical(rnet1, rnet2)
[1] TRUE
mem48 commented 4 years ago

Does this help address https://github.com/ropensci/stplanr/issues/425 (which I expect is much slower)?

Robinlovelace commented 4 years ago

Testing the updated version as follows:

# Reproducing the benchmark

reprex::reprex(si = TRUE, {
library(stplanr)
routes <- pct::get_pct_routes_fast("west-yorkshire", geography = "lsoa")
system.time({
  rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
})

# and with the updated version

remotes::install_github("ropensci/stplanr", "overline-datatable")

reprex::reprex(si = TRUE, {
library(stplanr)
routes <- pct::get_pct_routes_fast("west-yorkshire", geography = "lsoa")
system.time({
  rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
})
Robinlovelace commented 4 years ago

Test with the latest version:

library(stplanr)
routes <- pct::get_pct_routes_fast("west-yorkshire", geography = "lsoa")
system.time({
  rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#> large data detected, using regionalisation, nrow = 181595
#> Error in FUN(X[[i]], ...): subscript out of bounds
#> Timing stopped at: 31.84 0 31.88

Created on 2020-09-07 by the reprex package (v0.3.0)

Session info ``` r devtools::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.0.2 (2020-06-22) #> os Ubuntu 20.04.1 LTS #> system x86_64, linux-gnu #> ui X11 #> language en_GB:en #> collate en_GB.UTF-8 #> ctype en_GB.UTF-8 #> tz Europe/London #> date 2020-09-07 #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date lib source #> assertthat 0.2.1 2019-03-21 [3] CRAN (R 4.0.0) #> backports 1.1.9 2020-08-24 [3] CRAN (R 4.0.2) #> callr 3.4.3 2020-03-28 [3] CRAN (R 4.0.0) #> class 7.3-17 2020-04-26 [4] CRAN (R 4.0.0) #> classInt 0.4-3 2020-04-07 [1] CRAN (R 4.0.2) #> cli 2.0.2 2020-02-28 [3] CRAN (R 4.0.0) #> codetools 0.2-16 2018-12-24 [4] CRAN (R 4.0.0) #> crayon 1.3.4 2017-09-16 [3] CRAN (R 4.0.0) #> curl 4.3 2019-12-02 [3] CRAN (R 4.0.0) #> DBI 1.1.0 2019-12-15 [3] CRAN (R 4.0.0) #> desc 1.2.0 2018-05-01 [3] CRAN (R 4.0.0) #> devtools 2.3.1 2020-07-21 [3] CRAN (R 4.0.2) #> digest 0.6.25 2020-02-23 [3] CRAN (R 4.0.0) #> dplyr 1.0.2 2020-08-18 [3] CRAN (R 4.0.2) #> e1071 1.7-3 2019-11-26 [1] CRAN (R 4.0.2) #> ellipsis 0.3.1 2020-05-15 [3] CRAN (R 4.0.0) #> evaluate 0.14 2019-05-28 [3] CRAN (R 4.0.0) #> fansi 0.4.1 2020-01-08 [3] CRAN (R 4.0.0) #> foreign 0.8-80 2020-05-24 [1] CRAN (R 4.0.2) #> fs 1.5.0 2020-07-31 [3] CRAN (R 4.0.2) #> generics 0.0.2 2018-11-29 [3] CRAN (R 4.0.0) #> geosphere 1.5-10 2019-05-26 [1] CRAN (R 4.0.2) #> glue 1.4.2 2020-08-27 [3] CRAN (R 4.0.2) #> highr 0.8 2019-03-20 [3] CRAN (R 4.0.0) #> htmltools 0.5.0 2020-06-16 [3] CRAN (R 4.0.1) #> igraph 1.2.5 2020-03-19 [3] CRAN (R 4.0.0) #> KernSmooth 2.23-17 2020-04-26 [4] CRAN (R 4.0.0) #> knitr 1.29 2020-06-23 [3] CRAN (R 4.0.2) #> lattice 0.20-41 2020-04-02 [4] CRAN (R 4.0.0) #> lifecycle 0.2.0 2020-03-06 [3] CRAN (R 4.0.0) #> magrittr 1.5 2014-11-22 [3] CRAN (R 4.0.0) #> maptools 1.0-2 2020-08-24 [3] CRAN (R 4.0.2) #> memoise 1.1.0 2017-04-21 [3] CRAN 
(R 4.0.0) #> pbapply 1.4-3 2020-08-18 [1] CRAN (R 4.0.2) #> pct 0.5.0 2020-08-25 [1] local #> pillar 1.4.6 2020-07-10 [3] CRAN (R 4.0.2) #> pkgbuild 1.1.0 2020-07-13 [3] CRAN (R 4.0.2) #> pkgconfig 2.0.3 2019-09-22 [3] CRAN (R 4.0.0) #> pkgload 1.1.0 2020-05-29 [3] CRAN (R 4.0.0) #> prettyunits 1.1.1 2020-01-24 [3] CRAN (R 4.0.0) #> processx 3.4.3 2020-07-05 [3] CRAN (R 4.0.2) #> ps 1.3.4 2020-08-11 [3] CRAN (R 4.0.2) #> purrr 0.3.4 2020-04-17 [3] CRAN (R 4.0.0) #> R6 2.4.1 2019-11-12 [3] CRAN (R 4.0.0) #> raster 3.3-13 2020-07-17 [3] CRAN (R 4.0.2) #> Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.2) #> remotes 2.2.0 2020-07-21 [3] CRAN (R 4.0.2) #> rgeos 0.5-3 2020-05-08 [1] CRAN (R 4.0.2) #> rlang 0.4.7 2020-07-09 [3] CRAN (R 4.0.2) #> rmarkdown 2.3 2020-06-18 [3] CRAN (R 4.0.1) #> rprojroot 1.3-2 2018-01-03 [3] CRAN (R 4.0.0) #> s2 1.0.2.9000 2020-08-28 [1] Github (r-spatial/s2@514a5d4) #> sessioninfo 1.1.1 2018-11-05 [3] CRAN (R 4.0.0) #> sf 0.9-6 2020-08-27 [1] Github (r-spatial/sf@a703162) #> sp 1.4-4 2020-08-25 [1] Github (rsbivand/sp@3a47130) #> stplanr * 0.7.1 2020-08-29 [1] local #> stringi 1.4.6 2020-02-17 [3] CRAN (R 4.0.0) #> stringr 1.4.0 2019-02-10 [3] CRAN (R 4.0.0) #> testthat 2.3.2 2020-03-02 [3] CRAN (R 4.0.0) #> tibble 3.0.3 2020-07-10 [3] CRAN (R 4.0.2) #> tidyselect 1.1.0 2020-05-11 [3] CRAN (R 4.0.0) #> units 0.6-7 2020-06-13 [1] CRAN (R 4.0.2) #> usethis 1.6.1.9001 2020-08-19 [1] Github (r-lib/usethis@860c1ea) #> vctrs 0.3.4 2020-08-29 [3] CRAN (R 4.0.2) #> withr 2.2.0 2020-04-20 [3] CRAN (R 4.0.0) #> wk 0.3.2 2020-08-03 [1] CRAN (R 4.0.2) #> xfun 0.16 2020-07-24 [3] CRAN (R 4.0.2) #> yaml 2.2.1 2020-02-01 [3] CRAN (R 4.0.0) #> #> [1] /home/robin/R/x86_64-pc-linux-gnu-library/4.0 #> [2] /usr/local/lib/R/site-library #> [3] /usr/lib/R/site-library #> [4] /usr/lib/R/library ```

Test after installing this branch...

Robinlovelace commented 4 years ago

Result on the latest version also fails. I suspect the sf/dplyr versions are the culprit:

remotes::install_github("ropensci/stplanr", "overline-datatable")
#> Using github PAT from envvar GITHUB_PAT
#> Skipping install of 'stplanr' from a github remote, the SHA1 (bdf48365) has not changed since last install.
#>   Use `force = TRUE` to force installation
library(stplanr)
routes <- pct::get_pct_routes_fast("west-yorkshire", geography = "lsoa")
system.time({
  rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#> 2020-09-07 22:05:53 constructing segments
#> 2020-09-07 22:06:14 building geometry
#> 2020-09-07 22:06:20 simplifying geometry
#> large data detected, using regionalisation, nrow = 181595
#> Error in FUN(X[[i]], ...): subscript out of bounds
#> Timing stopped at: 32.74 0 32.74

Created on 2020-09-07 by the reprex package (v0.3.0)

Session info ``` r devtools::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.0.2 (2020-06-22) #> os Ubuntu 20.04.1 LTS #> system x86_64, linux-gnu #> ui X11 #> language en_GB:en #> collate en_GB.UTF-8 #> ctype en_GB.UTF-8 #> tz Europe/London #> date 2020-09-07 #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date lib source #> assertthat 0.2.1 2019-03-21 [3] CRAN (R 4.0.0) #> backports 1.1.9 2020-08-24 [3] CRAN (R 4.0.2) #> callr 3.4.3 2020-03-28 [3] CRAN (R 4.0.0) #> class 7.3-17 2020-04-26 [4] CRAN (R 4.0.0) #> classInt 0.4-3 2020-04-07 [1] CRAN (R 4.0.2) #> cli 2.0.2 2020-02-28 [3] CRAN (R 4.0.0) #> codetools 0.2-16 2018-12-24 [4] CRAN (R 4.0.0) #> crayon 1.3.4 2017-09-16 [3] CRAN (R 4.0.0) #> curl 4.3 2019-12-02 [3] CRAN (R 4.0.0) #> DBI 1.1.0 2019-12-15 [3] CRAN (R 4.0.0) #> desc 1.2.0 2018-05-01 [3] CRAN (R 4.0.0) #> devtools 2.3.1 2020-07-21 [3] CRAN (R 4.0.2) #> digest 0.6.25 2020-02-23 [3] CRAN (R 4.0.0) #> dplyr 1.0.2 2020-08-18 [3] CRAN (R 4.0.2) #> e1071 1.7-3 2019-11-26 [1] CRAN (R 4.0.2) #> ellipsis 0.3.1 2020-05-15 [3] CRAN (R 4.0.0) #> evaluate 0.14 2019-05-28 [3] CRAN (R 4.0.0) #> fansi 0.4.1 2020-01-08 [3] CRAN (R 4.0.0) #> foreign 0.8-80 2020-05-24 [1] CRAN (R 4.0.2) #> fs 1.5.0 2020-07-31 [3] CRAN (R 4.0.2) #> generics 0.0.2 2018-11-29 [3] CRAN (R 4.0.0) #> geosphere 1.5-10 2019-05-26 [1] CRAN (R 4.0.2) #> glue 1.4.2 2020-08-27 [3] CRAN (R 4.0.2) #> highr 0.8 2019-03-20 [3] CRAN (R 4.0.0) #> htmltools 0.5.0 2020-06-16 [3] CRAN (R 4.0.1) #> igraph 1.2.5 2020-03-19 [3] CRAN (R 4.0.0) #> KernSmooth 2.23-17 2020-04-26 [4] CRAN (R 4.0.0) #> knitr 1.29 2020-06-23 [3] CRAN (R 4.0.2) #> lattice 0.20-41 2020-04-02 [4] CRAN (R 4.0.0) #> lifecycle 0.2.0 2020-03-06 [3] CRAN (R 4.0.0) #> magrittr 1.5 2014-11-22 [3] CRAN (R 4.0.0) #> maptools 1.0-2 2020-08-24 [3] CRAN (R 4.0.2) #> memoise 1.1.0 2017-04-21 [3] CRAN 
(R 4.0.0) #> pbapply 1.4-3 2020-08-18 [1] CRAN (R 4.0.2) #> pct 0.5.0 2020-08-25 [1] local #> pillar 1.4.6 2020-07-10 [3] CRAN (R 4.0.2) #> pkgbuild 1.1.0 2020-07-13 [3] CRAN (R 4.0.2) #> pkgconfig 2.0.3 2019-09-22 [3] CRAN (R 4.0.0) #> pkgload 1.1.0 2020-05-29 [3] CRAN (R 4.0.0) #> prettyunits 1.1.1 2020-01-24 [3] CRAN (R 4.0.0) #> processx 3.4.3 2020-07-05 [3] CRAN (R 4.0.2) #> ps 1.3.4 2020-08-11 [3] CRAN (R 4.0.2) #> purrr 0.3.4 2020-04-17 [3] CRAN (R 4.0.0) #> R6 2.4.1 2019-11-12 [3] CRAN (R 4.0.0) #> raster 3.3-13 2020-07-17 [3] CRAN (R 4.0.2) #> Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.2) #> remotes 2.2.0 2020-07-21 [3] CRAN (R 4.0.2) #> rgeos 0.5-3 2020-05-08 [1] CRAN (R 4.0.2) #> rlang 0.4.7 2020-07-09 [3] CRAN (R 4.0.2) #> rmarkdown 2.3 2020-06-18 [3] CRAN (R 4.0.1) #> rprojroot 1.3-2 2018-01-03 [3] CRAN (R 4.0.0) #> s2 1.0.2.9000 2020-08-28 [1] Github (r-spatial/s2@514a5d4) #> sessioninfo 1.1.1 2018-11-05 [3] CRAN (R 4.0.0) #> sf 0.9-6 2020-08-27 [1] Github (r-spatial/sf@a703162) #> sp 1.4-4 2020-08-25 [1] Github (rsbivand/sp@3a47130) #> stplanr * 0.7.2 2020-09-07 [1] Github (ropensci/stplanr@bdf4836) #> stringi 1.4.6 2020-02-17 [3] CRAN (R 4.0.0) #> stringr 1.4.0 2019-02-10 [3] CRAN (R 4.0.0) #> testthat 2.3.2 2020-03-02 [3] CRAN (R 4.0.0) #> tibble 3.0.3 2020-07-10 [3] CRAN (R 4.0.2) #> tidyselect 1.1.0 2020-05-11 [3] CRAN (R 4.0.0) #> units 0.6-7 2020-06-13 [1] CRAN (R 4.0.2) #> usethis 1.6.1.9001 2020-08-19 [1] Github (r-lib/usethis@860c1ea) #> vctrs 0.3.4 2020-08-29 [3] CRAN (R 4.0.2) #> withr 2.2.0 2020-04-20 [3] CRAN (R 4.0.0) #> wk 0.3.2 2020-08-03 [1] CRAN (R 4.0.2) #> xfun 0.16 2020-07-24 [3] CRAN (R 4.0.2) #> yaml 2.2.1 2020-02-01 [3] CRAN (R 4.0.0) #> #> [1] /home/robin/R/x86_64-pc-linux-gnu-library/4.0 #> [2] /usr/local/lib/R/site-library #> [3] /usr/lib/R/site-library #> [4] /usr/lib/R/library ```
Robinlovelace commented 4 years ago

Updated test...

remotes::install_cran("stplanr")
#> Skipping install of 'stplanr' from a cran remote, the SHA1 (0.7.2) has not changed since last install.
#>   Use `force = TRUE` to force installation
library(stplanr)
region = "isle-of-wight"
routes <- pct::get_pct_routes_fast(region, geography = "lsoa")
system.time({
  rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#>    user  system elapsed 
#>   2.899   0.011   2.930

Created on 2020-09-10 by the reprex package (v0.3.0)

vs...

remotes::install_github("ropensci/stplanr", "overline-datatable")
#> Using github PAT from envvar GITHUB_PAT
#> Skipping install of 'stplanr' from a github remote, the SHA1 (bdf48365) has not changed since last install.
#>   Use `force = TRUE` to force installation
library(stplanr)
region = "isle-of-wight"
routes <- pct::get_pct_routes_fast(region, geography = "lsoa")
system.time({
  rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#> 2020-09-10 22:13:15 constructing segments
#> 2020-09-10 22:13:16 building geometry
#> 2020-09-10 22:13:16 simplifying geometry
#> 2020-09-10 22:13:16 aggregating flows
#> 2020-09-10 22:13:17 rejoining segments into linestrings
#>    user  system elapsed 
#>   2.708   0.000   2.708

Created on 2020-09-10 by the reprex package (v0.3.0)

Definitely seems quicker.

Robinlovelace commented 4 years ago

Another test...

remotes::install_cran("stplanr")
#> Skipping install of 'stplanr' from a cran remote, the SHA1 (0.7.2) has not changed since last install.
#>   Use `force = TRUE` to force installation
library(stplanr)
region = "isle-of-wight"
routes <- pct::get_pct_routes_fast(region, geography = "lsoa")
system.time({
  rnet1 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#>    user  system elapsed 
#>   3.345   0.006   3.372

remotes::install_github("ropensci/stplanr", "overline-datatable")
#> Using github PAT from envvar GITHUB_PAT
#> Downloading GitHub repo ropensci/stplanr@overline-datatable
#> sf      (a70316281... -> b5b6fbcf1...) [GitHub]
#> sp      (3a47130df... -> 67f6b9073...) [GitHub]
#> stringi (1.4.6        -> 1.5.3       ) [CRAN]
#> Installing 1 packages: stringi
#> Installing package into '/home/robin/R/x86_64-pc-linux-gnu-library/4.0'
#> (as 'lib' is unspecified)
#> Downloading GitHub repo r-spatial/sf@HEAD
#> 
#>      checking for file ‘/tmp/RtmpOsAI9P/remotes1d48870f6f05b/r-spatial-sf-b5b6fbc/DESCRIPTION’ ...  ✔  checking for file ‘/tmp/RtmpOsAI9P/remotes1d48870f6f05b/r-spatial-sf-b5b6fbc/DESCRIPTION’
#>   ─  preparing ‘sf’:
#>      checking DESCRIPTION meta-information ...  ✔  checking DESCRIPTION meta-information
#> ─  cleaning src
#>   ─  running ‘cleanup’
#>   ─  checking for LF line-endings in source and make files and shell scripts
#>   ─  checking for empty or unneeded directories
#>   ─  building ‘sf_0.9-6.tar.gz’
#>      
#> 
#> Installing package into '/home/robin/R/x86_64-pc-linux-gnu-library/4.0'
#> (as 'lib' is unspecified)
#> Downloading GitHub repo rsbivand/sp@HEAD
#>      checking for file ‘/tmp/RtmpOsAI9P/remotes1d48843dad76c/rsbivand-sp-67f6b90/DESCRIPTION’ ...  ✔  checking for file ‘/tmp/RtmpOsAI9P/remotes1d48843dad76c/rsbivand-sp-67f6b90/DESCRIPTION’
#>   ─  preparing ‘sp’:
#>    checking DESCRIPTION meta-information ...  ✔  checking DESCRIPTION meta-information
#> ─  cleaning src
#>   ─  checking for LF line-endings in source and make files and shell scripts
#>   ─  checking for empty or unneeded directories
#>   ─  looking to see if a ‘data/datalist’ file should be added
#>   ─  building ‘sp_1.4-4.tar.gz’
#>      
#> 
#> Installing package into '/home/robin/R/x86_64-pc-linux-gnu-library/4.0'
#> (as 'lib' is unspecified)
#>      checking for file ‘/tmp/RtmpOsAI9P/remotes1d48851bb9929/ropensci-stplanr-bdf4836/DESCRIPTION’ ...  ✔  checking for file ‘/tmp/RtmpOsAI9P/remotes1d48851bb9929/ropensci-stplanr-bdf4836/DESCRIPTION’
#>   ─  preparing ‘stplanr’:
#>      checking DESCRIPTION meta-information ...  ✔  checking DESCRIPTION meta-information
#>   ─  cleaning src
#>   ─  checking for LF line-endings in source and make files and shell scripts
#>   ─  checking for empty or unneeded directories
#>   ─  building ‘stplanr_0.7.2.tar.gz’
#>      
#> 
#> Installing package into '/home/robin/R/x86_64-pc-linux-gnu-library/4.0'
#> (as 'lib' is unspecified)
library(stplanr)
region = "isle-of-wight"
routes <- pct::get_pct_routes_fast(region, geography = "lsoa")
system.time({
  rnet2 <- stplanr::overline2(routes, c("bicycle","dutch_slc","ebike_slc"))
})
#>    user  system elapsed 
#>   2.881   0.004   2.885

identical(rnet1, rnet2)
#> [1] TRUE

Created on 2020-09-10 by the reprex package (v0.3.0)