ropensci / stplanr

Sustainable transport planning with R
https://docs.ropensci.org/stplanr
Other
417 stars 66 forks source link

Unhelpful error message in when overline fails trying to join 2 route networks #466

Closed natesheehan closed 3 years ago

natesheehan commented 3 years ago

reprex example of error:

library(pct)
#> Warning: package 'pct' was built under R version 4.0.5
library(stplanr)
#> Warning: package 'stplanr' was built under R version 4.0.5
region_name = "north-yorkshire"
rnet_all = get_pct_rnet(region_name)

# get pct rnet data for schools
rnet_school = get_pct_rnet(region = region_name, purpose = "school")
rnet_school = subset(rnet_school, select = -c(`cambridge_slc`)) # subset columns for bind
rnet_all = subset(rnet_all, select = -c(`ebike_slc`,`gendereq_slc`,`govnearmkt_slc`)) # subset columns for bind 
rnet_school_commute = rbind(rnet_all,rnet_school) # bind commute and schools rnet data

rnet_school_commute = overline(rbind(rnet_all,rnet_school_commute),"dutch_slc")
#> 2021-08-24 15:20:12 constructing segments
#> 2021-08-24 15:20:14 building geometry
#> 2021-08-24 15:20:19 simplifying geometry
#> large data detected, using regionalisation, nrow = 155437
#> Error in FUN(X[[i]], ...): subscript out of bounds

Created on 2021-08-24 by the reprex package (v2.0.0)

Robinlovelace commented 3 years ago

Thanks for reporting. I can reproduce this...

Robinlovelace commented 3 years ago

Also tried this...

library(pct)
library(stplanr)
library(sf)
region_name = "north-yorkshire"
rnet_all = get_pct_rnet(region_name)

# get pct rnet data for schools
rnet_school = get_pct_rnet(region = region_name, purpose = "school")
routes_fast_commute = get_pct_routes_fast(region_name)

shared_names = intersect(names(rnet_school), names(routes_fast_commute))
rnet_school_minimal = rnet_school[shared_names]
routes_fast_commute_minimal = routes_fast_commute[shared_names]

rnet_school_plus_routes = rbind(rnet_school_minimal, routes_fast_commute_minimal)
rnet_combined = overline(rnet_school_plus_routes, attrib = "dutch_slc")
Robinlovelace commented 3 years ago

Here's a solution that gets the joined network:

library(pct)
library(stplanr)
library(sf)
library(dplyr)
region_name = "north-yorkshire"
rnet_all = get_pct_rnet(region_name)

# get pct rnet data for schools
rnet_school = get_pct_rnet(region = region_name, purpose = "school")
rnet_school = subset(rnet_school, select = -c(`cambridge_slc`)) # subset columns for bind
rnet_all = subset(rnet_all, select = -c(`ebike_slc`,`gendereq_slc`,`govnearmkt_slc`)) # subset columns for bind 

rnet_school_commute = rbind(rnet_all,rnet_school) # bind commute and schools rnet data
rnet_school_commute$duplicated_geometries = duplicated(rnet_school_commute$geometry)
summary(rnet_school_commute$duplicated_geometries)
rnet_school_commute$geometry_txt = sf::st_as_text(rnet_school_commute$geometry)
head(rnet_school_commute$geometry_txt)
rnet_combined = rnet_school_commute %>% 
  group_by(geometry_txt) %>% 
  summarise_if(is.numeric, .funs = sum)
summary(rnet_combined$dutch_slc)
summary(rnet_all$dutch_slc)

The results are promising:

> summary(rnet_combined$dutch_slc)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    0.0     8.0    40.0   124.3   145.0  3306.0      23 
> summary(rnet_all$dutch_slc)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0       6      35     109     129    3264 

The original use case wasn't route networks as inputs but entire routes, which is I think why it failed. However, it would be good of the function to at least provide a helpful error message.

natesheehan commented 3 years ago

aha! Nice spot Robin - I shall close this issue and add the working method to the pct vignette! Thanks

mem48 commented 3 years ago

@Robinlovelace following up on your email. Was the problem that overline doesn't support NA values?

Robinlovelace commented 3 years ago

@Robinlovelace following up on your email. Was the problem that overline doesn't support NA values?

No it still fails if you add na.omit() to the input building on the reproducible example above.

Robinlovelace commented 3 years ago

Updated reprex demonstrating the issue described in the updated title of this issue.

library(pct)
library(stplanr)
region_name = "north-yorkshire"
rnet_all = get_pct_rnet(region_name)

# get pct rnet data for schools
rnet_school = get_pct_rnet(region = region_name, purpose = "school")
rnet_school = subset(rnet_school, select = -c(`cambridge_slc`)) # subset columns for bind
rnet_all = subset(rnet_all, select = -c(`ebike_slc`,`gendereq_slc`,`govnearmkt_slc`)) # subset columns for bind 
rnet_school_commute = rbind(rnet_all, rnet_school) # bind commute and schools rnet data
rnet_school_commute = na.omit(rnet_school_commute)
summary(rnet_school_commute)
#>     local_id         bicycle        govtarget_slc       dutch_slc     
#>  Min.   :     1   Min.   :   0.00   Min.   :   0.00   Min.   :   0.0  
#>  1st Qu.:  3417   1st Qu.:   0.00   1st Qu.:   1.00   1st Qu.:   8.0  
#>  Median :  6833   Median :   4.00   Median :   7.00   Median :  39.0  
#>  Mean   :118088   Mean   :  22.84   Mean   :  34.74   Mean   : 102.1  
#>  3rd Qu.:257856   3rd Qu.:  15.00   3rd Qu.:  28.00   3rd Qu.: 120.0  
#>  Max.   :481066   Max.   :1183.00   Max.   :1697.00   Max.   :3264.0  
#>           geometry    
#>  LINESTRING   :13665  
#>  epsg:4326    :    0  
#>  +proj=long...:    0  
#>                       
#>                       
#> 

rnet_school_commute = overline(rnet_school_commute,"dutch_slc")
#> 2021-09-01 22:35:35 constructing segments
#> 2021-09-01 22:35:36 building geometry
#> 2021-09-01 22:35:40 simplifying geometry
#> large data detected, using regionalisation, nrow = 154142
#> Error in FUN(X[[i]], ...): subscript out of bounds

Created on 2021-09-01 by the reprex package (v2.0.1)

Any ideas @mem48 ? I thought for a moment NAs were the culprit but it seems something else is going on...

Interestingly it works for Isle of Wight data as shown below.

``` r library(pct) library(stplanr) region_name = "isle-of-wight" rnet_all = get_pct_rnet(region_name) # get pct rnet data for schools rnet_school = get_pct_rnet(region = region_name, purpose = "school") rnet_school = subset(rnet_school, select = -c(`cambridge_slc`)) # subset columns for bind rnet_all = subset(rnet_all, select = -c(`ebike_slc`,`gendereq_slc`,`govnearmkt_slc`)) # subset columns for bind rnet_school_commute = rbind(rnet_all, rnet_school) # bind commute and schools rnet data rnet_school_commute = na.omit(rnet_school_commute) summary(rnet_school_commute) #> local_id bicycle govtarget_slc dutch_slc #> Min. : 1.0 Min. : 0.00 Min. : 0.00 Min. : 0.00 #> 1st Qu.: 507.5 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 13.00 #> Median : 1014.0 Median : 4.00 Median : 7.00 Median : 39.00 #> Mean : 55632.1 Mean : 11.53 Mean : 18.77 Mean : 71.94 #> 3rd Qu.: 17230.5 3rd Qu.: 12.00 3rd Qu.: 21.00 3rd Qu.: 93.00 #> Max. :296168.0 Max. :278.00 Max. :406.00 Max. :987.00 #> geometry #> LINESTRING :2027 #> epsg:4326 : 0 #> +proj=long...: 0 #> #> #> rnet_school_commute = overline(rnet_school_commute,"dutch_slc") #> 2021-09-01 22:34:27 constructing segments #> 2021-09-01 22:34:27 building geometry #> 2021-09-01 22:34:28 simplifying geometry #> 2021-09-01 22:34:28 aggregating flows #> 2021-09-01 22:34:28 rejoining segments into linestrings ``` Created on 2021-09-01 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)
mem48 commented 3 years ago

I've had a look and this might be a bug in sf

The code is failing at:

https://github.com/ropensci/stplanr/blob/ffd4add3245a277cbe602e96f9375caec9a119b8/R/overline.R#L318

Because one of the centroids of the line does not intersect with the grid. This should be impossible as the grid is defined based on the centroids

https://github.com/ropensci/stplanr/blob/ffd4add3245a277cbe602e96f9375caec9a119b8/R/overline.R#L317

I can reproduce the problem in sf

library(sf)

# Make some radom lat/lng points in Yorkshire
p <- data.frame(id = 1:1e5,
                lat = runif(1e5, 53, 54), 
                lng = runif(1e5, -2.7, -0.2))
p <- st_as_sf(p, coords = c("lng","lat"), crs = 4326)

# Make a grid around those points
grid <- st_make_grid(p, what = "polygons")

# Check those points intersect the grid
inter <- st_intersects(p, grid)

summary(lengths(inter)) # Should always be > 0
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 0       1       1       1       1       1
Robinlovelace commented 3 years ago

Worth opening an issue in the sf issue tracker I reckon...

mem48 commented 3 years ago

In the meantime, a quick fix is regionalise = 9e99 which will disable the regionalisation which causes the problem. Works for Yorkshire, but may be slow in larger areas.

mem48 commented 3 years ago

I've made a PR, let me know if you find any problems

Robinlovelace commented 3 years ago

:tada: great work Malcolm, that works. Heads-up also @natesheehan on the fix.

remotes::install_github("ropensci/stplanr")
#> Using github PAT from envvar GITHUB_PAT
#> Skipping install of 'stplanr' from a github remote, the SHA1 (e648e99b) has not changed since last install.
#>   Use `force = TRUE` to force installation
library(pct)
library(stplanr)
region_name = "north-yorkshire"
rnet_all = get_pct_rnet(region_name)

# get pct rnet data for schools
rnet_school = get_pct_rnet(region = region_name, purpose = "school")
rnet_school = subset(rnet_school, select = -c(`cambridge_slc`)) # subset columns for bind
rnet_all = subset(rnet_all, select = -c(`ebike_slc`,`gendereq_slc`,`govnearmkt_slc`)) # subset columns for bind 
rnet_school_commute = rbind(rnet_all, rnet_school) # bind commute and schools rnet data
rnet_school_commute = na.omit(rnet_school_commute)
summary(rnet_school_commute)
#>     local_id         bicycle        govtarget_slc       dutch_slc     
#>  Min.   :     1   Min.   :   0.00   Min.   :   0.00   Min.   :   0.0  
#>  1st Qu.:  3417   1st Qu.:   0.00   1st Qu.:   1.00   1st Qu.:   8.0  
#>  Median :  6833   Median :   4.00   Median :   7.00   Median :  39.0  
#>  Mean   :118088   Mean   :  22.84   Mean   :  34.74   Mean   : 102.1  
#>  3rd Qu.:257856   3rd Qu.:  15.00   3rd Qu.:  28.00   3rd Qu.: 120.0  
#>  Max.   :481066   Max.   :1183.00   Max.   :1697.00   Max.   :3264.0  
#>           geometry    
#>  LINESTRING   :13665  
#>  epsg:4326    :    0  
#>  +proj=long...:    0  
#>                       
#>                       
#> 
rnet_school_commute = overline(rnet_school_commute,"dutch_slc")
#> 2021-09-02 12:18:24 constructing segments
#> 2021-09-02 12:18:24 building geometry
#> 2021-09-02 12:18:28 simplifying geometry
#> large data detected, using regionalisation, nrow = 154142
#> Error in st_make_grid(bbox, what = "polygons"): could not find function "st_make_grid"
plot(rnet_school_commute[1])

Created on 2021-09-02 by the reprex package (v2.0.1)