ipeaGIT / gtfs2emis

R package to estimate public transport emissions based on GTFS data
https://ipeagit.github.io/gtfs2emis/
Other
28 stars 2 forks source link

stplanr::overline inside emis_grid #37

Closed Joaobazzo closed 4 years ago

Joaobazzo commented 4 years ago

stplanr::overline seems to be very fast, but it can be applied before passing the data to emis_grid, so we don't need to add an extra dependency

https://journals.sagepub.com/doi/full/10.1177/2399808320942779

Joaobazzo commented 4 years ago
> object.size(temp_emi) %>% format("Mb") # no overline
[1] "6145.8 Mb"
> object.size(temp_emi2) %>% format("Mb") # overline
[1] "80.5 Mb"
> dim(temp_emi) # no overline
[1] 7174757      16
> dim(temp_emi2) # overline
[1] 100973     10
rafapereirabr commented 4 years ago

We could simplify this and do two separate analyses for different periods of the day, for example:

Joaobazzo commented 4 years ago

Perhaps we don't need to use stplanr::overline since our geometries from gtfs2gps::gps_as_sflinestring() are fixed between trip_ids, which means that we don't need to use geometries to aggregate emissions. Reprex below

library(magrittr)
library(data.table)
# Build the example input: read the saopaulo.zip sample feed bundled with
# gtfs2gps, keep only two shape_ids, run gtfs2gps() on the filtered feed and
# convert the result to sf linestrings with gps_as_sflinestring().
spo <- gtfs2gps::read_gtfs(system.file("extdata/saopaulo.zip", 
                                       package = "gtfs2gps")) %>%
  gtfs2gps::filter_by_shape_id(c("51982", "50784")) %>% 
  gtfs2gps::gtfs2gps() %>% 
  gtfs2gps::gps_as_sflinestring()
#> Reading 'agency.txt'
#> Reading 'routes.txt'
#> Reading 'stops.txt'
#> Reading 'stop_times.txt'
#> Reading 'shapes.txt'
#> Reading 'trips.txt'
#> Reading 'calendar.txt'
#> Reading 'frequencies.txt'
#> Converting shapes to sf objects
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
#> Using 1 CPU core
#> Processing the data
#> Some 'speed' values are NA in the returned data

# Let's tidy up: there are no real emission estimates in this example, so the
# `dist` values are reused as a stand-in CO2_total column to aggregate.

spo$CO2_total <- as.numeric(spo$dist)

# Approach 1: let stplanr::overline() aggregate CO2_total, then convert the
# result to a data.table by reference with setDT().
temp_over <- stplanr::overline(sl = spo, 
                               attrib = "CO2_total") %>% data.table::setDT()
#> 2020-10-08 14:55:01 constructing segments
#> 2020-10-08 14:55:02 building geometry
#> 2020-10-08 14:55:02 simplifying geometry
#> 2020-10-08 14:55:02 aggregating flows
#> 2020-10-08 14:55:02 rejoining segments into linestrings

# using data.table
# Approach 2: aggregate by (shape_id, stop_sequence) without using geometries.
# Work on a copy: setDT() converts its argument by reference and `:=` updates
# columns in place, so calling them on `spo` directly would turn the sf object
# into a data.table and overwrite its CO2_total for any later use of `spo`.
temp_dt <- data.table::setDT(data.table::copy(spo))
# Replace each row's CO2_total with the total of its group ...
temp_dt[, CO2_total := sum(CO2_total), by = .(shape_id, stop_sequence)]
# ... then keep only the first row of every group, so each group appears once.
temp_dt <- temp_dt[, .SD[1], by = .(shape_id, stop_sequence)]
# Keep just the columns needed for the comparison with the overline result.
temp_dt <- temp_dt[, .(CO2_total, geometry)]
# results
identical(temp_over[order(CO2_total),]$CO2_total, temp_dt[order(CO2_total),]$CO2_total)
#> [1] TRUE

Created on 2020-10-08 by the reprex package (v0.3.0)

Joaobazzo commented 4 years ago

# Benchmark candidate 1: aggregate the CO2_total attribute with
# stplanr::overline() and hand the result back as a data.table.
#   i: the object passed to overline() as `sl` (e.g. `spo`); it must carry a
#      CO2_total column.
function_overline <- function(i) {
  aggregated <- stplanr::overline(sl = i, attrib = "CO2_total")
  # setDT() converts `aggregated` to a data.table by reference.
  data.table::setDT(aggregated)
  return(aggregated)
}
# Benchmark candidate 2: aggregate CO2_total per (shape_id, stop_sequence) with
# data.table only, leaving the caller's object untouched.
#   i: the object to aggregate (e.g. `spo`); it must carry shape_id,
#      stop_sequence and CO2_total columns.
# Returns a data.table with one row per (shape_id, stop_sequence) group, the
# grouping columns first, and CO2_total holding the group total.
function_datatable <- function(i) {
  # Deep-copy first so that setDT() and `:=` do not modify `i` by reference.
  dt <- data.table::setDT(data.table::copy(i))
  # Overwrite every row's CO2_total with the sum over its group.
  dt[, CO2_total := sum(CO2_total), by = .(shape_id, stop_sequence)]
  # Keep the first row of each group.
  first_rows <- dt[, .SD[1], by = .(shape_id, stop_sequence)]
  return(first_rows)
}

# Time both approaches on the same input `spo`, running each one 20 times.
rbench_output <- rbenchmark::benchmark(dt = function_datatable(spo),
                      over = function_overline(spo),
                      replications = 20)
> rbench_output
  test replications elapsed relative user.self sys.self user.child sys.child
1   dt           20    0.90    1.000      0.82     0.10         NA        NA
2 over           20   24.37   27.078     24.29     0.09         NA        NA