Closed Joaobazzo closed 4 years ago
> object.size(temp_emi) %>% format("Mb") # no overline
[1] "6145.8 Mb"
> object.size(temp_emi2) %>% format("Mb") # overline
[1] "80.5 Mb"
> dim(temp_emi) # no overline
[1] 7174757 16
> dim(temp_emi2) # overline
[1] 100973 10
We could simplify this and do two separate analyses for different periods of the day, for example:
Perhaps we don't need to use stplanr::overline
since our geometries from gtfs2gps::gps_as_sflinestring()
are fixed between trip_ids, which means that we don't need to use geometries to aggregate emissions: grouping by shape_id and stop_sequence is enough. See the reprex below.
# Reprex: compare stplanr::overline() with a plain data.table group-by for
# aggregating the CO2_total attribute of the same set of linestrings.
library(magrittr)
library(data.table)
# Build the test data from the saopaulo.zip GTFS sample shipped with gtfs2gps,
# keep only two shape ids, and convert the result to sf linestrings.
spo <- gtfs2gps::read_gtfs(system.file("extdata/saopaulo.zip",
package = "gtfs2gps")) %>%
gtfs2gps::filter_by_shape_id(c("51982", "50784")) %>%
gtfs2gps::gtfs2gps() %>%
gtfs2gps::gps_as_sflinestring()
#> Reading 'agency.txt'
#> Reading 'routes.txt'
#> Reading 'stops.txt'
#> Reading 'stop_times.txt'
#> Reading 'shapes.txt'
#> Reading 'trips.txt'
#> Reading 'calendar.txt'
#> Reading 'frequencies.txt'
#> Converting shapes to sf objects
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
#> Using 1 CPU core
#> Processing the data
#> Some 'speed' values are NA in the returned data
# let's tidy up: CO2_total is the numeric version of the `dist` column
# (presumably a placeholder for real emission values -- confirm)
spo$CO2_total <- as.numeric(spo$dist)
# using overline
# Approach 1: let stplanr::overline() aggregate CO2_total, then convert the
# result to a data.table.
temp_over <- stplanr::overline(sl = spo,
attrib = "CO2_total") %>% data.table::setDT()
#> 2020-10-08 14:55:01 constructing segments
#> 2020-10-08 14:55:02 building geometry
#> 2020-10-08 14:55:02 simplifying geometry
#> 2020-10-08 14:55:02 aggregating flows
#> 2020-10-08 14:55:02 rejoining segments into linestrings
# using data.table
# Approach 2: sum CO2_total within each (shape_id, stop_sequence) group.
# NOTE: setDT() converts `spo` to a data.table by reference, so `spo` itself
# is modified from this line on (this is why overline() is run first).
# `:=` writes the group sum into every row of the group; the chained
# `.SD[1]` then keeps only the first row of each group.
temp_dt <- data.table::setDT(spo)[, "CO2_total" := lapply(.SD, sum), .SDcols = ("CO2_total"),
by = .(shape_id,stop_sequence)][,.SD[1],by = .(shape_id,stop_sequence)]
# Keep only the aggregated value and the geometry column.
temp_dt <- temp_dt[,.(CO2_total,geometry)]
# results
# Compare the CO2_total values of both approaches after sorting them, since
# the two results are not guaranteed to come back in the same row order.
identical(temp_over[order(CO2_total),]$CO2_total, temp_dt[order(CO2_total),]$CO2_total)
#> [1] TRUE
Created on 2020-10-08 by the reprex package (v0.3.0)
# Benchmark wrapper around the stplanr::overline() approach.
#
# i: the linestring object to aggregate (called with `spo` in the benchmark);
#    it must carry a "CO2_total" attribute column.
# Returns the overline() result converted to a data.table.
function_overline <- function(i) {
  aggregated <- data.table::setDT(
    stplanr::overline(sl = i, attrib = "CO2_total")
  )
  # setDT() returns invisibly; ending on the bare name keeps the result
  # visible to the caller.
  aggregated
}
# Benchmark wrapper around the data.table group-by approach.
#
# i: the object to aggregate; it must have the columns shape_id,
#    stop_sequence and CO2_total.
# Returns a data.table with one row per (shape_id, stop_sequence) group whose
# CO2_total holds the group sum; the other columns come from the group's
# first row.
function_datatable <- function(i) {
  # Deep copy first so that setDT() and `:=` never touch the caller's object.
  emissions <- data.table::setDT(data.table::copy(i))
  # Overwrite CO2_total in every row with the sum over its group.
  emissions[, CO2_total := sum(CO2_total),
            by = c("shape_id", "stop_sequence")]
  # Collapse each group to its first row.
  first_per_group <- emissions[, .SD[1L],
                               by = c("shape_id", "stop_sequence")]
  first_per_group
}
# Time both aggregation strategies on the same input, 20 replications each.
rbench_output <- rbenchmark::benchmark(
  dt = function_datatable(spo),
  over = function_overline(spo),
  replications = 20
)
> rbench_output
test replications elapsed relative user.self sys.self user.child sys.child
1 dt 20 0.90 1.000 0.82 0.10 NA NA
2 over 20 24.37 27.078 24.29 0.09 NA NA
stplanr::overline
seems to be very fast, but it can be used before inputting the data into emis_grid, so we don't need to create an extra dependency. See https://journals.sagepub.com/doi/full/10.1177/2399808320942779