transportdata
is a package containing transport datasets for teaching,
methodological research and software development (e.g. for
benchmarking). The aim is for it to be for people working with transport
data what spData
, a package used for teaching and demonstrating
spatial data methods, is for people working with spatial data. The
package will be especially useful for transport/mobility researchers
using R.
Install the development version from GitHub with:
# install.packages("devtools")
devtools::install_github("ITSLeeds/transportdata")
The datasets included in the package, and code chunks used to create them, are outlined below. After you have installed the package, the datasets should be available.
library(transportdata)
To reproduce the following code, you will need to to have installed a number of packages. The following packages must also be loaded:
library(sf)
library(dplyr)
library(stplanr)
OD data is arguably the most fundamental type of data for representing city to global transport systems. The OD dataset used for this package represents the number of commutes between a subset of desire lines between administrative zones in Leeds, UK. The dataset was generated as follows:
od_data = pct::get_od()
# [1] 2402201
nrow(od_data)
od_data_leeds = od_data %>%
filter(la_1 == "Leeds") %>%
filter(la_2 == "Leeds")
nrow(od_data_leeds)
# [1] 35463
od_leeds = od_data_leeds %>%
filter(all >= 50)
usethis::use_data(od_leeds, overwrite = TRUE)
dim(transportdata::od_leeds)
#> [1] 948 18
z = pct::get_pct_zones("west-yorkshire", geography = "msoa")
zones_leeds = z %>%
filter(lad_name == "Leeds") %>%
select(geo_code:taxi_other, dutch_slc)
usethis::use_data(zones_leeds)
dim(zones_leeds)
#> [1] 107 15
names(zones_leeds)
#> [1] "geo_code" "geo_name" "lad11cd" "lad_name"
#> [5] "all" "bicycle" "foot" "car_driver"
#> [9] "car_passenger" "motorbike" "train_tube" "bus"
#> [13] "taxi_other" "dutch_slc" "geometry"
plot(zones_leeds[5:14])
#> Warning: plotting the first 9 out of 10 attributes; use max.plot = 10 to plot
#> all
dim(centroids_leeds)
#> [1] 107 2
names(centroids_leeds)
#> [1] "geo_code" "geometry"
plot(centroids_leeds)
The route data was generated as follows:
od_interzonal = od_leeds %>%
filter(geo_code1 != geo_code2)
desire_lines_leeds = od::od_to_sf(od_interzonal, centroids_leeds)
#> 0 origins with no match in zone ids
#> 0 destinations with no match in zone ids
#> points not in od data removed.
desire_lines_leeds_10 = desire_lines_leeds %>%
top_n(n = 10, wt = all)
plot(desire_lines_leeds_10)
#> Warning: plotting the first 10 out of 18 attributes; use max.plot = 18 to plot
#> all
# get route data for top 10 routes for different modes
routes_leeds_foot_10 = route(l = desire_lines_leeds_10, route_fun = route_osrm, osrm.profile = "foot")
routes_leeds_bike_10 = route(l = desire_lines_leeds_10, route_fun = route_osrm, osrm.profile = "bike")
routes_leeds_car_10 = route(l = desire_lines_leeds_10, route_fun = route_osrm, osrm.profile = "car")
plot(routes_leeds_foot_10$geometry, lwd = 6, col = "green")
plot(routes_leeds_bike_10$geometry, lwd = 4, col = "blue", add = TRUE)
plot(routes_leeds_car_10$geometry, lwd = 2, col = "red", add = TRUE)
You can get the full routes as follows:
routes_leeds_foot = get_td("routes_leeds_foot")
#> Reading in the file from https://github.com/ITSLeeds/transportdata/releases/download/0.1/routes_leeds_foot.Rds
routes_leeds_bike = get_td("routes_leeds_bike")
#> Reading in the file from https://github.com/ITSLeeds/transportdata/releases/download/0.1/routes_leeds_bike.Rds
routes_leeds_car = get_td("routes_leeds_car")
#> Reading in the file from https://github.com/ITSLeeds/transportdata/releases/download/0.1/routes_leeds_car.Rds
rnet_leeds_foot = overline(routes_leeds_foot, "foot")
plot(rnet_leeds_foot["foot"], lwd = rnet_leeds_foot$foot / 500)
rnet_leeds_bike = overline(routes_leeds_bike, "bicycle")
plot(rnet_leeds_bike["bicycle"], lwd = rnet_leeds_bike$bicycle / 500)
rnet_leeds_car = overline(routes_leeds_car, "car_driver")
plot(rnet_leeds_car["car_driver"], lwd = rnet_leeds_car$car_driver / 500)
Data on air quality was obtained as follows:
install.packages("openair")
library(openair)
all_sites = importMeta()
# View(all_sites)
all_sites_sf = sf::st_as_sf(all_sites, coords = c("longitude", "latitude"), crs = 4326)
leeds_sites = all_sites_sf[zones_leeds, ]
headingley = importAURN(site = "LED6", year = 2017:2019, pollutant = c("nox", "no2"))
summary(headingley)
table(headingley$site)
air_quality_leeds_raw = importAURN(site = leeds_sites$code, year = 2017:2019, pollutant = c("nox", "no2"))
air_quality_leeds = right_join(leeds_sites, air_quality_leeds_raw)
usethis::use_data(air_quality_leeds, overwrite = TRUE)
library(openair)
dim(air_quality_leeds)
#> [1] 52560 7
names(air_quality_leeds)
#> [1] "site" "code" "site_type" "geometry" "date" "nox"
#> [7] "no2"
table(air_quality_leeds$site)
#>
#> Leeds Centre Leeds Headingley Kerbside
#> 26280 26280
air_quality_leeds %>%
sf::st_drop_geometry() %>%
filter(site == "Leeds Centre") %>%
TheilSen(pollutant = "no2", ylab = "NO2 (ug/m3)", deseason = TRUE, main = "Leeds Centre")
#> [1] "Taking bootstrap samples. Please wait."
air_quality_leeds %>%
sf::st_drop_geometry() %>%
filter(site == "Leeds Headingley Kerbside") %>%
TheilSen(pollutant = "no2", ylab = "NO2 (ug/m3)", deseason = TRUE, main = "Leeds Headingley Kerbside")
#> [1] "Taking bootstrap samples. Please wait."
air_quality_summary = air_quality_leeds %>%
group_by(site, year = substring(date, 1, 4)) %>%
summarise(
mean_no2 = mean(no2, na.rm = TRUE),
mean_nox = mean(nox, na.rm = TRUE)
)
#> `summarise()` regrouping output by 'site' (override with `.groups` argument)
library(tmap)
tm_shape(air_quality_summary, bbox = tmaptools::bb(air_quality_leeds, 1.5)) +
tm_dots("mean_nox", size = 5) +
tm_facets(by = "year", nrow = 1) +
tm_shape(routes_leeds_foot_10) +
tm_lines()
u = "http://apollochoicemodelling.com/files/apollo_timeUseData.csv"
time_survey_leeds = readr::read_csv(u)
usethis::use_data(time_survey_leeds)
head(time_survey_leeds)
#> # A tibble: 6 x 20
#> indivID day date budget t_a01 t_a02 t_a03 t_a04 t_a05 t_a06 t_a07 t_a08
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 19209 2 2.02e7 1440 39 0 0 54 4 0 0 0
#> 2 19209 3 2.02e7 1440 23 0 0 0 0 0 0 0
#> 3 19209 7 2.02e7 1440 0 0 0 140 0 0 0 0
#> 4 19209 8 2.02e7 1440 89 0 0 43 0 1 164 0
#> 5 19209 9 2.02e7 1440 727 0 0 112 0 0 0 0
#> 6 56459 2 2.02e7 1440 346 511 0 0 0 0 0 0
#> # … with 8 more variables: t_a09 <dbl>, t_a10 <dbl>, t_a11 <dbl>, t_a12 <dbl>,
#> # female <dbl>, age <dbl>, occ_full_time <dbl>, weekend <dbl>
Please note that the transportdata project is released with a Contributor Code of Conduct. By contributing to this project, you agree to abide by its terms.