Collect soil quality data and aggregate up to NUTS2 level to join to Rosés-Wolf database on regional GDP. We do this both for NUTS2 and NUTS3 (and for Sweden and Spain separately).
Download data | ||
It looks like this:
Extract of soil quality data | ||||||
id | nuts | region | country | suitability_class_mean | suitability_class_max | suitability_class_min |
---|---|---|---|---|---|---|
Data on wheat suitability from FAO-GAEZ 4, available at the following link.
The data viewer presents the data like so:
knitr::include_graphics("images/fao_interface.PNG")
The specific extract information is:
FAO-GAEZ 4 metadata | |
Field | Value |
---|---|
It is in a raster format - where the earth is broken into tiles.
This is what it looks like on a map:
library(stars)
tif <- read_stars(here::here("data", "siLr_whe_class.tif"))
ggplot() +
geom_stars(data = tif)
We want to aggregate this to the nuts2 levels used in the Rosés-Wolf database on regional GDP.
The NUTS2 shapefiles are from Eurostat
We’re going to work in WGS84 for convenience (EPSG: 4326).
We now need to aggregate up the NUTS2 regions which are joined together in the Rosés-Wolf database on regional GDP.
They are:
Aggregations of NUTS2 regions | |
NUTS2_regions | Aggregation |
---|---|
The following chunk manipulates the shapefile to make it easier to work with in a WGS84 projection.
library(sf)
# map <- read_sf(here::here("maps", "regions_nuts2.shp")) %>% janitor::clean_names()
# map_tbl <- map %>% as_tibble()
# map_tbl %>%
# ggplot(aes(geometry = geometry)) +
# geom_sf() +
# coord_sf()
## Making the map files smaller
# library(rmapshaper)
# map_simple <- ms_simplify(map, keep = 0.1,
# keep_shapes = FALSE)
#
# map_tbl_simple <- map_simple %>% as_tibble()
#
# map_tbl_simple %>% write_rds(here::here("maps", "NUTS2_simple.rds"))
map_simple <- read_rds(here::here("maps", "NUTS2_simple.rds")) %>%
st_as_sf()
map_simple <- map_simple %>%
st_transform(crs = 4326)
We want to crop to only Europe: this is what the raster file looks like now.
# st_crop removes the raster region outside of europe
tif_bbox <- st_crop(tif, map_simple %>% st_bbox())
ggplot() +
geom_stars(data = tif_bbox)
Now we want to calculate the averages within each polygon, along with the min and max values within each polygon.
# devtools::install_github("michaeldorman/geobgu")
library(geobgu)
map_simple_avgs <-
map_simple %>% mutate(
suitability_class_mean = raster_extract(tif_bbox, map_simple, fun = mean, na.rm = TRUE),
suitability_class_max = raster_extract(tif_bbox, map_simple, fun = max, na.rm = TRUE),
suitability_class_min = raster_extract(tif_bbox, map_simple, fun = min, na.rm = TRUE)
)
This is what the averaging procedure produces:
The distribution of minimum and maximum values is quite erratic. The plot below arranges the NUTS2 regions from lowest mean of suitability class to largest. The ribbon shows the
Here is an example of 10 random regions.
nuts_code | suitability_class_mean | suitability_class_max | suitability_class_min |
---|---|---|---|
RO31 | 3.689165 | 9 | 2 |
NL41 | 5.515789 | 6 | 4 |
NL21 | 5.687500 | 7 | 5 |
PL51 | 4.417344 | 8 | 2 |
ITF2 | 6.318841 | 7 | 4 |
FR62 | 4.510094 | 9 | 2 |
NL33 | 5.532258 | 7 | 5 |
NO07 | 9.001366 | 10 | 7 |
EL24 | 6.030973 | 8 | 4 |
AN | 8.500000 | 9 | 8 |
# read in data file
df <- haven::read_dta(here::here("data", "RosesWolf_Regional_Fahad.dta"))
# keep unique nuts codes
df <- df %>%
distinct(id, nuts, region, country)
# change label of joined regions
map_simple_avgs <- map_simple_avgs %>%
st_set_geometry(NULL) %>%
mutate(nuts = case_when(
nuts_code == "AT123" ~ "AT12+AT13",
nuts_code == "DE712" ~ "DE71+DE72",
nuts_code == "DE912" ~ "DE91+DE92",
TRUE ~ nuts_code
))
# join together
df_out <- df %>%
left_join(map_simple_avgs) %>%
select(-nuts_code)
# df_out %>%
# write_rds(here::here("data", "soil_suitability.rds"))
# df_out %>%
# write.csv(here::here("data", "soil_suitability.csv"))
The NUTS3 shapefiles are from Eurostat
We’re going to work in WGS84 for convenience (EPSG: 4326).
We now need to aggregate up the NUTS3 regions which are joined together in the Rosés-Wolf database on regional GDP for Sweden and Spain.
They are:
Aggregations of NUTS2 regions | |
NUTS2_regions | Aggregation |
---|---|
The following chunk manipulates the shapefile to make it easier to work with in a WGS84 projection.
library(sf)
map <- read_sf(here::here("maps", "NUTS_RG_01M_2021_4326.shp")) %>% janitor::clean_names()
map_tbl <- map %>% as_tibble()
map_tbl %>%
ggplot(aes(geometry = geometry)) +
geom_sf() +
coord_sf()
# Making the map files smaller
library(rmapshaper)
map_simple <- ms_simplify(map, keep = 0.1,
keep_shapes = FALSE)
map_tbl_simple <- map_simple %>% as_tibble()
map_tbl_simple %>% write_rds(here::here("maps", "NUTS3_simple.rds"))
map_simple <- read_rds(here::here("maps", "NUTS3_simple.rds")) %>%
st_as_sf() %>%
filter(levl_code == 3)
map_simple <- map_simple %>%
st_transform(crs = 4326)
We want to crop to only Europe: this is what the raster file looks like now.
# st_crop removes the raster region outside of europe
tif_bbox <- st_crop(tif, map_simple %>% st_bbox())
ggplot() +
geom_stars(data = tif_bbox)
Now we want to calculate the averages within each polygon, along with the min and max values within each polygon.
# devtools::install_github("michaeldorman/geobgu")
library(geobgu)
map_simple_avgs <-
map_simple %>% mutate(
suitability_class_mean = raster_extract(tif_bbox, map_simple, fun = mean, na.rm = TRUE),
suitability_class_max = raster_extract(tif_bbox, map_simple, fun = max, na.rm = TRUE),
suitability_class_min = raster_extract(tif_bbox, map_simple, fun = min, na.rm = TRUE)
)
map_simple_avgs <- map_simple %>%
bind_cols(map_simple_avgs$suitability_class_mean %>% as_tibble() %>% rename(suitability_class_mean = V1)) %>%
bind_cols(map_simple_avgs$suitability_class_max %>% as_tibble() %>% rename(suitability_class_max = V1)) %>%
bind_cols(map_simple_avgs$suitability_class_min %>% as_tibble() %>% rename(suitability_class_min = V1))
This is what the averaging procedure produces:
The distribution of minimum and maximum values is quite erratic. The plot below arranges the NUTS2 regions from lowest mean of suitability class to largest. The ribbon shows the
Here is an example of 10 random regions.
nuts_id | levl_code | cntr_code | name_latn | nuts_name | mount_type | urbn_type | coast_type | fid | suitability_class_mean | suitability_class_max | suitability_class_min |
---|---|---|---|---|---|---|---|---|---|---|---|
DE735 | 3 | DE | Schwalm-Eder-Kreis | Schwalm-Eder-Kreis | 4 | 3 | 3 | DE735 | 3.933333 | 5 | 2 |
EL307 | 3 | EL | Peiraias, Nisoi | Πειραιάς, Νήσοι | 2 | 1 | 1 | EL307 | 6.454546 | 7 | 5 |
AT125 | 3 | AT | Weinviertel | Weinviertel | 4 | 3 | 3 | AT125 | 2.500000 | 4 | 2 |
FRC14 | 3 | FR | Yonne | Yonne | 4 | 3 | 3 | FRC14 | 3.515625 | 6 | 2 |
DE27A | 3 | DE | Lindau (Bodensee) | Lindau (Bodensee) | 3 | 2 | 3 | DE27A | 3.857143 | 10 | 2 |
CZ052 | 3 | CZ | Královéhradecký kraj | Královéhradecký kraj | 4 | 2 | 3 | CZ052 | 3.600000 | 7 | 2 |
ITC13 | 3 | IT | Biella | Biella | 3 | 2 | 3 | ITC13 | 5.066667 | 8 | 3 |
FRJ15 | 3 | FR | Pyrénées-Orientales | Pyrénées-Orientales | 2 | 2 | 1 | FRJ15 | 6.129032 | 9 | 3 |
DE934 | 3 | DE | Lüchow-Dannenberg | Lüchow-Dannenberg | 4 | 3 | 3 | DE934 | 5.307692 | 6 | 4 |
DEB3C | 3 | DE | Bad Dürkheim | Bad Dürkheim | 4 | 2 | 3 | DEB3C | 4.500000 | 6 | 2 |
map_simple_avgs %>%
as_tibble() %>%
select(-geometry) %>%
janitor::clean_names() %>%
# filter(cntr_code %in% c("SE", "ES")) %>%
write_excel_csv2(here::here("data", "soil_suitability_nuts_3.csv"))
map_simple_avgs %>%
as_tibble() %>%
select(-geometry) %>%
janitor::clean_names() %>%
filter(cntr_code %in% c("ES")) %>%
write_excel_csv2(here::here("data", "soil_suitability_nuts_3_ES.csv"))
Apparently we want the older Swedish regions: of which there are 24. See the Issue 1 on Github.
library(histmaps)
se_map <- get_boundaries(1980, "county")
se_map <- se_map %>%
st_transform(crs = 4326)
se_map %>%
ggplot(aes(fill = name)) +
geom_sf(alpha = .5)
We want to crop to only Sweden: this is what the raster file looks like now.
# st_crop removes the raster region outside of europe
tif_bbox <- st_crop(tif, se_map %>% st_bbox())
ggplot() +
geom_stars(data = tif_bbox)
Now we want to calculate the averages within each polygon, along with the min and max values within each polygon.
# devtools::install_github("michaeldorman/geobgu")
library(geobgu)
map_simple_avgs_sweden <-
se_map %>% mutate(
suitability_class_mean = raster_extract(tif_bbox, se_map, fun = mean, na.rm = TRUE),
suitability_class_max = raster_extract(tif_bbox, se_map, fun = max, na.rm = TRUE),
suitability_class_min = raster_extract(tif_bbox, se_map, fun = min, na.rm = TRUE)
)
map_simple_avgs_sweden <- se_map %>%
bind_cols(map_simple_avgs_sweden$suitability_class_mean %>% as_tibble() %>% rename(suitability_class_mean = V1)) %>%
bind_cols(map_simple_avgs_sweden$suitability_class_max %>% as_tibble() %>% rename(suitability_class_max = V1)) %>%
bind_cols(map_simple_avgs_sweden$suitability_class_min %>% as_tibble() %>% rename(suitability_class_min = V1))
This is what the averaging procedure produces:
The distribution of minimum and maximum values is quite erratic. The plot below arranges the NUTS2 regions from lowest mean of suitability class to largest. The ribbon shows the
Here is an example of 10 random regions.
geom_id | ref_code | name | type | type_id | start | end | suitability_class_mean | suitability_class_max | suitability_class_min |
---|---|---|---|---|---|---|---|---|---|
12496 | SE/110000000 | Kristianstads | County | county | 1971 | 9999 | 4.279070 | 6 | 2 |
12506 | SE/130000000 | Hallands | County | county | 1974 | 9999 | 5.074380 | 6 | 5 |
12502 | SE/120000000 | Skåne | County | county | 1971 | 9999 | 2.922330 | 5 | 2 |
12547 | SE/010000000 | Stockholms | County | county | 1971 | 9999 | 5.401316 | 6 | 5 |
12540 | SE/210000000 | Gävleborgs | County | county | 1864 | 9999 | 7.456612 | 10 | 6 |
12548 | SE/230000000 | Jämtlands | County | county | 1974 | 9999 | 8.960954 | 10 | 6 |
12532 | SE/180000000 | Örebro | County | county | 1974 | 9999 | 6.202830 | 10 | 4 |
12555 | SE/250000000 | Norrbottens | County | county | 1869 | 9999 | 8.643200 | 10 | 6 |
12527 | SE/170000000 | Värmlands | County | county | 1971 | 9999 | 6.400891 | 10 | 5 |
12537 | SE/200000000 | Dalarnas | County | county | 0 | 9999 | 7.388579 | 10 | 6 |
Export Data