Discrepancies in results based on sf vs raster objects

echelleburns commented 6 months ago

Using patchwise to prep data for prioritizations results in two different outputs, depending on whether input data are rasters or sf objects. See code and results below:

# Load packages 
devtools::load_all()
library(prioritizr) 

# Create areas
area <- offshoredatr::get_area(area_name = "Bermuda")
projection <- 'PROJCS["ProjWiz_Custom_Lambert_Azimuthal", GEOGCS["GCS_WGS_1984", DATUM["D_WGS_1984", SPHEROID["WGS_1984",6378137.0,298.257223563]], PRIMEM["Greenwich",0.0], UNIT["Degree",0.0174532925199433]], PROJECTION["Lambert_Azimuthal_Equal_Area"], PARAMETER["False_Easting",0.0], PARAMETER["False_Northing",0.0], PARAMETER["Central_Meridian",-64.5], PARAMETER["Latitude_Of_Origin",32], UNIT["Meter",1.0]]'

# Create a planning grid
planning_rast <- offshoredatr::get_planning_grid(area, projection_crs = projection, option = "raster") 

planning_sf <- offshoredatr::get_planning_grid(area, projection_crs = projection, option = "sf_square") %>% 
  dplyr::rename(geometry = x)

# Grab all relevant data
features_rast <- offshoredatr::get_features(area, planning_rast)

features_sf <- offshoredatr::get_features(area, planning_sf %>% dplyr::rename(x = geometry))

# Create a "cost" to protecting a cell - just a uniform cost for this example
cost_rast <- setNames(planning_rast, "cost")

cost_sf <- planning_sf %>%
  dplyr::mutate(cost = 1)

# Separate seamount data - we want to protect entire patches
seamounts_sf <- features_sf %>% 
  dplyr::select(seamounts)

features_sf <- features_sf %>% 
  dplyr::select(-seamounts)

seamounts_rast <- features_rast[["seamounts"]]
features_rast <- features_rast[[names(features_rast)[names(features_rast) != "seamounts"]]]

# Create seamount patches - seamount areas that touch are considered the same patch
patches_sf <- patchwise::create_patches(feature = seamounts_sf, 
                                        planning_grid = planning_sf)

patches_rast <- patchwise::create_patches(feature = seamounts_rast)

# Create patches dataframe - this creates several constraints so that entire seamount units are protected together
patches_df_sf <- patchwise::create_patch_df(planning_grid = planning_sf, features = features_sf, patches = patches_sf, costs = cost_sf)

patches_df_rast <- patchwise::create_patch_df(planning_grid = planning_rast, features = features_rast, patches = patches_rast, costs = cost_rast)

# Create boundary matrix for prioritizr
boundary_matrix_sf <- patchwise::create_boundary_matrix(planning_grid = planning_sf, patches = patches_sf, patch_df = patches_df_sf)

boundary_matrix_rast <- patchwise::create_boundary_matrix(planning_grid = planning_rast, patches = patches_rast, patch_df = patches_df_rast)

# Create targets for protection - let's just do 20% for each feature (including 20% of whole seamounts)
targets_sf <- patchwise::features_targets(targets = rep(0.2, ncol(features_sf)), features = features_sf, pre_patches = seamounts_sf)

targets_rast <- patchwise::features_targets(targets = rep(0.2, terra::nlyr(features_rast) + 1), features = features_rast, pre_patches = seamounts_rast)

# Add these targets to targets for protection for the "constraints" we introduced to protect entire seamount units
constraints_sf <- patchwise::constraints_targets(feature_targets = targets_sf, patch_df = patches_df_sf)

constraints_rast <- patchwise::constraints_targets(feature_targets = targets_rast, patch_df = patches_df_rast)

# Run the prioritization
problem_sf <- prioritizr::problem(x = patches_df_sf, features = constraints_sf$feature, cost_column = "cost") %>%
  prioritizr::add_min_set_objective() %>%
  prioritizr::add_manual_targets(constraints_sf) %>%
  prioritizr::add_binary_decisions() %>%
  prioritizr::add_boundary_penalties(penalty = 0.000002, data = boundary_matrix_sf) %>%
  prioritizr::add_gurobi_solver(gap = 0.1, threads = parallel::detectCores()-1)

problem_rast <- prioritizr::problem(x = patches_df_rast, features = constraints_rast$feature, cost_column = "cost") %>%
  prioritizr::add_min_set_objective() %>%
  prioritizr::add_manual_targets(constraints_rast) %>%
  prioritizr::add_binary_decisions() %>%
  prioritizr::add_boundary_penalties(penalty = 0.000002, data = boundary_matrix_rast) %>%
  prioritizr::add_gurobi_solver(gap = 0.1, threads = parallel::detectCores()-1)

# Solve the prioritization
solution_sf <- solve(problem_sf)

solution_rast <- solve(problem_rast)

# Convert the prioritization into a more digestible format
result_sf <- patchwise::convert_solution(solution = solution_sf, patch_df = patches_df_sf, planning_grid = planning_sf)

result_rast <- patchwise::convert_solution(solution = solution_rast, patch_df = patches_df_rast, planning_grid = planning_rast)

# Show the results
plot(result_sf)
terra::plot(result_rast)

sf results:

raster results:

echelleburns commented 6 months ago

I've tried just running the prioritization without the seamount patches (treating seamounts as just another feature). See code and results below.

# A test script to see the differences in prioritization results based on inputs of different formats
# This uses offshoredatr version 0.1.0 and prioritizr version 8.0.3

# Load libraries
library(tidyverse)

# Create areas
area <- offshoredatr::get_area(area_name = "Bermuda")
projection <- 'PROJCS["ProjWiz_Custom_Lambert_Azimuthal", GEOGCS["GCS_WGS_1984", DATUM["D_WGS_1984", SPHEROID["WGS_1984",6378137.0,298.257223563]], PRIMEM["Greenwich",0.0], UNIT["Degree",0.0174532925199433]], PROJECTION["Lambert_Azimuthal_Equal_Area"], PARAMETER["False_Easting",0.0], PARAMETER["False_Northing",0.0], PARAMETER["Central_Meridian",-64.5], PARAMETER["Latitude_Of_Origin",32], UNIT["Meter",1.0]]'

# Create a planning grid
planning_rast <- offshoredatr::get_planning_grid(area, projection_crs = projection, option = "raster")

planning_sf <- offshoredatr::get_planning_grid(area, projection_crs = projection, option = "sf_square") %>%
  dplyr::rename(geometry = x)

# Grab all relevant data
features_rast <- offshoredatr::get_features(area, planning_rast)

features_sf <- offshoredatr::get_features(area, planning_sf %>% dplyr::rename(x = geometry)) # get_features is looking for x column for sf objects

The features going into the prioritization differ in the layers selected and the extent of layers.

For a raster planning grid:

terra::plot(features_rast)

For a sf planning grid:

ggplot() + geom_sf(data = features_sf %>%
                     pivot_longer(1:(ncol(.)-1)) %>%
                     filter(!is.na(value)) %>% 
                     mutate(name = factor(name, levels = names(features_sf %>% sf::st_drop_geometry()))),
                   color = "#009c22") + 
  facet_wrap(~name, ncol = 4) +
  theme_void() +
  theme(strip.text = element_text(size = 6))

The major differences are:

the sf planning grid finds data in the bathypelagic zone, whereas the raster planning grid finds data in the abyssopelagic zone
the sf planning grid finds data for major ocean basins that covers the entire planning area (the raster planning grid doesn't find this layer at all)

Here are the results from the two model runs.

# Create a "cost" to protecting a cell - just a uniform cost for this example
cost_rast <- setNames(planning_rast, "cost")

# For sf objects, the cost is a column within the features dataset
features_sf <- features_sf %>%
  mutate(cost = 1)

# Run the prioritization
problem_rast <- prioritizr::problem(x = cost_rast, features = features_rast) %>%
  prioritizr::add_min_set_objective() %>%
  prioritizr::add_relative_targets(0.2) %>%
  prioritizr::add_binary_decisions() %>%
  prioritizr::add_boundary_penalties(penalty = 0.000002) %>%
  prioritizr::add_gurobi_solver(gap = 0.1, threads = parallel::detectCores()-1)

problem_sf <- prioritizr::problem(x = features_sf, features = names(features_sf)[!grepl("cost|geometry", names(features_sf))], cost_column = "cost") %>%
  prioritizr::add_min_set_objective() %>%
  prioritizr::add_relative_targets(0.2) %>%
  prioritizr::add_binary_decisions() %>%
  prioritizr::add_boundary_penalties(penalty = 0.000002) %>%
  prioritizr::add_gurobi_solver(gap = 0.1, threads = parallel::detectCores()-1)

# Solve the prioritization
solution_rast <- solve(problem_rast)

solution_sf <- solve(problem_sf)

For a raster planning grid:

terra::plot(solution_rast)

For a sf planning grid:

ggplot() +
  geom_sf(data = solution_sf %>%
            dplyr::select(solution_1) %>%
            mutate(solution_1 = as.factor(solution_1)),
                   mapping = aes(fill = solution_1),
          lwd = 0.1) +
  scale_fill_manual("", values = c("0" = "white", "1" = "#009c22"),
                    labels = c("0" = "Not selected", "1" = "Selected")) +
  theme_void()

If I re-run the sf prioritization and take out the major ocean basins feature, we get the same results:

# Re-run the sf prioritization without the major ocean basins
new_features_sf <- features_sf %>%
  dplyr::select(-basins_major_ocean_basins)

new_problem_sf <- prioritizr::problem(x = new_features_sf, features = names(new_features_sf)[!grepl("cost|geometry", names(new_features_sf))], cost_column = "cost") %>%
  prioritizr::add_min_set_objective() %>%
  prioritizr::add_relative_targets(0.2) %>%
  prioritizr::add_binary_decisions() %>%
  prioritizr::add_boundary_penalties(penalty = 0.000002) %>%
  prioritizr::add_gurobi_solver(gap = 0.1, threads = parallel::detectCores()-1)

new_solution_sf <- solve(new_problem_sf)

ggplot() +
  geom_sf(data = new_solution_sf %>%
            dplyr::select(solution_1) %>%
            mutate(solution_1 = as.factor(solution_1)),
          mapping = aes(fill = solution_1),
          lwd = 0.1) +
  scale_fill_manual("", values = c("0" = "white", "1" = "#009c22"),
                    labels = c("0" = "Not selected", "1" = "Selected")) +
  theme_void()

So there must be something inherently different in the prioritization process using sf vs raster objects? @jflowernet - have you noticed anything like this on your end using offshoredatr?

jflowernet commented 6 months ago

This is most likely an offshoredatr issue rather than prioritizr. Have you pulled that latest commits in offshoredatr and built the package again? It's possible your build of offshoredatr is quite old if you haven't called devtools::build() in a while.

When I run

devtools::load_all()

#Bermuda
ber_proj_wiz <- "+proj=laea +lon_0=-64.8220825 +lat_0=32.2530756 +datum=WGS84 +units=m +no_defs"

bermuda_eez <- get_area("Bermuda")
planning_rast_ber <- get_planning_grid(bermuda_eez, projection_crs = ber_proj_wiz)
planning_sf_ber <- get_planning_grid(bermuda_eez, projection_crs = ber_proj_wiz, option = "sf_square")

ber_feature_set_rast <- get_features(planning_grid = planning_rast_ber, enviro_clusters = 3)
terra::plot(ber_feature_set_rast)

ber_feature_set_sf <- get_features(planning_grid = planning_sf_ber, enviro_clusters = 3)
plot(ber_feature_set_sf, border=F)

I get similar features for both raster and sf datasets, though I still need to improve the way planning units are intersected as the coverage of some features is very different in sf compared to raster.

echelleburns commented 6 months ago

You're right, it's probably the version I was using.

But the problem persists when we actually run the prioritization - the results are way different. Do you think this is something we should chat with Jeff about? It seems particularly concerning since the inputs are pretty much the same now.

devtools::load_all()

# Bermuda
ber_proj_wiz <- "+proj=laea +lon_0=-64.8220825 +lat_0=32.2530756 +datum=WGS84 +units=m +no_defs"

bermuda_eez <- get_area("Bermuda")
planning_rast_ber <- get_planning_grid(bermuda_eez, projection_crs = ber_proj_wiz)
planning_sf_ber <- get_planning_grid(bermuda_eez, projection_crs = ber_proj_wiz, option = "sf_square")

ber_feature_set_rast <- get_features(planning_grid = planning_rast_ber, enviro_clusters = 3)

ber_feature_set_sf <- get_features(planning_grid = planning_sf_ber, enviro_clusters = 3)

Features going in (raster):

Features going in (sf):


# Create a "cost" to protecting a cell - just a uniform cost for this example
cost_rast <- setNames(planning_rast_ber, "cost")

# For sf objects, the cost is a column within the features dataset
ber_feature_set_sf <- ber_feature_set_sf %>%
  dplyr::mutate(cost = 1) %>% 
  dplyr::rename(geometry = x)

# Run the prioritization
problem_rast <- prioritizr::problem(x = cost_rast, features = ber_feature_set_rast) %>%
  prioritizr::add_min_set_objective() %>%
  prioritizr::add_relative_targets(0.2) %>%
  prioritizr::add_binary_decisions() %>%
  prioritizr::add_boundary_penalties(penalty = 0.000002) %>%
  prioritizr::add_gurobi_solver(gap = 0.1, threads = parallel::detectCores()-1)

problem_sf <- prioritizr::problem(x = ber_feature_set_sf, features = names(ber_feature_set_sf)[!grepl("cost|geometry", names(ber_feature_set_sf))], cost_column = "cost") %>%
  prioritizr::add_min_set_objective() %>%
  prioritizr::add_relative_targets(0.2) %>%
  prioritizr::add_binary_decisions() %>%
  prioritizr::add_boundary_penalties(penalty = 0.000002) %>%
  prioritizr::add_gurobi_solver(gap = 0.1, threads = parallel::detectCores()-1)

# Solve the prioritization
solution_rast <- solve(problem_rast)

solution_sf <- solve(problem_sf)

Results for raster inputs:

Results for sf inputs:

jflowernet commented 6 months ago

I know we talked about this, but just for future us/ others: this issue could be due to the difference in the format of the data frame that the sf/ raster objects get converted into for prioritizr. When there is not enough information for the solver to choose between many cells, e.g. when cost is uniform as in this case, it will just 'fill up the bathtub' (see https://github.com/prioritizr/prioritizr/issues/205#issuecomment-894872191). This accounts for the bands in the solutions, and I think that the sf object is arranged such that the data frame has planning units at the top of the planning area first, working left to right down to the bottom, whereas raster objects are in reverse. So essentially solutions with raster inputs fill up from the bottom right whereas sf input solutions fill up from the top left.

echelleburns commented 6 months ago

This is an offshoredatr problem and we addressed/fixed the issue. See issue here.

emlab-ucsb / patchwise

Discrepancies in results based on sf vs raster objects #3