map of number of endemic genera

wcornwell commented 1 year ago

map of number of species in endemic genera

wcornwell commented 1 year ago

data is here: https://www.dropbox.com/s/y6ni1jbtypqdn37/locations_endemic_genera.csv?dl=0

AndreasBartnitzky commented 1 year ago

I've made a map with our data, but the example code I based this off was looking at magpie observations. So I think the map I made shows the total number of observations of plants in each hexagon of Aus, rather than the species count? Although I'm not even sure about that. Can you have a look at it and see how we can map specifically species counts?

This is the code I mostly followed: https://labs.ala.org.au/posts/2021-04-14_hex-maps-for-species-occurrence-data/post.html

`

# packages
library(ggplot2)
library(tidyr)
library(dplyr)
library(ozmaps)
library(sf)
library(hexbin)

#read in our data and have a look at the first few rows
species <- read.csv(file = "locations_endemic_genera.csv")

species %>% head()

#filter to exclude all unclear endemic status and NA coordinates
endemic_species <- species %>% filter(current_knowledge == "endemic", decimalLatitude != "NA", decimalLongitude != "NA")

#making the map
aus <- st_transform(ozmaps::ozmap_country, 4326)

grid_all <- st_make_grid(aus, 
                         cellsize = 1, 
                         what = "polygons", 
                         square = FALSE,
                         flat_topped = TRUE)

ggplot() +
  geom_sf(data = grid_all)

# extract rows that are within AUS land
keep_hexes <- st_intersects(grid_all, aus) %>%
  as.data.frame(.) %>%
  pull(row.id)

# filter full grid to only hexagon IDs in AUS
oz_grid <- grid_all[keep_hexes]

ggplot() + geom_sf(data = oz_grid)

#how many species are in each hexagon?

#convert points to an sf spatial object
species_points_sf <- endemic_species %>% 
  st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), 
  crs = st_crs(4326))

#this bit takes a while to compute...
#intersect returns a list where each data.frame within the list shows which hexagon ID each point is within
intersect <- st_intersects(species_points_sf, oz_grid)

intersect[5:10]

# condense counts into tibble
#With all points in their own separate data.frame, we can use the wicked-fast table() function from base R to count how many points match each hexagon ID, giving us our point counts! A little renaming and wrangling helps to get our counts in the right format.

counts <- as_tibble(table(unlist(intersect)), 
          .name_repair = "unique") %>%
  rename("hex_id" = 1,
         "count" = 2) %>%
  mutate(hex_id = as.integer(hex_id)) %>%
  replace_na(list(count = 0))

# add our count column from complete_counts to our oz_grid

oz_grid <- oz_grid %>%
  as_tibble() %>%
  mutate(id = row_number()) %>%
  full_join(counts,
            by = join_by(id == hex_id)) %>%
  st_as_sf()

oz_grid |> head()

#build the map
ggplot() +
  geom_sf(data = oz_grid, aes(fill = count), size = .01) +
  scale_fill_gradientn(colours = c("#EEECEA", "#E06E53"), 
                       na.value = "white", 
                       trans = "log10",
                       labels = scales::comma_format(),
                       n.breaks = 6,
                       guide = guide_colourbar(title = "Observations")) +
  coord_sf(ylim = c(-45, -10), 
           xlim = c(110, 155)) +
  theme_void()

`

wcornwell commented 1 year ago

try adding this bit in the middle

grid_id<-as.numeric(intersect)
species_points_sf$grid_id<-grid_id

species_points_sf %>%
  group_by(grid_id,genus) %>%
  summarize(geometry=geometry[1]) -> one_gen_per_grid

intersect2 <- st_intersects(one_gen_per_grid, oz_grid)

counts <- as_tibble(table(unlist(intersect2)), 
                    .name_repair = "unique") %>%
  rename("hex_id" = 1,
         "count" = 2) %>%
  mutate(hex_id = as.integer(hex_id)) %>%
  replace_na(list(count = 0))

wcornwell commented 1 year ago

the key is to reduce the intersect object to only one count per genus per grid cell

wcornwell commented 1 year ago

better for the mapping:

#build the map
ggplot() +
  geom_sf(data = oz_grid, aes(fill = count), size = .01) +
  scale_fill_continuous(type = "viridis",na.value="white")+
  coord_sf(ylim = c(-45, -10), 
           xlim = c(110, 155)) +
  theme_void()+
  labs(fill="Number of endemic \n Australian plant genera")

AndreasBartnitzky commented 1 year ago

Note: Change colour scheme to be better for red-green values

wcornwell commented 1 year ago

https://www.dropbox.com/scl/fi/8k9w1885qnn1f8ozllxk9/locations_endemic_genera_v2.csv?rlkey=xi8rxkac7r73ha8kj15k0l3mm&dl=0

AndreasBartnitzky commented 1 year ago

`

# packages
library(ggplot2)
library(tidyr)
library(dplyr)
library(ozmaps)
library(sf)
library(hexbin)

#read in our data and have a look at the first few rows
species <- read.csv(file = "locations_endemic_genera_v2.csv")

species %>% head()

#filter to exclude all unclear endemic status and NA coordinates
endemic_species <- species %>% filter(current_knowledge == "endemic", decimalLatitude != "NA", decimalLongitude != "NA")

#making the map
aus <- st_transform(ozmaps::ozmap_country, 4326)

grid_all <- st_make_grid(aus, 
                         cellsize = 1, 
                         what = "polygons", 
                         square = FALSE,
                         flat_topped = TRUE)

ggplot() +
  geom_sf(data = grid_all)

# extract rows that are within AUS land
keep_hexes <- st_intersects(grid_all, aus) %>%
  as.data.frame(.) %>%
  pull(row.id)

# filter full grid to only hexagon IDs in AUS
oz_grid <- grid_all[keep_hexes]

ggplot() + geom_sf(data = oz_grid)

#how many species are in each hexagon?

#convert points to an sf spatial object
species_points_sf <- endemic_species %>% 
  st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), 
  crs = st_crs(4326))

#this bit takes a while to compute...
#intersect returns a list where each data.frame within the list shows which hexagon ID each point is within
intersect <- st_intersects(species_points_sf, oz_grid)

intersect[5:10]


# condense counts into tibble
#With all points in their own separate data.frame, use table() to count how many points match each hexagon ID, giving us our point counts. A little renaming and wrangling helps to get our counts in the right format.

grid_id<-as.numeric(intersect)
species_points_sf$grid_id<-grid_id

species_points_sf %>%
  group_by(grid_id,genus) %>%
  summarize(geometry=geometry[1]) -> one_gen_per_grid

intersect2 <- st_intersects(one_gen_per_grid, oz_grid)

counts <- as_tibble(table(unlist(intersect2)), 
                    .name_repair = "unique") %>%
  rename("hex_id" = 1,
         "count" = 2) %>%
  mutate(hex_id = as.integer(hex_id)) %>%
  replace_na(list(count = 0))

# add our count column from complete_counts to our oz_grid

oz_grid <- oz_grid %>%
  as_tibble() %>%
  mutate(id = row_number()) %>%
  full_join(counts,
            by = join_by(id == hex_id)) %>%
  st_as_sf()

oz_grid |> head()

#build the map
ggplot() +
  geom_sf(data = oz_grid, aes(fill = count), size = .01) +
  scale_fill_continuous(type = "viridis",na.value="white")+
  coord_sf(ylim = c(-45, -10), 
           xlim = c(110, 155)) +
  theme_void()+
  labs(fill="Number of endemic \n Australian plant genera")

map v2 (gradient) Map v2 (viridis)

`

wcornwell / endemism

map of number of endemic genera #15