traitecoevo / fg_spectral_diversity

0 stars 0 forks source link

get ALA data #3

Closed wcornwell closed 5 months ago

wcornwell commented 10 months ago

[image] Very preliminary effort.

wcornwell commented 10 months ago
library(sf)
library(galah)
library(infinitylists)
library(dplyr)
library(ggplot2)
library(lubridate)
library(readr)

# Register the contact email with galah and turn off its verbose output.
galah::galah_config(email = "wcornwell@gmail.com", verbose = FALSE)

# Second entry of the `places` object shipped inside infinitylists
# (the KML file for Fowlers, per the original author's note).
fg <- infinitylists:::places[[2]]

# Query ALA for occurrence records located within `fg`, requesting only
# the fields listed below, and download the result.
test <- galah::galah_call() |>
  galah::galah_geolocate(fg) |>
  galah::galah_select(
    recordID, species, genus, family,
    decimalLatitude, decimalLongitude, coordinateUncertaintyInMeters,
    eventDate, datasetName, basisOfRecord, references,
    institutionCode, recordedBy, outlierLayerCount, isDuplicateOf,
    sounds
  ) |>
  galah::atlas_occurrences()

library(APCalign)
# Align the species names in the ALA download using APCalign.
#
# dplyr verbs are namespace-qualified: without dplyr attached, a bare
# `filter()` resolves to stats::filter() and the script fails.

# Keep records whose family name contains "aceae" as a rough stand-in for
# "is a plant" (hacky, per the original author). Only used to build the
# list of names sent to the lookup.
test2 <- dplyr::filter(test, grepl("aceae", family))

# Build a two-column lookup: name as it appears in ALA -> suggested name.
zz <- APCalign::create_taxonomic_update_lookup(test2$species)
zz$species <- zz$original_name
zzz <- dplyr::distinct(dplyr::select(zz, suggested_name, species))

# Attach the suggested names to the full download and keep only the records
# that received one. The join key is stated explicitly instead of being
# inferred from the shared column names.
test2 <- dplyr::left_join(test, zzz, by = "species") |>
  dplyr::filter(!is.na(suggested_name))

# Work around inconsistencies in the ALA eventDate column: parse it as a
# date-time first, and for values that fail fall back to a date-only parse.
# Both parsers are given tz = "UTC", so the result is a single date-time
# column.
test2$collectionDate <- dplyr::coalesce(
  lubridate::ymd_hms(test2$eventDate, tz = "UTC", quiet = TRUE),
  lubridate::ymd(test2$eventDate, tz = "UTC", quiet = TRUE)
)

# The source of a record is spread over two columns: use datasetName where
# it is present, otherwise institutionCode.
test2$source <- dplyr::coalesce(test2$datasetName, test2$institutionCode)

# Histogram of record dates, filled by data source.
#
# Some dates in the column are erroneous, so only records after 1900-01-01
# are plotted. collectionDate is a date-time (POSIXct), so the cutoff is
# built as a date-time as well: comparing a POSIXct column with a Date
# compares seconds against days, which silently moves the cutoff to
# roughly 1970.
test2 |>
  dplyr::filter(collectionDate > as.POSIXct("1900-01-01", tz = "UTC")) |>
  ggplot2::ggplot(ggplot2::aes(x = collectionDate, fill = source)) +
  ggplot2::geom_histogram() +
  ggplot2::theme_bw() +
  ggplot2::ggtitle("Number of plant records in ALA from Fowlers Gap")

# Write the aligned records to a CSV file in the working directory.
# Namespace-qualified so the call does not depend on readr being attached.
readr::write_csv(test2, "ala_all_plants_fowlers.csv")