Open wcornwell opened 2 years ago
Goal 1: Table with percentage endemics for each group
getting started:
countries<-read_csv("intermediate_data/country_dataset.csv")
countries$genus<-word(countries$species,1,1)
length(unique(countries$genus))
length(unique(countries$species))
Pollia<-filter(countries,genus=="Pollia")
group_by(genus) %>%
summarize()
for joining in family names: https://github.com/traitecoevo/taxonlookup
install.packages("readr")
library(readr)
library(stringr)
library(dplyr)
countries <- read_csv("intermediate_data/country_dataset.csv")
countries$genus <- word(countries$species,1,1)
length(unique(countries$genus))
length(unique(countries$species))
Pollia <- filter(countries, genus == "Pollia")
countries %>%
group_by(genus) %>%
summarize(genus_endemic=all(aus_endemic),
prop_sp_endemic=sum(aus_endemic)/n()) -> genera
genera %>%
ggplot(aes(x=prop_sp_endemic))+geom_histogram()
devtools::install_github("wcornwell/taxonlookup")
library(taxonlookup)
lt<-lookup_table(genera$genus, by_species=TRUE)
genera %>%
left_join(lt) ->genus_with_family
genus_with_family %>%
group_by(family) %>%
summarize(prop_endemic=sum(genus_endemic)/n(),n()) ->family_summary
family_summary %>%
ggplot(aes(x=prop_endemic))+geom_histogram()
lt<-lookup_table(countries$species, by_species=TRUE)
lt<-rownames_to_column(lt,"species")
left_join(countries,lt) -> species_with_family
species_with_family %>%
group_by(group) %>%
summarize(prop_endemic=sum(aus_endemic)/n(),n=n())
not sure how to fix this.
hi @shufoldof it's loading the libraries. if you load the libraries first it should fix it.
remotes::install_github("wcornwell/taxonlookup")
library(taxonlookup)
library(tidyverse)
hi @wcornwell that part worked, thanks. But also have problems with this code. searched but couldn't fix it:
hmmm can you post the whole script here?
Yes. Here it is:
#install.packages("readr")
#install.packages("rlang")
#install.packages("dplyr")
remotes::install_github("wcornwell/taxonlookup")
library(taxonlookup)
library(readr)
library(tidyverse)
library(stringr)
library(dplyr)
library(ggplot2)
countries <- read_csv("intermediate_data/country_dataset.csv")
countries$genus <- word(countries$species,1,1)
Pollia <- filter(countries, genus == "Pollia")
countries %>%
group_by(genus) %>%
summarize(genus_endemic = all(aus_endemic),
prop_sp_endemic = sum(aus_endemic)/n()) -> genera
genera %>%
ggplot(aes(x=prop_sp_endemic))+geom_histogram()
devtools::install_github("wcornwell/taxonlookup")
library(taxonlookup)
lt <- lookup_table(genera$genus, by_species = TRUE)
genera %>%
left_join(lt) -> genus_with_family
genus_with_family %>%
group_by(family) %>%
summarize(prop_endemic = sum(genus_endemic)/n(),n()) -> family_summary
family_summary %>%
ggplot(aes(x=prop_endemic)) + geom_histogram()
lt <- lookup_table(countries$species, by_species = TRUE)
lt <- rownames_to_column(lt, "species")
left_join(countries, lt) -> species_family
species_with_family %>%
group_by(group) %>%
summarize(prop_endemic = sum(aus_endemic)/n(), n=n())
change:
left_join(countries, lt) -> species_family
to:
left_join(countries, lt) -> species_with_family
@wcornwell thank you so much. It worked like a charm. What would be my next step?
I think you should make final results tables and write the captions for them.
@wcornwell should I do it on the MS word program or should I search how to do it in R?
up to you :)
important function is
table
orsummarize
andn_distinct
also
word