choilab / BTN707-CompGen

0 stars 1 forks source link

R code for MgnifyR microbiome analysis #3

Open igchoi opened 1 year ago

igchoi commented 1 year ago

R code from EBI Metagenomics workshop 2021 - MGnifyR

# installation
if (!require("BioCManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install("devel")
BiocManager::install("phyloseq")

library("phyloseq")
install.packages("dplyr")
install.packages("ggplot2")
install.packages("devtools")
devtools::install_github("beadyallen/MGnifyR", upgrade = F, force =T)
library(MGnifyR)
library(dplyr)
library(ggplot2)

## fetch a list of analysis 
## fetch a list of analysis 
mgnify_client()
mg = mgnify_client(username="", password="",
                   usecache = T, cache_dir = "./")
##
insect = mgnify_query(mg, qtype="studies", biome_name = "Insecta",
             maxhits=10, usecache = T)
head(insect)
names(insect)
acc = insect$accession

anal = mgnify_analyses_from_studies(mg, acc)

metadata <- mgnify_get_analyses_metadata(mg, anal)
head(metadata)
choilab commented 1 year ago

updated code

# installation
if (!require("BioCManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install("devel")
BiocManager::install("phyloseq")

library("phyloseq")
install.packages("dplyr")
install.packages("ggplot2")
install.packages("devtools")
devtools::install_github("beadyallen/MGnifyR", upgrade = F, force =T)
library(MGnifyR)
library(dplyr)
library(ggplot2)

## fetch a list of analysis 
mgnify_client()
mg = mgnify_client(username="Webin-63553", password="korea2022!",
                   usecache = T, cache_dir = "./")
##
insecta = mgnify_query(mg, qtype="samples", biome_name = "Insecta",
                       experiment_type = "amplicon",
             maxhits=-1, usecache = F) # To disable the limit, set maxhits < 0
dim(insecta) # 1797 obs x 47 
head(insecta)
names(insecta)
taxid = insecta$`host-tax-id`
summary(taxid)
sum(table(taxid)) # 753 species
sort(table(taxid, useNA = 'always'),decreasing=T) # 1044 samples - missing NCBI_tax
acc = insecta$accession
table(insecta$`instrument model`)
##
anal = mgnify_analyses_from_samples(mg, acc)
metadata <- mgnify_get_analyses_metadata(mg, anal)
head(metadata)
names(metadata)
save.image(file="insecta.rdata")

##
colnames(metadata)

table(metadata$`analysis_instrument-model`)

ps <- mgnify_get_analyses_phyloseq(mg, metadata$analysis_accession,
                                   returnLists = T) # 2193 analyses
#Warnings!! 14 data missing???
# use 'returnLists = T' for checking
#13: In parse_taxonomy_greengenes(strsplit(char.vec, ";", TRUE)[[1]]) :
#No greengenes prefixes were found. 
#Consider using parse_taxonomy_default() instead if true for all OTUs. 
#Dummy ranks may be included among taxonomic ranks now.

length(ps$sample_metadata) # 2193

save.image(file="insecta.rdata")
library(phyloseq)
head(ps$sample_metadata)
head(ps$phyloseq_objects)
ps.check = unlist(lapply(ps$phyloseq_objects,
                  function(x) { 
                    taxtmp = tax_table(x,errorIfNULL = F)
                    is.null(taxtmp) }))
                   # nrow(taxtmp) }))
acc.null = which(ps.check==TRUE) # 68
acc.nonull = metadata$analysis_accession[-acc.null] # 2125
ps.no <- mgnify_get_analyses_phyloseq(mg, acc.nonull)
tax = tax_table(ps.no)

# Transformations of abundances 
psglom <- tax_glom(ps.no, "Phylum")
normed_ps <- transform_sample_counts(psglom, function(x) x/sum(x))
ps.ch = subset_taxa(normed_ps, Phylum == "Firmicutes")
plot_bar(ps.ch, fill="Phylum")

ord <- ordinate(ps.no, "NMDS", "bray")
plot_ordination(ps.no, ord) + 
  geom_point(aes(color=as.numeric(sample_elevation)), size=1) + 
  theme_bw()

head(ord)
names(ord)
head(ord$points)

#ps.check[24]
#nrow(tax_table(ps$phyloseq_objects[[24]]))
#head(tax_table(ps$phyloseq_objects[[21]]))
#taxtmp = tax_table(ps$phyloseq_objects[[2]])