Open igchoi opened 1 year ago
# installation
if (!require("BioCManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("devel")
BiocManager::install("phyloseq")
library("phyloseq")
install.packages("dplyr")
install.packages("ggplot2")
install.packages("devtools")
devtools::install_github("beadyallen/MGnifyR", upgrade = F, force =T)
library(MGnifyR)
library(dplyr)
library(ggplot2)
## fetch a list of analysis
mgnify_client()
mg = mgnify_client(username="Webin-63553", password="korea2022!",
usecache = T, cache_dir = "./")
##
insecta = mgnify_query(mg, qtype="samples", biome_name = "Insecta",
experiment_type = "amplicon",
maxhits=-1, usecache = F) # To disable the limit, set maxhits < 0
dim(insecta) # 1797 obs x 47
head(insecta)
names(insecta)
taxid = insecta$`host-tax-id`
summary(taxid)
sum(table(taxid)) # 753 species
sort(table(taxid, useNA = 'always'),decreasing=T) # 1044 samples - missing NCBI_tax
acc = insecta$accession
table(insecta$`instrument model`)
##
anal = mgnify_analyses_from_samples(mg, acc)
metadata <- mgnify_get_analyses_metadata(mg, anal)
head(metadata)
names(metadata)
save.image(file="insecta.rdata")
##
colnames(metadata)
table(metadata$`analysis_instrument-model`)
ps <- mgnify_get_analyses_phyloseq(mg, metadata$analysis_accession,
returnLists = T) # 2193 analyses
#Warnings!! 14 data missing???
# use 'returnLists = T' for checking
#13: In parse_taxonomy_greengenes(strsplit(char.vec, ";", TRUE)[[1]]) :
#No greengenes prefixes were found.
#Consider using parse_taxonomy_default() instead if true for all OTUs.
#Dummy ranks may be included among taxonomic ranks now.
length(ps$sample_metadata) # 2193
save.image(file="insecta.rdata")
library(phyloseq)
head(ps$sample_metadata)
head(ps$phyloseq_objects)
ps.check = unlist(lapply(ps$phyloseq_objects,
function(x) {
taxtmp = tax_table(x,errorIfNULL = F)
is.null(taxtmp) }))
# nrow(taxtmp) }))
acc.null = which(ps.check==TRUE) # 68
acc.nonull = metadata$analysis_accession[-acc.null] # 2125
ps.no <- mgnify_get_analyses_phyloseq(mg, acc.nonull)
tax = tax_table(ps.no)
# Transformations of abundances
psglom <- tax_glom(ps.no, "Phylum")
normed_ps <- transform_sample_counts(psglom, function(x) x/sum(x))
ps.ch = subset_taxa(normed_ps, Phylum == "Firmicutes")
plot_bar(ps.ch, fill="Phylum")
ord <- ordinate(ps.no, "NMDS", "bray")
plot_ordination(ps.no, ord) +
geom_point(aes(color=as.numeric(sample_elevation)), size=1) +
theme_bw()
head(ord)
names(ord)
head(ord$points)
#ps.check[24]
#nrow(tax_table(ps$phyloseq_objects[[24]]))
#head(tax_table(ps$phyloseq_objects[[21]]))
#taxtmp = tax_table(ps$phyloseq_objects[[2]])
R code from EBI Metagenomics workshop 2021 - MGnifyR