Closed tarunaaggarwal closed 5 years ago
Hello @tarunaaggarwal,
The issue is that obj$data$sam_data$SampleID
does not exist, so you gave the function NULL
in that case.
In this case, the sample data table is on its own and called sample_data
. See the below modified code where I have made a few changes. I did notice that you have some NA
s in your data and this was not handled well by a few functions. I fixed them, so if you reinstall the development versions they handle the NA
s better.
library(metacoder)
library(taxa)
library(dplyr)
library(readr)
# setwd("~/Downloads/")
#import biom table into metacoder
otu_data <- read_tsv("~/Downloads/feature-table.tsv")
print(otu_data)
#import taxonomy files into metacoder
tax_data <- read_tsv("~/Downloads/taxonomy_NOunassigned.tsv")
print(tax_data)
# They are already characters in this case (see the <chr> below the column names?)
# tax_data$`OTU ID` <- as.character(tax_data$`OTU ID`) # Must be same type for join to work
# otu_data$`OTU ID` <- as.character(otu_data$`OTU ID`) # Must be same type for join to work
otu_data <- left_join(otu_data, tax_data,
by = c("OTU_ID" = "OTU ID")) # identifies cols with shared IDs
print(otu_data)
tail(colnames(otu_data), n = 10)
#import mapping file containing metadata
sample_data <- read_tsv("~/Downloads/16S-bac-QIIME-mapping-MEmicrobiome-FINAL-26Jul17.txt",
col_types = "cccccccccccccccccc")
print(sample_data)
head(otu_data$taxonomy, 10)
#parse taxonomy
obj <- parse_tax_data(otu_data,
class_cols = "taxonomy",
class_sep = ";",
class_regex = "^(D?_?[0-9]*)_?_?(.+)$", #capturing groups are passed to the class_key func
class_key = c("taxon_rank" = "taxon_rank", "name" = "taxon_name"))
obj$data$tax_data <- zero_low_counts(obj, data = "tax_data", min_count = 5,
cols = sample_data$SampleID) # needed because the "Confidence" column is also numeric
no_reads <- rowSums(obj$data$tax_data[, sample_data$SampleID]) == 0
sum(no_reads)
obj$data$tax_data <- calc_obs_props(obj, "tax_data")
print(obj)
# calculate per taxon abundance
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data",
cols = sample_data$SampleID) # note how the sample ID column from sample_data can be used to subset the abundance matrix
print(obj)
obj$data$diff_table <- compare_groups(obj, dataset = "tax_abund",
cols = sample_data$SampleID, # sample_data is a seperate table in this case
groups = sample_data$OceanRegion) # sample_data is a seperate table in this case
Hey @zachary-foster - thank you for your help and noticing the NAs in my file. Even though I downloaded the dev version, I'm still getting an error about the NAs when I try to make the heat tree matrix. It appears that despite this error message, the heat tree function should work, but no plot shows up at all. And I'm also trying to:
I tried to do the first and second thing in the heat_tree_matrix
but not sure if I did it right. I can't figure out number 3.
Code:
#devtools::install_github("grunwaldlab/metacoder")
library(metacoder)
library(taxa)
library(dplyr)
library(readr)
setwd("~/Dropbox/bik_lab/github/Funginomicon/memb-files/exported-feature-table/")
#import biom table into metacoder
otu_data <- read_tsv("dada2-table-BlankOTUSremoved-noZeroFeatures-BlankSamplesRemoved-noBadNems-noZeroFeatures.tsv")
print(otu_data)
#import taxonomy files into metacoder
tax_data <- read_tsv("taxonomy_NOunassigned.tsv")
print(tax_data)
#tax_data$`OTU ID` <- as.character(tax_data$`OTU ID`) # Must be same type for join to work
#otu_data$`OTU ID` <- as.character(otu_data$`OTU ID`) # Must be same type for join to work
otu_data <- left_join(otu_data, tax_data,
by = c("OTU ID" = "OTU ID")) # identifies cols with shared IDs
print(otu_data)
tail(colnames(otu_data), n = 10)
#import mapping file containing metadata
sample_data <- read_tsv("16S-bac-QIIME-mapping-MEmicrobiome-FINAL-1July2018-Qiime2_noBADnems.txt",
col_types = "cccccccccccccccccc")
print(sample_data)
head(otu_data$taxonomy, 10)
#parse taxonomy
obj <- parse_tax_data(otu_data,
class_cols = "taxonomy",
class_sep = ";",
class_regex = "^(D?_?[0-9]*)_?_?(.+)$", #capturing groups are passed to the class_key func
class_key = c("taxon_rank" = "taxon_rank", "name" = "taxon_name"))
# zero any read counts less than 5
obj$data$tax_data <- zero_low_counts(obj, data = "tax_data", min_count = 5,
cols = sample_data$SampleID)
# count how many OTUs have no reads
no_reads <- rowSums(obj$data$tax_data[, sample_data$SampleID]) == 0
sum(no_reads)
# remove OTUs with no reads
obj <- filter_obs(obj, "tax_data", ! no_reads, drop_taxa = TRUE)
obj$data$tax_data <- calc_obs_props(obj, "tax_data")
print(obj)
# calculate per taxon abundance
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data",
cols = sample_data$SampleID) # note how the sample ID column from sample_data can be used to subset the abundance matrix
print(obj)
# calculate difference matrix with a treatment group
obj$data$diff_table <- compare_groups(obj, data = "tax_abund",
cols = sample_data$SampleID, # sample_data is a seperate table in this case
groups = sample_data$OceanRegion) # sample_data is a seperate table in this case
print(obj$data$diff_table)
heat_tree_matrix(obj,
data = "diff_table",
node_size = n_obs,
node_label = taxon_names,
node_label_size = n_obs,
node_label_size_range = c(1,3),
node_color_range = c("#008000", "#FFDF00"),
edge_color_range = c("#008000", "#FFDF00"),
output_file = "16S-overview")
Error:
Warning messages:
1: In FUN(X[[i]], ...) :
Could not apply layout 'reingold-tilford' to subgraph. Using 'fruchterman-reingold' instead.
2: In FUN(X[[i]], ...) :
Could not apply layout 'reingold-tilford' to subgraph. Using 'fruchterman-reingold' instead.
3: In FUN(X[[i]], ...) :
Could not apply layout 'reingold-tilford' to subgraph. Using 'fruchterman-reingold' instead.
4: NAs found in the "node_label" option. These may cause plotting errors or other strange behavior. NAs can be created if there is not a value for each taxon. The following 1 of 2552 taxa have NAs for the "node_label" option:
aac
5: In FUN(X[[i]], ...) :
Could not apply layout 'reingold-tilford' to subgraph. Using 'fruchterman-reingold' instead.
It looks like you input files have changed, so I cant test anything, but I see a few potential problems.
The "output_file" should have a file extension, so heat_tree
knows what format to save the file in. PDF is a good one.
I am not sure why you get the NA warning about node_label. Is the taxon "aac" named NA in the taxmap object?
Increase the font size of the node labels because they're super tiny and hard to read.
The units of node_label_size_range
are proportions of plotting space, so anything above 1 does not make sense. Try values around 0.01 to 0.05 to start.
Change the color scheme.
You can use node_color_range
like you did, but if you are plotting differences, you probably want three colors with a neutral color in the middle.
Plot relative abundance instead of OTU count.
Use one of the columns from the "diff_table" that quantifies differences between taxa for color, and choose some other stat for size, I like OTU count (n_obs usually) or total read count/proportion (sum/mean of the taxon counts for all samples). node_color = median_diff
for example.
Closing due to inactivity. If there are still unresolved issues, feel free to reopen this issue or open a new issue.
Hi @zachary-foster - I'm trying to calculate a diff table to compare some Ocean Regions in the sample mapping file, but I keep getting the following error.
Confidence
col is being treated like a sample.Error:
Code:
Files are here
THANK YOU!