Open JeremyCourtin opened 3 years ago
Hello,
Thanks!
Here is an example of how to do that. That would make a good function to have in the package actually.
library(metacoder)
#> This is metacoder verison 0.3.5 (stable)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(purrr)
# Get test data
hmp_samples <- dplyr::ungroup(hmp_samples) # needed due to a problem with how the dataset is saved that I just noticed
x = parse_tax_data(hmp_otus, class_cols = "lineage", class_sep = ";",
class_key = c(tax_rank = "taxon_rank", tax_name = "taxon_name"),
class_regex = "^(.+)__(.+)$")
# Make test data prettier
x$data <- x$data[1]
names(x$data) <- "otu_abund"
# Make function to count ASVs with abundances above 0 for a subset of samples
test_subset <- hmp_samples$sample_id[hmp_samples$body_site == "Nose"]
map_dbl(obs(x, data = "otu_abund"),
function(otu_index) {
# subset abundance data to just target samples
abund <- x$data$otu_abund[otu_index, test_subset]
# Return number of ASVs with more than 0 count in these samples
sum(rowSums(abund) > 0)
})
#> ab ac ad ae af ag ah ai aj ak al am an ao ap aq ar as at au
#> 462 90 77 155 132 8 0 0 41 6 71 155 44 88 44 8 1 4 0 0
#> av aw ax ay az ba bb bc bd be bf bg bh bi bj bk bl bm bn bo
#> 0 18 6 71 154 34 48 44 10 8 36 4 11 12 1 1 4 0 0 0
#> bp bq br bs bt bu bv bw bx by bz ca cb cc cd ce cf cg ch ci
#> 0 18 6 13 63 34 47 31 20 8 6 80 6 3 8 27 9 19 4 3
#> cj ck cl cm cn co cp cq cr cs ct cu cv cw cx cy cz da db dc
#> 9 12 1 1 10 0 6 0 1 0 2 4 0 0 4 0 0 0 1 0
#> dd de df dg dh di dj dk dl dm dn do dp dq dr ds dt du dv dw
#> 0 0 1 0 1 0 0 1 0 18 3 11 63 19 47 31 2 8 3 80
#> dx dy dz ea eb ec ed ee ef eg eh ei ej ek el em en eo ep eq
#> 6 3 14 2 26 0 19 4 1 1 5 2 1 2 0 6 0 2 5 0
#> er es et eu ev ew ex ey ez fa fb fc fd fe ff fg fh fi fj fk
#> 0 2 5 0 2 4 0 3 1 0 1 2 1 2 4 2 0 1 2 1
#> fl fm fn fo fp fq fr fs ft fu fv fw fx fy fz ga gb gc gd ge
#> 0 3 0 0 0 0 1 0 0 1 2 1 0 0 0 1 1 0 0 0
#> gf gg gh gi gj gk gl gm gn go gp gq gr gs
#> 1 0 0 1 1 0 1 0 0 0 1 1 0 0
# Generalize this to multiple subsets, based on metadata
tax_counts <- map_dfc(split(hmp_samples$sample_id, hmp_samples$body_site),
function(sample_ids) {
map_dbl(obs(x, data = "otu_abund"),
function(otu_index) {
# subset abundance data to just target samples
abund <- x$data$otu_abund[otu_index, sample_ids]
# Return number of ASVs with more than 0 count in these samples
sum(rowSums(abund) > 0)
})
})
tax_counts
#> # A tibble: 174 x 5
#> Nose Saliva Skin Stool Throat
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 462 440 463 280 445
#> 2 90 98 64 10 97
#> 3 77 127 103 160 122
#> 4 155 30 142 1 32
#> 5 132 148 143 106 152
#> 6 8 35 9 0 40
#> 7 0 2 1 2 1
#> 8 0 0 0 0 1
#> 9 41 41 28 9 44
#> 10 6 19 3 0 13
#> # … with 164 more rows
# Add taxon ids to table and add to taxmap object
x$data$tax_asv_counts <- bind_cols(taxon_id = taxon_ids(x), tax_counts)
# Test plot
heat_tree(x,
node_label = taxon_names,
node_size = Nose,
node_color = Nose,
node_color_axis_label = "ASV count")
Created on 2021-07-23 by the reprex package (v0.3.0)
Optimally, I would like to see the confidence (maybe mean confidence) to which my ASVs are assigned per taxon. Is it possible to plot this?
Yea, you would plot the mean/median confidence, or some other summary stat, since you have per-ASV information and want to plot per-taxon information. So something like this should work:
map_dbl(obs(x, data = "asv_data"), function(index) mean(x$data$asv_data$my_conf_measure[index]))
Let me know if you have questions
Best,
Zach
Hi, thank you very much for this great tool!
I struggle a bit understanding all the functions of it so far. My understanding is that metacoder will not plot ASVs (or OTUs) but the related taxonomy. Therefore, each Taxon can be represented by several ASVs and this is what the n_obs() function returns us. For my research question, I work with time slices. I have many samples, each of them belong to a time slice (as metadata in the sample_data file). I would like to compare and plot the number of ASV assign to each time slice and how this number change between the time slices. I tried to play a bit around with the available options (described in the tutorial and example as well as some answers in different issues) but could not find my way around it so well as I was not able to get the n_obs for each of my time slices.
Would it be possible to access this information? Knowing that not all ASVs nor taxon might appear during each timeslice?
Thanks a lot for your help.
PS: I have another question but do not know if I should create another issue or post it here: I would like to add a color regarding an index not linked to the different samples but to the different ASV (i.e. identity). Optimally, I would like to see the confidence (maybe mean confidence) to which my ASVs are assigned per taxon. Is it possible to plot this? Again, sorry, I am still a beginner with R and your tool!