Open Junhao-Guo opened 1 year ago
I don't think Manhattan
plot is a good idea to visualize the result of DAA, because too much information will be missing. The MicrobiotaProcess
provided mp_plot_diff_boxplot
, mp_plot_diff_cladogram
, mp_plot_diff_res
functions to visualize the results of DAA directly. But you can also use mp_plot_abundance
to visualize the result. You can refer to the following codes.
> library(MicrobiotaProcess)
> data(mouse.time.mpse)
> # You can obtain the result of DAA and extract it with taxonomy information
> mouse.time.mpse %>% mp_rrarefy() %>% mp_diff_analysis(.abundance=RareAbundance, .group=time) %>% mp_extract_taxatree() -> taxa.tree
> taxa.tree
'treedata' S4 object'.
...@ phylo:
Phylogenetic tree with 218 tips and 186 internal nodes.
Tip labels:
OTU_67, OTU_231, OTU_188, OTU_150, OTU_207, OTU_5, ...
Node labels:
r__root, k__Bacteria, p__Actinobacteria, p__Bacteroidetes, p__Cyanobacteria,
p__Deinococcus-Thermus, ...
Rooted; no branch lengths.
with the following features available:
'nodeClass', 'nodeDepth', 'RareAbundanceBySample', 'LDAupper', 'LDAmean',
'LDAlower', 'Sign_time', 'pvalue', 'fdr'.
# The associated data tibble abstraction: 404 × 12
# The 'node', 'label' and 'isTip' are from the phylo tree.
node label isTip nodeCl…¹ nodeD…² RareAb…³ LDAup…⁴ LDAmean LDAlo…⁵ Sign_…⁶
<int> <chr> <lgl> <chr> <dbl> <list> <dbl> <dbl> <dbl> <chr>
1 1 OTU_67 TRUE OTU 8 <tibble> 3.39 3.36 3.32 Late
2 2 OTU_231 TRUE OTU 8 <tibble> NA NA NA NA
3 3 OTU_188 TRUE OTU 8 <tibble> NA NA NA NA
4 4 OTU_150 TRUE OTU 8 <tibble> NA NA NA NA
5 5 OTU_207 TRUE OTU 8 <tibble> NA NA NA NA
6 6 OTU_5 TRUE OTU 8 <tibble> NA NA NA NA
7 7 OTU_1 TRUE OTU 8 <tibble> NA NA NA NA
8 8 OTU_2 TRUE OTU 8 <tibble> NA NA NA NA
9 9 OTU_3 TRUE OTU 8 <tibble> NA NA NA NA
10 10 OTU_4 TRUE OTU 8 <tibble> 4.40 4.38 4.36 Late
# … with 394 more rows, 2 more variables: pvalue <dbl>, fdr <dbl>, and
# abbreviated variable names ¹nodeClass, ²nodeDepth, ³RareAbundanceBySample,
# ⁴LDAupper, ⁵LDAlower, ⁶Sign_time
# ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
>
>
> # Then you can extract the names of differential taxa
> taxa.tree %>% dplyr::filter(nodeClass=='Genus' & !is.na(Sign_time),keep.td=FALSE) %>% dplyr::pull(label)
[1] "g__Bifidobacterium" "g__un_f__Muribaculaceae"
[3] "g__Alistipes" "g__Clostridium_sensu_stricto_1"
[5] "g__ASF356" "g__Lachnospiraceae_NK4A136_group"
[7] "g__Lachnospiraceae_UCG-001" "g__Anaerotruncus"
[9] "g__Oscillibacter" "g__Ruminiclostridium"
[11] "g__Turicibacter" "g__un_f__Mitochondria"
[13] "g__Pseudomonas" "g__Anaeroplasma"
>
>
> # Then, calculate the relative abundance if you want to visualize it and filter the genus.
> mouse.time.mpse %>% mp_rrarefy() %>% mp_cal_abundance(.abundance=RareAbundance) %>% dplyr::filter(Genus %in% keep.genus) %>% mp_plot_abundance(.abundance=RelRareAbundanceBySample, force=T, relative=F, taxa.class=Genus, geom='heatmap', topn='all', .group=time) -> p1
> p1[[1]] <- p1[[1]] + scale_fill_viridis_c(option='H')
> p1[[2]] <- p1[[2]] + scale_fill_manual(values=c('orange', 'deepskyblue'))
> p1
If you want to integrate the P-value or FDR or other statistics results.
> taxa.tree %>% dplyr::filter(nodeClass=='Genus' & !is.na(Sign_time),keep.td=FALSE) %>% ggplot(aes(x=-log10(fdr), y=label, fill=Sign_time)) + geom_col() + ylab(NULL) + theme_bw() + theme(axis.text.y=element_blank()) + scale_x_continuous(expand=c(0,0,0, .1)) + scale_fill_manual(values=c('orange', 'deepskyblue')) -> p2
>
> p1 %>% aplot::insert_right(p2, width=.2)
In addition, since the output of the result is tidy-like, you also can use ggplot2
or ggplot2-extension
to visualize it. For example. Manhattan plot
> # First, obtain the taxonomy information of OTU.
> taxa.da <- mouse.time.mpse %>% mp_extract_taxonomy()
> # Then, add to the `data.frame` of the significant OTUs and visualize it.
> taxa.tree %>% dplyr::filter(nodeClass=='OTU', keep.td=F) %>% dplyr::left_join(taxa.da, by=c('label'='OTU')) %>% dplyr::mutate(Sign_time=dplyr::case_when(is.na(Sign_time) ~ 'NoSign', TRUE ~ Sign_time)) %>% ggplot(aes(x=Phylum, y=-log10(fdr), color=Phylum, shape=Sign_time)) + geom_jitter(size=3) + geom_hline(yintercept=-log10(0.05)) + theme(axis.text.x=element_text(angle=-45, hjust=0))
My experiment contains a large amount of data, and the above method you provided, I think, is not very suitable for my situation.By the way , Ask a basic question, how to export the generated S4 objects to generate a table?
MicrobiotaProcess
provides the functions named starting with mp_extract_
to extract the corresponding slot
.
> library(MicrobiotaProcess)
> mp_extract_
mp_extract_abundance mp_extract_dist mp_extract_internal_attr mp_extract_rarecurve mp_extract_sample mp_extract_taxonomy
mp_extract_assays mp_extract_feature mp_extract_otutree mp_extract_refseq mp_extract_taxatree mp_extract_tree
> data(mouse.time.mpse)
> mouse.time.mpse %>% mp_extract_assays(.abundance=Abundance) %>% head
F3D0 F3D1 F3D141 F3D142 F3D143 F3D144 F3D145 F3D146 F3D147 F3D148 F3D149
OTU_1 579 405 444 289 228 421 645 325 1495 863 883
OTU_2 345 353 362 304 176 277 489 230 1215 729 779
OTU_3 449 231 345 158 204 302 522 254 913 581 723
OTU_4 430 69 502 164 231 357 583 388 1089 853 897
OTU_5 154 140 189 180 130 104 307 179 453 443 417
OTU_6 470 41 331 181 244 353 476 275 1182 872 637
F3D150 F3D2 F3D3 F3D5 F3D6 F3D7 F3D8 F3D9
OTU_1 317 3488 993 327 1015 648 272 511
OTU_2 229 1587 602 268 674 504 352 423
OTU_3 399 1175 465 284 588 438 349 482
OTU_4 471 472 200 158 404 314 147 206
OTU_5 169 338 402 151 476 470 582 596
OTU_6 216 115 25 23 17 11 0 0
> mouse.time.mpse %>% mp_extract_sample(.abundance=Abundance) %>% head
# A tibble: 6 × 2
Sample time
<chr> <chr>
1 F3D0 Early
2 F3D1 Early
3 F3D141 Late
4 F3D142 Late
5 F3D143 Late
6 F3D144 Late
> mouse.time.mpse %>% mp_extract_taxonomy() %>% head
# A tibble: 6 × 8
OTU Kingdom Phylum Class Order Family Genus Species
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 OTU_67 k__Bacteria p__Actinobacteria c__Actinobac… o__B… f__Bi… g__B… s__un_…
2 OTU_231 k__Bacteria p__Actinobacteria c__Coriobact… o__C… f__At… g__O… s__un_…
3 OTU_188 k__Bacteria p__Actinobacteria c__Coriobact… o__C… f__Eg… g__E… s__mur…
4 OTU_150 k__Bacteria p__Actinobacteria c__Coriobact… o__C… f__Eg… g__E… s__un_…
5 OTU_207 k__Bacteria p__Actinobacteria c__Coriobact… o__C… f__Eg… g__E… s__un_…
6 OTU_5 k__Bacteria p__Bacteroidetes c__Bacteroid… o__B… f__Ba… g__B… s__un_…
> mouse.time.mpse %>% mp_extract_taxatree()
'treedata' S4 object'.
...@ phylo:
Phylogenetic tree with 218 tips and 186 internal nodes.
Tip labels:
OTU_67, OTU_231, OTU_188, OTU_150, OTU_207, OTU_5, ...
Node labels:
r__root, k__Bacteria, p__Actinobacteria, p__Bacteroidetes, p__Cyanobacteria,
p__Deinococcus-Thermus, ...
Rooted; no branch lengths.
with the following features available:
'nodeClass', 'nodeDepth'.
# The associated data tibble abstraction: 404 × 5
# The 'node', 'label' and 'isTip' are from the phylo tree.
node label isTip nodeClass nodeDepth
<int> <chr> <lgl> <chr> <dbl>
1 1 OTU_67 TRUE OTU 8
2 2 OTU_231 TRUE OTU 8
3 3 OTU_188 TRUE OTU 8
4 4 OTU_150 TRUE OTU 8
5 5 OTU_207 TRUE OTU 8
6 6 OTU_5 TRUE OTU 8
7 7 OTU_1 TRUE OTU 8
8 8 OTU_2 TRUE OTU 8
9 9 OTU_3 TRUE OTU 8
10 10 OTU_4 TRUE OTU 8
# … with 394 more rows
# ℹ Use `print(n = ...)` to see more rows
> mouse.time.mpse %>% mp_rrarefy() %>% mp_diff_analysis(.abundance=RareAbundance, .group=time) %>% mp_extract_feature(addtaxa=T)
# A tibble: 218 × 15
OTU Kingdom Phylum Class Order Family Genus Species RareAb…¹ LDAup…²
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <list> <dbl>
1 OTU_1 k__Bacteria p__Bact… c__B… o__B… f__Mu… g__u… s__un_… <tibble> NA
2 OTU_2 k__Bacteria p__Bact… c__B… o__B… f__Mu… g__u… s__un_… <tibble> NA
3 OTU_3 k__Bacteria p__Bact… c__B… o__B… f__Mu… g__u… s__un_… <tibble> NA
4 OTU_4 k__Bacteria p__Bact… c__B… o__B… f__Mu… g__u… s__un_… <tibble> 4.40
5 OTU_5 k__Bacteria p__Bact… c__B… o__B… f__Ba… g__B… s__un_… <tibble> NA
6 OTU_6 k__Bacteria p__Bact… c__B… o__B… f__Mu… g__u… s__un_… <tibble> 4.53
7 OTU_7 k__Bacteria p__Bact… c__B… o__B… f__Mu… g__u… s__un_… <tibble> 4.17
8 OTU_8 k__Bacteria p__Bact… c__B… o__B… f__Ri… g__A… s__un_… <tibble> 4.09
9 OTU_9 k__Bacteria p__Bact… c__B… o__B… f__Mu… g__u… s__un_… <tibble> NA
10 OTU_10 k__Bacteria p__Bact… c__B… o__B… f__Mu… g__u… s__un_… <tibble> NA
# … with 208 more rows, 5 more variables: LDAmean <dbl>, LDAlower <dbl>,
# Sign_time <chr>, pvalue <dbl>, fdr <dbl>, and abbreviated variable names
# ¹RareAbundanceBySample, ²LDAupper
# ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
PS: If the experiment contains a large amount of data. I think you can use [
method to filter the rows.
> set.seed(123)
> keep.otus <- rownames(mouse.time.mpse) %>% sample(88)
> sub.mpse <- mouse.time.mpse[rownames(mouse.time.mpse) %in% keep.otus, ]
> sub.mpse
# A MPSE-tibble (MPSE object) abstraction: 1,672 × 11
# OTU=88 | Samples=19 | Assays=Abundance | Taxonomy=Kingdom, Phylum, Class, Order, Family, Genus, Species
OTU Sample Abundance time Kingdom Phylum Class Order Family Genus Species
<chr> <chr> <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 OTU_4 F3D0 430 Early k__Bac… p__Ba… c__B… o__B… f__Mu… g__u… s__un_…
2 OTU_6 F3D0 470 Early k__Bac… p__Ba… c__B… o__B… f__Mu… g__u… s__un_…
3 OTU_7 F3D0 282 Early k__Bac… p__Ba… c__B… o__B… f__Mu… g__u… s__un_…
4 OTU_13 F3D0 52 Early k__Bac… p__Fi… c__B… o__L… f__La… g__L… s__un_…
5 OTU_14 F3D0 104 Early k__Bac… p__Ba… c__B… o__B… f__Mu… g__u… s__un_…
6 OTU_16 F3D0 80 Early k__Bac… p__Fi… c__E… o__E… f__Er… g__T… s__un_…
7 OTU_21 F3D0 42 Early k__Bac… p__Fi… c__B… o__L… f__La… g__L… s__un_…
8 OTU_22 F3D0 44 Early k__Bac… p__Fi… c__C… o__C… f__La… g__u… s__un_…
9 OTU_23 F3D0 280 Early k__Bac… p__Fi… c__C… o__C… f__La… g__L… s__un_…
10 OTU_25 F3D0 26 Early k__Bac… p__Fi… c__C… o__C… f__Pe… g__u… s__un_…
# … with 1,662 more rows
# ℹ Use `print(n = ...)` to see more rows
The github version (>=1.11.3) supports using the manhattan plot to visualize the result of DAA.
library(MicrobiotaProcess)
data(mouse.time.mpse)
mouse.time.mpse %<>%
mp_rrarefy()
mouse.time.mpse
mouse.time.mpse %<>%
mp_diff_analysis(.abundance=RareAbundance,
.group=time,
first.test.alpha=0.01,
action="add")
p <- mouse.time.mpse %>%
mp_plot_diff_manhattan(
.group = Sign_time,
.y = fdr,
.size = 2,
taxa.class = OTU,
anno.taxa.class = Phylum,
)
p
THANKS
Can the results be presented in other ways? such like a Manhattan plot?