Closed taylorreiter closed 1 year ago
no promises that it runs, but recording here so it's somewhere
library(ggalluvial) library(magrittr) library(sourmashconsumr) taxonomy_annotate_df <- read_taxonomy_annotate(Sys.glob("tests/testthat/SRR19*lineage*.csv"), separate_lineage = T) %>% dplyr::select(f_unique_to_query, f_unique_weighted, domain, phylum, class, family, order, genus, species) %>% dplyr::group_by(domain, phylum, class, family, order, genus, species) %>% dplyr::summarize(sum_f_unique_weighted = sum(f_unique_weighted)) ggalluvial::is_alluvia_form(taxonomy_annotate_df) ggplot2::ggplot(taxonomy_annotate_df, ggplot2::aes(y = sum_f_unique_weighted, axis1 = domain, axis2 = phylum, axis3 = class, axis4 = order, axis5 = family)) + #ggalluvial::geom_alluvium(aes(fill = order), width = 1/12) + ggalluvial::geom_flow() + ggalluvial::geom_stratum(width = 1/10, alpha = .5, aes(fill = c(family))) + ggplot2::geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 2, hjust = -0.25) + theme_classic() + labs(x = "tanomic rank", y = "abundance-weighted unique fraction\ntotaled across all samples") + scale_x_continuous(labels = c("domain", "phylum", "class", "order", "family"), breaks = c(1, 2, 3, 4, 5)) ggplot(taxonomy_annotate_df, aes(x = survey, stratum = response, alluvium = subject, y = freq, fill = response, label = response)) + scale_x_discrete(expand = c(.1, .1)) + geom_flow() + geom_stratum(alpha = .5) + geom_text(stat = "stratum", size = 3) + theme(legend.position = "none") + ggtitle("vaccination survey responses at three points in time") taxonomy_annotate_df <- read_taxonomy_annotate(Sys.glob("tests/testthat/*lineage*.csv"), separate_lineage = T) %>% dplyr::select(query_name, f_unique_to_query, f_unique_weighted, domain, phylum, class, family, order, genus, species) %>% dplyr::group_by(query_name, domain, phylum, class, family, order, genus, species) %>% dplyr::summarize(sum_f_unique_weighted = sum(f_unique_weighted)) to_lodes_form(taxonomy_annotate_df_long) # create a fill variable --> it will be based on alphabetical order (which is how the alluvial plot is ordered) # and it will be for each level of taxonomy # probably needs to switch to long format taxonomy_annotate_df_long <- taxonomy_annotate_df %>% tidyr::pivot_longer(cols = domain:species, names_to = "taxonomic_rank", values_to = "taxonomic_label") taxonomy_annotate_df_long <- transform(taxonomy_annotate_df_long, taxonomic_label = factor(taxonomic_label)) to_lodes_form(taxonomy_annotate_df_long) ggplot(taxonomy_annotate_df_long, aes(x = taxonomic_rank, stratum = taxonomic_label, alluvium = query_name, y = sum_f_unique_weighted, fill = taxonomic_label, label = taxonomic_label)) + scale_x_discrete(expand = c(.1, .1)) + geom_flow() + geom_stratum(alpha = .5) + geom_text(stat = "stratum", size = 3) + theme(legend.position = "none") + ggtitle("alluvial plot") # test data --------------------------------------------------------------- data(vaccinations) vaccinations <- transform(vaccinations, response = factor(response, rev(levels(response)))) ggplot(vaccinations, aes(x = survey, stratum = response, alluvium = subject, y = freq, fill = response, label = response)) + scale_x_discrete(expand = c(.1, .1)) + geom_flow() + geom_stratum(alpha = .5) + geom_text(stat = "stratum", size = 3) + theme(legend.position = "none") + ggtitle("vaccination survey responses at three points in time") # try parallel sets ------------------------------------------------------- data <- reshape2::melt(Titanic) data <- gather_set_data(data, 1:4) data data <- gather_set_data(taxonomy_annotate_df, 1:7) palette <- colorRampPalette(RColorBrewer::brewer.pal(8, "Set2"))(length(unique(data$y))) ggplot(data, aes(x, id = id, split = y, value = sum_f_unique_weighted)) + geom_parallel_sets(alpha = 0.3, axis.width = 0.1) + geom_parallel_sets_axes(axis.width = 0.2, aes(fill = y)) + geom_parallel_sets_labels(colour = 'black', angle = 360, size = 2, hjust = -0.25) + theme_classic() + theme(axis.line.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.ticks.x = element_blank(), legend.position = "None") + labs(x = "tanomic rank") + scale_x_continuous(labels = c("domain", "phylum", "class", "order", "family", "genus", "species", ""), breaks = c(1, 2, 3, 4, 5, 6, 7, 8), limits = c(.75, 8)) + scale_fill_manual(values = palette)
no promises that it runs, but recording here so it's somewhere