Open sachasuca opened 6 years ago
This is very interesting and would have much importance in reporting over studies in which less known microbes have important roles. I'd be interested in naming every unspecified taxonomy at every level according to their corresponding above rank.
I think it is also important because from what I understand, the tax_glom function in phyloseq tosses out anything denoted as "NA."
@sachasuca you can use the option NArm=FALSE with tax_glom() if that's a problem.
Hi,
This might be a bit of a late answer, but this is the solution that I use for this problem:
# Replace missing taxonomy ranks with a label built from the last known rank.
# Assumes `psdata` is a phyloseq object whose tax_table has the seven columns
# Kingdom, Phylum, Class, Order, Family, Genus, Species in that order
# -- TODO confirm for your data.
tax.clean <- data.frame(tax_table(psdata))

# Use plain character columns so the comparisons against "" below are valid.
for (i in seq_len(7)) {
  tax.clean[, i] <- as.character(tax.clean[, i])
}
tax.clean[is.na(tax.clean)] <- ""

# For each taxon, find the first empty rank and fill it and every lower rank
# with the name of the rank directly above it, prefixed by that rank's title.
# NOTE(review): prefix and name are joined without a separator (e.g.
# "KingdomBacteria"); change the prefixes to e.g. "Kingdom_" if you want one.
# seq_len() keeps the loop from running on an empty table.
for (i in seq_len(nrow(tax.clean))) {
  if (tax.clean[i, 2] == "") {
    kingdom <- paste0("Kingdom", tax.clean[i, 1])
    tax.clean[i, 2:7] <- kingdom
  } else if (tax.clean[i, 3] == "") {
    phylum <- paste0("Phylum", tax.clean[i, 2])
    tax.clean[i, 3:7] <- phylum
  } else if (tax.clean[i, 4] == "") {
    class <- paste0("Class", tax.clean[i, 3])
    tax.clean[i, 4:7] <- class
  } else if (tax.clean[i, 5] == "") {
    order <- paste0("Order", tax.clean[i, 4])
    tax.clean[i, 5:7] <- order
  } else if (tax.clean[i, 6] == "") {
    family <- paste0("Family", tax.clean[i, 5])
    tax.clean[i, 6:7] <- family
  } else if (tax.clean[i, 7] == "") {
    tax.clean$Species[i] <- paste0("Genus", tax.clean$Genus[i])
  }
}

# Write the cleaned taxonomy back into the phyloseq object.
tax_table(psdata) <- as.matrix(tax.clean)
@MSMortensen can you show a before and after of taxonomy classifications after this? Currently travelling but looking forward to try it out. Thanks!
I don't have a before, but this is the result: ![image](https://user-images.githubusercontent.com/7501760/40989140-358afbea-68ed-11e8-9fad-e14b3b316f07.png)
Not sure if anyone is still facing this issue, but I have implemented a flexible naming function called name_na_taxa
that renames NA taxa to the last identified rank in the fantaxtic package.
# Install fantaxtic from GitHub. install_github() is called through its
# namespace so the line works without attaching a package first.
# NOTE(review): this uses devtools; remotes::install_github() is equivalent.
devtools::install_github("gmteunisse/fantaxtic")

# library() stops immediately if a package is missing, whereas require()
# only returns FALSE and lets the failure surface later.
library("fantaxtic")
library("phyloseq")

# Example: label the NA ranks of the GlobalPatterns data set.
data(GlobalPatterns)
ps <- name_na_taxa(GlobalPatterns, na_label = "Unidentified <tax> (<rank>)")
If you don't feel like installing the package, just copy + paste the code for the function below. This could be integrated into phyloseq if the developers have the time?
require("tidyverse")
require("phyloseq")
#' Name NA taxa after their last identified rank
#'
#' Replaces every NA entry in the tax_table of `ps_obj` with a label built
#' from `na_label`, in which `<tax>` is substituted by the nearest non-NA
#' value above it in the same taxon and, optionally, `<rank>` by the name of
#' the rank that value came from.
#'
#' @param ps_obj Object whose `tax_table()` is read and then overwritten.
#' @param include_rank If `TRUE`, `na_label` must contain `<rank>`; if
#'   `FALSE`, it must not.
#' @param na_label Label template; must contain `<tax>`.
#' @return `ps_obj` with its tax_table replaced by the relabelled table.
#' @details An NA with no identified rank above it has nothing to be filled
#'   from; presumably it stays NA -- TODO confirm.
name_na_taxa <- function(ps_obj, include_rank = TRUE,
                         na_label = "Unknown <tax> (<rank>)") {
  # Check arguments
  if (!grepl("<tax>", na_label)) {
    stop("Error: include '<tax>' in the na_label")
  }
  if (include_rank) {
    if (!grepl("<rank>", na_label)) {
      stop("Error: include_rank = TRUE; include '<rank>' in the na_label")
    }
  } else {
    if (grepl("<rank>", na_label)) {
      stop("Error: include_rank = FALSE; remove '<rank>' from the na_label")
    }
  }

  # Convert to long data: one row per (taxon, rank) pair
  taxa_long <- tax_table(ps_obj) %>%
    data.frame(row_name = row.names(.)) %>%
    pivot_longer(!row_name,
                 names_to = "rank",
                 values_to = "tax")

  # Remember which entries were NA, then fill them with the value above.
  # Grouping by taxon keeps values from leaking between taxa.
  taxa_long <- taxa_long %>%
    mutate(na = is.na(tax)) %>%
    group_by(row_name) %>%
    fill(tax)

  # Create the labels. The result goes into a column called `label` so it
  # does not shadow the `na_label` argument.
  taxa_long <- taxa_long %>%
    mutate(expr = ifelse(na,
                         na_label,
                         tax),
           label = str_replace(expr, "<tax>", tax))

  # Add the last annotated rank (still grouped by taxon for the fill)
  if (include_rank) {
    taxa_long <- taxa_long %>%
      mutate(last_rank = ifelse(na,
                                NA,
                                rank)) %>%
      fill(last_rank) %>%
      mutate(label = str_replace(label, "<rank>", last_rank))
  }

  # Convert back to a wide character matrix
  taxa_mat <- taxa_long %>%
    ungroup() %>%
    select(row_name, rank, label) %>%
    pivot_wider(names_from = rank, values_from = label) %>%
    as.matrix()
  row.names(taxa_mat) <- taxa_mat[, "row_name"]
  # drop = FALSE keeps a matrix even when only one taxon or one rank remains
  taxa_mat <- taxa_mat[, colnames(taxa_mat) != "row_name", drop = FALSE]

  tax_table(ps_obj) <- taxa_mat
  ps_obj
}
# Example: label the NA ranks of the GlobalPatterns data set.
# library() stops immediately if phyloseq is missing, whereas require()
# only returns FALSE and lets the failure surface later.
library("phyloseq")
data(GlobalPatterns)
ps <- name_na_taxa(GlobalPatterns, na_label = "Unidentified <tax> (<rank>)")
Hi, how can I replace "unidentified" taxa with NA? Please help.
[733] "b667a84f798ed09f06cffe843cdf580b" "e628097825f3be7a9d5e01eb66c65d51" "6591f8f6e7370528e435ce4be50902c2" "683c493094a12f9064c714befdb1a67c" [737] "2578dcd51811015368385969ec281c88" "4241f79f3c5c99d74a9bc595f60e66d5" "e3bba4d4efc06e539d44b53ad4bb3cd9" "6e4f428961346159ab7242208f5fb224" [741] "59ed20ee7fce0b7645f01f8ae719f934" "66f3cec4165b89f2dd3eea932ca9aac0" "3680a497aafb8855cbc61658bd514c41" "bccf07401f6b4d8f1a984b2033719947" [745] "e8410e0c16a6b41516cf47b1b795e4bc" "eb4a90a85cc69c25ce687eb299b7b7ce" "35f2e315d6d668810ce709daa6a35d37" "9ce289481ded86f953c89895e13b5bca" [749] "c06dbefa0a7126199f8ff40b1bb799a7" "fec97467ae035451505dea5a40d9f00e" "aecf31e298682ec431ce4b9b463aba27" "19845aba24e6dc46d2d87746b8e926b5" [753] "be9a1fc0dcdc3256c896636aa6ce5ed9" "28dc2a2b5c15e471cb293dce5e0e30f2" "3294c3381a9e3c2c0ffbb981b127aba4" "3237ed964b372268eacf76d52f783908" [757] "a9408d1759d6d4991aa3765a84b042a3" "b86f2cd441126979db4ace1e4d514146" "d1587d1e32f579b6069ed671fe7ff5c8" "987d0713c47f6e9c22a41de1869ca4cc" [761] "483efbc682336bad448bcfbbec8e11ba" "4251e4ed40894b4ed8cb519686adbb04" "fe027b4d45420a0a7d581860c1eab5b1" "39eabf51a4da698523408a7ea4f6c093" [765] "6da261421caee2a1af98d2d76207c423" "b9ab8dba295786b195b08f15d9d50971" "d8fecc125556433ff42223fdb8a27229" "9ae1815011a0b29e96233437206a6143" [769] "b2b2c5b48b1c80fdc94604f6b73782bf" "7d569fee828d1440aa2b2353adbf394b" "44f96bac6228cbd95a06174b9067e60a" "224ef8f1a8c3358127f23ba2f8022314" [773] "17dec61ac2e5883e06cfb4b36d07cb5d" "8596e984cfc8e202360c458ef03f3ae9" "bb69651c3ca085da57086b9219c19f71" "5e573d5ca6a975417335243ad8f02cb6" [777] "60d00a431853d391455f7206eb8fc5ad" "f1edf9c41557cfa947623450008bc692" "905fc2e6b62e28fb36e91983c61aa73a" "02b9cb5056769cbbdf515f7aa291a7d5" [781] "2b59562645ca4baa399d8a04e4a37e37" "11c073990228ba3bb2460971634284f9" "de089183f0380188c532fc703da901da" "664750e77375d1189754575159275160" [785] "53181733f799ad5398429d88f7e5497d" "69837b87939a5cd89ab4c555d7bdc234" 
"2399e029645f0604456b966f363a403e" "2f2fa6bc9f8a9e492788a1dd960f00de" [789] "9fa49ad05a6a1e071dbca1145a344307" "b0aeed49253d23e80a0f33c67f13fa86" "245855be7ab1830224f97955339c360c" "bce7de1bfc0873c388c7d9e71aabbc15" [793] "7d3eb198a6a88a70c59f4819ed4a20db" "9c90ae25f89d3c101782ac589d2b0e96" "7d4e09af984cc478849ce7555ecba11e" "6d046d42eac8ebcd3df1d6c44ab78ea5" [797] "c117392deb41af2e72a651aea1b71714" "1cac17f94b450b0f6dca93e8731afac0" "2065efd9a0ff66fcf5324531fb6a2939" "096dc37bd0a874ada25d7c2a4d8f7e9b" [801] "dd850726c1cafa0e7787cb2902faf336" "11de4db6157aaa0244bd949349bcafae" "549c8b443922a6378b82895b24cea291" "44bee76e31f71c5676268464f192a7a3" [805] "f86735609b87c60b4219ba31a499008c" "808386a73bb55b3433a943ee18692a0d" "c24cc200e1cea964b58f38679497050f" "21fe4ed1142a275ef70ad6817d6939aa" [809] "9ef4d27f66b017e8c5250733ceb54f1e" "cb5dec129d6886e03dca945ebf1174c2" "67bb8cff22ce95a9ca1494b85cb64e5d" "5d1a528783996e35925031ee5e4e22a1" [813] "6d3d7c1fc187c27870fd1f27fc1dc0f6" "8cf002be2fda3f2694afb76ba0139dce" "be449eed465e8221d5fe3f3948358140" "3c594583c621098a716b334385368f92" [817] "478829db0787855709ad283a2e33a9e4" "259cebb61e786a8b762b1d2b489499f4" "2fa97eade36178b139ce29a65a293c2e" "9ac073570c05ebd2cb37054bd560525c" [821] "80551921ddd9e94ad9e050d817206f85" "870277aa0b6284d3f62d29d1bbcb4fca" "7d30a8b8d19dfb68f6ff260a5aae638a" "2c06eccec6f8fe97171943b47727965d" [825] "eab007563c71982ac71b7b326a89ff38" "466305bc64dc1e10edbb4762c8488914" "ddbfe7fb9437f3a1e184177d244dcc67" "404488c0f76a805efc625a06288f1f11" [829] "d15e9d45451ae83f4f93915ae9ec9475" "6e38a108a08c837fd78bab71c8a18709" "479e2a4b103068745aff256c76358565" "13ddeb5438115b4e517910d7605c07bb" [833] "fc414a271341fbba1a1595935724d418" "5667448db88cb0d29ce91902752e8110" "930cb519df9db2db5539b9c3e48713cf" "33dfeccb75cddeb7ea5bfb004a77413c" [837] "e79b1c6fbed30b232482c569069cf6ef" "33beb51b02e2656a91d82087f8e35b16" "4888f6debb26b190f2b48dd2cac88206" "1a6ac8a3a043f47b414fc06c8c1beb1e" [841] 
"a15a4c635116da2aa233f745b9c50ac9" "331abd622378d7df8255015d3a1aed8e" "922982256ebe5a97d3618fb50e8c04ab" "9e2b5d5c4d1b632860cf72fd11b997af" [845] "a6d0001e9104b22ec346f0df352d479d" "bcd1c9258bf0a19687fe11b7f1c12af3" "a690176b8e5123292991463d62b8804c" "57874d42bfbb703323de4ad9c181def1" [849] "ad500f03c22f72eda8cff969fbf4765a" "b7117d480148c77d39428eaaec164462" "f42e3ea5cebf18760f1b02ca3d86684d" "db3170824b2f8d7c44bedf7eb72cfa09" [853] "99b46c4263a21c4dff9cf0fcb8142287" "76025d4131e27d6a0ad9074447fda9da" "50c26daa5422a4646983c1f925d5bf9a" "b17e266db4506c7cb42f8041329eb7d5" [857] "062a846cf841e90f7b1c35da14101870" "febdf3a7b9f4c3edc72a41f72f5c4749" "3d01fdc364cb040369ec5de004a72862" "d115889240a61214f4eec0732d12baf0" [861] "698515446362d43903721ad8850d7d36" "81beca4e4e4bd57f78d3182019d30511" "2d5845df516420eba855458ee1afc88e" "329321ddda195472c0bb06804cb45f16" [865] "71930c729d53fe5eb1b496701c706fe1" "f7aef9bad9ac380214352f9045eea899" "763dc16375fc4ae790ff8bd49a32e347" "76a8bcc2fa35f2465310b38dd53064ef" [869] "64e2072332fdf66c7e82bc482c66cee7" "41ad53401c1e1ca497e9cdf2ddff2e95" "00974edd04e10cb4af438320da600e94" "44dd02d2244bbd81780df549e432b43b" [873] "2c6c32688b43f5e67305b0257c88483f" "40ff863ce26c626d9ce884cb6b7a228c" "ed58ffa1d69c0986dc548451633150fa" "7afac427f1d60bfb2f856df138420e9f" [877] "cef869c30e725ef7d505871832079f58" "773565c33033e10c27f526e828e34525" "19d717fee97777192ba9d166e18be3cc" "f63dd4c61213202fef44a28b5b5c879c" [881] "959027c534d6a84d1f50ceb603b7bc5c" "c0604cdcc29e01a5e56f5b927e30f747" "e548bbda84aa0957f89799a0df7f1d1d" "f10e760bb3e6135dda2cd2501b4e1117" [885] "af3d1f51f56a95c4790422726040eef4" "1856772f0d0ecf267b2e2f1213193219" "978d2c3e7569b41af68b993243a966cb" "67331209f00b48afce0bbc72f95fea50" [889] "c3e62293ba654ae1af8a833f345cf7ef" "5e58d6b3cf2504c7f2fa584a6b0ceab7" "5e356b5d4d4e852b692d166ee40ca118" "e622316c93191bfcb51d844f6e1d1d7a" [893] "d67326f2e2d4b4b29dba00c5e0cca231" "c545fbffac43167c1d109eeb40361fe5" 
"7f0108a626c2a3d275f00c8a13bcac8d" "883225da47bca6f4413f5c66d95c12a7" [897] "309c14a4b98c7e13e52081633502ce71" "11c557d0a5ccf79b30b68d3271429c77" "b40595042c80397b2484e6a3951e6d91" "4e2286850c56db6f80ee97a528bd1c8e" [901] "edfcab40aa780335644a4bc71aa1d0ad" "3af2f9acfa840cb8a9d81b614caa7fcc" "d3377dd874504fd0abf90127619eb29f" "5161fc150d95af9149e2dc69ae927e0f" [905] "3c2d0f0bf9257907f449397156d69920" "4f7bc5e148278d0dd8169b07852134b6" "5939118b32112a5cd61e50b6a622367e" "0bc06adcdb3e3d4908dce72b5d8a110a" [909] "f4e809804a6f76f1a01710e4922cec64" "b0b43cb30ba15d1743cfdec57da6222f" "1a16028e2a14b8d94d9aa590896b90c6" "bcaa31cc8d3590f04aef3bb81605038e" [913] "75c4ed889cfb8b7cefff354f9238f078" "5fcb7ebc46c6c59d04ad8d211d29a086" "b1e7c8e8cfc56d3ff160666ccf206fc4" "5fa648fcb7f363591ecd9c378f3d700d" [917] "b188ad2ffdd2d1c235047010139a979e" "e4163da8c963b176563e9d00bad15178" "c08582ef5e92ddfd7a30b270f9ec799b" "a60ee7aeb8a779ed53bd5c964aaf5b0e" [921] "0128a8474b813e52e7d8077d53a51540" "7bb3542ab1899bffd7012ef940cedf95" "05e86b43434cac3b053dca2690300ac0" "2208e8ee2852aceece932784194c90bf" [925] "913126901b25bdc9c673171b388e7a1f" "86938eb719f498e85f059540c797286f" "d9d1506e93e4ee736572cb287f2b154e" "cc8e7c6b76a02f494de1ee45e015ca82" [929] "eed0ecbae993f2a8a365a13d954dee71" "0b1b653897755c2f98336aa2f2d71faf" "078d4594e38cc50cefa8bc5457c7eb13" "f8dedcba48e2bb7510e300bed6179c36" [933] "f9f70da357666e5c21f1e4df42663fa9" "ce89eb0bb70459ec7e88307318df340d" "adc7710918baf19121a37524a6c5bd1d" "b16721a7d846589e1765188f628db195" [937] "d250d90653f82a8aa1ae1efd0a6573f8" "8a4ca629cb710dc85639a25a44bbc934" "458073ddb18d1baf621dd6c60abe332b" "8c5cc6db73c295e26439853726f9534f" [941] "9d5d96137b53093453e17719ab4695b4" "db300f69e74d0bb1e398593208d2c3b0" "96b277175b65ee6bfe951a2f03a4709e" "d40dff0a733acd35a136fdfdab86f35d" [945] "d82ee705c47441ecc8ff53d28232f8b8" "d638f422a5b975022b444d4ca9ad53ba" "34c14b0c009651f00d74d415c616afb2" "004fdaaae869f2424f8197f004e8a93f" [949] 
"1dd762516bb80ba960173839e05c85bc" "e22f85c4ec219e9b30fae2b461ab71c4" "d2cf0941aed6c339ab5f0def46544f94" "0d0269780d4b24aab5ad4bf2d770e911" [953] "a3ddfe5ac2c41581bb42f96872b6ed87" "81b4a0beb6d89e914eebb04f4a5db0a5" "304b17c728a13a999d49990c76971407" "fe9ac981911090219716ae52222c01d6" [957] "21fb8bab89ae9bcbfc671aa7c5544ee7" "6fd9b5fe609b4fda187cc7fa102e1da1" "6fa12878e5bd5eb77d8622f83082e301" "debd932a5d59cfdbb7e34e361319bbd7" [961] "923e7f464d294bc97e09d97625c0bb2c" "671eb8339cb12958dff23b2b2777e660" "ce6208dc2be55c0482886364b0277705" "ba7e0898ec672c0b4cf111d407a47eb4" [965] "245b68a91684c8dd557adc5fd030a766" "6a6819c2a9609358c14e00acf101a41b" "7f575a18c13a2f9822d0389a3226fff8" "d29f4437f3a721fb63b35b6bca1eb489" [969] "7925646f5e0d9a2c88dc0a648a949a04" "739c885a16df8bc41f788ff7aa2d5212" "180601693ac0146c62a9413699f75ec7" "99ce3b28b109bc3ca38e5f24bb511244" [973] "d596b49ff635d67c5268a1ecd9a00d68" "0c49dfcb43589f841da3b186a3f51733" "bee1bda0a146ab382cc99c56d65f77cb" "db86b084a7048fbc2949d03179762497" [977] "c84feb9ea08cf3a19aced7c1e098c376" "384a3d08fb58ee975635e236d14d50d0" "95fc0d0b85f5d1dc064a7621932487d7" "9dfe87572c423cf3e8956fb7dcff94fd" [981] "4c32c47ca4f0f8453676c71fb5cd0fb8" "8c0d47ff56152f21db57125b071a4b96" "c8842bf11835c6bc9d63ca823af042f6" "ccd4b8eb3eee0efe0f438c3a91e6b184" [985] "dc48934297365762941a95a4e985bedc" "a9b5e68cc775ab25cf719a95644df3a7" "bded261a10e76eb1eab82107db019ded" "c78c08a1bbc6536d41910d2f1b52f400" [989] "b06a3334bc816426ea5736d0bda96e68" "5c53baa96d86d18a8b4f33f58e9ada18" "054eb36b63c1a333c57efcceaf898185" "ae0c7d5013cb7162c353305073b3de7e" [993] "d96018b370b5d048c17e259e3d4ec117" "85245a70e7306b672e0e3c70f4cb8941" "2f24549a5cd500c746266be599159ae6" "fa7c1f819a34fce7a6ceb6a34ca9a4cf" [997] "69454eb2e17e79ac01719f9b87805eaa" "3e04cb4490c9180e1d7690c81238f75d" "66fbfa825c55848e98f0431d4bbc5e23" "e0bdf911c9c376502ffdf497d7ac5fb5" [ reached getOption("max.print") -- omitted 556 entries ]
tax_anno(physeq) $what [1] "prev" "abund"
$undetected phyloseq-class experiment-level object otu_table() OTU Table: [ 1556 taxa and 120 samples ] sample_data() Sample Data: [ 120 samples by 7 sample variables ] tax_table() Taxonomy Table: [ 1556 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 1556 tips and 1551 internal nodes ]
$which [1] NA
$sizes [1] 15 15
$gap [1] 2
$args NULL
Warning message: 'tax_anno' is deprecated. Use 'taxAnnotation' instead. See help("Deprecated")
tax_agg(physeq) psExtra object - a phyloseq object with extra slots:
phyloseq-class experiment-level object otu_table() OTU Table: [ 1556 taxa and 120 samples ] sample_data() Sample Data: [ 120 samples by 7 sample variables ] tax_table() Taxonomy Table: [ 1556 taxa by 8 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 1556 tips and 1551 internal nodes ]
psExtra info: tax_agg = "unique"
physeq %>%
- tax_fix(unidentified = NULL) %>%
- tax_table() Error in h(simpleError(msg, call)) : error in evaluating the argument 'object' in selecting a method for function 'tax_table': unused argument (unidentified = NULL) physeq %>%
- tax_transform("compositional", rank = "Order") %>%
- comp_heatmap() Error in h(simpleError(msg, call)) : error in evaluating the argument 'physeq' in selecting a method for function 'taxa_names': NAs in tax_table at rank: Order
To fix the problem, try:
yourData %>% tax_fix()
Try tax_fix_interactive() to find and fix further problems
physeq <- physeq %>%
- tax_fix() physeq %>%
- tax_transform("compositional", rank = "Order") %>%
- comp_heatmap() Problematic Order values detected in tax_table: unidentified
Convergent rows:
Taxon name
Kingdom Phylum Class Order
1 e82d1fd257973270273e7afa6a946525 Fungi Ascomycota Leotiomycetes unidentified
2 6216312bc6d47f49897e454a2ab33c46 Fungi Rozellomycota unidentified unidentified
3 373a26a407912bac4da130de18cb1953 Fungi Chytridiomycota unidentified unidentified
4 ec561da51b50d2ff3e692cdca8f9e211 Fungi unidentified unidentified unidentified
5 7785baaf09a0bb304f825e78da9ddc18 Fungi Basidiomycota unidentified unidentified
6 00aa2420135a1e4002bffe57c1882814 Fungi Basidiomycota Agaricomycetes unidentified
7 245b68a91684c8dd557adc5fd030a766 Fungi Glomeromycota unidentified unidentified
8 c8842bf11835c6bc9d63ca823af042f6 Fungi Ascomycota Lecanoromycetes unidentified
9 73bd5e0c92e801a64afac7456bc5e27a Fungi Zoopagomycota unidentified unidentified
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'physeq' in selecting a method for function 'taxa_names': Taxa cannot be aggregated at rank: Order
See last message for convergent taxa rows.
Hi,
Is there any way to rename unknown taxa as unspecified of the last identified taxa?
For example, if I have a sequence identified as "Enterobacteriaceae" (family) but is "NA" (genus), I'd like the genus to = "Unspecified Enterobacteriaceae."
Likewise, if the phylum is identified as "Firmicutes" but is unknown at lower taxa, I'd like all NAs to be replaced as "Unspecified Firmicutes."