The second version of the Kraken taxonomic sequence classification system
MIT License
683
stars
266
forks
source link
When building the database, the taxid stored in the hash table is not checked; only the existence of the minimizer key is queried. If the same minimizer occurs in sequences that have different taxids, the compare_and_set function should be executed for it. #802
for (size_t i = 0; i < mm_ct; i++) {
// once we've determined that a minimizer won't be an insertion,
// don't bother calling FindIndex() again
if (insertion_list[i]) {
size_t idx;
insertion_list[i] = ! hash.FindIndex(minimizer_list[i], &idx);
index_list[i] = idx;
}
}
// Determine safe prefix of sorted set to insert in parallel
std::set<uint64_t> novel_insertion_points;
size_t safe_ct = 0;
for (safe_ct = 0; safe_ct < mm_ct; safe_ct++) {
if (insertion_list[safe_ct]) {
if (novel_insertion_points.count(index_list[safe_ct]) > 0)
break;
novel_insertion_points.insert(index_list[safe_ct]);
}
}
// Adjust CHT values for all keys in the safe zone in parallel
#pragma omp parallel for
for (size_t i = 0; i < safe_ct; i++) {
SetMinimizerLCA(hash, minimizer_list[i], taxid, tax);
}
https://github.com/DerrickWood/kraken2/blob/acc22481bbeb32b870c12b403ce595fe7a6db770/src/build_db.cc#L382C7-L390C8
// Snippet from the permalink above, with its line breaks restored (collapsed
// onto one line, the first `//` would comment out the rest of the loop).
// Refreshes the lookup result for every minimizer still flagged as a
// potential insertion and records the index reported by FindIndex().
for (size_t i = 0; i < mm_ct; i++) {
  // once we've determined that a minimizer won't be an insertion,
  // don't bother calling FindIndex() again
  if (insertion_list[i]) {
    size_t idx;
    // The flag stays set only while FindIndex() reports the key as absent.
    insertion_list[i] = ! hash.FindIndex(minimizer_list[i], &idx);
    index_list[i] = idx;
  }
}