Closed fulmicoton closed 2 weeks ago
The two documents indeed contain the following:
{"o_params": {"\u0000\"><script>alert(309)</script>": "1"}}
{"o_params": {" ADw-script AD4-alert(312) ADw-/script AD4-": "1"}}
The "0" and " " at the beginning look suspicious.
A stacktrace would be really helpful (Shouldn't we have them on by default?)
It cannot be reproduced like this:
/// Attempted reproduction for bug 2442.
///
/// Builds an in-RAM index with a single JSON field (`TEXT | FAST`), sets the
/// writer's merge policy to `NoMergePolicy`, and indexes documents of the form
/// `{"o_params": {<key>: "s"}}` using two different keys across six commits.
/// Finally, all searchable segments are merged explicitly.
#[test]
fn test_bug_2442() -> crate::Result<()> {
    let mut schema_builder = schema::Schema::builder();
    let json_field = schema_builder.add_json_field("json", TEXT | FAST);
    let index = Index::builder()
        .schema(schema_builder.build())
        .create_in_ram()?;

    let mut index_writer: IndexWriter = index.writer_for_tests()?;
    index_writer.set_merge_policy(Box::new(NoMergePolicy));

    // The two JSON keys under test, written out as text rather than raw bytes.
    let first_key = "0\"><script>alert(309)</script>";
    let second_key = " ADw-script AD4-alert(312) ADw-/script AD4-";

    // Each inner slice lists the keys of the documents added before one commit,
    // in insertion order.
    let batches: [&[&str]; 6] = [
        &[first_key, second_key],
        &[second_key, first_key],
        &[first_key],
        &[second_key],
        &[second_key],
        &[first_key],
    ];

    for batch in batches.iter() {
        for &key in batch.iter() {
            index_writer
                .add_document(doc!(
                    json_field => json!({"o_params": { key.to_string(): "s" }})
                ))
                .unwrap();
        }
        index_writer.commit()?;
    }

    // Merge: finish the indexing writer first, then merge every searchable
    // segment with a fresh writer.
    assert!(index_writer.wait_merging_threads().is_ok());
    let mut merge_writer: IndexWriter = index.writer_for_tests()?;
    let segment_ids = index
        .searchable_segment_ids()
        .expect("Searchable segments failed.");
    merge_writer.merge(&segment_ids).wait().unwrap();
    assert!(merge_writer.wait_merging_threads().is_ok());

    Ok(())
}
thread 'blocking-5' panicked at /Users/fulmicoton/.cargo/git/checkouts/tantivy-f70b7ea03dadae9a/b960e40/sstable/src/lib.rs:257:9:
Keys should be increasing. ([111, 95, 112, 97, 114, 97, 109, 115, 1, 48, 34, 62, 60, 115, 99, 114, 105, 112, 116, 62, 97, 108, 101, 114, 116, 40, 51, 48, 57, 41, 60, 47, 115, 99, 114, 105, 112, 116, 62, 0, 115, 49] > [111, 95, 112, 97, 114, 97, 109, 115, 1, 32, 65, 68, 119, 45, 115, 99, 114, 105, 112, 116, 32, 65, 68, 52, 45, 97, 108, 101, 114, 116, 40, 51, 49, 50, 41, 32, 65, 68, 119, 45, 47, 115, 99, 114, 105, 112, 116, 32, 65, 68, 52, 45, 0, 115, 49])
stack backtrace:
0: _rust_begin_unwind
1: core::panicking::panic_fmt
2: tantivy::postings::serializer::FieldSerializer::new_term
3: <tantivy::postings::json_postings_writer::JsonPostingsWriter<Rec> as tantivy::postings::postings_writer::PostingsWriter>::serialize
4: tantivy::postings::postings_writer::serialize_postings
5: tantivy::indexer::segment_writer::SegmentWriter::finalize
6: quickwit_indexing::models::indexed_split::IndexedSplitBuilder::finalize
7: <tracing::instrument::Instrumented<T> as core::future::future::Future>::poll
8: <quickwit_indexing::actors::index_serializer::IndexSerializer as quickwit_actors::actor::Handler<quickwit_indexing::models::indexed_split::IndexedSplitBatchBuilder>>::handle::{{closure}}
9: <H as quickwit_actors::actor::DeferableReplyHandler<M>>::handle_message::{{closure}}
10: <core::option::Option<(tokio::sync::oneshot::Sender<<A as quickwit_actors::actor::DeferableReplyHandler<M>>::Reply>,M)> as quickwit_actors::envelope::EnvelopeT<A>>::handle_message::{{closure}}
11: quickwit_actors::spawn_builder::ActorExecutionEnv<A>::process_one_message::{{closure}}
12: quickwit_actors::spawn_builder::SpawnBuilder<A>::spawn::{{closure}}
13: tokio::runtime::task::core::Core<T,S>::poll
14: tokio::runtime::task::harness::Harness<T,S>::poll
15: tokio::runtime::scheduler::multi_thread::worker::Context::run_task
16: tokio::runtime::scheduler::multi_thread::worker::Context::run
17: tokio::runtime::context::set_scheduler
18: tokio::runtime::scheduler::multi_thread::worker::run
19: <tokio::runtime::blocking::task::BlockingTask<T> as core::future::future::Future>::poll
20: tokio::runtime::task::core::Core<T,S>::poll
21: tokio::runtime::task::raw::poll
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
(Almost) minimal reproducible example on Quickwit:
2 documents:
{"\u0000B":"1"}
{" A":"1"}
Index config:
{
"version": "0.7",
"index_id": "airmail",
"indexing_settings": {
"commit_timeout_secs": 30
},
"doc_mapping": {
"mode": "dynamic",
"dynamic_mapping": {
"tokenizer": "raw",
"fast": true
}
}
}
I don't know if it is the inverted index or the columnar.
The two strings are respectively: