quickwit-oss / tantivy

Tantivy is a full-text search engine library inspired by Apache Lucene and written in Rust
MIT License
12.21k stars 677 forks source link

Occur::MustNot does not seem to be working as expected #2317

Open hermeGarcia opened 9 months ago

hermeGarcia commented 9 months ago

Describe the bug

Which version of tantivy are you using? 0.21.1

To Reproduce

    use tantivy::collector::DocSetCollector;
    use tantivy::query::{AllQuery, BooleanQuery, Occur, TermQuery};
    use tantivy::schema::IndexRecordOption;
    use tantivy::schema::Schema;
    use tantivy::schema::Term;
    use tantivy::schema::{STORED, STRING};
    use tantivy::Index;

    // Create an index in a fresh temporary directory; its schema has a single
    // `STRING | STORED` text field named "uuid".
    let tmp_dir = tempfile::tempdir().unwrap();
    let mut schema_builder = Schema::builder();
    let uuid = schema_builder.add_text_field("uuid", STRING | STORED);
    let schema = schema_builder.build();
    let index = Index::builder()
        .schema(schema.clone())
        .create_in_dir(&tmp_dir.path())
        .expect("Index directory should exist");

    // Index two documents, one per uuid value, then commit them.
    let mut index_writer = index.writer_with_num_threads(1, 15_000_000).unwrap();
    for uuid_value in ["this", "that"] {
        index_writer
            .add_document(tantivy::doc!(uuid => uuid_value))
            .unwrap();
    }
    index_writer.commit().unwrap();

    let index_reader = index.reader().unwrap();
    let searcher = index_reader.searcher();
    assert_eq!(searcher.num_docs(), 2);

    // Sanity check: a plain term query for "this" matches exactly one document.
    let query = TermQuery::new(
        Term::from_field_text(uuid, "this"),
        IndexRecordOption::Basic,
    );
    let term_hits = searcher.search(&query, &DocSetCollector).unwrap();
    assert_eq!(term_hits.len(), 1);

    // Reported behaviour: a boolean query made of a lone `MustNot` clause returns
    // nothing, although one document (the one that is not "this") was expected.
    let must_not_only = BooleanQuery::new(vec![(Occur::MustNot, Box::new(query.clone()))]);
    let must_not_hits = searcher.search(&must_not_only, &DocSetCollector).unwrap();
    assert_eq!(must_not_hits.len(), 0);

    // Workaround: pairing the `MustNot` clause with a `Must` `AllQuery` clause
    // yields the expected single document.
    let all_but_this = BooleanQuery::new(vec![
        (Occur::Must, Box::new(AllQuery)),
        (Occur::MustNot, Box::new(query)),
    ]);
    let workaround_hits = searcher.search(&all_but_this, &DocSetCollector).unwrap();
    assert_eq!(workaround_hits.len(), 1);
fulmicoton commented 9 months ago

I think we should stick to that specification. I believe the query parser does the thing you expected however. (not 100% sure)

adamreichold commented 9 months ago

> I believe the query parser does the thing you expected however. (not 100% sure)

It fails with

`Invalid query: Only excluding terms given`

which does make sense IMHO, i.e. I agree to keep the current behaviour.