// Benchmark snippet: builds an index with a single "title" field, adds one
// document per input line, commits, and prints how long each phase took.
// This is a fragment of a larger function body: `index_path` and `source` are
// defined outside the snippet, and the `?` operators require an enclosing
// function that returns a `Result` whose error type can be built from `String`.

// Schema with exactly one field, "title", declared with the TEXT | STORED options.
let mut schema_builder = SchemaBuilder::default();
schema_builder.add_text_field("title", TEXT | STORED);
let schema = schema_builder.build();
// The original error values are discarded by `map_err`; callers only see the fixed message.
let index = Index::create(index_path, schema.clone())
.map_err(|_| String::from("Index creation error"))?;
// 1_000_000_000 is the argument handed to `writer`.
// NOTE(review): presumably the writer's heap/memory budget in bytes — confirm against the tantivy version in use.
let mut index_writer = index.writer(1_000_000_000)
.map_err(|_| String::from("Index writer creation error"))?;
// Handle for the "title" field added above; `unwrap` panics if the lookup fails.
let title = schema.get_field("title").unwrap();
println!("-- index prepped");
// Start of the timed "read" phase. Everything up to `before_read.elapsed()` is
// included in the measurement, including the per-line `println!` below.
let before_read = Instant::now();
// `skip(1)` drops the first line of the input.
// NOTE(review): presumably a header row — confirm against the data file.
for line in source.lines().skip(1) {
// Debug-prints the whole `Result` for every line; this output cost is part of the timed region.
println!("-- -- line: {:?}", line);
// Lines that failed to read (`Err`) are silently skipped.
if let Ok(l) = line {
// Tab-separated columns; only the first column is used, as the title.
let props: Vec<&str> = l.split('\t').collect();
let mut doc = Document::default();
// `split` always yields at least one piece, so index 0 is expected to exist.
let title_val = props.get(0).unwrap();
doc.add_text(title, title_val);
// NOTE(review): the return value of `add_document` is discarded here — confirm nothing needs checking.
index_writer.add_document(doc);
};
}
let elapsed = before_read.elapsed();
// Prints whole seconds and the sub-second part in nanoseconds, separated by a comma.
// The nanosecond part is NOT zero-padded, so "0,426478" means 426478 ns (about 0.43 ms), not 0.426478 s.
println!("-- docs read in {},{}", elapsed.as_secs(), elapsed.subsec_nanos());
// Timed "commit" phase: measures only the `commit()` call.
let before_commit = Instant::now();
index_writer.commit().map_err(|_| String::from("Index commit error"))?;
let elapsed = before_commit.elapsed();
// Same unpadded "seconds,nanoseconds" format as above.
println!("-- committed: {},{}", elapsed.as_secs(), elapsed.subsec_nanos());
...
Produces the following output:
target/debug/tantivy-bench --data ~/Downloads/wiki-1.txt
-- index prepped
-- -- line: Ok("Anarchism\t30-APR-2012 03:25:17.000\t{{Redirect|Anarchist|the fictional character|Anarchist (comics)}} {{Redirect|Anarchists}} {{Anarchism sidebar}} {{Libertarianism sidebar}} \'\'\'Anarchism\'\'\' is generally defined as the [[political philosophy]] which holds the [[state (polity)|state]] to be undesirable, unnecessary, and harmful,<ref name=\"definition\"> {{Cite journal|last=Malatesta|first=Errico|title=Towards Anarchism|journal=MAN!|publisher=International Group of San Francisco|location=Los Angeles|oclc=3930443|url=http://www.marxists.org/archive/malatesta/1930s/xx/toanarchy.htm|authorlink=Errico Malatesta}} {{Cite journal|url=http://www.theglobeandmail.com/servlet/story/RTGAM.20070514.wxlanarchist14/BNStory/lifeWork/home/ |title=Working for The Man |journal=[[The Globe and Mail]] |accessdate=2008-04-14 |last=Agrell |first=Siri |date=2007-05-14}} {{cite web|url=http://www.britannica.com/eb/article-9117285|title=Anarchism|year=2006|work=Encyclopædia Britannica|publisher=Encyclopædia Britannica Premium Service|accessdate=2006-08-29| archiveurl=")
-- docs read in 0,426478
-- committed: 3,434918851
-- searcher created
-- query parser created
-- query parsed
-- searched
Varying the number of fields seems to have a huge influence, whereas the length of the value does not...
This seems to be due to a large heap size and debug compilation. Since a release build effectively solves this and the code above comes from simple_search.rs, for now I'd settle for improving this 😅
This code:
Produces the following output:
Varying the number of fields seems to have a huge influence, whereas the length of the value does not...