(ns me.rkx.dtlv0.search0
(:require [datalevin.core :as d]))
;; A search engine depends on a key-value store to store the indices.
(def lmdb (d/open-kv "/tmp/search-db"))
;; `:index-position? true` tells the engine to record where each term occurs
;; inside a document. Per the Datalevin docs the option defaults to false, and
;; `(d/search ... {:display :offsets})` can only report offsets when it is on.
(def engine (d/new-search-engine lmdb {:index-position? true}))
;; The documents to index, as a map from doc-id to document text.
(def docs
  (zipmap [1 2 3]
          ["The quick red fox jumped over the lazy red dogs."
           "Mary had a little lamb whose fleece was red as fire."
           "Moby Dick is a story of a whale and a man obsessed."]))
;; Index every document. The second argument of `add-doc` is a `doc-ref`:
;; any value that uniquely identifies a document -- here, the doc-id.
(doseq [doc-id [1 2 3]]
  (d/add-doc engine doc-id (docs doc-id)))
;; The search engine does not store the raw documents themselves.
;; To be able to retrieve the documents a search finds, we can optionally
;; keep their text in a key-value sub-database.
(d/open-dbi lmdb "raw")
(d/transact-kv lmdb
               (mapv (fn [doc-id] [:put "raw" doc-id (docs doc-id)])
                     [1 2 3]))
;; By default, `search` returns a list of `doc-ref`s ordered by relevance to
;; the query.
(d/search engine "red")
;;=> (1 2)
;; We can alter the display to show the offsets of term occurrences as well,
;; which is useful e.g. to highlight matched terms in documents.
;; NOTE(review): per the Datalevin docs, offsets are only reported when the
;; engine was created with `{:index-position? true}` -- confirm against the
;; `new-search-engine` call.
(d/search engine "red" {:display :offsets})
;;=> ([1 (["red" [10 39]])] [2 (["red" [40]])])
This uses the "standalone search" example from https://cljdoc.org/d/datalevin/datalevin/0.8.9/doc/search-engine.
Was expecting
Source file: