fergiemcdowall / search-index

A persistent, network resilient, full text search library for the browser and Node.js
MIT License
1.39k stars 149 forks source link

How to debug your leveldb? #519

Closed everlose closed 3 years ago

everlose commented 3 years ago

I use Electron and have built a Chinese full-text search with it, and I'm very interested in how it works. But I tried levelui and other software and failed to open this LevelDB, so I want to know how you debug it.

eklem commented 3 years ago

Isn't it then stored in IndexedDB? Since that is the key/value storage in browsers.

fergiemcdowall commented 3 years ago

@everlose what do you mean when you say "debug"? Do you mean how can you inspect/read the database? Or something else..?

fergiemcdowall commented 3 years ago

You can read the db by using level-out

everlose commented 3 years ago

@everlose what do you mean when you say "debug"? Do you mean how can you inspect/read the database? Or something else..?

I want to use GUI software to open this LevelDB, but that failed. OK, let me take a look at "level-out".

everlose commented 3 years ago

I can debug it successfully with "level-up". I PUT 3 records and started debugging.

// search-index API
// Index three documents. PUT receives an array of document objects, each with
// an "_id" and an "idx" array of pre-tokenised terms.
searchDB.PUT([
  {"_id":"4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X","idx":["音","切实","百","音","例","么","已","车","还","做","公","此","则","土","期","时","列","同","金","特","资","半点","主题","并","当","交","复","县","产","出","权","林","史","年","学","打","达","习","速回","志","本","习","即","三十","近","方","心","图","名","高","果","技","存","现","由","四","真","市","参","部","证","育","走","始","具","按","行","变","了","件","党格","业","战","专","解","程","四世","高","断","要","单子","法","法","查","历","位","表","书","石","百","对","部","程","义","在","今","展","权","本","矿","了","易","代","意","器","何","约","始","长","太","七","目","济","如","委","京","和","严","儿","况","拉","者","周","应","应","音","样","内","造","存"]},
  {"_id":"S0iYuV3mj1Up6u1vKPor0q4K6lJgvw3p","idx":["边","次","办","解度","总","布","圆","长","争","五","电","界","算","听","广","族","须","太","性","那","京","统","已","工用","产","广","素","华","采","进","它","行","决","量","个","布","技","张","身","极","农","选","队","斯","解","消","会","美","收","越","把","专美","场","器","验","而","步","极","千","名","民","完","运八","学","局","才","去","只","分数","始","华","家","特","且","改","等","低","发","江","己","心","温表","自","万","体","次","开车","红","广","记","达","但","亲","约","能","心","值","对","如","议","世","基","开","是","律","经理","权","主","头","派"]},
  {"_id":"qZ36wBEIniK1drbRB2mpFA1BmwxATA5A","idx":["件","用","备","片","或","林","院","育","导","工","术","完","门","合","红","各","具","本","九","光点","平","选","者","片","各","中","们","走","者","据","角","美国","断","两","时","高","列","相","据","米","员","打","科","在","计","究","进","使","发","基","别题","在","变","放","位","而","内","流","资","单易","务","法","领","被","着","单","规","类","路","三","对","制","育","始","想","农","特","律","计划","次","真","去","始","力","号","持","多","委","节","没准","系","老","由","识","办","和","元","斯","军","支","身","张","共","收","新","家","到","政","经过","百","边","管","定","开","住","即","江"]}
])

and I use level-up

var levelup = require('levelup')
var leveldown = require('leveldown')

// 1) Open the on-disk store through levelup, backed by leveldown
const db = levelup(leveldown('./ELECTRON-QUICK-START-SEARCH'))

// 2) Stream every entry in the store and print it as "key = value"
const dumpStream = db.createReadStream()

dumpStream.on('data', ({ key, value }) => {
  // toString() is called on both sides so each entry prints as readable text
  console.log(key.toString(), '=', value.toString())
})

dumpStream.on('error', (err) => {
  console.log('Oh my!', err)
})

dumpStream.on('close', () => {
  console.log('Stream closed')
})

dumpStream.on('end', () => {
  console.log('Stream ended')
})

This is the result

❯ node testdb.js
idx:七#0.33 = ["4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X"]
idx:万#0.33 = ["S0iYuV3mj1Up6u1vKPor0q4K6lJgvw3p"]
idx:三#0.50 = ["qZ36wBEIniK1drbRB2mpFA1BmwxATA5A"]
idx:三十#0.33 = ["4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X"]
idx:专#0.33 = ["4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X"]
idx:专美#0.33 = ["S0iYuV3mj1Up6u1vKPor0q4K6lJgvw3p"]
idx:且#0.33 = ["S0iYuV3mj1Up6u1vKPor0q4K6lJgvw3p"]
idx:世#0.33 = ["S0iYuV3mj1Up6u1vKPor0q4K6lJgvw3p"]
idx:业#0.33 = ["4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X"]
idx:两#0.50 = ["qZ36wBEIniK1drbRB2mpFA1BmwxATA5A"]
idx:严#0.33 = ["4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X"]
idx:个#0.33 = ["S0iYuV3mj1Up6u1vKPor0q4K6lJgvw3p"]
idx:中#0.50 = ["qZ36wBEIniK1drbRB2mpFA1BmwxATA5A"]
idx:主#0.33 = ["S0iYuV3mj1Up6u1vKPor0q4K6lJgvw3p"]
idx:主题#0.33 = ["4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X"]
idx:么#0.33 = ["4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X"]
idx:义#0.33 = ["4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X"]

// Omit some lines.........

○DOCUMENT_COUNT○ = 3
○DOC_RAW○4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X○ = {"_id":"4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X","idx":["音","切实","百","音","例","么","已","车","还","做","公","此","则","土","期","时","列","同","金","特","资","半点","主题","并","当","交","复","县","产","出","权","林","史","年","学","打","达","习","速回","志","本","习","即","三十","近","方","心","图","名","高","果","技","存","现","由","四","真","市","参","部","证","育","走","始","具","按","行","变","了","件","党格","业","战","专","解","程","四世","高","断","要","单子","法","法","查","历","位","表","书","石","百","对","部","程","义","在","今","展","权","本","矿","了","易","代","意","器","何","约","始","长","太","七","目","济","如","委","京","和","严","儿","况","拉","者","周","应","应","音","样","内","造","存"]}
○DOC_RAW○S0iYuV3mj1Up6u1vKPor0q4K6lJgvw3p○ = {"_id":"S0iYuV3mj1Up6u1vKPor0q4K6lJgvw3p","idx":["边","次","办","解度","总","布","圆","长","争","五","电","界","算","听","广","族","须","太","性","那","京","统","已","工用","产","广","素","华","采","进","它","行","决","量","个","布","技","张","身","极","农","选","队","斯","解","消","会","美","收","越","把","专美","场","器","验","而","步","极","千","名","民","完","运八","学","局","才","去","只","分数","始","华","家","特","且","改","等","低","发","江","己","心","温表","自","万","体","次","开车","红","广","记","达","但","亲","约","能","心","值","对","如","议","世","基","开","是","律","经理","权","主","头","派"]}
○DOC_RAW○qZ36wBEIniK1drbRB2mpFA1BmwxATA5A○ = {"_id":"qZ36wBEIniK1drbRB2mpFA1BmwxATA5A","idx":["件","用","备","片","或","林","院","育","导","工","术","完","门","合","红","各","具","本","九","光点","平","选","者","片","各","中","们","走","者","据","角","美国","断","两","时","高","列","相","据","米","员","打","科","在","计","究","进","使","发","基","别题","在","变","放","位","而","内","流","资","单易","务","法","领","被","着","单","规","类","路","三","对","制","育","始","想","农","特","律","计划","次","真","去","始","力","号","持","多","委","节","没准","系","老","由","识","办","和","元","斯","军","支","身","张","共","收","新","家","到","政","经过","百","边","管","定","开","住","即","江"]}
○FIELD○idx○ = "idx"
○○LAST_UPDATED = 1614168776455
Stream ended
Stream closed
  1. Why are there so many "○" symbols here? What do they mean?
  2. Look at the code below: why does an lte bound ending in '○' find the record? Does '○' represent an infinitely large value?
// Range scan over every key that begins with "idx:车#".
// NOTE(review): '○' presumably works as the upper bound because it sorts after
// the ASCII score suffix (e.g. "0.33") — confirm against the store's key ordering.
const tokenKeyPrefix = ['idx', '车'].join(':') + '#'
db.createReadStream({
  gte: tokenKeyPrefix,
  lte: tokenKeyPrefix + '○',
})

❯ node testdb.js
idx:车#0.33 = ["4KmtfEtD1sTTVz4e3MOUiRXnR4M4hP2X"]
Stream ended
Stream closed
fergiemcdowall commented 3 years ago

I have made a gist to show you how you might use search-index to search in the documents you provided: https://gist.github.com/fergiemcdowall/6b34f87f51dd04a9b07156044fca3fbc

If you want to simply dump the documents in the index (rather than searching), then use ALL_DOCUMENTS- see the gist for details

everlose commented 3 years ago

I have made a gist to show you how you might use search-index to search in the documents you provided: https://gist.github.com/fergiemcdowall/6b34f87f51dd04a9b07156044fca3fbc

If you want to simply dump the documents in the index (rather than searching), then use ALL_DOCUMENTS- see the gist for details

Hmm... I've used the API, but that's not the point.
I just want to know what "○" means in the db. I'm just interested in what you put in the database and how the search API works.

fergiemcdowall commented 3 years ago

"○" is a field separator- it is used to divide keys up into fields/namespaces.

consider ['board', 'boarder', 'boarding']

If you say "give me everything between 'board' and 'board'" you might get 'board, boarder, boarding'. It's impossible to just get 'board'.

However if you use "○"- ['board○', 'boarder○', 'boarding○'] then you can specify everything between 'board○' and 'board○' which will then only return 'board'

fergiemcdowall commented 3 years ago

I think all questions on this thread have been answered- closing for now...