google / leveldb

LevelDB is a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values.
BSD 3-Clause "New" or "Revised" License
35.71k stars 7.72k forks source link

filter block on sstable cannot generated every 2KB datablock ?The filters in the Level 0 SSTable generated by BuildTable are not created for every 2KB. #1190

Closed TalpsG closed 1 month ago

TalpsG commented 1 month ago

look at this code. In TableBuilder::Add(const Slice& key, const Slice& value), once the block size exceeds 4KB, it will trigger a Flush. During the Flush, r->filter_block->StartBlock(r->offset) will be called. StartBlock creates multiple filters based on the size of the already written data blocks (one filter for every 2KB). However, in the GenerateFilter function, all the keys that have been written are consumed each time. As a result, if multiple filters need to be generated, the subsequent filters end up being empty.

void TableBuilder::Add(const Slice& key, const Slice& value) {
  Rep* r = rep_;
  assert(!r->closed);
  if (!ok()) return;
  if (r->num_entries > 0) {
    assert(r->options.comparator->Compare(key, Slice(r->last_key)) > 0);
  }

  if (r->pending_index_entry) {
    assert(r->data_block.empty());
    r->options.comparator->FindShortestSeparator(&r->last_key, key);
    std::string handle_encoding;
    r->pending_handle.EncodeTo(&handle_encoding);
    r->index_block.Add(r->last_key, Slice(handle_encoding));
    r->pending_index_entry = false;
  }

  if (r->filter_block != nullptr) {
    r->filter_block->AddKey(key);
  }

  r->last_key.assign(key.data(), key.size());
  r->num_entries++;
  r->data_block.Add(key, value);

  const size_t estimated_block_size = r->data_block.CurrentSizeEstimate();
  if (estimated_block_size >= r->options.block_size) {
    Flush();
  }
}

void TableBuilder::Flush() {
  Rep* r = rep_;
  assert(!r->closed);
  if (!ok()) return;
  if (r->data_block.empty()) return;
  assert(!r->pending_index_entry);
  WriteBlock(&r->data_block, &r->pending_handle);
  if (ok()) {
    r->pending_index_entry = true;
    r->status = r->file->Flush();
  }
  if (r->filter_block != nullptr) {
    r->filter_block->StartBlock(r->offset);
  }
}

void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
  uint64_t filter_index = (block_offset / kFilterBase);
  assert(filter_index >= filter_offsets_.size());
  while (filter_index > filter_offsets_.size()) {
    GenerateFilter();
  }
}
void FilterBlockBuilder::GenerateFilter() {
  const size_t num_keys = start_.size();
  if (num_keys == 0) {
    // Fast path if there are no keys for this filter
    filter_offsets_.push_back(result_.size());
    return;
  }

  // Make list of keys from flattened key structure
  start_.push_back(keys_.size());  // Simplify length computation
  tmp_keys_.resize(num_keys);
  for (size_t i = 0; i < num_keys; i++) {
    const char* base = keys_.data() + start_[i];
    size_t length = start_[i + 1] - start_[i];
    tmp_keys_[i] = Slice(base, length);
  }

  // Generate filter for current set of keys and append to result_.
  filter_offsets_.push_back(result_.size());
  policy_->CreateFilter(&tmp_keys_[0], static_cast<int>(num_keys), &result_);

  tmp_keys_.clear();
  keys_.clear();
  start_.clear();
}

Below is the code that can reproduce this issue. You can set a breakpoint at StartBlock to inspect the problem

#include <cstdlib>
#include <cstring>
#include <format>
#include <iostream>
#include <memory>
#include <string>

#include "leveldb/db.h"
#include "leveldb/filter_policy.h"
#include "leveldb/options.h"
#include "leveldb/slice.h"
#include "leveldb/status.h"
#include "leveldb/write_batch.h"
std::string getstring() {
  std::string str;
  for (int i = 0; i < 1024; i++) {
    str.push_back('y');
  }
  return str;
}
int main(int argc, char* argv[]) {
  leveldb::DB* db;
  leveldb::Options ops;
  ops.compression = leveldb::kNoCompression;
  ops.filter_policy = leveldb::NewBloomFilterPolicy(4);
  // ops.compression = leveldb::kNoCompression;
  ops.create_if_missing = true;
  auto s = leveldb::DB::Open(ops, "db-compaction", &db);
  if (!s.ok()) {
    std::cout << "db-compaction open fail : " << s.ToString() << "\n";
    return -1;
  }
  leveldb::WriteOptions write_ops;
  std::unique_ptr<leveldb::DB> dbptr(db);
  for (int i = 0; i < 5000; i++) {
    std::string k = std::format("userinfo:{}", i);
    std::string v = getstring();
    db->Put(write_ops, k, v);
  }

  while (1) {
    ;
  }
  return 0;
}
TalpsG commented 1 month ago

i was wrong.