facebook / rocksdb

A library that provides an embeddable, persistent key-value store for fast storage.
http://rocksdb.org
GNU General Public License v2.0
28.49k stars 6.3k forks source link

RocksDB leaves many empty .log files and does not clean them on DB open. #11313

Closed socketpair closed 1 year ago

socketpair commented 1 year ago

In my scenario, I only ingest .SST files; there are no Put/Delete calls or the like. Even so, RocksDB creates plenty of .log files, and I don't know how to remove them.

siying commented 1 year ago

Are these log files empty? How large are they? Log files are supposed to be cleaned up as soon as they are not needed.

socketpair commented 1 year ago

@siying As I said in the subject, all of them are EMPTY: the file sizes are zero bytes. Yes, you are right, the files should be removed, but in practice they stay in the DB directory and don't disappear even after manual compaction. I can provide an example of such a program.

socketpair commented 1 year ago

Ingesting any(?) SST file adds at least one such journal (.log file). The program that ingests files:

/// Logs `message` and then checks the outcome of the step it describes.
///
/// @param message  Human-readable description of the step; it is logged
///                 unconditionally and reused in the error text.
/// @param s        Status returned by the RocksDB call for that step.
/// @throws std::runtime_error when `s.ok()` is false; the exception text
///         contains `message` and `s.ToString()`.
static void XXX(const std::string &message, const rocksdb::Status &s)
{
    log_info("RocksDB: %s", message.c_str());
    // Success path: nothing more to do once the step has been logged.
    if (s.ok())
        return;
    throw std::runtime_error("RocksDB: " + message + " FAILED: " + s.ToString());
}

DBOptions db_options;
vector<ColumnFamilyDescriptor> colfam {};
XXX("Loading latest DB options.", LoadLatestOptions(rocksdb_path, Env::Default(), &db_options, &colfam));
unique_ptr<DB> db(
    [&]() {
        DB *db_raw;
        XXX("Opening DB " + rocksdb_path,
            DB::Open(Options(db_options, colfam.at(0).options), rocksdb_path, &db_raw));
        return db_raw;
    }());

IngestExternalFileOptions ifo;
ifo.failed_move_fall_back_to_copy = false;
ifo.move_files = true;
ifo.verify_checksums_before_ingest = false; // as default
ifo.verify_file_checksum = false; // speedup
ifo.write_global_seqno = false; // we are the new project.
XXX("Ingesting diff", db->IngestExternalFile({ sst_file }, ifo));

XXX("Closing DB", db->Close());

db options:

# This is a RocksDB option file.
#
# For detailed file format spec, please refer to the example file
# in examples/rocksdb_option_file_example.ini
#

[Version]
  rocksdb_version=7.4.5
  options_file_version=1.1

[DBOptions]
  compaction_readahead_size=0
  strict_bytes_per_sync=false
  bytes_per_sync=1048576
  max_background_jobs=16
  avoid_flush_during_shutdown=false
  max_background_flushes=-1
  delayed_write_rate=16777216
  max_open_files=-1
  max_subcompactions=1
  writable_file_max_buffer_size=1048576
  wal_bytes_per_sync=0
  max_background_compactions=-1
  max_total_wal_size=0
  delete_obsolete_files_period_micros=21600000000
  stats_dump_period_sec=600
  stats_history_buffer_size=1048576
  stats_persist_period_sec=600
  enforce_single_del_contracts=true
  lowest_used_cache_tier=kNonVolatileBlockTier
  bgerror_resume_retry_interval=1000000
  best_efforts_recovery=false
  log_readahead_size=0
  write_dbid_to_manifest=false
  wal_compression=kNoCompression
  manual_wal_flush=false
  db_host_id=__hostname__
  two_write_queues=false
  random_access_max_buffer_size=1048576
  avoid_unnecessary_blocking_io=false
  skip_checking_sst_file_sizes_on_db_open=false
  flush_verify_memtable_count=true
  fail_if_options_file_error=true
  atomic_flush=false
  verify_sst_unique_id_in_manifest=false
  skip_stats_update_on_db_open=false
  track_and_verify_wals_in_manifest=false
  experimental_mempurge_threshold=0.000000
  paranoid_checks=true
  create_if_missing=false
  max_write_batch_group_size_bytes=1048576
  avoid_flush_during_recovery=false
  file_checksum_gen_factory=nullptr
  enable_thread_tracking=false
  allow_fallocate=true
  allow_data_in_errors=false
  error_if_exists=false
  use_direct_io_for_flush_and_compaction=false
  create_missing_column_families=false
  WAL_size_limit_MB=0
  use_direct_reads=false
  persist_stats_to_disk=false
  allow_mmap_reads=false
  allow_mmap_writes=false
  use_adaptive_mutex=false
  allow_2pc=false
  is_fd_close_on_exec=true
  max_log_file_size=0
  access_hint_on_compaction_start=NORMAL
  max_file_opening_threads=16
  wal_filter=nullptr
  use_fsync=false
  table_cache_numshardbits=6
  dump_malloc_stats=false
  db_write_buffer_size=0
  allow_ingest_behind=false
  keep_log_file_num=2
  max_bgerror_resume_count=2147483647
  allow_concurrent_memtable_write=true
  recycle_log_file_num=0
  log_file_time_to_roll=0
  manifest_preallocation_size=4194304
  enable_write_thread_adaptive_yield=true
  WAL_ttl_seconds=0
  max_manifest_file_size=1073741824
  wal_recovery_mode=kPointInTimeRecovery
  enable_pipelined_write=false
  write_thread_slow_yield_usec=3
  unordered_write=false
  write_thread_max_yield_usec=100
  advise_random_on_open=true
  info_log_level=WARN_LEVEL
..

[CFOptions "default"]
  compression_opts={max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
  bottommost_compression=kDisableCompressionOption
  enable_blob_garbage_collection=false
  blob_file_size=268435456
  sample_for_compression=0
  bottommost_temperature=kUnknown
  periodic_compaction_seconds=0
  ttl=2592000
  blob_garbage_collection_age_cutoff=0.250000
  compaction_options_universal={incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;size_ratio=1;}
  compression=kSnappyCompression
  max_sequential_skip_in_iterations=8
  max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
  max_bytes_for_level_multiplier=10.000000
  min_blob_size=0
  check_flush_compaction_key_order=true
  disable_auto_compactions=false
  bottommost_compression_opts={max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
  compaction_options_fifo={allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
  level0_file_num_compaction_trigger=4
  target_file_size_base=33554432
  soft_pending_compaction_bytes_limit=68719476736
  hard_pending_compaction_bytes_limit=274877906944
  level0_slowdown_writes_trigger=20
  blob_compression_type=kNoCompression
  level0_stop_writes_trigger=36
  blob_file_starting_level=0
  enable_blob_files=false
  blob_garbage_collection_force_threshold=1.000000
  paranoid_file_checks=false
  prefix_extractor=nullptr
  max_write_buffer_number=2
  report_bg_io_stats=false
  memtable_prefix_bloom_size_ratio=0.000000
  target_file_size_multiplier=1
  arena_block_size=1048576
  blob_compaction_readahead_size=0
  inplace_update_num_locks=10000
  max_compaction_bytes=838860800
  write_buffer_size=67108864
  memtable_huge_page_size=0
  max_successive_merges=0
  max_bytes_for_level_base=268435456
  memtable_whole_key_filtering=false
  compaction_pri=kMinOverlappingRatio
  compaction_filter_factory=nullptr
  comparator=leveldb.BytewiseComparator
  table_factory=BlockBasedTable
  merge_operator=nullptr
  compaction_filter=nullptr
  level_compaction_dynamic_level_bytes=false
  optimize_filters_for_hits=false
  inplace_update_support=false
  max_write_buffer_number_to_maintain=0
  bloom_locality=0
  max_write_buffer_size_to_maintain=0
  sst_partitioner_factory=nullptr
  compaction_style=kCompactionStyleLevel
  min_write_buffer_number_to_merge=1
  memtable_factory=SkipListFactory
  memtable_insert_with_hint_prefix_extractor=nullptr
  force_consistency_checks=true
  num_levels=7
..
[TableOptions/BlockBasedTable "default"]
  initial_auto_readahead_size=8192
  pin_top_level_index_and_filter=true
  block_align=false
  block_size_deviation=10
  checksum=kCRC32c
  index_shortening=kShortenSeparators
  whole_key_filtering=true
  data_block_index_type=kDataBlockBinarySearch
  index_type=kBinarySearch
  no_block_cache=false
  index_block_restart_interval=1
  data_block_hash_table_util_ratio=0.750000
  prepopulate_block_cache=kDisable
  pin_l0_filter_and_index_blocks_in_cache=false
  filter_policy=bloomfilter:10:false
  cache_index_and_filter_blocks_with_high_priority=true
  verify_compression=false
  block_restart_interval=16
  max_auto_readahead_size=262144
  flush_block_policy_factory=FlushBlockBySizePolicyFactory
  partition_filters=false
  cache_index_and_filter_blocks=false
  block_size=4096
  metadata_block_size=4096
  optimize_filters_for_memory=false
  detect_filter_construct_corruption=false
  format_version=5
  metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
  read_amp_bytes_per_bit=0
  enable_index_compression=true
ajkr commented 1 year ago

Good news, #11409 should fix the problem. It should be available in our 8.3 release. Let us know if you need it in an earlier release; we can patch 8.2 with it to release it sooner.

socketpair commented 1 year ago

@ajkr What is the correct workaround for that if it's not possible to upgrade the RocksDB version?

ajkr commented 1 year ago

The mitigation would involve deleting the empty WAL files. You should test this but my understanding is it's safe to do so as long as track_and_verify_wals_in_manifest=false.

socketpair commented 6 months ago

@ajkr Uhh. Even the latest update of Fedora 39 (Fedora 40 has not been released yet) ships RocksDB 8.1.1, so I still cannot test this :(