westerndigitalcorporation / zenfs

ZenFS is a storage backend for RocksDB that enables support for ZNS SSDs and SMR HDDs.
GNU General Public License v2.0
235 stars 86 forks source link

Segmentation fault while enable zenfs gc #249

Closed ywang-wnlo closed 1 year ago

ywang-wnlo commented 1 year ago

A segmentation fault occurs when ZenFS GC is enabled.

sudo ~/rocksdb/plugin/zenfs/util/zenfs mkfs --zbd=nvme1n1 --aux_path=/home/femu/zenfs-aux --force --enable-gc

sudo ./db_bench --fs_uri=zenfs://dev:nvme1n1 --benchmarks=overwrite --use_existing_db --target_file_size_base=2147483648 --use_direct_io_for_flush_and_compaction --max_background_jobs=8 --use_direct_reads --write_buffer_size=2147483648 --num=137438953 --value_size=1000 --histogram
RocksDB:    version 7.10.0
Date:       Thu Nov 24 17:40:27 2022
CPU:        16 * Intel(R) Xeon(R) Platinum P-8124 CPU @ 3.00GHz
CPUCache:   16384 KB
Received signal 11 (Segmentation fault)
#0   /home/femu/rocksdb/db_bench(+0x4ca369) [0x559e3b6eb369] ?? ??:0    
#1   /home/femu/rocksdb/db_bench(+0x4be553) [0x559e3b6df553] ?? ??:0    
#2   /lib/x86_64-linux-gnu/libstdc++.so.6(+0xdc2b3) [0x7ff2b94c62b3] ?? ??:0    
#3   /lib/x86_64-linux-gnu/libc.so.6(+0x94b43) [0x7ff2b914db43] ??  ??:0    
#4   /lib/x86_64-linux-gnu/libc.so.6(+0x126a00) [0x7ff2b91dfa00] ?? ??:0    

Disassembly around the faulting addresses, obtained with objdump:

00000000004be500 <_ZN7rocksdb5ZenFS8GCWorkerEv+0x100>:
      if (!s.ok()) {
        Error(logger_, "Garbage collection failed");
      }
    }
  }
}
  4be500:   28 00                   sub    %al,(%rax)
  4be502:   00 00                   add    %al,(%rax)
  4be504:   0f 85 92 03 00 00       jne    4be89c <_ZN7rocksdb5ZenFS8GCWorkerEv+0x49c>
  4be50a:   48 81 c4 f8 00 00 00    add    $0xf8,%rsp
  4be511:   5b                      pop    %rbx
  4be512:   41 5c                   pop    %r12
  4be514:   41 5d                   pop    %r13
  4be516:   41 5e                   pop    %r14
  4be518:   41 5f                   pop    %r15
  4be51a:   5d                      pop    %rbp
  4be51b:   c3                      ret    
  4be51c:   0f 1f 40 00             nopl   0x0(%rax)
    options.zone_ = 1;
  4be520:   b8 01 01 00 00          mov    $0x101,%eax
  4be525:   66 89 85 fb fe ff ff    mov    %ax,-0x105(%rbp)
    GetZenFSSnapshot(snapshot, options);
  4be52c:   48 8d 85 60 ff ff ff    lea    -0xa0(%rbp),%rax
  4be533:   48 8d 95 fa fe ff ff    lea    -0x106(%rbp),%rdx
  4be53a:   48 89 c6                mov    %rax,%rsi
  4be53d:   4c 89 e7                mov    %r12,%rdi
    options.log_garbage_ = 1;
  4be540:   c6 85 fe fe ff ff 01    movb   $0x1,-0x102(%rbp)
    GetZenFSSnapshot(snapshot, options);
  4be547:   48 89 85 e8 fe ff ff    mov    %rax,-0x118(%rbp)
  4be54e:   e8 cd 6f ff ff          call   4b5520 <_ZN7rocksdb5ZenFS16GetZenFSSnapshotERNS_13ZenFSSnapshotERKNS_20ZenFSSnapshotOptionsE>
    uint64_t threshold = (100 - GC_SLOPE * (GC_START_LEVEL - free_percent));
  4be553:   49 8b 84 24 08 01 00    mov    0x108(%r12),%rax
  4be55a:   00 
      _GLIBCXX_CONSTEXPR __normal_iterator() _GLIBCXX_NOEXCEPT
      : _M_current(_Iterator()) { }

      explicit _GLIBCXX20_CONSTEXPR
      __normal_iterator(const _Iterator& __i) _GLIBCXX_NOEXCEPT
      : _M_current(__i) { }
  4be55b:   4c 8b 75 80             mov    -0x80(%rbp),%r14
  4be55f:   48 29 d8                sub    %rbx,%rax
  4be562:   49 0f af 84 24 10 01    imul   0x110(%r12),%rax
  4be569:   00 00 
  4be56b:   48 8b 9d 78 ff ff ff    mov    -0x88(%rbp),%rbx

……

00000000004ca2c0 <_ZN7rocksdb16ZonedBlockDevice14LogGarbageInfoEv>:
void ZonedBlockDevice::LogGarbageInfo() {
  4ca2c0:   f3 0f 1e fa             endbr64 
  4ca2c4:   55                      push   %rbp
  int zone_gc_stat[12] = {0};
  4ca2c5:   c5 f9 ef c0             vpxor  %xmm0,%xmm0,%xmm0
void ZonedBlockDevice::LogGarbageInfo() {
  4ca2c9:   48 89 e5                mov    %rsp,%rbp
  4ca2cc:   41 57                   push   %r15
  4ca2ce:   41 56                   push   %r14
  4ca2d0:   41 55                   push   %r13
  4ca2d2:   41 54                   push   %r12
  4ca2d4:   53                      push   %rbx
  4ca2d5:   48 81 ec 18 02 00 00    sub    $0x218,%rsp
  4ca2dc:   48 89 bd c8 fd ff ff    mov    %rdi,-0x238(%rbp)
  4ca2e3:   48 8b 57 10             mov    0x10(%rdi),%rdx
  4ca2e7:   48 8b 7f 18             mov    0x18(%rdi),%rdi
  4ca2eb:   64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  4ca2f2:   00 00 
  4ca2f4:   48 89 45 c8             mov    %rax,-0x38(%rbp)
  4ca2f8:   31 c0                   xor    %eax,%eax
  int zone_gc_stat[12] = {0};
  4ca2fa:   c5 f9 7f 85 10 fe ff    vmovdqa %xmm0,-0x1f0(%rbp)
  4ca301:   ff 
  4ca302:   c5 f9 7f 85 20 fe ff    vmovdqa %xmm0,-0x1e0(%rbp)
  4ca309:   ff 
  4ca30a:   c5 f9 7f 85 30 fe ff    vmovdqa %xmm0,-0x1d0(%rbp)
  4ca311:   ff 
  for (auto z : io_zones) {
  4ca312:   48 39 fa                cmp    %rdi,%rdx
  4ca315:   0f 84 a5 00 00 00       je     4ca3c0 <_ZN7rocksdb16ZonedBlockDevice14LogGarbageInfoEv+0x100>
    int idx = int((garbage_rate + 0.1) * 10);
  4ca31b:   c5 fb 10 25 05 7a 12    vmovsd 0x127a05(%rip),%xmm4        # 5f1d28 <_ZZN7rocksdb4test9RandomKeyB5cxx11EPNS_6RandomEiNS0_13RandomKeyTypeEE10kTestChars+0x400>
  4ca322:   00 
  4ca323:   c5 fb 10 1d ad 73 12    vmovsd 0x1273ad(%rip),%xmm3        # 5f16d8 <_ZZNSt19_Sp_make_shared_tag5_S_tiEvE5__tag+0xa8>
  4ca32a:   00 
  4ca32b:   c5 e8 57 d2             vxorps %xmm2,%xmm2,%xmm2
  4ca32f:   4c 8d 85 ef fd ff ff    lea    -0x211(%rbp),%r8
  4ca336:   41 b9 01 00 00 00       mov    $0x1,%r9d
  4ca33c:   eb 50                   jmp    4ca38e <_ZN7rocksdb16ZonedBlockDevice14LogGarbageInfoEv+0xce>
  4ca33e:   66 90                   xchg   %ax,%ax
        double(z->wp_ - z->start_ - z->used_capacity_) / z->max_capacity_;
  4ca340:   4c 29 d0                sub    %r10,%rax
    return __atomic_load_n(&_M_i, int(__m));
  4ca343:   4c 8b 59 40             mov    0x40(%rcx),%r11
  4ca347:   4c 29 d8                sub    %r11,%rax
  4ca34a:   62 f1 ef 08 7b c0       vcvtusi2sd %rax,%xmm2,%xmm0
  4ca350:   62 f1 ef 08 7b 49 05    vcvtusi2sdq 0x28(%rcx),%xmm2,%xmm1
  4ca357:   c5 fb 5e c1             vdivsd %xmm1,%xmm0,%xmm0
    int idx = int((garbage_rate + 0.1) * 10);
  4ca35b:   c5 fb 58 c4             vaddsd %xmm4,%xmm0,%xmm0
  4ca35f:   c5 fb 59 c3             vmulsd %xmm3,%xmm0,%xmm0
  4ca363:   c5 fb 2c c0             vcvttsd2si %xmm0,%eax
    zone_gc_stat[idx]++;
  4ca367:   48 98                   cltq   
  4ca369:   ff 84 85 10 fe ff ff    incl   -0x1f0(%rbp,%rax,4)
    return __atomic_compare_exchange_n(&_M_i, &__i1, __i2, 0,
  4ca370:   44 89 c8                mov    %r9d,%eax
  4ca373:   31 c9                   xor    %ecx,%ecx
    bool expected = true;
  4ca375:   44 88 8d ef fd ff ff    mov    %r9b,-0x211(%rbp)
  4ca37c:   f0 0f b0 0e             lock cmpxchg %cl,(%rsi)
  4ca380:   74 03                   je     4ca385 <_ZN7rocksdb16ZonedBlockDevice14LogGarbageInfoEv+0xc5>
  4ca382:   41 88 00                mov    %al,(%r8)
  for (auto z : io_zones) {
  4ca385:   48 83 c2 08             add    $0x8,%rdx
  4ca389:   48 39 d7                cmp    %rdx,%rdi
  4ca38c:   74 32                   je     4ca3c0 <_ZN7rocksdb16ZonedBlockDevice14LogGarbageInfoEv+0x100>
  4ca38e:   48 8b 0a                mov    (%rdx),%rcx
  4ca391:   31 c0                   xor    %eax,%eax
  4ca393:   48 8d 71 10             lea    0x10(%rcx),%rsi
    bool expected = false;
  4ca397:   c6 85 ef fd ff ff 00    movb   $0x0,-0x211(%rbp)
  4ca39e:   f0 44 0f b0 0e          lock cmpxchg %r9b,(%rsi)
  4ca3a3:   75 dd                   jne    4ca382 <_ZN7rocksdb16ZonedBlockDevice14LogGarbageInfoEv+0xc2>
bool Zone::IsEmpty() { return (wp_ == start_); }
  4ca3a5:   48 8b 41 30             mov    0x30(%rcx),%rax
  4ca3a9:   4c 8b 51 18             mov    0x18(%rcx),%r10
    if (z->IsEmpty()) {
  4ca3ad:   4c 39 d0                cmp    %r10,%rax
  4ca3b0:   75 8e                   jne    4ca340 <_ZN7rocksdb16ZonedBlockDevice14LogGarbageInfoEv+0x80>
      zone_gc_stat[0]++;
  4ca3b2:   ff 85 10 fe ff ff       incl   -0x1f0(%rbp)
    bool expected = true;
  4ca3b8:   eb b6                   jmp    4ca370 <_ZN7rocksdb16ZonedBlockDevice14LogGarbageInfoEv+0xb0>
  4ca3ba:   66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
       *  The default constructor does nothing and is not normally
       *  accessible to users.
      */
      basic_ios()
      : ios_base(), _M_tie(0), _M_fill(char_type()), _M_fill_init(false), 
    _M_streambuf(0), _M_ctype(0), _M_num_put(0), _M_num_get(0)
  4ca3c0:   4c 8d bd c0 fe ff ff    lea    -0x140(%rbp),%r15
  4ca3c7:   4c 89 ff                mov    %r15,%rdi
  4ca3ca:   e8 31 05 b8 ff          call   4a900 <_ZNSt8ios_baseC2Ev@plt>
  4ca3cf:   48 8d 05 9a c9 22 00    lea    0x22c99a(%rip),%rax        # 6f6d70 <_ZTVSt9basic_iosIcSt11char_traitsIcEE@GLIBCXX_3.4+0x10>
  4ca3d6:   48 89 85 c0 fe ff ff    mov    %rax,-0x140(%rbp)
      : ios_base(), _M_tie(0), _M_fill(char_type()), _M_fill_init(false), 
  4ca3dd:   31 c0                   xor    %eax,%eax
  4ca3df:   66 89 45 a0             mov    %ax,-0x60(%rbp)
      seekg(off_type, ios_base::seekdir);
      ///@}
ywang-wnlo commented 1 year ago

again

Received signal 11 (Segmentation fault)
#0   /home/femu/rocksdb/db_bench(+0x4ca4d9) [0x55776b5824d9] ?? ??:0
#1   /home/femu/rocksdb/db_bench(+0x4be693) [0x55776b576693] ?? ??:0
#2   /lib/x86_64-linux-gnu/libstdc++.so.6(+0xdc2b3) [0x7f486e90b2b3] ?? ??:0
#3   /lib/x86_64-linux-gnu/libc.so.6(+0x94b43) [0x7f486e592b43] ??      ??:0
#4   /lib/x86_64-linux-gnu/libc.so.6(+0x126a00) [0x7f486e624a00] ??     ??:0

$ objdump -x db_bench | grep 00000000004ca
00000000004ca210 g     F .text  0000000000000212              _ZN7rocksdb16ZonedBlockDevice10EncodeJsonERSo
00000000004ca0f0 g     F .text  0000000000000119              _ZN7rocksdb16ZonedBlockDevice21SetZoneDeferredStatusENS_8IOStatusE
00000000004ca430 g     F .text  0000000000000537              _ZN7rocksdb16ZonedBlockDevice14LogGarbageInfoEv
00000000004ca970 g     F .text  0000000000000a29              _ZN7rocksdb16ZonedBlockDevice14AllocateIOZoneENS_3Env17WriteLifeTimeHintENS_6IOTypeEPPNS_4ZoneE

$ objdump -x db_bench | grep 00000000004be
000000000004be48 l     F .text  000000000000003f              _ZN7rocksdb25SimulatedHybridFileSystem15NewWritableFileERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKNS_11FileOptionsEPSt10unique_ptrINS_14FSWritableFileESt14default_deleteISD_EEPNS_14IODebugContextE.cold
000000000004be88 l     F .text  00000000000001d2              _ZN7rocksdb25SimulatedHybridFileSystemC2ERKSt10shared_ptrINS_10FileSystemEERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEib.cold
00000000004bea60 g     F .text  0000000000000433              _ZN7rocksdb5ZenFS18DecodeSnapshotFromEPNS_5SliceE
00000000004beea0 g     F .text  00000000000005d3              _ZN7rocksdb5ZenFS20DecodeFileUpdateFromEPNS_5SliceEb
00000000004be400 g     F .text  0000000000000655              _ZN7rocksdb5ZenFS8GCWorkerEv

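The segmentation fault seems to occur in GCWorker -> LogGarbageInfo: frame #0 (+0x4ca4d9) falls inside LogGarbageInfo (start 0x4ca430, size 0x537), and frame #1 (+0x4be693) falls inside GCWorker (start 0x4be400, size 0x655).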

ywang-wnlo commented 1 year ago

Fix for the segmentation fault that occurs when ZenFS GC is enabled.

in LogGarbageInfo

    double garbage_rate =
        double(z->wp_ - z->start_ - z->used_capacity_) / z->max_capacity_;
    assert(garbage_rate > 0);
    int idx = int((garbage_rate + 0.1) * 10);
    zone_gc_stat[idx]++;

When a zone becomes full, wp_ will be a negative value, so the calculation is changed to use max_capacity_ instead.

The pull request has been submitted!