kiwix / kiwix-apple

Kiwix for offline access on iOS and macOS
https://apple.kiwix.org
GNU Lesser General Public License v3.0
451 stars 69 forks source link

Opening corruped file leads to crash #838

Open kelson42 opened 1 week ago

kelson42 commented 1 week ago

See https://github.com/openzim/libzim/issues/893, but with using latest libzim/libkiwix it should not https://github.com/kiwix/kiwix-apple/issues/835

kelson42 commented 1 week ago

@BPerlakiH Only explanation Insee is that the libzim exception is not catch properly

BPerlakiH commented 1 week ago

I have added an extra catch for ZimFileFormatError, see: https://github.com/kiwix/kiwix-apple/compare/838-addititonal-error-handling?expand=1

Although it is still crashing with this ZIM file:

Kiwix`zim::FileImpl::getCluster:
    0x102b29aec <+0>:    sub    sp, sp, #0x90
    0x102b29af0 <+4>:    stp    x26, x25, [sp, #0x40]
    0x102b29af4 <+8>:    stp    x24, x23, [sp, #0x50]
    0x102b29af8 <+12>:   stp    x22, x21, [sp, #0x60]
    0x102b29afc <+16>:   stp    x20, x19, [sp, #0x70]
    0x102b29b00 <+20>:   stp    x29, x30, [sp, #0x80]
    0x102b29b04 <+24>:   add    x29, sp, #0x80
    0x102b29b08 <+28>:   mov    x24, x1
    0x102b29b0c <+32>:   mov    x19, x8
    0x102b29b10 <+36>:   str    w24, [sp, #0x20]
    0x102b29b14 <+40>:   ldr    w8, [x0, #0x60]
    0x102b29b18 <+44>:   cmp    w8, w24
    0x102b29b1c <+48>:   b.ls   0x102b29d8c               ; <+672>
    0x102b29b20 <+52>:   mov    x22, x0
    0x102b29b24 <+56>:   mov    w0, #0xa0
    0x102b29b28 <+60>:   bl     0x102d32e30               ; symbol stub for: operator new(unsigned long)
    0x102b29b2c <+64>:   mov    x25, x0
    0x102b29b30 <+68>:   mov    w8, #0xaba7
    0x102b29b34 <+72>:   movk   w8, #0x32aa, lsl #16
    0x102b29b38 <+76>:   mov    x20, x0
    0x102b29b3c <+80>:   str    x8, [x20, #0x18]!
    0x102b29b40 <+84>:   stp    xzr, xzr, [x0, #0x8]
    0x102b29b44 <+88>:   movi.2d v0, #0000000000000000
    0x102b29b48 <+92>:   stp    q0, q0, [x0, #0x20]
    0x102b29b4c <+96>:   str    q0, [x0, #0x40]
    0x102b29b50 <+100>:  mov    w8, #0xb1bb
    0x102b29b54 <+104>:  movk   w8, #0x3cb0, lsl #16
    0x102b29b58 <+108>:  stp    xzr, x8, [x0, #0x50]
    0x102b29b5c <+112>:  stp    q0, q0, [x0, #0x60]
    0x102b29b60 <+116>:  stur   q0, [x0, #0x7c]
    0x102b29b64 <+120>:  adrp   x8, 2895
    0x102b29b68 <+124>:  add    x8, x8, #0x2e8            ; vtable for std::__1::__assoc_state<std::__1::shared_ptr<zim::Cluster const>> + 16
    0x102b29b6c <+128>:  str    x8, [x0]
    0x102b29b70 <+132>:  add    x23, x22, #0xd8
    0x102b29b74 <+136>:  str    x0, [sp, #0x38]
    0x102b29b78 <+140>:  mov    x0, x23
    0x102b29b7c <+144>:  bl     0x102d33220               ; symbol stub for: std::__1::mutex::lock()
    0x102b29b80 <+148>:  mov    x0, x20
    0x102b29b84 <+152>:  bl     0x102d33220               ; symbol stub for: std::__1::mutex::lock()
    0x102b29b88 <+156>:  ldr    w8, [x25, #0x88]
    0x102b29b8c <+160>:  tbnz   w8, #0x1, 0x102b29de8     ; <+764>
    0x102b29b90 <+164>:  add    x21, x22, #0xa0
    0x102b29b94 <+168>:  add    x9, x25, #0x8
    0x102b29b98 <+172>:  mov    w10, #0x1
    0x102b29b9c <+176>:  ldadd  x10, x9, [x9]
    0x102b29ba0 <+180>:  orr    w8, w8, #0x2
    0x102b29ba4 <+184>:  str    w8, [x25, #0x88]
    0x102b29ba8 <+188>:  mov    x0, x20
    0x102b29bac <+192>:  bl     0x102d33238               ; symbol stub for: std::__1::mutex::unlock()
    0x102b29bb0 <+196>:  str    x25, [sp, #0x8]
    0x102b29bb4 <+200>:  mov    x9, x22
    0x102b29bb8 <+204>:  ldr    x11, [x9, #0xc0]!
    0x102b29bbc <+208>:  cbz    x11, 0x102b29bf8          ; <+268>
    0x102b29bc0 <+212>:  ldr    w10, [sp, #0x20]
    0x102b29bc4 <+216>:  mov    x8, x9
    0x102b29bc8 <+220>:  ldr    w12, [x11, #0x20]
    0x102b29bcc <+224>:  add    x13, x11, #0x8
    0x102b29bd0 <+228>:  cmp    w12, w10
    0x102b29bd4 <+232>:  csel   x12, x13, x11, lo
    0x102b29bd8 <+236>:  csel   x8, x8, x11, lo
    0x102b29bdc <+240>:  ldr    x11, [x12]
    0x102b29be0 <+244>:  cbnz   x11, 0x102b29bc8          ; <+220>
    0x102b29be4 <+248>:  cmp    x8, x9
    0x102b29be8 <+252>:  b.eq   0x102b29bf8               ; <+268>
    0x102b29bec <+256>:  ldr    w9, [x8, #0x20]
    0x102b29bf0 <+260>:  cmp    w10, w9
    0x102b29bf4 <+264>:  b.hs   0x102b29d3c               ; <+592>
    0x102b29bf8 <+268>:  add    x1, sp, #0x20
    0x102b29bfc <+272>:  add    x2, sp, #0x8
    0x102b29c00 <+276>:  mov    x0, x21
    0x102b29c04 <+280>:  bl     0x102b2f7a0               ; zim::lru_cache<unsigned int, std::__1::shared_future<std::__1::shared_ptr<zim::Cluster const>>>::putMissing(unsigned int const&, std::__1::shared_future<std::__1::shared_ptr<zim::Cluster const>> const&)
    0x102b29c08 <+284>:  ldr    x25, [sp, #0x8]
    0x102b29c0c <+288>:  cbz    x25, 0x102b29c44          ; <+344>
    0x102b29c10 <+292>:  mov    w26, #0x1
    0x102b29c14 <+296>:  mov    x20, x25
    0x102b29c18 <+300>:  add    x8, x20, #0x8
    0x102b29c1c <+304>:  mov    w9, #0x1
    0x102b29c20 <+308>:  ldadd  x9, x8, [x8]
    0x102b29c24 <+312>:  add    x8, x25, #0x8
    0x102b29c28 <+316>:  mov    x9, #-0x1
    0x102b29c2c <+320>:  ldaddal x9, x8, [x8]
    0x102b29c30 <+324>:  cbz    x8, 0x102b29c54           ; <+360>
    0x102b29c34 <+328>:  mov    x0, x23
    0x102b29c38 <+332>:  bl     0x102d33238               ; symbol stub for: std::__1::mutex::unlock()
    0x102b29c3c <+336>:  cbnz   w26, 0x102b29c70          ; <+388>
    0x102b29c40 <+340>:  b      0x102b29cd4               ; <+488>
    0x102b29c44 <+344>:  mov    x0, x23
    0x102b29c48 <+348>:  bl     0x102d33238               ; symbol stub for: std::__1::mutex::unlock()
    0x102b29c4c <+352>:  mov    x20, #0x0
    0x102b29c50 <+356>:  b      0x102b29c70               ; <+388>
    0x102b29c54 <+360>:  ldr    x8, [x25]
    0x102b29c58 <+364>:  ldr    x8, [x8, #0x10]
    0x102b29c5c <+368>:  mov    x0, x25
    0x102b29c60 <+372>:  blr    x8
    0x102b29c64 <+376>:  mov    x0, x23
    0x102b29c68 <+380>:  bl     0x102d33238               ; symbol stub for: std::__1::mutex::unlock()
    0x102b29c6c <+384>:  cbz    w26, 0x102b29cd4          ; <+488>
    0x102b29c70 <+388>:  ldr    x0, [x22, #0x80]
    0x102b29c74 <+392>:  ubfiz  x1, x24, #3, #32
    0x102b29c78 <+396>:  bl     0x102b25350               ; unsigned long long zim::Reader::read_uint<unsigned long long>(zim::offset_t) const
    0x102b29c7c <+400>:  mov    x1, x0
    0x102b29c80 <+404>:  ldr    x0, [x22, #0x10]
    0x102b29c84 <+408>:  add    x8, sp, #0x8
    0x102b29c88 <+412>:  bl     0x102b21dbc               ; zim::Cluster::read(zim::Reader const&, zim::offset_t)
->  0x102b29c8c <+416>:  ldp    x8, x9, [sp, #0x8]
    0x102b29c90 <+420>:  stp    x8, x9, [sp, #0x28]
    0x102b29c94 <+424>:  ldr    x0, [sp, #0x38]
    0x102b29c98 <+428>:  cbz    x0, 0x102b29df4           ; <+776>
    0x102b29c9c <+432>:  add    x1, sp, #0x28
    0x102b29ca0 <+436>:  bl     0x102b2fa88               ; void std::__1::__assoc_state<std::__1::shared_ptr<zim::Cluster const>>::set_value<std::__1::shared_ptr<zim::Cluster const>>(std::__1::shared_ptr<zim::Cluster const>&&)
    0x102b29ca4 <+440>:  ldr    x21, [sp, #0x30]
    0x102b29ca8 <+444>:  cbz    x21, 0x102b29cd4          ; <+488>
    0x102b29cac <+448>:  add    x8, x21, #0x8
    0x102b29cb0 <+452>:  mov    x9, #-0x1
    0x102b29cb4 <+456>:  ldaddal x9, x8, [x8]
    0x102b29cb8 <+460>:  cbnz   x8, 0x102b29cd4           ; <+488>
    0x102b29cbc <+464>:  ldr    x8, [x21]
    0x102b29cc0 <+468>:  ldr    x8, [x8, #0x10]
    0x102b29cc4 <+472>:  mov    x0, x21
    0x102b29cc8 <+476>:  blr    x8
    0x102b29ccc <+480>:  mov    x0, x21
    0x102b29cd0 <+484>:  bl     0x102d32f08               ; symbol stub for: std::__1::__shared_weak_count::__release_weak()
    0x102b29cd4 <+488>:  mov    x0, x20
    0x102b29cd8 <+492>:  bl     0x102b2fce4               ; std::__1::__assoc_state<std::__1::shared_ptr<zim::Cluster const>>::copy()
    0x102b29cdc <+496>:  ldp    x9, x8, [x0]
    0x102b29ce0 <+500>:  stp    x9, x8, [x19]
    0x102b29ce4 <+504>:  cbz    x8, 0x102b29cf4           ; <+520>
    0x102b29ce8 <+508>:  add    x8, x8, #0x8
    0x102b29cec <+512>:  mov    w9, #0x1
    0x102b29cf0 <+516>:  ldadd  x9, x8, [x8]
    0x102b29cf4 <+520>:  cbz    x20, 0x102b29d18          ; <+556>
    0x102b29cf8 <+524>:  add    x8, x20, #0x8
    0x102b29cfc <+528>:  mov    x9, #-0x1
    0x102b29d00 <+532>:  ldaddal x9, x8, [x8]
    0x102b29d04 <+536>:  cbnz   x8, 0x102b29d18           ; <+556>
    0x102b29d08 <+540>:  ldr    x8, [x20]
    0x102b29d0c <+544>:  ldr    x8, [x8, #0x10]
    0x102b29d10 <+548>:  mov    x0, x20
    0x102b29d14 <+552>:  blr    x8
    0x102b29d18 <+556>:  add    x0, sp, #0x38
    0x102b29d1c <+560>:  bl     0x102b2fda0               ; std::__1::promise<std::__1::shared_ptr<zim::Cluster const>>::~promise()
    0x102b29d20 <+564>:  ldp    x29, x30, [sp, #0x80]
    0x102b29d24 <+568>:  ldp    x20, x19, [sp, #0x70]
    0x102b29d28 <+572>:  ldp    x22, x21, [sp, #0x60]
    0x102b29d2c <+576>:  ldp    x24, x23, [sp, #0x50]
    0x102b29d30 <+580>:  ldp    x26, x25, [sp, #0x40]
    0x102b29d34 <+584>:  add    sp, sp, #0x90
    0x102b29d38 <+588>:  ret    
    0x102b29d3c <+592>:  ldr    x9, [x22, #0xa8]
    0x102b29d40 <+596>:  ldr    x8, [x8, #0x28]
    0x102b29d44 <+600>:  cmp    x9, x8
    0x102b29d48 <+604>:  b.eq   0x102b29d7c               ; <+656>
    0x102b29d4c <+608>:  ldr    x10, [x8, #0x8]
    0x102b29d50 <+612>:  cmp    x10, x9
    0x102b29d54 <+616>:  b.eq   0x102b29d7c               ; <+656>
    0x102b29d58 <+620>:  ldr    x11, [x8]
    0x102b29d5c <+624>:  str    x10, [x11, #0x8]
    0x102b29d60 <+628>:  ldr    x10, [x8, #0x8]
    0x102b29d64 <+632>:  str    x11, [x10]
    0x102b29d68 <+636>:  ldr    x10, [x9]
    0x102b29d6c <+640>:  str    x8, [x10, #0x8]
    0x102b29d70 <+644>:  str    x10, [x8]
    0x102b29d74 <+648>:  str    x8, [x9]
    0x102b29d78 <+652>:  str    x9, [x8, #0x8]
    0x102b29d7c <+656>:  mov    w26, #0x0
    0x102b29d80 <+660>:  ldr    x20, [x8, #0x18]
    0x102b29d84 <+664>:  cbnz   x20, 0x102b29c18          ; <+300>
    0x102b29d88 <+668>:  b      0x102b29c24               ; <+312>
    0x102b29d8c <+672>:  mov    w0, #0x10
    0x102b29d90 <+676>:  bl     0x102d33334               ; symbol stub for: __cxa_allocate_exception
    0x102b29d94 <+680>:  mov    x20, x0
    0x102b29d98 <+684>:  adrp   x1, 591
    0x102b29d9c <+688>:  add    x1, x1, #0x27d            ; "cluster index out of range"
    0x102b29da0 <+692>:  add    x0, sp, #0x8
    0x102b29da4 <+696>:  bl     0x102a442e8               ; std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>::basic_string[abi:v160006]<std::nullptr_t>(char const*)
    0x102b29da8 <+700>:  mov    w21, #0x1
    0x102b29dac <+704>:  add    x1, sp, #0x8
    0x102b29db0 <+708>:  mov    x0, x20
    0x102b29db4 <+712>:  bl     0x102d3325c               ; symbol stub for: std::runtime_error::runtime_error(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&)
    0x102b29db8 <+716>:  adrp   x8, 2860
    0x102b29dbc <+720>:  add    x8, x8, #0x8c8            ; vtable for zim::ZimFileFormatError
    0x102b29dc0 <+724>:  add    x8, x8, #0x10
    0x102b29dc4 <+728>:  str    x8, [x20]
    0x102b29dc8 <+732>:  mov    w21, #0x0
    0x102b29dcc <+736>:  adrp   x1, 2860
    0x102b29dd0 <+740>:  add    x1, x1, #0x8b0            ; typeinfo for zim::ZimFileFormatError
    0x102b29dd4 <+744>:  adrp   x2, -1352
    0x102b29dd8 <+748>:  add    x2, x2, #0xe8c            ; zim::ZimFileFormatError::~ZimFileFormatError at error.h:31
    0x102b29ddc <+752>:  mov    x0, x20
    0x102b29de0 <+756>:  bl     0x102d32dd0               ; symbol stub for: __cxa_throw
    0x102b29de4 <+760>:  b      0x102b29ed4               ; <+1000>
    0x102b29de8 <+764>:  mov    w0, #0x1
    0x102b29dec <+768>:  bl     0x102b2fa2c               ; std::__1::__throw_future_error[abi:v160006](std::__1::future_errc)
    0x102b29df0 <+772>:  b      0x102b29ed4               ; <+1000>
    0x102b29df4 <+776>:  mov    w0, #0x3
    0x102b29df8 <+780>:  bl     0x102b2fa2c               ; std::__1::__throw_future_error[abi:v160006](std::__1::future_errc)
    0x102b29dfc <+784>:  b      0x102b29ed4               ; <+1000>
    0x102b29e00 <+788>:  mov    x19, x0
    0x102b29e04 <+792>:  ldrsb  w8, [sp, #0x1f]
    0x102b29e08 <+796>:  tbz    w8, #0x1f, 0x102b29e1c    ; <+816>
    0x102b29e0c <+800>:  ldr    x0, [sp, #0x8]
    0x102b29e10 <+804>:  bl     0x102d333a0               ; symbol stub for: operator delete(void*)
    0x102b29e14 <+808>:  tbz    w21, #0x0, 0x102b29e20    ; <+820>
    0x102b29e18 <+812>:  b      0x102b29e2c               ; <+832>
    0x102b29e1c <+816>:  cbnz   w21, 0x102b29e2c          ; <+832>
    0x102b29e20 <+820>:  mov    x0, x19
    0x102b29e24 <+824>:  bl     0x102d33e8c               ; symbol stub for: _Unwind_Resume
    0x102b29e28 <+828>:  mov    x19, x0
    0x102b29e2c <+832>:  mov    x0, x20
    0x102b29e30 <+836>:  bl     0x102d32f14               ; symbol stub for: __cxa_free_exception
    0x102b29e34 <+840>:  mov    x0, x19
    0x102b29e38 <+844>:  bl     0x102d33e8c               ; symbol stub for: _Unwind_Resume
    0x102b29e3c <+848>:  mov    x19, x0
    0x102b29e40 <+852>:  ldr    x0, [sp, #0x8]
    0x102b29e44 <+856>:  cbz    x0, 0x102b29e84           ; <+920>
    0x102b29e48 <+860>:  add    x8, x0, #0x8
    0x102b29e4c <+864>:  mov    x9, #-0x1
    0x102b29e50 <+868>:  ldaddal x9, x8, [x8]
    0x102b29e54 <+872>:  cbnz   x8, 0x102b29e84           ; <+920>
    0x102b29e58 <+876>:  ldr    x8, [x0]
    0x102b29e5c <+880>:  ldr    x8, [x8, #0x10]
    0x102b29e60 <+884>:  blr    x8
    0x102b29e64 <+888>:  b      0x102b29e84               ; <+920>
    0x102b29e68 <+892>:  mov    x19, x0
    0x102b29e6c <+896>:  b      0x102b29ee0               ; <+1012>
    0x102b29e70 <+900>:  mov    x19, x0
    0x102b29e74 <+904>:  mov    x0, x20
    0x102b29e78 <+908>:  bl     0x102d33238               ; symbol stub for: std::__1::mutex::unlock()
    0x102b29e7c <+912>:  b      0x102b29e84               ; <+920>
    0x102b29e80 <+916>:  mov    x19, x0
    0x102b29e84 <+920>:  mov    x0, x23
    0x102b29e88 <+924>:  bl     0x102d33238               ; symbol stub for: std::__1::mutex::unlock()
    0x102b29e8c <+928>:  b      0x102b29f04               ; <+1048>
    0x102b29e90 <+932>:  mov    x19, x0
    0x102b29e94 <+936>:  b      0x102b29f04               ; <+1048>
    0x102b29e98 <+940>:  mov    x22, x1
    0x102b29e9c <+944>:  mov    x19, x0
    0x102b29ea0 <+948>:  add    x0, sp, #0x28
    0x102b29ea4 <+952>:  bl     0x102b29f24               ; std::__1::shared_ptr<zim::Cluster const>::~shared_ptr[abi:v160006]()
    0x102b29ea8 <+956>:  b      0x102b29eb4               ; <+968>
    0x102b29eac <+960>:  mov    x22, x1
    0x102b29eb0 <+964>:  mov    x19, x0
    0x102b29eb4 <+968>:  cmp    w22, #0x1
    0x102b29eb8 <+972>:  b.ne   0x102b29ee0               ; <+1012>
    0x102b29ebc <+976>:  mov    x0, x19
    0x102b29ec0 <+980>:  bl     0x102d330f4               ; symbol stub for: __cxa_begin_catch
    0x102b29ec4 <+984>:  add    x1, sp, #0x20
    0x102b29ec8 <+988>:  mov    x0, x21
    0x102b29ecc <+992>:  bl     0x102b2f628               ; zim::ConcurrentCache<unsigned int, std::__1::shared_ptr<zim::Cluster const>>::drop(unsigned int const&)
    0x102b29ed0 <+996>:  bl     0x102d32e24               ; symbol stub for: __cxa_rethrow
    0x102b29ed4 <+1000>: brk    #0x1
    0x102b29ed8 <+1004>: mov    x19, x0
    0x102b29edc <+1008>: bl     0x102d33274               ; symbol stub for: __cxa_end_catch
    0x102b29ee0 <+1012>: cbz    x20, 0x102b29f04          ; <+1048>
    0x102b29ee4 <+1016>: add    x8, x20, #0x8
    0x102b29ee8 <+1020>: mov    x9, #-0x1
    0x102b29eec <+1024>: ldaddal x9, x8, [x8]
    0x102b29ef0 <+1028>: cbnz   x8, 0x102b29f04           ; <+1048>
    0x102b29ef4 <+1032>: ldr    x8, [x20]
    0x102b29ef8 <+1036>: ldr    x8, [x8, #0x10]
    0x102b29efc <+1040>: mov    x0, x20
    0x102b29f00 <+1044>: blr    x8
    0x102b29f04 <+1048>: add    x0, sp, #0x38
    0x102b29f08 <+1052>: bl     0x102b2fda0               ; std::__1::promise<std::__1::shared_ptr<zim::Cluster const>>::~promise()
    0x102b29f0c <+1056>: mov    x0, x19
    0x102b29f10 <+1060>: bl     0x102d33e8c               ; symbol stub for: _Unwind_Resume
    0x102b29f14 <+1064>: bl     0x1025d3058               ; __clang_call_terminate

@mgautierfr maybe you have an idea, what type is not yet caught on the reader side?

mgautierfr commented 1 week ago

I don't know.

Are you sure that this is the place where you have to catch ZimFileFormatError ? This exception can be thrown at any place when we have a read so all call to libzim can throw it.

But anyway, ZimFileFormatError is inheriting from std::exception so it should be ok to just catch std::exception.

BPerlakiH commented 1 week ago

Thank you @mgautierfr,

I had a second look at it, and maybe this trace will give us some hint:

The interesting parts are:

"Cannot read after the end of the reader"
"bzip2 not enabled in this library"
"zlib not enabled in this library"
"Invalid compression flag"
0x1027ade98 <+560>:  bl     0x1027afbf0               ; std::__1::shared_ptr<zim::Cluster> std::__1::allocate_shared[abi:v160006]<zim::Cluster, std::__1::allocator<zim::Cluster>, std::__1::unique_ptr<zim::IStreamReader, std::__1::default_delete<zim::IStreamReader>>, zim::Cluster::Compression&, bool&, void>(std::__1::allocator<zim::Cluster> const&, std::__1::unique_ptr<zim::IStreamReader, std::__1::default_delete<zim::IStreamReader>>&&, zim::Cluster::Compression&, bool&)
->  0x1027ade9c <+564>:  ldr    x0, [sp, #0x8]
    0x1027adea0 <+568>:  str    xzr, [sp, #0x8]
    0x1027adea4 <+572>:  cbz    x0, 0x1027adeb4           ; <+588>
    0x1027adea8 <+576>:  ldr    x8, [x0]
    0x1027adeac <+580>:  ldr    x8, [x8, #0x8]
    0x1027adeb0 <+584>:  blr    x8
    0x1027adeb4 <+588>:  ldp    x29, x30, [sp, #0x60]
    0x1027adeb8 <+592>:  ldp    x20, x19, [sp, #0x50]
    0x1027adebc <+596>:  ldp    x22, x21, [sp, #0x40]
    0x1027adec0 <+600>:  add    sp, sp, #0x70
    0x1027adec4 <+604>:  ret    
    0x1027adec8 <+608>:  adrp   x8, 2902
    0x1027adecc <+612>:  add    x8, x8, #0xfe0            ; vtable for zim::RawStreamReader
    0x1027aded0 <+616>:  add    x8, x8, #0x10
    0x1027aded4 <+620>:  stp    x8, x20, [x0]
    0x1027aded8 <+624>:  stp    xzr, xzr, [x0, #0x10]
    0x1027adedc <+628>:  str    x0, [sp, #0x8]
    0x1027adee0 <+632>:  ldr    x20, [sp, #0x38]
    0x1027adee4 <+636>:  cbnz   x20, 0x1027ade5c          ; <+500>
    0x1027adee8 <+640>:  b      0x1027ade84               ; <+540>
    0x1027adeec <+644>:  mov    w0, #0x10
    0x1027adef0 <+648>:  bl     0x1029bf1e0               ; symbol stub for: __cxa_allocate_exception
    0x1027adef4 <+652>:  mov    x20, x0
    0x1027adef8 <+656>:  adrp   x1, 598
    0x1027adefc <+660>:  add    x1, x1, #0xd4d            ; "Cannot read after the end of the reader"
    0x1027adf00 <+664>:  bl     0x1029bf168               ; symbol stub for: std::runtime_error::runtime_error(char const*)
    0x1027adf04 <+668>:  b      0x1027adf3c               ; <+724>
    0x1027adf08 <+672>:  mov    w0, #0x10
    0x1027adf0c <+676>:  bl     0x1029bf1e0               ; symbol stub for: __cxa_allocate_exception
    0x1027adf10 <+680>:  mov    x20, x0
    0x1027adf14 <+684>:  adrp   x1, 598
    0x1027adf18 <+688>:  add    x1, x1, #0xd12            ; "bzip2 not enabled in this library"
    0x1027adf1c <+692>:  bl     0x1029bf168               ; symbol stub for: std::runtime_error::runtime_error(char const*)
    0x1027adf20 <+696>:  b      0x1027adf3c               ; <+724>
    0x1027adf24 <+700>:  mov    w0, #0x10
    0x1027adf28 <+704>:  bl     0x1029bf1e0               ; symbol stub for: __cxa_allocate_exception
    0x1027adf2c <+708>:  mov    x20, x0
    0x1027adf30 <+712>:  adrp   x1, 598
    0x1027adf34 <+716>:  add    x1, x1, #0xcf1            ; "zlib not enabled in this library"
    0x1027adf38 <+720>:  bl     0x1029bf168               ; symbol stub for: std::runtime_error::runtime_error(char const*)
    0x1027adf3c <+724>:  adrp   x1, 2865
    0x1027adf40 <+728>:  ldr    x1, [x1, #0x9c8]
    0x1027adf44 <+732>:  adrp   x2, 2865
    0x1027adf48 <+736>:  ldr    x2, [x2, #0xa00]
    0x1027adf4c <+740>:  mov    x0, x20
    0x1027adf50 <+744>:  bl     0x1029bec7c               ; symbol stub for: __cxa_throw
    0x1027adf54 <+748>:  mov    w0, #0x10
    0x1027adf58 <+752>:  bl     0x1029bf1e0               ; symbol stub for: __cxa_allocate_exception
    0x1027adf5c <+756>:  mov    x20, x0
    0x1027adf60 <+760>:  adrp   x1, 598
    0x1027adf64 <+764>:  add    x1, x1, #0xd34            ; "Invalid compression flag"
    0x1027adf68 <+768>:  add    x0, sp, #0x18
    0x1027adf6c <+772>:  bl     0x1026d0194               ; std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>::basic_string[abi:v160006]<std::nullptr_t>(char const*)
    0x1027adf70 <+776>:  mov    w21, #0x1
    0x1027adf74 <+780>:  add    x1, sp, #0x18
    0x1027adf78 <+784>:  mov    x0, x20
    0x1027adf7c <+788>:  bl     0x1029bf108               ; symbol stub for: std::runtime_error::runtime_error(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&)
    0x1027adf80 <+792>:  adrp   x8, 2868
    0x1027adf84 <+796>:  add    x8, x8, #0x8e0            ; vtable for zim::ZimFileFormatError
    0x1027adf88 <+800>:  add    x8, x8, #0x10
    0x1027adf8c <+804>:  str    x8, [x20]
    0x1027adf90 <+808>:  mov    w21, #0x0
    0x1027adf94 <+812>:  adrp   x1, 2868
    0x1027adf98 <+816>:  add    x1, x1, #0x8c8            ; typeinfo for zim::ZimFileFormatError
    0x1027adf9c <+820>:  adrp   x2, -1345
    0x1027adfa0 <+824>:  add    x2, x2, #0xa04            ; zim::ZimFileFormatError::~ZimFileFormatError at error.h:31
    0x1027adfa4 <+828>:  mov    x0, x20
    0x1027adfa8 <+832>:  bl     0x1029bec7c               ; symbol stub for: __cxa_throw
    0x1027adfac <+836>:  brk    #0x1
    0x1027adfb0 <+840>:  mov    x19, x0
    0x1027adfb4 <+844>:  ldrsb  w8, [sp, #0x2f]
    0x1027adfb8 <+848>:  tbz    w8, #0x1f, 0x1027adfcc    ; <+868>
    0x1027adfbc <+852>:  ldr    x0, [sp, #0x18]
    0x1027adfc0 <+856>:  bl     0x1029bf24c               ; symbol stub for: operator delete(void*)
    0x1027adfc4 <+860>:  tbnz   w21, #0x0, 0x1027adfd8    ; <+880>
    0x1027adfc8 <+864>:  b      0x1027ae05c               ; <+1012>
    0x1027adfcc <+868>:  cbnz   w21, 0x1027adfd8          ; <+880>
    0x1027adfd0 <+872>:  b      0x1027ae05c               ; <+1012>
    0x1027adfd4 <+876>:  mov    x19, x0
    0x1027adfd8 <+880>:  mov    x0, x20
    0x1027adfdc <+884>:  bl     0x1029bedc0               ; symbol stub for: __cxa_free_exception
    0x1027adfe0 <+888>:  b      0x1027ae05c               ; <+1012>
    0x1027adfe4 <+892>:  b      0x1027ae008               ; <+928>
    0x1027adfe8 <+896>:  b      0x1027ae008               ; <+928>
    0x1027adfec <+900>:  b      0x1027adff0               ; <+904>
    0x1027adff0 <+904>:  mov    x19, x0
    0x1027adff4 <+908>:  add    x0, sp, #0x18
    0x1027adff8 <+912>:  bl     0x1027af224               ; std::__1::shared_ptr<zim::Reader const>::~shared_ptr[abi:v160006]()
    0x1027adffc <+916>:  mov    x0, x22
    0x1027ae000 <+920>:  bl     0x1029bf24c               ; symbol stub for: operator delete(void*)
    0x1027ae004 <+924>:  b      0x1027ae05c               ; <+1012>
    0x1027ae008 <+928>:  mov    x19, x0
    0x1027ae00c <+932>:  mov    x0, x20
    0x1027ae010 <+936>:  bl     0x1029bedc0               ; symbol stub for: __cxa_free_exception
    0x1027ae014 <+940>:  mov    x0, x19
    0x1027ae018 <+944>:  bl     0x1029bfd38               ; symbol stub for: _Unwind_Resume
    0x1027ae01c <+948>:  mov    x19, x0
    0x1027ae020 <+952>:  str    xzr, [sp, #0x18]
    0x1027ae024 <+956>:  ldr    x8, [x20]
    0x1027ae028 <+960>:  ldr    x8, [x8, #0x10]
    0x1027ae02c <+964>:  mov    x0, x20
    0x1027ae030 <+968>:  b      0x1027ae04c               ; <+996>
    0x1027ae034 <+972>:  mov    x19, x0
    0x1027ae038 <+976>:  ldr    x0, [sp, #0x8]
    0x1027ae03c <+980>:  str    xzr, [sp, #0x8]
    0x1027ae040 <+984>:  cbz    x0, 0x1027ae050           ; <+1000>
    0x1027ae044 <+988>:  ldr    x8, [x0]
    0x1027ae048 <+992>:  ldr    x8, [x8, #0x8]
    0x1027ae04c <+996>:  blr    x8
    0x1027ae050 <+1000>: mov    x0, x19
    0x1027ae054 <+1004>: bl     0x1029bfd38               ; symbol stub for: _Unwind_Resume
    0x1027ae058 <+1008>: mov    x19, x0
    0x1027ae05c <+1012>: add    x0, sp, #0x30
    0x1027ae060 <+1016>: bl     0x1027af224               ; std::__1::shared_ptr<zim::Reader const>::~shared_ptr[abi:v160006]()
    0x1027ae064 <+1020>: mov    x0, x19
    0x1027ae068 <+1024>: bl     0x1029bfd38               ; symbol stub for: _Unwind_Resume
BPerlakiH commented 1 week ago

btw it fails at this entry:

"42694_cover_image.jpg"
kelson42 commented 1 week ago

@BPerlakiH The exception handling you have shown us seems to be only for getmetadata(). You should protect all accesses to the ZIM. I suspect the issue is there.

BPerlakiH commented 1 week ago

@kelson42 @mgautierfr I am a bit lost at this... My current understanding is this: the fix on libzim side has been applied to the validation of the zim file. This crash happens, when we try item::getSize() as part of getting the metadata.

So far on the Apple reader side, we were not using the validation of the ZIM files at all.

I tested how it would look with validation added to avoid opening corrupted files, well... It is painfully slow, even on an M1 mac, so that doesn't seems to be a good solution:

"opened url: file:///Volumes/256GB/gutenberg_pl_all_2023-08.zim in: 0.29125991667388007 seconds"
"opened url: file:///Volumes/256GB/zimgit-water_en_2024-05.zim in: 0.5197830833203625 seconds"
"opened url: file:///Volumes/256GB/wikisummaries_en_all_maxi_2021-04.zim in: 0.15318508332711644 seconds"
"opened url: file:///Volumes/256GB/fas-military-medicine_en_2024-05.zim in: 2.0123385000042617 seconds"
"opened url: file:///Volumes/256GB/wikipedia_en_100_maxi_2024-06.zim in: 0.9691604166582692 seconds"
Checksum doesn't match
"opened url: file:///Volumes/256GB/gutenberg_fr_all_2023-08.zim in: 95.62253008334665 seconds"
"opened url: file:///Volumes/256GB/vidtest_v1_20240506.zim in: 4.948511583352229 seconds"
"opened url: file:///Volumes/256GB/tests_fr_reg-videos_2024-05.zim in: 45.98980366665637 seconds"
"opened url: file:///Volumes/256GB/freecodecamp_en_javascript_2023-12.zim in: 0.027759874996263534 seconds"
"opened url: file:///Volumes/256GB/ted_mul_addiction_2024-04.zim in: 10.49696370831225 seconds"

Do you think we should have a similar offset check in libzim for item::getSize() ? Or maybe there's a quicker check we could run, before calling item::getSize() ?

kelson42 commented 1 week ago

@BPerlakiH There is no ZIM validation at opening time. What we expect is that stdandart exception are caught and managed properly for any call impliying the libzim/libkiwix.

BPerlakiH commented 6 days ago

@kelson42 the problem we have is that we already have a try catch where we are handling std::exception, but it is still crashing inside that, so it is not a std::exception.

@mgautierfr I dig further into this, and this is what I have found, it stops on one of the ASSERT statements:

Screenshot 2024-07-03 at 22 18 34
    0x104ef0920 <+48>:  b.ls   0x104ef094c               ; <+92>
    0x104ef0924 <+52>:  adrp   x0, 583
    0x104ef0928 <+56>:  add    x0, x0, #0x357            ; "offset.v+size.v"
    0x104ef092c <+60>:  adrp   x1, 582
    0x104ef0930 <+64>:  add    x1, x1, #0xc67            ; "<="
    0x104ef0934 <+68>:  adrp   x2, 583
    0x104ef0938 <+72>:  add    x2, x2, #0x496            ; "_size.v"
    0x104ef093c <+76>:  adrp   x5, 583
    0x104ef0940 <+80>:  add    x5, x5, #0x327            ; "../../SOURCE/libzim_release/src/file_reader.cpp"

This is in GuardMalloc:

0x1065fa2c0 <+376>:  adr    x0, #0x1972               ; "Attempting to allocate %lu to represent %lu bytes, which would bring VM size to %lu. That exceeds the %lu VM maximum set via MALLOC_MAXIMUM_VM env var.\n"

can we do a std::exception instead of the ASSERT?

BPerlakiH commented 6 days ago

This is how it looks on the reader side:

Screenshot 2024-07-03 at 22 27 26
mgautierfr commented 6 days ago

Do you have the call trace when the ASSERT is happening ?

The ASSERT is good here, but it means that we don't call the method with the right arguments. This is kind of expected with corrupted zim file, but we need to add a check&exception before calling the method.

@kelson42, Are we speaking of the same corrupted zim file than https://github.com/openzim/libzim/issues/893 ?

kelson42 commented 6 days ago

@kelson42, Are we speaking of the same corrupted zim file than openzim/libzim#893 ?

Yes

BPerlakiH commented 6 days ago

This is the backtrace:

#0  0x00000001009c2934 in void zim::Cluster::read_header<unsigned int>() ()
#1  0x00000001009c20cc in zim::Cluster::Cluster(std::__1::unique_ptr<zim::IStreamReader, std::__1::default_delete<zim::IStreamReader>>, zim::Cluster::Compression, bool) ()
#2  0x00000001009c3c68 in std::__1::shared_ptr<zim::Cluster> std::__1::allocate_shared[abi:v160006]<zim::Cluster, std::__1::allocator<zim::Cluster>, std::__1::unique_ptr<zim::IStreamReader, std::__1::default_delete<zim::IStreamReader>>, zim::Cluster::Compression&, bool&, void>(std::__1::allocator<zim::Cluster> const&, std::__1::unique_ptr<zim::IStreamReader, std::__1::default_delete<zim::IStreamReader>>&&, zim::Cluster::Compression&, bool&) ()
#3  0x00000001009c1e84 in zim::Cluster::read(zim::Reader const&, zim::offset_t) ()
#4  0x00000001009c9b38 in zim::FileImpl::getCluster(zim::cluster_index_t) ()
#5  0x00000001009d3c4c in zim::Item::getSize() const ()
#6  0x0000000100480818 in -[ZimFileService getMetaData:contentPath:] at /Users/balazs/dev/ios/kiwix/Model/ZimFileService/ZimFileService.mm:181
#7  0x00000001005ea73c in ZimFileService.getContentMetaData(url:) at /Users/balazs/dev/ios/kiwix/Model/ZimFileService/ZimFileService.swift:139
#8  0x00000001005aebd4 in closure #1 in closure #1 in KiwixURLSchemeHandler.contentMetaData(for:) at /Users/balazs/dev/ios/kiwix/Model/Utilities/WebKitHandler.swift:146
#9  0x00000001005b06c4 in partial apply for closure #1 in closure #1 in KiwixURLSchemeHandler.contentMetaData(for:) ()
#10 0x00000001004a25b8 in thunk for @escaping @callee_guaranteed @Sendable @async () -> (@out τ_0_0) ()
#11 0x00000001005afc54 in thunk for @escaping @callee_guaranteed @Sendable @async () -> (@out τ_0_0)partial apply ()
BPerlakiH commented 6 days ago

Yes, it is the same ZIM file, taken from: https://github.com/openzim/libzim/issues/893#issuecomment-2154968709

kelson42 commented 2 days ago

The very same crash scenario was visible also on zimcheck. I have verified with libzim 9.2.2 and the crash scenario has vanished.

kelson42 commented 9 hours ago

@mgautierfr Any feedback?