ksharonin / kerchunkC

0 stars 0 forks source link

Tracing broken read for `standard_name` attribute #11

Closed ksharonin closed 6 months ago

ksharonin commented 7 months ago

Due to how dlvl is managed, readLinkMsg raises highestDataLevel once it finds the variable, but it assumes the attribute message will be found within that bound. In this case, therefore, it never finds the attribute message.

if(dlvl < static_cast<int>(datasetPath.size()))
        {
            if(StringLib::match((const char*)link_name, datasetPath[dlvl]))
            {
                highestDataLevel = dlvl + 1;
                readObjHdr(object_header_addr, highestDataLevel);
            }
        }

Consequently, it tries to read the dataset object after returning from the root object header. The readMessages iteration eventually breaks out because of the following condition:

/* Check if Dataset Found */
        if(highestDataLevel > dlvl)
        {
            pos = end; // go directly to end of header
            break; // dataset found
        }

Back trace

 H5FileBuffer::readMessages (this=0xffff44ada098, pos=77, end=251, hdr_flags=44 ',', dlvl=0)
    at /host/packages/h5/H5Coro.cpp:1884
#1  0x0000aaaaba2eb238 in H5FileBuffer::readObjHdr (this=0xffff44ada098, pos=71, dlvl=0)
    at /host/packages/h5/H5Coro.cpp:1844
#2  0x0000aaaaba2e11a0 in H5FileBuffer::H5FileBuffer (this=0xffff44ada098, info=0xffff44ada2a0, context=0x0, asset=
#3  0x0000aaaaba2f4378 in H5Coro::read (asset=0xffff38008590,
    resource=0xffff380094e0 "OR_ABI-L2-FDCC-M3_G17_s20182390052191_e20182390054564_c20182390055159.nc",
    datasetname=0xffff38016ba0 "/Power/standard_name", valtype=RecordObject::INTEGER, col=0, startrow=0, numrows=-1,
    context=0x0, _meta_only=false, parent_trace_id=0) at /host/packages/h5/H5Coro.cpp:3549
#4  0x0000aaaaba2fca18 in H5DatasetDevice::H5DatasetDevice (this=0xffff3800b0d0, L=0xaaaad2a1fc68,
    _role=DeviceObject::READER, _asset=0xffff38008590,
    _resource=0xffff380033f8 "OR_ABI-L2-FDCC-M3_G17_s20182390052191_e20182390054564_c20182390055159.nc",
    dataset_name=0xffff38003528 "/Power/standard_name", id=0, raw_mode=true, datatype=RecordObject::INTEGER, col=0,
    startrow=0, numrows=-1) at /host/packages/h5/H5DatasetDevice.cpp:137
ksharonin commented 7 months ago

What's known:

ksharonin commented 7 months ago

Read in H5O__attr_decode with:

 if (NULL == (attr->shared->data = H5FL_BLK_MALLOC(attr_buf, attr->shared->data_size)))
   280              HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed");
-> 281          H5MM_memcpy(attr->shared->data, p, attr->shared->data_size);

Full bt

 thread #1, queue = 'com.apple.main-thread', stop reason = step over
  * frame #0: 0x00000001002d3d68 libhdf5.310.dylib`H5O__attr_decode(f=<unavailable>, open_oh=<unavailable>, mesg_flags=<unavailable>, ioflags=<unavailable>, p_size=<unavailable>, p="fire_radiative_power\U00000003") at H5Oattr.c:286:11 [opt]
    frame #1: 0x00000001002d2208 libhdf5.310.dylib`H5O__attr_shared_decode(f=<unavailable>, open_oh=<unavailable>, mesg_flags=<unavailable>, ioflags=0x000000016fdfe9bc, p_size=<unavailable>, p=<unavailable>) at H5Oshared.h:73:34 [opt]
    frame #2: 0x0000000100302900 libhdf5.310.dylib`H5O_msg_decode(f=<unavailable>, open_oh=0x0000000000000000, type_id=12, buf_size=<unavailable>, buf=<unavailable>) at H5Omessage.c:1635:30 [opt]
    frame #3: 0x000000010012be34 libhdf5.310.dylib`H5A__dense_fh_name_cmp(obj=0x0000000101812740, obj_len=55, _udata=0x000000016fdfeb10) at H5Abtree2.c:153:26 [opt]
    frame #4: 0x000000010028f308 libhdf5.310.dylib`H5HF__man_op_real(hdr=0x0000000100b42f00, id=<unavailable>, op=(libhdf5.310.dylib`H5A__dense_fh_name_cmp at H5Abtree2.c:143), op_data=0x000000016fdfeb10, op_flags=<unavailable>) at H5HFman.c:404:9 [opt]
    frame #5: 0x000000010028f4e4 libhdf5.310.dylib`H5HF__man_op(hdr=0x0000000100b42f00, id="", op=(libhdf5.310.dylib`H5A__dense_fh_name_cmp at H5Abtree2.c:143), op_data=0x000000016fdfeb10) at H5HFman.c:508:9 [opt]
    frame #6: 0x0000000100276e38 libhdf5.310.dylib`H5HF_op(fh=0x0000000100b3ef00, _id=0x0000000100b43918, op=(libhdf5.310.dylib`H5A__dense_fh_name_cmp at H5Abtree2.c:143), op_data=0x000000016fdfeb10) at H5HF.c:656:13 [opt]
    frame #7: 0x000000010012b7bc libhdf5.310.dylib`H5A__dense_btree2_name_compare(_bt2_udata=0x000000016fdfec90, _bt2_rec=0x0000000100b43918, result=0x000000016fdfebfc) at H5Abtree2.c:263:13 [opt]
    frame #8: 0x000000010014b530 libhdf5.310.dylib`H5B2__locate_record(type=0x00000001005ad810, nrec=<unavailable>, rec_off=0x0000000100b42d78, native="", udata=0x000000016fdfec90, idx=0x000000016fdfebf8, cmp=0x000000016fdfebfc) at H5B2int.c:103:13 [opt]
    frame #9: 0x00000001001449d4 libhdf5.310.dylib`H5B2_find(bt2=<unavailable>, udata=0x000000016fdfec90, found=0x000000016fdfec8f, op=0x0000000000000000, op_data=0x0000000000000000) at H5B2.c:605:13 [opt]
    frame #10: 0x000000010012c6f8 libhdf5.310.dylib`H5A__dense_open(f=0x0000000100b3c550, ainfo=<unavailable>, name="standard_name") at H5Adense.c:380:9 [opt]
    frame #11: 0x00000001002d53b0 libhdf5.310.dylib`H5O__attr_open_by_name(loc=0x000000016fdfeda0, name="standard_name") at H5Oattribute.c:472:40 [opt]
    frame #12: 0x0000000100133974 libhdf5.310.dylib`H5A__open_by_name(loc=0x000000016fdfee20, obj_name="/Power", attr_name="standard_name") at H5Aint.c:637:25 [opt]
    frame #13: 0x000000010045e038 libhdf5.310.dylib`H5VL__native_attr_open(obj=0x0000000100b3c550, loc_params=0x000000016fdfef68, attr_name="standard_name", aapl_id=792633534417207311, dxpl_id=<unavailable>, req=<unavailable>) at H5VLnative_attr.c:168:29 [opt]
    frame #14: 0x0000000100441954 libhdf5.310.dylib`H5VL__attr_open(obj=0x0000000100b3c550, loc_params=0x000000016fdfef68, cls=0x0000000100b10310, name="standard_name", aapl_id=792633534417207311, dxpl_id=792633534417207304, req=0x0000000000000000) at H5VLcallback.c:1104:30 [opt]
    frame #15: 0x00000001004417a4 libhdf5.310.dylib`H5VL_attr_open(vol_obj=0x0000000100b3d6e0, loc_params=0x000000016fdfef68, name="standard_name", aapl_id=792633534417207311, dxpl_id=792633534417207304, req=0x0000000000000000) at H5VLcallback.c:1136:30 [opt]
    frame #16: 0x000000010012b26c libhdf5.310.dylib`H5A__open_common(vol_obj=0x0000000100b3d6e0, loc_params=0x000000016fdfef68, attr_name="standard_name", aapl_id=792633534417207311, token_ptr=0x0000000000000000) at H5A.c:463:17 [opt]
    frame #17: 0x00000001001235e4 libhdf5.310.dylib`H5A__open_by_name_api_common(loc_id=72057594037927936, obj_name="/Power", attr_name="standard_name", aapl_id=<unavailable>, lapl_id=0, token_ptr=0x0000000000000000, _vol_obj_ptr=<unavailable>) at H5A.c:640:22 [opt]
    frame #18: 0x0000000100123344 libhdf5.310.dylib`H5Aopen_by_name(loc_id=72057594037927936, obj_name="/Power", attr_name="standard_name", aapl_id=0, lapl_id=0) at H5A.c:678:14 [opt]
    frame #19: 0x0000000100003a10 run_hdf`main at run_hdf5.c:23:20
    frame #20: 0x000000018ac64420 libdyld.dylib`start + 4
ksharonin commented 7 months ago

The debugger shows the following B-tree v2 ID (breakpoint set with: break set --file H5B2cache.c --line 250)

// requires some manual p *image++ iterations
(lldb) p *image++
(const uint8_t) $9 = '\0'

ID 2: "This B-tree is used for indexing indirectly accessed, filtered ‘huge’ fractal heap objects."

ksharonin commented 7 months ago
ksharonin commented 7 months ago

Attack design:

  1. Detect dense storage method
    /* Check for attributes in dense storage */
        if (H5_addr_defined(ainfo.fheap_addr)) {
            /* Open attribute with dense storage */
            if (NULL == (opened_attr = H5A__dense_open(loc->file, &ainfo, name)))
                HGOTO_ERROR(H5E_ATTR, H5E_CANTOPENOBJ, NULL, "can't open attribute");
        } /* end if */

The H5_addr_defined method appears to be internal, but it helps with the setup: a defined fractal heap address implies that the attribute message doesn't exist in the remaining messages. One way of implementing the check is to finish the regular attribute search first; if message iteration completes without finding the attribute, fall back to the dense method.

Need to save the dense storage info so that it can be visited later; check whether the fractal heap address is saved.

Follow metaData as https://github.com/ICESat2-SlideRule/sliderule/blob/172f375885c01bf77cbe9231f8846e4ff8ca7e22/packages/h5/H5Coro.h#L300C9-L318C24

typedef struct {
            char                    url[MAX_META_NAME_SIZE];
            data_type_t             type;
            layout_t                layout;
            fill_t                  fill;
            bool                    filter[NUM_FILTERS]; // true if enabled for dataset
            bool                    signedval; // is the value a signed or not
            int                     typesize;
            int                     fillsize;
            int                     ndims;
            int                     elementsize; // size of the data element in the chunk; should be equal to the typesize
            int                     offsetsize; // size of "offset" fields in h5 files
            int                     lengthsize; // size of "length" fields in h5 files
            uint64_t                dimensions[MAX_NDIMS];
            uint64_t                chunkelements; // number of data elements per chunk
            uint64_t                chunkdims[MAX_NDIMS]; // dimension of each chunk
            uint64_t                address;
            int64_t                 size;
        } meta_entry_t;
ksharonin commented 7 months ago

Lock down the search algorithm. Below is scratchwork for tracking variable values.

In H5B2__locate_record(hdr->cls, internal->nrec, hdr->nat_off, internal->int_native, udata, &idx, &cmp):
(lldb) p *hdr->cls
(const H5B2_class_t) $1 = {
  id = H5B2_GRP_DENSE_NAME_ID
  name = 0x000000010056644a "H5B2_GRP_DENSE_NAME_ID"
  nrec_size = 12
  crt_context = 0x0000000000000000
  dst_context = 0x0000000000000000
  store = 0x0000000100255c54 (libhdf5.310.dylib`H5G__dense_btree2_name_store at H5Gbtree2.c:181)
  compare = 0x0000000100255cd4 (libhdf5.310.dylib`H5G__dense_btree2_name_compare at H5Gbtree2.c:207)
  encode = 0x0000000100255e1c (libhdf5.310.dylib`H5G__dense_btree2_name_encode at H5Gbtree2.c:263)
  decode = 0x0000000100255eb8 (libhdf5.310.dylib`H5G__dense_btree2_name_decode at H5Gbtree2.c:287)
  debug = 0x0000000100255f5c (libhdf5.310.dylib`H5G__dense_btree2_name_debug at H5Gbtree2.c:312)
}
ksharonin commented 6 months ago

Resolved with dense reading