Open abhibaruah opened 1 month ago
I'm able to replicate this crash. The root cause likely has something to do with trying to use the old file format, as running it with the debug library and valgrind shows that H5Lget_info2()
fails at H5VL_set_vol_wrapper()
's assertion that vol_obj->data
exists.
Adaptation of the test script for the C API:
#include "hdf5.h"
#include <stdio.h>
#include <stdlib.h>
#include "string.h"
#define FILE1 "h5treferences.h5"
#define DATASET1 "/OBJECT_REFERENCES"
#define DIM10 4
#define DIM11 8
int main() {
hid_t file, space, dset, dcpl; /* Handles */
herr_t status;
uint8_t ref[DIM10 * DIM11];
/*
* Create a new file using the default properties.
*/
file = H5Fopen(FILE1, H5F_ACC_RDONLY, H5P_DEFAULT);
/*
* Create the chunked dataset.
*/
dset = H5Dopen(file, DATASET1, H5P_DEFAULT);
hid_t h_type = H5Dget_type(dset);
hid_t my_file_space_id = H5Dget_space(dset);
hid_t my_mem_space_id = H5Dget_space(dset);
hid_t xfer_plist_id = H5Pcreate(H5P_DATASET_XFER);
/*
* Write the data to the dataset.
*/
status = H5Dread(dset, h_type, my_mem_space_id, my_file_space_id, xfer_plist_id, ref);
size_t type_size = H5Tget_size(h_type);
printf("Type size: %zu\n", type_size);
printf("Read Status: %d\n", status);
uint8_t *ref_subset = &ref[3 * DIM11];
H5R_type_t ref_type = H5R_OBJECT;
ssize_t name_len = H5Rget_name(file, ref_type, ref_subset, NULL, 0);
if (name_len < 0) {
printf("Error 1\n");
}
char *name = NULL;
name_len = H5Rget_name(file, ref_type, ref_subset, &name[0], name_len + 1);
printf("Allocating %zu bytes for name\n", name_len + 1);
name = malloc(name_len + 1);
name_len = H5Rget_name(file, ref_type, ref_subset, &name[0], name_len + 1);
printf("name = %s\n", name);
H5T_cset_t cset = H5T_CSET_ASCII;
if (name != "/") {
// Get the object's ID to determine its encoding
hid_t obj_id = H5Rdereference(file, H5P_DEFAULT, ref_type, ref_subset);
if (obj_id >= 0) {
printf("H5Rdereference1 success! \n");
}
// Get link info to know how to interpret name
H5L_info_t linfo;
herr_t status = H5Lget_info2(obj_id, name, &linfo, H5P_DEFAULT);
if (status >= 0) {
printf("H5Lget_info success! \n");
}
cset = linfo.cset;
// Don't leak resource
status = H5Oclose(obj_id);
if (status >= 0) {
printf("H5Oclose success! (Shouldn't happen) \n");
}
}
status = H5Dclose(dset);
status = H5Fclose(file);
printf("Success!! \n");
free(name);
return 0;
}
@abhibaruah I believe you need to use "ref" like this: hobj_ref_t ref_obj; <- I just used a different name for distinction ref_obj = new hobj_ref_t[sizeof(hobj_ref_t) DIM10];
instead of this: uint8_t ref = new uint8_t[DIM10 DIM11];
Then you pass ref_obj into H5Dread and pass &ref_obj[0] (or 1 or 2) into H5Rget_name. Please let us know if that works.
Thanks @bmribler. I replaced uint8_t ref = new uint8_t[DIM10 DIM11];
with
hobj_ref_t ref; ref = new hobj_ref_t[sizeof(hobj_ref_t) DIM10];
and
uint8_t ref_subset = &ref[3DIM11]; with hobj_ref_t* ref_subset = &ref[3];
However, I am still seeing the segmentation fault.
My code:
#include "hdf5.h"
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include "string.h"
#define FILE1 "h5treferences.h5"
#define DATASET1 "/OBJECT_REFERENCES"
#define DIM10 4
#define DIM11 8
int main() {
hid_t file, space, dset, dcpl; /* Handles */
herr_t status;
//uint8_t* ref = new uint8_t[DIM10 * DIM11];
hobj_ref_t *ref;
ref = new hobj_ref_t[sizeof(hobj_ref_t) * DIM10];
/*
* Create a new file using the default properties.
*/
file = H5Fopen(FILE1, H5F_ACC_RDONLY, H5P_DEFAULT);
/*
* Create the chunked dataset.
*/
dset = H5Dopen(file, DATASET1, H5P_DEFAULT);
hid_t h_type = H5Dget_type(dset);
hid_t my_file_space_id = H5Dget_space(dset);
hid_t my_mem_space_id = H5Dget_space(dset);
hid_t xfer_plist_id = H5Pcreate(H5P_DATASET_XFER);
//dset = H5Dcreate1 (file, DATASET, H5T_NATIVE_DOUBLE, space, dcpl);
/*
* Write the data to the dataset.
*/
status = H5Dread(dset, h_type, my_mem_space_id, my_file_space_id, xfer_plist_id,
ref);
std::cout << "Read Status: " << status <<std::endl;
//for (int i = 0; i<32; i++){
// std::cout << (int)*(ref + i) <<std::endl;
//}
//uint8_t* ref_subset = &ref[2*DIM11];
hobj_ref_t* ref_subset = &ref[3];
//for (int i = 0; i<8; i++){
// std::cout << (int)*(ref_subset + i) <<std::endl;
//}
H5R_type_t ref_type = H5R_OBJECT;
ssize_t name_len = H5Rget_name(file,ref_type,ref_subset,NULL,0);
if ( name_len < 0 ) {
std::cout << "Error 1" << std::endl;
}
std::string name;
name.resize(name_len+1);
name_len = H5Rget_name(file,ref_type,ref_subset,&name[0],name_len+1);
name.resize(name_len);
H5T_cset_t cset = H5T_CSET_ASCII;
if (name != "/") {
// Get the object's ID to determine its encoding
hid_t obj_id = H5Rdereference1(file, ref_type, ref_subset);
if ( obj_id >= 0 ) {
std::cout << "H5Rdereference1 success!" <<std::endl;
}
std::cout << "Name: " << name <<std::endl;
// Get link info to know how to interpret name
H5L_info_t linfo;
herr_t status = H5Lget_info(obj_id, name.c_str(), &linfo, H5P_DEFAULT);
if ( status >= 0 ) {
std::cout << "H5Lget_info success!" <<std::endl;
}
cset = linfo.cset;
// Don't leak resource
status = H5Oclose(obj_id);
if ( status >= 0 ) {
std::cout << "H5Oclose success!" <<std::endl; // Shouldn't happen
}
}
status = H5Dclose(dset);
status = H5Fclose(file);
std::cout << "Success!!" <<std::endl;
delete [] ref;
return 0;
}
Hi @abhibaruah, It appeared that the segfault happened here: herr_t status = H5Lget_info(obj_id, name.c_str(), &linfo, H5P_DEFAULT); when obj_id, which is a datatype ID in this case, was passed in.
I'm aware that the documentation says: "The identifier may be that of a file, group, dataset, named datatype, or attribute." However, it doesn't seem to work properly. While I'm getting advice from the developers, you can by pass this segfault by passing in the file ID for loc_id instead. Please note that the obj_id (aka datatype ID) can still be passed in to other APIs such as H5Tget_class, H5Tget_size, H5Oget_info2, etc...
Thanks @bmribler. Replacing obj_id withthe file ID helped to resolve the segV and work around it.
FYI, @abhibaruah, this issue has been fixed (PR #4733)
I have a dataset named 'OBJECT_REFERENCES' of type H5T_REFERENCE. I have a workflow (derived from one of our tests) where I read the reference dataset and then call H5Rdereference and H5Lget_info on a subset of the reference dataset.
The code below works with HDF5 1.10.11 but with 1.14.4.3, I see a segmentation violation during the call to H5Lget_info. I see the segV for both H5Lget_info1 and H5Lget_info2 (in v1.14.4.3).
Can someone take a look at it and let me know if this is a bug, and if there is a workaround to revert to the non segV behavior?
Link to the h5 file for reproduction: https://mathworks-my.sharepoint.com/:u:/p/abaruah/EdkeRkHv9SZMgEDGfvULYR4BcaBMX8bI9Q-UfylNeqMyog?e=ulkR7K