Closed axelboc closed 7 months ago
Agreed that strings are tricky - this seems like it will be easy to do and I'm happy to implement.
How did I not notice that you already implemented this? I created another PR, which I will delete... On the plus side, our implementations were essentially the same so I guess we agree on the approach.
@axelboc - would you be ok with making cset
and strpad
optional in the Metadata
interface, and then omitting those properties for non-string dtypes? Including them is cluttering up the objects for non-string dtypes for no good reason I can think of.
diff --git a/src/hdf5_util.cc b/src/hdf5_util.cc
index 59a8698..34e9672 100644
--- a/src/hdf5_util.cc
+++ b/src/hdf5_util.cc
@@ -263,11 +263,6 @@ val get_dtype_metadata(hid_t dtype)
attr.set("cset", (int)(H5Tget_cset(dtype)));
attr.set("strpad", (int)(H5Tget_strpad(dtype)));
}
- else
- {
- attr.set("cset", -1);
- attr.set("strpad", -1);
- }
if (dtype_class == H5T_COMPOUND)
{
@@ -1320,6 +1315,12 @@ EMSCRIPTEN_BINDINGS(hdf5)
.value("H5T_VLEN", H5T_VLEN) // = 9, /**< variable-Length types */
.value("H5T_ARRAY", H5T_ARRAY) // = 10, /**< array types */
;
+ enum_<H5T_str_t>("H5T_str_t")
+ .value("H5T_STR_ERROR", H5T_STR_ERROR) // = -1, /**<error */
+ .value("H5T_STR_NULLTERM", H5T_STR_NULLTERM) // = 0, /**<null-terminated */
+ .value("H5T_STR_NULLPAD", H5T_STR_NULLPAD) // = 1, /**<pad with nulls */
+ .value("H5T_STR_SPACEPAD", H5T_STR_SPACEPAD) // = 2, /**<pad with spaces */
+ ;
//constant("H5L_type_t", H5L_type_t);
// FILE ACCESS
diff --git a/src/hdf5_util_helpers.d.ts b/src/hdf5_util_helpers.d.ts
index 822b4a5..78d9aa8 100644
--- a/src/hdf5_util_helpers.d.ts
+++ b/src/hdf5_util_helpers.d.ts
@@ -17,12 +17,19 @@ export interface H5T_class_t {
H5T_ARRAY: {value: 10} // = 10, /**< array types */
}
+export interface H5T_str_t {
+ H5T_STR_ERROR: {value: -1}, // = -1, /**< error */
+ H5T_STR_NULLTERM: {value: 0}, // = 0, /**< null-terminated, null-padding */
+ H5T_STR_NULLPAD: {value: 1}, // = 1, /**< null-terminated, null-padding */
+ H5T_STR_SPACEPAD: {value: 2}, // = 2, /**< space-padding */
+ H5T_STR_RESERVED_3: {value: 3} // = 3 /**< reserved for future use */
+}
+
export interface Metadata {
array_type?: Metadata,
chunks: number[] | null,
compound_type?: CompoundTypeMetadata,
- cset: number,
- strpad: number,
+ cset?: number,
enum_type?: EnumTypeMetadata,
littleEndian: boolean,
maxshape: number[] | null,
@@ -30,6 +37,7 @@ export interface Metadata {
shape: number[] | null,
signed: boolean,
size: number,
+ strpad?: H5T_str_t,
total_size: number,
type: number,
vlen: boolean,
diff --git a/test/bool_test.mjs b/test/bool_test.mjs
index 955bda8..ef27ba6 100644
--- a/test/bool_test.mjs
+++ b/test/bool_test.mjs
@@ -14,8 +14,6 @@ async function bool_test() {
assert.deepEqual(f.get('bool').metadata, {
chunks: null,
- cset: -1,
- strpad: -1,
enum_type: {
type: 0,
nmembers: 2,
diff --git a/test/compound_and_array_test.mjs b/test/compound_and_array_test.mjs
index 913bbf3..c728531 100644
--- a/test/compound_and_array_test.mjs
+++ b/test/compound_and_array_test.mjs
@@ -67,8 +67,6 @@ async function compound_array_test() {
members: [
{
array_type: {
- cset: -1,
- strpad: -1,
shape: [2, 2],
littleEndian: true,
signed: false,
@@ -77,8 +75,6 @@ async function compound_array_test() {
type: 1,
vlen: false,
},
- cset: -1,
- strpad: -1,
littleEndian: true,
name: 'floaty',
offset: 0,
@@ -91,17 +87,15 @@ async function compound_array_test() {
{
array_type: {
cset: 0,
- strpad: 1,
shape: [2, 2],
littleEndian: false,
signed: false,
size: 5,
+ strpad: 1,
total_size: 4,
type: 3,
vlen: false,
},
- cset: -1,
- strpad: -1,
littleEndian: false,
name: 'stringy',
offset: 32,
@@ -114,8 +108,6 @@ async function compound_array_test() {
],
nmembers: 2
},
- cset: -1,
- strpad: -1,
littleEndian: true,
maxshape: [2],
shape: [2],
How did I not notice that you already implemented this? I created another PR, which I will delete...
Haha I did wonder. The PR description was formulated more like an issue, sorry. :sweat_smile: You could have kept your PR and closed this one, I would not have been offended :wink:
I'll push the diff above and maybe add an enum for cset
. Totally agree with the approach of setting cset
only when relevant. :100:
In the end, I decided not to add the two enums for the time being. They're not technically needed in hdf5_util.cc
(unlike H5T_class_t
), and it didn't feel right having to redeclare them manually in hdf5_util_helpers.d.ts
as strange enum-like interfaces. Would be good if this declaration file could be generated automatically with emcc --embind-emit-tsd
, but this feels like a can of worms. :sweat_smile:
I want the end-user to have some way of interpreting the enums for cset
and strpad
, but I don't know what the most straightforward way to do that is. Using a single source of truth (the exported values from the C++ lib) seemed like a good idea, but the implementation is awkward.
I agree that for the moment, just getting the correct functionality into the library is the most important.
I also haven't figured out how to get --embind-emit-tsd
to work... it's a reasonably new feature, maybe it will get easier.
We have a request to expose the padding information for string dtypes in H5Web: https://github.com/silx-kit/h5web/discussions/1612