scverse / anndataR

AnnData interoperability in R
https://anndatar.data-intuitive.com
Other
57 stars 8 forks source link

hdf5r writes boolean vectors as H5T_STD_U8LE, but h5py as H5T_STD_I8LE #175

Open rcannood opened 2 months ago

rcannood commented 2 months ago

Concerning:

  # Two-level logical enum (FALSE/TRUE only, no NA level) used as the dtype of
  # the mask dataset.
  # NOTE: `mask_dtype` will be written as a H5T_STD_U8LE, but h5py writes this as a H5T_STD_I8LE
  mask_dtype <- hdf5r::H5T_LOGICAL$new(include_NA = FALSE)

  # Write the missingness of `value` (TRUE where `value` is NA) to the dataset
  # "<name>/mask" in `file`, using the enum dtype created above.
  hdf5_create_dataset(
    file = file,
    name = paste0(name, "/mask"),
    value = is.na(value),
    dtype = mask_dtype
  )

A potential workaround is to create a signed logical, e.g. something like:

#' Class for signed HDF5 logical datatypes
#'
#' Variant of \code{\link[hdf5r:H5T_LOGICAL]{H5T_LOGICAL}} whose enum is built
#' on top of \code{H5T_NATIVE_CHAR}. The enum maps the labels FALSE, TRUE and
#' (when \code{include_NA = TRUE}) NA onto the values 0, 1 and 2.
#'
#' Inherits from class \code{\link[hdf5r:H5T_ENUM]{H5T_ENUM}}.
#' @docType class
#' @importFrom R6 R6Class
#' @return Object of class \code{H5T_SIGNED_LOGICAL}.
#' @export
#' @author Holger Hoefling
#' @seealso \code{\link[hdf5r:H5T]{H5T}}, \code{\link[hdf5r:H5T_ENUM]{H5T_ENUM}},
#'   \code{\link[hdf5r:H5T_LOGICAL]{H5T_LOGICAL}}
H5T_SIGNED_LOGICAL <- R6::R6Class("H5T_SIGNED_LOGICAL",
  inherit = hdf5r::H5T_ENUM,
  public = list(
    #' @description Create a logical datatype. This is internally represented
    #'   by an ENUM-type.
    #' @param include_NA If `TRUE`, the enum has the three levels FALSE, TRUE
    #'   and NA (values 0, 1, 2); otherwise only FALSE and TRUE (values 0, 1).
    #' @param id Internal use only. When not `NULL`, the object wraps this
    #'   existing type id and `include_NA` is ignored.
    initialize = function(include_NA = TRUE, id = NULL) {
      # An id was handed in: wrap it, do not create a new enum type.
      if (!is.null(id)) {
        super$initialize(id = id)
        return(self)
      }

      # The two variants differ only in their labels and bit precision.
      # NOTE(review): precisions 2 / 1 are carried over unchanged; with a
      # signed base type such a field may be too narrow to hold the values
      # 2 / 1 -- confirm against HDF5's H5Tset_precision semantics.
      if (include_NA) {
        labels <- c("FALSE", "TRUE", "NA")
        precision <- 2
      } else {
        labels <- c("FALSE", "TRUE")
        precision <- 1
      }
      # Enum values are 0-based and follow the label order.
      values <- seq_along(labels) - 1L

      # Fully qualified so the class also works when hdf5r is not attached.
      # NOTE(review): the signedness of the native `char` type is presumably
      # platform-dependent; H5T_NATIVE_SCHAR may be the safer base -- confirm.
      dtype_id <- hdf5r::h5types$H5T_NATIVE_CHAR
      hdf5_version <- hdf5r::h5version(verbose = FALSE)
      if (utils::compareVersion(hdf5_version, "1.8.16") >= 0) {
        ## can only do this for 1.8.16 or above
        ## lower version have problems getting native type of an enum based on a non-native type
        dtype_id$set_size(1)
        dtype_id$set_precision(precision)
      }

      id <- .Call(
        "h5create_enum_type",
        labels,
        values,
        dtype_id$id,
        PACKAGE = "hdf5r"
      )$return_val
      super$initialize(id = id)
      self
    }
  ),
  cloneable = FALSE
)

(Based on the `H5T_LOGICAL` class in hdf5r: https://github.com/hhoeflin/hdf5r/blob/dc4774c03dd02219febdf57229ceb3e1ab439df7/R/R6Classes_H5T.R#L718)

However, I don't know whether this is even necessary.