vjcitn / rhdf5client

2 stars 1 forks source link

bug in multifetch in Dataset.R #1

Open vjcitn opened 1 year ago

vjcitn commented 1 year ago

In `multifetch` (Dataset.R), `blk` can have length 0 when HSDS returns missing values, so the `[<-` assignment fails with "replacement has length zero".

library(rhdf5client)
arr = HSDSArray(URL_hsds(), "hsds", "/shared/bioconductor/heeryassays.h5", "/assay001")
arr2 = HSDSArray(URL_hsds(), "hsds", "/shared/bioconductor/heeryassays.h5", "/assay002")

library(methrix)
library(SummarizedExperiment)
se = readRDS("tcga_methrix_chrX_hg38/se.rds")
se@assays@data$beta = arr
se@assays@data$cov = arr2
# demo
# assays(se2,2, withDimnames=FALSE)$cov
methrix_hsds = se
sse = as(se, "SummarizedExperiment")

Displaying the full assay works:

> assay(sse)
<9628 x 9744> matrix of class DelayedMatrix and type "double":
        TCGA_05_4384_01 TCGA_05_4390_01 ... TCGA_ZU_A8S4_11 TCGA_ZX_AA5X_01
   [1,]       0.3989962       0.2512255   .       0.4615754       0.4524275
   [2,]       0.9414642       0.7077563   .       0.8877961       0.8125127
   [3,]       0.7157899       0.5456646   .       0.7728202       0.7160093
   [4,]       0.8756111       0.8222875   .       0.8877234       0.7073579
   [5,]       0.9078735       0.6654673   .       0.8318627       0.5739341
    ...               .               .   .               .               .
[9624,]       0.1521706       0.3111954   .      0.05412039      0.30505938
[9625,]       0.1747374       0.3330962   .      0.36224970      0.43863850
[9626,]              NA       0.4146589   .              NA              NA
[9627,]              NA       0.3873645   .              NA      0.52371750
[9628,]       0.9439578       0.5403513   .      0.92269660      0.92448432

The error appears when a single row is displayed:

> assay(sse[51,])
<1 x 9744> matrix of class DelayedMatrix and type "double":
Error in do.call("[<-", c(list(R), arglst, list(blk))) : 
  replacement has length zero

Enter a frame number, or 0 to exit   

 1: (new("standardGeneric", .Data = function (object) 
standardGeneric("show"), 
 2: (new("standardGeneric", .Data = function (object) 
standardGeneric("show"), 
 3: show_compact_array(object)
 4: .print_array_data(object, n1, n2)
 5: .print_2Darray_data(x, nhead, ntail, n1, n2, quote = quote)
 6: .prepare_2Darray_sample(x, m1, m2, n1, n2, justify, quote = quote)
 7: .csplit_2Darray_data(x, n1, n2, justify, quote = quote)
 8: .extract_and_stitch_two_matrices_by_col(x, j1, j2, justify, quote = quote)
 9: .extract_two_matrices_by_col(x, j1, j2)
10: extract_array_by_Nindex(x, list(NULL, c(j1, j2)))
11: extract_array(x, expand_Nindex_RangeNSBS(Nindex))
12: extract_array(x, expand_Nindex_RangeNSBS(Nindex))
13: callNextMethod()
14: .nextMethod(x = x, index = index)
15: extract_array(x@seed, index)
16: extract_array(x@seed, index)
17: extract_array(x@seed, index)
18: extract_array(x@seed, index)
19: extract_array(x2@seed, x2@index)
20: extract_array(x2@seed, x2@index)
21: getDataList(x@dataset, idxlist)
22: multifetch(slicelist, dataset)
23: do.call("[<-", c(list(R), arglst, list(blk)))
vjcitn commented 1 year ago

This seems to be remedied by proper handling of the binary content type.