JuliaIO / JLD2.jl

HDF5-compatible file format in pure Julia
Other
547 stars 85 forks source link

InexactError while loading JLD2 #451

Closed BioTurboNick closed 1 year ago

BioTurboNick commented 1 year ago

Quite a long stacktrace here.

Possibly relevant: I'm using CodecBzip2 for compression, so could this be related in some way to #399?

I guess the proximate cause is trying to copy from a Vector{UInt64} to a Vector{UInt32}.

Maybe chunk_dimensions is the culprit in jlread?

https://github.com/JuliaIO/JLD2.jl/blob/5cda06dc2b5816097d0095f8dc92a84c77906338/src/object_headers.jl#L440

Looks like read_nb_uint returns a UInt64, making chunk_dimensions a UInt64 array.

So is the solution just to change DataLayout to have chunk_dimensions::Vector{UInt64}?

[1] throw_inexacterror(f::Symbol, #unused#::Type{UInt32}, val::UInt64)
@ Core ./boot.jl:614

[2] checked_trunc_uint
@ ./boot.jl:644 [inlined]

[3] toUInt32
@ ./boot.jl:733 [inlined]

[4] UInt32
@ ./boot.jl:768 [inlined]

[5] convert
@ ./number.jl:7 [inlined]

[6] setindex!
@ ./array.jl:966 [inlined]

[7] _unsafe_copyto!(dest::Vector{UInt32}, doffs::Int64, src::Vector{UInt64}, soffs::Int64, n::Int64)
@ Base ./array.jl:253

[8] unsafe_copyto!
@ ./array.jl:307 [inlined]

[9] _copyto_impl!
@ ./array.jl:331 [inlined]

[10] copyto!
@ ./array.jl:317 [inlined]

[11] copyto!
@ ./array.jl:343 [inlined]

[12] copyto_axcheck!
@ ./abstractarray.jl:1127 [inlined]

[13] Array
@ ./array.jl:626 [inlined]

[14] convert
@ ./array.jl:617 [inlined]

[15] DataLayout
@ ~/.julia/packages/JLD2/1YVED/src/object_headers.jl:415 [inlined]

[16] jlread(cio::JLD2.MmapIO, #unused#::Type{JLD2.DataLayout}, f::JLD2.JLDFile{JLD2.MmapIO})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/object_headers.jl:447

[17] load_dataset(f::JLD2.JLDFile{JLD2.MmapIO}, offset::JLD2.RelOffset)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:95

[18] jlconvert
@ ~/.julia/packages/JLD2/1YVED/src/data/writing_datatypes.jl:314 [inlined]

[19] macro expansion
@ ~/.julia/packages/JLD2/1YVED/src/dataio.jl:70 [inlined]

[20] macro expansion
@ ./simdloop.jl:77 [inlined]

[21] read_array!(v::Vector{AbstractVector}, f::JLD2.JLDFile{JLD2.MmapIO}, rr::JLD2.ReadRepresentation{AbstractVector, JLD2.RelOffset})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/dataio.jl:68

[22] read_array(f::JLD2.JLDFile{JLD2.MmapIO}, dataspace::JLD2.ReadDataspace, rr::JLD2.ReadRepresentation{AbstractVector, JLD2.RelOffset}, layout::JLD2.DataLayout, filters::JLD2.FilterPipeline, header_offset::JLD2.RelOffset, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:410

[23] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, rr::JLD2.ReadRepresentation{Any, JLD2.RelOffset}, read_dataspace::Tuple{JLD2.ReadDataspace, JLD2.RelOffset, JLD2.DataLayout, JLD2.FilterPipeline}, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:276

[24] macro expansion
@ ~/.julia/packages/JLD2/1YVED/src/datasets.jl:224 [inlined]

[25] macro expansion
@ ~/.julia/packages/JLD2/1YVED/src/datatypes.jl:105 [inlined]

[26] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, dataspace::JLD2.ReadDataspace, datatype_class::UInt8, datatype_offset::Int64, layout::JLD2.DataLayout, filters::JLD2.FilterPipeline, header_offset::JLD2.RelOffset, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:211

[27] load_dataset(f::JLD2.JLDFile{JLD2.MmapIO}, offset::JLD2.RelOffset)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:125

[28] jlconvert
@ ~/.julia/packages/JLD2/1YVED/src/data/writing_datatypes.jl:314 [inlined]

[29] macro expansion
@ ~/.julia/packages/JLD2/1YVED/src/data/reconstructing_datatypes.jl:582 [inlined]

[30] jlconvert(#unused#::JLD2.ReadRepresentation{DataFrames.DataFrame, JLD2.OnDiskRepresentation{(0, 8, 24, 32, 40), Tuple{Any, DataFrames.Index, Any, Any, Bool}, Tuple{JLD2.RelOffset, JLD2.OnDiskRepresentation{(0, 8), Tuple{Dict{Symbol, Int64}, Any}, Tuple{JLD2.CustomSerialization{Array, JLD2.RelOffset}, JLD2.RelOffset}, 16}(), JLD2.RelOffset, JLD2.RelOffset, Bool}, 41}()}, f::JLD2.JLDFile{JLD2.MmapIO}, ptr::Ptr{Nothing}, header_offset::JLD2.RelOffset)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/data/reconstructing_datatypes.jl:547

[31] read_scalar(f::JLD2.JLDFile{JLD2.MmapIO}, rr::JLD2.ReadRepresentation{DataFrames.DataFrame, JLD2.OnDiskRepresentation{(0, 8, 24, 32, 40), Tuple{Any, DataFrames.Index, Any, Any, Bool}, Tuple{JLD2.RelOffset, JLD2.OnDiskRepresentation{(0, 8), Tuple{Dict{Symbol, Int64}, Any}, Tuple{JLD2.CustomSerialization{Array, JLD2.RelOffset}, JLD2.RelOffset}, 16}(), JLD2.RelOffset, JLD2.RelOffset, Bool}, 41}()}, header_offset::JLD2.RelOffset)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/dataio.jl:37

[32] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, rr::Any, read_dataspace::Tuple{JLD2.ReadDataspace, JLD2.RelOffset, JLD2.DataLayout, JLD2.FilterPipeline}, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:238

[33] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, dataspace::JLD2.ReadDataspace, datatype_class::UInt8, datatype_offset::Int64, layout::JLD2.DataLayout, filters::JLD2.FilterPipeline, header_offset::JLD2.RelOffset, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:194

[34] load_dataset(f::JLD2.JLDFile{JLD2.MmapIO}, offset::JLD2.RelOffset)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:125

[35] jlconvert
@ ~/.julia/packages/JLD2/1YVED/src/data/writing_datatypes.jl:314 [inlined]

[36] macro expansion
@ ~/.julia/packages/JLD2/1YVED/src/data/reconstructing_datatypes.jl:628 [inlined]

[37] jlconvert(#unused#::JLD2.ReadRepresentation{NamedTuple{(:locs, :binding_traces, :precision), Tuple{DataFrames.DataFrame, Vector{NamedTuple{(:cluster_index, :position, :events), Tuple{Int64, StaticArraysCore.SVector{2, Float64}, Vector{Any}}}}, Float64}}, JLD2.OnDiskRepresentation{(0, 8, 16), Tuple{DataFrames.DataFrame, Vector{NamedTuple{(:cluster_index, :position, :events), Tuple{Int64, StaticArraysCore.SVector{2, Float64}, Vector{Any}}}}, Float64}, Tuple{JLD2.RelOffset, JLD2.RelOffset, Float64}, 24}()}, f::JLD2.JLDFile{JLD2.MmapIO}, ptr::Ptr{Nothing}, header_offset::JLD2.RelOffset)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/data/reconstructing_datatypes.jl:547

[38] read_scalar(f::JLD2.JLDFile{JLD2.MmapIO}, rr::JLD2.ReadRepresentation{NamedTuple{(:locs, :binding_traces, :precision), Tuple{DataFrames.DataFrame, Vector{NamedTuple{(:cluster_index, :position, :events), Tuple{Int64, StaticArraysCore.SVector{2, Float64}, Vector{Any}}}}, Float64}}, JLD2.OnDiskRepresentation{(0, 8, 16), Tuple{DataFrames.DataFrame, Vector{NamedTuple{(:cluster_index, :position, :events), Tuple{Int64, StaticArraysCore.SVector{2, Float64}, Vector{Any}}}}, Float64}, Tuple{JLD2.RelOffset, JLD2.RelOffset, Float64}, 24}()}, header_offset::JLD2.RelOffset)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/dataio.jl:37

[39] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, rr::Any, read_dataspace::Tuple{JLD2.ReadDataspace, JLD2.RelOffset, JLD2.DataLayout, JLD2.FilterPipeline}, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:238

[40] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, dataspace::JLD2.ReadDataspace, datatype_class::UInt8, datatype_offset::Int64, layout::JLD2.DataLayout, filters::JLD2.FilterPipeline, header_offset::JLD2.RelOffset, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:194

[41] load_dataset(f::JLD2.JLDFile{JLD2.MmapIO}, offset::JLD2.RelOffset)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:125

[42] jlconvert
@ ~/.julia/packages/JLD2/1YVED/src/data/writing_datatypes.jl:314 [inlined]

[43] macro expansion
@ ~/.julia/packages/JLD2/1YVED/src/dataio.jl:70 [inlined]

[44] macro expansion
@ ./simdloop.jl:77 [inlined]

[45] read_array!(v::Vector{Any}, f::JLD2.JLDFile{JLD2.MmapIO}, rr::JLD2.ReadRepresentation{Any, JLD2.RelOffset})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/dataio.jl:68

[46] read_array(f::JLD2.JLDFile{JLD2.MmapIO}, dataspace::JLD2.ReadDataspace, rr::JLD2.ReadRepresentation{Any, JLD2.RelOffset}, layout::JLD2.DataLayout, filters::JLD2.FilterPipeline, header_offset::JLD2.RelOffset, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:410

[47] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, rr::JLD2.ReadRepresentation{Any, JLD2.RelOffset}, read_dataspace::Tuple{JLD2.ReadDataspace, JLD2.RelOffset, JLD2.DataLayout, JLD2.FilterPipeline}, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:276

[48] macro expansion
@ ~/.julia/packages/JLD2/1YVED/src/datasets.jl:224 [inlined]

[49] macro expansion
@ ~/.julia/packages/JLD2/1YVED/src/datatypes.jl:105 [inlined]

[50] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, dataspace::JLD2.ReadDataspace, datatype_class::UInt8, datatype_offset::Int64, layout::JLD2.DataLayout, filters::JLD2.FilterPipeline, header_offset::JLD2.RelOffset, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:211

[51] load_dataset(f::JLD2.JLDFile{JLD2.MmapIO}, offset::JLD2.RelOffset)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/datasets.jl:125

[52] getindex(g::JLD2.Group{JLD2.JLDFile{JLD2.MmapIO}}, name::String)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/groups.jl:109

[53] getindex
@ ~/.julia/packages/JLD2/1YVED/src/JLD2.jl:460 [inlined]

[54] loadtodict!(d::Dict{String, Any}, g::JLD2.JLDFile{JLD2.MmapIO}, prefix::String)
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/loadsave.jl:154

[55] loadtodict!
@ ~/.julia/packages/JLD2/1YVED/src/loadsave.jl:153 [inlined]

[56] (::JLD2.var"#100#101")(file::JLD2.JLDFile{JLD2.MmapIO})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/fileio.jl:39

[57] jldopen(::Function, ::String, ::Vararg{String}; kws::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ JLD2 ~/.julia/packages/JLD2/1YVED/src/loadsave.jl:4

[58] jldopen
@ ~/.julia/packages/JLD2/1YVED/src/loadsave.jl:1 [inlined]

BioTurboNick commented 1 year ago

@JonasIsensee - if you wouldn't mind taking a look? Fix may be as simple as modifying DataLayout.

JonasIsensee commented 1 year ago

I don't quite recall why I decided to use UInt32 there. I made the change in #452. Could you please test if this solves the problem for you?

This error can only occur if you're trying to read an array with more than typemax(UInt32) elements along one dimension. (Is that what you were trying to do?)

BioTurboNick commented 1 year ago

I wasn't deliberately trying to save that much, but I'm not too surprised that our workflow sometimes produces an array that big.

I had brute-forced a patch by overwriting methods to accept the modified DataLayout struct, which worked. So I'll assume it is fixed and report back if not. Thanks!