fhs / NPZ.jl

A Julia package that provides support for reading and writing NumPy .npy and .npz files
Other
117 stars 16 forks source link

"EOFError: read end of file" reading a file #46

Open CarloLucibello opened 2 years ago

CarloLucibello commented 2 years ago

Here is a reproducible example:

shell> wget https://github.com/shchur/gnn-benchmark/raw/master/data/npz/cora.npz 

julia> using NPZ

julia> npzread("cora.npz")
ERROR: EOFError: read end of file
Stacktrace:
  [1] _read(f::ZipFile.ReadableFile, a::Vector{UInt8})
    @ ZipFile ~/.julia/packages/ZipFile/fdYkP/src/ZipFile.jl:519
  [2] read!
    @ ~/.julia/packages/ZipFile/fdYkP/src/ZipFile.jl:483 [inlined]
  [3] readheader(f::ZipFile.ReadableFile)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:210
  [4] npzreadarray(f::ZipFile.ReadableFile)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:243
  [5] (::NPZ.var"#4#7")(f::ZipFile.ReadableFile)
    @ NPZ ./none:0
  [6] iterate
    @ ./generator.jl:47 [inlined]
  [7] _all(f::Base.var"#282#284", itr::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"}, #unused#::Colon)
    @ Base ./reduce.jl:922
  [8] all
    @ ./reduce.jl:918 [inlined]
  [9] Dict(kv::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"})
    @ Base ./dict.jl:131
 [10] npzread (repeats 2 times)
    @ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:311 [inlined]
 [11] npzread(::String)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:295
 [12] top-level scope
    @ REPL[3]:1

caused by: parsing header failed: unsupported type U7
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:33
  [2] parsedtype
    @ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:157 [inlined]
  [3] parseheader(s::SubString{String})
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:185
  [4] readheader(f::ZipFile.ReadableFile)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:226
  [5] npzreadarray(f::ZipFile.ReadableFile)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:243
  [6] (::NPZ.var"#4#7")(f::ZipFile.ReadableFile)
    @ NPZ ./none:0
  [7] iterate
    @ ./generator.jl:47 [inlined]
  [8] grow_to!(dest::Dict{String, Vector{T} where T}, itr::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"}, st::Int64)
    @ Base ./dict.jl:162
  [9] grow_to!(dest::Dict{String, Vector{Float32}}, itr::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"}, st::Int64)
    @ Base ./dict.jl:160
 [10] grow_to!(dest::Dict{Any, Any}, itr::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"})
    @ Base ./dict.jl:145
 [11] dict_with_eltype
    @ ./abstractdict.jl:545 [inlined]
 [12] Dict(kv::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"})
    @ Base ./dict.jl:129
 [13] npzread (repeats 2 times)
    @ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:311 [inlined]
 [14] npzread(::String)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:295
 [15] top-level scope
    @ REPL[3]:1

The downloaded file is read correctly from Python.

aterenin commented 2 years ago

Same issue here.

maj0e commented 2 years ago

I encountered a similar problem. In my case, the EOFError is due to the uncompressed file size exceeding 4 GB.

When saving the same file from Python as an uncompressed .npz, the size is around 4.9 GB. Trying to read that file with NPZ.jl leads to a different error:

ERROR: LoadError: invalid file header
Stacktrace:
 [1] error(s::String)
   @ Base ./error.jl:33
 [2] _getfiles(io::IOStream, diroffset::UInt32, nfiles::UInt16)
   @ ZipFile ~/.julia/packages/ZipFile/evaHP/src/ZipFile.jl:309
 [3] Reader
   @ ~/.julia/packages/ZipFile/evaHP/src/ZipFile.jl:106 [inlined]
 [4] ZipFile.Reader(filename::String)
   @ ZipFile ~/.julia/packages/ZipFile/evaHP/src/ZipFile.jl:118
 [5] npzread(::String)
   @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:294

Judging from this issue in ZipFile.jl, ZIP64, and therefore files bigger than 4 GB uncompressed, aren't supported at all.

It would be nice if NPZ.jl could check for this in advance, before calling ZipFile.jl. If not, I'm just leaving this information here for other people wondering why their file couldn't be read.

rachithaiyappa commented 2 years ago

Same issue here. My file size is 22 MB.

ERROR: LoadError: EOFError: read end of file
Stacktrace:
 [1] _read(::ZipFile.ReadableFile, ::Array{UInt8,1}) at /u/racball/.julia/packages/ZipFile/fdYkP/src/ZipFile.jl:519
 [2] read! at /u/racball/.julia/packages/ZipFile/fdYkP/src/ZipFile.jl:483 [inlined]
 [3] readheader(::ZipFile.ReadableFile) at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:210
 [4] npzreadarray(::ZipFile.ReadableFile) at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:243
 [5] (::NPZ.var"#4#7")(::ZipFile.ReadableFile) at ./none:0
 [6] iterate at ./generator.jl:47 [inlined]
 [7] _all(::Base.var"#239#241", ::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Array{String,1}},Array{ZipFile.ReadableFile,1}},NPZ.var"#4#7"}, ::Colon) at ./reduce.jl:819
 [8] all at ./reduce.jl:815 [inlined]
 [9] Dict(::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Array{String,1}},Array{ZipFile.ReadableFile,1}},NPZ.var"#4#7"}) at ./dict.jl:130
 [10] npzread at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:311 [inlined] (repeats 2 times)
 [11] npzread(::String) at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:295
 [12] top-level scope at /nfs/nfs9/home/nobackup/racball/github/graphzip/exps/20211103_juliasparse.jl:3
 [13] include(::Module, ::String) at ./Base.jl:377
 [14] exec_options(::Base.JLOptions) at ./client.jl:288
 [15] _start() at ./client.jl:484
in expression starting at /nfs/nfs9/home/nobackup/racball/github/graphzip/exps/20211103_juliasparse.jl:3
caused by [exception 1]
parsing header failed: unsupported type S3
Stacktrace:
 [1] error(::String) at ./error.jl:33
 [2] parsedtype at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:157 [inlined]
 [3] parseheader(::SubString{String}) at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:185
 [4] readheader(::ZipFile.ReadableFile) at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:226
 [5] npzreadarray(::ZipFile.ReadableFile) at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:243
 [6] (::NPZ.var"#4#7")(::ZipFile.ReadableFile) at ./none:0
 [7] iterate at ./generator.jl:47 [inlined]
 [8] grow_to!(::Dict{String,Array{Int32,1}}, ::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Array{String,1}},Array{ZipFile.ReadableFile,1}},NPZ.var"#4#7"}, ::Int64) at ./dict.jl:161
 [9] grow_to!(::Dict{Any,Any}, ::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Array{String,1}},Array{ZipFile.ReadableFile,1}},NPZ.var"#4#7"}) at ./dict.jl:144
 [10] dict_with_eltype at ./abstractdict.jl:549 [inlined]
 [11] Dict(::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Array{String,1}},Array{ZipFile.ReadableFile,1}},NPZ.var"#4#7"}) at ./dict.jl:128
 [12] npzread at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:311 [inlined] (repeats 2 times)
 [13] npzread(::String) at /u/racball/.julia/packages/NPZ/UizJj/src/NPZ.jl:295
 [14] top-level scope at /nfs/nfs9/home/nobackup/racball/github/graphzip/exps/20211103_juliasparse.jl:3
 [15] include(::Module, ::String) at ./Base.jl:377
 [16] exec_options(::Base.JLOptions) at ./client.jl:288
 [17] _start() at ./client.jl:484
sdall commented 2 years ago

I observed the same problem while reading a compressed .npz file of 314 MB:

ERROR: not a numpy array file
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:33
  [2] readheader(f::ZipFile.ReadableFile)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:212
  [3] npzreadarray(f::ZipFile.ReadableFile)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:243
  [4] (::NPZ.var"#4#7")(f::ZipFile.ReadableFile)
    @ NPZ ./none:0
  [5] _all(f::Base.var"#318#320", itr::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"}, #unused#::Colon)
    @ Base ./reduce.jl:0
  [6] all
    @ ./reduce.jl:1152 [inlined]
  [7] Dict(kv::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"})
    @ Base ./dict.jl:131
  [8] npzread (repeats 2 times)
    @ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:311 [inlined]
  [9] npzread(::String)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:295
 [10] top-level scope
    @ REPL[3]:1

caused by: EOFError: read end of file
Stacktrace:
  [1] _read(f::ZipFile.ReadableFile, a::Matrix{Int64})
    @ ZipFile ~/.julia/packages/ZipFile/evaHP/src/ZipFile.jl:519
  [2] read!
    @ ~/.julia/packages/ZipFile/evaHP/src/ZipFile.jl:484 [inlined]
  [3] _npzreadarray(f::ZipFile.ReadableFile, hdr::NPZ.Header{Int64, 2, typeof(ltoh)})
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:234
  [4] npzreadarray(f::ZipFile.ReadableFile)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:244
  [5] (::NPZ.var"#4#7")(f::ZipFile.ReadableFile)
    @ NPZ ./none:0
  [6] iterate
    @ ./generator.jl:47 [inlined]
  [7] grow_to!(dest::Dict{Any, Any}, itr::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"})
    @ Base ./dict.jl:140
  [8] dict_with_eltype
    @ ./abstractdict.jl:547 [inlined]
  [9] Dict(kv::Base.Generator{Base.Iterators.Filter{NPZ.var"#5#8"{Vector{String}}, Vector{ZipFile.ReadableFile}}, NPZ.var"#4#7"})
    @ Base ./dict.jl:129
 [10] npzread (repeats 2 times)
    @ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:311 [inlined]
 [11] npzread(::String)
    @ NPZ ~/.julia/packages/NPZ/UizJj/src/NPZ.jl:295
 [12] top-level scope
    @ REPL[3]:1
Lucius2019 commented 1 year ago

So is there any solution?