quinnj / JSON3.jl

Other
214 stars 47 forks source link

Issue deserializing json within arrow table #287

Open ericphanson opened 1 month ago

ericphanson commented 1 month ago

Here I have some JSON data embedded inside a struct, which is itself stored in a column of an Arrow table, and I deserialize the JSON at `fromarrowstruct` time. This gives errors like the following (it also happens with `Symbol`, not just `String`):

conversion to pointer not defined for Arrow.Primitive{UInt8, Vector{UInt8}}
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:35
  [2] unsafe_convert(::Type{Ptr{UInt8}}, a::Arrow.Primitive{UInt8, Vector{UInt8}})
    @ Base ./pointer.jl:67
  [3] pointer
    @ ./abstractarray.jl:1240 [inlined]
  [4] pointer(V::SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, i::Int64)
    @ Base ./subarray.jl:476
  [5] getvalue(::Type{String}, buf::SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, tape::SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}, tapeidx::Int64, t::UInt64)
    @ JSON3 ~/.julia/packages/JSON3/jSAdy/src/utils.jl:0
  [6] getindex
    @ ~/.julia/packages/JSON3/jSAdy/src/JSON3.jl:163 [inlined]
  [7] copyto_unaliased!
    @ ./abstractarray.jl:1088 [inlined]
  [8] copyto!(dest::Vector{String}, src::JSON3.Array{String, SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}})
    @ Base ./abstractarray.jl:1068
  [9] _collect_indices
    @ ./array.jl:777 [inlined]
 [10] collect
    @ ./array.jl:761 [inlined]
 [11] _show(io::IOBuffer, arr::JSON3.Array{String, SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}}, indent::Int64, offset::Int64)
    @ JSON3 ~/.julia/packages/JSON3/jSAdy/src/show.jl:39
 [12] _show(io::IOBuffer, obj::JSON3.Object{SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}}, indent::Int64, offset::Int64)
    @ JSON3 ~/.julia/packages/JSON3/jSAdy/src/show.jl:21
 [13] _show
    @ ~/.julia/packages/JSON3/jSAdy/src/show.jl:7 [inlined]
 [14] show(io::IOBuffer, j::JSON3.Object{SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}})
    @ JSON3 ~/.julia/packages/JSON3/jSAdy/src/show.jl:1
 [15] sprint(f::Function, args::JSON3.Object{SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}}; context::Nothing, sizehint::Int64)
    @ Base ./strings/io.jl:114
 [16] sprint
    @ ./strings/io.jl:107 [inlined]
 [17] repr(x::JSON3.Object{SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}})
    @ Base ./strings/io.jl:286

I was able to work around it with:

# Workaround method for JSON3 values whose byte buffer is a view into an Arrow column.
# The reported failure is "conversion to pointer not defined for Arrow.Primitive", raised
# when `getvalue` takes a pointer into `buf`; copying the view first sidesteps that.
function JSON3.getvalue(
    ::Type{T}, buf::SubArray{UInt8,1,<:Arrow.Primitive}, tape, tapeidx, t
) where {T}
    # Copy the Arrow-backed view, then re-dispatch on the copy; `tape`, `tapeidx` and `t`
    # are forwarded untouched.
    # NOTE(review): presumably `copy` yields a plain `Vector{UInt8}` that supports
    # `pointer` (and no longer matches this method) — confirm against Arrow.jl.
    copied = copy(buf)
    return JSON3.getvalue(T, copied, tape, tapeidx, t)
end

I guess something like that could be added to a JSON3<>Arrow package extension, but it might be better to have a more generic fallback for getvalue that doesn't need pointers.