JuliaParallel / DistributedArrays.jl

Distributed Arrays in Julia
Other
197 stars 35 forks source link

`DArray` construction leads to wrong `eltype` if each `pid` has a different type #162

Closed raminammour closed 5 years ago

raminammour commented 6 years ago

Hello,

I found this while testing the fix for the other issue, you must hate me by now ;)

Here is a minimal example. If the worker that reports the wrong type is slowed down with a `sleep` statement, the inferred type is correct.

julia> addprocs(2);using DistributedArrays

julia> ddr=@DArray [(rr=rand(mod(i,3)<2 ? Float64 : Float32,3);println(eltype(rr));rr) for i=0:3];
      From worker 2:    Float64
      From worker 2:    Float64
      From worker 3:    Float32
      From worker 3:    Float64

julia> ddr
4-element DArray{Array{Float64,1},1,Array{Array{Float64,1},1}}:
Error showing value of type DArray{Array{Float64,1},1,Array{Array{Float64,1},1}}:
ERROR: TypeError: in isassigned, in typeassert, expected Array{Float64,1}, got Array{Float32,1}
Stacktrace:
 [1] isassigned(::DArray{Array{Float64,1},1,Array{Array{Float64,1},1}}, ::Int64, ::Int64) at /data/gpfs/Users/j0280401/JULIA/.julia/packages/DistributedArrays/XV7NS/src/darray.jl:590
 [2] alignment(::IOContext{REPL.Terminals.TTYTerminal}, ::DArray{Array{Float64,1},1,Array{Array{Float64,1},1}}, ::UnitRange{Int64}, ::UnitRange{Int64}, ::Int64, ::Int64, ::Int64) at ./arrayshow.jl:67
 [3] print_matrix(::IOContext{REPL.Terminals.TTYTerminal}, ::DArray{Array{Float64,1},1,Array{Array{Float64,1},1}}, ::String, ::String, ::String, ::String, ::String, ::String, ::Int64, ::Int64) at ./arrayshow.jl:186
 [4] print_matrix at ./arrayshow.jl:159 [inlined]
 [5] print_array at ./arrayshow.jl:308 [inlined]
 [6] show(::IOContext{REPL.Terminals.TTYTerminal}, ::MIME{Symbol("text/plain")}, ::DArray{Array{Float64,1},1,Array{Array{Float64,1},1}}) at ./arrayshow.jl:345
 [7] display(::REPL.REPLDisplay, ::MIME{Symbol("text/plain")}, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/REPL/src/REPL.jl:131
 [8] display(::REPL.REPLDisplay, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/REPL/src/REPL.jl:135
 [9] display(::DArray{Array{Float64,1},1,Array{Array{Float64,1},1}}) at ./multimedia.jl:287
 [10] #invokelatest#1 at ./essentials.jl:686 [inlined]
 [11] invokelatest at ./essentials.jl:685 [inlined]
 [12] print_response(::IO, ::Any, ::Any, ::Bool, ::Bool, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/REPL/src/REPL.jl:154
 [13] print_response(::REPL.AbstractREPL, ::Any, ::Any, ::Bool, ::Bool) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/REPL/src/REPL.jl:139
 [14] (::getfield(REPL, Symbol("#do_respond#40")){Bool,getfield(REPL, Symbol("##50#59")){REPL.LineEditREPL,REPL.REPLHistoryProvider},REPL.LineEditREPL,REPL.LineEdit.Prompt})(::Any, ::Any, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/REPL/src/REPL.jl:708
 [15] #invokelatest#1 at ./essentials.jl:686 [inlined]
 [16] invokelatest at ./essentials.jl:685 [inlined]
 [17] run_interface(::REPL.Terminals.TextTerminal, ::REPL.LineEdit.ModalInterface, ::REPL.LineEdit.MIState) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/REPL/src/LineEdit.jl:2261
 [18] run_frontend(::REPL.LineEditREPL, ::REPL.REPLBackendRef) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/REPL/src/REPL.jl:1029
 [19] run_repl(::REPL.AbstractREPL, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/REPL/src/REPL.jl:191
 [20] (::getfield(Base, Symbol("##720#722")){Bool,Bool,Bool,Bool})(::Module) at ./logging.jl:311
 [21] #invokelatest#1 at ./essentials.jl:686 [inlined]
 [22] invokelatest at ./essentials.jl:685 [inlined]
 [23] macro expansion at ./logging.jl:308 [inlined]
 [24] run_main_repl(::Bool, ::Bool, ::Bool, ::Bool, ::Bool) at ./client.jl:330
 [25] exec_options(::Base.JLOptions) at ./client.jl:242
 [26] _start() at ./client.jl:421

julia> typeof(ddr)
DArray{Array{Float64,1},1,Array{Array{Float64,1},1}}

julia> ddr=@DArray [(rr=rand(mod(i,3)<2 ? Float64 : Float32,3);println(eltype(rr));myid()==2 && sleep(1);rr) for i=0:3];
      From worker 2:    Float64
      From worker 3:    Float32
      From worker 3:    Float64
      From worker 2:    Float64

julia> typeof(ddr)
DArray{Array{T,1} where T,1,Array{Array{T,1} where T,1}}

julia> ddr=@DArray [(rr=rand(mod(i,3)<2 ? Float64 : Float32,3);println(eltype(rr));myid()==2 && sleep(1);rr) for i=0:3];
      From worker 2:    Float64
      From worker 3:    Float32
      From worker 3:    Float64
      From worker 2:    Float64

julia> typeof(ddr)
DArray{Array{T,1} where T,1,Array{Array{T,1} where T,1}}

julia> ddr
4-element DArray{Array{T,1} where T,1,Array{Array{T,1} where T,1}}:
 [0.33996, 0.838821, 0.802694]        
 [0.929603, 0.454744, 0.331237]       
 Float32[0.174719, 0.617074, 0.112888]
 [0.329048, 0.3101, 0.09931]          

julia> versioninfo()
Julia Version 1.0.0
Commit 5d4eaca0c9 (2018-08-08 20:58 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU E5-2670 0 @ 2.60GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-6.0.0 (ORCJIT, sandybridge)

Cheers!

P.S.: Thanks for a great package, despite any issues I may file :)

raminammour commented 6 years ago

It is caused by this part:

"""
    DArray(id, init, dims, pids, idxs, cuts)

Run `construct_localparts` on every process in `pids` and return the `DArray` handle for
the calling process.

# Arguments
- `id`: identifier forwarded to `construct_localparts` and used as the `registry` key.
- `init`: either a single `Function` sent to every process, or an indexable collection
  whose `i`-th entry is sent to the `i`-th process of `pids`.
- `dims`, `pids`, `idxs`, `cuts`: forwarded unchanged to `construct_localparts` and to the
  `DArray` constructor.

# Returns
`registry[id]` (unwrapped if it is a `WeakRef`) when the calling process is one of `pids`;
otherwise a new `DArray{T,N,A}` holding an empty local part, where `A` is the type
reported by the workers, `T = eltype(A)` and `N = length(dims)`.

# Throws
- `ArgumentError` if `pids` is empty.
- `ErrorException` if the processes do not all report the same type.
"""
function DArray(id, init, dims, pids, idxs, cuts)
    # Without any worker there is no type to build the array from.
    isempty(pids) && throw(ArgumentError("cannot construct a DArray over empty `pids`"))

    # One slot per process. Every reported type is kept so that the result does not
    # depend on which remote call happens to finish first.
    # `Any` because the return type of `construct_localparts` is not visible here.
    localtypes = Vector{Any}(undef, length(pids))
    @sync begin
        for (i, pid) in enumerate(pids)
            @async begin
                # A `Function` is shared by all processes; otherwise `init` holds one
                # initializer per process (e.g. an array of remote refs).
                part = isa(init, Function) ? init : init[i]
                localtypes[i] = remotecall_fetch(
                    construct_localparts, pid, part, id, dims, pids, idxs, cuts
                )
            end
        end
    end

    A = first(localtypes)
    if any(t -> t != A, localtypes)
        # NOTE(review): the local parts already constructed on the workers are not
        # released here; confirm whether a cleanup helper should be called first.
        throw(ErrorException(
            "Constructed localparts have different types on pids $(pids): $(localtypes)"
        ))
    end

    if myid() in pids
        d = registry[id]
        d = isa(d, WeakRef) ? d.value : d
    else
        T = eltype(A)
        N = length(dims)
        d = DArray{T,N,A}(id, dims, pids, idxs, cuts, empty_localpart(T, N, A))
    end
    return d
end

The resulting type is that of whichever process pushes its local type to the channel first. Instead, the types from all processes should be collected (reduced) and checked to be the same before inferring `eltype`. I will see if I can fix this...