FluxML / IRTools.jl

Mike's Little Intermediate Representation
MIT License
111 stars 35 forks source link

dynamo possibly corrupts memory for comparison in getindex #75

Open femtomc opened 4 years ago

femtomc commented 4 years ago

This is a bizarro bug and I'm not even sure what to call it, but I know IRTools is involved, because the program runs correctly when I turn my dynamo off. It's possible that I'm doing something crazy in my dynamo, but I don't think so. This is on Julia v1.4.2 with IRTools 0.4.

This error occurs seemingly randomly during program execution - it sometimes makes it all the way through, sometimes it breaks.

Key: ##305, Symbol Available: [Symbol("##298"), Symbol("##259"), Symbol("##260"), Symbol("##299"), Symbol("##305"), Symbol("##258")], false
ERROR: LoadError: KeyError: key Symbol("##305") not found
Stacktrace:
 [1] getindex at ./dict.jl:477 [inlined]
 [2] ExecutionContext at /home/mccoy/.julia/dev/Jaynes/src/contexts.jl:13 [inlined]
 [3] step_arbiter at /home/mccoy/.julia/dev/GenerativeHap/src/core/execution_engine/step_arbiter.jl:2 [inlined]
 [4] (::Jaynes.HierarchicalTrace)(::typeof(Main.RecursiveSearcher.GenerativeHap.step_arbiter), ::Main.RecursiveSearcher.GenerativeHap.ActiveBehaviorTree) at /home/mccoy/.julia/packages/IRTools/BpoqK/src/reflection/dynamo.jl:0

This call occurs here:

# Choose one of the leaf steps of `tree` through a `rand` call at the address
# `:step_selection` and return the matching entry of `tree.nodes`.
function step_arbiter(tree::ActiveBehaviorTree)
    # Here.
    leaf_steps = Step[tree.nodes[leaf] for leaf in tree.leaves]

    # One `(priority_mod, uuid)` pair per leaf step.
    weighted_ids = map(leaf_steps) do step
        (step.priority_mod, step.uuid)
    end
    # `unzip` is defined elsewhere; presumably it splits the pairs into the
    # first components and the second components — TODO confirm.
    weights, ids = unzip(weighted_ids)

    # Normalise the weights by their sum before pairing them with the ids.
    total = Base.sum(weights)
    probs = weights ./ total
    id_probs = Dict(zip(ids, probs))

    chosen = rand(:step_selection, SymbolicCategorical(id_probs))
    return tree.nodes[chosen]
end

Here is the core part of my dynamo; `ExecutionContext` is an abstract type:

# Generic dynamo for every `ExecutionContext`: obtain the IR for the call
# described by `a...`, run `recurse!` on it, and return the resulting IR.
@dynamo function (mx::ExecutionContext)(a...)
    ir = IR(a...)
    # Bail out when no IR is available for this call. `===` is the idiomatic
    # identity test for `nothing` and cannot be affected by `==` overloads.
    ir === nothing && return
    # `recurse!` comes from IRTools; presumably it rewrites the calls inside
    # `ir` so they also go through this context — confirm against IRTools docs.
    recurse!(ir)
    return ir
end

# Method for intercepted `getindex` calls: when the container is a `Dict`,
# print the key, its type, the container type, the available keys and whether
# the key is among them; then perform the ordinary lookup `c[k]`.
function (mx::ExecutionContext)(fn::typeof(getindex), c, k)
    if c isa Dict
        # Printout below.
        println("Key: $k, $(typeof(k)) Available: $(typeof(c)) $(keys(c)), $(k in keys(c))")
    end
    return c[k]
end

but I define the dispatch methods on a concrete subtype, `HierarchicalTrace`:

# `rand` at address `addr` with a distribution: draw a sample from `d`, store a
# `ChoiceSite` built from its `logpdf` and the sample under `addr` in `tr.chm`,
# and return the sample.
@inline function (tr::HierarchicalTrace)(fn::typeof(rand), addr::Address, d::Distribution{T}) where T
    sample = rand(d)
    score = logpdf(d, sample)
    tr.chm[addr] = ChoiceSite(score, sample)
    return sample
end

# `rand` at address `addr` with a function: run `call(args...)` under a fresh
# `Trace()`, store a `CallSite` holding that trace, the function, its arguments
# and the result under `addr` in `tr.chm`, and return the result.
@inline function (tr::HierarchicalTrace)(fn::typeof(rand), addr::Address, call::Function, args...)
    sub_trace = Trace()
    result = sub_trace(call, args...)
    tr.chm[addr] = CallSite(sub_trace, call, args, result)
    return result
end

This does track the calls. If I run the program, I get:

Key: ##381, Symbol Available: Dict{Symbol,Main.RecursiveSearcher.GenerativeHap.Node} [Symbol("##381"), Symbol("##446"), Symbol("##379"), Symbol("##380"), Symbol("##447"), Symbol("##453")], true
Key: ##447, Symbol Available: Dict{Symbol,Main.RecursiveSearcher.GenerativeHap.Node} [Symbol("##381"), Symbol("##446"), Symbol("##379"), Symbol("##380"), Symbol("##447"), Symbol("##453")], false

This is the output of the printout line from above. So the equality check in `getindex` randomly fails for keys, even though the key (at least, what I think is the key) is present in the `KeySet`: in the second line the key is listed among the available keys, yet `k in keys(c)` reports `false`. I cannot for the life of me figure out what could cause this.

This also doesn't just occur in this call, but in other calls where getindex is called e.g.

Stacktrace:
 [1] getindex at ./dict.jl:477 [inlined]
 [2] (::Jaynes.HierarchicalTrace)(::typeof(getindex), ::Dict{Symbol,Main.RecursiveSearcher.GenerativeHap.Node}, ::Symbol) at /home/mccoy/.julia/dev/Jaynes/src/contexts.jl:13
 [3] update_STCC! at /home/mccoy/.julia/dev/GenerativeHap/src/core/execution_engine/pre_execution_updates.jl:44 [inlined]

where a getindex call occurs in another part of my codebase. And just to be totally transparent, that method is defined:

# Two passes over `tree`:
#   1. for every entry of `tree.executing_actions`, call `succeed!` when the
#      `success_test` of the corresponding node holds;
#   2. for every entry of `tree.active_behaviors`, call `fail!` on its node once
#      for each of the node's `context_conditions` that holds.
function update_STCC!(tree::ActiveBehaviorTree, env::Environment)
    for (action_key, action) in tree.executing_actions
        # Here.
        if tree.nodes[action_key].success_test(tree.globals, env)
            # NOTE(review): `s` is not bound anywhere in this function and the
            # second element of the loop tuple is unused — `s` presumably
            # should be the node or the action; confirm the intended argument.
            succeed!(s, tree)
        end
    end

    for behavior_key in tree.active_behaviors
        node = tree.nodes[behavior_key]
        for condition in node.context_conditions
            if condition(tree.globals, env)
                fail!(node, tree)
            end
        end
    end
end

This, at least to me, shows that it's not some weird type thing with the list comprehension. The same thing occurs: the `Symbol` is "apparently" in the `KeySet`, but the `getindex` equality check fails and a `KeyError` is thrown.

femtomc commented 4 years ago

@MikeInnes I ended up developing a workaround which prevents the dynamo from wrapping calls to Base.

# Reduce a call target to a value that can be compared against a list of names:
# a `GlobalRef` yields its `name` field, anything else is returned unchanged.
function unwrap(gr::GlobalRef)
    return gr.name
end

function unwrap(gr)
    return gr
end

# Whitelist includes vectorized calls.
# Declared `const` so that code reading this global sees a fixed binding of
# known type (`Vector{Symbol}`) rather than an untyped, rebindable global.
# The vector itself can still be extended with `push!`.
const whitelist = [:rand, :learnable, :foldr, :map, :soss_fmi, :gen_fmi, :turing_fmi]

# Fix for specialized tracing.
#
# Rewrite call statements of `ir` in place so that they are routed through
# `to` (default `self`): a selected call `f(args...)` becomes `to(f, args...)`.
# A call is selected when its unwrapped target is in `whitelist`, or when it is
# not one of the names returned by `names(Base)`; every other call statement is
# left untouched. Returns the (mutated) `ir`.
function recur!(ir, to = self)
    # `names(Base)` does not depend on the statement being visited, so build
    # the list once per call instead of once per loop iteration.
    base_names = names(Base)
    for (x, st) in ir
        # Only call expressions are candidates for rewriting.
        isexpr(st.expr, :call) || continue
        ref = unwrap(st.expr.args[1])
        # Skip names listed by `names(Base)` unless explicitly whitelisted.
        ref in whitelist || !(ref in base_names) || continue
        ir[x] = Expr(:call, to, st.expr.args...)
    end
    return ir
end

The issue turns out to be a call in Base somewhere, because this fixed the problem. It also greatly increased performance on programs which could have type stability issues. I had to do something similar for Cassette at one point.