AnyDSL / thorin

The Higher-Order Intermediate Representation
https://anydsl.github.io
GNU Lesser General Public License v3.0
151 stars 15 forks source link

Explicit control flow forces code duplication #96

Open madmann91 opened 5 years ago

madmann91 commented 5 years ago

The explicit control flow in the match (and therefore also in the branch()) intrinsic functions will duplicate code when specialization:

fn main(i: i32, j: i32) -> i32 {
    let light = match j {
        0 => @ || 5,
        1 => @ || 6,
        2 => @ || 7,
        3 => @ || 8,
        _ => @ || 9
    };
    let shader = match i {
        0 => @ || 1,
        1 => @ || 2,
        2 => @ || 3,
        3 => @ || 4,
        _ => @ || 5
    };
    light() + shader()
}

In the following example, a programmer would logically expect the compiler to generate two switches. Unfortunately, this is not what happens, as one switch will become embedded inside each and every branch of the other (thereby creating 1 + 5 switches). See the thorin IR at the bottom of this post for reference.

The solution to this problem would be (in my opinion), to stop using the branch() and match() intrinsics, and use a combination of select + jump (and maybe a new primop that selects between more than two options) instead (which is what we were doing at some point before introducing branch()). This will indeed complicate a bit code generation but is also more natural (the IR is then smaller) and I don't think it is too hard to implement.

Module before PE (2 switches):

module 'test_pe'

main_136(mem mem_137, qs32 __138, qs32 __139, fn(mem, qs32) return_140) extern  @(bool 0, bool 0, bool 0, bool 0)
    (qs32, fn()) match_232 = tuple qs32 1, matchcase_225
    (qs32, fn()) match_240 = tuple qs32 2, matchcase_233
    (qs32, fn()) match_248 = tuple qs32 3, matchcase_241
    (qs32, fn()) match_224 = tuple qs32 0, matchcase_218
    (mem, frame) _162 = enter mem_137
    mem _164 = extract _162, qu32 0
    match_142(__139, matchotherwise_149, ((qs32, fn()) tuple qs32 0, matchcase_218), ((qs32, fn()) tuple qs32 1, matchcase_225), ((qs32, fn()) tuple qs32 2, matchcase_233), ((qs32, fn()) tuple qs32 3, matchcase_241))

    matchcase_218()
        next_150(lambda_219)

    matchotherwise_149()
        next_150(lambda_212)

    matchcase_233()
        next_150(lambda_234)

    matchcase_225()
        next_150(lambda_226)

    matchcase_241()
        next_150(lambda_242)

    next_150(fn(mem, fn(mem, qs32)) light_151)
        (qs32, fn()) match_211 = tuple qs32 3, matchcase_204
        (qs32, fn()) match_195 = tuple qs32 1, matchcase_188
        (qs32, fn()) match_187 = tuple qs32 0, matchcase_180
        (qs32, fn()) match_203 = tuple qs32 2, matchcase_196
        match_152(__138, matchotherwise_159, ((qs32, fn()) tuple qs32 0, matchcase_180), ((qs32, fn()) tuple qs32 1, matchcase_188), ((qs32, fn()) tuple qs32 2, matchcase_196), ((qs32, fn()) tuple qs32 3, matchcase_204))

    matchcase_188()
        next_160(lambda_189)

    matchotherwise_159()
        next_160(lambda_172)

    matchcase_204()
        next_160(lambda_205)

    matchcase_180()
        next_160(lambda_181)

    matchcase_196()
        next_160(lambda_197)

    next_160(fn(mem, fn(mem, qs32)) shader_161)
        light_151(_164, light_cont_165)

    light_cont_165(mem mem_166, qs32 light_167)
        shader_161(mem_166, shader_cont_168)

    shader_cont_168(mem mem_169, qs32 shader_170)
        qs32 _171 = add light_167, shader_170
        return_140(mem_169, _171)

Module after PE (6 switches):

module 'test_pe'

main_2626(mem mem_2627, qs32 __2628, qs32 __2629, fn(mem, qs32) return_2630) extern 
    (qs32, fn()) match_2700 = tuple qs32 2, matchcase_2690
    (qs32, fn()) match_2689 = tuple qs32 1, matchcase_2679
    (qs32, fn()) match_2711 = tuple qs32 3, matchcase_2701
    (qs32, fn()) match_2678 = tuple qs32 0, matchcase_2664
    match_2631(__2629, matchotherwise_2638, ((qs32, fn()) tuple qs32 0, matchcase_2664), ((qs32, fn()) tuple qs32 1, matchcase_2679), ((qs32, fn()) tuple qs32 2, matchcase_2690), ((qs32, fn()) tuple qs32 3, matchcase_2701))

    matchotherwise_2638()
        (qs32, fn()) match_2651 = tuple qs32 0, matchcase_2649
        (qs32, fn()) match_2659 = tuple qs32 2, matchcase_2657
        (qs32, fn()) match_2663 = tuple qs32 3, matchcase_2661
        (qs32, fn()) match_2655 = tuple qs32 1, matchcase_2653
        match_2639(__2628, matchotherwise_2646, ((qs32, fn()) tuple qs32 0, matchcase_2649), ((qs32, fn()) tuple qs32 1, matchcase_2653), ((qs32, fn()) tuple qs32 2, matchcase_2657), ((qs32, fn()) tuple qs32 3, matchcase_2661))

    matchcase_2649()
        return_2630(mem_2627, qs32 10)

    matchcase_2653()
        return_2630(mem_2627, qs32 11)

    matchotherwise_2646()
        return_2630(mem_2627, qs32 14)

    matchcase_2657()
        return_2630(mem_2627, qs32 12)

    matchcase_2661()
        return_2630(mem_2627, qs32 13)

    matchcase_2679()
        (qs32, fn()) match_2684 = tuple qs32 1, matchcase_2683
        (qs32, fn()) match_2682 = tuple qs32 0, matchcase_2681
        (qs32, fn()) match_2686 = tuple qs32 2, matchcase_2685
        (qs32, fn()) match_2688 = tuple qs32 3, matchcase_2687
        match_2639(__2628, matchotherwise_2680, ((qs32, fn()) tuple qs32 0, matchcase_2681), ((qs32, fn()) tuple qs32 1, matchcase_2683), ((qs32, fn()) tuple qs32 2, matchcase_2685), ((qs32, fn()) tuple qs32 3, matchcase_2687))

    matchcase_2687()
        return_2630(mem_2627, qs32 10)

    matchcase_2685()
        return_2630(mem_2627, qs32 9)

    matchotherwise_2680()
        return_2630(mem_2627, qs32 11)

    matchcase_2683()
        return_2630(mem_2627, qs32 8)

    matchcase_2681()
        return_2630(mem_2627, qs32 7)

    matchcase_2690()
        (qs32, fn()) match_2695 = tuple qs32 1, matchcase_2694
        (qs32, fn()) match_2693 = tuple qs32 0, matchcase_2692
        (qs32, fn()) match_2697 = tuple qs32 2, matchcase_2696
        (qs32, fn()) match_2699 = tuple qs32 3, matchcase_2698
        match_2639(__2628, matchotherwise_2691, ((qs32, fn()) tuple qs32 0, matchcase_2692), ((qs32, fn()) tuple qs32 1, matchcase_2694), ((qs32, fn()) tuple qs32 2, matchcase_2696), ((qs32, fn()) tuple qs32 3, matchcase_2698))

    matchotherwise_2691()
        return_2630(mem_2627, qs32 12)

    matchcase_2694()
        return_2630(mem_2627, qs32 9)

    matchcase_2692()
        return_2630(mem_2627, qs32 8)

    matchcase_2696()
        return_2630(mem_2627, qs32 10)

    matchcase_2698()
        return_2630(mem_2627, qs32 11)

    matchcase_2664()
        (qs32, fn()) match_2671 = tuple qs32 1, matchcase_2669
        (qs32, fn()) match_2674 = tuple qs32 2, matchcase_2672
        (qs32, fn()) match_2668 = tuple qs32 0, matchcase_2666
        (qs32, fn()) match_2677 = tuple qs32 3, matchcase_2675
        match_2639(__2628, matchotherwise_2665, ((qs32, fn()) tuple qs32 0, matchcase_2666), ((qs32, fn()) tuple qs32 1, matchcase_2669), ((qs32, fn()) tuple qs32 2, matchcase_2672), ((qs32, fn()) tuple qs32 3, matchcase_2675))

    matchcase_2666()
        return_2630(mem_2627, qs32 6)

    matchotherwise_2665()
        return_2630(mem_2627, qs32 10)

    matchcase_2669()
        return_2630(mem_2627, qs32 7)

    matchcase_2675()
        return_2630(mem_2627, qs32 9)

    matchcase_2672()
        return_2630(mem_2627, qs32 8)

    matchcase_2701()
        (qs32, fn()) match_2706 = tuple qs32 1, matchcase_2705
        (qs32, fn()) match_2704 = tuple qs32 0, matchcase_2703
        (qs32, fn()) match_2708 = tuple qs32 2, matchcase_2707
        (qs32, fn()) match_2710 = tuple qs32 3, matchcase_2709
        match_2639(__2628, matchotherwise_2702, ((qs32, fn()) tuple qs32 0, matchcase_2703), ((qs32, fn()) tuple qs32 1, matchcase_2705), ((qs32, fn()) tuple qs32 2, matchcase_2707), ((qs32, fn()) tuple qs32 3, matchcase_2709))

    matchcase_2707()
        return_2630(mem_2627, qs32 11)

    matchcase_2705()
        return_2630(mem_2627, qs32 10)

    matchcase_2703()
        return_2630(mem_2627, qs32 9)

    matchotherwise_2702()
        return_2630(mem_2627, qs32 13)

    matchcase_2709()
        return_2630(mem_2627, qs32 12)
leissa commented 5 years ago

We used to do the select + jump trick back in the days of yore. IMHO the branch intrinsic is a bit more concise and easier to work with. At least I never found my self looking back to select + jump. That being said, I don't quite see why select + jump would help?