vnmakarov / mir

A lightweight JIT compiler based on MIR (Medium Internal Representation) and C11 JIT compiler and interpreter based on MIR
MIT License
2.24k stars 147 forks source link

Output optimized mir #303

Open mingodad opened 1 year ago

mingodad commented 1 year ago

While testing c2m with the sieve.c example I noticed that the mir output is not optimized. Is it possible to add an option to output optimized mir ?

Source of sieve.c where a dummy assigned but unused variable is added to see if it will be eliminated (it's not):

void printf (const char *fmt, ...);
void abort (void);
#if defined(_WIN32) || !defined(SIEVE_BENCH)
/* limitation for alloca */
#define SieveSize 8190
#define Expected 1027
#else
#define SieveSize 819000
#define Expected 65333
#endif
#define N_ITER 1000
int sieve (int n) {
  long i, k, count, iter, prime;
  char flags[SieveSize];

  for (iter = 0; iter < n; iter++) {
    count = 0;
    for (i = 0; i < SieveSize; i++) flags[i] = 1;
    for (i = 2; i < SieveSize; i++)
      if (flags[i]) {
        prime = i + 1;
        for (k = i + prime; k < SieveSize; k += prime) flags[k] = 0;
        count++;
      }
  }
  return count;
}
int main (void) {
  int n = sieve (N_ITER);
  int z = n;  ///!!!! dummy assigned but not used variable
  int w;  ///!!!! dummy not used variable

  printf ("%d iterations of sieve for %d: result = %d\n", N_ITER, SieveSize, n);
  if (n != Expected) abort ();
  return 0;
}

Output of c2m -S sieve.c that doesn't eliminate the dummy assigned but unused variable, but it does eliminated the dummy not assigned unused variable:

M0: module
proto0: proto   i32, i32:i0_n
proto1: proto   u64:U0_fmt, ...
proto2: proto   
    import  printf
    import  abort
sieve:  func    i32, i32:i0_n
    local   i64:fp, i64:I0_iter, i64:I_0, i64:i_1, i64:I0_count, i64:I0_i, i64:i_2, i64:I_3
    local   i64:I_4, i64:i_5, i64:i_6, i64:I0_prime, i64:I_7, i64:I0_k, i64:I_8, i64:i_9
    local   i64:I_10, i64:I_11, i64:I_12, i64:i_13, i64:I_14, i64:I_15, i64:i_16, i64:I_17
    local   i64:I_18, i64:i_19
# 1 arg, 26 locals
    alloca  fp, 8192
    mov I0_iter, 0
    ext32   I_0, i0_n
    bge L3, I0_iter, I_0
L1:
    mov I0_count, 0
    mov I0_i, 0
    bge L6, I0_i, 8190
L4:
    ext8    I_3, 1
    mov i8:(fp, I0_i), I_3
L5:
    mov I_4, I0_i
    add I_4, I_4, 1
    mov I0_i, I_4
    blt L4, I0_i, 8190
L6:
    mov I0_i, 2
    bge L9, I0_i, 8190
L7:
    bf  L11, i8:(fp, I0_i)
L10:
    add I_7, I0_i, 1
    mov I0_prime, I_7
    add I_8, I0_i, I0_prime
    mov I0_k, I_8
    bge L15, I0_k, 8190
L13:
    ext8    I_10, 0
    mov i8:(fp, I0_k), I_10
L14:
    mov I_11, I0_k
    add I_11, I_11, I0_prime
    mov I0_k, I_11
    blt L13, I0_k, 8190
L15:
    mov I_14, I0_count
    add I_14, I_14, 1
    mov I0_count, I_14
    jmp L12
L11:
L12:
L8:
    mov I_15, I0_i
    add I_15, I_15, 1
    mov I0_i, I_15
    blt L7, I0_i, 8190
L9:
L2:
    mov I_17, I0_iter
    add I_17, I_17, 1
    mov I0_iter, I_17
    ext32   I_18, i0_n
    blt L1, I0_iter, I_18
L3:
    ret I0_count
    endfunc
    export  sieve
main:   func    i32
    local   i64:i0_n, i64:i_0, i64:i0_z, i64:i_1
# 0 args, 4 locals
    call    proto0, sieve, i_0, 1000
    mov i0_n, i_0
    mov i0_z, i0_n //!!!!<<<<< notice here the dummy unused variable
    call    proto1, printf, "%d iterations of sieve for %d: result = %d\n\000", 1000, 8190, i0_n
    beqs    L17, i0_n, 1027
L16:
    call    proto2, abort
    jmp L18
L17:
L18:
    ret 0
    endfunc
    export  main
    endmodule
mingodad commented 1 year ago

I did an experiment with this C file:

void printf (const char *fmt, ...);
int main(int argc, char *argv[]) {
    int a,b, c = 0, i = 10;
    printf("i = %d\n", i);
    return 0;
}

The normal mir output is this:

M0: module
proto0: proto   u64:U0_fmt, ...
    import  printf
main:   func    i32, i32:i0_argc, u64:U0_argv
    local   i64:i0_c, i64:i0_i
# 2 args, 2 locals
    mov i0_c, 0
    mov i0_i, 10
    call    proto0, printf, "i = %d\n\000", i0_i
    ret 0
    endfunc
    export  main
    endmodule

Then I added this code to output mir again just before execution (in c2mir-driver.c::main):

        MIR_link (main_ctx,
                  gen_exec_p ? (n_gen > 1 ? MIR_set_parallel_gen_interface : MIR_set_gen_interface)
                             : MIR_set_lazy_gen_interface,
                  import_resolver);
        fun_addr = gen_exec_p && n_gen > 1 ? MIR_gen (main_ctx, 0, main_func) : main_func->addr;
//// output mir after MIR_gen
        module = DLIST_HEAD (MIR_module_t, *MIR_get_module_list (main_ctx));
        MIR_output_module (main_ctx, stderr, module);
////
        start_time = real_usec_time ();
        result_code
          = fun_addr (VARR_LENGTH (char_ptr_t, exec_argv), VARR_ADDR (char_ptr_t, exec_argv)

And got this output:

M0:     module
proto0: proto   u64:U0_fmt, ...
        import  printf
main:   func    i32, i32:i0_argc, u64:U0_argv
        local   i64:i0_c, i64:i0_i, i64:t1, i64:t2
# 2 args, 4 locals
        ext32   i0_argc, i0_argc
        mov     i0_c, 0
        mov     i0_i, 10
        mov     t1, .lc1
        call    proto0, printf, t1, i0_i
        mov     t2, 0
        ext32   t2, t2
        ret     t2
        endfunc
        export  main
.lc1:   u8      105, 32, 61, 32, 37, 100, 10, 0 # "i = %d\n\000"
        endmodule

It shows that the optimizer didn't removed the variable assigned with a constant but not used c.

mingodad commented 1 year ago

It's seems that I'm not outputting the result of the optimizer because when I run it with -dg9 then I get this:

Code generation of function main:
+++++++++++++MIR before generator:
main:   func    i32, i32:i0_argc, u64:U0_argv
        local   i64:i0_c, i64:i0_i, i64:t1, i64:t2
# 2 args, 4 locals
        ext32   i0_argc, i0_argc
        mov     i0_c, 0
        mov     i0_i, 10
        mov     t1, .lc1
        call    proto0, printf, t1, i0_i
        mov     t2, 0
        ext32   t2, t2
        ret     t2
        endfunc
+++++++++++++MIR after building CFG:
BB   0
  in edges:
  out edges:   1   2

BB   1
  in edges:   0   2
  out edges:

BB   2
  in edges:   0
  out edges:   1
        ext32   i0_argc, i0_argc # indexes: _,_
        mov     i0_c, 0 # indexes: _,_
        mov     i0_i, 10 # indexes: _,_
        mov     t1, .lc1 # indexes: _,_
        call    proto0, printf, t1, i0_i # indexes: _,_,_,_
        mov     t2, 0 # indexes: _,_
        ext32   t2, t2 # indexes: _,_
        ret     t2 # indexes: _

Minimizing SSA phis: from 0 to 0 phis (non-phi insns 8)
  Start def insn 8      mov     i0_argc, i0_argc # indexes: 8,_
     Adding ssa edge: def 8:0 -> use 0:1:
        mov     i0_argc, i0_argc # indexes: 8,_
        ext32   i0_argc, i0_argc # indexes: _,8
  Start def insn 0      ext32   i0_argc, i0_argc # indexes: _,8
    Change i0_argc to i0_argc@1 in insn 0       ext32   i0_argc@1, i0_argc # indexes: _,8
  Start def insn 1      mov     i0_c, 0 # indexes: _,_
  Start def insn 2      mov     i0_i, 10 # indexes: 2,_
     Adding ssa edge: def 2:0 -> use 4:3:
        mov     i0_i, 10 # indexes: 2,_
        call    proto0, printf, t1, i0_i # indexes: _,_,3,2
  Start def insn 3      mov     t1, .lc1 # indexes: 3,_
     Adding ssa edge: def 3:0 -> use 4:2:
        mov     t1, .lc1 # indexes: 3,_
        call    proto0, printf, t1, i0_i # indexes: _,_,3,2
  Start def insn 5      mov     t2, 0 # indexes: 5,_
     Adding ssa edge: def 5:0 -> use 6:1:
        mov     t2, 0 # indexes: 5,_
        ext32   t2, t2 # indexes: 6,5
  Start def insn 6      ext32   t2, t2 # indexes: 6,5
     Adding ssa edge: def 6:0 -> use 7:0:
        ext32   t2, t2 # indexes: 6,5
        ret     t2 # indexes: 6
    Change t2 to t2@1 in insn 6         ext32   t2@1, t2 # indexes: 6,5
    Change t2 to t2@1 in insn 7         ret     t2@1 # indexes: 6
+++++++++++++MIR after building SSA:
undef init insns:
arg init insns:
  8     mov     i0_argc, i0_argc # indexes: 8,_

BB   0
  in edges:
  out edges:   1   2

BB   1
  in edges:   0   2
  out edges:

BB   2
  in edges:   0
  out edges:   1
  0     ext32   i0_argc@1, i0_argc # indexes: _,8
  1     mov     i0_c, 0 # indexes: _,_
  2     mov     i0_i, 10 # indexes: 2,_
  3     mov     t1, .lc1 # indexes: 3,_
  4     call    proto0, printf, t1, i0_i # indexes: _,_,3,2
  5     mov     t2, 0 # indexes: 5,_
  6     ext32   t2@1, t2 # indexes: 6,5
  7     ret     t2@1 # indexes: 6

+++++++++++++Copy Propagation:
    0 deleted copy insns
+++++++++++++MIR after Copy Propagation:
BB   0
  in edges:
  out edges:   1   2

BB   1
  in edges:   0   2
  out edges:

BB   2
  in edges:   0
  out edges:   1
  0     ext32   i0_argc@1, i0_argc # indexes: _,8
  1     mov     i0_c, 0 # indexes: _,_
  2     mov     i0_i, 10 # indexes: 2,_
  3     mov     t1, .lc1 # indexes: 3,_
  4     call    proto0, printf, t1, i0_i # indexes: _,_,3,2
  5     mov     t2, 0 # indexes: 5,_
  6     ext32   t2@1, t2 # indexes: 6,5
  7     ret     t2@1 # indexes: 6

+++++++++++++GVN:
  Adding   0: ext32 _, i0_argc
Val=0 for insn 0:       ext32   i0_argc@1, i0_argc
  Adding   1: mov _, 0
Val=1 for insn 1:       mov     i0_c, 0
  Adding   2: mov _, 10
Val=2 for insn 2:       mov     i0_i, 10
  Adding   3: mov _, .lc1
Val=3 for insn 3:       mov     t1, .lc1
Val=1 for insn 5:       mov     t2, 0
  Adding   6: ext32 _, t2
Val=6 for insn 6:       ext32   t2@1, t2
    0 found GVN redundant insns
+++++++++++++MIR after GVN:
BB   0
  in edges:
  out edges:   1   2

BB   1
  in edges:   0   2
  out edges:

BB   2
  in edges:   0
  out edges:   1
  0     ext32   i0_argc@1, i0_argc # indexes: _,8
  1     mov     i0_c, 0 # indexes: _,_
  2     mov     i0_i, 10 # indexes: 2,_
  3     mov     t1, .lc1 # indexes: 3,_
  4     call    proto0, printf, t1, i0_i # indexes: _,_,3,2
  5     mov     t2, 0 # indexes: 5,_
  6     ext32   t2@1, t2 # indexes: 6,5
  7     ret     t2@1 # indexes: 6

+++++++++++++Dead code elimination:
  Removing dead insn 1          mov     i0_c, 0
  Removing dead insn 0          ext32   i0_argc@1, i0_argc
    2 removed SSA dead insns
+++++++++++++MIR after dead code elimination after GVN:
BB   0 (pressure: int=0, fp=0)
  in edges:
  out edges:   1   2

BB   1 (pressure: int=0, fp=0)
  in edges:   0   2
  out edges:

BB   2 (pressure: int=0, fp=0)
  in edges:   0
  out edges:   1
  2     mov     i0_i, 10 # indexes: 2,_
  3     mov     t1, .lc1 # indexes: 3,_
  4     call    proto0, printf, t1, i0_i # indexes: _,_,3,2
  5     mov     t2, 0 # indexes: 5,_
  6     ext32   t2@1, t2 # indexes: 6,5
  7     ret     t2@1 # indexes: 6

+++++++++++++CCP:
  CCP analysis:
       processing bb0
       processing bb2
       processing bb2 insn:     mov     i0_i, 10           pushing bb2 insn:    call    proto0, printf, t1, i0_i -- make the result a constant 10
       processing bb2 insn:     mov     t1, .lc1 -- make the result varying
       processing bb2 insn:     call    proto0, printf, t1, i0_i -- make all results varying
       processing bb2 insn:     mov     t2, 0           pushing bb2 insn:       ext32   t2@1, t2 -- make the result a constant 0
       processing bb2 insn:     ext32   t2@1, t2           pushing bb2 insn:    ret     t2@1 -- make the result a constant 0
       processing bb2 insn:     ret     t2@1
       processing bb1
       processing bb2 insn:     ret     t2@1
       processing bb2 insn:     ext32   t2@1, t2 -- keep the result a constant 0
       processing bb2 insn:     call    proto0, printf, t1, i0_i -- keep all results varying
  CCP modification:
  changing insn         ext32   t2@1, t2    on insn     mov     t2@1, 0
    0 deleted CCP insns + 0 deleted branches
+++++++++++++MIR after CCP:
BB   0
  in edges:
  out edges:   1   2

BB   1
  in edges:   0   2
  out edges:

BB   2
  in edges:   0
  out edges:   1
  2     mov     i0_i, 10 # indexes: 2,_
  3     mov     t1, .lc1 # indexes: 3,_
  4     call    proto0, printf, t1, i0_i # indexes: _,_,3,2
  5     mov     t2, 0 # indexes: _,_
  6     mov     t2@1, 0 # indexes: 6,_
  7     ret     t2@1 # indexes: 6

+++++++++++++Dead code elimination:
  Removing dead insn 5          mov     t2, 0
    1 removed SSA dead insns
+++++++++++++MIR after dead code elimination after CCP:
BB   0 (pressure: int=0, fp=0)
  in edges:
  out edges:   1   2

BB   1 (pressure: int=0, fp=0)
  in edges:   0   2
  out edges:

BB   2 (pressure: int=0, fp=0)
  in edges:   0
  out edges:   1
  2     mov     i0_i, 10 # indexes: 2,_
  3     mov     t1, .lc1 # indexes: 3,_
  4     call    proto0, printf, t1, i0_i # indexes: _,_,3,2
  6     mov     t2@1, 0 # indexes: 6,_
  7     ret     t2@1 # indexes: 6

+++++++++++++MIR after machinize:

  10    mov     U0_argv, hr6 # indexes: _,_
  9     mov     i0_argc, hr7 # indexes: _,_
  2     mov     i0_i, 10 # indexes: _,_
  3     mov     t1, .lc1 # indexes: _,_
  11    mov     t3, printf # indexes: _,_
  12    mov     hr7, t1 # indexes: _,_
  13    mov     hr6, i0_i # indexes: _,_
  14    mov     hr0, 0 # indexes: _,_
  4     call    proto0, t3, hr7, hr6 # indexes: _,_,_,_ # call used: hr0
  6     mov     t2@1, 0 # indexes: _,_
  15    mov     hr0, t2@1 # indexes: _,_
  7     ret     hr0 # indexes: _

  move with freq          1:    mov     hr0, t2@1
  move with freq          1:    mov     hr6, i0_i
  move with freq          1:    mov     hr7, t1
  move with freq          1:    mov     i0_argc, hr7
  move with freq          1:    mov     U0_argv, hr6
+++++++++++++MIR after building live_info:
Loop Tree
  Loop  3 (pressure: int=0, fp=0):
    BB0   (pressure: int=0, fp=0)
    BB1   (pressure: int=0, fp=0)
    BB2   (pressure: int=0, fp=0)
BB   0 (pressure: int=2, fp=0) (loop  3):
  in edges:
  out edges:   1   2
  live_in:   6   7
  live_out:   6   7

BB   1 (pressure: int=0, fp=0) (loop  3):
  in edges:   0   2
  out edges:

BB   2 (pressure: int=3, fp=0) (loop  3):
  in edges:   0
  out edges:   1
  live_in:   6   7
  live_gen:   6   7
  live_kill:   0   1   2   4   5   6   7   8   9  10  11  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34(i64:i0_argc)  35(i64:U0_argv)  37(i64:i0_i)  38(i64:t1)  41(i64:t2@1)  42(i64:t3)

  ------BB0 end: point=0
  ------BB1 end: point=1
  ------BB2 end: point=1
  p1            ret     hr0 # indexes: _ # dead: hr0
  p2            mov     hr0, t2@1 # indexes: _,_ # dead: t2@1
  p4            mov     t2@1, 0 # indexes: _,_
  p5            call    proto0, t3, hr7, hr6 # indexes: _,_,_,_ # dead: t3 hr7 hr6 # call used: hr0
  p6            mov     hr0, 0 # indexes: _,_
  p7            mov     hr6, i0_i # indexes: _,_ # dead: i0_i
  p9            mov     hr7, t1 # indexes: _,_ # dead: t1
  p11           mov     t3, printf # indexes: _,_
  p12           mov     t1, .lc1 # indexes: _,_
  p13           mov     i0_i, 10 # indexes: _,_
  p14           mov     i0_argc, hr7 # indexes: _,_ # dead: hr7
  p16           mov     U0_argv, hr6 # indexes: _,_ # dead: hr6
+++++++++++++Live ranges:
0: [5..6] [1..2]
1: [5..5]
2: [5..5]
4: [5..5]
5: [5..5]
6: [17..18] [5..7] [0..0]
7: [15..18] [5..9] [0..0]
8: [5..5]
9: [5..5]
10: [5..5]
11: [5..5]
16: [5..5]
17: [5..5]
18: [5..5]
19: [5..5]
20: [5..5]
21: [5..5]
22: [5..5]
23: [5..5]
24: [5..5]
25: [5..5]
26: [5..5]
27: [5..5]
28: [5..5]
29: [5..5]
30: [5..5]
31: [5..5]
32: [5..5]
33: [5..5]
34 (i64:i0_argc): [14..14]
35 (i64:U0_argv): [16..16]
37 (i64:i0_i): [8..13]
38 (i64:t1): [10..12]
41 (i64:t2@1): [3..4]
42 (i64:t3): [5..11]
Compressing live ranges: from 19 to 9 - 47%
Ranges after the compression:
+++++++++++++Live ranges:
0: [3..3] [1..1]
1: [3..3]
2: [3..3]
4: [3..3]
5: [3..3]
6: [8..8] [3..3] [0..0]
7: [7..8] [3..4] [0..0]
8: [3..3]
9: [3..3]
10: [3..3]
11: [3..3]
16: [3..3]
17: [3..3]
18: [3..3]
19: [3..3]
20: [3..3]
21: [3..3]
22: [3..3]
23: [3..3]
24: [3..3]
25: [3..3]
26: [3..3]
27: [3..3]
28: [3..3]
29: [3..3]
30: [3..3]
31: [3..3]
32: [3..3]
33: [3..3]
34 (i64:i0_argc): [6..6]
35 (i64:U0_argv): [7..7]
37 (i64:i0_i): [4..5]
38 (i64:t1): [5..5]
41 (i64:t2@1): [2..2]
42 (i64:t3): [3..5]
 Assigning to i0_i:var= 37, breg=  3 (freq 4  ), thread breg=  3 (freq 4  ) -- 0
 Assigning to t1:var= 38, breg=  4 (freq 4  ), thread breg=  4 (freq 4  ) -- 1
 Assigning to t2@1:var= 41, breg=  7 (freq 4  ), thread breg=  7 (freq 4  ) -- 0
 Assigning to t3:var= 42, breg=  8 (freq 4  ), thread breg=  8 (freq 4  ) -- 3
 Assigning to i0_argc:var= 34, breg=  0 (freq 2  ), thread breg=  0 (freq 2  ) -- 0
 Assigning to U0_argv:var= 35, breg=  1 (freq 2  ), thread breg=  1 (freq 2  ) -- 0
 Assigning to i0_c:var= 36, breg=  2 (freq 0  ), thread breg=  2 (freq 0  ) -- 0
 Assigning to t2:var= 39, breg=  5 (freq 0  ), thread breg=  5 (freq 0  ) -- 0
 Assigning to i0_argc@1:var= 40, breg=  6 (freq 0  ), thread breg=  6 (freq 0  ) -- 0
+++++++++++++Disposition after assignment:
  34=>0   35=>0   36=>0   37=>0   38=>1   39=>0   40=>0   41=>0 
  42=>3 
Deleting noop move      mov     hr0, hr0 which was      mov     hr0, t2@1
    1 deleted RA noop moves out of 5 non-conflicting moves (20.0%), out of 10 all moves (10.0%), out of 12 all insns (8.3%)
+++++++++++++MIR after rewrite:

        mov     hr0, hr6 # indexes: _,_ # dead: hr6
        mov     hr0, hr7 # indexes: _,_ # dead: hr7
        mov     hr0, 10 # indexes: _,_
        mov     hr1, .lc1 # indexes: _,_
        mov     hr3, printf # indexes: _,_
        mov     hr7, hr1 # indexes: _,_ # dead: t1
        mov     hr6, hr0 # indexes: _,_ # dead: i0_i
        mov     hr0, 0 # indexes: _,_
        call    proto0, hr3, hr7, hr6 # indexes: _,_,_,_ # dead: t3 hr7 hr6 # call used: hr0
        mov     hr0, 0 # indexes: _,_
        ret     hr0 # indexes: _ # dead: hr0

+++++++++++++MIR before combine:

        mov     hr0, hr6 # indexes: _,_ # dead: hr6
        mov     hr0, hr7 # indexes: _,_ # dead: hr7
        mov     hr0, 10 # indexes: _,_
        mov     hr1, .lc1 # indexes: _,_
        mov     hr3, printf # indexes: _,_
        mov     hr7, hr1 # indexes: _,_ # dead: hr1
        mov     hr6, hr0 # indexes: _,_ # dead: hr0
        mov     hr0, 0 # indexes: _,_
        call    proto0, hr3, hr7, hr6 # indexes: _,_,_,_ # dead: hr3 hr7 hr6 # call used: hr0
        mov     hr0, 0 # indexes: _,_
        ret     hr0 # indexes: _ # dead: hr0

Processing bb0
Processing bb1
Processing bb2
  Processing    mov     hr0, hr6 # indexes: _,_ # dead: hr6
  Processing    mov     hr0, hr7 # indexes: _,_ # dead: hr7
  Processing    mov     hr0, 10 # indexes: _,_
  Processing    mov     hr1, .lc1 # indexes: _,_
  Processing    mov     hr3, printf # indexes: _,_
  Processing    mov     hr7, hr1 # indexes: _,_ # dead: hr1
      deleting now dead insn    mov     hr1, .lc1 # indexes: _,_
      changing to       mov     hr7, .lc1 # indexes: _,_
  Processing    mov     hr6, hr0 # indexes: _,_ # dead: hr0
      deleting now dead insn    mov     hr0, 10 # indexes: _,_
      changing to       mov     hr6, 10 # indexes: _,_
  Processing    mov     hr0, 0 # indexes: _,_
  Processing    call    proto0, hr3, hr7, hr6 # indexes: _,_,_,_ # dead: hr3 hr7 hr6 # call used: hr0
  Processing    mov     hr0, 0 # indexes: _,_
  Processing    ret     hr0 # indexes: _ # dead: hr0
Processing bb2
  Processing    mov     hr0, hr6 # indexes: _,_ # dead: hr6
  Processing    mov     hr0, hr7 # indexes: _,_ # dead: hr7
  Processing    mov     hr3, printf # indexes: _,_
  Processing    mov     hr7, .lc1 # indexes: _,_
  Processing    mov     hr6, 10 # indexes: _,_
  Processing    mov     hr0, 0 # indexes: _,_
  Processing    call    proto0, hr3, hr7, hr6 # indexes: _,_,_,_ # dead: hr3 hr7 hr6 # call used: hr0
  Processing    mov     hr0, 0 # indexes: _,_
  Processing    ret     hr0 # indexes: _ # dead: hr0
    2 deleted combine insns out of 20 (10.0%)
+++++++++++++MIR after combine:

        mov     hr0, hr6 # indexes: _,_ # dead: hr6
        mov     hr0, hr7 # indexes: _,_ # dead: hr7
        mov     hr3, printf # indexes: _,_
        mov     hr7, .lc1 # indexes: _,_
        mov     hr6, 10 # indexes: _,_
        mov     hr0, 0 # indexes: _,_
        call    proto0, hr3, hr7, hr6 # indexes: _,_,_,_ # dead: hr3 hr7 hr6 # call used: hr0
        mov     hr0, 0 # indexes: _,_
        ret     hr0 # indexes: _ # dead: hr0

+++++++++++++Dead code elimination:
  Removing dead insn 9          mov     hr0, hr7
  Removing dead insn 10         mov     hr0, hr6
    2 removed dead insns
+++++++++++++MIR after dead code elimination after combine:
BB   0 (pressure: int=4, fp=0) (loop  3):
  in edges:
  out edges:   1   2
  live_in:   6   7
  live_out:   6   7

BB   1 (pressure: int=0, fp=0) (loop  3):
  in edges:   0   2
  out edges:

BB   2 (pressure: int=3, fp=0) (loop  3):
  in edges:   0
  out edges:   1
  live_in:   6   7
  live_gen:   6   7
  live_kill:   0   1   2   3   4   5   6   7   8   9  10  11  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33

        mov     hr3, printf # indexes: _,_
        mov     hr7, .lc1 # indexes: _,_
        mov     hr6, 10 # indexes: _,_
        mov     hr0, 0 # indexes: _,_
        call    proto0, hr3, hr7, hr6 # indexes: _,_,_,_ # dead: hr3 hr7 hr6 # call used: hr0
        mov     hr0, 0 # indexes: _,_
        ret     hr0 # indexes: _ # dead: hr0

+++++++++++++MIR after forming prolog/epilog:

        mov     i64:-8(hr4), hr5 # indexes: _,_
        add     hr5, hr4, -8 # indexes: _,_,_
        sub     hr4, hr4, 24 # indexes: _,_,_
        mov     i64:-16(hr5), hr3 # indexes: _,_
        mov     hr3, printf # indexes: _,_
        mov     hr7, .lc1 # indexes: _,_
        mov     hr6, 10 # indexes: _,_
        mov     hr0, 0 # indexes: _,_
        call    proto0, hr3, hr7, hr6 # indexes: _,_,_,_ # dead: hr3 hr7 hr6 # call used: hr0
        mov     hr0, 0 # indexes: _,_
        mov     hr3, i64:-16(hr5) # indexes: _,_
        add     hr4, hr5, 8 # indexes: _,_,_
        mov     hr5, i64:-8(hr4) # indexes: _,_
        ret     hr0 # indexes: _ # dead: hr0

gcc -c -o _mir_0_8690.c.o _mir_0_8690.c 2>&1 && objcopy --update-section .text=_mir_0_8690.bin _mir_0_8690.c.o && objdump --adjust-vma=0x7ffff7ff7030 -d _mir_0_8690.c.o; rm -f _mir_0_8690.c.o _mir_0_8690.c _mir_0_8690.bin

_mir_0_8690.c.o:     file format elf64-x86-64

Disassembly of section .text:

00007ffff7ff7030 <code>:
    7ffff7ff7030:       48 89 6c 24 f8          mov    %rbp,-0x8(%rsp)
    7ffff7ff7035:       48 8d 6c 24 f8          lea    -0x8(%rsp),%rbp
    7ffff7ff703a:       48 83 ec 18             sub    $0x18,%rsp
    7ffff7ff703e:       48 89 5d f0             mov    %rbx,-0x10(%rbp)
    7ffff7ff7042:       48 bb 40 3e 62 f7 ff    movabs $0x7ffff7623e40,%rbx
    7ffff7ff7049:       7f 00 00 
    7ffff7ff704c:       48 bf 10 2a 8e 55 55    movabs $0x5555558e2a10,%rdi
    7ffff7ff7053:       55 00 00 
    7ffff7ff7056:       48 c7 c6 0a 00 00 00    mov    $0xa,%rsi
    7ffff7ff705d:       33 c0                   xor    %eax,%eax
    7ffff7ff705f:       ff d3                   callq  *%rbx
    7ffff7ff7061:       33 c0                   xor    %eax,%eax
    7ffff7ff7063:       48 8b 5d f0             mov    -0x10(%rbp),%rbx
    7ffff7ff7067:       48 8d 65 08             lea    0x8(%rbp),%rsp
    7ffff7ff706b:       48 8b 6c 24 f8          mov    -0x8(%rsp),%rbp
    7ffff7ff7070:       c3                      retq   
        ...
code size = 80:
  Code generation for main: 14 MIR insns (addr=7ffff7ff7030, len=80) -- time 36.39 ms
M0:     module
proto0: proto   u64:U0_fmt, ...
        import  printf
main:   func    i32, i32:i0_argc, u64:U0_argv
        local   i64:i0_c, i64:i0_i, i64:t1, i64:t2
# 2 args, 4 locals
        ext32   i0_argc, i0_argc
        mov     i0_c, 0
        mov     i0_i, 10
        mov     t1, .lc1
        call    proto0, printf, t1, i0_i
        mov     t2, 0
        ext32   t2, t2
        ret     t2
        endfunc
        export  main
.lc1:   u8      105, 32, 61, 32, 37, 100, 10, 0 # "i = %d\n\000"
        endmodule
i = 10
vnmakarov commented 1 year ago

While testing c2m with the sieve.c example I noticed that the mir output is not optimized. Is it possible to add an option to output optimized mir ?

I guess output optimized MIR right after out of SSA pass could be a solution. At this stage, there are no machine regs yet (except ones generated from reg asm C extension). The only problem is that the code will be in simplified form and will contain a lot of simple insns (e.g. only indirect addresses, no memory operands for non-load/store insns etc). My experience the code will be harder to read and understand.

Another solution would be do some optimizations in c2mir. But probably it is not wise too as it results in optimization duplication. So I don't see a reasonable solution I'd like. Sorry.

vnmakarov commented 1 year ago

I did an experiment with this C file:

void printf (const char *fmt, ...);
int main(int argc, char *argv[]) {
  int a,b, c = 0, i = 10;
  printf("i = %d\n", i);
  return 0;
}

It shows that the optimizer didn't removed the variable assigned with a constant but not used c.

I've checked your code too. The generator does remove code.

+++++++++++++Dead code elimination: Removing dead insn 1 mov i0_c, 0 Removing dead insn 0 ext32 i0_argc@1, i0_argc 2 removed SSA dead insns

There is setting hr0 to 0 in the generated code before the call which can be confused with mov i0_c,0. It is a part of ABI for calling vararg functions saying how much xmm regs to put into vararg area.

mingodad commented 1 year ago

I was expecting to be able to output mir code just before Generate Machine Code in this diagram https://github.com/vnmakarov/mir/blob/master/mir-gen.svg , with that we could compare before and after opitmization.