cornell-zhang / heterocl

HeteroCL: A Multi-Paradigm Programming Infrastructure for Software-Defined Heterogeneous Computing
https://cornell-zhang.github.io/heterocl/
Apache License 2.0
322 stars 92 forks source link

[Backend] Code generator for Vivado HLS #66

Closed Huyuwei closed 5 years ago

Huyuwei commented 5 years ago

This PR implements a Vivado HLS code generator, built on the MerlinC code generator.

Huyuwei commented 5 years ago

Binary Convolution Example

HeteroCL code

# Binary convolution example: 1-bit input A and 1-bit weights B produce an
# 8-bit output C, then the schedule is lowered to Vivado HLS source.

# Input placeholders, both declared as 1-bit unsigned (hcl.UInt(1)).
A = hcl.placeholder((1, 32, 14, 14), dtype=hcl.UInt(1), name="A")
B = hcl.placeholder((64, 32, 3, 3), dtype=hcl.UInt(1), name="B")
# Reduction axes: rc spans [0, 32); ry and rx each span [0, 3).
rc = hcl.reduce_axis(0, 32)
ry = hcl.reduce_axis(0, 3)
rx = hcl.reduce_axis(0, 3)
# C[nn, ff, yy, xx] = sum over (rc, ry, rx) of
#     A[nn, rc, yy + ry, xx + rx] * B[ff, rc, ry, rx]
# The output shape is (1, 64, 12, 12); yy + ry and xx + rx reach at most
# 11 + 2 = 13, which stays inside A's 14-wide trailing dimensions.
# NOTE(review): each sum has 32 * 3 * 3 = 288 one-bit products, so the result
# can exceed what hcl.UInt(8) holds -- confirm the wrap-around is intended.
C = hcl.compute((1, 64, 12, 12),
    lambda nn, ff, yy, xx: hcl.sum(
        A[nn, rc, yy + ry, xx + rx] * B[ff, rc, ry, rx], axis=[rc, ry, rx]),
    dtype=hcl.UInt(8), name="C")
s = hcl.create_schedule([A, B, C])
# Schedule primitives applied to C's loop nest. C.axis[i] presumably follows
# the lambda argument order (nn, ff, yy, xx) -- the generated code is
# consistent with that reading.
# Swap the order of axes 1 and 0.
s[C].reorder(C.axis[1], C.axis[0])
# Split axis 1 by a factor of 5; the generated code shows this as ff_outer
# (13 iterations) and ff_inner (5 iterations) plus a bounds guard, since 64 is
# not a multiple of 5.
s[C].split(C.axis[1], factor=5)
# Request pipelining on axis 2 with II=2; the generated code carries this as
# "#pragma HLS pipeline II=2" inside the yy loop.
s[C].pipeline(C.axis[2], initiation_interval=2)
# Build the schedule with the 'vhls' target and keep the result in `code`.
code = hcl.build(s, target='vhls')

Generated Vivado HLS code

#include <ap_int.h>
#include <ap_fixed.h>
#include <math.h>
// Generated kernel for the binary convolution example.
//
// For every output position (ff, yy, xx) it accumulates A[...] * B[...] over
// ra in [0, 32), ra1 in [0, 3), ra2 in [0, 3) and stores the sum into C.
//
// All three buffers are accessed through flattened indices:
//   A: (xx + ra2) + (yy + ra1) * 14 + ra * 196 + 0 * 6272
//      (196 = 14 * 14, 6272 = 32 * 196)
//   B: ra2 + ra1 * 3 + ra * 9 + ff * 288
//      (9 = 3 * 3, 288 = 32 * 9)
//   C: xx + yy * 12 + ff * 144 + 0 * 9216
//      (144 = 12 * 12, 9216 = 64 * 144)
// where ff = ff_outer * 5 + ff_inner. The "0 *" terms are the leading index
// fixed at 0.
//
// Parameters:
//   A  input buffer of 1-bit unsigned values (read only in this function)
//   B  input buffer of 1-bit unsigned values (read only in this function)
//   C  output buffer of unsigned char; each visited element is overwritten
void default_function(ap_uint<1>* A, ap_uint<1>* B, unsigned char* C) {
  // ff is split into ff_outer * 5 + ff_inner; 13 * 5 = 65 covers the 64 values.
  for (int ff_outer = 0; ff_outer < 13; ++ff_outer) {
    for (int ff_inner = 0; ff_inner < 5; ++ff_inner) {
      // Guard equivalent to (ff_outer * 5 + ff_inner) < 64: skips the single
      // out-of-range combination (ff_outer = 12, ff_inner = 4).
      if ((ff_outer * 5) < (64 - ff_inner)) {
        for (int yy = 0; yy < 12; ++yy) {
// Pipeline directive with initiation interval 2, emitted inside the yy loop
// body. NOTE(review): confirm against the Vivado HLS documentation which loop
// this placement applies to and how the nested loops are treated.
#pragma HLS pipeline II=2
          for (int xx = 0; xx < 12; ++xx) {
            // Accumulator for one output element, reset for each (ff, yy, xx).
            int reducer0;
            reducer0 = 0;
            for (int ra = 0; ra < 32; ++ra) {
              for (int ra1 = 0; ra1 < 3; ++ra1) {
                for (int ra2 = 0; ra2 < 3; ++ra2) {
                  // reducer0 += A[...] * B[...]; each operand is widened to
                  // ap_uint<2> before the multiply, and the add is performed
                  // in ap_int<34> before casting back to int.
                  reducer0 = ((int)(((ap_int<34>)(((ap_uint<2>)A[((((((ap_int<34>)xx) + ((ap_int<34>)ra2)) + ((((ap_int<34>)yy) + ((ap_int<34>)ra1)) * (ap_int<34>)14)) + ((ap_int<34>)(ra * 196))) + ((ap_int<34>)(0 * 6272)))]) * ((ap_uint<2>)B[(((ra2 + (ra1 * 3)) + (ra * 9)) + (((ff_outer * 5) + ff_inner) * 288))]))) + ((ap_int<34>)reducer0)));
                }
              }
            }
            // Narrowing store. NOTE(review): the sum has 32 * 3 * 3 = 288
            // terms of 0 or 1, so it can exceed 255 and wrap in this cast --
            // confirm that is acceptable for the example.
            C[(((xx + (yy * 12)) + (((ff_outer * 5) + ff_inner) * 144)) + (0 * 9216))] = ((unsigned char)reducer0);
          }
        }
      }
    }
  }
}