Closed Huyuwei closed 5 years ago
# Example: build a reduction over two 1-bit tensors with HeteroCL and emit
# Vivado HLS code for it.

# Input placeholders: A has shape (1, 32, 14, 14) and B has shape
# (64, 32, 3, 3); both hold 1-bit unsigned values.
# NOTE(review): the shapes look like NCHW activations and OIHW weights --
# confirm against the intended layout.
A = hcl.placeholder((1, 32, 14, 14), dtype=hcl.UInt(1), name="A")
B = hcl.placeholder((64, 32, 3, 3), dtype=hcl.UInt(1), name="B")
# Reduction axes: rc spans [0, 32), ry and rx each span [0, 3).
rc = hcl.reduce_axis(0, 32)
ry = hcl.reduce_axis(0, 3)
rx = hcl.reduce_axis(0, 3)
# Output C has shape (1, 64, 12, 12) and 8-bit unsigned elements. Each
# element is the sum over (rc, ry, rx) of
# A[nn, rc, yy + ry, xx + rx] * B[ff, rc, ry, rx], i.e. a 3x3 sliding-window
# reduction over A with no padding (14 - 3 + 1 = 12 positions per spatial axis).
C = hcl.compute((1, 64, 12, 12),
lambda nn, ff, yy, xx: hcl.sum(
A[nn, rc, yy + ry, xx + rx] * B[ff, rc, ry, rx], axis=[rc, ry, rx]),
dtype=hcl.UInt(8), name="C")
# Create a schedule over the three tensors and apply loop transformations
# to the stage that computes C.
s = hcl.create_schedule([A, B, C])
# Reorder the first two axes of C (axis 1 is listed before axis 0).
s[C].reorder(C.axis[1], C.axis[0])
# Split axis 1 of C (extent 64) with factor 5; 64 is not a multiple of 5.
# NOTE(review): presumably the backend must guard the tail iterations -- confirm.
s[C].split(C.axis[1], factor=5)
# Request pipelining on axis 2 of C with an initiation interval of 2.
s[C].pipeline(C.axis[2], initiation_interval=2)
# Build the schedule for the 'vhls' target.
# NOTE(review): `code` is presumably the generated HLS source text -- confirm.
code = hcl.build(s, target='vhls')
#include <ap_int.h>
#include <ap_fixed.h>
#include <math.h>
// Generated kernel: for every output position, accumulates products of
// elements of A and B over three nested reduction loops and stores the
// result in C.
//   A, B: flat arrays of 1-bit unsigned values (read only in this function).
//   C:    flat array of unsigned char results (written only in this function).
void default_function(ap_uint<1>* A, ap_uint<1>* B, unsigned char* C) {
// ff_outer/ff_inner together form the index (ff_outer * 5 + ff_inner).
// 13 * 5 = 65 combinations are iterated, one more than the 64 that are used.
for (int ff_outer = 0; ff_outer < 13; ++ff_outer) {
for (int ff_inner = 0; ff_inner < 5; ++ff_inner) {
// Tail guard: equivalent to (ff_outer * 5 + ff_inner) < 64, so the one
// surplus iteration is skipped.
if ((ff_outer * 5) < (64 - ff_inner)) {
for (int yy = 0; yy < 12; ++yy) {
// Pipeline directive with II=2, emitted at the top of the yy loop body.
#pragma HLS pipeline II=2
for (int xx = 0; xx < 12; ++xx) {
// Accumulator for the current (ff, yy, xx) output element.
int reducer0;
reducer0 = 0;
// Reduction loops: ra over 32 values, ra1 and ra2 over 3 values each.
for (int ra = 0; ra < 32; ++ra) {
for (int ra1 = 0; ra1 < 3; ++ra1) {
for (int ra2 = 0; ra2 < 3; ++ra2) {
// A index: (xx + ra2) + (yy + ra1) * 14 + ra * 196 + 0 * 6272.
// B index: ra2 + ra1 * 3 + ra * 9 + (ff_outer * 5 + ff_inner) * 288.
// Both operands are cast to ap_uint<2> before the multiply; the product is
// added to reducer0 in ap_int<34> and the sum is cast back to int.
reducer0 = ((int)(((ap_int<34>)(((ap_uint<2>)A[((((((ap_int<34>)xx) + ((ap_int<34>)ra2)) + ((((ap_int<34>)yy) + ((ap_int<34>)ra1)) * (ap_int<34>)14)) + ((ap_int<34>)(ra * 196))) + ((ap_int<34>)(0 * 6272)))]) * ((ap_uint<2>)B[(((ra2 + (ra1 * 3)) + (ra * 9)) + (((ff_outer * 5) + ff_inner) * 288))]))) + ((ap_int<34>)reducer0)));
}
}
}
// C index: xx + yy * 12 + (ff_outer * 5 + ff_inner) * 144 + 0 * 9216.
// The int accumulator is narrowed to unsigned char on store.
C[(((xx + (yy * 12)) + (((ff_outer * 5) + ff_inner) * 144)) + (0 * 9216))] = ((unsigned char)reducer0);
}
}
}
}
}
}
This PR implements a Vivado HLS code generator that is based on the MerlinC code generator.