Open chongxi opened 5 years ago
The algorithmic structure of this kind of module is simple. First, assign bit fields within a struct of arbitrary length:
// Number of channels; second dimension of the buf_2d sample buffer in ref_sub().
#define CH 160 // change to 8 for C simulation
// Number of time-point banks; first dimension of buf_2d in ref_sub().
#define T 2
// Sample value: 32-bit signed fixed-point, 19 integer bits (sign included), 13 fractional bits.
typedef ap_fixed<32,19> mua_type;
// Channel index (12 bits).
typedef ap_uint<12> ch_type; // ch
// Packed 32-bit channel hash.
typedef ap_uint<32> ch_hash_type; // (ch_nn3, ch_nn2, ch_nn1, ch_nn0)
// Time-point counter (32 bits).
typedef ap_uint<32> time_type;
// 160-bit packed input word. Field layout, as unpacked by ref_sub():
//   [159:128] time
//   [119:108] ch_ref
//   [107:96]  ch
//   [95:64]   ch_hash
//   [63:32]   thr
//   [31:0]    mua value
// Bits [127:120] are not read by ref_sub().
struct mua_struct{
ap_uint<160> data;
};
// 160-bit packed output word. Field layout, as packed by ref_sub():
//   [159:128] time - 1
//   [119:108] ch_ref
//   [107:96]  ch
//   [95:64]   ch_hash
//   [63:32]   thr
//   [31:0]    reference-subtracted mua value
// ref_sub() assigns no field to bits [127:120].
struct muar_struct{
ap_uint<160> data;
};
Then read input, store + process, output results:
// Reference subtraction on a stream of packed samples.
//
// Each call reads exactly one word from mua_stream, stores its value in the
// bank selected by the time LSB, and computes the reference-subtracted value
// for the same channel from the *other* bank (the previous time point).
//
// mua_stream  : input words  (time, ch_ref, ch, ch_hash, thr, val).
// muar_stream : output words (time-1, ch_ref, ch, ch_hash, thr, val'), where
//               val' = prev[ch] - prev[ch_ref] when ch_ref < CH, else prev[ch].
//               ch, ch_ref, ch_hash and thr are forwarded from the current
//               input word. Nothing is written when time == 0.
void ref_sub(hls::stream<mua_struct> &mua_stream,
             hls::stream<muar_struct> &muar_stream)
{
    // Static so the stored samples persist across calls.
    static mua_type buf_2d[T][CH];

    // ------------------------------------------------
    // input: mua_stream (t, ch, ch_hash, mua_data)
    // ------------------------------------------------
    mua_struct mua;
    time_type t, t_out;
    ch_type ch, ch_ref;
    ch_hash_type ch_hash;
    mua_type val, _val, thr;

    mua = mua_stream.read();
    // Raw bit copies via range(): the fixed-point fields must receive the bit
    // pattern unchanged, not a value conversion.
    t.range(31,0)      = mua.data.range(159,128);
    ch_ref.range(11,0) = mua.data.range(119,108);
    ch.range(11,0)     = mua.data.range(107,96);
    ch_hash.range(31,0) = mua.data.range(95, 64);
    thr.range(31,0)    = mua.data.range(63, 32);
    val.range(31,0)    = mua.data.range(31, 0);

    // ------------------------------------------------
    // Memory + Processing
    // While the current bank is being written, the other bank (previous time
    // point) is read for the subtraction.
    // ------------------------------------------------
    bool j, l;
    j = t.range(0,0);   // bank for the current time point
    l = !j;             // bank holding the previous time point

    // NOTE(review): ch is 12 bits wide and is not checked against CH before
    // indexing; presumably the producer guarantees ch < CH -- confirm.
    // write current time point
    buf_2d[j][ch] = val;

    // read previous time point and do reference subtraction;
    // ch_ref >= CH means "no reference channel".
    if (ch_ref < CH)
        _val = buf_2d[l][ch] - buf_2d[l][ch_ref];
    else
        _val = buf_2d[l][ch];

    // The emitted sample belongs to the previous time point.
    t_out = t - 1;

    // ------------------------------------------------
    // output: muar_stream (_val, thr, ch_hash, ch, ch_ref, t-1)
    // ------------------------------------------------
    // Label kept: it names this region, which HLS directives may reference.
    output:
    {
        // t == 0 has no previous time point, so nothing is emitted.
        if (t > 0)
        {
            muar_struct muar;
            // ap_uint is not initialized by its default constructor; clear the
            // word so the unassigned bits [127:120] are deterministic.
            muar.data = 0;
            muar.data.range(31, 0)    = _val.range(31,0);
            muar.data.range(63, 32)   = thr.range(31,0);
            muar.data.range(95, 64)   = ch_hash.range(31,0);
            muar.data.range(107,96)   = ch.range(11,0);
            muar.data.range(119,108)  = ch_ref.range(11,0);
            muar.data.range(159,128)  = t_out.range(31,0);
            muar_stream.write(muar);
        }
    }
}
From the hardware view, both `mua_stream` and `muar_stream` are AXI-Stream interfaces.
However, an AXI-Stream input can only cache one input before the module generates results, so if more than two inputs arrive before it can generate an output, data will be lost. See how this issue (https://github.com/chongxi/xike_hls_module/issues/3) is solved.
This is one of the simplest forms of data flow hacking: among several synchronous data streams, only one stream needs to go to memory and be processed, while the processed stream needs to be synchronized with the others at the output port.