Closed kaushikcfd closed 1 year ago
Consider the kernel:
```python
import loopy as lp

knl = lp.make_kernel(
    "{[r0, r1]: 0<=r0,r1<10}",
    """
    tmp0 = sum(r0, r0**2)
    ... gbarrier
    tmp1 = sum(r1, r1**3)
    ... gbarrier
    out = tmp0 + tmp1
    """,
    [lp.TemporaryVariable("tmp0",
                          shape=lp.auto,
                          address_space=lp.AddressSpace.GLOBAL,
                          base_storage="base1"),
     lp.TemporaryVariable("tmp1",
                          shape=lp.auto,
                          address_space=lp.AddressSpace.GLOBAL,
                          base_storage="base2"),
     ...],
    seq_dependencies=True
    )
print(lp.generate_code_v2(knl).device_code())
```
and this generates:
```c
/// .. <Some code above>

__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global int *__restrict__ base1)
{
  int acc_r0;
  __global int *const __restrict__ tmp0 = (__global int *const __restrict__ ) (base1 + 0);
  __global int *const __restrict__ tmp1 = (__global int *const __restrict__ ) (base2 + 0);

  acc_r0 = 0;
  for (int r0 = 0; r0 <= 9; ++r0)
    acc_r0 = acc_r0 + r0 * r0;
  tmp0[0] = acc_r0;
}

__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel_0(__global int *__restrict__ base2)
{
  int acc_r1;
  __global int *const __restrict__ tmp0 = (__global int *const __restrict__ ) (base1 + 0);
  __global int *const __restrict__ tmp1 = (__global int *const __restrict__ ) (base2 + 0);

  acc_r1 = 0;
  for (int r1 = 0; r1 <= 9; ++r1)
    acc_r1 = acc_r1 + loopy_pow_int32_int32(r1, 3);
  tmp1[0] = acc_r1;
}

/// .. <Some more code below>
```
Clearly, `loopy_kernel_0` is illegal: its body references `base1`, but `base1` is not defined there, since the kernel only receives `base2` as an argument.
loopy_kernel_0
base1
Consider the kernel:
and this generates:
Clearly `loopy_kernel_0` is illegal as `base1` is not defined.