Closed xysmlx closed 2 years ago
🐛 Bug
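With -fdefault_device=GENERIC_CPU -fantares_mode=true, model code cannot be generated with Antares kernels. The reason is that AntaresCpuKernelEmitter::get_or_emit_source() returns nullptr because the format of the generated Antares code does not match what the emitter expects: the extraction code quoted below searches for the marker "// [thread_compute]", whereas the generated kernel quoted after it contains "// [thread_extent]".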
// extract kernel code const char* s_func_pattern = "// [thread_compute]\n"; const char* e_func_pattern = "\n}\n"; const char* s_rank_pattern = "__rank__ = "; const char* e_rank_pattern = "\n"; std::string::size_type s_func_pos = antares_code.find(s_func_pattern); std::string::size_type e_func_pos = antares_code.rfind(e_func_pattern); if (s_func_pos != std::string::npos || e_func_pos != std::string::npos) return nullptr;
extern "C" void template_op_kernel0(const int __rank__, void** __args) { auto * input0 = (float* __restrict)__args[0]; auto * input1 = (float* __restrict)__args[1]; auto * output0 = (float*)__args[2]; using namespace std; // [thread_extent] __rank__ = 8 for (int N0_outer_inner = 0; N0_outer_inner < 8; ++N0_outer_inner) { for (int N1_outer_inner = 0; N1_outer_inner < 32; ++N1_outer_inner) { for (int N2_outer_inner = 0; N2_outer_inner < 2; ++N2_outer_inner) { for (int N0_inner = 0; N0_inner < 2; ++N0_inner) { for (int N1_inner = 0; N1_inner < 4; ++N1_inner) { for (int N2_inner = 0; N2_inner < 7; ++N2_inner) { for (int N3_inner = 0; N3_inner < 7; ++N3_inner) { output0[((((((((((((((int)__rank__) >> 2) * 802816) + (N0_outer_inner * 100352)) + (N0_inner * 50176)) + (((((int)__rank__) & 3) >> 1) * 25088)) + (N1_outer_inner * 784)) + (N1_inner * 196)) + (N2_outer_inner * 98)) + (N2_inner * 14)) + ((((int)__rank__) & 1) * 7)) + N3_inner))] = (input0[((((((((((((((int)__rank__) >> 2) * 802816) + (N0_outer_inner * 100352)) + (N0_inner * 50176)) + (((((int)__rank__) & 3) >> 1) * 25088)) + (N1_outer_inner * 784)) + (N1_inner * 196)) + (N2_outer_inner * 98)) + (N2_inner * 14)) + ((((int)__rank__) & 1) * 7)) + N3_inner))] + input1[((((((((((((((int)__rank__) >> 2) * 802816) + (N0_outer_inner * 100352)) + (N0_inner * 50176)) + (((((int)__rank__) & 3) >> 1) * 25088)) + (N1_outer_inner * 784)) + (N1_inner * 196)) + (N2_outer_inner * 98)) + (N2_inner * 14)) + ((((int)__rank__) & 1) * 7)) + N3_inner))]); } } } } } } } }
🐛 Bug
With -fdefault_device=GENERIC_CPU -fantares_mode=true, model code cannot be generated with Antares kernels. The reason is that AntaresCpuKernelEmitter::get_or_emit_source() returns nullptr due to a code format mismatch.