I am facing the following error : error: OpenMP constructs other than '#pragma omp ordered simd' may not be nested inside 'simd' region due to #pragma omp atomic update inside simd loop.
Please see the MFE
from devito import Function,Operator,SpaceDimension
import devito as dv
import numpy as np
from devito import configuration
configuration['language'] = 'openmp'
configuration['log-level'] = 'DEBUG'
# convolve input_f with kernel
data = np.random.randn(50,50) ; k = np.random.randn(5,5)
input_dimensions = [SpaceDimension("inp_I_"+str(x)) for x in range(0,2)]
kernel_dimensions = [SpaceDimension("inp_K_"+str(x)) for x in range(0,2)]
result_dimensions = [SpaceDimension("inp_R_"+str(x)) for x in range(0,2)]
input_func = Function(name=("Input_F"), shape=data.shape, dimensions=input_dimensions)
kernel_func = Function(name=("Kernel_F"), shape=k.shape, dimensions=kernel_dimensions)
res_func = Function(name=("Result_F"), shape=(46,46),dimensions=result_dimensions)
input_func.data[:] = data; kernel_func.data[:]=k
rhs = kernel_func[kernel_dimensions] * input_func[result_dimensions[0] + kernel_dimensions[0],result_dimensions[1] + kernel_dimensions[1]]
eqs = [dv.Inc(res_func, rhs)]
op = Operator(eqs)
op.apply()
print(res_func.data)
I am using a linux system with a GCC compiler.
The C Code that is generated by Devito
#define _POSIX_C_SOURCE 200809L
#define START_TIMER(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
#define STOP_TIMER(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;
#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "xmmintrin.h"
#include "pmmintrin.h"
#include "omp.h"
struct dataobj
{
void *restrict data;
unsigned long * size;
unsigned long * npsize;
unsigned long * dsize;
int * hsize;
int * hofs;
int * oofs;
void * dmap;
} ;
struct profiler
{
double section0;
} ;
int Kernel(struct dataobj *restrict Input_F_vec, struct dataobj *restrict Kernel_F_vec, struct dataobj *restrict Result_F_vec, const int inp_K_0_M, const int inp_K_0_m, const int inp_K_1_M, const int inp_K_1_m, const int inp_R_0_M, const int inp_R_0_m, const int inp_R_1_M, const int inp_R_1_m, const int nthreads_nonaffine, struct profiler * timers)
{
float (*restrict Input_F)[Input_F_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[Input_F_vec->size[1]]) Input_F_vec->data;
float (*restrict Kernel_F)[Kernel_F_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[Kernel_F_vec->size[1]]) Kernel_F_vec->data;
float (*restrict Result_F)[Result_F_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[Result_F_vec->size[1]]) Result_F_vec->data;
/* Flush denormal numbers to zero in hardware */
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
/* Begin section0 */
START_TIMER(section0)
for (int inp_K_0 = inp_K_0_m; inp_K_0 <= inp_K_0_M; inp_K_0 += 1)
{
#pragma omp parallel num_threads(nthreads_nonaffine)
{
int chunk_size = (int)(fmax(1, (1.0F/3.0F)*(inp_K_1_M - inp_K_1_m + 1)*(inp_R_0_M - inp_R_0_m + 1)/nthreads_nonaffine));
#pragma omp for collapse(2) schedule(dynamic,chunk_size)
for (int inp_R_0 = inp_R_0_m; inp_R_0 <= inp_R_0_M; inp_R_0 += 1)
{
for (int inp_K_1 = inp_K_1_m; inp_K_1 <= inp_K_1_M; inp_K_1 += 1)
{
#pragma omp simd aligned(Input_F,Kernel_F,Result_F:64)
for (int inp_R_1 = inp_R_1_m; inp_R_1 <= inp_R_1_M; inp_R_1 += 1)
{
float r0 = Input_F[inp_K_0 + inp_R_0 + 1][inp_K_1 + inp_R_1 + 1]*Kernel_F[inp_K_0 + 1][inp_K_1 + 1];
#pragma omp atomic update
Result_F[inp_R_0 + 1][inp_R_1 + 1] += r0;
}
}
}
}
}
STOP_TIMER(section0,timers)
/* End section0 */
return 0;
}
I am facing the following error :
error: OpenMP constructs other than '#pragma omp ordered simd' may not be nested inside 'simd' region
due to #pragma omp atomic update inside simd loop.Please see the MFE
I am using a linux system with a GCC compiler.
The C Code that is generated by Devito