In the following kernel, indexing the input arrays with i-1 (when i == 0) or i+1 (when i == nums-1) would access memory out of bounds, so those neighbour loads need to be guarded by bounds checks.
// Inverts a (key, value) map stored as two parallel arrays into a
// "first element + next pointer" linked-list representation.
//
// For each entry i (0 <= i < nums), with key p = mapspkey[i] and value
// s = mapspval[i]:
//   - mappsfirst[p] is set to s when entry i starts a run of equal keys
//     (i == 0, or the previous key differs);
//   - mapssnext[s] is set to the value of entry i+1 when that entry has the
//     same key, and to -1 when entry i ends its run (i is the last entry, or
//     the next key differs).
//
// Parameters:
//   mapspkey   - input keys, read at indices [0, nums)
//   mapspval   - input values, read at indices [0, nums)
//   mappsfirst - output, indexed by key
//   mapssnext  - output, indexed by value
//
// `nums` and `CHUNK_SIZE` are defined outside this block.
// NOTE(review): the global index is built from CHUNK_SIZE rather than
// blockDim.x, so this presumably expects a launch with CHUNK_SIZE threads per
// block - confirm against the launch site.
// NOTE(review): the run detection only compares adjacent entries, so it
// presumably expects equal keys to be contiguous in mapspkey - confirm.
static __global__ void gpuInvMap(
        const int* mapspkey,
        const int* mapspval,
        int* mappsfirst,
        int* mapssnext)
{
    const int i = blockIdx.x * CHUNK_SIZE + threadIdx.x;
    if (i >= nums) return;

    const int p = mapspkey[i];
    const int s = mapspval[i];

    // The neighbour loads are placed behind the boundary tests so that
    // short-circuit evaluation keeps thread 0 from reading mapspkey[-1] and
    // thread nums-1 from reading mapspkey[nums] / mapspval[nums].
    if (i == 0 || p != mapspkey[i - 1])
        mappsfirst[p] = s;

    if (i + 1 == nums || p != mapspkey[i + 1])
        mapssnext[s] = -1;
    else
        mapssnext[s] = mapspval[i + 1];  // i + 1 < nums is guaranteed here
}
// Guarded neighbour loads: the neighbour values are declared first and are
// assigned only when the neighbouring index lies inside [0, nums).
// This fragment relies on `i`, `nums`, `mapspkey` and `mapspval` from the
// enclosing kernel body; it is not a complete definition on its own.
// Key of entry i+1; left unassigned when i is the last entry.
int pp;
// Key of entry i-1; left unassigned when i == 0.
int pm;
// Value of entry i+1; left unassigned when i is the last entry.
int sp;
// Entry 0 has no predecessor, so skip the i-1 load there.
if (i > 0) pm = mapspkey[i-1];
// The last entry (i == nums - 1) has no successor, so skip both i+1 loads.
if (i < nums - 1) {
pp = mapspkey[i+1];
sp = mapspval[i+1];
}
// NOTE(review): pm / pp / sp hold indeterminate values on the boundary
// threads, so any code using them must test i == 0 / i + 1 == nums before
// reading them.
In the following kernel, indexing the input arrays with i-1 (when i == 0) or i+1 (when i == nums-1) would access memory out of bounds, so those neighbour loads need to be guarded by bounds checks.