carlren / gSLICr

gSLICr: Real-time super-pixel segmentation
Other
336 stars 126 forks source link

When local_id < 32, the code is different from local_id < 64 #24

Open IaGumiLukaRin opened 5 years ago

IaGumiLukaRin commented 5 years ago

Each SM splits its own blocks into Warps (currently with a maximum size of 32 threads).

Do all the threads in a warp execute concurrently on the resources of the SM?

// Pairwise reduction over color_shared, xy_shared and count_shared, indexed by
// local_id. Each step adds the element at local_id + offset into the element at
// local_id, halving the offset every time (128, 64, 32, ... 1), so index 0 of
// each buffer ends up holding the accumulated total.
// NOTE(review): local_id is presumably the thread's index within the block and
// the offsets suggest 256 entries per buffer — confirm against the declarations.
__syncthreads();
    // NOTE(review): the barriers below sit inside this conditional; CUDA requires
    // every thread of the block to reach a __syncthreads() — confirm should_add
    // is uniform across the block.
    if (should_add)
    {
        // Step 1: add entries [128, 256) into [0, 128).
        if (local_id < 128)
        {
            color_shared[local_id] += color_shared[local_id + 128];
            xy_shared[local_id] += xy_shared[local_id + 128];
            count_shared[local_id] += count_shared[local_id + 128];
        }
        // Barrier between steps: the next step reads sums written by other threads.
        __syncthreads();

        // Step 2: add entries [64, 128) into [0, 64).
        if (local_id < 64)
        {
            color_shared[local_id] += color_shared[local_id + 64];
            xy_shared[local_id] += xy_shared[local_id + 64];
            count_shared[local_id] += count_shared[local_id + 64];
        }
        __syncthreads();

        // Remaining steps (offsets 32 down to 1) run with no barrier in between,
        // and each buffer is reduced completely before the next one is started.
        // NOTE(review): from offset 16 downwards, the element read at
        // local_id + offset is itself being updated by another thread with
        // local_id < 32, so this presumably relies on those 32 threads running in
        // lockstep as one warp. Verify this holds on the target GPU architecture;
        // newer CUDA guidance recommends explicit warp-level synchronization.
        if (local_id < 32)
        {
            color_shared[local_id] += color_shared[local_id + 32];
            color_shared[local_id] += color_shared[local_id + 16];
            color_shared[local_id] += color_shared[local_id + 8];
            color_shared[local_id] += color_shared[local_id + 4];
            color_shared[local_id] += color_shared[local_id + 2];
            color_shared[local_id] += color_shared[local_id + 1];

            xy_shared[local_id] += xy_shared[local_id + 32];
            xy_shared[local_id] += xy_shared[local_id + 16];
            xy_shared[local_id] += xy_shared[local_id + 8];
            xy_shared[local_id] += xy_shared[local_id + 4];
            xy_shared[local_id] += xy_shared[local_id + 2];
            xy_shared[local_id] += xy_shared[local_id + 1];

            count_shared[local_id] += count_shared[local_id + 32];
            count_shared[local_id] += count_shared[local_id + 16];
            count_shared[local_id] += count_shared[local_id + 8];
            count_shared[local_id] += count_shared[local_id + 4];
            count_shared[local_id] += count_shared[local_id + 2];
            count_shared[local_id] += count_shared[local_id + 1];
        }
    }
// Variant of the final reduction steps written in the same shape as the
// 128 and 64 steps: all three buffers are updated together for one offset,
// followed by a block-wide barrier before the next offset.
// NOTE(review): only the offsets 32 and 16 are shown here; the steps for
// 8, 4, 2 and 1 would presumably follow the same pattern — confirm.
__syncthreads();
// NOTE(review): second consecutive barrier with no work in between — looks redundant.
__syncthreads();
        // Add entries [32, 64) into [0, 32).
        if (local_id < 32)
        {
            color_shared[local_id] += color_shared[local_id + 32];
            xy_shared[local_id] += xy_shared[local_id + 32];
            count_shared[local_id] += count_shared[local_id + 32];
        }
__syncthreads();
// Add entries [16, 32) into [0, 16).
if (local_id < 16)
        {
            color_shared[local_id] += color_shared[local_id + 16];
            xy_shared[local_id] += xy_shared[local_id + 16];
            count_shared[local_id] += count_shared[local_id + 16];
        }
__syncthreads();