inducer / pyopencl

OpenCL integration for Python, plus shiny features
http://mathema.tician.de/software/pyopencl
Other
1.06k stars 241 forks source link

Why array element not equal assignable value? #512

Closed MikkoArtik closed 3 years ago

MikkoArtik commented 3 years ago

Hi! I can't understand where I am making a mistake. The goal is to fill an array so that each element holds the unique global id of the work-item that wrote it.

I found a magic number: q = 16777216 = 1024 * 1024 * 16 (= 2^24). If the array length does not exceed q, everything is fine: the check_array function (see below) returns True with these parameters:

arr_size = 1024 * 1024 * 16
grid_size = (1024, 1024, 16)
block_size = (32, 8, 4)

check_array(...) -> True

BUT...

arr_size = 1024 * 1024 * 20
grid_size = (1024, 1024, 20)
block_size = (32, 8, 4)

check_array(...) -> False

I took the slice arr[16777216:16777226].

In theory the result should be arr[16777216:16777226] = [16777216, 16777217, 16777218, 16777219, 16777220, 16777221, 16777222, 16777223, 16777224, 16777225]

But in fact arr[16777216:16777226] = [16777216, **16777216**, 16777218, **16777220**, 16777220, **16777220**, 16777222, **16777224**, 16777224, **16777224**]

please, help me.

GPU-card is Nvidia GeForce GTX 1050 Ti

OpenCL kernel source (core.c):

/*
 * Flatten a 3-D work-group index into one linear work-group index.
 *
 * Dimension 0 varies fastest:
 *     id = x + y * groups_x + z * groups_x * groups_y
 * where groups_x / groups_y are the work-group counts reported by
 * get_num_groups() for dimensions 0 and 1.
 */
int get_general_block_id(int3 block_ids)
{
    const int groups_x = get_num_groups(0);
    const int groups_y = get_num_groups(1);

    const int row_offset = block_ids.s1 * groups_x;
    const int slab_offset = block_ids.s2 * groups_x * groups_y;

    return block_ids.s0 + row_offset + slab_offset;
}

/*
 * Flatten a 3-D local (within work-group) index into one linear index.
 *
 * Dimension 0 varies fastest:
 *     id = x + y * size_x + z * size_x * size_y
 * where size_x / size_y are the work-group extents reported by
 * get_local_size() for dimensions 0 and 1.
 */
int get_local_thread_id(int3 thread_ids)
{
    const int size_x = get_local_size(0);
    const int size_y = get_local_size(1);

    const int row_offset = thread_ids.s1 * size_x;
    const int slab_offset = thread_ids.s2 * size_x * size_y;

    return thread_ids.s0 + row_offset + slab_offset;
}

/*
 * Combine a linear work-group index and a linear local index into one
 * linear global index:
 *     id = gen_block_id * (work-items per group) + gen_thread_id
 * The work-items-per-group factor is the product of get_local_size()
 * over the three dimensions.
 */
int get_global_thread_id(int gen_block_id, int gen_thread_id)
{
    const int size_x = get_local_size(0);
    const int size_y = get_local_size(1);
    const int size_z = get_local_size(2);

    /* Index of the first work-item belonging to this work-group. */
    const int group_base = gen_block_id * size_x * size_y * size_z;

    return group_base + gen_thread_id;
}

/*
 * Write each work-item's linear global index into array[index].
 *
 * array  - output buffer of 32-bit floats
 * length - number of elements in array; work-items whose index is
 *          >= length do nothing
 *
 * NOTE: a 32-bit float has a 24-bit significand, so not every integer
 * above 2^24 = 16777216 is representable.  Storing such an index in
 * `array` rounds it to the nearest representable float, which is why
 * the debug output below can show val != gid.  Using an integer element
 * type (on both the host and the device side) avoids the rounding.
 */
kernel void index_function(global float *array, int length)
{
    const int group_x = get_group_id(0);
    const int group_y = get_group_id(1);
    const int group_z = get_group_id(2);
    const int gb_id = get_general_block_id((int3)(group_x, group_y, group_z));

    const int item_x = get_local_id(0);
    const int item_y = get_local_id(1);
    const int item_z = get_local_id(2);
    const int lt_id = get_local_thread_id((int3)(item_x, item_y, item_z));

    const int global_tid = get_global_thread_id(gb_id, lt_id);

    /* Work-items past the end of the buffer have nothing to write. */
    if (global_tid >= length)
        return;

    array[global_tid] = global_tid;

    /* Debug trace for the indices around 2^24, read back from the buffer. */
    if ((global_tid >= 16777210) && (global_tid <= 16777245))
        printf("block_id=%i thread_id=%i gid=%i val=%f\n", gb_id, lt_id, global_tid, array[global_tid]);
}

Python code:

import numpy as np
import pyopencl as cl

def load_core(filename='core.c') -> str:
    """Return the complete text of the OpenCL source file *filename*.

    Args:
        filename: Path of the source file to read; defaults to ``core.c``.

    Returns:
        The file contents as a single string (empty string for an empty file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # One read() call replaces the former line-by-line ``+=`` concatenation;
    # the resulting string is the same.
    with open(filename, 'r') as f:
        return f.read()

def create_context_and_queue():
    """Create an OpenCL context and command queue on the first GPU found.

    GPU devices are collected from every available platform, in platform
    order; only the first one is used.

    Returns:
        A ``(context, queue)`` tuple.

    Raises:
        IndexError: If no platform reports a GPU device.
    """
    gpu_devices = [
        device
        for platform in cl.get_platforms()
        for device in platform.get_devices(device_type=cl.device_type.GPU)
    ]

    # Deliberately run on a single device only.
    first_gpu = gpu_devices[0]

    ctx = cl.Context(devices=[first_gpu])
    return ctx, cl.CommandQueue(ctx)

def compile_function(context, filename='core.c'):
    """Build the OpenCL program stored in *filename* and return its kernel.

    Args:
        context: The OpenCL context to build the program for.
        filename: Path of the OpenCL source file; defaults to ``core.c``.

    Returns:
        The ``index_function`` kernel of the built program.
    """
    source_text = load_core(filename)
    program = cl.Program(context, source_text)
    built = program.build()
    return built.index_function

def check_array(arr: np.ndarray) -> bool:
    """Return True when every element of *arr* equals its own index.

    An element counts as equal when ``abs(index - element) <= 1e-5``.
    The comparison is carried out in float64 so that neither the index nor
    the element is rounded before the subtraction, whatever dtype *arr* has.

    Args:
        arr: One-dimensional array (or sequence) of numbers.

    Returns:
        ``True`` if all elements match their index (also for empty input),
        ``False`` as soon as a mismatch is found.
    """
    values = np.asarray(arr)
    # Work in fixed-size chunks: vectorised, yet without allocating
    # float64 copies of a very large array all at once.
    chunk = 1 << 20
    for start in range(0, len(values), chunk):
        block = values[start:start + chunk].astype(np.float64)
        expected = np.arange(start, start + len(block), dtype=np.float64)
        if np.any(np.abs(expected - block) > 1e-5):
            return False
    return True

# Problem size and launch geometry.
# NOTE(review): pyopencl's kernel call takes the *global* work size (total
# work-items per dimension), so grid_size below launches
# 1000 * 1000 * 60 = 6e7 work-items while arr_size is 1e9 -- elements past
# the launched range keep the -1 sentinel.  Confirm which size is intended.
arr_size = 1000 * 1000 * 1000
grid_size = (1000, 1000, 60)
block_size = (10, 10, 10)
if __name__ == '__main__':
    context, queue = create_context_and_queue()
    cl_function = compile_function(context)

    # Start from a sentinel (-1) so elements the kernel never wrote stand out.
    # NOTE: float32 has a 24-bit significand, so indices above 2**24 cannot
    # all be stored exactly; the kernel in core.c takes a float buffer, so an
    # integer dtype here must be paired with an integer buffer type there.
    arr = np.full(shape=arr_size, fill_value=-1, dtype=np.float32)

    mf = cl.mem_flags
    a_g = cl.Buffer(context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=arr)

    # The kernel declares ``int length``, so pass a signed 32-bit scalar.
    cl_function(queue, grid_size, block_size, a_g, np.int32(arr_size))

    # Copy the device buffer back into the host array.  (The previously
    # allocated, never-used second device buffer has been removed.)
    cl.enqueue_copy(queue, arr, a_g)

    print(check_array(arr))

Command-line output:

block_id=16384 thread_id=22 gid=16777238 val=16777238.000000
block_id=16384 thread_id=23 gid=16777239 val=16777240.000000
block_id=16384 thread_id=24 gid=16777240 val=16777240.000000
block_id=16384 thread_id=25 gid=16777241 val=16777240.000000
block_id=16384 thread_id=26 gid=16777242 val=16777242.000000
block_id=16384 thread_id=27 gid=16777243 val=16777244.000000
block_id=16384 thread_id=28 gid=16777244 val=16777244.000000
block_id=16384 thread_id=29 gid=16777245 val=16777244.000000

Why is the array element not equal to the value assigned to it?

MikkoArtik commented 3 years ago

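Update: I changed the array element type to int, and that fixed it. (A 32-bit float has only a 24-bit significand, so not every integer above 2^24 = 16777216 can be represented exactly; the stored value is rounded to the nearest representable float.)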