Open ryansoq opened 8 years ago
Hi All, I have a question. How do I use enqueue_kernel ?
This is my environment : CLOC-0.8 /opt/amd/bin/clc2 -version LLVM version 3.2svn /opt/amd/bin/opt -version LLVM version 3.2svn HSA-Runtime-AMD-release-v1.0
void VectorAdd_child(__global int* a, __global int* b, __global int* res); __kernel void vectoradd(__global int* a, __global int* b, __global int* res) { int gid = get_global_id(0); //res[gid] = a[gid] + b[gid]; queue_t defQ = get_default_queue(); ndrange_t ndrange1 = ndrange_1D(256); void (^myblock)(void)=^{VectorAdd_child(a, b, res);}; int err_ret = enqueue_kernel(defQ, 1, ndrange1, myblock); } void VectorAdd_child(__global int* a, __global int* b, __global int* res) { int gid = get_global_id(0); res[gid] = a[gid] + b[gid]; }
// ref libclc code
typedef struct _HsaAqlDispatchPacket { uint mix; ushort workgroup_size[3]; ushort reserved2; uint grid_size[3]; uint private_segment_size_bytes; uint group_segment_size_bytes; ulong kernel_object_address; ulong kernel_arg_address; ulong reserved3; ulong completion_signal; } HsaAqlDispatchPacket; typedef struct _AmdAqlWrap { uint state; uint enqueue_flags; uint command_id; uint child_counter; ulong completion; ulong parent_wrap; ulong wait_list; uint wait_num; uint reserved[5]; HsaAqlDispatchPacket aql; } AmdAqlWrap; typedef struct _AmdVQueueHeader { uint aql_slot_num; uint event_slot_num; ulong event_slot_mask; ulong event_slots; ulong aql_slot_mask; uint command_counter; uint wait_size; uint arg_size; uint reserved0; ulong kernel_table; uint reserved[2]; } AmdVQueueHeader;
int main(int argc, char **argv) { ... hsa_status_t err; status_t build_err;
err = hsa_init(); check(Initializing the hsa runtime, err); /* * Iterate over the agents and pick the gpu agent using * the find_gpu callback. */ hsa_agent_t device = 0; err = hsa_iterate_agents(find_gpu, &device); check(Calling hsa_iterate_agents, err); err = (device == 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS; check(Checking if the GPU device is non-zero, err); /* * Query the name of the device. */ char name[64] = { 0 }; err = hsa_agent_get_info(device, HSA_AGENT_INFO_NAME, name); check(Querying the device name, err); printf("The device name is %s.\n", name); /* * Query the maximum size of the queue. */ uint32_t queue_size = 0; err = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); check(Querying the device maximum queue size, err); printf("The maximum queue size is %u.\n", (unsigned int) queue_size); /* * Create a queue using the maximum size. */ hsa_queue_t* commandQueue; err = hsa_queue_create(device, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, &commandQueue); check(Creating the queue, err); /* * Load BRIG, encapsulated in an ELF container, into a BRIG module. */ hsa_ext_brig_module_t* brigModule; char file_name[128] = "shader.brig"; build_err = create_brig_module_from_brig_file(file_name, &brigModule); check_build(Creating the brig module from shader.brig, build_err); /* * Create hsa program. */ hsa_ext_program_handle_t hsaProgram; err = hsa_ext_program_create(&device, 1, HSA_EXT_BRIG_MACHINE_LARGE, HSA_EXT_BRIG_PROFILE_FULL, &hsaProgram); check(Creating the hsa program, err); /* * Add the BRIG module to hsa program. */ hsa_ext_brig_module_handle_t module; err = hsa_ext_add_module(hsaProgram, brigModule, &module); check(Adding the brig module to the program, err); /* * Construct finalization request list. */ hsa_ext_finalization_request_t finalization_request_list; finalization_request_list.module = module; finalization_request_list.program_call_convention = 0; char kernel_name[128] = "&__OpenCL_vectoradd_kernel"; //&__OpenCL_%s_kernel //char kernel_name[128] = "&__OpenCL___amd_blocks_func___vectoradd_block_invoke_kernel"; err = find_symbol_offset(brigModule, kernel_name, &finalization_request_list.symbol); check(Finding the symbol offset for the kernel, err); /* * Finalize the hsa program. */ err = hsa_ext_finalize_program(hsaProgram, device, 1, &finalization_request_list, NULL, NULL, 0, NULL, 0); check(Finalizing the program, err); /* * Destroy the brig module. The program was successfully created the kernel * symbol was found and the program was finalized, so it is no longer needed. */ destroy_brig_module(brigModule); /* * Get the hsa code descriptor address. */ hsa_ext_code_descriptor_t *hsaCodeDescriptor; err = hsa_ext_query_kernel_descriptor_address(hsaProgram, module, finalization_request_list.symbol, &hsaCodeDescriptor); check(Querying the kernel descriptor address, err); /* * Create a signal to wait for the dispatch to finish. */ hsa_signal_t signal; err=hsa_signal_create(1, 0, NULL, &signal); check(Creating a HSA signal, err); /* * Initialize the dispatch packet. */ hsa_dispatch_packet_t aql; memset(&aql, 0, sizeof(aql)); /* * Allocate and initialize the kernel arguments. */ const int DATA_SIZE = 128; int *a=(int*)malloc(sizeof(int)*DATA_SIZE); int *b=(int*)malloc(sizeof(int)*DATA_SIZE); int *res=(int*)malloc(sizeof(int)*DATA_SIZE); memset(a, 0, DATA_SIZE*sizeof(int)); //err=hsa_memory_register(a, DATA_SIZE*4); //check(Registering argument memory for input parameter, err); memset(b, 0, DATA_SIZE*sizeof(int)); //err=hsa_memory_register(b, DATA_SIZE*4); //check(Registering argument memory for input parameter, err); memset(res, 0, DATA_SIZE*sizeof(int)); //err=hsa_memory_register(res, DATA_SIZE*4); //check(Registering argument memory for input parameter, err); srand (time(NULL)); for(int i = 0; i < DATA_SIZE; i++) { a[i] = rand() % 10; b[i] = rand() % 10; } /* * Setup the dispatch information. */ aql.completion_signal=signal; aql.dimensions=(uint16_t)1; aql.workgroup_size_x=(uint16_t)32; aql.workgroup_size_y=(uint16_t)1; aql.workgroup_size_z=(uint16_t)1; aql.grid_size_x=DATA_SIZE; aql.grid_size_y=1; aql.grid_size_z=1; aql.header.type=HSA_PACKET_TYPE_DISPATCH; aql.header.acquire_fence_scope=2; aql.header.release_fence_scope=2; aql.header.barrier=1; aql.group_segment_size=0; aql.private_segment_size=0;
...
size_t kernel_arg_buffer_size = hsaCodeDescriptor->kernarg_segment_byte_size; void* args __attribute__ ((aligned(HSA_ARGUMENT_ALIGN_BYTES))) = malloc(hsaCodeDescriptor->kernarg_segment_byte_size) ; memset(args, 0, kernel_arg_buffer_size); void* args_pointer = args; args_pointer += sizeof(void*) * (/*kernel->hsaArgCount*/9 - /*kernel->num_args*/3 - /* enque arg*/3); uint64_t *pb=(uint64_t *)malloc(sizeof(uint64_t)*128); AmdVQueueHeader *vq =(AmdVQueueHeader *) malloc(sizeof(AmdVQueueHeader)); AmdAqlWrap *aw = (AmdAqlWrap *)malloc(sizeof(AmdAqlWrap)); memset(&pb, 0, sizeof(uint64_t)*128); memset(&vq, 0, sizeof(AmdVQueueHeader)); memset(&aw, 0, sizeof(AmdAqlWrap)); void* q1 = &pb; void* q2 = &vq; void* q3 = &aw; //set argument //map __vqueue_pointer, __aqlwrap_pointer memcpy(args_pointer, &q1, sizeof(void*)); args_pointer += sizeof(void*); memcpy(args_pointer, &q2, sizeof(void*)); args_pointer += sizeof(void*); memcpy(args_pointer, &q3, sizeof(void*)); args_pointer += sizeof(void*); void* p1 = &vq; memcpy(args_pointer, &p1, sizeof(void*)); args_pointer += sizeof(void*); void* p2 = &aw; memcpy(args_pointer, &p2, sizeof(void*)); args_pointer += sizeof(void*); void* p3 = res; memcpy(args_pointer, &p3, sizeof(void*)); args_pointer += sizeof(void*);
printf("aw->aql.mix : %d \n",(aw->aql).mix); printf("aw->aql.ws 0 : %hd \n",(aw->aql).workgroup_size[0]); printf("aw->aql.ws 1 : %hd \n",(aw->aql).workgroup_size[1]); printf("aw->aql.ws 2 : %hd \n",(aw->aql).workgroup_size[2]); printf("aw->aql.gs 0 : %d \n",(aw->aql).grid_size[0]); printf("aw->aql.gs 1 : %d \n",(aw->aql).grid_size[1]); printf("aw->aql.gs 2 : %d \n",(aw->aql).grid_size[2]);
// I get error data
version 0:20140528:$full:$large; extension "amd:gcn"; extension "IMAGE";
decl prog function &abort()(); ...
prog kernel &__OpenCL_vectoradd_kernel( kernarg_u64 %__global_offset_0, kernarg_u64 %__global_offset_1, kernarg_u64 %__global_offset_2, kernarg_u64 %__printf_buffer, kernarg_u64 %__vqueue_pointer, kernarg_u64 %__aqlwrap_pointer, kernarg_u64 %a, kernarg_u64 %b, kernarg_u64 %res) {
... Thanks
Hi All, I have a question. How do I use enqueue_kernel ?
This is my environment : CLOC-0.8 /opt/amd/bin/clc2 -version LLVM version 3.2svn /opt/amd/bin/opt -version LLVM version 3.2svn HSA-Runtime-AMD-release-v1.0
.cl code
host code
// ref libclc code
int main(int argc, char **argv) { ... hsa_status_t err; status_t build_err;
...
...
// I get error data
.hsail
version 0:20140528:$full:$large; extension "amd:gcn"; extension "IMAGE";
decl prog function &abort()(); ...
... Thanks