// Issue status: Open. Opened by ysh329 4 years ago.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable __kernel void elementwise_mul(__global image2d_t input, __global image2d_t bias, __write_only image2d_t outputImage) { int x = get_global_id(0); int y = get_global_id(1); const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; int2 coords; coords.x = x; coords.y = y; half4 in = read_imageh(input, sampler, coords); half4 biase = read_imageh(bias, sampler, coords); half4 output = in * biase; write_imageh(outputImage, coords, output); } __kernel void channel_mul(__global image2d_t input, __global image2d_t bias, __write_only image2d_t outputImage, int w) { int x = get_global_id(0); int y = get_global_id(1); const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; int2 coords; coords.x = x; coords.y = y; int2 coords_bias; coords_bias.x = x / w; coords_bias.y = 0; half4 in = read_imageh(input, sampler, coords); half4 biase = read_imageh(bias, sampler, coords_bias); half4 output = in * biase; write_imageh(outputImage, coords, output); } // etc : 1 1 1 72 // run time Y [value,0,0,0] * 72 __kernel void channel_mul_d2(__global image2d_t input, __global image2d_t bias, __write_only image2d_t outputImage, int w) { int x = get_global_id(0); int y = get_global_id(1); const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; int2 coords; coords.x = x; coords.y = y; int2 coords_bias0; int2 coords_bias1; int2 coords_bias2; int2 coords_bias3; coords_bias0.x = x / w * 4; coords_bias0.y = 0; coords_bias1.x = x / w * 4 + 1; coords_bias1.y = 0; coords_bias2.x = x / w * 4 + 2; coords_bias2.y = 0; coords_bias3.x = x / w * 4 + 3; coords_bias3.y = 0; half4 biase0 = read_imageh(bias, sampler, coords_bias0); half4 biase1 = read_imageh(bias, sampler, coords_bias1); half4 biase2 = read_imageh(bias, sampler, coords_bias2); half4 biase3 = read_imageh(bias, sampler, coords_bias3); half4 biase = {biase0.x, biase1.x, biase2.x, biase3.x}; 
half4 in = read_imageh(input, sampler, coords); half4 output = mad(in, biase, 0); write_imageh(outputImage, coords, output); } __kernel void channel_mul_d4(__global image2d_t input, __global image2d_t bias, __write_only image2d_t outputImage, int w) { int x = get_global_id(0); int y = get_global_id(1); const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; int2 coords; coords.x = x; coords.y = y; int2 coords_bias; coords_bias.x = x / w; coords_bias.y = 0; half4 in = read_imageh(input, sampler, coords); half4 biase = read_imageh(bias, sampler, coords_bias); half4 output = in * biase; write_imageh(outputImage, coords, output); }