clMathLibraries / clBLAS

a software library containing BLAS functions written in OpenCL
Apache License 2.0
839 stars 240 forks source link

CL build error when running the tune tool #6

Closed AMD-FirePro closed 10 years ago

AMD-FirePro commented 11 years ago

I ran the following command: `tune.exe --gemm --double --store-kernels`

and got the following output:

__attribute__((reqd_work_group_size(32, 2, 1)))
void __kernel
dgemmBlock(
    uint M,
    uint N,
    uint K,
    const double alpha,
    const double beta,
    const __global *restrict A,
    const __global *restrict B,
    __global *C,
    uint lda,
    uint ldb,
    uint ldc)
{
double2 a0;
double2 b0, b1;
double2 c0, c1, c2, c3, c4, c5, c6, c7;
uint4 coord = 0u; /* contains coordB, coordA, k */

uint kif;
uint get_group_id_0;
uint get_global_id_0;
A += 8u * (uint)get_global_id(1) * lda;
get_group_id_0 = (get_group_id(0) + get_group_id(1))% get_num_groups(0);
get_global_id_0 = get_group_id_0 * get_local_size(0) + get_local_id(0);
kif = (N % 256 != 0);
get_global_id_0 = (kif*(uint)get_global_id(0)) + ((1-kif)*get_global_id_0);
B += get_global_id_0 * 2;
coord.y = 8u * (uint)get_global_id(1);
coord.x = 2u * (uint)get_global_id_0;
c0 = 0;
c1 = 0;
c2 = 0;
c3 = 0;
c4 = 0;
c5 = 0;
c6 = 0;
c7 = 0;

for (uint k1 = 0; k1 < K; k1 += 2) {
    /* -- Tiles multiplier -- */
    b0 = B[0];
    b0 = B[1];
    b1 = B[ldb];
    b1 = B[ldb + 1];

    a0 = A[0];
    a0 = A[1];

    c0 = mad(b0, a0.s0, c0);
    c0 = mad(b1, a0.s1, c0);

    a0 = A[lda];
    a0 = A[lda + 1];

    c1 = mad(b0, a0.s0, c1);
    c1 = mad(b1, a0.s1, c1);

    a0 = A[(lda << 1)];
    a0 = A[mad24(2u, lda, 1u)];

    c2 = mad(b0, a0.s0, c2);
    c2 = mad(b1, a0.s1, c2);

    a0 = A[mad24(3u, lda, 0u)];
    a0 = A[mad24(3u, lda, 1u)];

    c3 = mad(b0, a0.s0, c3);
    c3 = mad(b1, a0.s1, c3);

    a0 = A[(lda << 2)];
    a0 = A[mad24(4u, lda, 1u)];

    c4 = mad(b0, a0.s0, c4);
    c4 = mad(b1, a0.s1, c4);

    a0 = A[mad24(5u, lda, 0u)];
    a0 = A[mad24(5u, lda, 1u)];

    c5 = mad(b0, a0.s0, c5);
    c5 = mad(b1, a0.s1, c5);

    a0 = A[mad24(6u, lda, 0u)];
    a0 = A[mad24(6u, lda, 1u)];

    c6 = mad(b0, a0.s0, c6);
    c6 = mad(b1, a0.s1, c6);

    a0 = A[mad24(7u, lda, 0u)];
    a0 = A[mad24(7u, lda, 1u)];

    c7 = mad(b0, a0.s0, c7);
    c7 = mad(b1, a0.s1, c7);

    A += 2;
    B += (ldb << 1);
    /* ---------------------- */
}

GPtr uC;

uC.d = C + coord.y * ldc + coord.x;

__global  *pC = uC.d0v;

double2 tempC0, tempC1;

tempC0 = pC[0];
tempC0 = pC[1];
tempC1 = pC[ldc];
tempC1 = pC[ldc + 1];
tempC0 = mad(tempC0, beta, 0);
tempC1 = mad(tempC1, beta, 0);
tempC0 = mad(c0, alpha, tempC0);
tempC1 = mad(c1, alpha, tempC1);
pC[0] = tempC0;
pC[1] = tempC0;
pC[ldc] = tempC1;
pC[ldc + 1] = tempC1;

tempC0 = pC[(ldc << 1)];
tempC0 = pC[mad24(2u, ldc, 1u)];
tempC1 = pC[mad24(3u, ldc, 0u)];
tempC1 = pC[mad24(3u, ldc, 1u)];
tempC0 = mad(tempC0, beta, 0);
tempC1 = mad(tempC1, beta, 0);
tempC0 = mad(c2, alpha, tempC0);
tempC1 = mad(c3, alpha, tempC1);
pC[(ldc << 1)] = tempC0;
pC[mad24(2u, ldc, 1u)] = tempC0;
pC[mad24(3u, ldc, 0u)] = tempC1;
pC[mad24(3u, ldc, 1u)] = tempC1;

tempC0 = pC[(ldc << 2)];
tempC0 = pC[mad24(4u, ldc, 1u)];
tempC1 = pC[mad24(5u, ldc, 0u)];
tempC1 = pC[mad24(5u, ldc, 1u)];
tempC0 = mad(tempC0, beta, 0);
tempC1 = mad(tempC1, beta, 0);
tempC0 = mad(c4, alpha, tempC0);
tempC1 = mad(c5, alpha, tempC1);
pC[(ldc << 2)] = tempC0;
pC[mad24(4u, ldc, 1u)] = tempC0;
pC[mad24(5u, ldc, 0u)] = tempC1;
pC[mad24(5u, ldc, 1u)] = tempC1;

tempC0 = pC[mad24(6u, ldc, 0u)];
tempC0 = pC[mad24(6u, ldc, 1u)];
tempC1 = pC[mad24(7u, ldc, 0u)];
tempC1 = pC[mad24(7u, ldc, 1u)];
tempC0 = mad(tempC0, beta, 0);
tempC1 = mad(tempC1, beta, 0);
tempC0 = mad(c6, alpha, tempC0);
tempC1 = mad(c7, alpha, tempC1);
pC[mad24(6u, ldc, 0u)] = tempC0;
pC[mad24(6u, ldc, 1u)] = tempC0;
pC[mad24(7u, ldc, 0u)] = tempC1;
pC[mad24(7u, ldc, 1u)] = tempC1;

}


Build log:

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 2: warning: OpenCL extension is now part of core

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

                       ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 54: warning: explicit type is missing ("int" assumed) const __global *restrict A, ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 55: warning: explicit type is missing ("int" assumed) const __global *restrict B, ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 56: warning: explicit type is missing ("int" assumed) __global *C, ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 149: warning: a value of type "global int *" cannot be assigned to an entity of type "global double *" uC.d = C + coord.y * ldc + coord.x; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 151: warning: explicit type is missing ("int" assumed) __global *pC = uC.d0v; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 151: error: union "GPtr" has no field "d0v" __global *pC = uC.d0v; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 163: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[0] = tempC0; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 164: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[1] = tempC0; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 165: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[ldc] = tempC1; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 166: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[ldc + 1] = tempC1; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 176: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[(ldc << 1)] = tempC0; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 177: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(2u, ldc, 1u)] = tempC0; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 178: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(3u, ldc, 0u)] = tempC1; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 179: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(3u, ldc, 1u)] = tempC1; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 189: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[(ldc << 2)] = tempC0; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 190: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(4u, ldc, 1u)] = tempC0; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 191: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(5u, ldc, 0u)] = tempC1; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 192: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(5u, ldc, 1u)] = tempC1; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 202: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(6u, ldc, 0u)] = tempC0; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 203: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(6u, ldc, 1u)] = tempC0; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 204: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(7u, ldc, 0u)] = tempC1; ^

"C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl", line 205: error: a value of type "double2" cannot be assigned to an entity of type "int" pC[mad24(7u, ldc, 1u)] = tempC1; ^

17 errors detected in the compilation of "C:\Users\DELL2\AppData\Local\Temp\OCLD77D.tmp.cl".

Frontend phase failed compilation.

"

I don't get this output if I don't use the `--store-kernels` option, but I guess the same problem still exists.