migraphx-benchmark / AMDMIGraphX

AMD's graph optimization engine.
https://rocmsoftwareplatform.github.io/AMDMIGraphX/doc/html/
MIT License
0 stars 1 fork source link

GRU inaccuracies #133

Closed — attila-dusnoki-htec closed this issue 9 months ago

attila-dusnoki-htec commented 1 year ago

Failing tests:

attila-dusnoki-htec commented 1 year ago
FAIL: test_gru_batchwise_cpu (__main__.OnnxBackendNodeModelTest) ``` ====================================================================== FAIL: test_gru_batchwise_cpu (__main__.OnnxBackendNodeModelTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "/usr/local/lib/python3.8/dist-packages/onnx/backend/test/runner/__init__.py", line 290, in device_test_func return test_func(*args, device=device, **kwargs) File "/usr/local/lib/python3.8/dist-packages/onnx/backend/test/runner/__init__.py", line 467, in run self.assert_similar_outputs( File "../test/py/onnx_backend_test.py", line 59, in assert_similar_outputs np.testing.assert_allclose(ref_outputs[i], File "/usr/local/lib/python3.8/dist-packages/numpy/testing/_private/utils.py", line 1530, in assert_allclose assert_array_compare(compare, actual, desired, err_msg=str(err_msg), File "/usr/local/lib/python3.8/dist-packages/numpy/testing/_private/utils.py", line 844, in assert_array_compare raise AssertionError(msg) AssertionError: Not equal to tolerance rtol=0.001, atol=1e-05 Program = module: "main" R = @param:R -> float_type, {1, 18, 6}, {108, 6, 1}, target_id=0 W = @param:W -> float_type, {1, 18, 2}, {36, 2, 1}, target_id=0 X = @param:X -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0 @3 = undefined -> float_type, {}, {}, target_id=0 @4 = gru[hidden_size=6,actv_func={sigmoid, tanh},direction=forward,clip=0,linear_before_reset=0](X,W,R,@3,@3,@3) -> float_type, {3, 1, 1, 6}, {6, 6, 6, 1}, target_id=0 @5 = rnn_last_hs_output(@4) -> float_type, {1, 1, 6}, {6, 6, 1}, target_id=0 @6 = @return(@4,@5), target_id=0 Compiled program = module: "main" @0 = check_context::migraphx::gpu::context -> float_type, {}, {}, target_id=0 @1 = hip::hip_allocate_memory[shape=int8_type, {880}, {1},id=main:scratch] -> int8_type, {880}, {1}, target_id=0 @2 = load[offset=496,end=640](@1) -> float_type, {1, 18, 2}, {36, 2, 1}, target_id=0 W = @param:W -> float_type, {1, 18, 2}, {36, 
2, 1}, target_id=0 @4 = hip::copy_to_gpu(W,@2) -> float_type, {1, 18, 2}, {36, 2, 1}, target_id=0 @5 = load[offset=32,end=464](@1) -> float_type, {1, 18, 6}, {108, 6, 1}, target_id=0 R = @param:R -> float_type, {1, 18, 6}, {108, 6, 1}, target_id=0 @7 = hip::copy_to_gpu(R,@5) -> float_type, {1, 18, 6}, {108, 6, 1}, target_id=0 @8 = load[offset=640,end=664](@1) -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0 X = @param:X -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0 @10 = hip::copy_to_gpu(X,@8) -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0 @11 = slice[axes={0},starts={1},ends={2}](@10) -> float_type, {1, 1, 2}, {2, 2, 1}, target_id=0 @12 = squeeze[axes={0}](@11) -> float_type, {1, 2}, {2, 1}, target_id=0 @13 = squeeze[axes={0}](@4) -> float_type, {18, 2}, {2, 1}, target_id=0 @14 = transpose[permutation={1, 0}](@13) -> float_type, {2, 18}, {1, 2}, target_id=0 @15 = load[offset=672,end=744](@1) -> float_type, {1, 18}, {18, 1}, target_id=0 @16 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@12,@14,@15) -> float_type, {1, 18}, {18, 1}, target_id=0 @17 = load[offset=752,end=824](@1) -> float_type, {1, 18}, {18, 1}, target_id=0 @18 = slice[axes={0},starts={0},ends={1}](@10) -> float_type, {1, 1, 2}, {2, 2, 1}, target_id=0 @19 = squeeze[axes={0}](@18) -> float_type, {1, 2}, {2, 1}, target_id=0 @20 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@19,@14,@17) -> float_type, {1, 18}, {18, 1}, target_id=0 @21 = hip::hip_copy_literal[id=main:@literal:1] -> float_type, {1, 6}, {6, 1}, target_id=0 @22 = hip::hip_copy_literal[id=main:@literal:0] -> float_type, {1, 6}, {6, 1}, target_id=0 @23 = load[offset=464,end=488](@1) -> float_type, {1, 6}, {6, 1}, target_id=0 @24 = squeeze[axes={0}](@7) -> float_type, {18, 6}, {6, 1}, target_id=0 @25 = transpose[permutation={1, 0}](@24) -> float_type, {6, 18}, {1, 6}, target_id=0 @26 = slice[axes={1},starts={12},ends={18}](@25) -> float_type, {6, 6}, {1, 6}, target_id=0 @27 = 
gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@22,@26,@23) -> float_type, {1, 6}, {6, 1}, target_id=0 @28 = slice[axes={1},starts={0},ends={12}](@25) -> float_type, {6, 12}, {1, 6}, target_id=0 @29 = load[offset=832,end=880](@1) -> float_type, {1, 12}, {12, 1}, target_id=0 @30 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@22,@28,@29) -> float_type, {1, 12}, {12, 1}, target_id=0 @31 = slice[axes={1},starts={0},ends={6}](@20) -> float_type, {1, 6}, {18, 1}, target_id=0 @32 = load[offset=0,end=24](@1) -> float_type, {1, 6}, {6, 1}, target_id=0 @33 = slice[axes={1},starts={12},ends={18}](@20) -> float_type, {1, 6}, {18, 1}, target_id=0 @34 = slice[axes={1},starts={0},ends={6}](@30) -> float_type, {1, 6}, {12, 1}, target_id=0 @35 = gpu::code_object[code_object=9576,symbol_name=add_tanh_add_sigmoid_sub_mul_kernel,global=1024,local=1024,](@33,@27,@31,@34,@21,@32) -> float_type, {1, 6}, {6, 1}, target_id=0 @36 = load[offset=752,end=800](@1) -> float_type, {1, 12}, {12, 1}, target_id=0 @37 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@35,@28,@36) -> float_type, {1, 12}, {12, 1}, target_id=0 @38 = slice[axes={1},starts={6},ends={12}](@16) -> float_type, {1, 6}, {18, 1}, target_id=0 @39 = load[offset=464,end=488](@1) -> float_type, {1, 6}, {6, 1}, target_id=0 @40 = slice[axes={1},starts={6},ends={12}](@37) -> float_type, {1, 6}, {12, 1}, target_id=0 @41 = gpu::code_object[code_object=9552,symbol_name=add_sigmoid_mul_kernel,global=1024,local=1024,](@38,@40,@35,@39) -> float_type, {1, 6}, {6, 1}, target_id=0 @42 = load[offset=800,end=824](@1) -> float_type, {1, 6}, {6, 1}, target_id=0 @43 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@41,@26,@42) -> float_type, {1, 6}, {6, 1}, target_id=0 @44 = slice[axes={1},starts={0},ends={6}](@16) -> float_type, {1, 6}, {18, 1}, target_id=0 @45 = load[offset=832,end=856](@1) -> float_type, {1, 6}, {6, 1}, target_id=0 @46 = 
slice[axes={1},starts={0},ends={6}](@37) -> float_type, {1, 6}, {12, 1}, target_id=0 @47 = slice[axes={1},starts={12},ends={18}](@16) -> float_type, {1, 6}, {18, 1}, target_id=0 @48 = gpu::code_object[code_object=9592,symbol_name=add_sigmoid_add_tanh_sub_mul_mul_add_kernel,global=1024,local=1024,](@44,@46,@47,@43,@21,@35,@45) -> float_type, {1, 6}, {6, 1}, target_id=0 @49 = slice[axes={0},starts={2},ends={3}](@10) -> float_type, {1, 1, 2}, {2, 2, 1}, target_id=0 @50 = load[offset=672,end=744](@1) -> float_type, {1, 18}, {18, 1}, target_id=0 @51 = squeeze[axes={0}](@49) -> float_type, {1, 2}, {2, 1}, target_id=0 @52 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@51,@14,@50) -> float_type, {1, 18}, {18, 1}, target_id=0 @53 = load[offset=464,end=512](@1) -> float_type, {1, 12}, {12, 1}, target_id=0 @54 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@48,@28,@53) -> float_type, {1, 12}, {12, 1}, target_id=0 @55 = slice[axes={1},starts={6},ends={12}](@54) -> float_type, {1, 6}, {12, 1}, target_id=0 @56 = slice[axes={1},starts={6},ends={12}](@52) -> float_type, {1, 6}, {18, 1}, target_id=0 @57 = load[offset=544,end=568](@1) -> float_type, {1, 6}, {6, 1}, target_id=0 @58 = gpu::code_object[code_object=9552,symbol_name=add_sigmoid_mul_kernel,global=1024,local=1024,](@56,@55,@48,@57) -> float_type, {1, 6}, {6, 1}, target_id=0 @59 = load[offset=512,end=536](@1) -> float_type, {1, 6}, {6, 1}, target_id=0 @60 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@58,@26,@59) -> float_type, {1, 6}, {6, 1}, target_id=0 @61 = load[offset=32,end=56](@1) -> float_type, {1, 6}, {6, 1}, target_id=0 @62 = slice[axes={1},starts={0},ends={6}](@54) -> float_type, {1, 6}, {12, 1}, target_id=0 @63 = slice[axes={1},starts={12},ends={18}](@52) -> float_type, {1, 6}, {18, 1}, target_id=0 @64 = slice[axes={1},starts={0},ends={6}](@52) -> float_type, {1, 6}, {18, 1}, target_id=0 @65 = 
gpu::code_object[code_object=9592,symbol_name=add_sigmoid_add_tanh_sub_mul_mul_add_kernel,global=1024,local=1024,](@64,@62,@63,@60,@21,@48,@61) -> float_type, {1, 6}, {6, 1}, target_id=0 @66 = reshape[dims={1, 1, 6}](@65) -> float_type, {1, 1, 6}, {6, 6, 1}, target_id=0 @67 = hip::copy_from_gpu(@66) -> float_type, {1, 1, 6}, {6, 6, 1}, target_id=0 @68 = load[offset=64,end=136](@1) -> float_type, {3, 1, 1, 6}, {6, 6, 6, 1}, target_id=0 @69 = unsqueeze[axes={0, 1},steps={}](@35) -> float_type, {1, 1, 1, 6}, {6, 6, 6, 1}, target_id=0 @70 = unsqueeze[axes={0, 1},steps={}](@65) -> float_type, {1, 1, 1, 6}, {6, 6, 6, 1}, target_id=0 @71 = unsqueeze[axes={0, 1},steps={}](@48) -> float_type, {1, 1, 1, 6}, {6, 6, 6, 1}, target_id=0 @72 = gpu::code_object[code_object=9536,symbol_name=concat_kernel,global=1024,local=1024,](@69,@71,@70,@68) -> float_type, {3, 1, 1, 6}, {6, 6, 6, 1}, target_id=0 @73 = hip::copy_from_gpu(@72) -> float_type, {3, 1, 1, 6}, {6, 6, 6, 1}, target_id=0 @74 = hip::sync_stream(@73,@67) -> float_type, {3, 1, 1, 6}, {6, 6, 6, 1}, target_id=0 @75 = @return(@74,@67), target_id=0 Mismatched elements: 12 / 18 (66.7%) Max absolute difference: 0.26081663 Max relative difference: 0.72823805 x: array([[[[0.1903 , 0.1903 , 0.1903 , 0.1903 , 0.1903 , 0.1903 ]]], ... y: array([[[[0.1903 , 0.1903 , 0.1903 , 0.1903 , 0.1903 , 0.1903 ]]], ... ```
attila-dusnoki-htec commented 11 months ago

Related to https://github.com/migraphx-benchmark/AMDMIGraphX/issues/102#issuecomment-1757048272