migraphx-benchmark / AMDMIGraphX

AMD's graph optimization engine.
https://rocmsoftwareplatform.github.io/AMDMIGraphX/doc/html/
MIT License
0 stars 1 forks source link

LSTM inaccuracies #102

Closed attila-dusnoki-htec closed 10 months ago

attila-dusnoki-htec commented 1 year ago

Failing tests:

attila-dusnoki-htec commented 1 year ago
FAIL: test_lstm_batchwise_cpu (__main__.OnnxBackendNodeModelTest) ``` ====================================================================== FAIL: test_lstm_batchwise_cpu (__main__.OnnxBackendNodeModelTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "/usr/local/lib/python3.8/dist-packages/onnx/backend/test/runner/__init__.py", line 290, in device_test_func return test_func(*args, device=device, **kwargs) File "/usr/local/lib/python3.8/dist-packages/onnx/backend/test/runner/__init__.py", line 467, in run self.assert_similar_outputs( File "../test/py/onnx_backend_test.py", line 59, in assert_similar_outputs np.testing.assert_allclose(ref_outputs[i], File "/usr/local/lib/python3.8/dist-packages/numpy/testing/_private/utils.py", line 1530, in assert_allclose assert_array_compare(compare, actual, desired, err_msg=str(err_msg), File "/usr/local/lib/python3.8/dist-packages/numpy/testing/_private/utils.py", line 844, in assert_array_compare raise AssertionError(msg) AssertionError: Not equal to tolerance rtol=0.001, atol=1e-05 Program = module: "main" R = @param:R -> float_type, {1, 28, 7}, {196, 7, 1}, target_id=0 W = @param:W -> float_type, {1, 28, 2}, {56, 2, 1}, target_id=0 X = @param:X -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0 @3 = undefined -> float_type, {}, {}, target_id=0 @4 = lstm[hidden_size=7,actv_func={sigmoid, tanh, tanh},direction=forward,clip=0,input_forget=0](X,W,R,@3,@3,@3,@3,@3) -> float_type, {3, 1, 1, 7}, {7, 7, 7, 1}, target_id=0 @5 = rnn_last_hs_output(@4) -> float_type, {1, 1, 7}, {7, 7, 1}, target_id=0 @6 = rnn_last_cell_output(@4) -> float_type, {1, 1, 7}, {7, 7, 1}, target_id=0 @7 = @return(@4,@5), target_id=0 Compiled program = module: "main" @0 = check_context::migraphx::gpu::context -> float_type, {}, {}, target_id=0 @1 = hip::hip_allocate_memory[shape=int8_type, {1408}, {1},id=main:scratch] -> int8_type, {1408}, {1}, target_id=0 W = @param:W -> float_type, {1, 28, 2}, 
{56, 2, 1}, target_id=0 @3 = load[offset=1184,end=1408](@1) -> float_type, {1, 28, 2}, {56, 2, 1}, target_id=0 @4 = hip::copy_to_gpu(W,@3) -> float_type, {1, 28, 2}, {56, 2, 1}, target_id=0 X = @param:X -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0 @6 = load[offset=896,end=920](@1) -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0 @7 = hip::copy_to_gpu(X,@6) -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0 @8 = squeeze[axes={0}](@4) -> float_type, {28, 2}, {2, 1}, target_id=0 @9 = transpose[permutation={1, 0}](@8) -> float_type, {2, 28}, {1, 2}, target_id=0 @10 = load[offset=784,end=896](@1) -> float_type, {1, 28}, {28, 1}, target_id=0 @11 = slice[axes={0},starts={0},ends={1}](@7) -> float_type, {1, 1, 2}, {2, 2, 1}, target_id=0 @12 = squeeze[axes={0}](@11) -> float_type, {1, 2}, {2, 1}, target_id=0 @13 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@12,@9,@10) -> float_type, {1, 28}, {28, 1}, target_id=0 R = @param:R -> float_type, {1, 28, 7}, {196, 7, 1}, target_id=0 @15 = load[offset=0,end=784](@1) -> float_type, {1, 28, 7}, {196, 7, 1}, target_id=0 @16 = hip::copy_to_gpu(R,@15) -> float_type, {1, 28, 7}, {196, 7, 1}, target_id=0 @17 = hip::hip_copy_literal[id=main:@literal:0] -> float_type, {1, 7}, {7, 1}, target_id=0 @18 = squeeze[axes={0}](@16) -> float_type, {28, 7}, {7, 1}, target_id=0 @19 = transpose[permutation={1, 0}](@18) -> float_type, {7, 28}, {1, 7}, target_id=0 @20 = load[offset=1072,end=1184](@1) -> float_type, {1, 28}, {28, 1}, target_id=0 @21 = gpu::gemm[alpha=1,beta=1,int8_x4_format=0,compute_fp32=1,trans_batch=0](@17,@19,@13,@20) -> float_type, {1, 28}, {28, 1}, target_id=0 @22 = load[offset=928,end=956](@1) -> float_type, {1, 7}, {7, 1}, target_id=0 @23 = slice[axes={1},starts={21},ends={28}](@21) -> float_type, {1, 7}, {28, 1}, target_id=0 @24 = slice[axes={1},starts={0},ends={7}](@21) -> float_type, {1, 7}, {28, 1}, target_id=0 @25 = 
gpu::code_object[code_object=9552,symbol_name=tanh_sigmoid_mul_kernel,global=1024,local=1024,](@23,@24,@22) -> float_type, {1, 7}, {7, 1}, target_id=0 @26 = slice[axes={0},starts={2},ends={3}](@7) -> float_type, {1, 1, 2}, {2, 2, 1}, target_id=0 @27 = squeeze[axes={0}](@26) -> float_type, {1, 2}, {2, 1}, target_id=0 @28 = load[offset=784,end=896](@1) -> float_type, {1, 28}, {28, 1}, target_id=0 @29 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@27,@9,@28) -> float_type, {1, 28}, {28, 1}, target_id=0 @30 = load[offset=960,end=1072](@1) -> float_type, {1, 28}, {28, 1}, target_id=0 @31 = slice[axes={0},starts={1},ends={2}](@7) -> float_type, {1, 1, 2}, {2, 2, 1}, target_id=0 @32 = squeeze[axes={0}](@31) -> float_type, {1, 2}, {2, 1}, target_id=0 @33 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@32,@9,@30) -> float_type, {1, 28}, {28, 1}, target_id=0 @34 = slice[axes={1},starts={7},ends={14}](@21) -> float_type, {1, 7}, {28, 1}, target_id=0 @35 = load[offset=896,end=924](@1) -> float_type, {1, 7}, {7, 1}, target_id=0 @36 = gpu::code_object[code_object=9552,symbol_name=tanh_sigmoid_mul_kernel,global=1024,local=1024,](@25,@34,@35) -> float_type, {1, 7}, {7, 1}, target_id=0 @37 = load[offset=1072,end=1184](@1) -> float_type, {1, 28}, {28, 1}, target_id=0 @38 = gpu::gemm[alpha=1,beta=1,int8_x4_format=0,compute_fp32=1,trans_batch=0](@36,@19,@33,@37) -> float_type, {1, 28}, {28, 1}, target_id=0 @39 = load[offset=960,end=988](@1) -> float_type, {1, 7}, {7, 1}, target_id=0 @40 = slice[axes={1},starts={21},ends={28}](@38) -> float_type, {1, 7}, {28, 1}, target_id=0 @41 = slice[axes={1},starts={0},ends={7}](@38) -> float_type, {1, 7}, {28, 1}, target_id=0 @42 = slice[axes={1},starts={14},ends={21}](@38) -> float_type, {1, 7}, {28, 1}, target_id=0 @43 = gpu::code_object[code_object=9584,symbol_name=tanh_sigmoid_mul_sigmoid_mul_add_kernel,global=1024,local=1024,](@40,@41,@42,@25,@39) -> float_type, {1, 7}, {7, 1}, 
target_id=0 @44 = slice[axes={1},starts={7},ends={14}](@38) -> float_type, {1, 7}, {28, 1}, target_id=0 @45 = load[offset=928,end=956](@1) -> float_type, {1, 7}, {7, 1}, target_id=0 @46 = gpu::code_object[code_object=9552,symbol_name=tanh_sigmoid_mul_kernel,global=1024,local=1024,](@43,@44,@45) -> float_type, {1, 7}, {7, 1}, target_id=0 @47 = load[offset=992,end=1104](@1) -> float_type, {1, 28}, {28, 1}, target_id=0 @48 = gpu::gemm[alpha=1,beta=1,int8_x4_format=0,compute_fp32=1,trans_batch=0](@46,@19,@29,@47) -> float_type, {1, 28}, {28, 1}, target_id=0 @49 = slice[axes={1},starts={14},ends={21}](@48) -> float_type, {1, 7}, {28, 1}, target_id=0 @50 = slice[axes={1},starts={7},ends={14}](@48) -> float_type, {1, 7}, {28, 1}, target_id=0 @51 = slice[axes={1},starts={0},ends={7}](@48) -> float_type, {1, 7}, {28, 1}, target_id=0 @52 = slice[axes={1},starts={21},ends={28}](@48) -> float_type, {1, 7}, {28, 1}, target_id=0 @53 = load[offset=0,end=28](@1) -> float_type, {1, 7}, {7, 1}, target_id=0 @54 = gpu::code_object[code_object=9616,symbol_name=tanh_sigmoid_mul_sigmoid_mul_add_tanh_sigmoid_mul_kernel,global=1024,local=1024,](@52,@51,@49,@43,@50,@53) -> float_type, {1, 7}, {7, 1}, target_id=0 @55 = reshape[dims={1, 1, 7}](@54) -> float_type, {1, 1, 7}, {7, 7, 1}, target_id=0 @56 = hip::copy_from_gpu(@55) -> float_type, {1, 1, 7}, {7, 7, 1}, target_id=0 @57 = unsqueeze[axes={0, 1},steps={}](@54) -> float_type, {1, 1, 1, 7}, {7, 7, 7, 1}, target_id=0 @58 = unsqueeze[axes={0, 1},steps={}](@46) -> float_type, {1, 1, 1, 7}, {7, 7, 7, 1}, target_id=0 @59 = load[offset=32,end=116](@1) -> float_type, {3, 1, 1, 7}, {7, 7, 7, 1}, target_id=0 @60 = unsqueeze[axes={0, 1},steps={}](@36) -> float_type, {1, 1, 1, 7}, {7, 7, 7, 1}, target_id=0 @61 = gpu::code_object[code_object=9536,symbol_name=concat_kernel,global=1024,local=1024,](@60,@58,@57,@59) -> float_type, {3, 1, 1, 7}, {7, 7, 7, 1}, target_id=0 @62 = hip::copy_from_gpu(@61) -> float_type, {3, 1, 1, 7}, {7, 7, 7, 1}, target_id=0 
@63 = hip::sync_stream(@62,@56) -> float_type, {3, 1, 1, 7}, {7, 7, 7, 1}, target_id=0 @64 = @return(@63,@56), target_id=0 Mismatched elements: 14 / 21 (66.7%) Max absolute difference: 0.25890702 Max relative difference: 0.26487032 x: array([[[[0.333693, 0.333693, 0.333693, 0.333693, 0.333693, 0.333693, 0.333693]]], ... y: array([[[[0.333693, 0.333693, 0.333693, 0.333693, 0.333693, 0.333693, 0.333693]]], ... ```
gyulaz-htec commented 12 months ago

The issue is caused by the operator's `layout` attribute, which is not supported in AMDMIGraphX. The same problem applies to https://github.com/migraphx-benchmark/AMDMIGraphX/issues/119 and https://github.com/migraphx-benchmark/AMDMIGraphX/issues/133