migraphx-benchmark / AMDMIGraphX

AMD's graph optimization engine.
https://rocmsoftwareplatform.github.io/AMDMIGraphX/doc/html/
MIT License
0 stars 1 forks source link

RNN inaccuracies #119

Closed attila-dusnoki-htec closed 9 months ago

attila-dusnoki-htec commented 1 year ago

Failing tests:

attila-dusnoki-htec commented 1 year ago
FAIL: test_simple_rnn_batchwise_cpu (__main__.OnnxBackendNodeModelTest)

```
======================================================================
FAIL: test_simple_rnn_batchwise_cpu (__main__.OnnxBackendNodeModelTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/onnx/backend/test/runner/__init__.py", line 290, in device_test_func
    return test_func(*args, device=device, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/onnx/backend/test/runner/__init__.py", line 467, in run
    self.assert_similar_outputs(
  File "../test/py/onnx_backend_test.py", line 59, in assert_similar_outputs
    np.testing.assert_allclose(ref_outputs[i],
  File "/usr/local/lib/python3.8/dist-packages/numpy/testing/_private/utils.py", line 1530, in assert_allclose
    assert_array_compare(compare, actual, desired, err_msg=str(err_msg),
  File "/usr/local/lib/python3.8/dist-packages/numpy/testing/_private/utils.py", line 844, in assert_array_compare
    raise AssertionError(msg)
AssertionError: Not equal to tolerance rtol=0.001, atol=1e-05
Program = module: "main"
R = @param:R -> float_type, {1, 4, 4}, {16, 4, 1}, target_id=0
W = @param:W -> float_type, {1, 4, 2}, {8, 2, 1}, target_id=0
X = @param:X -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0
@3 = undefined -> float_type, {}, {}, target_id=0
@4 = rnn[hidden_size=4,actv_func={tanh},direction=forward,clip=0](X,W,R,@3,@3,@3) -> float_type, {3, 1, 1, 4}, {4, 4, 4, 1}, target_id=0
@5 = rnn_last_hs_output(@4) -> float_type, {1, 1, 4}, {4, 4, 1}, target_id=0
@6 = @return(@4,@5), target_id=0
Compiled program = module: "main"
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}, target_id=0
@1 = hip::hip_allocate_memory[shape=int8_type, {192}, {1},id=main:scratch] -> int8_type, {192}, {1}, target_id=0
@2 = load[offset=128,end=160](@1) -> float_type, {1, 4, 2}, {8, 2, 1}, target_id=0
W = @param:W -> float_type, {1, 4, 2}, {8, 2, 1}, target_id=0
@4 = hip::copy_to_gpu(W,@2) -> float_type, {1, 4, 2}, {8, 2, 1}, target_id=0
@5 = load[offset=16,end=80](@1) -> float_type, {1, 4, 4}, {16, 4, 1}, target_id=0
R = @param:R -> float_type, {1, 4, 4}, {16, 4, 1}, target_id=0
@7 = hip::copy_to_gpu(R,@5) -> float_type, {1, 4, 4}, {16, 4, 1}, target_id=0
X = @param:X -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0
@9 = load[offset=160,end=184](@1) -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0
@10 = hip::copy_to_gpu(X,@9) -> float_type, {3, 1, 2}, {2, 2, 1}, target_id=0
@11 = slice[axes={0},starts={0},ends={1}](@10) -> float_type, {1, 1, 2}, {2, 2, 1}, target_id=0
@12 = squeeze[axes={0}](@11) -> float_type, {1, 2}, {2, 1}, target_id=0
@13 = load[offset=112,end=128](@1) -> float_type, {1, 4}, {4, 1}, target_id=0
@14 = squeeze[axes={0}](@4) -> float_type, {4, 2}, {2, 1}, target_id=0
@15 = transpose[permutation={1, 0}](@14) -> float_type, {2, 4}, {1, 2}, target_id=0
@16 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@12,@15,@13) -> float_type, {1, 4}, {4, 1}, target_id=0
@17 = load[offset=96,end=112](@1) -> float_type, {1, 4}, {4, 1}, target_id=0
@18 = slice[axes={0},starts={1},ends={2}](@10) -> float_type, {1, 1, 2}, {2, 2, 1}, target_id=0
@19 = squeeze[axes={0}](@18) -> float_type, {1, 2}, {2, 1}, target_id=0
@20 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@19,@15,@17) -> float_type, {1, 4}, {4, 1}, target_id=0
@21 = slice[axes={0},starts={2},ends={3}](@10) -> float_type, {1, 1, 2}, {2, 2, 1}, target_id=0
@22 = squeeze[axes={0}](@21) -> float_type, {1, 2}, {2, 1}, target_id=0
@23 = load[offset=0,end=16](@1) -> float_type, {1, 4}, {4, 1}, target_id=0
@24 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@22,@15,@23) -> float_type, {1, 4}, {4, 1}, target_id=0
@25 = hip::hip_copy_literal[id=main:@literal:0] -> float_type, {1, 4}, {4, 1}, target_id=0
@26 = squeeze[axes={0}](@7) -> float_type, {4, 4}, {4, 1}, target_id=0
@27 = load[offset=128,end=144](@1) -> float_type, {1, 4}, {4, 1}, target_id=0
@28 = transpose[permutation={1, 0}](@26) -> float_type, {4, 4}, {1, 4}, target_id=0
@29 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@25,@28,@27) -> float_type, {1, 4}, {4, 1}, target_id=0
@30 = load[offset=80,end=96](@1) -> float_type, {1, 4}, {4, 1}, target_id=0
@31 = gpu::code_object[code_object=9536,symbol_name=add_tanh_kernel,global=1024,local=1024,](@16,@29,@30) -> float_type, {1, 4}, {4, 1}, target_id=0
@32 = load[offset=128,end=144](@1) -> float_type, {1, 4}, {4, 1}, target_id=0
@33 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@31,@28,@32) -> float_type, {1, 4}, {4, 1}, target_id=0
@34 = load[offset=112,end=128](@1) -> float_type, {1, 4}, {4, 1}, target_id=0
@35 = gpu::code_object[code_object=9536,symbol_name=add_tanh_kernel,global=1024,local=1024,](@20,@33,@34) -> float_type, {1, 4}, {4, 1}, target_id=0
@36 = load[offset=96,end=112](@1) -> float_type, {1, 4}, {4, 1}, target_id=0
@37 = gpu::gemm[alpha=1,beta=0,int8_x4_format=0,compute_fp32=1,trans_batch=0](@35,@28,@36) -> float_type, {1, 4}, {4, 1}, target_id=0
@38 = load[offset=16,end=32](@1) -> float_type, {1, 4}, {4, 1}, target_id=0
@39 = gpu::code_object[code_object=9536,symbol_name=add_tanh_kernel,global=1024,local=1024,](@24,@37,@38) -> float_type, {1, 4}, {4, 1}, target_id=0
@40 = reshape[dims={1, 1, 4}](@39) -> float_type, {1, 1, 4}, {4, 4, 1}, target_id=0
@41 = hip::copy_from_gpu(@40) -> float_type, {1, 1, 4}, {4, 4, 1}, target_id=0
@42 = unsqueeze[axes={0, 1},steps={}](@35) -> float_type, {1, 1, 1, 4}, {4, 4, 4, 1}, target_id=0
@43 = load[offset=32,end=80](@1) -> float_type, {3, 1, 1, 4}, {4, 4, 4, 1}, target_id=0
@44 = unsqueeze[axes={0, 1},steps={}](@39) -> float_type, {1, 1, 1, 4}, {4, 4, 4, 1}, target_id=0
@45 = unsqueeze[axes={0, 1},steps={}](@31) -> float_type, {1, 1, 1, 4}, {4, 4, 4, 1}, target_id=0
@46 = gpu::code_object[code_object=9536,symbol_name=concat_kernel,global=1024,local=1024,](@45,@42,@44,@43) -> float_type, {3, 1, 1, 4}, {4, 4, 4, 1}, target_id=0
@47 = hip::copy_from_gpu(@46) -> float_type, {3, 1, 1, 4}, {4, 4, 4, 1}, target_id=0
@48 = hip::sync_stream(@47,@41) -> float_type, {3, 1, 1, 4}, {4, 4, 4, 1}, target_id=0
@49 = @return(@48,@41), target_id=0
Mismatched elements: 4 / 12 (33.3%)
Max absolute difference: 0.0017733
Max relative difference: 0.00177338
 x: array([[[[0.905148, 0.905148, 0.905148, 0.905148]]], ...
 y: array([[[[0.905148, 0.905148, 0.905148, 0.905148]]], ...
```
attila-dusnoki-htec commented 11 months ago

Related to https://github.com/migraphx-benchmark/AMDMIGraphX/issues/102#issuecomment-1757048272