ROCm / apex

A PyTorch Extension: Tools for easy mixed precision and distributed training in Pytorch
BSD 3-Clause "New" or "Revised" License
19 stars 17 forks source link

IFU-master-2022-02-09 #75

Closed hubertlu-tw closed 2 years ago

hubertlu-tw commented 2 years ago

The PR failed some of the unit tests in the MHA extension. To run the MHA extension tests, use the following command:

pytest apex/contrib/test/multihead_attn

The results are as follows:

test_encdec_multihead_attn.py ...                                                                                                                                    [ 30%]
test_encdec_multihead_attn_norm_add.py F                                                                                                                             [ 40%]
test_fast_self_multihead_attn_bias.py F                                                                                                                              [ 50%]
test_mha_fused_softmax.py .                                                                                                                                          [ 60%]
test_self_multihead_attn.py ...                                                                                                                                      [ 90%]
test_self_multihead_attn_norm_add.py F                                                                                                                               [100%]

================================================================================= FAILURES =================================================================================
____________________________________________________ EncdecMultiheadAttnNormAddTest.test_encdec_multihead_attn_norm_add ____________________________________________________

self = <test_encdec_multihead_attn_norm_add.EncdecMultiheadAttnNormAddTest testMethod=test_encdec_multihead_attn_norm_add>

    def test_encdec_multihead_attn_norm_add(self) :
        grads         = torch.randn_like(self.tst_inputs_q)

        ref_outputs,_ = self.ref_layer.forward(self.ref_inputs_q,
                                               self.ref_inputs_k,
                                               self.ref_inputs_k,
                                               key_padding_mask=None,
                                               need_weights=False,
                                               attn_mask=None,
                                               is_training=True)

        tst_outputs,_ = self.tst_layer.forward(self.tst_inputs_q,
                                               self.tst_inputs_k,
                                               self.tst_inputs_k,
                                               key_padding_mask=None,
                                               need_weights=False,
                                               attn_mask=None,
                                               is_training=True)

        self.ref_inputs_q.backward(grads)
        self.tst_inputs_q.backward(grads)

        self.assertTrue(torch.allclose(self.ref_inputs_q,  self.tst_inputs_q,  atol=1e-5, rtol=1e-5))
        self.assertTrue(torch.allclose(self.ref_inputs_k,  self.tst_inputs_k,  atol=1e-5, rtol=1e-5))
>       self.assertTrue(torch.allclose(ref_outputs, tst_outputs, atol=1e-3, rtol=1e-3))
E       AssertionError: False is not true

test_encdec_multihead_attn_norm_add.py:73: AssertionError
_______________________________________________________ SelfMultiheadAttnTest.test_self_multihead_attn_additive_mask _______________________________________________________

self = <test_fast_self_multihead_attn_bias.SelfMultiheadAttnTest testMethod=test_self_multihead_attn_additive_mask>

    def test_self_multihead_attn_additive_mask(self) :
        grads         = torch.randn_like(self.tst_inputs)
        mask = ((torch.randn(self.sequences, self.seq_length) > 0) * -10000.0).half().cuda()

        ref_outputs,_ = self.ref_layer.forward(self.ref_inputs,
                                               self.ref_inputs,
                                               self.ref_inputs,
                                               key_padding_mask=mask,
                                               need_weights=False,
                                               attn_mask=None,
                                               is_training=True)

        tst_outputs,_ = self.tst_layer.forward(self.tst_inputs,
                                               self.tst_inputs,
                                               self.tst_inputs,
                                               key_padding_mask=mask,
                                               need_weights=False,
                                               attn_mask=None,
                                               is_training=True)

        self.ref_inputs.backward(grads)
        self.tst_inputs.backward(grads)

        self.assertTrue(torch.allclose(self.ref_inputs,  self.tst_inputs,  atol=1e-5, rtol=1e-5))
>       self.assertTrue(torch.allclose(ref_outputs, tst_outputs, atol=1e-3, rtol=1e-3))
E       AssertionError: False is not true

test_fast_self_multihead_attn_bias.py:73: AssertionError
______________________________________________________ SelfMultiheadAttnNormAddTest.test_self_multihead_attn_norm_add ______________________________________________________

self = <test_self_multihead_attn_norm_add.SelfMultiheadAttnNormAddTest testMethod=test_self_multihead_attn_norm_add>

    def test_self_multihead_attn_norm_add(self) :
        grads         = torch.randn_like(self.tst_inputs)

        ref_outputs,_ = self.ref_layer.forward(self.ref_inputs,
                                               self.ref_inputs,
                                               self.ref_inputs,
                                               key_padding_mask=None,
                                               need_weights=False,
                                               attn_mask=None,
                                               is_training=True)

        tst_outputs,_ = self.tst_layer.forward(self.tst_inputs,
                                               self.tst_inputs,
                                               self.tst_inputs,
                                               key_padding_mask=None,
                                               need_weights=False,
                                               attn_mask=None,
                                               is_training=True)

        self.ref_inputs.backward(grads)
        self.tst_inputs.backward(grads)

        self.assertTrue(torch.allclose(self.ref_inputs,  self.tst_inputs,  atol=1e-5, rtol=1e-5))
>       self.assertTrue(torch.allclose(ref_outputs, tst_outputs, atol=1e-3, rtol=1e-3))
E       AssertionError: False is not true

test_self_multihead_attn_norm_add.py:68: AssertionError
========================================================================= short test summary info ==========================================================================
FAILED test_encdec_multihead_attn_norm_add.py::EncdecMultiheadAttnNormAddTest::test_encdec_multihead_attn_norm_add - AssertionError: False is not true
FAILED test_fast_self_multihead_attn_bias.py::SelfMultiheadAttnTest::test_self_multihead_attn_additive_mask - AssertionError: False is not true
FAILED test_self_multihead_attn_norm_add.py::SelfMultiheadAttnNormAddTest::test_self_multihead_attn_norm_add - AssertionError: False is not true
======================================================================= 3 failed, 7 passed in 10.46s =======================================================================