ROCm / hcc

HCC is an Open Source, Optimizing C++ Compiler for Heterogeneous Compute currently for the ROCm GPU Computing Platform
https://github.com/RadeonOpenCompute/hcc/wiki
Other
433 stars 108 forks source link

hc::array fails with Memory access fault on Vega 10 (MI25) #1121

Open ex-rzr opened 5 years ago

ex-rzr commented 5 years ago
#include <iostream>
#include <vector>

#include <hcc/hc.hpp>

// hcc hc_array.cpp -hc -o hc_array && ./hc_array

// Minimal reproducer for the reported GPU memory access fault.
//
// Creates one accelerator view, then loops forever: each pass of the outer
// loop fills a fresh std::vector with `n` hc::array<int> objects of extent 123
// on that view, printing the element index and the array's accelerator
// pointer after every insertion. The function never returns normally.
int main()
{
    hc::accelerator acc;
    hc::accelerator_view acc_view = acc.create_view();

    while (true)
    {
        const size_t n = 3;
        // Intentionally no d_arrays.reserve(n).
        // NOTE(review): the vector presumably relocates already-stored
        // hc::array elements when it grows, which would match the
        // hsa_amd_memory_lock / hsa_amd_memory_async_copy calls seen in the
        // ltrace output just before the fault; reserving up front would
        // likely hide the problem being reported. Confirm before changing.
        std::vector<hc::array<int>> d_arrays;
        for (size_t i = 0; i < n; i++)
        {
            // std::endl flushes, so the index is visible even if the next
            // statement aborts the process.
            std::cout << i << std::endl;
            d_arrays.emplace_back(hc::extent<1>(123), acc_view);
            // Named cast instead of a C-style cast; the stream prints the
            // pointer value either way.
            std::cout << "accelerator_pointer: "
                      << static_cast<const void*>(d_arrays[i].accelerator_pointer())
                      << std::endl;
        }
        // d_arrays is destroyed here, so every outer iteration starts with an
        // empty vector and allocates its arrays again.
    }
}

To compile and run:

hcc hc_array.cpp -hc -o hc_array && ./hc_array

After a few iterations of the outer loop, the program aborts with a GPU memory access fault:

...
1
accelerator_pointer: 0x7f8f0bc03000
2
accelerator_pointer: 0x7f8f0bc02000
0
accelerator_pointer: 0x7f8f0bc02000
1
accelerator_pointer: 0x7f8f0bc03000
2
Memory access fault by GPU node-2 (Agent handle: 0x1f59a00) on address 0x7f911ac92000. Reason: Page not present or supervisor privilege.
Aborted (core dumped)

Hardware and software: Vega 10 [Radeon Instinct MI25] with the latest ROCm (2.3).

HCC clang version 9.0.0 (/data/jenkins_workspace/compute-rocm-rel-2.3/external/hcc-tot/clang 785f31db116e742ac53d052e207979869a857d1a) (/data/jenkins_workspace/compute-rocm-rel-2.3/external/hcc-tot/compiler 87f982f8ce2b85ce824f91bf8c2c90f6843a50a3) (based on HCC 1.3.19115-9b3a740-785f31d-87f982f )

4.15.0-48-generic #51~16.04.1-Ubuntu SMP Fri Apr 5 12:01:12 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux

The same program runs without a fault on Fiji (S9300) + ROCm 2.3 and on Vega 20 + ROCm 2.2.

ex-rzr commented 5 years ago
hcc hc_array.cpp -hc -o hc_array && ltrace -C -e 'hsa*' ./hc_array
1
libmcwamp_hsa.so->hsa_amd_memory_pool_allocate(0x19c1ff0, 492, 0, 0x7ffd1ca9c0d8)                                                                  = 0
libmcwamp_hsa.so->hsa_amd_agents_allow_access(1, 0x19e17f0, 0, 0x7f00db603000 <unfinished ...>
libhsa-runtime64.so.1->hsaKmtQueryPointerInfo(0x7f00db603000, 0x7ffd1ca9bea0, 0, 0)                                                                = 0
libhsa-runtime64.so.1->hsaKmtMapMemoryToGPUNodes(0x7f00db600000, 0x200000, 0x7ffd1ca9bf98, 0)                                                      = 0
<... hsa_amd_agents_allow_access resumed> )                                                                                                        = 0
libmcwamp_hsa.so->hsa_amd_memory_pool_allocate(0x19c1ff0, 492, 0, 0x7ffd1ca9bfb8)                                                                  = 0
libmcwamp_hsa.so->hsa_amd_agents_allow_access(1, 0x19e17f0, 0, 0x7f00db604000 <unfinished ...>
libhsa-runtime64.so.1->hsaKmtQueryPointerInfo(0x7f00db604000, 0x7ffd1ca9bd80, 0, 0)                                                                = 0
libhsa-runtime64.so.1->hsaKmtMapMemoryToGPUNodes(0x7f00db600000, 0x200000, 0x7ffd1ca9be78, 0)                                                      = 0
<... hsa_amd_agents_allow_access resumed> )                                                                                                        = 0
libmcwamp_hsa.so->hsa_amd_memory_lock(0x7f00db602000, 492, 0x19e17f0, 1 <unfinished ...>
libhsa-runtime64.so.1->hsaKmtRegisterMemoryToNodes(0x7f00db602000, 492, 1, 0x1aa9ed0)                                                              = 0
libhsa-runtime64.so.1->hsaKmtMapMemoryToGPUNodes(0x7f00db602000, 492, 0x7ffd1ca9c378, 64)                                                          = 0
<... hsa_amd_memory_lock resumed> )                                                                                                                = 0
libmcwamp_hsa.so->hsa_signal_store_relaxed(0x7f02ea55df80, 1, 0x19c1a00, 0x7f02ea510000)                                                           = 0x7f02ea55df80
libmcwamp_hsa.so->hsa_amd_memory_async_copy(0x7f00db604000, 0x19c1a00, 0x7f02ea510000, 0x19b5880)                                                  = 0
libmcwamp_hsa.so->hsa_signal_wait_scacquire(0x7f02ea55df80, 0, 0, -1)                                                                              = 0
libhsa-runtime64.so.1->hsaKmtUnmapMemoryToGPU(0x7f00db602000, 0x7f00db602000, 0x19b7280, 0x7f02ea55df80)                                           = 0
libhsa-runtime64.so.1->hsaKmtDeregisterMemory(0x7f00db602000, 0, 0x7f02e8bbc3f8, 0x7f02e8f79f00)                                                   = 0
libmcwamp_hsa.so->hsa_amd_memory_pool_free(0x7f00db602000, 0x7f00db602000, 0x1aa7e90, 0x7f02e76f95b0)                                              = 0
accelerator_pointer: 0x7f00db603000
2
libmcwamp_hsa.so->hsa_amd_memory_pool_allocate(0x19c1ff0, 492, 0, 0x7ffd1ca9c0d8)                                                                  = 0
libmcwamp_hsa.so->hsa_amd_agents_allow_access(1, 0x19e17f0, 0, 0x7f00db602000 <unfinished ...>
libhsa-runtime64.so.1->hsaKmtQueryPointerInfo(0x7f00db602000, 0x7ffd1ca9bea0, 0, 0)                                                                = 0
libhsa-runtime64.so.1->hsaKmtMapMemoryToGPUNodes(0x7f00db600000, 0x200000, 0x7ffd1ca9bf98, 0)                                                      = 0
<... hsa_amd_agents_allow_access resumed> )                                                                                                        = 0
libmcwamp_hsa.so->hsa_amd_memory_pool_allocate(0x19c1ff0, 492, 0, 0x7ffd1ca9bfb8)                                                                  = 0
libmcwamp_hsa.so->hsa_amd_agents_allow_access(1, 0x19e17f0, 0, 0x7f00db605000 <unfinished ...>
libhsa-runtime64.so.1->hsaKmtQueryPointerInfo(0x7f00db605000, 0x7ffd1ca9bd80, 0, 0)                                                                = 0
libhsa-runtime64.so.1->hsaKmtMapMemoryToGPUNodes(0x7f00db600000, 0x200000, 0x7ffd1ca9be78, 0)                                                      = 0
<... hsa_amd_agents_allow_access resumed> )                                                                                                        = 0
libmcwamp_hsa.so->hsa_amd_memory_lock(0x7f00db604000, 492, 0x19e17f0, 1 <unfinished ...>
libhsa-runtime64.so.1->hsaKmtRegisterMemoryToNodes(0x7f00db604000, 492, 1, 0x1aea8c0)                                                              = 0
libhsa-runtime64.so.1->hsaKmtMapMemoryToGPUNodes(0x7f00db604000, 492, 0x7ffd1ca9c378, 64)                                                          = 0
<... hsa_amd_memory_lock resumed> )                                                                                                                = 0
libmcwamp_hsa.so->hsa_signal_store_relaxed(0x7f02ea55df80, 1, 0x19c1a00, 0x7f02ea510000)                                                           = 0x7f02ea55df80
libmcwamp_hsa.so->hsa_amd_memory_async_copy(0x7f00db605000, 0x19c1a00, 0x7f02ea510000, 0x19b5880)                                                  = 0
libmcwamp_hsa.so->hsa_signal_wait_scacquire(0x7f02ea55df80, 0, 0, -1Memory access fault by GPU node-2 (Agent handle: 0x19c1a00) on address 0x7f02ea510000. Reason: Page not present or supervisor privilege.
 <no return ...>
+++ killed by SIGABRT +++