HSAFoundation / HSA-Runtime-AMD

The HSA-Runtime
Other
48 stars 16 forks source link

Finalization fails in the second run. #13

Closed harald-lang closed 8 years ago

harald-lang commented 8 years ago

I have a minimal init-dispatch-shutdown test case (see below). The test runs perfectly unless it runs more than once within the same process. The second run fails with a SegFault in hsa_ext_program_finalize_impl() in 8 out of 10 runs. When running under a debugger, 2 out of 10 runs fail.

This issue might be related to https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device/issues/3 (which, unfortunately, is open for almost a year now).

System setup:

Below is a GDB backtrace. The test named HSA.Finalize always succeeds, whereas the HSA.Finalize2 (which runs the same code) fails as described above:

[ RUN      ] HSA.Finalize
&__OpenCL_storeGlobalId_kernel
&__OpenCL_storeGlobalId2_kernel
[New Thread 0x7ffef541d700 (LWP 22471)]
[       OK ] HSA.Finalize (39 ms)
[Thread 0x7ffef541d700 (LWP 22471) exited]
[ RUN      ] HSA.Finalize2

Program received signal SIGSEGV, Segmentation fault.
0x00007ffef5f2aeda in ?? () from /home/hl/git/HSA-Runtime-AMD/lib/libhsa-ext-finalize64.so.1
(gdb) bt
#0  0x00007ffef5f2aeda in ?? () from /home/hl/git/HSA-Runtime-AMD/lib/libhsa-ext-finalize64.so.1
#1  0x00007ffef5f0af91 in ?? () from /home/hl/git/HSA-Runtime-AMD/lib/libhsa-ext-finalize64.so.1
#2  0x00007ffef5f0b394 in ?? () from /home/hl/git/HSA-Runtime-AMD/lib/libhsa-ext-finalize64.so.1
#3  0x00007ffef5a2865f in ?? () from /home/hl/git/HSA-Runtime-AMD/lib/libhsa-ext-finalize64.so.1
#4  0x00007ffef59df41b in ?? () from /home/hl/git/HSA-Runtime-AMD/lib/libhsa-ext-finalize64.so.1
#5  0x00007ffef59cf3c1 in ?? () from /home/hl/git/HSA-Runtime-AMD/lib/libhsa-ext-finalize64.so.1
#6  0x00007ffef59cae47 in hsa_ext_program_finalize_impl () from /home/hl/git/HSA-Runtime-AMD/lib/libhsa-ext-finalize64.so.1
#7  0x000000000041987f in (anonymous namespace)::HSA_Finalize2_Test::TestBody (this=<optimized out>) at test/rts/hsa/TestHsa.cpp:512
#8  0x000000000043d073 in HandleSehExceptionsInMethodIfSupported<testing::Test, void> (location=0x440b82 "the test body", method=<optimized out>, object=<optimized out>)
    at ../../dbcore/test/gtest/gtest.cc:2091
#9  testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void> (object=object@entry=0x455330, 
    method=(void (testing::Test::*)(testing::Test * const)) 0x419510 <(anonymous namespace)::HSA_Finalize2_Test::TestBody()>, location=location@entry=0x440b82 "the test body")
    at ../../dbcore/test/gtest/gtest.cc:2127
#10 0x0000000000433c14 in testing::Test::Run (this=0x455330) at ../../dbcore/test/gtest/gtest.cc:2163
#11 0x0000000000433de0 in testing::TestInfo::Run (this=0x455210) at ../../dbcore/test/gtest/gtest.cc:2339
#12 0x0000000000434025 in Run (this=<optimized out>) at ../../dbcore/test/gtest/gtest.cc:2314
#13 testing::TestCase::Run (this=0x4546d0) at ../../dbcore/test/gtest/gtest.cc:2446
#14 0x00000000004364e6 in Run (this=<optimized out>) at ../../dbcore/test/gtest/gtest.cc:4307
#15 testing::internal::UnitTestImpl::RunAllTests (this=this@entry=0x4543f0) at ../../dbcore/test/gtest/gtest.cc:4269
#16 0x000000000043672f in RunAllTests (this=0x4543f0) at ../../dbcore/test/gtest/gtest.cc:4186
#17 HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool> (location=<optimized out>, method=<optimized out>, object=0x4543f0)
    at ../../dbcore/test/gtest/gtest.cc:2091
#18 HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool> (location=0x43fb68 "auxiliary test code (environments or event listeners)", 
    method=(bool (testing::internal::UnitTestImpl::*)(testing::internal::UnitTestImpl * const)) 0x436690 <testing::internal::UnitTestImpl::RunAllTests()>, object=0x4543f0)
    at ../../dbcore/test/gtest/gtest.cc:2127
#19 testing::UnitTest::Run (this=<optimized out>) at ../../dbcore/test/gtest/gtest.cc:3904
#20 0x00000000004058cf in main (argc=1, argv=<optimized out>) at test/main.cpp:5
TEST(HSA, Finalize) {
    hsa_status_t status;
    status = hsa_init();
    ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

    {

        // Create a HSA program
        hsa_ext_program_t program;
        {
            status = hsa_ext_program_create(
                    HSA_MACHINE_MODEL_LARGE,
                    HSA_PROFILE_FULL,
                    HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
                    nullptr, // no options
                    &program);
            ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

            // Add the BRIG to the program (add module)
            const string filename = "bin/test/rts/hsa/kernel/StoreGlobalId.brig";
            ifstream file(filename, ifstream::binary);
            if (file.fail()) {
                throw "couldn't open file: " + filename;
            }
            string contents((istreambuf_iterator<char>(file)), (istreambuf_iterator<char>()));
            if (contents.substr(0, 8) != "HSA BRIG") {
                throw "invalid magic number";
            }
            status = hsa_ext_program_add_module(program, (hsa_ext_module_t)contents.c_str());
            ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);
        }

        hsa_agent_t kernelAgent = determineKernelAgent();
        hsa_isa_t kernelAgentIsa;
        status = hsa_agent_get_info(kernelAgent, HSA_AGENT_INFO_ISA, &kernelAgentIsa);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        // Finalize
        hsa_code_object_t codeObject;
        {
            hsa_ext_control_directives_t finalizerControlDirectives;
            finalizerControlDirectives.control_directives_mask = 0;
            status = hsa_ext_program_finalize(program, kernelAgentIsa,
                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, finalizerControlDirectives,
                    nullptr, HSA_CODE_OBJECT_TYPE_PROGRAM, &codeObject);
            ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);
            printCodeSymbols(codeObject);
        }
        hsa_ext_program_destroy(program);

        hsa_executable_t executable;
        status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, nullptr, &executable);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        status = hsa_executable_load_code_object(executable, kernelAgent, codeObject, nullptr);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        hsa_code_object_destroy(codeObject);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        status = hsa_executable_freeze(executable, nullptr);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        hsa_executable_symbol_t executableSymbol;
        const char* moduleName = nullptr;
        status = hsa_executable_get_symbol(executable, moduleName, "&__OpenCL_storeGlobalId2_kernel",
                kernelAgent, HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, &executableSymbol);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        uint64_t kernelObject;
        status = hsa_executable_symbol_get_info(executableSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernelObject);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        uint32_t kernelArgSegmentSize;
        status = hsa_executable_symbol_get_info(executableSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernelArgSegmentSize);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        uint32_t kernelGroupSegmentSize;
        status = hsa_executable_symbol_get_info(executableSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &kernelGroupSegmentSize);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        uint32_t kernelPrivateSegmentSize;
        status = hsa_executable_symbol_get_info(executableSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kernelPrivateSegmentSize);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        hsa_region_t kernelArgRegion = determineKernelArgumentRegion(kernelAgent);

        const size_t n = 1024*1024;
        size_t* output = new size_t[n];
        memset(output,0, n * sizeof(size_t));

        KernelArgs kernelArgs = KernelArgs::buildOpenCL<size_t*,size_t>(kernelArgRegion, output, n);

        hsa_queue_t* queue = createQueue(kernelAgent, 4);

        // request a packet id
        uint64_t packetId = hsa_queue_add_write_index_relaxed(queue, 1);
        ASSERT_EQ(0, packetId);

        // Calculate the virtual address of the packet. Note: All packet types are of the same size (64 bytes)
        const uint32_t queueSize = roundUpToNextPowerOfTwo(4);
        const uint32_t queueMask = queueSize - 1;
        hsa_kernel_dispatch_packet_t* packet =
                reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address) + (packetId & queueMask);

        // Reserved fields, private and group memory, and completion signal are all set to 0.
        memset(((uint8_t*) packet) + 4, 0, sizeof(hsa_kernel_dispatch_packet_t) - 4);
        packet->workgroup_size_x = 128;
        packet->workgroup_size_y = 1;
        packet->workgroup_size_z = 1;
        packet->grid_size_x = n;
        packet->grid_size_y = 1;
        packet->grid_size_z = 1;

        // Indicate which executable code to run.
        // The application is expected to have a finalized kernel (for example, using the finalization API).
        packet->kernel_object = kernelObject;
        packet->kernarg_address = *kernelArgs;

        // Create a signal with an initial value of one to monitor the task completion
        hsa_signal_create(1, 0, NULL, &packet->completion_signal);

        uint16_t header = 0;
        header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
        header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
        header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

        // We assume that we only dispatch 1-dimensional kernels.
        constexpr uint16_t numDimensions = 1;
        uint16_t setup = numDimensions << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;

        // Atomically set header and setup fields (as described in the specs)
        __atomic_store_n(reinterpret_cast<uint32_t*>(packet), header | (setup << 16), __ATOMIC_RELEASE);

        // Notify the runtime that a new packet is enqueued
        hsa_signal_store_release(queue->doorbell_signal, packetId);

        // Wait for the task to finish, which is the same as waiting for the value of the completion signal to be zero
        while (hsa_signal_wait_acquire(packet->completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0);

        // Done! The kernel has completed. Time to cleanup resources and leave

        for (size_t i = 0; i < n; i++) {
            ASSERT_EQ(i,output[i]);
        }

        hsa_signal_destroy(packet->completion_signal);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        hsa_queue_destroy(queue);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        hsa_executable_destroy(executable);
        ASSERT_HSA_STATUS(HSA_STATUS_SUCCESS, status);

        delete [] output;

    }
    status = hsa_shut_down();
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
}
harald-lang commented 8 years ago

The problem was, that the HSAIL module was destroyed to early. The spec says: "The HSA runtime does not perform a deep copy of the HSAIL module upon addition. Instead, it stores a pointer to the HSAIL module. The ownership of the HSAIL module belongs to the application, which must ensure that module is not released before destroying the HSAIL program."