intel / compute-runtime

Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver
MIT License
1.12k stars 230 forks source link

A770: Printing empty string segfaults #635

Open maleadt opened 1 year ago

maleadt commented 1 year ago

I'm working on Julia support for oneAPI, and after upgrading to an A770 I noticed that printing an empty string segfaults.

Julia MWE:

using oneAPI

# Minimal reproducer kernel: its only action is a device-side printf whose
# format string is empty.
function kernel()
    oneAPI.@printf("")
    return
end

@oneapi kernel()
synchronize()
signal (11): Segmentation fault
in expression starting at /home/tim/Julia/pkg/oneAPI/wip.jl:13
strnlen_s at /workspace/srcdir/compute-runtime/shared/source/helpers/string.h:40
printString at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:54
printKernelOutput at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:48
printOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/printf_handler/printf_handler.cpp:76
printPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/kernel/kernel_imp.cpp:974
printKernelsPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:178
postSyncOperations at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:184
synchronizeByPollingForTaskCount at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:171
synchronize at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:147
zeCommandQueueSynchronize at /workspace/srcdir/compute-runtime/level_zero/api/core/ze_cmdqueue_api_entrypoints.h:39
macro expansion at /home/tim/Julia/pkg/oneAPI/lib/level-zero/libze.jl:1556 [inlined]
macro expansion at /home/tim/Julia/pkg/oneAPI/lib/level-zero/utils.jl:5 [inlined]
macro expansion at /home/tim/Julia/pkg/oneAPI/lib/level-zero/libze.jl:13 [inlined]
zeCommandQueueSynchronize at /home/tim/Julia/pkg/oneAPI/lib/utils/call.jl:24
synchronize at /home/tim/Julia/pkg/oneAPI/lib/level-zero/cmdqueue.jl:34 [inlined]
synchronize at /home/tim/Julia/pkg/oneAPI/lib/level-zero/cmdqueue.jl:34 [inlined]
synchronize at /home/tim/Julia/pkg/oneAPI/src/context.jl:59
main at /home/tim/Julia/pkg/oneAPI/wip.jl:10
unknown function (ip: 0x7ff371f1767f)

The kernel above generates the following LLVM IR:

julia> oneAPI.code_llvm(kernel, Tuple{}; kernel=true, dump_module=true, debuginfo=:none)
; ModuleID = 'text'
source_filename = "text"
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-unknown"

@0 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1

declare i32 @printf(i8*, ...)

define spir_kernel void @_Z6kernel() local_unnamed_addr #0 {
conversion:
  %0 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @0, i64 0, i64 0))
  ret void
}

attributes #0 = { "probe-stack"="inline-asm" }

!llvm.module.flags = !{!0, !1}
!opencl.ocl.version = !{!2}
!opencl.spirv.version = !{!3}
!julia.kernel = !{!4}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 2, i32 0}
!3 = !{i32 1, i32 5}
!4 = !{void ()* @_Z6kernel}

... which we translate to SPIR-V using the Khronos translator:

; SPIR-V
; Version: 1.0
; Generator: Khronos LLVM/SPIR-V Translator; 14
; Bound: 18
; Schema: 0
               OpCapability Addresses
               OpCapability Kernel
               OpCapability Int64
               OpCapability Int8
          %1 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %11 "_Z6kernel"
               OpSource OpenCL_C 200000
               OpName %conversion "conversion"
               OpDecorate %8 Constant
               OpDecorate %8 Alignment 1
      %ulong = OpTypeInt 64 0
      %uchar = OpTypeInt 8 0
       %uint = OpTypeInt 32 0
    %ulong_1 = OpConstant %ulong 1
    %ulong_0 = OpConstant %ulong 0
%_arr_uchar_ulong_1 = OpTypeArray %uchar %ulong_1
%_ptr_Function__arr_uchar_ulong_1 = OpTypePointer Function %_arr_uchar_ulong_1
       %void = OpTypeVoid
         %10 = OpTypeFunction %void
%_ptr_Function_uchar = OpTypePointer Function %uchar
          %6 = OpConstantNull %_arr_uchar_ulong_1
          %8 = OpVariable %_ptr_Function__arr_uchar_ulong_1 Function %6
         %11 = OpFunction %void None %10
 %conversion = OpLabel
         %15 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %8 %ulong_0 %ulong_0
         %17 = OpExtInst %uint %1 printf %15
               OpReturn
               OpFunctionEnd

The compiled SPIR-V kernel is attached, and can be loaded (after extracting) using the following C-based loader:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <level_zero/ze_api.h>
#include <level_zero/zet_api.h>

/*
 * Reads the entire contents of `filename` into a newly malloc'ed buffer.
 *
 * filename   - path of the binary file to load (opened in "rb" mode).
 * spirv      - out: receives the buffer; the caller releases it with free().
 * spirv_size - out: receives the number of bytes read.
 *
 * Every failure (open, seek, size query, allocation, short read) is reported
 * through assert(), matching the error handling used by the rest of this
 * loader.
 */
void read_spirv_binary(const char *filename, uint8_t **spirv, size_t *spirv_size) {
    FILE *file = fopen(filename, "rb");
    assert(file != NULL);

    /* Measure the file by seeking to its end and asking for the position. */
    int seek_status = fseek(file, 0, SEEK_END);
    assert(seek_status == 0);

    /* ftell signals failure with -1L; check it while the value is still
     * signed, otherwise the conversion to size_t turns it into a huge size. */
    long file_length = ftell(file);
    assert(file_length >= 0);

    seek_status = fseek(file, 0, SEEK_SET);
    assert(seek_status == 0);

    *spirv_size = (size_t)file_length;

    /* malloc(0) is allowed to return NULL, so request at least one byte to
     * keep the NULL check below meaningful for an empty file. */
    *spirv = (uint8_t *)malloc(*spirv_size > 0 ? *spirv_size : 1);
    assert(*spirv != NULL);

    size_t bytes_read = fread(*spirv, 1, *spirv_size, file);
    assert(bytes_read == *spirv_size);

    fclose(file);
}

/*
 * Loader for the reproducer: reads the SPIR-V binary named by argv[1], builds
 * a Level Zero module from it, launches the `_Z6kernel` entry point with a
 * single work-group, and synchronizes the command queue.
 *
 * Returns 1 when the command line is wrong, 0 otherwise. Every Level Zero
 * call is checked with assert(), so any API failure aborts the program.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <path to SPIR-V binary>\n", argv[0]);
        return 1;
    }

    ze_result_t result = zeInit(0);
    assert(result == ZE_RESULT_SUCCESS);

    /* First call with a NULL array only queries how many drivers exist. */
    uint32_t driver_count = 0;
    result = zeDriverGet(&driver_count, NULL);
    assert(result == ZE_RESULT_SUCCESS);
    assert(driver_count > 0);

    /* Only one handle's worth of storage is provided, so request exactly one
     * driver. Passing the full count from the query above would let the
     * runtime write past `driver` when several drivers are installed. */
    ze_driver_handle_t driver;
    driver_count = 1;
    result = zeDriverGet(&driver_count, &driver);
    assert(result == ZE_RESULT_SUCCESS);

    /* Same query-then-retrieve pattern for devices. */
    uint32_t device_count = 0;
    result = zeDeviceGet(driver, &device_count, NULL);
    assert(result == ZE_RESULT_SUCCESS);
    assert(device_count > 0);

    /* As above: `device` holds a single handle, so cap the request at one. */
    ze_device_handle_t device;
    device_count = 1;
    result = zeDeviceGet(driver, &device_count, &device);
    assert(result == ZE_RESULT_SUCCESS);

    ze_context_handle_t context;
    ze_context_desc_t context_desc = {
        .stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC,
        .pNext = NULL,
        .flags = 0
    };
    result = zeContextCreate(driver, &context_desc, &context);
    assert(result == ZE_RESULT_SUCCESS);

    /* Load the SPIR-V binary from disk; the buffer is freed during cleanup. */
    uint8_t *spirv;
    size_t spirv_size;
    read_spirv_binary(argv[1], &spirv, &spirv_size);

    ze_module_handle_t module;
    ze_module_desc_t module_desc = {
        .stype = ZE_STRUCTURE_TYPE_MODULE_DESC,
        .pNext = NULL,
        .format = ZE_MODULE_FORMAT_IL_SPIRV,
        .inputSize = spirv_size,
        .pInputModule = spirv,
        .pBuildFlags = ""
    };
    result = zeModuleCreate(context, device, &module_desc, &module, NULL);
    assert(result == ZE_RESULT_SUCCESS);

    /* `_Z6kernel` is the entry point name declared by the SPIR-V module. */
    ze_kernel_handle_t kernel;
    ze_kernel_desc_t kernel_desc = {
        .stype = ZE_STRUCTURE_TYPE_KERNEL_DESC,
        .pNext = NULL,
        .flags = 0,
        .pKernelName = "_Z6kernel"
    };
    result = zeKernelCreate(module, &kernel_desc, &kernel);
    assert(result == ZE_RESULT_SUCCESS);

    ze_command_queue_handle_t cmd_queue;
    ze_command_queue_desc_t cmd_queue_desc = {
        .stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
        .pNext = NULL,
        .ordinal = 0,
        .index = 0,
        .flags = 0,
        .mode = ZE_COMMAND_QUEUE_MODE_DEFAULT,
        .priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL
    };
    result = zeCommandQueueCreate(context, device, &cmd_queue_desc, &cmd_queue);
    assert(result == ZE_RESULT_SUCCESS);

    ze_command_list_handle_t cmd_list;
    ze_command_list_desc_t cmd_list_desc = {
        .stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC,
        .pNext = NULL,
        .commandQueueGroupOrdinal = 0,
        .flags = 0
    };
    result = zeCommandListCreate(context, device, &cmd_list_desc, &cmd_list);
    assert(result == ZE_RESULT_SUCCESS);

    /* Launch a single work-group; the kernel takes no arguments. */
    ze_group_count_t group_count = {
        .groupCountX = 1,
        .groupCountY = 1,
        .groupCountZ = 1
    };
    result = zeCommandListAppendLaunchKernel(cmd_list, kernel, &group_count, NULL, 0, NULL);
    assert(result == ZE_RESULT_SUCCESS);

    result = zeCommandListClose(cmd_list);
    assert(result == ZE_RESULT_SUCCESS);

    result = zeCommandQueueExecuteCommandLists(cmd_queue, 1, &cmd_list, NULL);
    assert(result == ZE_RESULT_SUCCESS);

    /* The backtraces reported for this reproducer show the crash inside this
     * call, where the runtime prints the kernel's printf output. */
    result = zeCommandQueueSynchronize(cmd_queue, UINT32_MAX);
    assert(result == ZE_RESULT_SUCCESS);

    zeKernelDestroy(kernel);
    zeModuleDestroy(module);
    free(spirv);
    zeCommandListDestroy(cmd_list);
    zeCommandQueueDestroy(cmd_queue);
    zeContextDestroy(context);

    return 0;
}

Tested on Linux 6.2.11, using both compute-runtime 22.43.24595.30 from the Arch Linux repos and our own build of 22.53.25593. Printing non-empty strings works, as does printing empty strings on another system of mine (a NUC with Xe graphics, running Linux 5.10 with compute-runtime 22.53.25593).

kernel.spv.zip

JablonskiMateusz commented 1 year ago

Fixed in https://github.com/intel/compute-runtime/commit/1a1bd04d4a7ee9fc0a5fedf2bcf656f40fc0f3f9

@maleadt could you confirm it is working fine now?

maleadt commented 1 year ago

No. The issue doesn't seem to be a nullpointer exception.

❯ gdb --args /home/tim/Julia/depot/juliaup/julia-1.8.5+0.x64.linux.gnu/bin/julia --project wip.jl
Thread 1 "julia" received signal SIGSEGV, Segmentation fault.

#2  0x00007ffeb54a2acd in NEO::PrintFormatter::printKernelOutput(std::function<void (char*)> const&) (this=0x7fffffffb0f0, print=...)
    at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:49
49                  printString(formatString, print);
(gdb) l
44      } else {
45          while (currentOffset + sizeof(char *) <= printfOutputBufferSize) {
46              char *formatString = nullptr;
47              read(&formatString);
48              if (formatString != nullptr) {
49                  printString(formatString, print);
50              }
51          }
52      }
53  }
(gdb) p formatString
$1 = 0xffffd556aa670000 <error: Cannot access memory at address 0xffffd556aa670000>
JablonskiMateusz commented 1 year ago

Handling nullptr on neo side seems to be correct, IGC should take a look at this issue from their side

maleadt commented 1 year ago

Any update?

amielcza commented 9 months ago

Hi, the issue was fixed in this commit

maleadt commented 6 months ago

This still fails, now on non-Arc hardware too. I'm using NEO v24.5.28454 and IGC v1.0.15985.

[3137871] signal (11.1): Segmentation fault
in expression starting at REPL[4]:1
strnlen_s at /workspace/srcdir/compute-runtime/shared/source/helpers/string.h:40
printString at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:59
printKernelOutput at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:52
printOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/printf_handler/printf_handler.cpp:76
printPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/kernel/kernel_imp.cpp:1163
printKernelsPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:196
postSyncOperations at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:213
synchronizeByPollingForTaskCount at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:188
synchronize at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:164
zeCommandQueueSynchronize at /workspace/srcdir/compute-runtime/level_zero/api/core/ze_cmdqueue_api_entrypoints.h:39
eero-t commented 4 months ago

the issue was fixed in this commit

This still fails, now on non-Arc hardware too. I'm using NEO v24.5.28454 and IGC v1.0.15985.

@JablonskiMateusz / @amielcza igc-1.0.15985.0 contains 06eeecbc8a, so that fix was not good/enough?