ROCm / HIP

HIP: C++ Heterogeneous-Compute Interface for Portability
https://rocmdocs.amd.com/projects/HIP/
MIT License
3.75k stars 533 forks source link

[Issue]: Cannot create shareable handle #3584

Closed telliere closed 3 weeks ago

telliere commented 2 months ago

Problem Description

While going through the HIP unit tests, I found some code that uses shareable memory handles. I'm trying to reproduce it on my server, without success.

Here is the code :

#include <hip/hip_runtime.h>

#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>

// Rounds `value` up to the nearest multiple of `multiple` using integer division.
// Both arguments are evaluated more than once, so pass side-effect-free expressions.
#define ROUND_UP(value, multiple)                                                                  \
  ((((value) + (multiple) - 1) / (multiple)) * (multiple))

// Evaluates a HIP runtime call and hands its status, the stringified call text and the
// call site (file and line) to hipCheck().
#define HIP_CHECK(val) hipCheck((val), #val, __FILE__, __LINE__)

// Terminates the process when `err` is anything other than hipSuccess.
//
// err  - status returned by the HIP call.
// func - source text of the checked expression, echoed in the diagnostic.
// file - source file of the call site.
// line - source line of the call site.
//
// On failure two lines are written to std::cerr (the location, then the numeric code, its
// description from hipGetErrorString() and the call text) and std::exit(-1) is called.
inline void hipCheck(hipError_t err, const char* const func, const char* const file,
                     const int line) {
  if (err == hipSuccess) {
    return;
  }

  std::cerr << "HIP Runtime Error at: " << file << ":" << line << std::endl
            << err << " : " << hipGetErrorString(err) << " " << func << std::endl;
  std::exit(-1);
}

// Queries hipDeviceAttributeVirtualMemoryManagementSupported for `device` and exits the
// process with status 1 when the attribute is 0; otherwise prints "OK VMM".
//
// The body is wrapped in do { ... } while (0) so that `checkVMMSupported(dev);` is a single
// statement and stays correct inside an unbraced if/else. The argument is parenthesized and
// the locals carry a trailing underscore so they cannot capture a caller variable that
// happens to be named `value` or `attr`.
#define checkVMMSupported(device)                                                                  \
  do {                                                                                             \
    int vmm_supported_ = 0;                                                                        \
    hipDeviceAttribute_t vmm_attr_ = hipDeviceAttributeVirtualMemoryManagementSupported;           \
    HIP_CHECK(hipDeviceGetAttribute(&vmm_supported_, vmm_attr_, (device)));                        \
    if (vmm_supported_ == 0) {                                                                     \
      std::cerr << "VMM Not Supported, aborting." << std::endl;                                    \
      exit(1);                                                                                     \
    }                                                                                              \
    printf("OK VMM\n");                                                                            \
  } while (0)

int main() {
  int fd[2];
  if(pipe(fd) != 0){
    printf("Error opening pipe, aborting.\n");
    exit(1);
  }

  auto pid = fork();

  if(pid < 0){
    printf("Error forking, aborting.\n");
    exit(1);
  }

  if (pid == 0) {  // child
    if(close(fd[1])){
      printf("Error closing 01, aborting.\n");
      exit(1);
    }

    void* shareable_handle = nullptr;
    if(read(fd[0], &shareable_handle, sizeof(shareable_handle)) < 0){
      printf("Error reading, aborting.\n");
      exit(1);
    }
    if(close(fd[0]) != 0){
      printf("Error closing 02, aborting.\n");
      exit(1);
    }

    if(shareable_handle != nullptr){
      printf("Error shareable handle != nullptr, aborting.\n");
      exit(1);
    }

    HIP_CHECK(hipFree(0));

    hipMemGenericAllocationHandle_t imported_handle;
    HIP_CHECK(hipMemImportFromShareableHandle(&imported_handle, shareable_handle,
                                              hipMemHandleTypePosixFileDescriptor));

    exit(0);
  } else {  // parent
    if(close(fd[0])){
      printf("Error closing 11, aborting.\n");
      exit(1);
    }

    HIP_CHECK(hipFree(0));

    hipDevice_t device;
    HIP_CHECK(hipDeviceGet(&device, 0));
    checkVMMSupported(device);

    hipMemAllocationProp prop = {};
    prop.type = hipMemAllocationTypePinned;
    prop.requestedHandleType = hipMemHandleTypePosixFileDescriptor;
    prop.location.type = hipMemLocationTypeDevice;
    prop.location.id = device;

    size_t granularity;
    HIP_CHECK(
        hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum));

    hipMemGenericAllocationHandle_t handle;
    HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0));

    void* shareable_handle = nullptr;
    HIP_CHECK(hipMemExportToShareableHandle(&shareable_handle, handle,
                                            hipMemHandleTypePosixFileDescriptor, 0));

    if(write(fd[1], &shareable_handle, sizeof(shareable_handle)) < 0){
      printf("Error writing 11, aborting.\n");
      exit(1);
    }
    if(close(fd[1]) != 0){
      printf("Error closing 12, aborting.\n");
      exit(1);
    }

    if(wait(NULL) < 0){
      printf("Error waiting, aborting.\n");
      exit(1);
    }

    HIP_CHECK(hipMemRelease(handle));
  }
}

I'm compiling it using the following command :

❯ hipcc hip_shareable_handles.cpp -o hip_shareable_handles

(No output)

But then, when running the program :

OK VMM
HIP Runtime Error at: hip_shareable_handles.cpp:99
801 : operation not supported hipMemCreate(&handle, granularity * 2, &prop, 0)
HIP Runtime Error at: hip_shareable_handles.cpp:73
1 : invalid argument hipMemImportFromShareableHandle(&imported_handle, shareable_handle, hipMemHandleTypePosixFileDescriptor)

Shareable handles do not seem to be supported, even though everything else looks fine (ROCm and HIP versions, the device used, ...). I'm clueless.

Operating System

Ubuntu 22.04.4 LTS (Jammy Jellyfish)

CPU

AMD EPYC 7543

GPU

AMD Instinct MI210

ROCm Version

ROCm 6.1.0

ROCm Component

HIP

Steps to Reproduce

No response

(Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support

No response

Additional Information

No response

jtpatel commented 1 month ago

@telliere, one change that is required: if(shareable_handle != nullptr){ should be if(shareable_handle == nullptr){. I am looking into this further and will update you. Thanks for raising the issue.

jtpatel commented 1 month ago

Hello @telliere, we need to use a Unix domain socket to share the handle between processes.

ppanchad-amd commented 1 month ago

Hi @telliere, do you still need assistance with this ticket? If not, please close the ticket. Thanks

ppanchad-amd commented 3 weeks ago

@telliere Closing ticket due to lack of activity. Please feel free to re-open ticket if you still need assistance. Thanks!