CHIP-SPV / chipStar

chipStar is a tool for compiling and running HIP/CUDA on SPIR-V via OpenCL or Level Zero APIs.
Other
157 stars 27 forks source link

Is CHIP_JIT_FLAGS working as expected? #841

Closed colleeneb closed 2 months ago

colleeneb commented 2 months ago

I tried using export CHIP_JIT_FLAGS="-ze-opt-large-register-file" to add extra flags to zeModuleCreate, but when I checked with iprof I didn't see any extra flags getting passed. Is this supposed to work and I'm missing something? I'm using PR https://github.com/CHIP-SPV/chipStar/pull/838 to test.

The code I used is below:

> cat s.cpp
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "hip/hip_runtime.h"

/* Evaluate a HIP runtime call and abort with a diagnostic if it does not
 * return hipSuccess.
 *
 * The call is evaluated unconditionally. Wrapping it in assert() would
 * compile the call itself out whenever NDEBUG is defined, silently
 * skipping the allocation/copy/free it was meant to check. */
#define HIP_ASSERT(x)                                                  \
  do {                                                                 \
    hipError_t hip_assert_status_ = (x);                               \
    if (hip_assert_status_ != hipSuccess) {                            \
      fprintf(stderr, "%s:%d: HIP call failed: %s\n",                  \
              __FILE__, __LINE__,                                      \
              hipGetErrorString(hip_assert_status_));                  \
      abort();                                                         \
    }                                                                  \
  } while (0)

/* SAXPY kernel: for each index below n, stores a*x[idx] + y[idx] into y[idx].
 * The index is derived from the block size, block index and thread index;
 * threads whose index is n or greater do nothing. */
__global__
void saxpy(int n, float a, float *x, float *y)
{
  const int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;

  /* Guard: the launch may cover more threads than there are elements. */
  if (idx >= n) {
    return;
  }

  y[idx] = a*x[idx] + y[idx];
}

int main(void)
{
  int N = 1<<20;
  float *x, *y, *d_x, *d_y;
  x = (float*)malloc(N*sizeof(float));
  y = (float*)malloc(N*sizeof(float));

  HIP_ASSERT(hipMalloc(&d_x, N*sizeof(float)));
  HIP_ASSERT(hipMalloc(&d_y, N*sizeof(float)));

  for (int i = 0; i < N; i++) {
    x[i] = 1.0f;
    y[i] = 2.0f;
  }

  HIP_ASSERT(hipMemcpy(d_x, x, N*sizeof(float), hipMemcpyHostToDevice));
  HIP_ASSERT(hipMemcpy(d_y, y, N*sizeof(float), hipMemcpyHostToDevice));

  // Perform SAXPY on 1M elements
  hipLaunchKernelGGL(saxpy,(N+255)/256, 256,0,0,N, 2.0f, d_x, d_y );

  HIP_ASSERT(hipMemcpy(y, d_y, N*sizeof(float), hipMemcpyDeviceToHost));

  float maxError = 0.0f;
  for (int i = 0; i < N; i++)
    maxError = ( maxError > abs(y[i]-4.0f) ) ? maxError : abs(y[i]-4.0f) ;
  printf("Max error: %f\n", maxError);

  HIP_ASSERT(hipFree(d_x));
  HIP_ASSERT(hipFree(d_y));
  free(x);
  free(y);
}

Compile

hipcc s.cpp

Run

module load thapi
CHIP_BE=level0 CHIP_LOGLEVEL=debug iprof -t -- mpirun -n 1 --cpu-bind=depth ./a.out
pvelesko commented 2 months ago

The environment variable is CHIP_JIT_FLAGS_OVERRIDE, not CHIP_JIT_FLAGS.

The README needs to be updated to reflect this.