ROCm / rocprofiler-compute

Advanced Profiling and Analytics for AMD Hardware
https://rocm.docs.amd.com/projects/omniperf/en/latest/
MIT License
135 stars 49 forks source link

Peak ALU line does not extend to end of chart (web GUI) #345

Open benrichard-amd opened 6 months ago

benrichard-amd commented 6 months ago

Describe the bug

When profiling a workload with very high arithmetic intensity, the peak ALU line stops in the middle of the chart while the data points appear further right.

To Reproduce

Profile a workload with very high arithmetic intensity (see the example program below)

Analyze in web GUI

Expected behavior: The peak ALU rooflines extend to the end of the chart (or at least appear above the data points)

Screenshots

off_the_charts

Additional context

#include <hip/hip_runtime.h>

#include <cstdio>
#include <cstdlib>

// Compute-heavy kernel: each in-range thread performs a single load from `in`
// and a single store to `out`, with 10000 * 32 iterations of two
// multiply-then-add updates in between.
//
//   in   - input buffer; thread with global index i reads in[i]
//   out  - output buffer; thread with global index i writes out[i]
//   x    - constant added on every update
//   size - element count; threads whose global index is >= size do nothing
__global__ void busy_kernel(float *in, float *out, float x, int size) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;

  // Threads past the end of the buffers have no work to do.
  if (idx >= size) {
    return;
  }

  // Two accumulators, both seeded from the same input element.
  float acc0 = in[idx];
  float acc1 = acc0 * acc0 + 1.0f;

  for (int outer = 0; outer != 10000; ++outer) {
    for (int inner = 0; inner != 32; ++inner) {
      acc0 = acc0 * acc0 + x;
      acc1 = acc1 * acc1 + x;
    }
  }

  // Combine both accumulators so neither update chain is dead code.
  out[idx] = acc0 + acc1;
}

int main() {
  int size = 1024 * 1024;
  int *in, *out;
  hipMalloc(&in, size * sizeof(float));
  hipMalloc(&out, size * sizeof(float));

  int blockSize = 64;
  int gridSize = (size + blockSize - 1) / blockSize;

  hipLaunchKernelGGL(busy_kernel, dim3(gridSize), dim3(blockSize), 0, 0, (float*)in, (float*)out, 1.0f, size);

  hipDeviceSynchronize();

  hipFree(in);
  hipFree(out);

  return 0;
}