NVIDIA / CUDALibrarySamples

CUDA Library Samples
Other
1.52k stars 318 forks source link

cuFFT 1d_r2c_example.cpp wrong settings for batch_size >= 2 #140

Open Robbie-Juelich opened 1 year ago

Robbie-Juelich commented 1 year ago

1. If `batch_size` is not 1, the `output` array size should be `(n / 2 + 1) * batch_size`, because each real-to-complex transform of length `n` produces `n / 2 + 1` complex values.

2. With multiple batches, the second parameter of `cufftPlan1d()`, `nx`, should be `input.size() / batch_size`, not `input.size()`: `nx` is the length of a single transform, while `batch` is the number of such transforms.

```diff
diff --git a/cuFFT/1d_r2c/1d_r2c_example.cpp b/cuFFT/1d_r2c/1d_r2c_example.cpp
index 82384d2..d94a3e4 100644
--- a/cuFFT/1d_r2c/1d_r2c_example.cpp
+++ b/cuFFT/1d_r2c/1d_r2c_example.cpp
@@ -69,7 +69,7 @@ int main(int argc, char *argv[]) {
     using output_type = std::complex<scalar_type>;

     std::vector<input_type> input(fft_size, 0);
-    std::vector<output_type> output(static_cast<int>((fft_size / 2 + 1)));
+    std::vector<output_type> output(static_cast<int>((n / 2 + 1) * batch_size));

     for (int i = 0; i < fft_size; i++) {
         input[i] = static_cast<input_type>(i);
@@ -85,7 +85,7 @@ int main(int argc, char *argv[]) {
     cufftComplex *d_output = nullptr;

     CUFFT_CALL(cufftCreate(&plan));
-    CUFFT_CALL(cufftPlan1d(&plan, input.size(), CUFFT_R2C, batch_size));
+    CUFFT_CALL(cufftPlan1d(&plan, input.size() / batch_size, CUFFT_R2C, batch_size));

     CUDA_RT_CALL(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
     CUFFT_CALL(cufftSetStream(plan, stream));
@@ -122,4 +122,4 @@ int main(int argc, char *argv[]) {
     CUDA_RT_CALL(cudaDeviceReset());

     return EXIT_SUCCESS;
-}
\ No newline at end of file
+}
```