FFTW / fftw3

DO NOT CHECK OUT THESE FILES FROM GITHUB UNLESS YOU KNOW WHAT YOU ARE DOING. (See below.)
GNU General Public License v2.0
2.72k stars 661 forks source link

Does fftw_plan_many_dft have a max size? #245

Closed christopherkang closed 3 years ago

christopherkang commented 3 years ago

I've been using FFTW on an array of size 3003 3003 2^N for different values of N. What's strange is that when N <= 13, the code runs fine, but at N = 14 I get a segfault (code below).

Do you have any ideas as to what could be causing this?

    int NUMBER_OF_THREADS = 40;
    fftw_init_threads();
    fftw_plan_with_nthreads(NUMBER_OF_THREADS);
    // std::cout << "NUM THREADS: " << fftw_planner_nthreads() << std::endl;

    auto ld = n_choose_o_ * n_choose_o_;
    uint64_t num_trials = 1 << precision;

    int rank = 1;
    int n[] = {num_trials};
    int howmany = ld;
    int idist = 1;
    int odist = 1;

    int istride = ld;
    int ostride = ld;

    int *inembed = n;
    int *onembed = n;

    auto io = reinterpret_cast<fftw_complex*>(state_vector.data());

    fftw_plan master_plan = fftw_plan_many_dft(
      rank, n, howmany, 
      io, inembed, 
      istride, idist, 
      io, onembed, 
      ostride, odist, 
      -1, FFTW_ESTIMATE);

    std::cout << "DFT Plan Initialized" << std::endl;

    // plan the Fourier Transform
    double NORMALIZATION_FACTOR = sqrt(num_trials);

    fftw_execute(master_plan);

    std::cout << "DFT Plan Executed" << std::endl;

    #pragma omp parallel for
    for (uint64_t i = 0; i < ld * num_trials; i++){
      state_vector[i] /= NORMALIZATION_FACTOR;
    }

    fftw_destroy_plan(master_plan);

Where Statevector is a C++ complex vector initialized and then resize()ed; n_chooseo is 3003 in this case.

stevengj commented 3 years ago

fftw_plan_many_dft takes 32-bit integer arguments. You need to use the guru64 interface if you need sizes that are larger 2^31 - 1: http://fftw.org/fftw3_doc/64_002dbit-Guru-Interface.html#g_t64_002dbit-Guru-Interface

christopherkang commented 3 years ago

@stevengj thanks for the response! I updated the code as described but am still running into segfaults - where else do you think I could debug it?

    StateVector& state_vector = *this;

    // FFTW - PLANNING
    // methods to make this multi-threaded
    int NUMBER_OF_THREADS = 40;
    fftw_init_threads();
    fftw_plan_with_nthreads(NUMBER_OF_THREADS);
    // std::cout << "NUM THREADS: " << fftw_planner_nthreads() << std::endl;

    uint64_t ld = n_choose_o_ * n_choose_o_;
    uint64_t num_trials = 1 << precision;

    fftw_iodim64 arr_dims[1];
    arr_dims[0].n = num_trials;
    arr_dims[0].is = ld;
    arr_dims[0].os = ld;

    fftw_iodim64 arr_howmany_dims[1];
    arr_howmany_dims[0].n = ld;
    arr_howmany_dims[0].is = 1;
    arr_howmany_dims[0].os = 1;

    auto io = reinterpret_cast<fftw_complex*>(state_vector.data());

    fftw_plan master_plan = fftw_plan_guru64_dft(
      1, arr_dims, 1, arr_howmany_dims,
      io, io, -1, FFTW_ESTIMATE
    );

    assert(master_plan != NULL);

    std::cout << "DFT Plan Initialized" << std::endl;

    // plan the Fourier Transform
    double NORMALIZATION_FACTOR = sqrt(num_trials);

    fftw_execute(master_plan);

    std::cout << "DFT Plan Executed" << std::endl;

    #pragma omp parallel for
    for (uint64_t i = 0; i < ld * num_trials; i++){
      state_vector[i] /= NORMALIZATION_FACTOR;
    }

    fftw_destroy_plan(master_plan);
christopherkang commented 3 years ago

@stevengj would appreciate a lookover! Can open a new issue if preferable. Thanks, I appreciate your time and expertise.

stevengj commented 3 years ago

I don't see anything obviously wrong with your code at first glance; it's not possible to help your further without a self-contained runnable example.

christopherkang commented 3 years ago

Thanks for the follow up, @stevengj ! I've made a self-contained example below. Thanks.

Main

int main(int argc, char *argv[])
{
    StateVector sve = StateVector(std::stoi(argv[1]), 15, 5);
    sve[0] = 1.0;

    sve.qift();

    return 0;
}

Statevector file

#include <complex.h>
#include <vector>
#include <fftw3.h>

inline constexpr uint64_t nChoosek(int n, int k) {
  if (k > n) return 0;
  if (k * 2 > n) k = n - k;
  if (k == 0) return 1;

  uint64_t result = n;
  for (int i = 2; i <= k; ++i) {
    result *= (n - i + 1);
    result /= i;
  }
  return result;
}

class StateVector : public std::vector<std::complex<double>> {
 public:
  StateVector(unsigned precision, unsigned num_orbitals,
              unsigned num_occupied)
      : precision{precision},
        num_orbitals_{num_orbitals},
        num_occupied_{num_occupied} {

    assert(num_occupied <= num_orbitals);
    assert(precision > 0);
    n_choose_o_ = nChoosek(num_orbitals, num_occupied);
    resize((1ul << precision) * n_choose_o_ * n_choose_o_, 0);
    assert((*this).max_size() > (1ul << precision) * n_choose_o_ * n_choose_o_);
  }

  void qift() {
    StateVector& state_vector = *this;

    int NUMBER_OF_THREADS = 40;
    fftw_init_threads();
    fftw_plan_with_nthreads(NUMBER_OF_THREADS);

    uint64_t ld = n_choose_o_ * n_choose_o_;
    uint64_t num_trials = 1 << precision;

    fftw_iodim64 arr_dims[1];
    arr_dims[0].n = num_trials;
    arr_dims[0].is = ld;
    arr_dims[0].os = ld;

    fftw_iodim64 arr_howmany_dims[1];
    arr_howmany_dims[0].n = ld;
    arr_howmany_dims[0].is = 1;
    arr_howmany_dims[0].os = 1;

    auto io = reinterpret_cast<fftw_complex*>(state_vector.data());

    fftw_plan master_plan = fftw_plan_guru64_dft(
      1, arr_dims, 1, arr_howmany_dims,
      io, io, -1, FFTW_ESTIMATE
    );

    assert(master_plan != NULL);

    std::cout << "DFT Plan Initialized" << std::endl;

    // plan the Fourier Transform
    double NORMALIZATION_FACTOR = sqrt(num_trials);

    fftw_execute(master_plan);

    std::cout << "DFT Plan Executed" << std::endl;

    #pragma omp parallel for
    for (uint64_t i = 0; i < ld * num_trials; i++){
      state_vector[i] /= NORMALIZATION_FACTOR;
    }

    fftw_destroy_plan(master_plan);
    }
 protected:
  unsigned precision;
  unsigned num_orbitals_;
  unsigned num_occupied_;
  uint64_t n_choose_o_;
}
christopherkang commented 3 years ago

I'm sorry @stevengj , I made a mistake with the self-contained code. This should be more accurate, with the following command being the culprit: ./fftw_test 13

#include <cstdint>
#include <cassert>
#include <iostream>
#include <complex.h>
#include <vector>
#include <fftw3.h>

uint64_t nChoosek(int n, int k)
{
    if (k > n)
        return 0;
    if (k * 2 > n)
        k = n - k;
    if (k == 0)
        return 1;

    uint64_t result = n;
    for (int i = 2; i <= k; ++i)
    {
        result *= (n - i + 1);
        result /= i;
    }
    return result;
}

class StateVector : public std::vector<std::complex<double>>
{
public:
    StateVector(unsigned precision, unsigned num_orbitals,
                unsigned num_occupied)
        : precision{precision}, num_orbitals_{num_orbitals}, num_occupied_{num_occupied}
    {

        assert(num_occupied <= num_orbitals);
        assert(precision > 0);
        n_choose_o_ = nChoosek(num_orbitals, num_occupied);
        resize((1ul << precision) * n_choose_o_ * n_choose_o_, 0);
        assert((*this).max_size() > (1ul << precision) * n_choose_o_ * n_choose_o_);
    }

    void qift()
    {
        StateVector &state_vector = *this;

        int NUMBER_OF_THREADS = 40;
        fftw_init_threads();
        fftw_plan_with_nthreads(NUMBER_OF_THREADS);

        uint64_t ld = n_choose_o_ * n_choose_o_;
        uint64_t num_trials = 1 << precision;

        fftw_iodim64 arr_dims[1];
        arr_dims[0].n = num_trials;
        arr_dims[0].is = ld;
        arr_dims[0].os = ld;

        fftw_iodim64 arr_howmany_dims[1];
        arr_howmany_dims[0].n = ld;
        arr_howmany_dims[0].is = 1;
        arr_howmany_dims[0].os = 1;

        auto io = reinterpret_cast<fftw_complex *>(state_vector.data());

        fftw_plan master_plan = fftw_plan_guru64_dft(
            1, arr_dims, 1, arr_howmany_dims,
            io, io, -1, FFTW_ESTIMATE);

        assert(master_plan != NULL);

        std::cout << "DFT Plan Initialized" << std::endl;

        // plan the Fourier Transform
        double NORMALIZATION_FACTOR = sqrt(num_trials);

        fftw_execute(master_plan);

        std::cout << "DFT Plan Executed" << std::endl;

#pragma omp parallel for
        for (uint64_t i = 0; i < ld * num_trials; i++)
        {
            state_vector[i] /= NORMALIZATION_FACTOR;
        }

        fftw_destroy_plan(master_plan);
    }

protected:
    unsigned precision;
    unsigned num_orbitals_;
    unsigned num_occupied_;
    uint64_t n_choose_o_;
};

int main(int argc, char *argv[])
{
    StateVector sve = StateVector(std::stoi(argv[1]), 15, 5);
    sve[0] = 1.0;

    sve.qift();

    return 0;
}
stevengj commented 3 years ago

You are trying to allocate an array of 2^13 3003^2 16 bytes, which is over 1 terabyte. I can't run it because I don't have this much RAM.

Are you sure you're not just running out of memory? resize should throw a bad_alloc exception…

christopherkang commented 3 years ago

Sorry, how should I go about debugging this? The challenge is that the command only fails once the parameter becomes large (the device I'm running the code on has ~4TB memory)

Best regards, Christopher Kang http://pronoun.is/he

On Sun, Jul 18, 2021 at 9:59 AM Steven G. Johnson @.***> wrote:

You are trying to allocate an array of 2^13 3003^2 16 bytes, which is over 1 terabyte. I can't run it because I don't have this much RAM.

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub https://github.com/FFTW/fftw3/issues/245#issuecomment-882086846, or unsubscribe https://github.com/notifications/unsubscribe-auth/AKCTURDADZE573XCB77U4DLTYMB6FANCNFSM46DC5XCA .