Does fftw_plan_many_dft have a max size?

christopherkang commented 3 years ago

I've been using FFTW on an array of size 3003 3003 2^N for different values of N. What's strange is that when N <= 13, the code runs fine, but at N = 14 I get a segfault (code below).

Do you have any ideas as to what could be causing this?

    int NUMBER_OF_THREADS = 40;
    fftw_init_threads();
    fftw_plan_with_nthreads(NUMBER_OF_THREADS);
    // std::cout << "NUM THREADS: " << fftw_planner_nthreads() << std::endl;

    auto ld = n_choose_o_ * n_choose_o_;
    uint64_t num_trials = 1 << precision;

    int rank = 1;
    int n[] = {num_trials};
    int howmany = ld;
    int idist = 1;
    int odist = 1;

    int istride = ld;
    int ostride = ld;

    int *inembed = n;
    int *onembed = n;

    auto io = reinterpret_cast<fftw_complex*>(state_vector.data());

    fftw_plan master_plan = fftw_plan_many_dft(
      rank, n, howmany, 
      io, inembed, 
      istride, idist, 
      io, onembed, 
      ostride, odist, 
      -1, FFTW_ESTIMATE);

    std::cout << "DFT Plan Initialized" << std::endl;

    // plan the Fourier Transform
    double NORMALIZATION_FACTOR = sqrt(num_trials);

    fftw_execute(master_plan);

    std::cout << "DFT Plan Executed" << std::endl;

    #pragma omp parallel for
    for (uint64_t i = 0; i < ld * num_trials; i++){
      state_vector[i] /= NORMALIZATION_FACTOR;
    }

    fftw_destroy_plan(master_plan);

Where Statevector is a C++ complex vector initialized and then resize()ed; n_chooseo is 3003 in this case.

stevengj commented 3 years ago

fftw_plan_many_dft takes 32-bit integer arguments. You need to use the guru64 interface if you need sizes that are larger 2^31 - 1: http://fftw.org/fftw3_doc/64_002dbit-Guru-Interface.html#g_t64_002dbit-Guru-Interface

christopherkang commented 3 years ago

@stevengj thanks for the response! I updated the code as described but am still running into segfaults - where else do you think I could debug it?

    StateVector& state_vector = *this;

    // FFTW - PLANNING
    // methods to make this multi-threaded
    int NUMBER_OF_THREADS = 40;
    fftw_init_threads();
    fftw_plan_with_nthreads(NUMBER_OF_THREADS);
    // std::cout << "NUM THREADS: " << fftw_planner_nthreads() << std::endl;

    uint64_t ld = n_choose_o_ * n_choose_o_;
    uint64_t num_trials = 1 << precision;

    fftw_iodim64 arr_dims[1];
    arr_dims[0].n = num_trials;
    arr_dims[0].is = ld;
    arr_dims[0].os = ld;

    fftw_iodim64 arr_howmany_dims[1];
    arr_howmany_dims[0].n = ld;
    arr_howmany_dims[0].is = 1;
    arr_howmany_dims[0].os = 1;

    auto io = reinterpret_cast<fftw_complex*>(state_vector.data());

    fftw_plan master_plan = fftw_plan_guru64_dft(
      1, arr_dims, 1, arr_howmany_dims,
      io, io, -1, FFTW_ESTIMATE
    );

    assert(master_plan != NULL);

    std::cout << "DFT Plan Initialized" << std::endl;

    // plan the Fourier Transform
    double NORMALIZATION_FACTOR = sqrt(num_trials);

    fftw_execute(master_plan);

    std::cout << "DFT Plan Executed" << std::endl;

    #pragma omp parallel for
    for (uint64_t i = 0; i < ld * num_trials; i++){
      state_vector[i] /= NORMALIZATION_FACTOR;
    }

    fftw_destroy_plan(master_plan);

christopherkang commented 3 years ago

@stevengj would appreciate a lookover! Can open a new issue if preferable. Thanks, I appreciate your time and expertise.

stevengj commented 3 years ago

I don't see anything obviously wrong with your code at first glance; it's not possible to help your further without a self-contained runnable example.

christopherkang commented 3 years ago

Thanks for the follow up, @stevengj ! I've made a self-contained example below. Thanks.

Main

int main(int argc, char *argv[])
{
    StateVector sve = StateVector(std::stoi(argv[1]), 15, 5);
    sve[0] = 1.0;

    sve.qift();

    return 0;
}

Statevector file

#include <complex.h>
#include <vector>
#include <fftw3.h>

inline constexpr uint64_t nChoosek(int n, int k) {
  if (k > n) return 0;
  if (k * 2 > n) k = n - k;
  if (k == 0) return 1;

  uint64_t result = n;
  for (int i = 2; i <= k; ++i) {
    result *= (n - i + 1);
    result /= i;
  }
  return result;
}

class StateVector : public std::vector<std::complex<double>> {
 public:
  StateVector(unsigned precision, unsigned num_orbitals,
              unsigned num_occupied)
      : precision{precision},
        num_orbitals_{num_orbitals},
        num_occupied_{num_occupied} {

    assert(num_occupied <= num_orbitals);
    assert(precision > 0);
    n_choose_o_ = nChoosek(num_orbitals, num_occupied);
    resize((1ul << precision) * n_choose_o_ * n_choose_o_, 0);
    assert((*this).max_size() > (1ul << precision) * n_choose_o_ * n_choose_o_);
  }

  void qift() {
    StateVector& state_vector = *this;

    int NUMBER_OF_THREADS = 40;
    fftw_init_threads();
    fftw_plan_with_nthreads(NUMBER_OF_THREADS);

    uint64_t ld = n_choose_o_ * n_choose_o_;
    uint64_t num_trials = 1 << precision;

    fftw_iodim64 arr_dims[1];
    arr_dims[0].n = num_trials;
    arr_dims[0].is = ld;
    arr_dims[0].os = ld;

    fftw_iodim64 arr_howmany_dims[1];
    arr_howmany_dims[0].n = ld;
    arr_howmany_dims[0].is = 1;
    arr_howmany_dims[0].os = 1;

    auto io = reinterpret_cast<fftw_complex*>(state_vector.data());

    fftw_plan master_plan = fftw_plan_guru64_dft(
      1, arr_dims, 1, arr_howmany_dims,
      io, io, -1, FFTW_ESTIMATE
    );

    assert(master_plan != NULL);

    std::cout << "DFT Plan Initialized" << std::endl;

    // plan the Fourier Transform
    double NORMALIZATION_FACTOR = sqrt(num_trials);

    fftw_execute(master_plan);

    std::cout << "DFT Plan Executed" << std::endl;

    #pragma omp parallel for
    for (uint64_t i = 0; i < ld * num_trials; i++){
      state_vector[i] /= NORMALIZATION_FACTOR;
    }

    fftw_destroy_plan(master_plan);
    }
 protected:
  unsigned precision;
  unsigned num_orbitals_;
  unsigned num_occupied_;
  uint64_t n_choose_o_;
}

christopherkang commented 3 years ago

I'm sorry @stevengj , I made a mistake with the self-contained code. This should be more accurate, with the following command being the culprit: ./fftw_test 13

#include <cstdint>
#include <cassert>
#include <iostream>
#include <complex.h>
#include <vector>
#include <fftw3.h>

uint64_t nChoosek(int n, int k)
{
    if (k > n)
        return 0;
    if (k * 2 > n)
        k = n - k;
    if (k == 0)
        return 1;

    uint64_t result = n;
    for (int i = 2; i <= k; ++i)
    {
        result *= (n - i + 1);
        result /= i;
    }
    return result;
}

class StateVector : public std::vector<std::complex<double>>
{
public:
    StateVector(unsigned precision, unsigned num_orbitals,
                unsigned num_occupied)
        : precision{precision}, num_orbitals_{num_orbitals}, num_occupied_{num_occupied}
    {

        assert(num_occupied <= num_orbitals);
        assert(precision > 0);
        n_choose_o_ = nChoosek(num_orbitals, num_occupied);
        resize((1ul << precision) * n_choose_o_ * n_choose_o_, 0);
        assert((*this).max_size() > (1ul << precision) * n_choose_o_ * n_choose_o_);
    }

    void qift()
    {
        StateVector &state_vector = *this;

        int NUMBER_OF_THREADS = 40;
        fftw_init_threads();
        fftw_plan_with_nthreads(NUMBER_OF_THREADS);

        uint64_t ld = n_choose_o_ * n_choose_o_;
        uint64_t num_trials = 1 << precision;

        fftw_iodim64 arr_dims[1];
        arr_dims[0].n = num_trials;
        arr_dims[0].is = ld;
        arr_dims[0].os = ld;

        fftw_iodim64 arr_howmany_dims[1];
        arr_howmany_dims[0].n = ld;
        arr_howmany_dims[0].is = 1;
        arr_howmany_dims[0].os = 1;

        auto io = reinterpret_cast<fftw_complex *>(state_vector.data());

        fftw_plan master_plan = fftw_plan_guru64_dft(
            1, arr_dims, 1, arr_howmany_dims,
            io, io, -1, FFTW_ESTIMATE);

        assert(master_plan != NULL);

        std::cout << "DFT Plan Initialized" << std::endl;

        // plan the Fourier Transform
        double NORMALIZATION_FACTOR = sqrt(num_trials);

        fftw_execute(master_plan);

        std::cout << "DFT Plan Executed" << std::endl;

#pragma omp parallel for
        for (uint64_t i = 0; i < ld * num_trials; i++)
        {
            state_vector[i] /= NORMALIZATION_FACTOR;
        }

        fftw_destroy_plan(master_plan);
    }

protected:
    unsigned precision;
    unsigned num_orbitals_;
    unsigned num_occupied_;
    uint64_t n_choose_o_;
};

int main(int argc, char *argv[])
{
    StateVector sve = StateVector(std::stoi(argv[1]), 15, 5);
    sve[0] = 1.0;

    sve.qift();

    return 0;
}

stevengj commented 3 years ago

You are trying to allocate an array of 2^13 3003^2 16 bytes, which is over 1 terabyte. I can't run it because I don't have this much RAM.

Are you sure you're not just running out of memory? resize should throw a bad_alloc exception…

christopherkang commented 3 years ago

Sorry, how should I go about debugging this? The challenge is that the command only fails once the parameter becomes large (the device I'm running the code on has ~4TB memory)

Best regards, Christopher Kang http://pronoun.is/he

On Sun, Jul 18, 2021 at 9:59 AM Steven G. Johnson @.***> wrote:

You are trying to allocate an array of 2^13 3003^2 16 bytes, which is over 1 terabyte. I can't run it because I don't have this much RAM.

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub https://github.com/FFTW/fftw3/issues/245#issuecomment-882086846, or unsubscribe https://github.com/notifications/unsubscribe-auth/AKCTURDADZE573XCB77U4DLTYMB6FANCNFSM46DC5XCA .

FFTW / fftw3

Does fftw_plan_many_dft have a max size? #245