Closed christopherkang closed 3 years ago
fftw_plan_many_dft
takes 32-bit integer arguments. You need to use the guru64 interface if you need sizes that are larger 2^31 - 1
: http://fftw.org/fftw3_doc/64_002dbit-Guru-Interface.html#g_t64_002dbit-Guru-Interface
@stevengj thanks for the response! I updated the code as described but am still running into segfaults - where else do you think I could debug it?
StateVector& state_vector = *this;
// FFTW - PLANNING
// methods to make this multi-threaded
int NUMBER_OF_THREADS = 40;
fftw_init_threads();
fftw_plan_with_nthreads(NUMBER_OF_THREADS);
// std::cout << "NUM THREADS: " << fftw_planner_nthreads() << std::endl;
uint64_t ld = n_choose_o_ * n_choose_o_;
uint64_t num_trials = 1 << precision;
fftw_iodim64 arr_dims[1];
arr_dims[0].n = num_trials;
arr_dims[0].is = ld;
arr_dims[0].os = ld;
fftw_iodim64 arr_howmany_dims[1];
arr_howmany_dims[0].n = ld;
arr_howmany_dims[0].is = 1;
arr_howmany_dims[0].os = 1;
auto io = reinterpret_cast<fftw_complex*>(state_vector.data());
fftw_plan master_plan = fftw_plan_guru64_dft(
1, arr_dims, 1, arr_howmany_dims,
io, io, -1, FFTW_ESTIMATE
);
assert(master_plan != NULL);
std::cout << "DFT Plan Initialized" << std::endl;
// plan the Fourier Transform
double NORMALIZATION_FACTOR = sqrt(num_trials);
fftw_execute(master_plan);
std::cout << "DFT Plan Executed" << std::endl;
#pragma omp parallel for
for (uint64_t i = 0; i < ld * num_trials; i++){
state_vector[i] /= NORMALIZATION_FACTOR;
}
fftw_destroy_plan(master_plan);
@stevengj would appreciate a lookover! Can open a new issue if preferable. Thanks, I appreciate your time and expertise.
I don't see anything obviously wrong with your code at first glance; it's not possible to help your further without a self-contained runnable example.
Thanks for the follow up, @stevengj ! I've made a self-contained example below. Thanks.
Main
int main(int argc, char *argv[])
{
StateVector sve = StateVector(std::stoi(argv[1]), 15, 5);
sve[0] = 1.0;
sve.qift();
return 0;
}
Statevector file
#include <complex.h>
#include <vector>
#include <fftw3.h>
inline constexpr uint64_t nChoosek(int n, int k) {
if (k > n) return 0;
if (k * 2 > n) k = n - k;
if (k == 0) return 1;
uint64_t result = n;
for (int i = 2; i <= k; ++i) {
result *= (n - i + 1);
result /= i;
}
return result;
}
class StateVector : public std::vector<std::complex<double>> {
public:
StateVector(unsigned precision, unsigned num_orbitals,
unsigned num_occupied)
: precision{precision},
num_orbitals_{num_orbitals},
num_occupied_{num_occupied} {
assert(num_occupied <= num_orbitals);
assert(precision > 0);
n_choose_o_ = nChoosek(num_orbitals, num_occupied);
resize((1ul << precision) * n_choose_o_ * n_choose_o_, 0);
assert((*this).max_size() > (1ul << precision) * n_choose_o_ * n_choose_o_);
}
void qift() {
StateVector& state_vector = *this;
int NUMBER_OF_THREADS = 40;
fftw_init_threads();
fftw_plan_with_nthreads(NUMBER_OF_THREADS);
uint64_t ld = n_choose_o_ * n_choose_o_;
uint64_t num_trials = 1 << precision;
fftw_iodim64 arr_dims[1];
arr_dims[0].n = num_trials;
arr_dims[0].is = ld;
arr_dims[0].os = ld;
fftw_iodim64 arr_howmany_dims[1];
arr_howmany_dims[0].n = ld;
arr_howmany_dims[0].is = 1;
arr_howmany_dims[0].os = 1;
auto io = reinterpret_cast<fftw_complex*>(state_vector.data());
fftw_plan master_plan = fftw_plan_guru64_dft(
1, arr_dims, 1, arr_howmany_dims,
io, io, -1, FFTW_ESTIMATE
);
assert(master_plan != NULL);
std::cout << "DFT Plan Initialized" << std::endl;
// plan the Fourier Transform
double NORMALIZATION_FACTOR = sqrt(num_trials);
fftw_execute(master_plan);
std::cout << "DFT Plan Executed" << std::endl;
#pragma omp parallel for
for (uint64_t i = 0; i < ld * num_trials; i++){
state_vector[i] /= NORMALIZATION_FACTOR;
}
fftw_destroy_plan(master_plan);
}
protected:
unsigned precision;
unsigned num_orbitals_;
unsigned num_occupied_;
uint64_t n_choose_o_;
}
I'm sorry @stevengj , I made a mistake with the self-contained code. This should be more accurate, with the following command being the culprit: ./fftw_test 13
#include <cstdint>
#include <cassert>
#include <iostream>
#include <complex.h>
#include <vector>
#include <fftw3.h>
uint64_t nChoosek(int n, int k)
{
if (k > n)
return 0;
if (k * 2 > n)
k = n - k;
if (k == 0)
return 1;
uint64_t result = n;
for (int i = 2; i <= k; ++i)
{
result *= (n - i + 1);
result /= i;
}
return result;
}
class StateVector : public std::vector<std::complex<double>>
{
public:
StateVector(unsigned precision, unsigned num_orbitals,
unsigned num_occupied)
: precision{precision}, num_orbitals_{num_orbitals}, num_occupied_{num_occupied}
{
assert(num_occupied <= num_orbitals);
assert(precision > 0);
n_choose_o_ = nChoosek(num_orbitals, num_occupied);
resize((1ul << precision) * n_choose_o_ * n_choose_o_, 0);
assert((*this).max_size() > (1ul << precision) * n_choose_o_ * n_choose_o_);
}
void qift()
{
StateVector &state_vector = *this;
int NUMBER_OF_THREADS = 40;
fftw_init_threads();
fftw_plan_with_nthreads(NUMBER_OF_THREADS);
uint64_t ld = n_choose_o_ * n_choose_o_;
uint64_t num_trials = 1 << precision;
fftw_iodim64 arr_dims[1];
arr_dims[0].n = num_trials;
arr_dims[0].is = ld;
arr_dims[0].os = ld;
fftw_iodim64 arr_howmany_dims[1];
arr_howmany_dims[0].n = ld;
arr_howmany_dims[0].is = 1;
arr_howmany_dims[0].os = 1;
auto io = reinterpret_cast<fftw_complex *>(state_vector.data());
fftw_plan master_plan = fftw_plan_guru64_dft(
1, arr_dims, 1, arr_howmany_dims,
io, io, -1, FFTW_ESTIMATE);
assert(master_plan != NULL);
std::cout << "DFT Plan Initialized" << std::endl;
// plan the Fourier Transform
double NORMALIZATION_FACTOR = sqrt(num_trials);
fftw_execute(master_plan);
std::cout << "DFT Plan Executed" << std::endl;
#pragma omp parallel for
for (uint64_t i = 0; i < ld * num_trials; i++)
{
state_vector[i] /= NORMALIZATION_FACTOR;
}
fftw_destroy_plan(master_plan);
}
protected:
unsigned precision;
unsigned num_orbitals_;
unsigned num_occupied_;
uint64_t n_choose_o_;
};
int main(int argc, char *argv[])
{
StateVector sve = StateVector(std::stoi(argv[1]), 15, 5);
sve[0] = 1.0;
sve.qift();
return 0;
}
You are trying to allocate an array of 2^13 3003^2 16 bytes, which is over 1 terabyte. I can't run it because I don't have this much RAM.
Are you sure you're not just running out of memory? resize
should throw a bad_alloc
exception…
Sorry, how should I go about debugging this? The challenge is that the command only fails once the parameter becomes large (the device I'm running the code on has ~4TB memory)
Best regards, Christopher Kang http://pronoun.is/he
On Sun, Jul 18, 2021 at 9:59 AM Steven G. Johnson @.***> wrote:
You are trying to allocate an array of 2^13 3003^2 16 bytes, which is over 1 terabyte. I can't run it because I don't have this much RAM.
— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub https://github.com/FFTW/fftw3/issues/245#issuecomment-882086846, or unsubscribe https://github.com/notifications/unsubscribe-auth/AKCTURDADZE573XCB77U4DLTYMB6FANCNFSM46DC5XCA .
I've been using FFTW on an array of size 3003 3003 2^N for different values of N. What's strange is that when N <= 13, the code runs fine, but at N = 14 I get a segfault (code below).
Do you have any ideas as to what could be causing this?
Where vector initialized and then resize()ed; n_chooseo is 3003 in this case.
Statevector
is a C++ complex