hannorein / rebound

💫 An open-source multi-purpose N-body code.
https://rebound.readthedocs.io/
GNU General Public License v3.0
861 stars 221 forks source link

OpenMp Problem #396

Closed Tuhin-Ghosh closed 1 year ago

Tuhin-Ghosh commented 5 years ago

Hi, I am getting a slowdown if I use OpenMP. I am attaching the code below.

include

include

include

include

include

include <sys/time.h>

include

include "rebound.h"

include "tools.h"

include "output.h"

void run_sim(); void heartbeat(struct reb_simulation r); double a_to_P(double a, double m_star, double m_planet); double P_to_a(double P, double m_star, double m_planet); int reb_collision_resolve_merge_pass_through(struct reb_simulation const r, struct reb_collision c);

double E0;

int main(int argc, char* argv[]) { // Get the number of processors int np = omp_get_num_procs(); // Set the number of OpenMP threads to be the number of processors omp_set_num_threads(np);

// First, run it with the OpenMP turned on.
struct timeval tim;
gettimeofday(&tim, NULL);
double timing1 = tim.tv_sec + (tim.tv_usec / 1000000.0);
run_sim();

// Reduce the number of threads to 1 and run again.
gettimeofday(&tim, NULL);
double timing2 = tim.tv_sec + (tim.tv_usec / 1000000.0);
omp_set_num_threads(1);
run_sim();
gettimeofday(&tim, NULL);
double timing3 = tim.tv_sec + (tim.tv_usec / 1000000.0);

// Output speedup
printf("\n\nOpenMP speed-up: %.3fx (perfect scaling would give %dx)\n", (timing3 - timing2) / (timing2 - timing1), np);

}

void run_sim() {

struct reb_simulation* r = reb_create_simulation();

// Simulation Setup
r->integrator  = REB_INTEGRATOR_MERCURIUS;
//r->integrator  = REB_INTEGRATOR_IAS15;
r->heartbeat   = heartbeat;
r->testparticle_type = 1;

// Collisions
r->collision = REB_COLLISION_DIRECT;
r->collision_resolve = reb_collision_resolve_merge_pass_through;
r->track_energy_offset = 1;
r->collision_resolve_keep_sorted = 1;

// Boundaries
r->boundary    = REB_BOUNDARY_OPEN;
const double boxsize = 100;
reb_configure_box(r, boxsize, 1, 1, 1);

srand(12);

double M_sun = 1.989e33;                // In gm

// Masses in Solar Units
double m_earth = 3.003e-6;
double m_neptune = 5.1e-5;

double m_star = 1.;
double m_inner_planet = m_neptune;
double m_outer_planet = m_neptune;

// Radii in AU
double r_star = 0.00464913034;
double r_inner_planet = 4 * 4.26349651e-5;
double r_outer_planet = 4 * 4.26349651e-5;

// Semi Major Axis in AU
double a_inner_planet = 1.;
double a_outer_planet = P_to_a(2. * a_to_P(a_inner_planet, m_star, m_inner_planet), m_star, m_outer_planet);

// Period In Years
double P_inner_planet = a_to_P(a_inner_planet, m_star, m_inner_planet);
double P_outer_planet = a_to_P(a_outer_planet, m_star, m_outer_planet);

// Time Step : dt
r->dt = 2. * M_PI * P_inner_planet / 50;

// Star
struct reb_particle star = {0};
star.m         = m_star;
star.r        = r_star;      // Radius of particle is in AU!
reb_add(r, star);

// Planet 1 - Inner Planet
{
    double a = a_inner_planet, m = m_inner_planet, e = reb_random_rayleigh(0.04), inc = reb_random_rayleigh(0.024);
    double Omega = reb_random_uniform(0, 2.*M_PI);
    double apsis = reb_random_uniform(0, 2.*M_PI);
    double phi = reb_random_uniform(0, 2.*M_PI);
    struct reb_particle p = {0};
    p = reb_tools_orbit_to_particle(r->G, star, m, a, e, inc, Omega, apsis, phi);
    p.r = 4 * 4.26352e-5;
    reb_add(r, p);
}

// Planet 2 - Outer Planet
{
    double a = a_outer_planet, m = m_outer_planet, e = reb_random_rayleigh(0.04), inc = reb_random_rayleigh(0.024);
    double Omega = reb_random_uniform(0, 2.*M_PI);
    double apsis = reb_random_uniform(0, 2.*M_PI);
    double phi = reb_random_uniform(0, 2.*M_PI);
    struct reb_particle p = {0};
    p = reb_tools_orbit_to_particle(r->G, star, m, a, e, inc, Omega, apsis, phi);
    p.r =  4 * 4.26352e-5;
    reb_add(r, p);
}

r->N_active = r->N;

int N_planetesimals = 10000;
double rho_planetesimals = 4.;       // gm/cc
double total_disk_mass = 2. * (m_inner_planet + m_outer_planet);
double planetesimal_mass = total_disk_mass / N_planetesimals;
double R_planetesimals = pow((3.*planetesimal_mass * M_sun / 4. / M_PI / rho_planetesimals), 1. / 3.) / 1.496e+13;  // In AU

double Pmin = P_inner_planet / 3.;
double Pmax = P_outer_planet * 3.;
double amin = P_to_a(Pmin, m_star, planetesimal_mass), amax = P_to_a(Pmax, m_star, planetesimal_mass);
double powerlaw = -0.5;

printf("\nAdding Testparticles\n\n");
while (r->N < N_planetesimals + r->N_active) {
    struct reb_particle pt = {0};
    double a    = reb_random_powerlaw(amin, amax, powerlaw);
    double e    = reb_random_rayleigh(0.004);
    double inc  = reb_random_rayleigh(0.024);
    double Omega = reb_random_uniform(0, 2.*M_PI);
    double apsis = reb_random_uniform(0, 2.*M_PI);
    double phi   = reb_random_uniform(0, 2.*M_PI);
    pt = reb_tools_orbit_to_particle(r->G, star, r->testparticle_type ? planetesimal_mass : 0., a, e, inc, Omega, apsis, phi);
    pt.r         = R_planetesimals;
    reb_add(r, pt);
}

printf("\nWorking\n\n");
reb_move_to_com(r);
E0 = reb_tools_energy(r);
int err = reb_integrate(r, r->t + 2. * M_PI * P_outer_planet * 5);
reb_free_simulation(r);
printf("\nDone ! Error Code : %d\n\n", err);

}

int reb_collision_resolve_merge_pass_through(struct reb_simulation* const r, struct reb_collision c) { // This function passes the collision to the default merging routine. // If a merger occured, that routine will return a value other than 0. // This function then outputs some information about the merger. int result = reb_collision_resolve_merge(r, c); if (result != 0) { printf("A merger occured! Particles involved: %d, %d.\n", c.p1, c.p2); } return result; }

void heartbeat(struct reb_simulation r) { if (reb_output_check(r, 2. M_PI)) {

    //reb_output_timing(r, 2. * M_PI);
    //reb_integrator_synchronize(r);

    //relative energy error
    double E = reb_tools_energy(r);
    double relE = fabs((E - E0) / E0);

    printf("Time : %f, dt : %f, Del(E) :%e, Number Of Particles : %d\n", r->t / (2 * M_PI), r->dt / (2 * M_PI), relE, r->N);

}

}

double a_to_P(double a, double m_star, double m_planet) { return sqrt(a a a / (m_star + m_planet )); }

double P_to_a(double P, double m_star, double m_planet) { return pow((m_star + m_planet ) P P, 1.0 / 3.0); }

Note: I do see a speed up while running the OpenMP example. I just don't understand what I am doing wrong in the above code. I am using the same Makefile as one in the OpenMP example. When I look at cpu usage I see all cores are being utilized but the speedup is always below 1.

hannorein commented 5 years ago

In short: it's tricky to get a speed up in many cases. You'll need to find out what the bottleneck is. For example, try turning off collisions. I have myself not used MERCURIUS with OpenMP. There could be various places that are not fully optimized. You could try something simple (leapfrog) just for testing. But it will be major task to get a simulation like this run efficiently on many cores.

Tuhin-Ghosh commented 5 years ago

Even after setting "REB_COLLISION_NONE" and "REB_BOUNDARY_NONE", I am getting:

"OpenMP speed-up: 0.685x (perfect scaling would give 8x)"

I also tested by setting the integrator to MERCURIUS in your OpenMP example and there I do get a speedup(with 500 particles):

"OpenMP speed-up: 3.399x (perfect scaling would give 8x)".

I also used the LEAPFROG integrator as you suggested. Using 10k particle I am getting:

"OpenMP speed-up: 0.942x (perfect scaling would give 8x)"

But now when I look at my CPU usage I see only one core is being utilized all the time.

And after setting "REB_COLLISION_NONE" and "REB_BOUNDARY_NONE", I get :

"OpenMP speed-up: 1.074x (perfect scaling would give 8x)"

This time however I see all CPU cores are working. So I could not figure out what the bottleneck is.

hannorein commented 5 years ago

I'm afraid I don't have an easy solution for you. They way forward would be to use a profiler to see where the bottleneck is, then trying to remove it.