ICLDisco / parsec

PaRSEC is a generic framework for architecture aware scheduling and management of micro-tasks on distributed, GPU accelerated, many-core heterogeneous architectures. PaRSEC assigns computation threads to the cores, GPU accelerators, overlaps communications and computations and uses a dynamic, fully-distributed scheduler based on architectural features such as NUMA nodes and algorithmic features such as data reuse.
Other
50 stars 17 forks source link

parsec_remote_dep_set_ctx() does not return #135

Closed abouteiller closed 2 years ago

abouteiller commented 7 years ago

Original report by Reazul Hoque (Bitbucket: rhoque_icl, ).


I am trying the example given below.

  1. The program does not end
  2. This is the output I am getting:

old rank: 1, new_rank: -1
old rank: 0, new_rank: -1
Just Kidding I have a new rank. My old rank: 0, new_rank: 0
old rank: 2, new_rank: -1
Just Kidding I have a new rank. My old rank: 2, new_rank: 1

I did not do anything and my rank in MPI_COMM_WORLD is: 1

Command line: mpirun --np 3 ./executable

One possible reason might be that we are calling parsec_remote_dep_set_ctx() from only a subset of the ranks of the original communicator. Trying otherwise resulted in a failure to set the comm_ctx.

#!c

#include "parsec_config.h"

/* system and io */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* parsec things */
#include "parsec.h"
#include "parsec/profiling.h"
#ifdef PARSEC_VTRACE
#include "parsec/vt_user.h"
#endif

#include "parsec/interfaces/superscalar/insert_function_internal.h"

#if defined(PARSEC_HAVE_STRING_H)
#include <string.h>
#endif  /* defined(PARSEC_HAVE_STRING_H) */

#if defined(PARSEC_HAVE_MPI)
#include <mpi.h>
#endif  /* defined(PARSEC_HAVE_MPI) */

/**
 * Body of the only DTD task inserted by this example.
 *
 * Unpacks the single by-value argument attached to the task and prints it
 * next to the rank recorded in the PaRSEC context that owns the task's handle.
 *
 * @param context    execution unit running the task (unused)
 * @param this_task  task instance whose argument is unpacked
 * @return PARSEC_HOOK_RETURN_DONE
 */
int
task_task( parsec_execution_unit_t    *context,
           parsec_execution_context_t *this_task )
{
    int *rank_in_world;

    (void)context;

    /* The value was inserted by copy; unpacking hands back a pointer to it. */
    parsec_dtd_unpack_args( this_task, UNPACK_VALUE, &rank_in_world );

    printf("We are executing task in rank of subcomm: %d original rank: %d\n",
           this_task->parsec_handle->context->my_rank,
           *rank_in_world);

    return PARSEC_HOOK_RETURN_DONE;
}

/*
 * Reproducer: run one DTD task on a sub-communicator made of the
 * even-numbered ranks of MPI_COMM_WORLD.
 *
 * With PARSEC_HAVE_MPI defined, every rank initializes MPI and PaRSEC and
 * takes part in MPI_Comm_split(). Ranks that obtain a communicator pass it to
 * parsec_remote_dep_set_ctx(), insert a single task_task, wait for it and
 * call parsec_fini(); the other ranks only print a message. All ranks then
 * call MPI_Finalize().
 *
 * Without PARSEC_HAVE_MPI the PaRSEC section runs unconditionally with
 * rank 0 and world 1.
 *
 * Always returns 0.
 */
int main(int argc, char **argv)
{
    parsec_context_t* parsec;
    /* new_rank starts at -1 so the first printf below can show the value
     * before it is queried from the sub-communicator. */
    int rank, world, cores, new_rank = -1, new_world = 0;

#if defined(PARSEC_HAVE_MPI)
    {   
        /* NOTE(review): 'provided' is never examined, so a thread level lower
         * than MPI_THREAD_SERIALIZED would go unnoticed. */
        int provided;
        MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    }   
    MPI_Comm_size(MPI_COMM_WORLD, &world);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#else
    world = 1;
    rank = 0;
#endif

    cores = 1;

    /* PaRSEC is initialized on every rank, before the communicator is split. */
    parsec = parsec_init( cores, &argc, &argv );

#if defined(PARSEC_HAVE_MPI)
    MPI_Comm subcomm;
    /* Both branches call MPI_Comm_split, so every rank of MPI_COMM_WORLD takes
     * part. Even ranks ask for color 0; odd ranks pass MPI_UNDEFINED, which
     * per the MPI standard leaves them with MPI_COMM_NULL. */
    if( (rank % 2) == 0 ) { 
        MPI_Comm_split(MPI_COMM_WORLD, 0, 0, &subcomm);
    } else {
        MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, 0, &subcomm);
    } 

    /* new_rank has not been assigned yet, so this always prints -1. */
    printf("old rank: %d, new_rank: %d\n", rank, new_rank);

    /* This 'if' is opened only when PARSEC_HAVE_MPI is defined; its matching
     * '} else {' sits in the second PARSEC_HAVE_MPI region further down. */
    if( subcomm != MPI_COMM_NULL ) {
        MPI_Comm_size(subcomm, &new_world);
        MPI_Comm_rank(subcomm, &new_rank);

        printf("Just Kidding I have a new rank. My old rank: %d, new_rank: %d\n", rank, new_rank);

        /* The address of the MPI_Comm is handed over as an opaque pointer.
         * Only ranks holding a valid subcomm reach this call. */
        if( PARSEC_SUCCESS != parsec_remote_dep_set_ctx( parsec, (void *)&subcomm ) ) {
            printf("set_ctx did not succeed\n");
        } else {
            printf("set_ctx succeeded\n");

        }
#endif

        parsec_handle_t *parsec_dtd_handle = parsec_dtd_handle_new(  );

        /* 'rank' is the MPI_COMM_WORLD rank here, not the sub-communicator rank. */
        if( 0 == rank ) {
            parsec_output( 0, "\nWe are testing a run using sub communicator\n" );
        }

        /* Registering the dtd_handle with PARSEC context */
        parsec_enqueue( parsec, parsec_dtd_handle );

        parsec_context_start(parsec);

        /* Insert one task carrying a copy of this process's MPI_COMM_WORLD rank. */
        parsec_insert_task( parsec_dtd_handle, task_task,    0,  "task_task",
                            sizeof(int),    &rank,  VALUE,
                            0 );

        parsec_dtd_handle_wait( parsec, parsec_dtd_handle );

        parsec_context_wait(parsec);

        if( 0 == rank ) {
            parsec_output( 0, "\nPingpong is behaving correctly.\n" );
        }

        parsec_handle_free( parsec_dtd_handle );
        /* End of correctness checking */

        /* NOTE(review): with MPI enabled, parsec_fini is reached only by ranks
         * inside the sub-communicator; ranks taking the else-branch below
         * called parsec_init but never finalize PaRSEC. subcomm is also never
         * released with MPI_Comm_free. Confirm whether this is intended. */
        parsec_fini(&parsec);
#if defined(PARSEC_HAVE_MPI)
    } else {
        /* Ranks left out of the sub-communicator do no PaRSEC work. */
        parsec_output( 0, "\nI did not do anything and my rank in MPI_COMM_WORLD is: %d\n", rank );
    }
#endif

#ifdef PARSEC_HAVE_MPI
    MPI_Finalize();
#endif

    return 0;
} 
abouteiller commented 5 years ago

Original comment by Thomas Herault (Bitbucket: herault, GitHub: therault).


PR bitbucket:#220 should fix this.

abouteiller commented 3 years ago

Removing version: 3.0.0 (automated comment)