Here's another issue I've encountered while trying to implement support for the teams directive in Score-P. When an application uses the teams directive and selects to use only a single team via num_teams(1), the dispatch of ompt_callback_implicit_task will not pass the parallel_data set in ompt_callback_parallel_begin. Using any other number of teams will yield the expected results. This also includes target teams when selecting x86_64 as the offload target.
Reproducer:
The source code to test this behavior is quite short (aside from the OMPT interface code).
Details
```c
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* MAIN */
/*
 * Reproducer entry point: caps parallel regions at two threads and each team
 * at two threads, then runs an empty teams region so that only the OMPT
 * callbacks produce output.
 * NOTE(review): no num_teams clause is given here; presumably the number of
 * teams is chosen externally (e.g. via the environment) -- confirm.
 */
int main( int argc, char** argv )
{
    ( void )argc;
    ( void )argv;

    omp_set_num_threads( 2 );
    omp_set_teams_thread_limit( 2 );

    #pragma omp teams default(none)
    {
        /* intentionally empty */
    }

    return 0;
}
/* OMPT INTERFACE */
/*
 * OMPT_TOOL_CPU_RELAX: hint executed between failed lock attempts.
 * x86-64 issues "pause", AArch64 issues "yield"; ppc64 and every other
 * target expand to a no-op.
 */
#if ( defined( __ppc64__ ) || defined( __powerpc64__ ) || defined( __PPC64__ ) )
#define OMPT_TOOL_CPU_RELAX ( ( void )0 )
#elif ( defined( __x86_64 ) || defined( __x86_64__ ) || defined( __amd64 ) || defined( _M_X64 ) )
#define OMPT_TOOL_CPU_RELAX __asm__ volatile ( "pause" )
#elif ( defined( __aarch64__ ) || defined( __ARM64__ ) || defined( _M_ARM64 ) )
#define OMPT_TOOL_CPU_RELAX __asm__ volatile ( "yield" )
#else
#define OMPT_TOOL_CPU_RELAX ( ( void )0 )
#endif

/*
 * Test-and-set spin lock. MUTEX must be an atomic_flag. The macro spins,
 * executing OMPT_TOOL_CPU_RELAX after each failed attempt, until the flag
 * was previously clear. Expands to a complete statement (no trailing ';'
 * needed).
 */
#define OMPT_TOOL_LOCK( MUTEX ) \
    while ( true ) \
    { \
        if ( !atomic_flag_test_and_set_explicit( &( MUTEX ), memory_order_acquire ) ) \
        { \
            break; \
        } \
        OMPT_TOOL_CPU_RELAX; \
    }

/* Releases a lock taken with OMPT_TOOL_LOCK. Already ends in ';'. */
#define OMPT_TOOL_UNLOCK( MUTEX ) atomic_flag_clear_explicit( &( MUTEX ), memory_order_release );

/*
 * printf() serialized through ompt_tool_printf_mutex so that lines from
 * different threads do not interleave. Wrapped in do { } while ( 0 ) so the
 * macro behaves as a single statement, e.g. as the body of an unbraced
 * if/else. Use it with a trailing ';'.
 */
#define OMPT_TOOL_GUARDED_PRINTF( ... ) \
    do \
    { \
        OMPT_TOOL_LOCK( ompt_tool_printf_mutex ) \
        printf( __VA_ARGS__ ); \
        OMPT_TOOL_UNLOCK( ompt_tool_printf_mutex ) \
    } while ( 0 )

/* Per-thread id; stays -1 until thread_begin_cb assigns a value >= 1. */
_Thread_local int32_t ompt_tool_tid = -1;

/* Lock that guards stdout in OMPT_TOOL_GUARDED_PRINTF. */
atomic_flag ompt_tool_printf_mutex = ATOMIC_FLAG_INIT;
/*
 * Maps an OMPT scope endpoint to a printable name ("begin", "end" or
 * "beginend"). Any other value trips an assertion and, when assertions are
 * disabled, yields the empty string.
 */
static const char *
scope_endpoint2string(ompt_scope_endpoint_t t)
{
    if (t == ompt_scope_begin)
    {
        return "begin";
    }
    if (t == ompt_scope_end)
    {
        return "end";
    }
    if (t == ompt_scope_beginend)
    {
        return "beginend";
    }

    assert(false);
    return "";
}
/*
 * Translates the flags of a parallel-begin/end callback into a printable
 * name of the form "invoker_<program|runtime>_<league|team>".
 *
 * The program invoker bit is tested before the runtime invoker bit, and the
 * league bit before the team bit. Asserts that no bits other than the chosen
 * invoker and kind are set. Unrecognized combinations trip an assertion and,
 * when assertions are disabled, yield the empty string.
 */
static const char*
parallel_flag2string( int /* ompt_parallel_flag_t */ t )
{
    const bool by_program = ( t & ompt_parallel_invoker_program ) != 0;
    const bool by_runtime = ( t & ompt_parallel_invoker_runtime ) != 0;

    if ( !by_program && !by_runtime )
    {
        assert( false );
        return "";
    }

    const int invoker = by_program ? ompt_parallel_invoker_program
                                   : ompt_parallel_invoker_runtime;

    if ( t & ompt_parallel_league )
    {
        assert( t == ( invoker | ompt_parallel_league ) );
        return by_program ? "invoker_program_league" : "invoker_runtime_league";
    }
    if ( t & ompt_parallel_team )
    {
        assert( t == ( invoker | ompt_parallel_team ) );
        return by_program ? "invoker_program_team" : "invoker_runtime_team";
    }

    assert( false );
    return "";
}
/*
 * Returns a printable name for an ompt_set_result_t value. Unknown values
 * trip an assertion and, when assertions are disabled, yield the empty
 * string.
 */
static const char*
set_result2string( ompt_set_result_t t )
{
    const char* name = NULL;

    switch ( t )
    {
        case ompt_set_error:            name = "error";            break;
        case ompt_set_never:            name = "never";            break;
        case ompt_set_impossible:       name = "impossible";       break;
        case ompt_set_sometimes:        name = "sometimes";        break;
        case ompt_set_sometimes_paired: name = "sometimes_paired"; break;
        case ompt_set_always:           name = "always";           break;
    }

    if ( name != NULL )
    {
        return name;
    }
    assert( false );
    return "";
}
/*
 * Translates task flags into a printable name.
 *
 * An initial task is reported as "initial" (asserting that no other bit is
 * set). Otherwise the first matching kind among implicit, explicit, target
 * and taskwait is combined with the first matching modifier among
 * undeferred, untied, final, mergeable and merged, e.g. "explicit_untied".
 * Without a modifier the bare kind name is returned after asserting that the
 * flags equal exactly that kind. Flags without any known kind trip an
 * assertion and, when assertions are disabled, yield the empty string.
 */
static const char*
task_flag2string( int /* ompt_task_flag_t */ t )
{
    enum { N_KINDS = 4, N_MODIFIERS = 5 };

    /* Column 0 is the bare kind; columns 1.. follow the order of modifiers[]. */
    static const char* const names[ N_KINDS ][ N_MODIFIERS + 1 ] = {
        { "implicit", "implicit_undeferred", "implicit_untied", "implicit_final",
          "implicit_mergeable", "implicit_merged" },
        { "explicit", "explicit_undeferred", "explicit_untied", "explicit_final",
          "explicit_mergeable", "explicit_merged" },
        { "target", "target_undeferred", "target_untied", "target_final",
          "target_mergeable", "target_merged" },
        { "taskwait", "taskwait_undeferred", "taskwait_untied", "taskwait_final",
          "taskwait_mergeable", "taskwait_merged" },
    };
    const int kinds[ N_KINDS ] = {
        ompt_task_implicit, ompt_task_explicit, ompt_task_target, ompt_task_taskwait
    };
    const int modifiers[ N_MODIFIERS ] = {
        ompt_task_undeferred, ompt_task_untied, ompt_task_final,
        ompt_task_mergeable, ompt_task_merged
    };

    if ( t & ompt_task_initial )
    {
        assert( t == ompt_task_initial );
        return "initial";
    }

    for ( int k = 0; k < N_KINDS; k++ )
    {
        if ( !( t & kinds[ k ] ) )
        {
            continue;
        }
        for ( int m = 0; m < N_MODIFIERS; m++ )
        {
            if ( t & modifiers[ m ] )
            {
                return names[ k ][ m + 1 ];
            }
        }
        assert( t == kinds[ k ] );
        return names[ k ][ 0 ];
    }

    assert( false );
    return "";
}
/*
 * Returns a printable name for an ompt_thread_t value ("initial", "worker",
 * "other" or "unknown"). Any other value trips an assertion and, when
 * assertions are disabled, yields the empty string.
 */
static const char*
thread2string( ompt_thread_t t )
{
    if ( t == ompt_thread_initial )
    {
        return "initial";
    }
    if ( t == ompt_thread_worker )
    {
        return "worker";
    }
    if ( t == ompt_thread_other )
    {
        return "other";
    }
    if ( t == ompt_thread_unknown )
    {
        return "unknown";
    }

    assert( false );
    return "";
}
/*
 * thread-begin callback: hands the calling thread the next id from a
 * process-wide counter (starting at 1), stores it both in the thread-local
 * ompt_tool_tid and in thread_data->value, and prints the id together with
 * the thread type. Asserts that the thread has not been given an id before.
 */
void
thread_begin_cb( ompt_thread_t thread_type,
                 ompt_data_t*  thread_data )
{
    static atomic_int_least32_t next_tid = 1; /* ids handed out are >= 1 */

    assert( ompt_tool_tid == -1 );

    const int32_t tid = atomic_fetch_add( &next_tid, 1 );
    ompt_tool_tid      = tid;
    thread_data->value = tid;

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | type = %s\n",
                              __FUNCTION__,
                              tid,
                              thread2string( thread_type ) );
}
/* Placeholder ids printed by the callbacks when the runtime passes a NULL
 * ompt_data_t pointer instead of real data. */
#define INVALID_TASK 6666666 /* printed for a NULL task_data / encountering_task_data */
#define INVALID_PARALLEL 7777777 /* printed for a NULL parallel_data */
#define INVALID_THREAD 8888888 /* not referenced in the code shown here */
/*
 * parallel-begin callback: tags the new region with a unique id (taken from
 * a counter starting at 7770001) in parallel_data->value and prints the
 * callback arguments.
 *
 * The encountering task is prefixed with "WARNING " when its data pointer is
 * NULL or its value is still 0; a NULL pointer is printed as INVALID_TASK.
 * encountering_task_frame is not inspected.
 */
void
parallel_begin_cb( ompt_data_t*        encountering_task_data,
                   const ompt_frame_t* encountering_task_frame,
                   ompt_data_t*        parallel_data,
                   unsigned int        requested_parallelism,
                   int                 flags,
                   const void*         codeptr_ra )
{
    static atomic_uint_least64_t parallel_counter = 7770001;

    ( void )encountering_task_frame;

    parallel_data->value = atomic_fetch_add( &parallel_counter, 1 );

    const bool task_suspicious =
        encountering_task_data == NULL || encountering_task_data->value == 0;
    const uint64_t task_id =
        encountering_task_data == NULL ? INVALID_TASK : encountering_task_data->value;

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | parallel_data = %" PRIu64 " | %sencountering_task_data = %" PRIu64 " | flags = %s | requested_parallelism = %u | codeptr_ra = %p\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              parallel_data->value,
                              task_suspicious ? "WARNING " : "",
                              task_id,
                              parallel_flag2string( flags ),
                              requested_parallelism,
                              codeptr_ra );
}
/*
 * parallel-end callback: prints the ids of the ending region and of the
 * encountering task together with the flags and return address.
 *
 * Each id is prefixed with "WARNING " when its data pointer is NULL or its
 * value is 0; NULL pointers are printed as INVALID_PARALLEL / INVALID_TASK.
 */
void
parallel_end_cb( ompt_data_t* parallel_data,
                 ompt_data_t* encountering_task_data,
                 int          flags,
                 const void*  codeptr_ra )
{
    const char* parallel_prefix = "";
    const char* task_prefix     = "";
    uint64_t    parallel_id     = INVALID_PARALLEL;
    uint64_t    task_id         = INVALID_TASK;

    if ( parallel_data != NULL )
    {
        parallel_id = parallel_data->value;
    }
    if ( parallel_data == NULL || parallel_id == 0 )
    {
        parallel_prefix = "WARNING ";
    }

    if ( encountering_task_data != NULL )
    {
        task_id = encountering_task_data->value;
    }
    if ( encountering_task_data == NULL || task_id == 0 )
    {
        task_prefix = "WARNING ";
    }

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | %sparallel_data = %" PRIu64 " | %sencountering_task_data = %" PRIu64 " | flags = %s | codeptr_ra = %p\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              parallel_prefix,
                              parallel_id,
                              task_prefix,
                              task_id,
                              parallel_flag2string( flags ),
                              codeptr_ra );
}
/*
 * Returns the next unique task id. Ids come from an atomic counter that
 * starts at 6660001 and grows by one per call.
 */
static uint64_t
new_task( void )
{
    static atomic_uint_least64_t next_task_id = 6660001;

    const uint64_t id = atomic_fetch_add_explicit( &next_task_id, 1, memory_order_seq_cst );
    return id;
}
/*
 * implicit-task callback: on scope begin, tags the task with a fresh id from
 * new_task() in task_data->value; then prints the callback arguments for
 * every endpoint.
 *
 * If task_data->value was already non-zero at scope begin, the previous id
 * is additionally printed as "(reused: ...)". NULL parallel_data / task_data
 * pointers are printed as INVALID_PARALLEL / INVALID_TASK. task_data is only
 * dereferenced after it has been checked against NULL, so the INVALID_TASK
 * fallback is reachable.
 */
void
implicit_task_cb( ompt_scope_endpoint_t endpoint,
                  ompt_data_t*          parallel_data,
                  ompt_data_t*          task_data,
                  unsigned int          actual_parallelism,
                  unsigned int          index, /* For initial tasks, that are not created
                                                  by a teams construct, this argument is 1. */
                  int                   flags )
{
    const uint64_t parallel_id =
        parallel_data == NULL ? INVALID_PARALLEL : parallel_data->value;

    if ( endpoint == ompt_scope_begin && task_data != NULL )
    {
        const uint64_t old = task_data->value;
        task_data->value = new_task();
        if ( old != 0 )
        {
            // The runtime might reuse implicit tasks without resetting their
            // ompt_data_t. Seems to be legal. Seen with NVHPC 23.1.
            OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | parallel_data = %" PRIu64 " | task_data = %" PRIu64 " (reused: %" PRIu64 ") | endpoint = %s | actual_parallelism = %u | index = %u | flags = %s\n",
                                      __FUNCTION__,
                                      ompt_tool_tid,
                                      parallel_id,
                                      task_data->value,
                                      old,
                                      scope_endpoint2string( endpoint ),
                                      actual_parallelism,
                                      index,
                                      task_flag2string( flags ) );
            return;
        }
    }

    const uint64_t task_id = task_data == NULL ? INVALID_TASK : task_data->value;

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | parallel_data = %" PRIu64 " | task_data = %" PRIu64 " | endpoint = %s | actual_parallelism = %u | index = %u | flags = %s\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              parallel_id,
                              task_id,
                              scope_endpoint2string( endpoint ),
                              actual_parallelism,
                              index,
                              task_flag2string( flags ) );
}
/*
 * Tool initializer handed to the runtime via ompt_start_tool(). Prints the
 * initial device number, looks up ompt_set_callback and registers the
 * implicit-task, thread-begin, parallel-begin and parallel-end callbacks.
 *
 * Returns 1 (non-zero indicates success), or 0 when the runtime does not
 * provide ompt_set_callback. The results of the individual registrations
 * are not inspected. tool_data is unused.
 */
static int
my_initialize_tool( ompt_function_lookup_t lookup,
                    int                    initial_device_num,
                    ompt_data_t*           tool_data )
{
    ( void )tool_data;

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | initial_device_num %d\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              initial_device_num );

    ompt_set_callback_t set_callback = ( ompt_set_callback_t )lookup( "ompt_set_callback" );
    if ( set_callback == NULL )
    {
        /* Without the registration entry point no callback can be installed. */
        return 0;
    }

    set_callback( ompt_callback_implicit_task, ( ompt_callback_t )&implicit_task_cb );
    set_callback( ompt_callback_thread_begin, ( ompt_callback_t )&thread_begin_cb );
    set_callback( ompt_callback_parallel_begin, ( ompt_callback_t )&parallel_begin_cb );
    set_callback( ompt_callback_parallel_end, ( ompt_callback_t )&parallel_end_cb );

    return 1; /* non-zero indicates success */
}
/*
 * Tool finalizer handed to the runtime via ompt_start_tool(). Only prints a
 * trace line with the id of the calling thread; tool_data is unused.
 */
static void
my_finalize_tool( ompt_data_t* tool_data )
{
    ( void )tool_data;

    const int32_t tid = ompt_tool_tid;
    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 "\n", __FUNCTION__, tid );
}
/*
 * OMPT tool entry point. Switches stdout to unbuffered mode, prints the
 * OpenMP version and runtime version string it was called with, and returns
 * a pointer to a static result structure naming my_initialize_tool and
 * my_finalize_tool.
 *
 * omp_version is unsigned and therefore printed with %u.
 */
ompt_start_tool_result_t*
ompt_start_tool( unsigned int omp_version,
                 const char*  runtime_version )
{
    static ompt_start_tool_result_t tool = { &my_initialize_tool,
                                             &my_finalize_tool,
                                             ompt_data_none };

    setbuf( stdout, NULL );
    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | omp_version %u | runtime_version = \'%s\'\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              omp_version,
                              runtime_version );
    return &tool;
}
```
If we compile and run the tool we can see the following:
You can see that parallel_data is set to 0 in the second ompt_callback_implicit_task for one team, but gets the correct value for two (and more) teams.
Description:
Here's another issue I've encountered while trying to implement support for the teams directive in Score-P. When an application uses the `teams` directive and selects to use only a single team via `num_teams(1)`, the dispatch of `ompt_callback_implicit_task` will not pass the `parallel_data` set in `ompt_callback_parallel_begin`. Using any other number of teams will yield the expected results. This also includes `target teams` when selecting `x86_64` as the offload target.
Reproducer:
The source code to test this behavior is quite short (aside from the OMPT interface code).
Details
```c
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* MAIN */
/*
 * Reproducer entry point: caps parallel regions at two threads and each team
 * at two threads, then runs an empty teams region so that only the OMPT
 * callbacks produce output.
 * NOTE(review): no num_teams clause is given here; presumably the number of
 * teams is chosen externally (e.g. via the environment) -- confirm.
 */
int main( int argc, char** argv )
{
    ( void )argc;
    ( void )argv;

    omp_set_num_threads( 2 );
    omp_set_teams_thread_limit( 2 );

    #pragma omp teams default(none)
    {
        /* intentionally empty */
    }

    return 0;
}
/* OMPT INTERFACE */
/*
 * OMPT_TOOL_CPU_RELAX: hint executed between failed lock attempts.
 * x86-64 issues "pause", AArch64 issues "yield"; ppc64 and every other
 * target expand to a no-op.
 */
#if ( defined( __ppc64__ ) || defined( __powerpc64__ ) || defined( __PPC64__ ) )
#define OMPT_TOOL_CPU_RELAX ( ( void )0 )
#elif ( defined( __x86_64 ) || defined( __x86_64__ ) || defined( __amd64 ) || defined( _M_X64 ) )
#define OMPT_TOOL_CPU_RELAX __asm__ volatile ( "pause" )
#elif ( defined( __aarch64__ ) || defined( __ARM64__ ) || defined( _M_ARM64 ) )
#define OMPT_TOOL_CPU_RELAX __asm__ volatile ( "yield" )
#else
#define OMPT_TOOL_CPU_RELAX ( ( void )0 )
#endif

/*
 * Test-and-set spin lock. MUTEX must be an atomic_flag. The macro spins,
 * executing OMPT_TOOL_CPU_RELAX after each failed attempt, until the flag
 * was previously clear. Expands to a complete statement (no trailing ';'
 * needed).
 */
#define OMPT_TOOL_LOCK( MUTEX ) \
    while ( true ) \
    { \
        if ( !atomic_flag_test_and_set_explicit( &( MUTEX ), memory_order_acquire ) ) \
        { \
            break; \
        } \
        OMPT_TOOL_CPU_RELAX; \
    }

/* Releases a lock taken with OMPT_TOOL_LOCK. Already ends in ';'. */
#define OMPT_TOOL_UNLOCK( MUTEX ) atomic_flag_clear_explicit( &( MUTEX ), memory_order_release );

/*
 * printf() serialized through ompt_tool_printf_mutex so that lines from
 * different threads do not interleave. Wrapped in do { } while ( 0 ) so the
 * macro behaves as a single statement, e.g. as the body of an unbraced
 * if/else. Use it with a trailing ';'.
 */
#define OMPT_TOOL_GUARDED_PRINTF( ... ) \
    do \
    { \
        OMPT_TOOL_LOCK( ompt_tool_printf_mutex ) \
        printf( __VA_ARGS__ ); \
        OMPT_TOOL_UNLOCK( ompt_tool_printf_mutex ) \
    } while ( 0 )

/* Per-thread id; stays -1 until thread_begin_cb assigns a value >= 1. */
_Thread_local int32_t ompt_tool_tid = -1;

/* Lock that guards stdout in OMPT_TOOL_GUARDED_PRINTF. */
atomic_flag ompt_tool_printf_mutex = ATOMIC_FLAG_INIT;
/*
 * Maps an OMPT scope endpoint to a printable name ("begin", "end" or
 * "beginend"). Any other value trips an assertion and, when assertions are
 * disabled, yields the empty string.
 */
static const char *
scope_endpoint2string(ompt_scope_endpoint_t t)
{
    if (t == ompt_scope_begin)
    {
        return "begin";
    }
    if (t == ompt_scope_end)
    {
        return "end";
    }
    if (t == ompt_scope_beginend)
    {
        return "beginend";
    }

    assert(false);
    return "";
}
/*
 * Translates the flags of a parallel-begin/end callback into a printable
 * name of the form "invoker_<program|runtime>_<league|team>".
 *
 * The program invoker bit is tested before the runtime invoker bit, and the
 * league bit before the team bit. Asserts that no bits other than the chosen
 * invoker and kind are set. Unrecognized combinations trip an assertion and,
 * when assertions are disabled, yield the empty string.
 */
static const char*
parallel_flag2string( int /* ompt_parallel_flag_t */ t )
{
    const bool by_program = ( t & ompt_parallel_invoker_program ) != 0;
    const bool by_runtime = ( t & ompt_parallel_invoker_runtime ) != 0;

    if ( !by_program && !by_runtime )
    {
        assert( false );
        return "";
    }

    const int invoker = by_program ? ompt_parallel_invoker_program
                                   : ompt_parallel_invoker_runtime;

    if ( t & ompt_parallel_league )
    {
        assert( t == ( invoker | ompt_parallel_league ) );
        return by_program ? "invoker_program_league" : "invoker_runtime_league";
    }
    if ( t & ompt_parallel_team )
    {
        assert( t == ( invoker | ompt_parallel_team ) );
        return by_program ? "invoker_program_team" : "invoker_runtime_team";
    }

    assert( false );
    return "";
}
/*
 * Returns a printable name for an ompt_set_result_t value. Unknown values
 * trip an assertion and, when assertions are disabled, yield the empty
 * string.
 */
static const char*
set_result2string( ompt_set_result_t t )
{
    const char* name = NULL;

    switch ( t )
    {
        case ompt_set_error:            name = "error";            break;
        case ompt_set_never:            name = "never";            break;
        case ompt_set_impossible:       name = "impossible";       break;
        case ompt_set_sometimes:        name = "sometimes";        break;
        case ompt_set_sometimes_paired: name = "sometimes_paired"; break;
        case ompt_set_always:           name = "always";           break;
    }

    if ( name != NULL )
    {
        return name;
    }
    assert( false );
    return "";
}
/*
 * Translates task flags into a printable name.
 *
 * An initial task is reported as "initial" (asserting that no other bit is
 * set). Otherwise the first matching kind among implicit, explicit, target
 * and taskwait is combined with the first matching modifier among
 * undeferred, untied, final, mergeable and merged, e.g. "explicit_untied".
 * Without a modifier the bare kind name is returned after asserting that the
 * flags equal exactly that kind. Flags without any known kind trip an
 * assertion and, when assertions are disabled, yield the empty string.
 */
static const char*
task_flag2string( int /* ompt_task_flag_t */ t )
{
    enum { N_KINDS = 4, N_MODIFIERS = 5 };

    /* Column 0 is the bare kind; columns 1.. follow the order of modifiers[]. */
    static const char* const names[ N_KINDS ][ N_MODIFIERS + 1 ] = {
        { "implicit", "implicit_undeferred", "implicit_untied", "implicit_final",
          "implicit_mergeable", "implicit_merged" },
        { "explicit", "explicit_undeferred", "explicit_untied", "explicit_final",
          "explicit_mergeable", "explicit_merged" },
        { "target", "target_undeferred", "target_untied", "target_final",
          "target_mergeable", "target_merged" },
        { "taskwait", "taskwait_undeferred", "taskwait_untied", "taskwait_final",
          "taskwait_mergeable", "taskwait_merged" },
    };
    const int kinds[ N_KINDS ] = {
        ompt_task_implicit, ompt_task_explicit, ompt_task_target, ompt_task_taskwait
    };
    const int modifiers[ N_MODIFIERS ] = {
        ompt_task_undeferred, ompt_task_untied, ompt_task_final,
        ompt_task_mergeable, ompt_task_merged
    };

    if ( t & ompt_task_initial )
    {
        assert( t == ompt_task_initial );
        return "initial";
    }

    for ( int k = 0; k < N_KINDS; k++ )
    {
        if ( !( t & kinds[ k ] ) )
        {
            continue;
        }
        for ( int m = 0; m < N_MODIFIERS; m++ )
        {
            if ( t & modifiers[ m ] )
            {
                return names[ k ][ m + 1 ];
            }
        }
        assert( t == kinds[ k ] );
        return names[ k ][ 0 ];
    }

    assert( false );
    return "";
}
/*
 * Returns a printable name for an ompt_thread_t value ("initial", "worker",
 * "other" or "unknown"). Any other value trips an assertion and, when
 * assertions are disabled, yields the empty string.
 */
static const char*
thread2string( ompt_thread_t t )
{
    if ( t == ompt_thread_initial )
    {
        return "initial";
    }
    if ( t == ompt_thread_worker )
    {
        return "worker";
    }
    if ( t == ompt_thread_other )
    {
        return "other";
    }
    if ( t == ompt_thread_unknown )
    {
        return "unknown";
    }

    assert( false );
    return "";
}
/*
 * thread-begin callback: hands the calling thread the next id from a
 * process-wide counter (starting at 1), stores it both in the thread-local
 * ompt_tool_tid and in thread_data->value, and prints the id together with
 * the thread type. Asserts that the thread has not been given an id before.
 */
void
thread_begin_cb( ompt_thread_t thread_type,
                 ompt_data_t*  thread_data )
{
    static atomic_int_least32_t next_tid = 1; /* ids handed out are >= 1 */

    assert( ompt_tool_tid == -1 );

    const int32_t tid = atomic_fetch_add( &next_tid, 1 );
    ompt_tool_tid      = tid;
    thread_data->value = tid;

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | type = %s\n",
                              __FUNCTION__,
                              tid,
                              thread2string( thread_type ) );
}
/* Placeholder ids printed by the callbacks when the runtime passes a NULL
 * ompt_data_t pointer instead of real data. */
#define INVALID_TASK 6666666 /* printed for a NULL task_data / encountering_task_data */
#define INVALID_PARALLEL 7777777 /* printed for a NULL parallel_data */
#define INVALID_THREAD 8888888 /* not referenced in the code shown here */
/*
 * parallel-begin callback: tags the new region with a unique id (taken from
 * a counter starting at 7770001) in parallel_data->value and prints the
 * callback arguments.
 *
 * The encountering task is prefixed with "WARNING " when its data pointer is
 * NULL or its value is still 0; a NULL pointer is printed as INVALID_TASK.
 * encountering_task_frame is not inspected.
 */
void
parallel_begin_cb( ompt_data_t*        encountering_task_data,
                   const ompt_frame_t* encountering_task_frame,
                   ompt_data_t*        parallel_data,
                   unsigned int        requested_parallelism,
                   int                 flags,
                   const void*         codeptr_ra )
{
    static atomic_uint_least64_t parallel_counter = 7770001;

    ( void )encountering_task_frame;

    parallel_data->value = atomic_fetch_add( &parallel_counter, 1 );

    const bool task_suspicious =
        encountering_task_data == NULL || encountering_task_data->value == 0;
    const uint64_t task_id =
        encountering_task_data == NULL ? INVALID_TASK : encountering_task_data->value;

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | parallel_data = %" PRIu64 " | %sencountering_task_data = %" PRIu64 " | flags = %s | requested_parallelism = %u | codeptr_ra = %p\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              parallel_data->value,
                              task_suspicious ? "WARNING " : "",
                              task_id,
                              parallel_flag2string( flags ),
                              requested_parallelism,
                              codeptr_ra );
}
/*
 * parallel-end callback: prints the ids of the ending region and of the
 * encountering task together with the flags and return address.
 *
 * Each id is prefixed with "WARNING " when its data pointer is NULL or its
 * value is 0; NULL pointers are printed as INVALID_PARALLEL / INVALID_TASK.
 */
void
parallel_end_cb( ompt_data_t* parallel_data,
                 ompt_data_t* encountering_task_data,
                 int          flags,
                 const void*  codeptr_ra )
{
    const char* parallel_prefix = "";
    const char* task_prefix     = "";
    uint64_t    parallel_id     = INVALID_PARALLEL;
    uint64_t    task_id         = INVALID_TASK;

    if ( parallel_data != NULL )
    {
        parallel_id = parallel_data->value;
    }
    if ( parallel_data == NULL || parallel_id == 0 )
    {
        parallel_prefix = "WARNING ";
    }

    if ( encountering_task_data != NULL )
    {
        task_id = encountering_task_data->value;
    }
    if ( encountering_task_data == NULL || task_id == 0 )
    {
        task_prefix = "WARNING ";
    }

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | %sparallel_data = %" PRIu64 " | %sencountering_task_data = %" PRIu64 " | flags = %s | codeptr_ra = %p\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              parallel_prefix,
                              parallel_id,
                              task_prefix,
                              task_id,
                              parallel_flag2string( flags ),
                              codeptr_ra );
}
/*
 * Returns the next unique task id. Ids come from an atomic counter that
 * starts at 6660001 and grows by one per call.
 */
static uint64_t
new_task( void )
{
    static atomic_uint_least64_t next_task_id = 6660001;

    const uint64_t id = atomic_fetch_add_explicit( &next_task_id, 1, memory_order_seq_cst );
    return id;
}
/*
 * implicit-task callback: on scope begin, tags the task with a fresh id from
 * new_task() in task_data->value; then prints the callback arguments for
 * every endpoint.
 *
 * If task_data->value was already non-zero at scope begin, the previous id
 * is additionally printed as "(reused: ...)". NULL parallel_data / task_data
 * pointers are printed as INVALID_PARALLEL / INVALID_TASK. task_data is only
 * dereferenced after it has been checked against NULL, so the INVALID_TASK
 * fallback is reachable.
 */
void
implicit_task_cb( ompt_scope_endpoint_t endpoint,
                  ompt_data_t*          parallel_data,
                  ompt_data_t*          task_data,
                  unsigned int          actual_parallelism,
                  unsigned int          index, /* For initial tasks, that are not created
                                                  by a teams construct, this argument is 1. */
                  int                   flags )
{
    const uint64_t parallel_id =
        parallel_data == NULL ? INVALID_PARALLEL : parallel_data->value;

    if ( endpoint == ompt_scope_begin && task_data != NULL )
    {
        const uint64_t old = task_data->value;
        task_data->value = new_task();
        if ( old != 0 )
        {
            // The runtime might reuse implicit tasks without resetting their
            // ompt_data_t. Seems to be legal. Seen with NVHPC 23.1.
            OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | parallel_data = %" PRIu64 " | task_data = %" PRIu64 " (reused: %" PRIu64 ") | endpoint = %s | actual_parallelism = %u | index = %u | flags = %s\n",
                                      __FUNCTION__,
                                      ompt_tool_tid,
                                      parallel_id,
                                      task_data->value,
                                      old,
                                      scope_endpoint2string( endpoint ),
                                      actual_parallelism,
                                      index,
                                      task_flag2string( flags ) );
            return;
        }
    }

    const uint64_t task_id = task_data == NULL ? INVALID_TASK : task_data->value;

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | parallel_data = %" PRIu64 " | task_data = %" PRIu64 " | endpoint = %s | actual_parallelism = %u | index = %u | flags = %s\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              parallel_id,
                              task_id,
                              scope_endpoint2string( endpoint ),
                              actual_parallelism,
                              index,
                              task_flag2string( flags ) );
}
/*
 * Tool initializer handed to the runtime via ompt_start_tool(). Prints the
 * initial device number, looks up ompt_set_callback and registers the
 * implicit-task, thread-begin, parallel-begin and parallel-end callbacks.
 *
 * Returns 1 (non-zero indicates success), or 0 when the runtime does not
 * provide ompt_set_callback. The results of the individual registrations
 * are not inspected. tool_data is unused.
 */
static int
my_initialize_tool( ompt_function_lookup_t lookup,
                    int                    initial_device_num,
                    ompt_data_t*           tool_data )
{
    ( void )tool_data;

    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | initial_device_num %d\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              initial_device_num );

    ompt_set_callback_t set_callback = ( ompt_set_callback_t )lookup( "ompt_set_callback" );
    if ( set_callback == NULL )
    {
        /* Without the registration entry point no callback can be installed. */
        return 0;
    }

    set_callback( ompt_callback_implicit_task, ( ompt_callback_t )&implicit_task_cb );
    set_callback( ompt_callback_thread_begin, ( ompt_callback_t )&thread_begin_cb );
    set_callback( ompt_callback_parallel_begin, ( ompt_callback_t )&parallel_begin_cb );
    set_callback( ompt_callback_parallel_end, ( ompt_callback_t )&parallel_end_cb );

    return 1; /* non-zero indicates success */
}
/*
 * Tool finalizer handed to the runtime via ompt_start_tool(). Only prints a
 * trace line with the id of the calling thread; tool_data is unused.
 */
static void
my_finalize_tool( ompt_data_t* tool_data )
{
    ( void )tool_data;

    const int32_t tid = ompt_tool_tid;
    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 "\n", __FUNCTION__, tid );
}
/*
 * OMPT tool entry point. Switches stdout to unbuffered mode, prints the
 * OpenMP version and runtime version string it was called with, and returns
 * a pointer to a static result structure naming my_initialize_tool and
 * my_finalize_tool.
 *
 * omp_version is unsigned and therefore printed with %u.
 */
ompt_start_tool_result_t*
ompt_start_tool( unsigned int omp_version,
                 const char*  runtime_version )
{
    static ompt_start_tool_result_t tool = { &my_initialize_tool,
                                             &my_finalize_tool,
                                             ompt_data_none };

    setbuf( stdout, NULL );
    OMPT_TOOL_GUARDED_PRINTF( "[%s] tid = %" PRId32 " | omp_version %u | runtime_version = \'%s\'\n",
                              __FUNCTION__,
                              ompt_tool_tid,
                              omp_version,
                              runtime_version );
    return &tool;
}
```
If we compile and run the tool we can see the following:
You can see that `parallel_data` is set to `0` in the second `ompt_callback_implicit_task` for one team, but gets the correct value for two (and more) teams.