llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
26.83k stars 11k forks source link

[OpenMP] Debug assert hit with nesting parallels #94260

Open mikaoP opened 1 month ago

mikaoP commented 1 month ago
//  This is a simplification of task_teams_stress_test.cpp test
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <omp.h>

// The number of times to run each test
#define NTIMES 2

// Creates 100 explicit tasks from the calling thread. Each task only queries
// the number of the thread executing it; the value is never used afterwards.
void test_tasks() {
  for (int task_idx = 0; task_idx < 100; ++task_idx) {
#pragma omp task
    {
      int thread_id = omp_get_thread_num();
    }
  }
}

// Single level of parallelism: opens a parallel region that requests
// `nthreads` threads and has every thread of that region call test_tasks().
// When VERBOSE is defined, the call is logged from an `omp master` construct.
void test_base(int nthreads) {
#ifdef VERBOSE
#pragma omp master
  printf("    test_base(%d)\n", nthreads);
#endif

#pragma omp parallel num_threads(nthreads)
  {
    test_tasks();
  }
}

// Testing nested parallel with increment tasks
// (the task body visible here only reads the thread number; nothing is
// incremented).
// first = nthreads of outer parallel
// second = nthreads of nested parallel
// NOTE(review): this function is part of a reproducer for a runtime assertion;
// keep the order "create tasks, then open the nested region" intact.
void test_nest(int first, int second) {
#ifdef VERBOSE
#pragma omp master
  printf("   test_nest(%d, %d)\n", first, second);
#endif
#pragma omp parallel num_threads(first)
  {
  // Inside the outer region: create 100 tasks inline (same loop and task body
  // as test_tasks()); `tid` is never used.
  for (int i = 0; i < 100; ++i)
  #pragma omp task
  {
  int tid = omp_get_thread_num();
  }
    // Still inside the outer region: open the nested parallel region, which
    // requests `second` threads and creates further tasks via test_base().
    test_base(second);
  }
}

// Invokes func(args...) `n` times, one call after another. A zero or negative
// `n` results in no calls. The same argument values are passed on every call.
template <typename... Args>
void run_ntimes(int n, void (*func)(Args...), Args... args) {
  for (int remaining = n; remaining > 0; --remaining)
    func(args...);
}

// Entry point of the reproducer: sets the maximum number of active parallel
// levels to 5, runs test_nest NTIMES for each thread-count configuration in
// order, then prints PASS.
int main() {
  omp_set_max_active_levels(5);

  // {threads of outer parallel, threads of nested parallel}
  const int team_sizes[][2] = {{4, 3}, {2, 1}};
  for (const auto &sizes : team_sizes)
    run_ntimes(NTIMES, test_nest, sizes[0], sizes[1]);

  printf("PASS\n");
  return EXIT_SUCCESS;
}

Assertion failure at kmp_tasking.cpp(3238): victim_tid < task_team->tt.tt_nproc. clang++ -fopenmp t1.cpp -o t1 && while taskset -c 0-1 env KMP_USE_YIELD=0 ./t1 || break; do date; done Apparently KMP_USE_YIELD=0 is necessary: I have not managed to reproduce it with KMP_USE_YIELD set to 1 or 2. Fewer CPUs than threads are also needed.

mikaoP commented 1 month ago

Reference commit of the assert 4ea24946e356be31446fc30ca3d11cc5783ba2a6 @jpeyton52

llvmbot commented 1 month ago

@llvm/issue-subscribers-openmp

Author: None (mikaoP)

```cpp // This is a simplification of task_teams_stress_test.cpp test #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <omp.h> // The number of times to run each test #define NTIMES 2 // Every thread creates a single "increment" task void test_tasks() { for (int i = 0; i < 100; ++i) #pragma omp task { int tid = omp_get_thread_num(); } } // Testing single level of parallelism with increment tasks void test_base(int nthreads) { #ifdef VERBOSE #pragma omp master printf(" test_base(%d)\n", nthreads); #endif #pragma omp parallel num_threads(nthreads) { test_tasks(); } } // Testing nested parallel with increment tasks // first = nthreads of outer parallel // second = nthreads of nested parallel void test_nest(int first, int second) { #ifdef VERBOSE #pragma omp master printf(" test_nest(%d, %d)\n", first, second); #endif #pragma omp parallel num_threads(first) { for (int i = 0; i < 100; ++i) #pragma omp task { int tid = omp_get_thread_num(); } test_base(second); } } template <typename... Args> void run_ntimes(int n, void (*func)(Args...), Args... args) { for (int i = 0; i < n; ++i) { func(args...); } } int main() { omp_set_max_active_levels(5); run_ntimes(NTIMES, test_nest, 4, 3); run_ntimes(NTIMES, test_nest, 2, 1); printf("PASS\n"); return EXIT_SUCCESS; } ``` `Assertion failure at kmp_tasking.cpp(3238): victim_tid < task_team->tt.tt_nproc.` `clang++ -fopenmp t1.cpp -o t1 && while taskset -c 0-1 env KMP_USE_YIELD=0 ./t1 || break; do date; done` Aparently it is necessary `KMP_USE_YIELD=0`. I've not managed how to reproduce it with 1 or 2. Also less cpus than threads is needed.
mikaoP commented 1 month ago

I also reproduced it with the following example, which is based on the omp_parallel_num_threads.c test. Basically, I added tasks to enable tasking. This one reproduces without modifying the yield policy.

#include <stdio.h>                                                                                                                                                                                                                              
#include <unistd.h>                                                                                                                                                                                                                             
#include "omp_testsuite.h"                                                                                                                                                                                                                      

/*
 * Checks that a parallel region with num_threads(t) reports and runs exactly
 * t threads, for every t from 1 up to the team size observed in a parallel
 * region without a num_threads clause. An empty task is created at top level
 * and inside every parallel region.
 * Returns 1 when every check passed and 0 otherwise.
 */
int test_omp_parallel_num_threads()
{
  int num_failed = 0;
  int max_threads = 0;
  int threads;
  int nthreads;

  #pragma omp task
  {}

  /* first we check how many threads are available */
  #pragma omp parallel
  {
    #pragma omp task
    {}
    #pragma omp master
    max_threads = omp_get_num_threads();
  }

  /* we increase the number of threads from one to maximum */
  for (threads = 1; threads <= max_threads; threads++) {
    nthreads = 0;
    #pragma omp parallel reduction(+:num_failed) num_threads(threads)
    {
      #pragma omp task
      {}
      /* the team size seen inside the region must equal the request */
      num_failed += (threads != omp_get_num_threads());
      #pragma omp atomic
      nthreads += 1;
    }
    /* the count of threads that executed the region must match as well */
    if (nthreads != threads)
      num_failed++;
  }
  return num_failed == 0;
}

/*
 * Runs test_omp_parallel_num_threads() REPETITIONS times and returns the
 * number of runs that failed (zero when every run passed).
 */
int main()
{
  int num_failed = 0;
  int run = 0;

  while (run < REPETITIONS) {
    /* the test returns zero when it failed */
    if (test_omp_parallel_num_threads() == 0)
      ++num_failed;
    ++run;
  }
  return num_failed;
}

clang++ -fopenmp t1.c -o t1 && while ./t1 || break; do date; done

jpeyton52 commented 1 month ago

Thanks for reporting this. I just wanted to acknowledge that I've seen this and am investigating it now.