Sandia-OpenSHMEM / SOS

Sandia OpenSHMEM is an implementation of the OpenSHMEM specification over multiple Networking APIs, including Portals 4, the Open Fabric Interface (OFI), and UCX. Please click on the Wiki tab for help with building and using SOS.
Other
61 stars 53 forks source link

Some bugs in teams implementation #960

Closed akhillanger closed 4 years ago

akhillanger commented 4 years ago
  1. https://github.com/Sandia-OpenSHMEM/SOS/blob/master/src/shmem_team.c#L193 Should be shmem_internal_psync_pool = NULL, similarly in L197,L201
  2. https://github.com/Sandia-OpenSHMEM/SOS/blob/master/src/shmem_internal.h#L550 Should be return (ptr[index / CHAR_BIT] >> (index % CHAR_BIT)) & 1;
  3. https://github.com/Sandia-OpenSHMEM/SOS/blob/master/src/shmem_team.c#L491 Should be return &shmem_internal_psync_pool[team->psync_idx * PSYNC_CHUNK_SIZE];

Tagging @davidozog and @jdinan

davidozog commented 4 years ago

Awesome - thanks @akhillanger

These fixes all look correct to me at a glance. Out of curiosity, did you run into any specific problems you can share? I see that these bugs have some very undesirable side effects... Perhaps we could add some better unit tests to cover team creation/resource management.

Would you mind posting a PR with the fixes? We should include these in v1.5.0, which we're hoping to release pretty soon.

akhillanger commented 4 years ago

David, I came across these while trying to understand the code. I don't have test cases yet. Here is a patch with these fixes.


diff --git a/src/shmem_internal.h b/src/shmem_internal.h
index 3583b17..4e62f20 100644
--- a/src/shmem_internal.h
+++ b/src/shmem_internal.h
@@ -547,7 +547,7 @@ void shmem_internal_bit_clear(unsigned char *ptr, size_t size, size_t index)
 static inline
 unsigned char shmem_internal_bit_fetch(unsigned char *ptr, size_t index)
 {
-    return (ptr[index / CHAR_BIT] >> index) & 1;
+    return (ptr[index / CHAR_BIT] >> (index % CHAR_BIT)) & 1;
 }

 static inline
diff --git a/src/shmem_team.c b/src/shmem_team.c
index 0fa418d..56f352d 100644
--- a/src/shmem_team.c
+++ b/src/shmem_team.c
@@ -190,15 +190,15 @@ cleanup:
     }
     if (shmem_internal_psync_pool) {
         shmem_internal_free(shmem_internal_psync_pool);
-        shmem_internal_team_pool = NULL;
+        shmem_internal_psync_pool = NULL;
     }
     if (psync_pool_avail) {
         shmem_internal_free(psync_pool_avail);
-        shmem_internal_team_pool = NULL;
+        psync_pool_avail = NULL;
     }
     if (team_ret_val) {
         shmem_internal_free(team_ret_val);
alanger@prm-login:~/SOS/src$ git diff 929392c...1e9cc0e > patch
alanger@prm-login:~/SOS/src$ cat patch
diff --git a/src/shmem_internal.h b/src/shmem_internal.h
index 3583b17..4e62f20 100644
--- a/src/shmem_internal.h
+++ b/src/shmem_internal.h
@@ -547,7 +547,7 @@ void shmem_internal_bit_clear(unsigned char *ptr, size_t size, size_t index)
 static inline
 unsigned char shmem_internal_bit_fetch(unsigned char *ptr, size_t index)
 {
-    return (ptr[index / CHAR_BIT] >> index) & 1;
+    return (ptr[index / CHAR_BIT] >> (index % CHAR_BIT)) & 1;
 }

 static inline
diff --git a/src/shmem_team.c b/src/shmem_team.c
index 0fa418d..56f352d 100644
--- a/src/shmem_team.c
+++ b/src/shmem_team.c
@@ -190,15 +190,15 @@ cleanup:
     }
     if (shmem_internal_psync_pool) {
         shmem_internal_free(shmem_internal_psync_pool);
-        shmem_internal_team_pool = NULL;
+        shmem_internal_psync_pool = NULL;
     }
     if (psync_pool_avail) {
         shmem_internal_free(psync_pool_avail);
-        shmem_internal_team_pool = NULL;
+        psync_pool_avail = NULL;
     }
     if (team_ret_val) {
         shmem_internal_free(team_ret_val);
-        shmem_internal_team_pool = NULL;
+        team_ret_val = NULL;
     }

     return -1;
@@ -488,7 +488,7 @@ long * shmem_internal_team_choose_psync(shmem_internal_team_t *team, shmem_inter
             }
             team->psync_avail[0] = 0;

-            return &shmem_internal_psync_pool[psync];
+            return &shmem_internal_psync_pool[team->psync_idx * PSYNC_CHUNK_SIZE];
     }
 }
davidozog commented 4 years ago

Closed by #968