yt-project / yt

Main yt repository
http://yt-project.org
Other
461 stars 276 forks source link

Faster particle depositions #2281

Open cphyc opened 5 years ago

cphyc commented 5 years ago

In some AMR codes (such as RAMSES) the particles are read by yt in the order they appear in memory. This means that consecutive particles are likely to reside in the same oct (or in neighboring octs).

Using this, we could easily optimize the particle deposition scheme, so that when deposing a bunch of particles from the same oct, only one tree traversal is required.

This could either be done in particle_deposity.pyx https://github.com/yt-project/yt/blob/95f6c5dd1d06cb088f9369a15d478d6824090663/yt/geometry/particle_deposit.pyx#L89 or in the get method of the OctreeContainer object https://github.com/yt-project/yt/blob/a209a67f2977571925bc297739003254130ef9b3/yt/geometry/oct_container.pyx#L203

For instance, one could check that two consecutive particles leave in the same cell.

cphyc commented 4 years ago

Another way of boosting the neighbour querying is to use an algorithm as follow

diff --git a/yt/geometry/oct_container.pxd b/yt/geometry/oct_container.pxd
index 978aabd4c..7f0252586 100644
--- a/yt/geometry/oct_container.pxd
+++ b/yt/geometry/oct_container.pxd
@@ -69,6 +69,8 @@ cdef class OctreeContainer:
     cdef public int num_domains
     cdef Oct *get(self, np.float64_t ppos[3], OctInfo *oinfo = ?,
                   int max_level = ?)
+    cdef Oct *get_from(self, Oct *o, np.int64_t ipos[3], np.int64_t nipos[3],
+                  int level, OctInfo *oinfo = ?)
     cdef int get_root(self, int ind[3], Oct **o)
     cdef Oct **neighbors(self, OctInfo *oinfo, np.int64_t *nneighbors,
                          Oct *o, bint periodicity[3])
diff --git a/yt/geometry/oct_container.pyx b/yt/geometry/oct_container.pyx
index 0fabf9b19..1543d1d20 100644
--- a/yt/geometry/oct_container.pyx
+++ b/yt/geometry/oct_container.pyx
@@ -291,6 +291,70 @@ cdef class OctreeContainer:
                 domain_ids.append(i+1)
         return domain_ids

+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @cython.cdivision(True)
+    cdef Oct *get_from(self, Oct* o, np.int64_t ipos[3], np.int64_t nipos[3], int level, OctInfo *oinfo = NULL):
+        cdef np.uint64_t mask
+        cdef int i, j
+        cdef np.float64_t dx
+        cdef np.float64_t[3] fpos
+        cdef np.uint8_t neigh_ind
+        cdef Oct* next
+        cdef Oct* prev
+
+        # Binary mask
+        mask = (ipos[0] ^ nipos[0]) | (ipos[1] ^ nipos[1]) | (ipos[2] ^ nipos[2])
+
+        dx = (self.DRE[0]-self.DLE[0]) / (self.nn[0] << level)  # assuming square domain
+
+        # Common root is coarser than base grid, use .get method to get the neighbour
+        if (mask>>level) > 0:
+            for j in range(3):
+                fpos[j] = (nipos[j] + 0.5) * dx
+            return self.get(fpos, oinfo, level)
+
+        # Climb the tree up up to common root
+        j = 1
+        next = o.parent
+        while (mask >> j) > 0 :
+            next = next.parent
+            j += 1
+            print(f'\t\t>j={j}', next==NULL)
+
+        # Go down the tree
+        prev = next
+
+        while j > 0 and next != NULL:
+            j -= 1
+            neigh_ind = cind(
+                ((nipos[0] >> j) & 0b001),
+                ((nipos[1] >> j) & 0b001),
+                ((nipos[2] >> j) & 0b001))
+            print(f'\t\t<j={j}, neigh_ind={neigh_ind:03b}')
+            if next.children == NULL:
+                prev = next
+                next = NULL
+            else:
+                prev = next
+                next = next.children[neigh_ind]
+
+        if next == NULL:
+            j += 1
+            next = prev
+
+        if not ((0 <= j <= 1) and next != NULL):
+            print('THIS SHOULD NOT HAPPEN', j, next==NULL)
+
+        if oinfo == NULL: return next
+
+        oinfo.level = level - j
+        for i in range(3):
+            oinfo.ipos[i] = nipos[i] >> j
+            oinfo.dds[i] = dx * (1<<j)
+            oinfo.left_edge[i] = oinfo.ipos[i] * oinfo.dds[i]
+        return next
+
     @cython.boundscheck(False)
     @cython.wraparound(False)
     @cython.cdivision(True)
@@ -341,26 +405,30 @@ cdef class OctreeContainer:
                     if npos[2] >= ndim[2]: npos[2] -= ndim[2]
                     # Now we have our npos, which we just need to find.
                     # Level 0 gets bootstrapped
-                    for n in range(3):
-                        ind[n] = ((npos[n] >> (oi.level)) & 1)
-                    cand = NULL
-                    self.get_root(ind, &cand)
-                    # We should not get a NULL if we handle periodicity
-                    # correctly, but we might.
-                    if cand == NULL: continue
-                    for level in range(1, oi.level+1):
-                        dlevel = oi.level - level
-                        if cand.children == NULL: break
-                        for n in range(3):
-                            ind[n] = (npos[n] >> dlevel) & 1
-                        ii = cind(ind[0],ind[1],ind[2])
-                        if cand.children[ii] == NULL: break
-                        cand = cand.children[ii]
-                    if cand.children != NULL:
-                        olist = OctList_subneighbor_find(
-                            olist, cand, i, j, k)
-                    else:
+                    cand = self.get_from(o, oi.ipos, npos, oi.level)
+                    if cand != NULL:
                         olist = OctList_append(olist, cand)
         olist = my_list
         cdef int noct = OctList_count(olist)
         cdef Oct **neighbors
@@ -630,6 +698,7 @@ cdef class OctreeContainer:
         next = &cont.my_objs[cont.n_assigned]
         cont.n_assigned += 1
         self.root_mesh[ind[0]][ind[1]][ind[2]] = next
+        next.parent = NULL
         self.nocts += 1
         return next

@@ -649,6 +718,7 @@ cdef class OctreeContainer:
         next = &cont.my_objs[cont.n_assigned]
         cont.n_assigned += 1
         parent.children[cind(ind[0],ind[1],ind[2])] = next
+        next.parent = parent
         self.nocts += 1
         return next

@@ -873,6 +943,7 @@ cdef class SparseOctreeContainer(OctreeContainer):
         tsearch(<void*>ikey, &self.tree_root, root_node_compare)
         self.num_root += 1
         self.nocts += 1
+        next.parent = NULL
         return next

     def allocate_domains(self, domain_counts, int root_nodes):
diff --git a/yt/geometry/oct_visitors.pxd b/yt/geometry/oct_visitors.pxd
index 142adad60..a3d71e21a 100644
--- a/yt/geometry/oct_visitors.pxd
+++ b/yt/geometry/oct_visitors.pxd
@@ -16,6 +16,7 @@ cdef struct Oct:
     np.int64_t domain_ind   # index within the global set of domains
     np.int64_t domain       # (opt) addl int index
     Oct **children          # Up to 8 long
+    Oct *parent

 cdef struct OctPadded:
     np.int64_t file_ind
matthewturk commented 4 years ago

This is absolutely worth implementing.