Open cphyc opened 5 years ago
Another way of boosting the neighbour querying is to use an algorithm as follow
diff --git a/yt/geometry/oct_container.pxd b/yt/geometry/oct_container.pxd
index 978aabd4c..7f0252586 100644
--- a/yt/geometry/oct_container.pxd
+++ b/yt/geometry/oct_container.pxd
@@ -69,6 +69,8 @@ cdef class OctreeContainer:
cdef public int num_domains
cdef Oct *get(self, np.float64_t ppos[3], OctInfo *oinfo = ?,
int max_level = ?)
+ cdef Oct *get_from(self, Oct *o, np.int64_t ipos[3], np.int64_t nipos[3],
+ int level, OctInfo *oinfo = ?)
cdef int get_root(self, int ind[3], Oct **o)
cdef Oct **neighbors(self, OctInfo *oinfo, np.int64_t *nneighbors,
Oct *o, bint periodicity[3])
diff --git a/yt/geometry/oct_container.pyx b/yt/geometry/oct_container.pyx
index 0fabf9b19..1543d1d20 100644
--- a/yt/geometry/oct_container.pyx
+++ b/yt/geometry/oct_container.pyx
@@ -291,6 +291,70 @@ cdef class OctreeContainer:
domain_ids.append(i+1)
return domain_ids
+ @cython.boundscheck(False)
+ @cython.wraparound(False)
+ @cython.cdivision(True)
+ cdef Oct *get_from(self, Oct* o, np.int64_t ipos[3], np.int64_t nipos[3], int level, OctInfo *oinfo = NULL):
+ cdef np.uint64_t mask
+ cdef int i, j
+ cdef np.float64_t dx
+ cdef np.float64_t[3] fpos
+ cdef np.uint8_t neigh_ind
+ cdef Oct* next
+ cdef Oct* prev
+
+ # Binary mask
+ mask = (ipos[0] ^ nipos[0]) | (ipos[1] ^ nipos[1]) | (ipos[2] ^ nipos[2])
+
+ dx = (self.DRE[0]-self.DLE[0]) / (self.nn[0] << level) # assuming square domain
+
+ # Common root is coarser than base grid, use .get method to get the neighbour
+ if (mask>>level) > 0:
+ for j in range(3):
+ fpos[j] = (nipos[j] + 0.5) * dx
+ return self.get(fpos, oinfo, level)
+
+ # Climb the tree up up to common root
+ j = 1
+ next = o.parent
+ while (mask >> j) > 0 :
+ next = next.parent
+ j += 1
+ print(f'\t\t>j={j}', next==NULL)
+
+ # Go down the tree
+ prev = next
+
+ while j > 0 and next != NULL:
+ j -= 1
+ neigh_ind = cind(
+ ((nipos[0] >> j) & 0b001),
+ ((nipos[1] >> j) & 0b001),
+ ((nipos[2] >> j) & 0b001))
+ print(f'\t\t<j={j}, neigh_ind={neigh_ind:03b}')
+ if next.children == NULL:
+ prev = next
+ next = NULL
+ else:
+ prev = next
+ next = next.children[neigh_ind]
+
+ if next == NULL:
+ j += 1
+ next = prev
+
+ if not ((0 <= j <= 1) and next != NULL):
+ print('THIS SHOULD NOT HAPPEN', j, next==NULL)
+
+ if oinfo == NULL: return next
+
+ oinfo.level = level - j
+ for i in range(3):
+ oinfo.ipos[i] = nipos[i] >> j
+ oinfo.dds[i] = dx * (1<<j)
+ oinfo.left_edge[i] = oinfo.ipos[i] * oinfo.dds[i]
+ return next
+
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@@ -341,26 +405,30 @@ cdef class OctreeContainer:
if npos[2] >= ndim[2]: npos[2] -= ndim[2]
# Now we have our npos, which we just need to find.
# Level 0 gets bootstrapped
- for n in range(3):
- ind[n] = ((npos[n] >> (oi.level)) & 1)
- cand = NULL
- self.get_root(ind, &cand)
- # We should not get a NULL if we handle periodicity
- # correctly, but we might.
- if cand == NULL: continue
- for level in range(1, oi.level+1):
- dlevel = oi.level - level
- if cand.children == NULL: break
- for n in range(3):
- ind[n] = (npos[n] >> dlevel) & 1
- ii = cind(ind[0],ind[1],ind[2])
- if cand.children[ii] == NULL: break
- cand = cand.children[ii]
- if cand.children != NULL:
- olist = OctList_subneighbor_find(
- olist, cand, i, j, k)
- else:
+ cand = self.get_from(o, oi.ipos, npos, oi.level)
+ if cand != NULL:
olist = OctList_append(olist, cand)
olist = my_list
cdef int noct = OctList_count(olist)
cdef Oct **neighbors
@@ -630,6 +698,7 @@ cdef class OctreeContainer:
next = &cont.my_objs[cont.n_assigned]
cont.n_assigned += 1
self.root_mesh[ind[0]][ind[1]][ind[2]] = next
+ next.parent = NULL
self.nocts += 1
return next
@@ -649,6 +718,7 @@ cdef class OctreeContainer:
next = &cont.my_objs[cont.n_assigned]
cont.n_assigned += 1
parent.children[cind(ind[0],ind[1],ind[2])] = next
+ next.parent = parent
self.nocts += 1
return next
@@ -873,6 +943,7 @@ cdef class SparseOctreeContainer(OctreeContainer):
tsearch(<void*>ikey, &self.tree_root, root_node_compare)
self.num_root += 1
self.nocts += 1
+ next.parent = NULL
return next
def allocate_domains(self, domain_counts, int root_nodes):
diff --git a/yt/geometry/oct_visitors.pxd b/yt/geometry/oct_visitors.pxd
index 142adad60..a3d71e21a 100644
--- a/yt/geometry/oct_visitors.pxd
+++ b/yt/geometry/oct_visitors.pxd
@@ -16,6 +16,7 @@ cdef struct Oct:
np.int64_t domain_ind # index within the global set of domains
np.int64_t domain # (opt) addl int index
Oct **children # Up to 8 long
+ Oct *parent
cdef struct OctPadded:
np.int64_t file_ind
This is absolutely worth implementing.
In some AMR codes (such as RAMSES) the particles are read by yt in the order they appear in memory. This means that consecutive particles are likely to reside in the same oct (or in neighboring octs).
Using this, we could easily optimize the particle deposition scheme, so that when deposing a bunch of particles from the same oct, only one tree traversal is required.
This could either be done in
particle_deposity.pyx
https://github.com/yt-project/yt/blob/95f6c5dd1d06cb088f9369a15d478d6824090663/yt/geometry/particle_deposit.pyx#L89 or in theget
method of the OctreeContainer object https://github.com/yt-project/yt/blob/a209a67f2977571925bc297739003254130ef9b3/yt/geometry/oct_container.pyx#L203For instance, one could check that two consecutive particles leave in the same cell.