Closed krober10nd closed 2 years ago
Thank you for submitting this PR. Yes, if you already have some more efficient code to compute ipobo, it would be great to integrate it within PyTelTools. Moreover, could you share your mesh (by private message if required) to compare the performance.
Okay sure thing, I'll get to it shortly.
By the way, is there a way to visualize a cli file in Blue Kenue? I tried to find an option but to no avail. It would be nice to verify the cli file without it failing at the pre-process stage when simulation is attempted.
Ok perfect, nothing urgent.
To my knowledge, BlueKenue only loads its format for boundaries (*.bc2), but not cli. And unfortunately PyTelTools also do not handle this feature.
Hello Luc,
I tried for a little bit today to see how my Python implementation competes with yours iter_boundaries
. For this enormous mesh, it's also quite slow taking hours to complete. I'd imagine we'd need to go to Cython to avoid this bottleneck. Please contact me at (removedforprivacy) at gmail . com and I'll send you the mesh connectivity and nodes so you can try.
Previously I used my MATLAB implementation of this (which makes it a couple minutes long). But coding the same things from MATLAB into Python doesn't work that well.
For what's it worth, here is the older MATLAB implementation that bypasses the np.where
like search (which is the bottleneck).
https://github.com/CHLNDDEV/OceanMesh2D/blob/Projection/utilities/extdom_polygon.m
Okay, I implemented the MATLAB code above in Python and was able to get the times down to minutes. 207 seconds for 31k boundaries.
from time import time
import numpy as np
nan = np.nan
def unique_row_view(data):
"""https://github.com/numpy/numpy/issues/11136"""
b = np.ascontiguousarray(data).view(
np.dtype((np.void, data.dtype.itemsize * data.shape[1]))
)
u, cnts = np.unique(b, return_counts=True)
u = u.view(data.dtype).reshape(-1, data.shape[1])
return u, cnts
def get_edges(entities, dim=2):
"""Get the undirected edges of mesh in no order (NB: are repeated)
:param entities: the mesh connectivity
:type entities: numpy.ndarray[`int` x (dim+1)]
:param dim: dimension of the mesh
:type dim: `int`, optional
:return: edges: the edges that make up the mesh
:rtype: numpy.ndarray[`int`x 2]
"""
num_entities = len(entities)
entities = np.array(entities)
if dim == 2:
edges = entities[:, [[0, 1], [0, 2], [1, 2]]]
edges = edges.reshape((num_entities * 3, 2))
elif dim == 3:
edges = entities[:, [[0, 1], [1, 2], [2, 0], [0, 3], [1, 3], [2, 3]]]
edges = edges.reshape((num_entities * 6, 2))
return edges
def get_boundary_edges(entities, dim=2):
"""Get the boundary edges of the mesh. Boundary edges only appear (dim-1) times
:param entities: the mesh connectivity
:type entities: numpy.ndarray[`int` x (dim+1)]
:param dim: dimension of the mesh
:type dim: `int`, optional
:return: boundary_edges: the edges that make up the boundary of the mesh
:rtype: numpy.ndarray[`int` x 2]
"""
edges = get_edges(entities, dim=dim)
edges = np.sort(edges, axis=1)
unq, cnt = unique_row_view(edges)
boundary_edges = np.array([e for e, c in zip(unq, cnt) if c == (dim - 1)])
return boundary_edges
# @profile
def get_winded_boundary_edges(entities):
"""Order the boundary edges of the mesh in a winding fashion
:param entities: the mesh connectivity
:type entities: numpy.ndarray[`int` x (dim+1)]
:return: boundary_edges: the edges that make up the boundary of
the mesh in a winding order
:rtype: numpy.ndarray[`int` x 2]
"""
boundary_edges = get_boundary_edges(entities)
_bedges = boundary_edges.copy()
_bedges = np.append(_bedges, np.fliplr(_bedges), axis=0)
_bedges = _bedges[np.lexsort(_bedges.T[::-1])]
ned = len(_bedges)
is_visited = np.zeros(ned)
# given a vertex with a num gid, return where it exists last lid.
inverse_map = np.full((np.amax(_bedges) * 2, 1), -1)
for lid in range(ned):
gid = _bedges[lid, 0]
inverse_map[gid] = lid
all_boundary_edges = []
while ~np.all(is_visited):
choice = np.where(is_visited == 0)[0][0]
ordering = np.array([choice])
v_start, v_next = _bedges[choice, :]
is_visited[choice] = 1
while True:
# form local set to search for continuation of boundary walk
idx = inverse_map[v_next]
st = np.amax(np.array([idx[0] - 1, 0], dtype=int))
ed = np.amin(np.array([idx[0] + 2, ned - 1], dtype=int))
rows = np.where(_bedges[st:ed, 0] == v_next)[0]
rows += st
choices = [row for row in rows if is_visited[row] == 0]
if len(choices) == 0:
break
choice = choices[0]
ordering = np.append(ordering, [choice])
is_visited[choice] = 1
next_edge = _bedges[choice, :]
# flag flipped edge too
flipped_next_edge = next_edge[::-1]
idx = inverse_map[flipped_next_edge[0]]
st = np.amax(np.array([idx[0] - 1, 0], dtype=int))
ed = np.amin(np.array([idx[0] + 2, ned - 1], dtype=int))
rows = np.where(_bedges[st:ed, 1] == flipped_next_edge[1])
rows += st
is_visited[rows] = 1
tmp = [v for v in next_edge if v != v_next]
v_next = tmp[0]
boundary_edges = _bedges[ordering, :]
# logic to flip edges
flipped_boundary_edges = []
for idx in range(len(boundary_edges) - 1):
if idx == 0:
this_edge = boundary_edges[0]
flipped_boundary_edges.append(this_edge)
next_edge = boundary_edges[idx + 1]
if this_edge[-1] != next_edge[0]:
flipped_boundary_edges.append(next_edge[::-1])
else:
flipped_boundary_edges.append(next_edge)
this_edge = flipped_boundary_edges[-1]
flipped_boundary_edges = np.asarray(flipped_boundary_edges)
# print(f"Added a boundary with {len(boundary_edges)} edges")
all_boundary_edges.append(flipped_boundary_edges)
return all_boundary_edges
if __name__ == "__main__":
ikle = np.load("ikle.npy")
xy = np.load("xy.npy")
t1 = time()
ordered_bedges = get_winded_boundary_edges(ikle)
print(time() - t1)
np.save("ordered_bedges", ordered_bedges)
#ordered_bedges = np.load("ordered_bedges.npy", allow_pickle=True)
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
print(len(ordered_bedges))
fig, ax = plt.subplots()
for bedges in ordered_bedges:
if len(bedges) > 0:
tmp = xy[bedges].reshape(-1, 1, 2)
segments = np.concatenate([tmp[:-1], tmp[1:]], axis=1)
lc = LineCollection(segments)
lc.set_array(np.ones(len(ordered_bedges)))
line = ax.add_collection(lc)
ax.autoscale()
plt.show()
Thank you for your email with the data (*.npy), I will have a look when I will have time.
Hello Luc, Have you had any time to work on this recently? Thank you
I merged your commit (but did not want to close this PR) and I had a look at your script and I confirm the performance issue (on my laptop i7) on your complex test case:
Serafin.build_ipobo()
get_winded_boundary_edges(...)
At the same time, I had a look at the Telemac documentation and I found that the ipobo table is not used/read by Telemac... See https://gitlab.pam-retd.fr/otm/telemac-mascaret/-/blob/main/documentation/Misc/developer_guide/latex/A2-selafin_format.tex:
1 record containing table IPOBO (integer array of dimension NPOIN); the value is 0 for an internal point, and gives the numbering of boundary points for the others. This array is never used (its data can be retrieved by another way). In parallel the table KNOLG is given instead, keeping track of the global numbers of points in the original mesh.
So I don't know if it is necessary to spend more time on its improvement... However having a tool to iterate properly on all boundary edges can be interesting (for generating the boundary condition file for example).
I tried your implementation get_winded_boundary_edges(...)
and I would have some questions:
ordered_bedges[-1]
) is array([], dtype=float64)
[ 0, 56], [ 56, 0], [ 0, 58], [ 58, 60], [ 60, 62] ... , [ 59, 57], [ 57, 56]
=> why is the first edge repeated in each direction?On my side, I compared the IPOBO of _geopildepon.slf with the implemention of PyTelTools and the difference are in the islands only (depending on the first point to begin the boundary), see the files attached: PR12.zip.
$ python PR12_pildepon_rebuild_ipobo.py
Reading the input file: "geo_pildepon.slf" of size 170721 bytes
~> Difference after rebuilding IPOBO table:
74 210 226
75 209 225
76 208 224
77 207 223
78 206 222
79 205 221
80 204 220
83 212 196
84 202 218
87 213 197
88 201 217
91 214 198
92 200 216
95 215 199
96 199 215
99 216 200
100 198 214
103 217 201
104 197 213
107 218 202
108 196 212
111 220 204
112 221 205
113 222 206
114 223 207
115 224 208
116 225 209
117 226 210
251 211 195
252 195 211
255 219 203
258 203 219
Hey Luc, thank you very much for your response.
That first edge you observe repeating is a coding bug that I will need to fix. I'm glad you verified its speed. It is a significant time savings than the existing approach but I am somewhat disappointed that it's not actually used by TELEMAC. Perhaps then this step creating the IPOBO table should be by passed when reading/writing Serafin files then?
In terms of whether my function respects TELEMAC convention, at this moment no, it does not. That's actually what I wanted some help on. For example, how do you determine if the boundary is "external"? It contains all the other boundaries in its extent? That seems to be an expensive test for a mesh with 30k boundaries but maybe my logic is suboptimal..
Much appreciated,
Keith
Hey Keith,
I tried to have a look at my implementation but could not find easily the bottleneck. I tried to use masked array instead of a repeated call to np.delete (see https://github.com/CNR-Engineering/PyTelTools/blob/master/pyteltools/slf/Serafin.py#L599), but that does not decrease significantly the computation time.
To find the external boundary, I think you should search for the node with extreme coordinates (such as: min X, min Y, max Y... or min X+Y as my script seems to do) and find to which boundary it belongs to.
For your information, if you want to write your mesh in Selafin file format, you can use the code below (beware it has to be double precision because you have too much nodes):
import numpy as np
from pyteltools.slf import Serafin
ikle = np.load("ikle.npy")
xy = np.load("xy.npy")
with Serafin.Write('geo_krober10nd.slf', 'en', overwrite=True) as geo:
header = Serafin.SerafinHeader(format_type='SERAFIND')
ipobo = np.zeros(len(xy), dtype=np.int64)
header.from_triangulation(xy, ikle + 1, ipobo=ipobo)
header.add_variable_from_ID('B')
geo.write_header(header)
geo.write_entire_frame(header, 0.0, np.zeros((header.nb_var, header.nb_nodes), dtype=np.float64))
Then you can generate the IPOBO table within BlueKenue (File > New SELAFIN object and add/move variable)... BlueKenue implementation seems even much faster, it takes about 2-3 seconds!
I noticed a potential problem in my previous post, I pushed a commit 666217be4c860f365dafa9cb51248dcdc511489b to properly support double precision with from_triangulation
.
Here is the new script to write your mesh:
import numpy as np
from pyteltools.slf import Serafin
ikle = np.load("ikle.npy")
xy = np.load("xy.npy")
with Serafin.Write('geo_krober10nd_DP.slf', 'en', overwrite=True) as geo:
header = Serafin.SerafinHeader(format_type='SERAFIND') # double precision
ipobo = np.zeros(len(xy), dtype=np.int64)
header.from_triangulation(xy, ikle + 1, ipobo=ipobo)
header.add_variable_from_ID('B')
geo.write_header(header)
geo.write_entire_frame(header, 0.0, np.zeros((header.nb_var, header.nb_nodes), dtype=np.float64))
with Serafin.Write('geo_krober10nd_SP.slf', 'en', overwrite=True) as geo:
header = Serafin.SerafinHeader(format_type='SERAFIN ') # single precision
ipobo = np.zeros(len(xy), dtype=np.int32)
header.from_triangulation(xy, ikle + 1, ipobo=ipobo)
header.add_variable_from_ID('B')
geo.write_header(header)
geo.write_entire_frame(header, 0.0, np.zeros((header.nb_var, header.nb_nodes), dtype=np.float32))
Thank you Luc! This package has so far been very helpful for my work.
Glad to hear that PyTelTools is useful for your work.
But with large meshes, such as yours, many of our scripts are probably not efficient (e.g. volume or flux computations) and some approximations could speed-up calculations.