Grain segmentation and KAM map tool

Currently working on my thesis and I don't have the time to properly add this feature to orix, but as part of an unrelated project, I needed to segment a grain map, so I wrote two different functions for doing so.

If other members have input, I would be interested in hearing it, and if someone has interest in adding the feature themselves, they are more than welcome to adopt this in whatever way they see fit. Otherwise, I'll tackle it early 2025.

SHORT VERSION: I wrote a flood-fill method based off of skimage's pixel_graph, but didn't like that it took 10 seconds and didn't return bounding polygons. Then I wrote a second faster voronoi diagram-based function that returns the bounds, but it requires a package like shapely to calculate the polygons, and also cannot handle connectivity's other than a "plus" sign.

I think the voronoi method is a better approach long term, as it sets up a method to quickly get bounding polygons and grain edges that can be colored and plotted to show additional data. However, the flood fill is far easier to implement, since it just returns a 2D numpy array of grain Ids.

Examples of the results from both methods are shown below, as well as a Kernel Average Misorientation (KAM) map, which I get for free as part of the segmentation.

Comparison of segmentations

KAM map

Link to the data which is available via Globus: https://materialsdatafacility.org/detail/8f586539-f397-4af1-9c52-3c2e0da796c8-1.0 data shown here is located at Processed data/EBSD data 4 - SHI, distortion corrected/GES_HEDM1_ebsdPCSHIFT_310_regi.ang

"""
Created on Fri Nov  8 11:46:41 2024

@author: agerlt
"""

# scientific python stuff
import numpy as np
from scipy import stats, spatial
from skimage.util._map_array import map_array
from skimage.morphology._util import _raveled_offsets_and_distances
import sparse  # THis hsould probably be replaced with scipy.sparse...

# Orix stuff
from orix import io
from orix.plot import IPFColorKeyTSL
from orix.quaternion import Orientation, Rotation, symmetry
from orix.crystal_map import crystal_map

# plotting stuff
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import copy

# pseudo- grain boundry stuff
import shapely
from shapely import MultiPolygon
from shapely.ops import polygonize
from shapely.geometry import Point

# %% two different grain segmentation methods

# ==================================================================== #
# METHOD ONE: slower and no bounding polygons, but trivial to implement
# ==================================================================== #
def calc_grains_flood_fill(xmap: crystal_map,
                           mask: np.array = None,
                           cutoff: float = 3*np.pi/180):
    # TODO: add support for rotation AND orientation arrays
    # TODO: sanity checks for all inputs
    # TODO: add support for multi-phase maps

    # This function builds an adjacency map of "in-grain" connections,
    # then flood fills the pixels until grain_ID's stop changing.
    # this method is very brute force. I think the more "elegant" (faster)
    # solutions would be to merge nodes in parallel as the process evolves.
    # however, i spend a LONG time failing to get that to work, and this
    # method relyably works on everything I've tested, and does 3.6 million
    # pixels in about 15 seconds.
    # biggest caveat is this returns only the grain ID's not a boundary object.
    # this could be a plus or negative, depending on your goal.

    # CREDIT: The first half of this method is a loose adaptation of the
    # concept of a pixel_graph as seen in skimage.graph.pixel.pixel_graph.
    # I used their method of setting up a neighborhood map, but then found
    # flood filling to be MUSCH faster and space efficient in scipy.sparse as
    # opposed to networkx, which what skimage.graph._rag.py uses.

    # users can make a mask if they choose based on confidence index or some
    # other factor. otherwise, make the mask the indexed pixels.
    if mask is None:
        mask = xmap.is_indexed.reshape(xmap.shape)

    # TODO: if we want to support non-gridded methods, we need to replace this
    # starting part with a marginally slower voronoi tesselation method.

    # make a padded version of the mask so we can find all neighbors. we will
    # throw out the boundaries later.
    # TODO: I think the padding might be unnecessary.
    padded_mask = np.pad(mask, 1, mode='constant', constant_values=False)
    # list of pixel IDs for just the "True" pixels
    padded_nodes = np.flatnonzero(padded_mask)
    # calc the offsets based on connectivity, and build neighbors array
    # TODO: support other connectivities? this method breaks with
    # connectivities=1, and is slow for >=2....
    neighbor_offsets = _raveled_offsets_and_distances(
        padded_mask.shape, footprint=np.ones([3,3]))[0]
    padded_neighbors = padded_nodes[:, np.newaxis] + neighbor_offsets

    # remap to unpadded nodes. also calc sequential map to remap with later.
    nodes = np.flatnonzero(mask)
    nodes_sequential = np.arange(nodes.size)
    neighbors = map_array(padded_neighbors, padded_nodes, nodes)

    # build the to/from neighbor array, throwing away connections to masked
    # out nodes along the way.
    neighbors_mask = padded_mask.reshape(-1)[padded_neighbors]
    num_neighbors = np.sum(neighbors_mask, axis=1)
    indices = np.repeat(nodes, num_neighbors)
    indices_sequential = np.repeat(nodes_sequential, num_neighbors)
    neighbor_indices = neighbors[neighbors_mask]
    neighbor_indices_sequential = map_array(
        neighbor_indices, nodes, nodes_sequential
                )
    # TODO: Add option to break connnections based on phase here.
    # for the real neighbors between pixels within the mask, find the angle
    # between each pixel pair. remove pixel connections below the threshold.
    dominate_phase = stats.mode(xmap.phase_id[xmap.phase_id >= 0])[0]
    cs = xmap.phases[dominate_phase].point_group
    g1 = Orientation(xmap.rotations[indices], cs)
    g2 = Orientation(xmap.rotations[neighbor_indices], cs)
    d = g1.angle_with(g2)
    is_not_boundary = d < cutoff

    # build adjacency matrix
    loc = np.stack([indices_sequential, neighbor_indices_sequential])
    adj = sparse.COO(loc, is_not_boundary.astype(int))
    # add in identity as well, so nodes*adj gives connections AND original.
    eye_loc = np.arange(adj.shape[0])
    adj = sparse.COO(loc, is_not_boundary.astype(int))
    fill = adj + sparse.COO([eye_loc, eye_loc], 1)
    # also throw in the distance calculation, so we can make KAM maps later
    # TODO: I should break this up into two functions. one to get the angular
    # distance, one to make grain maps.
    dist = sparse.COO(loc, d*is_not_boundary)

    # now flood fill everything, keeping only the highest value at each step.
    # TODO: There is a more efficient way to do this where we merge nodes
    # as we go instead of flood-filling EVERY pixel at every step.
    # what I do here works, but is also over 90% of the compute time.

    node_labels = np.arange(nodes_sequential.size)+1
    delta = 1
    i = 0
    while delta > 0:
        count = np.unique(node_labels.data).size
        if i % 4 == 0:
            print("{} unique labels".format(count))
        i += 1
        for j in range(3):
            node_labels = np.max(node_labels*fill, axis=1)
        delta = count - np.unique(node_labels.data).size
    # having non-sequential grain_ids is annoying, lets reassign
    count = np.unique(node_labels.data).size
    print("{} unique labels, final count".format(count))
    ele, inv = np.unique(node_labels.todense(), False, True, False)
    sequential_labels = np.arange(ele.size)[inv]
    grain_map = np.zeros(mask.size)
    grain_map[nodes] = sequential_labels
    grain_map = grain_map.reshape(mask.shape)
    return grain_map, dist, nodes

# ==================================================================== #
# METHOD TWO: faster, and gives grain boundary polygons, but requires
# semi-commiting to a python package supporting polygons like shapely
# ==================================================================== #
def calc_grains_voronoi(xmap: crystal_map,
                        mask: np.array = None,
                        cutoff: float = 3*np.pi/180):

    # assume the xmap pixels are on some regular (hex or square) grid. We
    # want to make a bounding box of points with a similar spacing, so edge
    # voxels have reasonable borders
    # TODO: if we KNOW there is a square grid, there is a cleaner method.
    xy_inside = np.vstack([xmap.x, xmap.y])
    xu = np.unique(xy_inside[0])  # unique values
    xd = np.max([xu[1] - xu[0], (xu[-1]-xu[0])/10000])  # delta
    xl = np.arange(xu[0]-xd, xu[-1]+(xd*1.1), xd)  # line of x crds
    yu = np.unique(xy_inside[1])  # unique values
    yd = np.max([yu[1] - yu[0], (yu[-1]-yu[0])/10000])  # delta
    yl = np.arange(yu[0]-yd, yu[-1]+(yd*1.1), yd)  # line of y crds
    xo = np.hstack([xl, xl, xl[1:-1]*0, xl[1:-1]*0 + xl[-1]])
    yo = np.hstack([yl[1:-1]*0, yl[1:-1]*0 + yl[-1], yl, yl])
    padded_pixels = np.vstack([xy_inside.T, np.vstack([xo, yo]).T])

    # do a voronoi tesselation
    # TODO: this could be potentially faster if we throw out unindexed regions
    # before the tesselation.
    vor = spatial.Voronoi(padded_pixels)
    # get rotations for every padded pixel. set the outer ones to identity
    s = xmap.size
    rots = Rotation.identity(len(padded_pixels))
    rots[:s] = xmap.rotations
    # either use or make a mask, then expand it to the new padded pixels
    if mask is None:
        mask = xmap.is_indexed
    pm = np.zeros(len(padded_pixels))  # padded_mask
    pm[:s] = mask.flatten()

    # calc angular seperation for all graph edges.
    # TODO: should pre-exclude different phases and unindexed points...
    # TODO: support multi-phase ebsd.
    dominate_phase = stats.mode(xmap.phase_id[xmap.phase_id >= 0])[0]
    cs = xmap.phases[dominate_phase].point_group
    g1 = Orientation(rots[vor.ridge_points[:, 0]], cs)
    g2 = Orientation(rots[vor.ridge_points[:, 1]], cs)
    ang_dist = g1.angle_with(g2)
    grain_bounds = ang_dist > cutoff

    # re-add connections from indexed to unindexed regions, which will
    # reinclude bounds for grains with an orientation close to identity
    # that happen to be touching an unindexed zone with a similar place holder
    # orientation.
    border_edges = pm[vor.ridge_points[:, 1]] != pm[vor.ridge_points[:, 0]]
    grain_bounds[border_edges] = True
    # remove non-indexed to non_indexed connections
    index_to_index = np.max(pm[vor.ridge_points], 1).astype(bool)
    grain_bounds[~index_to_index] = False

    # grain_bounds is now a mask of all the voronoi ridges that sit on
    # boundaries. pull those out so we can build grain boundary polygons with
    # them.
    bounding_ridges = np.array(vor.ridge_vertices)[grain_bounds]
    vert_ids = np.unique(bounding_ridges)

    # NOTE: at this point, the bounding ridges plus vert_ids contain all the
    # grain boundary data. However, no connections know what envelopes they
    # are a part of, and none of the pixels have been assigned a grain id.
    # this requires some sort of envelope-building tool. I chose shapely,
    # but their might be another fastor or more convenient tool. Also, the
    # method i use for assigning pixels to polygons is by no means the best.
    seq_verts = vor.vertices[vert_ids]
    seq_edges = map_array(bounding_ridges, vert_ids, np.arange(len(vert_ids)))
    poly_grains = MultiPolygon(polygonize(seq_verts[seq_edges[:, (0, 1)]]))
    # TODO: output bounds as line segments as well, so we can make colored
    # grain bounds like MTEX does.

    # use a search tree to speed up the assigning of pixels to polygons
    tree = shapely.STRtree(poly_grains.geoms)
    p = [Point(x) for x in padded_pixels[pm > 0]]
    # TODO: not sure 'intersects' is the best option...
    indexed_grain_id = tree.query(p, predicate='intersects')[1]
    grain_map = np.zeros(xmap.is_indexed.shape)
    grain_map[xmap.is_indexed] = indexed_grain_id
    grain_map = grain_map.reshape(xmap.shape)
    # TODO: add option to ad this to xmap properties, or just output map
    # xmap.prop.grain_id = np.zeros(ebsd.is_indexed.shape)
    # TODO: this is too much to return, should clean up....
    return grain_map, [x for x in poly_grains.geoms], seq_verts, seq_edges

# %% load in some open source ebsd data, and clean up the xmap
ebsd = io.load("GES_HEDM1_ebsdPCSHIFT_310_regi.ang")
# fix the phase info. Nickel is m3m not 432
ebsd.phases[0].point_group = symmetry.Oh
cs = ebsd.phases[0].point_group
ebsd_ci = ebsd.prop["unknown2"]
ebsd._phase_id[ebsd_ci <= 0.1] = -1

ori2rgb = IPFColorKeyTSL(cs)
rgb_flat = ori2rgb.orientation2color(ebsd.rotations)
img = (rgb_flat*(ebsd.is_indexed[:, np.newaxis])).reshape(ebsd.shape + (3,))
rci_img = ((ebsd_ci.reshape(ebsd.shape)/0.245)[:, :, np.newaxis])*img

# %%
# make the flood-fill style grain map
gm_flood, dist, nodes = calc_grains_flood_fill(ebsd)

# %%
# make the voronoi style grain map
gm_vor, poly, verts, edges = calc_grains_voronoi(ebsd)

# make the matplotlib patches for plotting the edges as well
# NOTE: probably a faster way to do this....
mpl_bounds = PatchCollection(
    [Polygon(np.array(p.exterior.xy).T/2, ) for p in poly],
    facecolor='none',
    edgecolor='k'
    )
mpl_bounds_2 = copy.deepcopy(mpl_bounds)

# %% comparing the segmentation

fig, ax = plt.subplots(2, 3)
ax = ax.flatten()

ax[0].imshow(rci_img)
ax[1].imshow(gm_flood % np.pi)
ax[2].imshow(gm_vor % (np.pi**0.2))
ax[2].add_collection(mpl_bounds)

ax[3].imshow(rci_img)
ax[4].imshow(gm_flood % np.pi)
ax[5].imshow(gm_vor % (np.pi**0.2))
ax[5].add_collection(mpl_bounds_2)
for i in [3, 4, 5]:
    ax[i].set_xlim(180, 260)
    ax[i].set_ylim(130, 200)
for i in [0, 1, 2, 3, 4, 5]:
    ax[i].grid()

fig.suptitle("Comparing segmentation methods")
ax[0].set_title("IPF_coloring")
ax[1].set_title("Flood Fill")
ax[2].set_title("Voronoi Fill")
plt.tight_layout()

# %% make a KAM map, just to show we can.

# NOTE: I did this for the flood fill only,but it could be added to the
# voronoi semi-trivially

neighbors_count = np.sum(dist > 0, axis=1).todense()
cumulative_miso = (dist.sum(axis=1)).todense()
kam_map = np.zeros(ebsd.size)
kam_map[nodes] = np.nan_to_num(cumulative_miso/neighbors_count)
kam_map = kam_map.reshape(gm_flood.shape)
kam_map_d = kam_map*180/np.pi

plt.figure()
plt.imshow(kam_map_d, vmax=0.2)

EDIT: fixed a typo in the code

pyxem / orix

Grain segmentation and KAM map tool #531