Open iuryt opened 2 years ago
I am not getting a proper plot. The data look fine in ArcGIS 10.5, but when I plot them with xarray the map appears to have many missing values or missing grid points. Data source: https://giovanni.gsfc.nasa.gov/session/B4259246-EB9D-11EA-A3A4-16015F835E51/D2BACBDC-CE49-11EC-9A98-0352ECEEFA7B/D2BAE400-CE49-11EC-9A98-0352ECEEFA7B///scrubbed.MODISA_L3m_ZLEE_2018_Zeu_lee.20140101.nc https://giovanni.gsfc.nasa.gov/session/B4259246-EB9D-11EA-A3A4-16015F835E51/D2BACBDC-CE49-11EC-9A98-0352ECEEFA7B/D2BAE400-CE49-11EC-9A98-0352ECEEFA7B///scrubbed.MODISA_L3m_ZLEE_2018_Zeu_lee.20150101.nc
# Facet the monthly means, one Robinson-projected map per month.
# NOTE(review): x="lon"/y="lat" suggests the data live on a geographic
# lon/lat grid -- confirm. If so, the source CRS has to be declared with
# ``transform``; without it cartopy interprets the coordinates as
# Robinson projection units and the field is drawn in the wrong place.
g = data_mean.MODISA_L3m_ZLEE_2018_Zeu_lee.plot(
    x="lon",
    y="lat",
    col="month",
    col_wrap=4,
    cmap="RdBu_r",
    transform=ccrs.PlateCarree(),
    subplot_kws={"projection": ccrs.Robinson()},
    figsize=(20, 20),
)
# zip() stops at the shorter sequence, so unused facet axes (when the number
# of months is not a multiple of col_wrap) no longer raise an IndexError.
for ax, month in zip(g.axes.flat, data_mean.month.values):
    ax.set_title(month)
    ax.coastlines()
    ax.add_feature(
        cfeature.BORDERS.with_scale("50m"), linewidth=0.5, edgecolor="black"
    )
    ax.gridlines(crs=ccrs.PlateCarree(), linewidth=0.5, linestyle="-")
Would you please help me out with why I am not getting the proper surface?
Thank you
@hafez-ahmad please open a new discussion topic with a fully reproducible example.
I'd also find this very useful
Bumping
This would be super useful!
+1. As an alternative, I think `interpolate_na` from rioxarray supports this: https://corteva.github.io/rioxarray/html/examples/interpolate_na.html
You might be interested in pyinterp. With some extreme tuning, this can even reconstruct the original image (set nx=1, ny=9):
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pyinterp
import pyinterp.fill
# Synthetic n-by-n test field: sin(x), constant along y, plus a random
# boolean mask marking the samples that are kept.
n = 30
x = xr.DataArray(np.linspace(0, 2 * np.pi, num=n), dims=["x"])
y = xr.DataArray(np.linspace(0, 2 * np.pi, num=n), dims=["y"])
z = np.sin(x) * xr.ones_like(y)
# Draw 0/1 integers and cast to bool: True means "value present".
keep = np.random.randint(0, 2, size=(n, n)).astype("bool")
mask = xr.DataArray(keep, dims=["x", "y"])
# Shared plotting options for every panel below.
kw = {"add_colorbar": False}
def interpolate_na(arr):
    """Fill the NaNs of a 2-D DataArray with pyinterp's LOESS gap filler.

    Parameters
    ----------
    arr : xarray.DataArray
        Array exposing ``x`` and ``y`` coordinates/dimensions.
        NOTE(review): assumes the data are laid out as ``(x, y)``, which is
        the order the axes are handed to ``pyinterp.Grid2D`` -- confirm for
        other inputs.

    Returns
    -------
    xarray.DataArray
        Copy of ``arr`` whose data are the output of ``pyinterp.fill.loess``
        with a 3x3 window (``nx=3, ny=3``).
    """
    x_axis = pyinterp.Axis(arr.x.data)
    # Fix: the second axis was also built from ``arr.x.data``. That only
    # worked because the demo grid happens to use identical x and y values.
    y_axis = pyinterp.Axis(arr.y.data)
    grid = pyinterp.Grid2D(x_axis, y_axis, arr.data)
    filled = pyinterp.fill.loess(grid, nx=3, ny=3)
    return arr.copy(data=filled)
# Four panels, left to right: original field, field with gaps, xarray's 1-D
# fill along "x", and the 2-D fill from the interpolate_na defined above.
fig, ax = plt.subplots(1, 4, figsize=(11, 4))
gappy = z.where(mask)
panels = (
    z,
    gappy,
    gappy.interpolate_na("x"),
    gappy.pipe(interpolate_na),
)
for axis, panel in zip(ax, panels):
    panel.plot(ax=axis, **kw)
It does have an xarray backend, but it looks like that does not allow customizing the coordinate names; it insists on "latitude" and "longitude".
scipy.ndimage.distance_transform_edt is somewhat useful for a nearest-neighbour implementation:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.distance_transform_edt.html
If the `sampling` argument isn't provided, it'll just look at rows/columns/etc., i.e. it is equivalent to use_coordinates=False.
def _nearest(a):
    """Replace every NaN in ``a`` with the value of its nearest non-NaN cell.

    Distances are measured in index space (no ``sampling`` is passed to
    ``distance_transform_edt``), so the grid spacing is treated as uniform.

    Parameters
    ----------
    a : numpy.ndarray
        Floating-point array of any dimensionality.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as ``a``; ``a`` itself is not modified.
        If ``a`` holds no NaN, or nothing but NaN, an unchanged copy is
        returned.
    """
    # Imported locally so the snippet works on its own.
    from scipy.ndimage import distance_transform_edt

    nans = np.isnan(a)
    # Nothing to fill, or no valid cell to fill from: return a copy as is.
    # (With an all-NaN input there is no background cell for the distance
    # transform to point at.)
    if not nans.any() or nans.all():
        return a.copy()
    # For every cell, the index of the closest cell where the mask is False
    # (i.e. the closest valid value). Valid cells map to themselves.
    indices = distance_transform_edt(
        input=nans,
        return_distances=False,
        return_indices=True,
    )
    return a[tuple(indices)]
def interpolate_na(da, dim, keep_attrs=True):
    """Fill NaNs by nearest-neighbour lookup over one or more dimensions.

    ``_nearest`` is applied to every slice spanned by ``dim``; any other
    dimension of ``da`` is looped over (``vectorize=True``).

    Parameters
    ----------
    da : xarray.DataArray
        Array with gaps encoded as NaN.
    dim : str or sequence of str
        Dimension name(s) to interpolate over jointly, e.g. ``["x", "y"]``.
        A single name may be given as a plain string.
    keep_attrs : bool, default True
        Forwarded to ``xarray.apply_ufunc``.

    Returns
    -------
    xarray.DataArray
        Filled array with the same dimension order as ``da``.
    """
    # A bare string would otherwise be iterated character by character when
    # used as a core-dims entry ("lat" -> "l", "a", "t").
    core_dims = [dim] if isinstance(dim, str) else list(dim)
    filled = xr.apply_ufunc(
        _nearest,
        da,
        input_core_dims=[core_dims],
        output_core_dims=[core_dims],
        output_dtypes=[da.dtype],
        dask="parallelized",
        vectorize=True,
        keep_attrs=keep_attrs,
    )
    # apply_ufunc moves the core dimensions to the end; restore the input order.
    return filled.transpose(*da.dims)
# Four panels, left to right: original field, field with gaps, xarray's 1-D
# fill along "x", and the joint nearest-neighbour fill over ("x", "y").
fig, ax = plt.subplots(1, 4, figsize=(14, 3))
gappy = z.where(mask)
panels = (
    z,
    gappy,
    gappy.interpolate_na("x"),
    interpolate_na(gappy, ["x", "y"]),
)
for axis, panel in zip(ax, panels):
    panel.plot(ax=axis, **kw)
Interpolating ["x", "y"] versus ["y", "x"] will give different answers: when there is a tie (e.g. valid neighbours one cell away along each dimension), scipy.ndimage.distance_transform_edt will choose the one along the last dimension.
The `sampling` argument unfortunately only accepts a sequence of floats (one for each dimension), so it only works for axis-aligned, equidistant coordinates.
Rioxarray's use of griddata can be made a little easier with apply_ufunc:
def _griddata(arr, xi, method: str):
    """Fill the non-finite cells of ``arr`` with ``scipy.interpolate.griddata``.

    Parameters
    ----------
    arr : numpy.ndarray
        Array to fill; it is flattened with ``ravel()`` before interpolating.
    xi : tuple of numpy.ndarray
        One flattened coordinate array per dimension of ``arr``, each with
        ``arr.size`` entries in the same order as ``arr.ravel()``.
    method : str
        Passed straight to ``griddata`` (the thread uses "nearest",
        "linear" and "cubic").

    Returns
    -------
    numpy.ndarray
        Array of ``arr.shape``. ``arr`` itself is returned untouched when
        every cell is finite or when no cell is finite. Cells the chosen
        method cannot reach are filled with NaN (``fill_value=np.nan``).
    """
    # Imported locally so the snippet works on its own.
    from scipy.interpolate import griddata

    ar1d = arr.ravel()
    valid = np.isfinite(ar1d)
    # All finite: nothing to fill. None finite: nothing to interpolate from,
    # and griddata cannot build an interpolant from an empty point set.
    if valid.all() or not valid.any():
        return arr
    return griddata(
        points=tuple(coord[valid] for coord in xi),
        values=ar1d[valid],
        xi=xi,
        method=method,
        fill_value=np.nan,
    ).reshape(arr.shape)
def interpolate_na(da, dim, method="nearest", use_coordinates=True, keep_attrs=True):
    """Fill NaNs by scattered-data interpolation over one or more dimensions.

    ``_griddata`` is applied to every slice spanned by ``dim``; any other
    dimension of ``da`` is looped over (``vectorize=True``).

    Parameters
    ----------
    da : xarray.DataArray
        Array with gaps encoded as NaN.
    dim : str or sequence of str
        Dimension name(s) to interpolate over jointly, e.g. ``["y", "x"]``.
        A single name may be given as a plain string.
    method : str, default "nearest"
        Interpolation method forwarded to ``scipy.interpolate.griddata``.
    use_coordinates : bool, default True
        If True, distances are measured using ``da.coords`` of each
        dimension; otherwise integer positions ``0..size-1`` are used.
    keep_attrs : bool, default True
        Forwarded to ``xarray.apply_ufunc``.

    Returns
    -------
    xarray.DataArray
        Filled array with the same dimension order as ``da``.
    """
    # A bare string would otherwise be iterated character by character when
    # used as a core-dims entry ("lat" -> "l", "a", "t").
    core_dims = [dim] if isinstance(dim, str) else list(dim)
    # Create points only once; they are shared by every vectorized call.
    if use_coordinates:
        coords = [da.coords[d] for d in core_dims]
    else:
        coords = [np.arange(da.sizes[d]) for d in core_dims]
    # "ij" indexing + ravel gives the same ordering as ``arr.ravel()`` on a
    # block whose axes follow ``core_dims``.
    xi = tuple(x.ravel() for x in np.meshgrid(*coords, indexing="ij"))
    filled = xr.apply_ufunc(
        _griddata,
        da,
        input_core_dims=[core_dims],
        output_core_dims=[core_dims],
        output_dtypes=[da.dtype],
        dask="parallelized",
        vectorize=True,
        keep_attrs=keep_attrs,
        kwargs={"xi": xi, "method": method},
    )
    # apply_ufunc moves the core dimensions to the end; restore the input order.
    return filled.transpose(*da.dims)
# One panel per griddata method, left to right.
fig, ax = plt.subplots(1, 3, figsize=(11, 3))
for axis, method in zip(ax, ("nearest", "linear", "cubic")):
    interpolate_na(z.where(mask), ["y", "x"], method=method).plot(ax=axis, **kw)
griddata would work for non-1D coordinates as well, with a little extra logic.
As a final note: I'm personally quite fond of "Laplace interpolation" (see e.g. chapter 3.8 of Numerical Recipes for the idea):
from scipy import sparse
def _build_connectivity(shape):
    """Return the Cartesian-neighbour adjacency matrix of an n-D grid.

    Cells are numbered in C (row-major) order. Entry ``(i, j)`` is 1.0 when
    cells ``i`` and ``j`` are direct neighbours along exactly one axis, as
    needed for a finite-difference approximation. The matrix is symmetric.

    Parameters
    ----------
    shape : tuple of int
        Grid shape.

    Returns
    -------
    scipy.sparse.csr_matrix
        ``(size, size)`` matrix with ``size = prod(shape)``.
    """
    # TODO: check order of dimensions with DataArray
    n_cells = np.prod(shape)
    cell_id = np.arange(n_cells).reshape(shape)
    ndim = len(shape)
    everything = slice(None)

    rows = []
    cols = []
    for axis in range(ndim):
        before = (everything,) * axis
        after = (everything,) * (ndim - axis - 1)
        # Pair each cell with its successor along this axis ...
        lower = cell_id[before + (slice(None, -1),) + after].ravel()
        upper = cell_id[before + (slice(1, None),) + after].ravel()
        # ... and store the link in both directions.
        rows += [lower, upper]
        cols += [upper, lower]

    row_idx = np.concatenate(rows)
    col_idx = np.concatenate(cols)
    weights = np.ones(len(row_idx))
    adjacency = sparse.coo_matrix(
        (weights, (row_idx, col_idx)), shape=(n_cells, n_cells)
    )
    return adjacency.tocsr()
def _laplace(arr, connectivity):
    """Fill NaNs in ``arr`` by solving a discrete Laplace equation.

    Every unknown cell is set so that it equals the mean of its grid
    neighbours, with the known cells acting as fixed boundary values.

    Parameters
    ----------
    arr : numpy.ndarray
        Floating-point array with gaps encoded as NaN.
    connectivity : scipy.sparse matrix
        ``(arr.size, arr.size)`` neighbour adjacency, numbered like
        ``arr.ravel()``.

    Returns
    -------
    numpy.ndarray
        New array of ``arr.shape`` with the NaNs replaced. If ``arr`` has no
        NaN, or nothing but NaN, an unchanged copy is returned.
    """
    # Imported explicitly rather than relying on ``sparse.linalg`` having
    # been loaded as a side effect of ``from scipy import sparse``.
    from scipy.sparse.linalg import spsolve

    ar1d = arr.ravel()
    unknown = np.isnan(ar1d)
    known = ~unknown
    # No unknowns: the linear system would be empty. No knowns: there are no
    # boundary values and the system is singular. Nothing to solve either way.
    if not unknown.any() or not known.any():
        return arr.copy()
    # Set up system of equations: off-diagonals are the neighbour links, the
    # diagonal is minus the number of neighbours of each cell.
    A = connectivity.copy()
    A.setdiag(-np.asarray(A.sum(axis=1)).ravel())
    # Move the known values to the right-hand side.
    rhs = -A[:, known].dot(ar1d[known])
    out = ar1d.copy()
    # Linear solve
    out[unknown] = spsolve(A[unknown][:, unknown], rhs[unknown])
    return out.reshape(arr.shape)
def interpolate_na_laplace(da, dim, keep_attrs=True):
    """Fill NaNs by Laplace interpolation over one or more dimensions.

    ``_laplace`` is applied to every slice spanned by ``dim``; any other
    dimension of ``da`` is looped over (``vectorize=True``). The neighbour
    connectivity is built once and shared by all slices.

    Parameters
    ----------
    da : xarray.DataArray
        Array with gaps encoded as NaN.
    dim : str or sequence of str
        Dimension name(s) to interpolate over jointly, e.g. ``["y", "x"]``.
        A single name may be given as a plain string.
    keep_attrs : bool, default True
        Forwarded to ``xarray.apply_ufunc``.

    Returns
    -------
    xarray.DataArray
        Filled array with the same dimension order as ``da``.
    """
    # A bare string would otherwise be iterated character by character, both
    # here and when used as a core-dims entry ("lat" -> "l", "a", "t").
    core_dims = [dim] if isinstance(dim, str) else list(dim)
    # Same dimension order as the core dims below, so the cell numbering of
    # the connectivity matches the blocks handed to _laplace.
    shape = tuple(da.sizes[d] for d in core_dims)
    connectivity = _build_connectivity(shape)
    filled = xr.apply_ufunc(
        _laplace,
        da,
        input_core_dims=[core_dims],
        output_core_dims=[core_dims],
        output_dtypes=[da.dtype],
        dask="parallelized",
        vectorize=True,
        keep_attrs=keep_attrs,
        kwargs={"connectivity": connectivity},
    )
    # apply_ufunc moves the core dimensions to the end; restore the input order.
    return filled.transpose(*da.dims)
It tends to produce much nicer results if there are "island" shaped gaps, since it'll use all values along the boundary.
The downside is that it's computationally expensive, and for more unknowns ( > 10 000 or so), the direct solve should be replaced by a conjugate-gradient iterative solver... which only works well with a decent preconditioner, which introduces a number of additional settings:
def _laplace(
    arr,
    connectivity: sparse.csr_matrix,
    direct: bool = True,
    maxiter: int = 1000,
):
    """Fill NaNs in ``arr`` by solving a discrete Laplace equation.

    Every unknown cell is set so that it equals the mean of its grid
    neighbours, with the known cells acting as fixed boundary values.

    Parameters
    ----------
    arr : numpy.ndarray
        Floating-point array with gaps encoded as NaN.
    connectivity : scipy.sparse.csr_matrix
        ``(arr.size, arr.size)`` neighbour adjacency, numbered like
        ``arr.ravel()``.
    direct : bool, default True
        If True use a sparse direct solve; otherwise use conjugate gradients
        preconditioned with ``ILU0Preconditioner`` (defined elsewhere).
        Defaults to True so callers that pass only ``connectivity`` (such as
        ``interpolate_na_laplace``) keep working.
    maxiter : int, default 1000
        Iteration cap for the conjugate-gradient solver; ignored when
        ``direct`` is True.

    Returns
    -------
    numpy.ndarray
        New array of ``arr.shape`` with the NaNs replaced. If ``arr`` has no
        NaN, or nothing but NaN, an unchanged copy is returned.

    Raises
    ------
    ValueError
        If the conjugate-gradient solver reports illegal input or breakdown.

    Warns
    -----
    UserWarning
        If the conjugate-gradient solver stops at ``maxiter`` without
        converging; the last iterate is still written into the result.
    """
    import warnings

    # Imported explicitly rather than relying on ``sparse.linalg`` having
    # been loaded as a side effect of ``from scipy import sparse``.
    from scipy.sparse.linalg import cg, spsolve

    ar1d = arr.ravel()
    unknown = np.isnan(ar1d)
    known = ~unknown
    # No unknowns: the linear system would be empty. No knowns: there are no
    # boundary values and the system is singular. Nothing to solve either way.
    if not unknown.any() or not known.any():
        return arr.copy()
    # Set up system of equations: off-diagonals are the neighbour links, the
    # diagonal is minus the number of neighbours of each cell.
    matrix = connectivity.copy()
    matrix.setdiag(-np.asarray(matrix.sum(axis=1)).ravel())
    # Move the known values to the right-hand side.
    rhs = -matrix[:, known].dot(ar1d[known])
    # Linear solve for the unknowns.
    A = matrix[unknown][:, unknown]
    b = rhs[unknown]
    if direct:
        x = spsolve(A, b)
    else:  # Preconditioned conjugate-gradient linear solve.
        # Create preconditioner M
        M = ILU0Preconditioner.from_csr_matrix(A, delta=0.0, relax=0.97)
        # Call conjugate gradient solver
        x, info = cg(A, b, rtol=1e-05, atol=0.0, maxiter=maxiter, M=M)
        if info < 0:
            raise ValueError("scipy.sparse.linalg.cg: illegal input or breakdown")
        elif info > 0:
            # ``maxiter`` used to be an undefined name here, so reaching this
            # branch raised NameError instead of warning.
            warnings.warn(
                f"Failed to converge after {maxiter} iterations", stacklevel=2
            )
    out = ar1d.copy()
    out[unknown] = x
    return out.reshape(arr.shape)
Preconditioner here:
Not the best example, maybe, but to illustrate it does quite well even when data is 99% gap:
from scipy import datasets
import PIL
# Demo on SciPy's sample RGB image, scaled to [0, 1].
f = datasets.face()
f_array = np.array(f).astype(float) / 255.0
da = xr.DataArray(f_array, dims=["y", "x", "bands"])
# Keep roughly 1% of the pixels (True = keep); the same mask is applied to
# every band.
keep = np.random.choice([False, True], size=da.shape[:2], p=[0.99, 0.01])
mask = xr.DataArray(keep, dims=["y", "x"])
masked = da.where(mask)
kw = {"yincrease": False}
# Top row: original, masked, Laplace fill.
# Bottom row: griddata fills (nearest, linear, cubic).
fig, ax = plt.subplots(2, 3, figsize=(11, 7))
panels = [
    da,
    masked,
    interpolate_na_laplace(masked, ["y", "x"]),
    interpolate_na(masked, ["y", "x"], method="nearest"),
    interpolate_na(masked, ["y", "x"], method="linear"),
    interpolate_na(masked, ["y", "x"], method="cubic"),
]
# ax.flat walks the 2x3 grid row by row, matching the order of ``panels``.
for axis, panel in zip(ax.flat, panels):
    panel.plot.imshow(ax=axis, **kw)
Is your feature request related to a problem?
I think that having a way to run a multidimensional interpolation for filling missing values would be awesome.
The code snippet below creates some data and shows the problem I am having now. If the data has some orientation, we can't simply interpolate along each dimension separately.
I tried to use advanced interpolation for that, but it doesn't look like the best solution.
returns
Describe the solution you'd like
Simply
z.interpolate_na(['x','y'])
Describe alternatives you've considered
I could extract the data to numpy and interpolate using scipy.interpolate.griddata, but this is not the way xarray should work.
Additional context
No response