GeoStat-Framework / PyKrige

Kriging Toolkit for Python
https://pykrige.readthedocs.io
BSD 3-Clause "New" or "Revised" License
740 stars 186 forks source link

Memory Issue with Large Dataset #267

Open pmclau04 opened 1 year ago

pmclau04 commented 1 year ago

Thanks for the great tool. I've managed to setup the tool for smaller xyz datasets (around 5,000 points total), however I have a very large xyz survey dataset that I'd like to implement PyKrige with. Below is my code, note there are approx 116,000 points in the initial dataset. I'm wondering if there are any known workarounds or suggestions for working with pyKrige on such large datasets?

Code

#Extract XYZ Data
x = model.df_main_sn.n
y = model.df_main_sn.s
z = model.df_main_sn.z

print(x.shape)

#Create grid for data and trim points by polygon
dx=25;dy=25
nmin,nmax,smin,smax = x.min(),x.max(),y.min(),y.max()
gridx = np.arange(nmin,nmax,dx)
gridy = np.arange(smin,smax,dy)

#Setup kriging Object
OK = OrdinaryKriging(x = x,
                     y = y,
                     z = z,
                     variogram_model='linear',
                     verbose=False,
                     enable_plotting=False,
                     anisotropy_angle = 90,
                     anisotropy_scaling = 5,
                     nlags = 6,
                     weight=True)
#Execute Kriging
z, ss = OK.execute(style = 'points', xpoints = point_n, ypoints = `point_s)`

Error

(116351,)
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
Input In [8], in <cell line: 15>()
     12 gridy = np.arange(smin,smax,dy)
     14 #Setup kriging output
---> 15 OK = OrdinaryKriging(x = x,
     16                      y = y,
     17                      z = z,
     18                      variogram_model='linear',
     19                      verbose=False,
     20                      enable_plotting=False,
     21                      anisotropy_angle = 90,
     22                      anisotropy_scaling = 5,
     23                      nlags = 6,
     24                      weight=True)
     25 #Execute Kriging
     26 z, ss = OK.execute(style = 'points', xpoints = point_n, ypoints = point_s)

File ~\Anaconda3\lib\site-packages\pykrige\ok.py:321, in OrdinaryKriging.__init__(self, x, y, z, variogram_model, variogram_parameters, variogram_function, nlags, weight, anisotropy_scaling, anisotropy_angle, verbose, enable_plotting, enable_statistics, coordinates_type, exact_values, pseudo_inv, pseudo_inv_type)
    312     print("Initializing variogram model...")
    314 vp_temp = _make_variogram_parameter_list(
    315     self.variogram_model, variogram_parameters
    316 )
    317 (
    318     self.lags,
    319     self.semivariance,
    320     self.variogram_model_parameters,
--> 321 ) = _initialize_variogram_model(
    322     np.vstack((self.X_ADJUSTED, self.Y_ADJUSTED)).T,
    323     self.Z,
    324     self.variogram_model,
    325     vp_temp,
    326     self.variogram_function,
    327     nlags,
    328     weight,
    329     self.coordinates_type,
    330 )
    332 if self.verbose:
    333     print("Coordinates type: '%s'" % self.coordinates_type, "\n")

File ~\Anaconda3\lib\site-packages\pykrige\core.py:460, in _initialize_variogram_model(X, y, variogram_model, variogram_model_parameters, variogram_function, nlags, weight, coordinates_type)
    458 if coordinates_type == "euclidean":
    459     d = pdist(X, metric="euclidean")
--> 460     g = 0.5 * pdist(y[:, None], metric="sqeuclidean")
    462 # geographic coordinates only accepted if the problem is 2D
    463 # assume X[:, 0] ('x') => lon, X[:, 1] ('y') => lat
    464 # old method of distance calculation is retained here...
    465 # could be improved in the future
    466 elif coordinates_type == "geographic":

File ~\Anaconda3\lib\site-packages\scipy\spatial\distance.py:2250, in pdist(X, metric, out, **kwargs)
   2248 if metric_info is not None:
   2249     pdist_fn = metric_info.pdist_func
-> 2250     return pdist_fn(X, out=out, **kwargs)
   2251 elif mstr.startswith("test_"):
   2252     metric_info = _TEST_METRICS.get(mstr, None)

MemoryError: Unable to allocate 50.4 GiB for an array with shape (6768719425,) and data type float64

Thanks again!