geopandas / dask-geopandas

Parallel GeoPandas with Dask
https://dask-geopandas.readthedocs.io/
BSD 3-Clause "New" or "Revised" License
503 stars 44 forks source link

Error when projecting to other CRS #218

Closed Jap8nted closed 2 years ago

Jap8nted commented 2 years ago

Hi all,

I am working through a basic example of reprojecting to another CRS. However, I get an error as if the CRS had never been set. Am I doing something wrong here?

xmin = np.linspace(0, 10000,1000000)
ymin = np.linspace(0, 10000,1000000)
state = np.random.random(1000000)
df = pd.DataFrame({"lon":xmin, "lat":ymin, "state":state})
daskdf = ddf.from_pandas(df,npartitions=8)
points = dask_geopandas.points_from_xy(daskdf, x='lon', y='lat', crs="EPSG:4326")
daskdf["geometry"] = points
daskdf = dask_geopandas.from_dask_dataframe(daskdf,geometry="geometry")
daskdf.set_crs(4326, allow_override=True)
daskdf.to_crs(32632)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/dataframe/utils.py:195, in raise_on_meta_error(funcname, udf)
    194 try:
--> 195     yield
    196 except Exception as e:

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/dataframe/core.py:6450, in _emulate(func, udf, *args, **kwargs)
   6449 with raise_on_meta_error(funcname(func), udf=udf):
-> 6450     return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/utils.py:1100, in methodcaller.__call__(self, _methodcaller__obj, *args, **kwargs)
   1099 def __call__(self, __obj, *args, **kwargs):
-> 1100     return getattr(__obj, self.method)(*args, **kwargs)

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/geopandas/geodataframe.py:1377, in GeoDataFrame.to_crs(self, crs, epsg, inplace)
   1376     df = self.copy()
-> 1377 geom = df.geometry.to_crs(crs=crs, epsg=epsg)
   1378 df.geometry = geom

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/geopandas/geoseries.py:1117, in GeoSeries.to_crs(self, crs, epsg)
   1041 """Returns a ``GeoSeries`` with all geometries transformed to a new
   1042 coordinate reference system.
   1043 
   (...)
   1114 
   1115 """
   1116 return GeoSeries(
-> 1117     self.values.to_crs(crs=crs, epsg=epsg), index=self.index, name=self.name
   1118 )

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/geopandas/array.py:750, in GeometryArray.to_crs(self, crs, epsg)
    749 if self.crs is None:
--> 750     raise ValueError(
    751         "Cannot transform naive geometries.  "
    752         "Please set a crs on the object first."
    753     )
    754 if crs is not None:

ValueError: Cannot transform naive geometries.  Please set a crs on the object first.

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
Input In [105], in <cell line: 5>()
      3 daskdf = dask_geopandas.from_dask_dataframe(daskdf,geometry="geometry")
      4 daskdf.set_crs(4326, allow_override=True)
----> 5 daskdf.to_crs(32632)

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask_geopandas/core.py:193, in _Frame.to_crs(self, crs, epsg)
    191 @derived_from(geopandas.GeoSeries)
    192 def to_crs(self, crs=None, epsg=None):
--> 193     return self.map_partitions(M.to_crs, crs=crs, epsg=epsg)

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/dataframe/core.py:867, in _Frame.map_partitions(self, func, *args, **kwargs)
    740 @insert_meta_param_description(pad=12)
    741 def map_partitions(self, func, *args, **kwargs):
    742     """Apply Python function on each DataFrame partition.
    743 
    744     Note that the index and divisions are assumed to remain unchanged.
   (...)
    865     None as the division.
    866     """
--> 867     return map_partitions(func, self, *args, **kwargs)

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/dataframe/core.py:6519, in map_partitions(func, meta, enforce_metadata, transform_divisions, align_dataframes, *args, **kwargs)
   6512         raise ValueError(
   6513             f"{e}. If you don't want the partitions to be aligned, and are "
   6514             "calling `map_partitions` directly, pass `align_dataframes=False`."
   6515         ) from e
   6517 dfs = [df for df in args if isinstance(df, _Frame)]
-> 6519 meta = _get_meta_map_partitions(args, dfs, func, kwargs, meta, parent_meta)
   6521 if all(isinstance(arg, Scalar) for arg in args):
   6522     layer = {
   6523         (name, 0): (
   6524             apply,
   (...)
   6528         )
   6529     }

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/dataframe/core.py:6631, in _get_meta_map_partitions(args, dfs, func, kwargs, meta, parent_meta)
   6627     parent_meta = dfs[0]._meta
   6628 if meta is no_default:
   6629     # Use non-normalized kwargs here, as we want the real values (not
   6630     # delayed values)
-> 6631     meta = _emulate(func, *args, udf=True, **kwargs)
   6632     meta_is_emulated = True
   6633 else:

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/dataframe/core.py:6450, in _emulate(func, udf, *args, **kwargs)
   6445 """
   6446 Apply a function using args / kwargs. If arguments contain dd.DataFrame /
   6447 dd.Series, using internal cache (``_meta``) for calculation
   6448 """
   6449 with raise_on_meta_error(funcname(func), udf=udf):
-> 6450     return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))

File ~/opt/anaconda3/envs/geo/lib/python3.9/contextlib.py:135, in _GeneratorContextManager.__exit__(self, type, value, traceback)
    133     value = type()
    134 try:
--> 135     self.gen.throw(type, value, traceback)
    136 except StopIteration as exc:
    137     # Suppress StopIteration *unless* it's the same exception that
    138     # was passed to throw().  This prevents a StopIteration
    139     # raised inside the "with" statement from being suppressed.
    140     return exc is not value

File ~/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/dataframe/utils.py:216, in raise_on_meta_error(funcname, udf)
    207 msg += (
    208     "Original error is below:\n"
    209     "------------------------\n"
   (...)
    213     "{2}"
    214 )
    215 msg = msg.format(f" in `{funcname}`" if funcname else "", repr(e), tb)
--> 216 raise ValueError(msg) from e

ValueError: Metadata inference failed in `to_crs`.

You have supplied a custom function and Dask is unable to 
determine the type of output that that function returns. 

To resolve this please provide a meta= keyword.
The docstring of the Dask function you ran should have more information.

Original error is below:
------------------------
ValueError('Cannot transform naive geometries.  Please set a crs on the object first.')

Traceback:
---------
  File "/Users/jonatanaponte/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/dataframe/utils.py", line 195, in raise_on_meta_error
    yield
  File "/Users/jonatanaponte/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/dataframe/core.py", line 6450, in _emulate
    return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
  File "/Users/jonatanaponte/opt/anaconda3/envs/geo/lib/python3.9/site-packages/dask/utils.py", line 1100, in __call__
    return getattr(__obj, self.method)(*args, **kwargs)
  File "/Users/jonatanaponte/opt/anaconda3/envs/geo/lib/python3.9/site-packages/geopandas/geodataframe.py", line 1377, in to_crs
    geom = df.geometry.to_crs(crs=crs, epsg=epsg)
  File "/Users/jonatanaponte/opt/anaconda3/envs/geo/lib/python3.9/site-packages/geopandas/geoseries.py", line 1117, in to_crs
    self.values.to_crs(crs=crs, epsg=epsg), index=self.index, name=self.name
  File "/Users/jonatanaponte/opt/anaconda3/envs/geo/lib/python3.9/site-packages/geopandas/array.py", line 750, in to_crs
    raise ValueError(

martinfleis commented 2 years ago

Neither set_crs nor to_crs works in place; each returns a new object. You need to reassign the returned df to a variable.

daskdf = daskdf.set_crs(4326, allow_override=True)
daskdf = daskdf.to_crs(...)

Jap8nted commented 2 years ago

Thank you for the fast answer.