Closed mhidas closed 2 years ago
I've seen a few variants of the error related to this issue. They arise at different points in the code, but the underlying cause is probably the same (inefficient memory usage).
First reported error:
Traceback (most recent call last):
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/aodncore/pipeline/handlerbase.py", line 1038, in run
self.trigger(transition['trigger'])
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/transitions/core.py", line 65, in _get_trigger
return machine.events[trigger_name].trigger(model, *args, **kwargs)
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/transitions/core.py", line 405, in trigger
return self.machine._process(func)
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/transitions/core.py", line 1073, in _process
return trigger()
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/transitions/core.py", line 423, in _trigger
return self._process(event_data)
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/transitions/core.py", line 433, in _process
if trans.execute(event_data):
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/transitions/core.py", line 279, in execute
machine.callback(func, event_data)
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/transitions/core.py", line 1031, in callback
func(*event_data.args, **event_data.kwargs)
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/aodndata/moorings/products_handler.py", line 314, in preprocess
self._make_hourly_timeseries()
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/aodndata/moorings/products_handler.py", line 266, in _make_hourly_timeseries
opendap_url_prefix=OPENDAP_URL_PREFIX
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/aodntools/timeseries_products/hourly_timeseries.py", line 501, in hourly_aggregator
df_temp = nc_clean.to_dataframe()
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/xarray/core/dataset.py", line 3088, in to_dataframe
return self._to_dataframe(self.dims)
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/xarray/core/dataset.py", line 3077, in _to_dataframe
for k in columns]
File "/mnt/ebs/pipeline/lib/python3.5/site-packages/xarray/core/dataset.py", line 3077, in <listcomp>
for k in columns]
MemoryError
More recently ...
Traceback (most recent call last):
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/aodncore/pipeline/handlerbase.py", line 1054, in run
self.trigger(transition['trigger'])
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/transitions/core.py", line 899, in _get_trigger
return event.trigger(model, *args, **kwargs)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/transitions/core.py", line 401, in trigger
return self.machine._process(func)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/transitions/core.py", line 1188, in _process
return trigger()
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/transitions/core.py", line 426, in _trigger
return self._process(event_data)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/transitions/core.py", line 435, in _process
if trans.execute(event_data):
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/transitions/core.py", line 272, in execute
event_data.machine.callbacks(itertools.chain(event_data.machine.before_state_change, self.before), event_data)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/transitions/core.py", line 1123, in callbacks
self.callback(func, event_data)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/transitions/core.py", line 1144, in callback
func(*event_data.args, **event_data.kwargs)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/aodndata/moorings/products_handler.py", line 390, in preprocess
self._make_hourly_timeseries()
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/aodndata/moorings/products_handler.py", line 300, in _make_hourly_timeseries
**self.product_common_kwargs)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/aodntools/timeseries_products/hourly_timeseries.py", line 427, in hourly_aggregator
df_temp = df_temp.reset_index().set_index('TIME')
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/pandas/core/frame.py", line 4146, in set_index
frame = self.copy()
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/pandas/core/generic.py", line 5804, in copy
data = self._data.copy(deep=deep)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/pandas/core/internals/managers.py", line 734, in copy
do_integrity_check=False)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/pandas/core/internals/managers.py", line 395, in apply
applied = getattr(b, f)(**kwargs)
File "/mnt/ebs/pyenv/versions/3.5.2/envs/pipeline_prod_venv/lib/python3.5/site-packages/pandas/core/internals/blocks.py", line 753, in copy
values = values.copy()
MemoryError: Unable to allocate 456. MiB for an array with shape (4, 14931864) and data type float64
When aggregating larger datasets, the hourly averaging code (both the velocity and non-velocity versions) can run out of memory and fail. In general, the process can easily consume 4–5 GB of memory (and still try to allocate more).