Open-ET / flux-data-qaqc

Energy Balance Closure Analysis and Eddy Flux Data Post-Processing
BSD 3-Clause "New" or "Revised" License
23 stars 13 forks source link

Basic usage tutorial errors #9

Closed ashwinvis closed 2 years ago

ashwinvis commented 2 years ago
>>> d.plot(output_type='notebook', plot_width=700)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/tmp/ipykernel_793706/3470416942.py in <module>
----> 1 d.plot(output_type='notebook', plot_width=700)

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/fluxdataqaqc/data.py in plot(self, ncols, output_type, out_file, suptitle, plot_width, plot_height, sizing_mode, merge_tools, link_x, **kwargs)
    537 
    538         # create aggregrated plot structure from fluxdataqaqc.Plot._plot()
--> 539         ret = self._plot(
    540             self, ncols=ncols, output_type=output_type, out_file=out_file,
    541             suptitle=suptitle, plot_width=plot_width, plot_height=plot_height,

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/fluxdataqaqc/plot.py in _plot(self, FluxObj, ncols, output_type, out_file, suptitle, plot_width, plot_height, sizing_mode, merge_tools, link_x, **kwargs)
   1172             if fig is not None:
   1173                 daily_line.append(fig)
-> 1174             theta_vars = [
   1175                 v for v in variables if theta_re.match(v) and v in\
   1176                     monthly_df.columns

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/fluxdataqaqc/plot.py in <listcomp>(.0)
   1174             theta_vars = [
   1175                 v for v in variables if theta_re.match(v) and v in\
-> 1176                     monthly_df.columns
   1177             ]
   1178             if fig is not None and monthly and len(theta_vars) > 0:

NameError: free variable 'monthly_df' referenced before assignment in enclosing scope
>>> # creating a QaQc instance will automatically convert to daily
>>> d = Data('US-Tw3_config.ini')
>>> q = QaQc(d)
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
/tmp/ipykernel_793706/3547759801.py in <module>
      1 # creating a QaQc instance will automatically convert to daily
      2 d = Data('US-Tw3_config.ini')
----> 3 q = QaQc(d)

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/fluxdataqaqc/qaqc.py in __init__(self, data, drop_gaps, daily_frac, max_interp_hours, max_interp_hours_night)
    288 
    289             # data will be loaded if it has not yet via Data.df
--> 290             self.temporal_freq = self._check_daily_freq(
    291                 drop_gaps, daily_frac, max_interp_hours, max_interp_hours_night
    292             )

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/fluxdataqaqc/qaqc.py in _check_daily_freq(self, drop_gaps, daily_frac, max_interp_hours, max_interp_hours_night)
    785             self.n_samples_per_day = 1
    786 
--> 787         self._df = df.rename(self.variables)
    788         return freq
    789 

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    322         @wraps(func)
    323         def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324             return func(*args, **kwargs)
    325 
    326         kind = inspect.Parameter.POSITIONAL_OR_KEYWORD

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/frame.py in rename(self, mapper, index, columns, axis, copy, inplace, level, errors)
   5032         4  3  6
   5033         """
-> 5034         return super().rename(
   5035             mapper=mapper,
   5036             index=index,

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/generic.py in rename(self, mapper, index, columns, axis, copy, inplace, level, errors)
   1145             # GH 13473
   1146             if not callable(replacements):
-> 1147                 indexer = ax.get_indexer_for(replacements)
   1148                 if errors == "raise" and len(indexer[indexer == -1]):
   1149                     missing_labels = [

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_indexer_for(self, target, **kwargs)
   5274         """
   5275         if self._index_as_unique:
-> 5276             return self.get_indexer(target, **kwargs)
   5277         indexer, _ = self.get_indexer_non_unique(target)
   5278         return indexer

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_indexer(self, target, method, limit, tolerance)
   3435         # returned ndarray is np.intp
   3436         method = missing.clean_reindex_fill_method(method)
-> 3437         target = self._maybe_cast_listlike_indexer(target)
   3438 
   3439         self._check_indexing_method(method, limit, tolerance)

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/indexes/datetimelike.py in _maybe_cast_listlike_indexer(self, keyarr)
    599     def _maybe_cast_listlike_indexer(self, keyarr):
    600         try:
--> 601             res = self._data._validate_listlike(keyarr, allow_object=True)
    602         except (ValueError, TypeError):
    603             res = com.asarray_tuplesafe(keyarr)

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py in _validate_listlike(self, value, allow_object)
    701         # Do type inference if necessary up front
    702         # e.g. we passed PeriodIndex.values and got an ndarray of Periods
--> 703         value = pd_array(value)
    704         value = extract_array(value, extract_numpy=True)
    705 

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/construction.py in array(data, dtype, copy)
    344         elif inferred_dtype == "string":
    345             # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
--> 346             return StringDtype().construct_array_type()._from_sequence(data, copy=copy)
    347 
    348         elif inferred_dtype == "integer":

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/arrays/string_.py in _from_sequence(cls, scalars, dtype, copy)
    345         else:
    346             # convert non-na-likes to str, and nan-likes to StringDtype.na_value
--> 347             result = lib.ensure_string_array(
    348                 scalars, na_value=StringDtype.na_value, copy=copy
    349             )

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.ensure_string_array()

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.ensure_string_array()

IndexError: too many indices for array
JohnVolk commented 2 years ago

The first error that you received should have been fixed in commit c9db99cc0db45ee7015c3420c5a31314c60fd261. I just added a new release to PyPI in case you were installing from there.

I've never seen the second error but will try to reproduce.

ashwinvis commented 2 years ago

I don't get the second error any longer. Must have been a side effect of the first one.

I get a lot of warnings like this:

Calculating mean for var: THETA from columns: ['SWC_1_1_1', 'SWC_1_2_1']
WARNING: renaming column G to input_G
Converting vpd from hpa to kpa
Calculating vapor pressure from vapor pressure deficit and air temperature
Calculating dew point temperature from vapor pressure

The input data temporal frequency appears to be less than daily.
Data is being resampled to daily temporal frequency.
Linearly interpolating gaps in energy balance components up to 4 hours when Rn < 0 and up to 2 hours when Rn >= 0.
Filtering days with less then 100.0% or 48/48 sub-daily measurements
WARNING: unable to find/read gridMET file
 /home/john/flux-data-qaqc/examples/Basic_usage/gridMET_data/US-Tw3_38.1083N_-121.6417W.csv

Obviously, I haven't edited the .ini files. I suppose it is mentioned somewhere in the docs how to download these files.

I get an error while accessing QaQc.monthly_df. Is it expected?

---------------------------------------------------------------------------
MergeError                                Traceback (most recent call last)
/tmp/ipykernel_824070/61093438.py in <module>
----> 1 q.monthly_df

~/src/sandbox/flux-data-qaqc/fluxdataqaqc/qaqc.py in monthly_df(self)
    833         """
    834         if not self.corrected and self._has_eb_vars:
--> 835             self.correct_data()
    836 
    837         # rename columns to internal names

~/src/sandbox/flux-data-qaqc/fluxdataqaqc/qaqc.py in correct_data(self, meth, et_gap_fill, y, refET, x, fit_intercept)
   1126 
   1127         if meth == 'ebr':
-> 1128             self._ebr_correction()
   1129         elif meth == 'br':
   1130             self._bowen_ratio_correction()

~/src/sandbox/flux-data-qaqc/fluxdataqaqc/qaqc.py in _ebr_correction(self)
   1713         # datetime indices of all remaining null elements
   1714         null_dates = df.loc[df.ebr_corr.isnull(), 'ebr_corr'].index
-> 1715         merged = pd.merge(
   1716             df, ebr_5day_clim, on='DOY', how='left', right_index=True
   1717         )

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
    105     validate: str | None = None,
    106 ) -> DataFrame:
--> 107     op = _MergeOperation(
    108         left,
    109         right,

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/reshape/merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)
    680             warnings.warn(msg, FutureWarning, stacklevel=3)
    681 
--> 682         self._validate_specification()
    683 
    684         cross_col = None

~/src/sandbox/flux-data-qaqc/venv/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _validate_specification(self)
   1360                 )
   1361             if self.left_index or self.right_index:
-> 1362                 raise MergeError(
   1363                     'Can only pass argument "on" OR "left_index" '
   1364                     'and "right_index", not a combination of both.'

MergeError: Can only pass argument "on" OR "left_index" and "right_index", not a combination of both.
ashwinvis commented 2 years ago

I am using pandas v1.3.2, if it helps.

JohnVolk commented 2 years ago

That first warning appears because I must have left the path to the gridMET file on my personal machine in the config.ini file. Since the field was filled in with a path specific to my computer, that WARNING was expected.

The second error you found is due to a change in the pandas merge function in the newer version you are using; the next commit should fix it.

ashwinvis commented 2 years ago

The tutorial runs fine now. We can mark this as solved. Thanks @JohnVolk

c.f. https://github.com/openjournals/joss-reviews/issues/3418