has2k1 / plydata

A grammar for data manipulation in Python
https://plydata.readthedocs.io/en/stable/
BSD 3-Clause "New" or "Revised" License
275 stars 11 forks source link

Can't define new column from a Series #21

Closed georgemarrows closed 4 years ago

georgemarrows commented 4 years ago

The `define` docs say you can use an iterable to define a column. I was expecting to be able to use a pandas Series, but it raises the error `ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().`

Example:

df >> define(sync_data=pd.to_datetime(df['time']).dt.normalize())
df >> define(xyz=df.logMessage.map(lambda x: re.search("(CREATED_\d*)", x).group(1)))

Both give:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-17-3ecd59d68cbc> in <module>
----> 1 df >> define(xyz=df.logMessage.map(lambda x: re.search("(CREATED_\d*)", x).group(1)))

~/anaconda3/envs/oauth/lib/python3.7/site-packages/plydata/operators.py in __rrshift__(self, other)
    122         self.data = other
    123         func = get_verb_function(self.data, self.__class__.__name__)
--> 124         return func(self)
    125 
    126     def __call__(self, data):

~/anaconda3/envs/oauth/lib/python3.7/site-packages/plydata/dataframe/one_table.py in define(verb)
     55     verb.env = verb.env.with_outer_namespace(_outer_namespace)
     56     with regular_index(verb.data):
---> 57         new_data = Evaluator(verb).process()
     58         for col in new_data:
     59             verb.data[col] = new_data[col]

~/anaconda3/envs/oauth/lib/python3.7/site-packages/plydata/dataframe/common.py in process(self)
    203         """
    204         # Short cut
--> 205         if self._all_expressions_evaluated():
    206             if self.drop:
    207                 # Drop extra columns. They do not correspond to

~/anaconda3/envs/oauth/lib/python3.7/site-packages/plydata/dataframe/common.py in _all_expressions_evaluated(self)
    228         def present(expr):
    229             return expr.stmt == expr.column and expr.column in self.data
--> 230         return all(present(expr) for expr in self.expressions)
    231 
    232     def _get_group_dataframes(self):

~/anaconda3/envs/oauth/lib/python3.7/site-packages/plydata/dataframe/common.py in <genexpr>(.0)
    228         def present(expr):
    229             return expr.stmt == expr.column and expr.column in self.data
--> 230         return all(present(expr) for expr in self.expressions)
    231 
    232     def _get_group_dataframes(self):

~/anaconda3/envs/oauth/lib/python3.7/site-packages/plydata/dataframe/common.py in present(expr)
    227         """
    228         def present(expr):
--> 229             return expr.stmt == expr.column and expr.column in self.data
    230         return all(present(expr) for expr in self.expressions)
    231 

~/anaconda3/envs/oauth/lib/python3.7/site-packages/pandas/core/generic.py in __nonzero__(self)
   1477     def __nonzero__(self):
   1478         raise ValueError(
-> 1479             f"The truth value of a {type(self).__name__} is ambiguous. "
   1480             "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
   1481         )

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().