pandas-dev / pandas

Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more
https://pandas.pydata.org
BSD 3-Clause "New" or "Revised" License
43.52k stars 17.88k forks source link

pandas-0.25.0rc0 problem on Python-3.8: 'PandasExprVisitor' object has no attribute 'visit_Constant' #27261

Closed stonebig closed 5 years ago

stonebig commented 5 years ago

Code Sample, a copy-pastable example if possible

import pandas as pd
import numpy as np

idx = pd.date_range('2000', '2005', freq='d', closed='left')  # date range with freq='d'; reused below for the 'Year' column and the row index
datas = pd.DataFrame({'Color':  [ 'green' if x> 1 else 'red' for x in np.random.randn(len(idx))], 
         'Measure': np.random.randn(len(idx)), 'Year': idx.year},  # values come from unseeded np.random, so they differ between runs
          index=idx.date)
datas.query('Measure > 0').groupby(['Color','Year']).size().unstack()  # the query() call is what raises the AttributeError reported in this issue

blows up with

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-18-6c03ccdf9365> in <module>
      6          'Measure': np.random.randn(len(idx)), 'Year': idx.year},
      7           index=idx.date)
----> 8 datas.query('Measure > 0').groupby(['Color','Year']).size().unstack()

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\frame.py in query(self, expr, inplace, **kwargs)
   3190         kwargs["level"] = kwargs.pop("level", 0) + 1
   3191         kwargs["target"] = None
-> 3192         res = self.eval(expr, **kwargs)
   3193 
   3194         try:

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\frame.py in eval(self, expr, inplace, **kwargs)
   3306             kwargs["target"] = self
   3307         kwargs["resolvers"] = kwargs.get("resolvers", ()) + tuple(resolvers)
-> 3308         return _eval(expr, inplace=inplace, **kwargs)
   3309 
   3310     def select_dtypes(self, include=None, exclude=None):

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
    320         )
    321 
--> 322         parsed_expr = Expr(expr, engine=engine, parser=parser, env=env, truediv=truediv)
    323 
    324         # construct the engine and evaluate the parsed expression

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in __init__(self, expr, engine, parser, env, truediv, level)
    825         self.env.scope["truediv"] = truediv
    826         self._visitor = _parsers[parser](self.env, self.engine, self.parser)
--> 827         self.terms = self.parse()
    828 
    829     @property

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in parse(self)
    842     def parse(self):
    843         """Parse an expression"""
--> 844         return self._visitor.visit(self.expr)
    845 
    846     @property

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    439         method = "visit_" + node.__class__.__name__
    440         visitor = getattr(self, method)
--> 441         return visitor(node, **kwargs)
    442 
    443     def visit_Module(self, node, **kwargs):

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in visit_Module(self, node, **kwargs)
    445             raise SyntaxError("only a single expression is allowed")
    446         expr = node.body[0]
--> 447         return self.visit(expr, **kwargs)
    448 
    449     def visit_Expr(self, node, **kwargs):

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    439         method = "visit_" + node.__class__.__name__
    440         visitor = getattr(self, method)
--> 441         return visitor(node, **kwargs)
    442 
    443     def visit_Module(self, node, **kwargs):

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in visit_Expr(self, node, **kwargs)
    448 
    449     def visit_Expr(self, node, **kwargs):
--> 450         return self.visit(node.value, **kwargs)
    451 
    452     def _rewrite_membership_op(self, node, left, right):

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    439         method = "visit_" + node.__class__.__name__
    440         visitor = getattr(self, method)
--> 441         return visitor(node, **kwargs)
    442 
    443     def visit_Module(self, node, **kwargs):

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in visit_Compare(self, node, **kwargs)
    742             op = self.translate_In(ops[0])
    743             binop = ast.BinOp(op=op, left=node.left, right=comps[0])
--> 744             return self.visit(binop)
    745 
    746         # recursive case: we have a chained comparison, a CMP b CMP c, etc.

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    439         method = "visit_" + node.__class__.__name__
    440         visitor = getattr(self, method)
--> 441         return visitor(node, **kwargs)
    442 
    443     def visit_Module(self, node, **kwargs):

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in visit_BinOp(self, node, **kwargs)
    561 
    562     def visit_BinOp(self, node, **kwargs):
--> 563         op, op_class, left, right = self._maybe_transform_eq_ne(node)
    564         left, right = self._maybe_downcast_constants(left, right)
    565         return self._maybe_evaluate_binop(op, op_class, left, right)

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in _maybe_transform_eq_ne(self, node, left, right)
    482             left = self.visit(node.left, side="left")
    483         if right is None:
--> 484             right = self.visit(node.right, side="right")
    485         op, op_class, left, right = self._rewrite_membership_op(node, left, right)
    486         return op, op_class, left, right

C:\WinP\bd38\bu\WPy32-3800b2\python-3.8.0b2\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    438 
    439         method = "visit_" + node.__class__.__name__
--> 440         visitor = getattr(self, method)
    441         return visitor(node, **kwargs)
    442 

AttributeError: 'PandasExprVisitor' object has no attribute 'visit_Constant'
stonebig commented 5 years ago

well, it's a duplicate of https://github.com/pandas-dev/pandas/issues/26318, just now with Pandas-0.25.0rc0

jorisvandenbossche commented 5 years ago

This is related to the ast.Constant change in Python 3.8 (where Constant is replacing Str, Num, etc.): https://github.com/python/cpython/pull/9445

Some related bpo issues with discussion around breakages and suggested solutions: https://bugs.python.org/issue32892, https://bugs.python.org/issue36917

It seems it shouldn't be too hard to fix this for 0.25.

@stonebig can you try if the following patch works?

--- a/pandas/core/computation/expr.py
+++ b/pandas/core/computation/expr.py
@@ -590,6 +590,16 @@ class BaseExprVisitor(ast.NodeVisitor):
         name = self.env.add_tmp([self.visit(e)(self.env) for e in node.elts])
         return self.term_type(name, self.env)

+    def visit_Constant(self, node, **kwargs):
+        if isinstance(node.value, str):
+            self.visit_Str(node)
+        elif node.value in {True, False}:
+            self.visit_NameConstant(node)
+        elif isinstance(node.value, (int, float)):
+            self.visit_Num(node)
+        else:
+            raise Exception("...")
+
     visit_Tuple = visit_List
stonebig commented 5 years ago

If I applied it correctly, I now get:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-2-99840eba54d9> in <module>
----> 1 datas.query('Measure > 0').groupby(['Color','Year']).size().unstack()

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\frame.py in query(self, expr, inplace, **kwargs)
   3190         kwargs["level"] = kwargs.pop("level", 0) + 1
   3191         kwargs["target"] = None
-> 3192         res = self.eval(expr, **kwargs)
   3193 
   3194         try:

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\frame.py in eval(self, expr, inplace, **kwargs)
   3306             kwargs["target"] = self
   3307         kwargs["resolvers"] = kwargs.get("resolvers", ()) + tuple(resolvers)
-> 3308         return _eval(expr, inplace=inplace, **kwargs)
   3309 
   3310     def select_dtypes(self, include=None, exclude=None):

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
    320         )
    321 
--> 322         parsed_expr = Expr(expr, engine=engine, parser=parser, env=env, truediv=truediv)
    323 
    324         # construct the engine and evaluate the parsed expression

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in __init__(self, expr, engine, parser, env, truediv, level)
    835         self.env.scope["truediv"] = truediv
    836         self._visitor = _parsers[parser](self.env, self.engine, self.parser)
--> 837         self.terms = self.parse()
    838 
    839     @property

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in parse(self)
    852     def parse(self):
    853         """Parse an expression"""
--> 854         return self._visitor.visit(self.expr)
    855 
    856     @property

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    439         method = "visit_" + node.__class__.__name__
    440         visitor = getattr(self, method)
--> 441         return visitor(node, **kwargs)
    442 
    443     def visit_Module(self, node, **kwargs):

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in visit_Module(self, node, **kwargs)
    445             raise SyntaxError("only a single expression is allowed")
    446         expr = node.body[0]
--> 447         return self.visit(expr, **kwargs)
    448 
    449     def visit_Expr(self, node, **kwargs):

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    439         method = "visit_" + node.__class__.__name__
    440         visitor = getattr(self, method)
--> 441         return visitor(node, **kwargs)
    442 
    443     def visit_Module(self, node, **kwargs):

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in visit_Expr(self, node, **kwargs)
    448 
    449     def visit_Expr(self, node, **kwargs):
--> 450         return self.visit(node.value, **kwargs)
    451 
    452     def _rewrite_membership_op(self, node, left, right):

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    439         method = "visit_" + node.__class__.__name__
    440         visitor = getattr(self, method)
--> 441         return visitor(node, **kwargs)
    442 
    443     def visit_Module(self, node, **kwargs):

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in visit_Compare(self, node, **kwargs)
    752             op = self.translate_In(ops[0])
    753             binop = ast.BinOp(op=op, left=node.left, right=comps[0])
--> 754             return self.visit(binop)
    755 
    756         # recursive case: we have a chained comparison, a CMP b CMP c, etc.

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs)
    439         method = "visit_" + node.__class__.__name__
    440         visitor = getattr(self, method)
--> 441         return visitor(node, **kwargs)
    442 
    443     def visit_Module(self, node, **kwargs):

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in visit_BinOp(self, node, **kwargs)
    562     def visit_BinOp(self, node, **kwargs):
    563         op, op_class, left, right = self._maybe_transform_eq_ne(node)
--> 564         left, right = self._maybe_downcast_constants(left, right)
    565         return self._maybe_evaluate_binop(op, op_class, left, right)
    566 

C:\WinP\bd38\bu\WPy64-3800b2\python-3.8.0b2.amd64\lib\site-packages\pandas\core\computation\expr.py in _maybe_downcast_constants(self, left, right)
    498             left = self.term_type(name, self.env)
    499         if (
--> 500             right.is_scalar
    501             and hasattr(right, "value")
    502             and not left.is_scalar

AttributeError: 'NoneType' object has no attribute 'is_scalar'
stonebig commented 5 years ago

Is pandas-1.0 really near?

stonebig commented 5 years ago

I confirm the proposed two-line patch fixes the issue.