gordonwatts / hep_tables

Prototyping Hierarchical data, with servicex as a backend
MIT License
2 stars 0 forks source link

Hard crash in complex, deeply nested expression #20

Closed gordonwatts closed 4 years ago

gordonwatts commented 4 years ago

From Jupyter:

Rendering: df_1 = DataFrame()
df_2 = df_1.Jets('AntiKt4EMTopoJets')
df_3 = df_2.pt
df_4 = df_3 / 1000.0
df_5 = df_4 > 35.0
df_6 = df_2.eta
df_7 = df_6.abs()
df_8 = df_7 < 2.5
df_9 = df_5 & df_8
df_10 = df_2[df_9]
df_11 = <<lambda>(e)>(df_10)
df_12 = df_10[df_11]
df_13 = df_12.pt
render_callable: df_1 = DataFrame()
df_2 = df_1.Jets('AntiKt4EMTopoJets')
df_3 = df_2.pt
df_4 = df_3 / 1000.0
df_5 = df_4 > 35.0
df_6 = df_2.eta
df_7 = df_6.abs()
df_8 = df_7 < 2.5
df_9 = df_5 & df_8
df_10 = df_2[df_9]
df_11 = <<lambda>(source_p)>(df_10)
df_12 = df_11.Count()
df_13 = df_12 > 0
render_callable: df_1 = DataFrame()
df_2 = df_1.TruthParticles('TruthParticles')
df_3 = df_2.pdgId
df_4 = df_3 == 34
df_5 = df_2[df_4]
df_6 = df_5.pt
df_7 = df_6 / 1000.0
df_8 = df_7 > 20
df_9 = df_5.eta
df_10 = df_9.abs()
df_11 = df_10 < 1.4
df_12 = df_8 & df_11
df_13 = df_5[df_12]
df_14 = df_13.eta()
df_15 = df_13.phi()
df_16 = DataFrame()
df_17 = df_16.Jets('AntiKt4EMTopoJets')
df_18 = df_17.pt
df_19 = df_18 / 1000.0
df_20 = df_19 > 35.0
df_21 = df_17.eta
df_22 = df_21.abs()
df_23 = df_22 < 2.5
df_24 = df_20 & df_23
df_25 = df_17[df_24]
df_26 = df_25.eta()
df_27 = df_25.phi()
df_28 = DeltaR(df_14,df_15,df_26,df_27)
df_29 = df_28 < 0.2
df_30 = df_13[df_29]
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\servicex\xaod_runner.py in visit_ast_DataFrame(self, node)
    127                 if do_calc:
--> 128                     r = ast_awkward(await hep_tables.make_local_async(node.dataframe))
    129                     async with self._cached_lock:

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\local.py in make_local_async(df)
     95 
---> 96     return await _make_local_from_expression_async(expression, context, QueryVarTracker())
     97 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\local.py in _make_local_from_expression_async(expression, context, qvt)
     57     # Next the render
---> 58     statements, term = _render_expression(base_statement, expression, context, None, qvt)
     59     assert term.term == 'main_sequence'

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_expression(current_sequence, a, context, p_tracker, qvt)
    649     r = render_expression(current_sequence, context, p_tracker, qvt)
--> 650     r.visit(a)
    651     assert len(r.term_stack) == 1 or (len(r.term_stack) == 0 and len(r.statements) == 0)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit(self, a)
    446 
--> 447             ast.NodeVisitor.visit(self, a)
    448 

~\AppData\Local\Programs\Python\Python37\lib\ast.py in visit(self, node)
    270         visitor = getattr(self, method, self.generic_visit)
--> 271         return visitor(node)
    272 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit_Attribute(self, a)
    591             '''
--> 592             self.process_with_mapper(a)
    593 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in process_with_mapper(self, a)
    576             mapper = _map_to_data(self.sequence, self.context, self)
--> 577             mapper.visit(a)
    578             if len(mapper.statements) > 0:

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit(self, a)
    287 
--> 288         ast.NodeVisitor.visit(self, a)
    289 

~\AppData\Local\Programs\Python\Python37\lib\ast.py in visit(self, node)
    270         visitor = getattr(self, method, self.generic_visit)
--> 271         return visitor(node)
    272 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit_Attribute(self, a)
    314         # if a.value is not self.sequence._ast:
--> 315         _render_expresion_as_transform(self, self.context, a.value)
    316 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_expresion_as_transform(tracker, context, a)
     90     '''
---> 91     statements, term = _render_expression(tracker.sequence, a, context, tracker)
     92 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_expression(current_sequence, a, context, p_tracker, qvt)
    649     r = render_expression(current_sequence, context, p_tracker, qvt)
--> 650     r.visit(a)
    651     assert len(r.term_stack) == 1 or (len(r.term_stack) == 0 and len(r.statements) == 0)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit(self, a)
    446 
--> 447             ast.NodeVisitor.visit(self, a)
    448 

~\AppData\Local\Programs\Python\Python37\lib\ast.py in visit(self, node)
    270         visitor = getattr(self, method, self.generic_visit)
--> 271         return visitor(node)
    272 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit_ast_Filter(self, a)
    597             '''
--> 598             self.process_with_mapper(a)
    599 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in process_with_mapper(self, a)
    576             mapper = _map_to_data(self.sequence, self.context, self)
--> 577             mapper.visit(a)
    578             if len(mapper.statements) > 0:

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit(self, a)
    287 
--> 288         ast.NodeVisitor.visit(self, a)
    289 

~\AppData\Local\Programs\Python\Python37\lib\ast.py in visit(self, node)
    270         visitor = getattr(self, method, self.generic_visit)
--> 271         return visitor(node)
    272 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit_ast_Filter(self, a)
    302             # we will be dealing with an unwrapped sequence.
--> 303             term = _resolve_expr_inline(self.sequence, a.filter, self.context, self)
    304             st = statement_where(a, self.sequence.result_type, var_name, term, self.qvt)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _resolve_expr_inline(curret_sequence, expr, context, p_tracker)
    700     # else:
--> 701     filter_sequence, trm = _render_expression(curret_sequence, expr, context, p_tracker)
    702 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_expression(current_sequence, a, context, p_tracker, qvt)
    649     r = render_expression(current_sequence, context, p_tracker, qvt)
--> 650     r.visit(a)
    651     assert len(r.term_stack) == 1 or (len(r.term_stack) == 0 and len(r.statements) == 0)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit(self, a)
    446 
--> 447             ast.NodeVisitor.visit(self, a)
    448 

~\AppData\Local\Programs\Python\Python37\lib\ast.py in visit(self, node)
    270         visitor = getattr(self, method, self.generic_visit)
--> 271         return visitor(node)
    272 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit_Call(self, a)
    638                 func = cast(ast_Callable, a.func)
--> 639                 t = _render_callable(a, func, self.context, self)
    640                 self.term_stack.append(t)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_callable(a, callable, context, tracker)
    131         seq = tracker.sequence.unwrap_if_possible()
--> 132         s, t = _render_expression(seq, expr, new_context, tracker)
    133         assert t.term == 'main_sequence'

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_expression(current_sequence, a, context, p_tracker, qvt)
    649     r = render_expression(current_sequence, context, p_tracker, qvt)
--> 650     r.visit(a)
    651     assert len(r.term_stack) == 1 or (len(r.term_stack) == 0 and len(r.statements) == 0)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit(self, a)
    446 
--> 447             ast.NodeVisitor.visit(self, a)
    448 

~\AppData\Local\Programs\Python\Python37\lib\ast.py in visit(self, node)
    270         visitor = getattr(self, method, self.generic_visit)
--> 271         return visitor(node)
    272 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit_Compare(self, a)
    514 
--> 515             self.binary_op_statement(a.ops[0], a.left, a.comparators[0])
    516 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in binary_op_statement(self, operator, a_left, a_right)
    452             '''
--> 453             s_left, left = _render_expression(self.sequence, a_left, self.context, self)
    454             s_right, right = _render_expression(self.sequence, a_right, self.context, self)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_expression(current_sequence, a, context, p_tracker, qvt)
    649     r = render_expression(current_sequence, context, p_tracker, qvt)
--> 650     r.visit(a)
    651     assert len(r.term_stack) == 1 or (len(r.term_stack) == 0 and len(r.statements) == 0)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit(self, a)
    446 
--> 447             ast.NodeVisitor.visit(self, a)
    448 

~\AppData\Local\Programs\Python\Python37\lib\ast.py in visit(self, node)
    270         visitor = getattr(self, method, self.generic_visit)
--> 271         return visitor(node)
    272 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit_Call(self, a)
    607                 if a.func.attr not in _known_simple_math_functions:
--> 608                     self.process_with_mapper(a)
    609                 else:

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in process_with_mapper(self, a)
    576             mapper = _map_to_data(self.sequence, self.context, self)
--> 577             mapper.visit(a)
    578             if len(mapper.statements) > 0:

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit(self, a)
    287 
--> 288         ast.NodeVisitor.visit(self, a)
    289 

~\AppData\Local\Programs\Python\Python37\lib\ast.py in visit(self, node)
    270         visitor = getattr(self, method, self.generic_visit)
--> 271         return visitor(node)
    272 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit_Call(self, a)
    342             else:
--> 343                 _render_expresion_as_transform(self, self.context, a.func.value)
    344 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_expresion_as_transform(tracker, context, a)
     90     '''
---> 91     statements, term = _render_expression(tracker.sequence, a, context, tracker)
     92 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_expression(current_sequence, a, context, p_tracker, qvt)
    649     r = render_expression(current_sequence, context, p_tracker, qvt)
--> 650     r.visit(a)
    651     assert len(r.term_stack) == 1 or (len(r.term_stack) == 0 and len(r.statements) == 0)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit(self, a)
    446 
--> 447             ast.NodeVisitor.visit(self, a)
    448 

~\AppData\Local\Programs\Python\Python37\lib\ast.py in visit(self, node)
    270         visitor = getattr(self, method, self.generic_visit)
--> 271         return visitor(node)
    272 

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in visit_Call(self, a)
    638                 func = cast(ast_Callable, a.func)
--> 639                 t = _render_callable(a, func, self.context, self)
    640                 self.term_stack.append(t)

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in _render_callable(a, callable, context, tracker)
    141     elif root_expr is not None:
--> 142         monad_index = tracker.carry_monad_forward(root_expr)
    143         monad_ref = _monad_manager.new_monad_ref()

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in carry_monad_forward(self, a)
    216             # last chance - someone above us?
--> 217             m_index = self._parent_tracker.carry_monad_forward(a)
    218             index = -1

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in carry_monad_forward(self, a)
    216             # last chance - someone above us?
--> 217             m_index = self._parent_tracker.carry_monad_forward(a)
    218             index = -1

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in carry_monad_forward(self, a)
    216             # last chance - someone above us?
--> 217             m_index = self._parent_tracker.carry_monad_forward(a)
    218             index = -1

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in carry_monad_forward(self, a)
    216             # last chance - someone above us?
--> 217             m_index = self._parent_tracker.carry_monad_forward(a)
    218             index = -1

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in carry_monad_forward(self, a)
    216             # last chance - someone above us?
--> 217             m_index = self._parent_tracker.carry_monad_forward(a)
    218             index = -1

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in carry_monad_forward(self, a)
    216             # last chance - someone above us?
--> 217             m_index = self._parent_tracker.carry_monad_forward(a)
    218             index = -1

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in carry_monad_forward(self, a)
    216             # last chance - someone above us?
--> 217             m_index = self._parent_tracker.carry_monad_forward(a)
    218             index = -1

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in carry_monad_forward(self, a)
    216             # last chance - someone above us?
--> 217             m_index = self._parent_tracker.carry_monad_forward(a)
    218             index = -1

c:\users\gordo\documents\code\iris-hep\hep_tables\hep_tables\render.py in carry_monad_forward(self, a)
    209             assert len(possible) != 0 or self._parent_tracker is not None, \
--> 210                 f'Internal error, unable capture {ast.dump(a)}'
    211 

AssertionError: Internal error, unable capture ast_DataFrame()

The above exception was the direct cause of the following exception:

Exception                                 Traceback (most recent call last)
c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\servicex\xaod_runner.py in visit_ast_DataFrame(self, node)
    149 
--> 150                 await asyncio.gather(*results)
    151 

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\ast_utils.py in visit(self, node)
     26         visitor = getattr(self, method, self.generic_visit)
---> 27         return await visitor(node)
     28 

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\ast_utils.py in generic_visit(self, node)
     93         results = [eval_single_field(field, old_value) for field, old_value in iter_fields(node)]
---> 94         await asyncio.gather(*results)
     95 

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\ast_utils.py in eval_single_field(field, old_value)
     86             elif isinstance(old_value, AST):
---> 87                 new_node = await self.visit(old_value)
     88                 if new_node is None:

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\ast_utils.py in visit(self, node)
     26         visitor = getattr(self, method, self.generic_visit)
---> 27         return await visitor(node)
     28 

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\ast_utils.py in generic_visit(self, node)
     93         results = [eval_single_field(field, old_value) for field, old_value in iter_fields(node)]
---> 94         await asyncio.gather(*results)
     95 

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\ast_utils.py in eval_single_field(field, old_value)
     86             elif isinstance(old_value, AST):
---> 87                 new_node = await self.visit(old_value)
     88                 if new_node is None:

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\ast_utils.py in visit(self, node)
     26         visitor = getattr(self, method, self.generic_visit)
---> 27         return await visitor(node)
     28 

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\servicex\xaod_runner.py in visit_ast_DataFrame(self, node)
    155             bad_df = '\n'.join(dumps(node.dataframe))
--> 156             raise Exception(f'Internal Error: Failed to render DataFrame: {bad_df}') from e
    157 

Exception: Internal Error: Failed to render DataFrame: df_1 = DataFrame()
df_2 = df_1.Jets('AntiKt4EMTopoJets')
df_3 = df_2.pt
df_4 = df_3 / 1000.0
df_5 = df_4 > 35.0
df_6 = df_2.eta
df_7 = df_6.abs()
df_8 = df_7 < 2.5
df_9 = df_5 & df_8
df_10 = df_2[df_9]
df_11 = <<lambda>(e)>(df_10)
df_12 = df_10[df_11]
df_13 = df_12.pt

The above exception was the direct cause of the following exception:

Exception                                 Traceback (most recent call last)
<ipython-input-10-c6e3cc9f0e5a> in <module>
----> 1 histogram(all_samples[0]['data'].loose_jets_LLP.pt, bins=100, range=(0,400))

c:\users\gordo\documents\code\calratio2019\calms\.venv\lib\site-packages\make_it_sync\func_wrapper.py in wrapped_call(*args, **kwargs)
     61         @wraps(fn)
     62         def wrapped_call(*args, **kwargs):
---> 63             return _sync_version_of_function(fn, *args, **kwargs)
     64 
     65         return wrapped_call

c:\users\gordo\documents\code\calratio2019\calms\.venv\lib\site-packages\make_it_sync\func_wrapper.py in _sync_version_of_function(fn, *args, **kwargs)
     24         future = exector.submit(get_data_wrapper, *args, **kwargs)
     25 
---> 26         return future.result()
     27 
     28 

~\AppData\Local\Programs\Python\Python37\lib\concurrent\futures\_base.py in result(self, timeout)
    433                 raise CancelledError()
    434             elif self._state == FINISHED:
--> 435                 return self.__get_result()
    436             else:
    437                 raise TimeoutError()

~\AppData\Local\Programs\Python\Python37\lib\concurrent\futures\_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

~\AppData\Local\Programs\Python\Python37\lib\concurrent\futures\thread.py in run(self)
     55 
     56         try:
---> 57             result = self.fn(*self.args, **self.kwargs)
     58         except BaseException as exc:
     59             self.future.set_exception(exc)

c:\users\gordo\documents\code\calratio2019\calms\.venv\lib\site-packages\make_it_sync\func_wrapper.py in get_data_wrapper(*args, **kwargs)
     19             asyncio.set_event_loop(loop)
     20             assert not loop.is_running()
---> 21             return loop.run_until_complete(fn(*args, **kwargs))
     22 
     23         exector = ThreadPoolExecutor(max_workers=1)

~\AppData\Local\Programs\Python\Python37\lib\asyncio\base_events.py in run_until_complete(self, future)
    585             raise RuntimeError('Event loop stopped before Future completed.')
    586 
--> 587         return future.result()
    588 
    589     def stop(self):

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\plot.py in histogram_async(df, bins, range, density)
     51 
     52     # Now render locally so we can plot it.
---> 53     h, bins = await local.make_local_async(hist_data)
     54     f, ax = plt.subplots()
     55     ax.fill_between(bins, np.r_[h, h[-1]], step='post')

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\local.py in make_local_async(df)
     18     modified_df = df
     19     for r in runners:
---> 20         modified_df = await r.process(modified_df)
     21         if isinstance(modified_df, result):
     22             break

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\servicex\xaod_runner.py in process(self, df)
    205     async def process(self, df: DataFrame) -> Union[DataFrame, Column, result]:
    206         'Process as much of the tree as we can process'
--> 207         return await _process(df)

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\servicex\xaod_runner.py in _process(df)
    187     # Run the transformation to see what we can actually convert.
    188     t = _transform(marker)
--> 189     r = await t.visit(top_level_ast)
    190 
    191     if isinstance(r, ast_Column):

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\ast_utils.py in visit(self, node)
     25         method = 'visit_' + node.__class__.__name__
     26         visitor = getattr(self, method, self.generic_visit)
---> 27         return await visitor(node)
     28 
     29     async def generic_visit(self, node: AST, context: Optional[Any] = None):

c:\users\gordo\documents\code\calratio2019\hl_tables\hl_tables\servicex\xaod_runner.py in visit_ast_DataFrame(self, node)
    154             from dataframe_expressions import dumps
    155             bad_df = '\n'.join(dumps(node.dataframe))
--> 156             raise Exception(f'Internal Error: Failed to render DataFrame: {bad_df}') from e
    157 
    158     async def visit_ast_Column(self, node: ast_Column) -> ast.AST:

Exception: Internal Error: Failed to render DataFrame: df_1 = DataFrame()
df_2 = df_1.Jets('AntiKt4EMTopoJets')
df_3 = df_2.pt
df_4 = df_3 / 1000.0
df_5 = df_4 > 35.0
df_6 = df_2.eta
df_7 = df_6.abs()
df_8 = df_7 < 2.5
df_9 = df_5 & df_8
df_10 = df_2[df_9]
df_11 = <<lambda>(e)>(df_10)
df_12 = df_10[df_11]
df_13 = df_12.pt
df_14 = df_13.histogram()
gordonwatts commented 4 years ago

If you look at the dump above, you'll note that `DataFrame()` gets created twice in that code (as `df_1` and again as `df_16` in the last `render_callable` dump)! This was caused by a deepcopy mistake we made.