Closed sangaj closed 6 years ago
把错误栈贴出来看下。
IndexError Traceback (most recent call last) /home/hjh/anaconda3/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj) 670 type_pprinters=self.type_printers, 671 deferred_pprinters=self.deferred_printers) --> 672 printer.pretty(obj) 673 printer.flush() 674 return stream.getvalue()
/home/hjh/anaconda3/lib/python3.6/site-packages/IPython/lib/pretty.py in pretty(self, obj) 381 if callable(meth): 382 return meth(obj, self, cycle) --> 383 return _default_pprint(obj, self, cycle) 384 finally: 385 self.end_group()
/home/hjh/anaconda3/lib/python3.6/site-packages/IPython/lib/pretty.py in _default_pprint(obj, p, cycle) 501 if _safe_getattr(klass, '__repr__', None) not in _baseclass_reprs: 502 # A user-provided repr. Find newlines and replace them with p.break_() --> 503 _repr_pprint(obj, p, cycle) 504 return 505 p.begin_group(1, '<')
/home/hjh/anaconda3/lib/python3.6/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle) 699 """A pprint that just redirects to the normal repr function.""" 700 # Find newlines and replace them with p.break_() --> 701 output = repr(obj) 702 for idx,output_line in enumerate(output.splitlines()): 703 if idx:
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/expr/expressions.py in __repr__(self) 112 if self.__execution is None: 113 try: --> 114 self.__execution = self.execute() 115 except Exception as e: 116 self.__execution = e
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/expr/expressions.py in execute(self, **kwargs) 185 return result 186 --> 187 return self._handle_delay_call('execute', self, wrapper=wrapper, **kwargs) 188 189 def compile(self):
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/expr/expressions.py in _handle_delay_call(self, method, *args, **kwargs) 144 145 wrapper = kwargs.pop('wrapper', None) --> 146 result = getattr(engine, method)(*args, **kwargs) 147 if wrapper is None: 148 return result
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/core.py in execute(self, *exprs_args_kwargs, **kwargs) 722 kwargs['ui'] = self._create_ui(kwargs) 723 kwargs['action'] = '_execute' --> 724 return self._action(*exprs_args_kwargs, **kwargs) 725 726 def persist(self, *exprs_args_kwargs, **kwargs):
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/core.py in _action(self, *exprs_args_kwargs, **kwargs) 547 try: 548 res = self._execute_dag(dag, ui=ui, async=async, n_parallel=n_parallel, --> 549 timeout=timeout, progress_proportion=progress_proportion) 550 except KeyboardInterrupt: 551 self.stop()
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/core.py in _execute_dag(cls, dag, ui, async, n_parallel, timeout, close_and_notify, progress_proportion) 786 progress_proportion=1.0): 787 return dag.execute(ui=ui, async=async, n_parallel=n_parallel, timeout=timeout, --> 788 close_and_notify=close_and_notify, progress_proportion=progress_proportion) 789 790 def _get_libraries(self, libraries):
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/core.py in execute(self, ui, async, n_parallel, timeout, close_and_notify, progress_proportion) 329 try: 330 if n_parallel <= 1: --> 331 results = self._run(ui, progress_proportion) 332 else: 333 results = self._run_in_parallel(ui, n_parallel, progress_proportion=progress_proportion)
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/core.py in _run(self, ui, progress_proportion) 205 result_idx = dict() 206 for i, call in enumerate(calls): --> 207 res = call(ui=ui, progress_proportion=progress_proportion / len(calls)) 208 results[i] = res 209 if call.result_index is not None:
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/core.py in call(self, ui, progress_proportion) 184 185 def call(self, ui=None, progress_proportion=None): --> 186 res = self.run(ui=ui, progress_proportion=progress_proportion) 187 if self.callback: 188 self.callback(res)
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/core.py in run(s, **execute_kw) 580 if 'ui' in kw: 581 kw['ui'].add_keys(group_key) --> 582 result = engine._do_execute(expr_dag, expr, **kw) 583 if 'ui' in kw: 584 kw['ui'].remove_keys(group_key)
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/odpssql/engine.py in _do_execute(self, expr_dag, expr, ui, progress_proportion, lifecycle, head, tail, hints, priority, **kw) 323 return result 324 --> 325 sql = self._compile(expr, libraries=libraries) 326 327 cache_data = None
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/odpssql/engine.py in _compile(self, expr, prettify, libraries) 190 191 libraries = self._ctx.prepare_resources(self._get_libraries(libraries)) --> 192 self._ctx.register_udfs(*gen_udf(expr, UDF_CLASS_NAME, libraries=libraries)) 193 194 return backend.compile(expr)
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/odpssql/codegen.py in gen_udf(expr, func_cls_name, libraries) 671 if isinstance(node, MappedExpr): 672 _gen_map_udf(node, func_cls_name, libraries, func, resources, --> 673 func_to_udfs, func_to_resources, func_params) 674 elif isinstance(node, RowAppliedCollectionExpr): 675 _gen_apply_udf(node, func_cls_name, libraries, func, resources,
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/df/backends/odpssql/codegen.py in _gen_map_udf(node, func_cls_name, libraries, func, resources, func_to_udfs, func_to_resources, func_params) 515 'to_type': to_type, 516 'func_cls_name': func_cls_name, --> 517 'func_str': to_str(base64.b64encode(cloudpickle.dumps(func, dump_code=options.df.dump_udf))), 518 'func_args_str': func_args_str, 519 'func_kwargs_str': func_kwargs_str,
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/lib/cloudpickle.py in dumps(obj, protocol, dump_code) 672 673 cp = CloudPickler(file, protocol, dump_code) --> 674 cp.dump(obj) 675 676 return file.getvalue()
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/lib/cloudpickle.py in dump(self, obj) 185 self.inject_addons() 186 try: --> 187 return Pickler.dump(self, obj) 188 except RuntimeError as e: 189 if 'recursion' in e.args[0]:
/home/hjh/anaconda3/lib/python3.6/pickle.py in dump(self, obj) 407 if self.proto >= 4: 408 self.framer.start_framing() --> 409 self.save(obj) 410 self.write(STOP) 411 self.framer.end_framing()
/home/hjh/anaconda3/lib/python3.6/pickle.py in save(self, obj, save_persistent_id) 474 f = self.dispatch.get(t) 475 if f is not None: --> 476 f(self, obj) # Call unbound method with explicit self 477 return 478
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/lib/cloudpickle.py in save_function(self, obj, name) 268 klass = getattr(themodule, name, None) 269 if klass is None or klass is not obj: --> 270 self.save_function_tuple(obj) 271 return 272
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/lib/cloudpickle.py in save_function_tuple(self, func) 298 write = self.write 299 --> 300 code, f_globals, defaults, closure, dct, base_globals = self.extract_func_data(func) 301 302 save(_fill_function) # skeleton function updater
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/lib/cloudpickle.py in extract_func_data(self, func) 359 360 # extract all global ref's --> 361 func_global_refs = self.extract_code_globals(code) 362 363 # process all variables referenced by global environment
/home/hjh/anaconda3/lib/python3.6/site-packages/odps/lib/cloudpickle.py in extract_code_globals(co) 341 extended_arg = oparg*65536 342 if op in GLOBAL_OPS: --> 343 out_names.add(names[oparg]) 344 345 # see if nested function have any global refs
IndexError: tuple index out of range
这个是因为 replace 的内部实现使用了 Python 函数,这个函数要被 pickle 到 MaxCompute 执行,而你的环境是 3.6,而 MaxCompute 内部是 2.7,我们对 3.6 的字节码改写还没有支持。
解决办法是使用 3.5 及以下版本的 Python,最好是用 Python 2.7,这样和 MaxCompute 这边的兼容性会较好。
Support for Python 3.6 bytecode has already been added.
使用 replace 函数时报错:tuple index out of range。DataFrame 名为 list,列名为 p,其中有些行为空,有些有数,有些含有 +86。代码为 list.p.replace('+86', '')
我尝试用另外一个数据源的数据,也有相同问题, 我下载了iris的数据,然后传至公共服务器,然后用replace,还是报错相同问题, 是不是本身这个function 有问题,或者能否在文档中举例说明如何使用replace?