mars-project / mars

Mars is a tensor-based unified framework for large-scale data computation which scales numpy, pandas, scikit-learn and Python functions.
https://mars-project.readthedocs.io
Apache License 2.0
2.7k stars 326 forks source link

[BUG] Executing `dataframe.to_csv()` failed after dataframe is executed #1664

Closed fengwuyun closed 3 years ago

fengwuyun commented 3 years ago

Describe the bug

I encountered a problem when I tried to save my dataframe to CSV. My Mars version is 0.5.3, so I am confused that the error message mentions Mars version 0.5.1.

The full stack trace of this error is below:

TypeError                                 Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider.serialize_field()
    640             try:
--> 641                 self._set_value(value, field_obj, field.type, weak_ref=field.weak_ref)
    642             except TypeError:

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_value()
    559         else:
--> 560             cls._set_typed_value(value, obj, tp, weak_ref=weak_ref)
    561 

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_typed_value()
    485             # dict type
--> 486             self._set_dict(<dict>value, obj, tp, weak_ref=weak_ref)
    487         else:

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_dict()
    407             value_obj = obj.dict.values.value.add()
--> 408             self._set_value(v, value_obj, tp=tp.value_type if tp is not None else tp)
    409 

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_value()
    557         if tp is None:
--> 558             cls._set_untyped_value(value, obj, weak_ref=weak_ref)
    559         else:

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_untyped_value()
    553         else:
--> 554             raise TypeError(f'Unknown type to serialize: {type(value)}')
    555 

TypeError: Unknown type to serialize: <enum 'TensorOrder'>

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
/home/admin/work/public-mars-0.5.1.zip/mars/scheduler/graph.py in _execute_graph()

/home/admin/work/public-mars-0.5.1.zip/mars/utils.py in _wrapped()

/home/admin/work/public-mars-0.5.1.zip/mars/scheduler/graph.py in create_operand_actors()

/home/admin/work/public-mars-0.5.1.zip/mars/utils.py in _wrapped()

/home/admin/work/public-mars-0.5.1.zip/mars/scheduler/graph.py in get_executable_operand_dag()

/home/admin/work/public-mars-0.5.1.zip/mars/utils.py in serialize_graph()

~\AppData\Roaming\Python\Python37\site-packages\mars\graph.pyx in mars.graph.DirectedGraph.to_pb()
    420         return graph
--> 421 
    422     def to_pb(self, pb_obj=None, data_serial_type=None, pickle_protocol=None):

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\core.pyx in mars.serialize.core.Serializable.to_pb()
    686                                              pickle_protocol=pickle_protocol)
--> 687         return self.serialize(provider, obj=obj)
    688 

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\core.pyx in mars.serialize.core.Serializable.serialize()
    669     def serialize(self, Provider provider, obj=None):
--> 670         return provider.serialize_model(self, obj=obj)
    671 

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\core.pyx in mars.serialize.core.Provider.serialize_model()
    797     cpdef serialize_model(self, model_instance, obj=None):
--> 798         if obj is None:
    799             obj = model_instance.cls(self)()

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\core.pyx in mars.serialize.core.Field.serialize()
    154 
--> 155     cpdef serialize(self, Provider provider, model_instance, obj):
    156         return provider.serialize_field(self, model_instance, obj)

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\core.pyx in mars.serialize.core.Field.serialize()
    155     cpdef serialize(self, Provider provider, model_instance, obj):
--> 156         return provider.serialize_field(self, model_instance, obj)
    157 

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider.serialize_field()
    630                     if val is not None:
--> 631                         self._serial_reference_value(tag, field.type.type.model, val, it_obj)
    632                     elif isinstance(it_obj, Value):

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._serial_reference_value()
    572                 field_obj = value.cls(self)()
--> 573                 value.serialize(self, obj=field_obj)
    574                 value_pb.type_id = value.__serializable_index__

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\core.pyx in mars.serialize.core.Serializable.serialize()
    669     def serialize(self, Provider provider, obj=None):
--> 670         return provider.serialize_model(self, obj=obj)
    671 

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\core.pyx in mars.serialize.core.Provider.serialize_model()
    797     cpdef serialize_model(self, model_instance, obj=None):
--> 798         if obj is None:
    799             obj = model_instance.cls(self)()

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\core.pyx in mars.serialize.core.Field.serialize()
    154 
--> 155     cpdef serialize(self, Provider provider, model_instance, obj):
    156         return provider.serialize_field(self, model_instance, obj)

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\core.pyx in mars.serialize.core.Field.serialize()
    155     cpdef serialize(self, Provider provider, model_instance, obj):
--> 156         return provider.serialize_field(self, model_instance, obj)
    157 

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider.serialize_field()
    643                 exc_info = sys.exc_info()
--> 644                 raise TypeError(f'Failed to set field `{tag}` for {model_instance} with '
    645                                 f'value {value}, reason: {exc_info[1]}') \

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider.serialize_field()
    640             try:
--> 641                 self._set_value(value, field_obj, field.type, weak_ref=field.weak_ref)
    642             except TypeError:

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_value()
    559         else:
--> 560             cls._set_typed_value(value, obj, tp, weak_ref=weak_ref)
    561 

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_typed_value()
    485             # dict type
--> 486             self._set_dict(<dict>value, obj, tp, weak_ref=weak_ref)
    487         else:

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_dict()
    407             value_obj = obj.dict.values.value.add()
--> 408             self._set_value(v, value_obj, tp=tp.value_type if tp is not None else tp)
    409 

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_value()
    557         if tp is None:
--> 558             cls._set_untyped_value(value, obj, weak_ref=weak_ref)
    559         else:

~\AppData\Roaming\Python\Python37\site-packages\mars\serialize\pbserializer.pyx in mars.serialize.pbserializer.ProtobufSerializeProvider._set_untyped_value()
    553         else:
--> 554             raise TypeError(f'Unknown type to serialize: {type(value)}')
    555 

TypeError: Failed to set field `extra_params` for Chunk <op=DataFrameFetch, key=08ec6fd4af751d9f5dcec87ea3a6dde3> with value {'order': <TensorOrder.C_ORDER: 'C'>, 'dtype': dtype('<U'), '_i': 0}, reason: Unknown type to serialize: <enum 'TensorOrder'>

The above exception was the direct cause of the following exception:

ExecutionFailed                           Traceback (most recent call last)
<ipython-input-41-4dd810a40cfc> in <module>
----> 1 fsr_st.to_csv("fsr_st03.csv",encoding="utf8").execute()

~\AppData\Roaming\Python\Python37\site-packages\mars\core.py in execute(self, session, **kw)
    626 
    627         if wait:
--> 628             return run()
    629         else:
    630             thread_executor = ThreadPoolExecutor(1)

~\AppData\Roaming\Python\Python37\site-packages\mars\core.py in run()
    622 
    623         def run():
--> 624             self.data.execute(session, **kw)
    625             return self
    626 

~\AppData\Roaming\Python\Python37\site-packages\mars\core.py in execute(self, session, **kw)
    373 
    374         if wait:
--> 375             return run()
    376         else:
    377             # leverage ThreadPoolExecutor to submit task,

~\AppData\Roaming\Python\Python37\site-packages\mars\core.py in run()
    368         def run():
    369             # no more fetch, thus just fire run
--> 370             session.run(self, **kw)
    371             # return Tileable or ExecutableTuple itself
    372             return self

~\AppData\Roaming\Python\Python37\site-packages\mars\session.py in run(self, *tileables, **kw)
    476         tileables = tuple(mt.tensor(t) if not isinstance(t, (Entity, Base)) else t
    477                           for t in tileables)
--> 478         result = self._sess.run(*tileables, **kw)
    479 
    480         for t in tileables:

~\AppData\Roaming\Python\Python37\site-packages\mars\web\session.py in run(self, *tileables, **kw)
    212             timeout_val = min(check_interval, timeout - time_elapsed) if timeout > 0 else check_interval
    213             try:
--> 214                 if self._check_response_finished(graph_url, timeout_val):
    215                     break
    216             except KeyboardInterrupt:

~\AppData\Roaming\Python\Python37\site-packages\mars\web\session.py in _check_response_finished(self, graph_url, timeout)
    172                 exc_info = pickle.loads(base64.b64decode(resp_json['exc_info']))
    173                 exc = exc_info[1].with_traceback(exc_info[2])
--> 174                 raise ExecutionFailed('Graph execution failed.') from exc
    175             else:
    176                 raise ExecutionFailed('Graph execution failed with unknown reason.')

ExecutionFailed: 'Graph execution failed.'
wjsi commented 3 years ago

Please provide the full code that reproduces this issue. Mars submits a whole graph when execution is triggered, and to_csv() may not be the true cause of the issue.

fengwuyun commented 3 years ago
import mars.dataframe as md

mf = md.DataFrame(data=[1,2,3,4],columns=['count']).execute()
mf.to_csv("mf.csv").execute()
wjsi commented 3 years ago

OK, I can reproduce this issue now. We will fix it ASAP.