alibaba / euler

A distributed graph deep learning framework.
Apache License 2.0
2.89k stars 559 forks source link

get_dense_feature 这个 API 可以指定在 GPU 上运行吗? #235

Closed chxk closed 4 years ago

chxk commented 4 years ago

如题:在 tf.device('/device:GPU:0') 作用域内直接调用 get_dense_feature 会报错,代码和完整报错信息如下:

with tf.device('/device:GPU:0'):
     labels = tf_euler.get_dense_feature(inputs, [self.label_idx],
                                                [self.label_dim])[0]
Traceback (most recent call last):
  File "ori.py", line 110, in <module>
    tf.train.StopAtStepHook(2000)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 504, in MonitoredTrainingSession
    stop_grace_period_secs=stop_grace_period_secs)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 921, in __init__
    stop_grace_period_secs=stop_grace_period_secs)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 643, in __init__
    self._sess = _RecoverableSession(self._coordinated_creator)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1107, in __init__
    _WrappedSession.__init__(self, self._create_session())
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1112, in _create_session
    return self._sess_creator.create_session()
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 800, in create_session
    self.tf_sess = self._session_creator.create_session()
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 566, in create_session
    init_fn=self._scaffold.init_fn)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/training/session_manager.py", line 294, in prepare_session
    sess.run(init_op, feed_dict=init_feed_dict)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 929, in run
    run_metadata_ptr)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1152, in _run
    feed_dict_tensor, options, run_metadata)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1328, in _do_run
    run_metadata)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1348, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Cannot assign a device for operation graphsage_1/split: Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
Registered kernels:
  device='XLA_CPU'; Tlen in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT32, DT_HALF, DT_UINT32, DT_UINT64]
  device='XLA_GPU'; Tlen in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT8, ..., DT_QINT32, DT_BFLOAT16, DT_HALF, DT_UINT32, DT_UINT64]
  device='XLA_CPU_JIT'; Tlen in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT32, DT_HALF, DT_UINT32, DT_UINT64]
  device='XLA_GPU_JIT'; Tlen in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT8, ..., DT_QINT32, DT_BFLOAT16, DT_HALF, DT_UINT32, DT_UINT64]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_VARIANT]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_VARIANT]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_RESOURCE]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_RESOURCE]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_STRING]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_STRING]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_BOOL]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_BOOL]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_COMPLEX128]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_COMPLEX128]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_COMPLEX64]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_COMPLEX64]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_DOUBLE]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_DOUBLE]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_FLOAT]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_FLOAT]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_BFLOAT16]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_BFLOAT16]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_HALF]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_HALF]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_INT8]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_INT8]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_UINT8]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_UINT8]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_INT16]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_INT16]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_UINT16]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_UINT16]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_INT32]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_INT32]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_INT64]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_INT64]
  device='GPU'; T in [DT_INT32]; Tlen in [DT_INT64]
  device='GPU'; T in [DT_INT32]; Tlen in [DT_INT32]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_BFLOAT16]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_BFLOAT16]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_COMPLEX128]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_COMPLEX128]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_COMPLEX64]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_COMPLEX64]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_DOUBLE]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_DOUBLE]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_FLOAT]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_FLOAT]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_HALF]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_HALF]

     [[node graphsage_1/split (defined at /home/cxk/oula/lib/python2.7/site-packages/tf_euler/python/euler_ops/feature_ops.py:32)  = SplitV[T=DT_INT64, Tlen=DT_INT32, num_split=1, _device="/device:GPU:0"](SampleNode, graphsage_1/packed, graphsage_1/split/split_dim)]]

Caused by op u'graphsage_1/split', defined at:
  File "ori.py", line 100, in <module>
    _, loss, metric_name, metric = model(source)
  File "/home/cxk/oula/lib/python2.7/site-packages/tf_euler/python/base_layers.py", line 62, in __call__
    outputs = self.call(inputs)
  File "ori.py", line 68, in call
    nodes, labels = self.sampler(inputs)
  File "ori.py", line 76, in sampler
    [self.label_dim])[0]
  File "/home/cxk/oula/lib/python2.7/site-packages/tf_euler/python/euler_ops/feature_ops.py", line 110, in get_dense_feature
    base._LIB_OP.get_dense_feature, thread_num)
  File "/home/cxk/oula/lib/python2.7/site-packages/tf_euler/python/euler_ops/feature_ops.py", line 88, in _get_dense_feature
    split_data_list = _split_input_data(nodes_or_edges, thread_num)
  File "/home/cxk/oula/lib/python2.7/site-packages/tf_euler/python/euler_ops/feature_ops.py", line 32, in _split_input_data
    split_data_list = tf.split(data_list, split_size)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 1337, in split
    value=value, size_splits=size_splits, axis=axis, num_split=num, name=name)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 8148, in split_v
    num_split=num_split, name=name)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/cxk/oula/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Cannot assign a device for operation graphsage_1/split: Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
Registered kernels:
  device='XLA_CPU'; Tlen in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT32, DT_HALF, DT_UINT32, DT_UINT64]
  device='XLA_GPU'; Tlen in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT8, ..., DT_QINT32, DT_BFLOAT16, DT_HALF, DT_UINT32, DT_UINT64]
  device='XLA_CPU_JIT'; Tlen in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, DT_QUINT8, DT_QINT32, DT_HALF, DT_UINT32, DT_UINT64]
  device='XLA_GPU_JIT'; Tlen in [DT_INT32, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT8, ..., DT_QINT32, DT_BFLOAT16, DT_HALF, DT_UINT32, DT_UINT64]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_VARIANT]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_VARIANT]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_RESOURCE]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_RESOURCE]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_STRING]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_STRING]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_BOOL]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_BOOL]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_COMPLEX128]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_COMPLEX128]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_COMPLEX64]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_COMPLEX64]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_DOUBLE]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_DOUBLE]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_FLOAT]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_FLOAT]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_BFLOAT16]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_BFLOAT16]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_HALF]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_HALF]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_INT8]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_INT8]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_UINT8]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_UINT8]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_INT16]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_INT16]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_UINT16]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_UINT16]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_INT32]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_INT32]
  device='CPU'; Tlen in [DT_INT64]; T in [DT_INT64]
  device='CPU'; Tlen in [DT_INT32]; T in [DT_INT64]
  device='GPU'; T in [DT_INT32]; Tlen in [DT_INT64]
  device='GPU'; T in [DT_INT32]; Tlen in [DT_INT32]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_BFLOAT16]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_BFLOAT16]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_COMPLEX128]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_COMPLEX128]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_COMPLEX64]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_COMPLEX64]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_DOUBLE]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_DOUBLE]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_FLOAT]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_FLOAT]
  device='GPU'; Tlen in [DT_INT64]; T in [DT_HALF]
  device='GPU'; Tlen in [DT_INT32]; T in [DT_HALF]

     [[node graphsage_1/split (defined at /home/cxk/oula/lib/python2.7/site-packages/tf_euler/python/euler_ops/feature_ops.py:32)  = SplitV[T=DT_INT64, Tlen=DT_INT32, num_split=1, _device="/device:GPU:0"](SampleNode, graphsage_1/packed, graphsage_1/split/split_dim)]]