Oneflow-Inc / oneflow

OneFlow is a deep learning framework designed to be user-friendly, scalable and efficient.
http://www.oneflow.org
Apache License 2.0

NCCL not supported for float16? #10342

Open HuaYZhao opened 11 months ago

HuaYZhao commented 11 months ago

Description

    import oneflow as flow

    # `placement` is not shown in the report; a two-GPU placement is assumed here
    placement = flow.placement("cuda", ranks=[0, 1])
    inputs = flow.ones([4, 10], dtype=flow.float32, placement=placement, sbp=flow.sbp.broadcast)
    weight = flow.ones([10, 4], dtype=flow.float32, placement=placement, sbp=flow.sbp.split(1)) * 2
    y = flow.matmul(inputs, weight)
    y.to_global(placement, flow.sbp.broadcast).to_local()

This works, but when I change the dtype to float16, as follows:

    inputs = flow.ones([4, 10], dtype=flow.float16, placement=placement, sbp=flow.sbp.broadcast)
    weight = flow.ones([10, 4], dtype=flow.float16, placement=placement, sbp=flow.sbp.split(1)) * 2
    y = flow.matmul(inputs, weight)
    y.to_global(placement, flow.sbp.broadcast).to_local()

it fails with the following error:

RuntimeError: InferDataType Failed. Expected kFloat, but got kFloat16
  File "oneflow/core/functional/impl/nn_functor.cpp", line 359, in operator()
    OpInterpUtil::Dispatch<Tensor>(*matmul_op_, {cast_a, cast_b}, attrs)
  File "oneflow/core/framework/op_interpreter/op_interpreter_util.cpp", line 144, in Dispatch<oneflow::one::Tensor>
    Dispatch<TensorTuple>(op_expr, inputs, ctx)
  File "oneflow/core/framework/op_interpreter/op_interpreter_util.cpp", line 135, in Dispatch<oneflow::one::TensorTuple>
    Dispatch(op_expr, inputs, outputs.get(), ctx)
  File "oneflow/core/framework/op_interpreter/op_interpreter.cpp", line 103, in Apply
    internal_->Apply(op_expr, inputs, outputs, ctx)
  File "oneflow/core/framework/op_interpreter/eager_global_op_interpreter.cpp", line 219, in Interpret
    user_op_expr.mut_global_tensor_infer_cache()->GetOrInfer(*infer_args)
  File "oneflow/core/framework/global_tensor_infer_cache.cpp", line 367, in GetOrInfer
    Infer(*user_op_expr, infer_args)
  File "oneflow/core/framework/global_tensor_infer_cache.cpp", line 274, in Infer
    user_op_expr.InferLogicalTensorDesc( infer_args.attrs ... return output_mut_metas.at(i).mut_tensor_meta(); })
  File "oneflow/core/framework/op_expr.cpp", line 612, in InferLogicalTensorDesc
    dtype_infer_fn_(&infer_ctx)
  File "oneflow/user/ops/matmul_op.cpp", line 69, in InferDataType4Matmul
    CHECK_EQ_OR_RETURN(ctx->InputDType("b", 0), dtype)
Error Type: oneflow.ErrorProto.check_failed_error
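
The check in matmul_op.cpp requires operand b's dtype to equal operand a's, so the message means the two tensors reach the matmul with different dtypes (one kFloat, one kFloat16), even though both were created as float16. A minimal workaround sketch, under the assumption that one operand is being silently promoted to float32 somewhere (the `* 2` scalar multiply is a plausible suspect): cast both operands explicitly before the matmul.

    # Workaround sketch (assumption: one operand was promoted to float32).
    # Forcing a common dtype satisfies the equality check in InferDataType4Matmul.
    inputs = inputs.to(flow.float16)
    weight = weight.to(flow.float16)
    y = flow.matmul(inputs, weight)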

Please help!

clackhan commented 11 months ago

I cannot reproduce your problem. Can you print the dtypes of inputs and weight right before the matmul to make sure they match? If they are the same and the problem still occurs, please also provide your OneFlow version. Thanks.
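
A minimal diagnostic sketch along these lines, reusing the variable names from the snippets above:

    # Print the operand dtypes right before the matmul, plus the installed version.
    print(inputs.dtype)      # expected: oneflow.float16
    print(weight.dtype)      # expected: oneflow.float16; oneflow.float32 here would explain the error
    print(flow.__version__)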