NVIDIA / Fuser

A Fusion Code Generator for NVIDIA GPUs (commonly known as "nvFuser")
Other
259 stars 51 forks source link

ExpressionEvaluator errors out when running `logical_and` #2697

Open jjsjann123 opened 2 months ago

jjsjann123 commented 2 months ago

I believe the issue is somewhere inside DynamicType, which is not handling the evaluation properly. We might be able to work around it by changing something in the expression evaluator, but I haven't been able to locate the issue yet.

// Minimal reproducer: a fusion whose only op is logical_and over two
// concrete 4x10 boolean tensors. The fusion is run through
// FusionExecutorCache, and the outputs are then handed to testValidate; the
// backtrace attached to this report shows the throw originating from
// BinaryOp::evaluate, reached via ExpressionEvaluator inside testValidate.
TEST_F(NVFuserTest, Repro) {
  std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
  Fusion* fusion_raw = fusion_ptr.get();
  FusionGuard guard(fusion_raw);

  // Fusion definition: out = lhs && rhs, all of shape {4, 10} and dtype Bool.
  TensorView* lhs = makeContigConcreteTensor({4, 10}, DataType::Bool);
  TensorView* rhs = makeContigConcreteTensor({4, 10}, DataType::Bool);
  TensorView* out = logical_and(lhs, rhs);

  fusion_raw->addInput(lhs);
  fusion_raw->addInput(rhs);
  fusion_raw->addOutput(out);

  // Boolean inputs are produced by thresholding random float tensors at 0.5.
  auto float_cuda = at::TensorOptions();
  float_cuda = float_cuda.dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor lhs_data = at::randn({4, 10}, float_cuda) > 0.5;
  at::Tensor rhs_data = at::randn({4, 10}, float_cuda) > 0.5;
  std::vector<c10::IValue> aten_inputs = {lhs_data, rhs_data};

  FusionExecutorCache executor_cache(std::move(fusion_ptr));
  auto results = executor_cache.runFusionWithInputs(aten_inputs);

  // The most recent runtime is expected to hold exactly one segment group.
  FusionKernelRuntime* kernel_runtime =
      executor_cache.getMostRecentKernelRuntime();
  EXPECT_EQ(kernel_runtime->fusionSegments()->groups().size(), 1);

  // Validation step; this is the call that appears in the reported backtrace.
  testValidate(
      executor_cache.fusion(), results, aten_inputs, __LINE__, __FILE__);
}

backtrace:

(gdb) bt
#0  0x00007fffba2a54a1 in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6
#1  0x0000555555af0142 in nvfuser::nvfCheckFail (func=0x555556fbd616 "operator()", file=0x555556fbd5d0 "/opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h", line=228,
    msg="false INTERNAL ASSERT FAILED at \"/opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h\":228, please report a bug with repro script to NVFuser at https://github.com/NVIDIA/Fuser/issues."...) at /opt/pytorch/nvfuser/csrc/exceptions.cpp:274
#2  0x0000555555af0357 in nvfuser::nvfErrorFail (func=0x555556fbd616 "operator()", file=0x555556fbd5d0 "/opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h", line=228,
    condMsg=0x555556fbd500 "false INTERNAL ASSERT FAILED at \"/opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h\":228, please report a bug with repro script to NVFuser at https://github.com/NVIDIA/Fuser/issues."..., userMsg="Result is dynamic but not convertible to result type") at /opt/pytorch/nvfuser/csrc/exceptions.cpp:300
#3  0x0000555555dd0357 in dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>::dispatch<dynamic_type::operator&&<dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>, void>(dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&)::{lambda(auto:1&&, auto:2&&)#1}, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&>(dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, 
nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&)::{lambda(auto:1)#3}::operator()<std::type_identity<at::Tensor> >(std::type_identity<at::Tensor>) const (__closure=0x7fffffffc240, t=...)
    at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h:228
#4  0x0000555555dd3b0e in _ZNK12dynamic_type11ForAllTypesIJN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJS5_EEEJN7nvfuser12StructHandleENS9_7PointerENS9_6OpaqueES2_S4_dlbEEESaISD_EEEEclIZNSD_8dispatchIZNS_aaIRKSD_SL_SD_vEET1_OT_OT0_EUlSO_SQ_E_SL_JSL_EEEDcSO_SQ_DpOT1_EUlSN_E1_EEDaSN_ (this=0x7fffffffc271, f=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:384
#5  0x0000555555dd3bbc in _ZNK12dynamic_type11ForAllTypesIJN7nvfuser6OpaqueEN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJS7_EEEJNS1_12StructHandleENS1_7PointerES2_S4_S6_dlbEEESaISD_EEEEclIZNSD_8dispatchIZNS_aaIRKSD_SL_SD_vEET1_OT_OT0_EUlSO_SQ_E_SL_JSL_EEEDcSO_SQ_DpOT1_EUlSN_E1_EEDaSN_ (this=0x7fffffffc2d0, f=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:385
#6  0x0000555555dd3c4a in _ZNK12dynamic_type11ForAllTypesIJN7nvfuser7PointerENS1_6OpaqueEN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJS8_EEEJNS1_12StructHandleES2_S3_S5_S7_dlbEEESaISD_EEEEclIZNSD_8dispatchIZNS_aaIRKSD_SL_SD_vEET1_OT_OT0_EUlSO_SQ_E_SL_JSL_EEEDcSO_SQ_DpOT1_EUlSN_E1_EEDaSN_ (this=0x7fffffffc32f, f=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:385
#7  0x0000555555dd3cd8 in _ZNK12dynamic_type11ForAllTypesIJN7nvfuser12StructHandleENS1_7PointerENS1_6OpaqueEN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJS9_EEEJS2_S3_S4_S6_S8_dlbEEESaISD_EEEEclIZNSD_8dispatchIZNS_aaIRKSD_SL_SD_vEET1_OT_OT0_EUlSO_SQ_E_SL_JSL_EEEDcSO_SQ_DpOT1_EUlSN_E1_EEDaSN_ (this=0x7fffffffc38e, f=...)
    at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:385
#8  0x0000555555dd3d66 in _ZNK12dynamic_type11ForAllTypesIJSt9monostateN7nvfuser12StructHandleENS2_7PointerENS2_6OpaqueEN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJSA_EEEJS3_S4_S5_S7_S9_dlbEEESaISE_EEEEclIZNSE_8dispatchIZNS_aaIRKSE_SM_SE_vEET1_OT_OT0_EUlSP_SR_E_SM_JSM_EEEDcSP_SR_DpOT1_EUlSO_E1_EEDaSO_ (
    this=0x555556dae8a4 <dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>::for_all_types>, f=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:385
#9  0x0000555555dd3e29 in dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>::dispatch<dynamic_type::operator&&<dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>, void>(dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&)::{lambda(auto:1&&, auto:2&&)#1}, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&>(dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, 
nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&) (f=..., arg0=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h:220
#10 0x0000555555dd3ea7 in dynamic_type::operator&&<dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>, void> (x=..., y=...)
    at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h:681
#11 0x0000555555d7a07c in nvfuser::BinaryOp::evaluate (this=0x7fff36f46030, ee=..., inputs=std::vector of length 2, capacity 2 = {...}) at /opt/pytorch/nvfuser/csrc/ir/nodes.cpp:611
#12 0x0000555555d42abe in nvfuser::Expr::evaluate (this=0x7fff36f46030, ee=..., known_values=std::unordered_map with 2 elements = {...}) at /opt/pytorch/nvfuser/csrc/ir/base_nodes.cpp:418
#13 0x0000555555b8a831 in nvfuser::ExpressionEvaluator::evaluate (this=0x7fffffffcb60, value=0x7fff36c65140, known_values=std::unordered_map with 2 elements = {...})
    at /opt/pytorch/nvfuser/csrc/expr_evaluator.cpp:235
#14 0x0000555555b8a623 in nvfuser::ExpressionEvaluator::evaluate (this=0x7fffffffcb60, value=0x7fff36c65140) at /opt/pytorch/nvfuser/csrc/expr_evaluator.cpp:213

#15 0x000055555675c65b in nvfuser::testValidate (fusion=0x7fff39189800, fusion_outputs=std::vector of length 1, capacity 1 = {...}, aten_inputs=..., aten_outputs=std::vector of length 0, capacity 0,
    line_number=8452, file_name=0x55555727c0c8 "/opt/pytorch/nvfuser/tests/cpp/test_gpu3.cpp", err_msg="", lparams=..., tolerances=...) at /opt/pytorch/nvfuser/tests/cpp/validator.h:59
#16 0x000055555675d515 in nvfuser::testValidate (fusion=0x7fff39189800, fusion_outputs=std::vector of length 1, capacity 1 = {...}, aten_inputs=..., line_number=8452,
    file_name=0x55555727c0c8 "/opt/pytorch/nvfuser/tests/cpp/test_gpu3.cpp", err_msg="", lparams=..., tolerances=...) at /opt/pytorch/nvfuser/tests/cpp/validator.h:173
jjsjann123 commented 2 months ago

cc'ing @Priya2698. This is low priority since it only affects tests.