Open jjsjann123 opened 2 months ago
I believe the issue is somewhere inside DynamicType, which is not handling the evaluation properly. We might be able to work around it by changing something in the expression evaluator, but I haven't been able to locate the issue yet.
// Repro: element-wise logical_and over two Bool tensors, run through
// FusionExecutorCache and then checked with testValidate.
TEST_F(NVFuserTest, Repro) {
  auto fusion = std::make_unique<Fusion>();
  FusionGuard fg(fusion.get());

  // Two concrete, contiguous {4, 10} Bool inputs and their logical AND.
  TensorView* lhs = makeContigConcreteTensor({4, 10}, DataType::Bool);
  TensorView* rhs = makeContigConcreteTensor({4, 10}, DataType::Bool);
  TensorView* conjunction = logical_and(lhs, rhs);

  fusion->addInput(lhs);
  fusion->addInput(rhs);
  fusion->addOutput(conjunction);

  // Boolean ATen inputs are produced by thresholding random float tensors.
  auto float_cuda_options =
      at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor lhs_mask = at::randn({4, 10}, float_cuda_options) > 0.5;
  at::Tensor rhs_mask = at::randn({4, 10}, float_cuda_options) > 0.5;
  std::vector<c10::IValue> inputs = {lhs_mask, rhs_mask};

  FusionExecutorCache executor_cache(std::move(fusion));
  auto outputs = executor_cache.runFusionWithInputs(inputs);

  // The fusion is expected to stay in a single segment.
  FusionKernelRuntime* kernel_runtime =
      executor_cache.getMostRecentKernelRuntime();
  EXPECT_EQ(kernel_runtime->fusionSegments()->groups().size(), 1);

  testValidate(executor_cache.fusion(), outputs, inputs, __LINE__, __FILE__);
}
backtrace:
(gdb) bt #0 0x00007fffba2a54a1 in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6 #1 0x0000555555af0142 in nvfuser::nvfCheckFail (func=0x555556fbd616 "operator()", file=0x555556fbd5d0 "/opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h", line=228, msg="false INTERNAL ASSERT FAILED at \"/opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h\":228, please report a bug with repro script to NVFuser at https://github.com/NVIDIA/Fuser/issues."...) at /opt/pytorch/nvfuser/csrc/exceptions.cpp:274 #2 0x0000555555af0357 in nvfuser::nvfErrorFail (func=0x555556fbd616 "operator()", file=0x555556fbd5d0 "/opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h", line=228, condMsg=0x555556fbd500 "false INTERNAL ASSERT FAILED at \"/opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h\":228, please report a bug with repro script to NVFuser at https://github.com/NVIDIA/Fuser/issues."..., userMsg="Result is dynamic but not convertible to result type") at /opt/pytorch/nvfuser/csrc/exceptions.cpp:300 #3 0x0000555555dd0357 in dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>::dispatch<dynamic_type::operator&&<dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>, void>(dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, 
std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&)::{lambda(auto:1&&, auto:2&&)#1}, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&>(dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&)::{lambda(auto:1)#3}::operator()<std::type_identity<at::Tensor> >(std::type_identity<at::Tensor>) const (__closure=0x7fffffffc240, t=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h:228 #4 0x0000555555dd3b0e in _ZNK12dynamic_type11ForAllTypesIJN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJS5_EEEJN7nvfuser12StructHandleENS9_7PointerENS9_6OpaqueES2_S4_dlbEEESaISD_EEEEclIZNSD_8dispatchIZNS_aaIRKSD_SL_SD_vEET1_OT_OT0_EUlSO_SQ_E_SL_JSL_EEEDcSO_SQ_DpOT1_EUlSN_E1_EEDaSN_ (this=0x7fffffffc271, f=...) 
at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:384 #5 0x0000555555dd3bbc in _ZNK12dynamic_type11ForAllTypesIJN7nvfuser6OpaqueEN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJS7_EEEJNS1_12StructHandleENS1_7PointerES2_S4_S6_dlbEEESaISD_EEEEclIZNSD_8dispatchIZNS_aaIRKSD_SL_SD_vEET1_OT_OT0_EUlSO_SQ_E_SL_JSL_EEEDcSO_SQ_DpOT1_EUlSN_E1_EEDaSN_ (this=0x7fffffffc2d0, f=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:385 #6 0x0000555555dd3c4a in _ZNK12dynamic_type11ForAllTypesIJN7nvfuser7PointerENS1_6OpaqueEN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJS8_EEEJNS1_12StructHandleES2_S3_S5_S7_dlbEEESaISD_EEEEclIZNSD_8dispatchIZNS_aaIRKSD_SL_SD_vEET1_OT_OT0_EUlSO_SQ_E_SL_JSL_EEEDcSO_SQ_DpOT1_EUlSN_E1_EEDaSN_ (this=0x7fffffffc32f, f=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:385 #7 0x0000555555dd3cd8 in _ZNK12dynamic_type11ForAllTypesIJN7nvfuser12StructHandleENS1_7PointerENS1_6OpaqueEN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJS9_EEEJS2_S3_S4_S6_S8_dlbEEESaISD_EEEEclIZNSD_8dispatchIZNS_aaIRKSD_SL_SD_vEET1_OT_OT0_EUlSO_SQ_E_SL_JSL_EEEDcSO_SQ_DpOT1_EUlSN_E1_EEDaSN_ (this=0x7fffffffc38e, f=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:385 #8 0x0000555555dd3d66 in _ZNK12dynamic_type11ForAllTypesIJSt9monostateN7nvfuser12StructHandleENS2_7PointerENS2_6OpaqueEN2at6TensorESt7complexIdEdlbSt6vectorINS_11DynamicTypeINS_10ContainersIJSA_EEEJS3_S4_S5_S7_S9_dlbEEESaISE_EEEEclIZNSE_8dispatchIZNS_aaIRKSE_SM_SE_vEET1_OT_OT0_EUlSP_SR_E_SM_JSM_EEEDcSP_SR_DpOT1_EUlSO_E1_EEDaSO_ ( this=0x555556dae8a4 <dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>::for_all_types>, f=...) 
at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/type_traits.h:385 #9 0x0000555555dd3e29 in dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>::dispatch<dynamic_type::operator&&<dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>, void>(dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&)::{lambda(auto:1&&, auto:2&&)#1}, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&>(dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, 
bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&) (f=..., arg0=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h:220 #10 0x0000555555dd3ea7 in dynamic_type::operator&&<dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool> const&, dynamic_type::DynamicType<dynamic_type::Containers<std::vector>, nvfuser::StructHandle, nvfuser::Pointer, nvfuser::Opaque, at::Tensor, std::complex<double>, double, long, bool>, void> (x=..., y=...) at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h:681 #11 0x0000555555d7a07c in nvfuser::BinaryOp::evaluate (this=0x7fff36f46030, ee=..., inputs=std::vector of length 2, capacity 2 = {...}) at /opt/pytorch/nvfuser/csrc/ir/nodes.cpp:611 #12 0x0000555555d42abe in nvfuser::Expr::evaluate (this=0x7fff36f46030, ee=..., known_values=std::unordered_map with 2 elements = {...}) at /opt/pytorch/nvfuser/csrc/ir/base_nodes.cpp:418 #13 0x0000555555b8a831 in nvfuser::ExpressionEvaluator::evaluate (this=0x7fffffffcb60, value=0x7fff36c65140, known_values=std::unordered_map with 2 elements = {...}) at /opt/pytorch/nvfuser/csrc/expr_evaluator.cpp:235 #14 0x0000555555b8a623 in nvfuser::ExpressionEvaluator::evaluate (this=0x7fffffffcb60, value=0x7fff36c65140) at /opt/pytorch/nvfuser/csrc/expr_evaluator.cpp:213 #15 0x000055555675c65b in nvfuser::testValidate (fusion=0x7fff39189800, fusion_outputs=std::vector of length 1, capacity 1 = {...}, aten_inputs=..., aten_outputs=std::vector of length 0, capacity 0, line_number=8452, file_name=0x55555727c0c8 
"/opt/pytorch/nvfuser/tests/cpp/test_gpu3.cpp", err_msg="", lparams=..., tolerances=...) at /opt/pytorch/nvfuser/tests/cpp/validator.h:59 #16 0x000055555675d515 in nvfuser::testValidate (fusion=0x7fff39189800, fusion_outputs=std::vector of length 1, capacity 1 = {...}, aten_inputs=..., line_number=8452, file_name=0x55555727c0c8 "/opt/pytorch/nvfuser/tests/cpp/test_gpu3.cpp", err_msg="", lparams=..., tolerances=...) at /opt/pytorch/nvfuser/tests/cpp/validator.h:173
cc'ing @Priya2698. This is low priority since it only affects tests.
I believe the issue is somewhere inside DynamicType, which is not handling the evaluation properly. We might be able to work around it by changing something in the expression evaluator, but I haven't been able to locate the issue yet.
backtrace: