Open nuj7 opened 5 years ago
test name: test_gluon_gpu.test_rnn_unroll_variant_length
log:
test_gluon_gpu.test_rnn_unroll_variant_length ... Segmentation fault: 11 Stack trace: [bt] (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x515d559) [0x7fd123274559] [bt] (1) /lib/x86_64-linux-gnu/libc.so.6(+0x354b0) [0x7fd1978a44b0] [bt] (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(nnvm::Symbol::ListInputs(nnvm::Symbol::ListInputOption) const+0x24d) [0x7fd125ed873d] [bt] (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(nnvm::Symbol::ListInputNames[abi:cxx11](nnvm::Symbol::ListInputOption) const+0x2a) [0x7fd125ed93ba] [bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::FusedOp::GenerateCode(int, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, std::vector<mxnet::TShape, std::allocator<mxnet::TShape> > const&, std::vector<int, std::allocator<int> > const&, int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<unsigned int, std::allocator<unsigned int> >*)+0x38c1) [0x7fd125a9e3c1] [bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void mxnet::FusedOp::Forward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0x2b1) [0x7fd125aa3631] [bt] (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x1423) [0x7fd1229795e3] [bt] (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7fd122979ac7] [bt] (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete), mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&, mxnet::engine::CallbackOnComplete&&)+0x1ec) [0x7fd1230b119c] terminate called without an active exception /work/runtime_functions.sh: line 1106: 6 Aborted (core dumped) nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu
build link: http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/mxnet-validation%2Funix-gpu/detail/PR-16720/5/pipeline
@ptrendx @DickJC123 Could you guys provide some insights to this issue? Seems like related to the fused ops
Thanks for the report, we will look into this issue.
test name: test_gluon_gpu.test_rnn_unroll_variant_length
log:
build link: http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/mxnet-validation%2Funix-gpu/detail/PR-16720/5/pipeline