Open avildema opened 3 years ago
#0 0x00007ffff34749fd in nanosleep () from /usr/lib64/libc.so.6
#1 0x00007ffff34a52d4 in usleep () from /usr/lib64/libc.so.6
#2 0x00000000009fb115 in ucp_test::short_progress_loop (this=this@entry=0x1ecde30, worker_index=worker_index@entry=0) at /__w/1/s/contrib/../test/gtest/ucp/ucp_test.cc:177
#3 0x00000000009d992a in test_ucp_sockaddr_protocols::do_unexp_recv (this=this@entry=0x1ecde30, recv_buf='y' <repeats 65536 times>, size=size@entry=65536, sreq=<optimized out>,
send_stop=send_stop@entry=false, recv_stop=recv_stop@entry=false) at /__w/1/s/contrib/../test/gtest/ucp/test_ucp_sockaddr.cc:1862
#4 0x0000000000994259 in test_ucp_sockaddr_protocols::test_tag_send_recv (this=0x1ecde30, size=65536, is_sync=false, recv_stop=false, send_stop=false, is_exp=false)
at /__w/1/s/contrib/../test/gtest/ucp/test_ucp_sockaddr.cc:1964
#5 0x00000000005f0856 in run (this=0x1ecde30) at /__w/1/s/contrib/../test/gtest/common/test.cc:356
#6 ucs::test_base::TestBodyProxy (this=0x1ecde30) at /__w/1/s/contrib/../test/gtest/common/test.cc:382
#7 0x0000000000c8a099 in HandleSehExceptionsInMethodIfSupported<testing::Test, void> (location=0xdb7c55 "the test body", method=<optimized out>, object=<optimized out>)
at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.cc:2433
#8 testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void> (object=object@entry=0x1ecdeb0, method=<optimized out>, location=location@entry=0xdb7c55 "the test body")
at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.cc:2469
#9 0x0000000000c80e59 in testing::Test::Run (this=this@entry=0x1ecdeb0) at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.cc:2509
#10 0x0000000000c80f81 in testing::TestInfo::Run (this=0x1d2c120) at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.cc:2687
#11 0x0000000000c81045 in testing::TestSuite::Run (this=0x1d1e2d0) at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.cc:2819
#12 0x0000000000c81b6a in testing::internal::UnitTestImpl::RunAllTests (this=this@entry=0x118f100) at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.cc:5350
#13 0x0000000000c81d21 in HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool> (location=<optimized out>, method=<optimized out>, object=<optimized out>)
at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.cc:2433
#14 HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool> (location=0xdb8a60 "auxiliary test code (environments or event listeners)",
method=(bool (testing::internal::UnitTestImpl::*)(testing::internal::UnitTestImpl * const)) 0xc81730 <testing::internal::UnitTestImpl::RunAllTests()>, object=0x118f100)
at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.cc:2469
#15 testing::UnitTest::Run (this=<optimized out>) at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.cc:4940
#16 0x0000000000577a65 in RUN_ALL_TESTS () at /__w/1/s/contrib/../test/gtest/common/googletest/gtest.h:2473
#17 main (argc=1, argv=0x7fffffffe038) at /__w/1/s/contrib/../test/gtest/common/main.cc:118
It could be fixed by using the send_recv() infrastructure from #7639 and applying the following changes before the do_unexp_recv() call:
// Proposed shape of the tag send/receive test helper (excerpt; the parts
// marked "..." are elided in this snippet and are not shown here).
//
// Parameters, as used by the visible code:
//   size      - number of bytes passed to the UCP tag-send call and to
//               do_unexp_recv().
//   is_exp    - when false, the receive is posted via do_unexp_recv() after
//               the send status has been checked.
//   is_sync   - selects ucp_tag_send_sync_nbx() instead of ucp_tag_send_nbx().
//   send_stop - forwarded unchanged to do_unexp_recv().
//   recv_stop - forwarded unchanged to do_unexp_recv().
void test_tag_send_recv(size_t size, bool is_exp, bool is_sync = false,
bool send_stop = false, bool recv_stop = false)
{
...
// Issue the send with tag 0, using the synchronous variant on request.
if (is_sync) {
sreq = ucp_tag_send_sync_nbx(sender().ep(), &send_buf[0], size, 0,
&send_param);
} else {
sreq = ucp_tag_send_nbx(sender().ep(), &send_buf[0], size, 0,
&send_param);
}
// Obtain the send request's status before posting any receive.
// NOTE(review): presumably wait_req_comp() progresses both workers until
// sreq completes and returns its status - confirm against the helper.
ucs_status_t send_status = wait_req_comp(sender(), receiver(),
false, sreq);
// Bail out of the test body when the status check rejects the send result.
// NOTE(review): per the issue discussion this is meant to catch a send that
// completed with UCS_ERR_UNREACHABLE - confirm what check_send_status() does.
if (!check_send_status(send_status, receiver(), rreq, cb_type())) {
return;
}
// Unexpected-receive flow: post the receive only now, after the send status
// check above, and record the request in reqs.
if (!is_exp) {
rreq = do_unexp_recv(recv_buf, size, sreq, send_stop,
recv_stop);
reqs.push_back(rreq);
}
...
}
@dmitrygx what is the reason for the failure?
@dmitrygx what is the reason for the failure?
The reason is that no IB devices are available, so it tries to connect and the send completes with UCS_ERR_UNREACHABLE.
so, we should add the same check as we have in other sockaddr tests:
https://github.com/openucx/ucx/blob/9b41ab63855fe3db4801dcc53b0d3718ac1a8580/test/gtest/ucp/test_ucp_sockaddr.cc#L354
This is related to infrastructure issues; let's stabilize the infra first and then fix gtest. Skipping the test when the IB device is missing would hide the problem.
This is related to infrastructure issues; let's stabilize the infra first and then fix gtest. Skipping the test when the IB device is missing would hide the problem.
@avildema could you please check why no IB devices are listed in the Docker container? We will provide a patch to skip the test in such cases.
Describe the bug
The udx/test_ucp_sockaddr_protocols.tag_zcopy_64k_unexp/0 test fails.
Steps to Reproduce