dmlc / dmlc-core

A common bricks library for building scalable and portable distributed machine learning.
Apache License 2.0
864 stars 520 forks source link

Fix memory error in unit test ThreadedIter.dmlc_exception #550

Closed hcho3 closed 5 years ago

hcho3 commented 5 years ago

[ RUN ] ThreadedIter.dmlc_exception
dmlc_unittest(63607,0x7fffb7c29380) malloc: error for object 0x7fff7f4e6770: pointer being freed was not allocated
set a breakpoint in malloc_error_break to debug

I suspect this is a race condition because it happens only once in a while.

hcho3 commented 5 years ago

Indeed, we have data races in dmlc::ThreadedIter. I compiled dmlc-core with Thread Sanitizer and got the following error:

ubuntu@ip-172-31-1-97:~/dmlc-core/build$ ./test/unittest/dmlc_unit_tests --gtest_filter=ThreadedIter.dmlc_exception
Note: Google Test filter = ThreadedIter.dmlc_exception
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from ThreadedIter
[ RUN      ] ThreadedIter.dmlc_exception
[21:54:10] /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:84: recycle exception caught
==================
WARNING: ThreadSanitizer: data race (pid=12251)
  Write of size 4 at 0x7ffcdd0902c0 by main thread:
    #0 dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>) /home/ubuntu/dmlc-core/include/dmlc/threadediter.h:303 (dmlc_unit_tests+0x118609)
    #1 dmlc::ThreadedIter<int>::Init(dmlc::ThreadedIter<int>::Producer*, bool) <null> (dmlc_unit_tests+0x11682b)
    #2 ThreadedIter_dmlc_exception_Test::TestBody() /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:87 (dmlc_unit_tests+0x11c7b3)
    #3 void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/u
buntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2443 (dmlc_unit_tests+0x1597ea)
    #4 void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/ubun
tu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2479 (dmlc_unit_tests+0x15121b)
    #5 testing::Test::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2517 (dmlc_unit_tests+0x12805b)
    #6 testing::TestInfo::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2693 (dmlc_unit_tests+0x128c34)
    #7 testing::TestCase::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2811 (dmlc_unit_tests+0x1295be)
    #8 testing::internal::UnitTestImpl::RunAllTests() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:5177 (dmlc_unit_tests+0x1363f3)
    #9 bool testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testi
ng::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x15b18e)
    #10 bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing
::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x1526bd)
    #11 testing::UnitTest::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:4786 (dmlc_unit_tests+0x13479f)
    #12 RUN_ALL_TESTS() /home/ubuntu/dmlc-core/build/googletest-src/googletest/include/gtest/gtest.h:2341 (dmlc_unit_tests+0x8d1f6)
    #13 main /home/ubuntu/dmlc-core/test/unittest/unittest_main.cc:7 (dmlc_unit_tests+0x8d10a)

                                                                                                                                                 [81/1941]
  Previous read of size 4 at 0x7ffcdd0902c0 by thread T1 (mutexes: write M163):
    #0 dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>)::{lambda()#1}::operator()() const /home/ubuntu/dmlc-core/include
/dmlc/threadediter.h:388 (dmlc_unit_tests+0x1181cf)
    #1 void std::__invoke_impl<void, dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>)::{lambda()#1}>(std::__invoke_other
, dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>)::{lambda()#1}&&) <null> (dmlc_unit_tests+0x11a767)
    #2 std::__invoke_result<dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>)::{lambda()#1}>::type std::__invoke<dmlc::Th
readedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>)::{lambda()#1}>(std::__invoke_result&&, (dmlc::ThreadedIter<int>::Init(std::func
tion<bool (int**)>, std::function<void ()>)::{lambda()#1}&&)...) <null> (dmlc_unit_tests+0x1197c6)
    #3 decltype (__invoke((_S_declval<0ul>)())) std::thread::_Invoker<std::tuple<dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<
void ()>)::{lambda()#1}> >::_M_invoke<0ul>(std::_Index_tuple<0ul>) /usr/include/c++/7/thread:234 (dmlc_unit_tests+0x11c4c6)
    #4 std::thread::_Invoker<std::tuple<dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>)::{lambda()#1}> >::operator()()
/usr/include/c++/7/thread:243 (dmlc_unit_tests+0x11c458)
    #5 std::thread::_State_impl<std::thread::_Invoker<std::tuple<dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>)::{lamb
da()#1}> > >::_M_run() /usr/include/c++/7/thread:186 (dmlc_unit_tests+0x11c398)
    #6 <null> <null> (libstdc++.so.6+0xbd66e)

  As if synchronized via sleep:
    #0 nanosleep <null> (libtsan.so.0+0x4dac0)
    #1 void std::this_thread::sleep_for<long, std::ratio<1l, 1000l> >(std::chrono::duration<long, std::ratio<1l, 1000l> > const&) /usr/include/c++/7/threa
d:373 (dmlc_unit_tests+0x748ea)
    #2 producer_test::delay(int) /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:19 (dmlc_unit_tests+0x115d3a)
    #3 ThreadedIter_dmlc_exception_Test::TestBody() /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:80 (dmlc_unit_tests+0x11c717)
    #4 void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/u
buntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2443 (dmlc_unit_tests+0x1597ea)
    #5 void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/ubun
tu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2479 (dmlc_unit_tests+0x15121b)
    #6 testing::Test::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2517 (dmlc_unit_tests+0x12805b)
    #7 testing::TestInfo::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2693 (dmlc_unit_tests+0x128c34)
    #8 testing::TestCase::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2811 (dmlc_unit_tests+0x1295be)
    #9 testing::internal::UnitTestImpl::RunAllTests() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:5177 (dmlc_unit_tests+0x1363f3)
    #10 bool testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (test
ing::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x15b18e)
    #11 bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing
::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x1526bd)
    #12 testing::UnitTest::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:4786 (dmlc_unit_tests+0x13479f)
    #13 RUN_ALL_TESTS() /home/ubuntu/dmlc-core/build/googletest-src/googletest/include/gtest/gtest.h:2341 (dmlc_unit_tests+0x8d1f6)
    #14 main /home/ubuntu/dmlc-core/test/unittest/unittest_main.cc:7 (dmlc_unit_tests+0x8d10a)

  Location is stack of main thread.

  Location is global '<null>' at 0x000000000000 ([stack]+0x00000001f2c0)

  Mutex M163 (0x7ffcdd0902e0) created at:
    #0 pthread_mutex_lock <null> (libtsan.so.0+0x3fadb)
    #1 __gthread_mutex_lock /usr/include/x86_64-linux-gnu/c++/7/bits/gthr-default.h:748 (dmlc_unit_tests+0x6c2bf)
    #2 std::mutex::lock() /usr/include/c++/7/bits/std_mutex.h:103 (dmlc_unit_tests+0x6fcc2)
    #3 std::unique_lock<std::mutex>::lock() /usr/include/c++/7/bits/std_mutex.h:267 (dmlc_unit_tests+0x74e1d)
    #4 std::unique_lock<std::mutex>::unique_lock(std::mutex&) /usr/include/c++/7/bits/std_mutex.h:197 (dmlc_unit_tests+0x72b14)
    #5 dmlc::ThreadedIter<int>::BeforeFirst() /home/ubuntu/dmlc-core/include/dmlc/threadediter.h:181 (dmlc_unit_tests+0x116b33)
    #6 ThreadedIter_dmlc_exception_Test::TestBody() /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:78 (dmlc_unit_tests+0x11c70d)
    #7 void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/u
buntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2443 (dmlc_unit_tests+0x1597ea)
    #8 void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/ubun
tu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2479 (dmlc_unit_tests+0x15121b)
    #9 testing::Test::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2517 (dmlc_unit_tests+0x12805b)
    #10 testing::TestInfo::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2693 (dmlc_unit_tests+0x128c34)
    #11 testing::TestCase::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2811 (dmlc_unit_tests+0x1295be)
    #12 testing::internal::UnitTestImpl::RunAllTests() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:5177 (dmlc_unit_tests+0x1363f3)
    #13 bool testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (test
ing::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x15b18e)
    #14 bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing
::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x1526bd)
    #15 testing::UnitTest::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:4786 (dmlc_unit_tests+0x13479f)
    #16 RUN_ALL_TESTS() /home/ubuntu/dmlc-core/build/googletest-src/googletest/include/gtest/gtest.h:2341 (dmlc_unit_tests+0x8d1f6)
    #17 main /home/ubuntu/dmlc-core/test/unittest/unittest_main.cc:7 (dmlc_unit_tests+0x8d10a)

  Thread T1 (tid=12253, finished) created by main thread at:
    #0 pthread_create <null> (libtsan.so.0+0x2bcee)
    #1 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) <null> (libstdc++.so.6+0x
bd924)
    #2 dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>) /home/ubuntu/dmlc-core/include/dmlc/threadediter.h:409 (dmlc_uni
t_tests+0x1186b0)
    #3 dmlc::ThreadedIter<int>::Init(dmlc::ThreadedIter<int>::Producer*, bool) <null> (dmlc_unit_tests+0x11682b)
    #4 ThreadedIter_dmlc_exception_Test::TestBody() /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:77 (dmlc_unit_tests+0x11c6fe)
    #5 void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/u
buntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2443 (dmlc_unit_tests+0x1597ea)
    #6 void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/ubun
tu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2479 (dmlc_unit_tests+0x15121b)
    #7 testing::Test::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2517 (dmlc_unit_tests+0x12805b)
    #8 testing::TestInfo::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2693 (dmlc_unit_tests+0x128c34)
    #9 testing::TestCase::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2811 (dmlc_unit_tests+0x1295be)
    #10 testing::internal::UnitTestImpl::RunAllTests() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:5177 (dmlc_unit_tests+0x1363f3)
    #11 bool testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (test
ing::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x15b18e)
    #12 bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing
::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x1526bd)
    #13 testing::UnitTest::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:4786 (dmlc_unit_tests+0x13479f)
    #14 RUN_ALL_TESTS() /home/ubuntu/dmlc-core/build/googletest-src/googletest/include/gtest/gtest.h:2341 (dmlc_unit_tests+0x8d1f6)
    #15 main /home/ubuntu/dmlc-core/test/unittest/unittest_main.cc:7 (dmlc_unit_tests+0x8d10a)

SUMMARY: ThreadSanitizer: data race /home/ubuntu/dmlc-core/include/dmlc/threadediter.h:303 in dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>)
==================
[21:54:11] /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:96: next exception caught
[21:54:11] /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:99: finish
[21:54:11] /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:109: beforefirst exception caught
[21:54:11] /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:115: beforefirst exception thrown/caught
[       OK ] ThreadedIter.dmlc_exception (1137 ms)
[----------] 1 test from ThreadedIter (1137 ms total)

[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1137 ms total)
[  PASSED  ] 1 test.
hcho3 commented 5 years ago

Also, dmlc::ThreadedIter is not exception safe. Upon receiving an exception, it leaks a thread:

WARNING: ThreadSanitizer: thread leak (pid=12251)
  Thread T1 (tid=12253, finished) created by main thread at:
    #0 pthread_create <null> (libtsan.so.0+0x2bcee)
    #1 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) <null> (libstdc++.so.6+0x
bd924)
    #2 dmlc::ThreadedIter<int>::Init(std::function<bool (int**)>, std::function<void ()>) /home/ubuntu/dmlc-core/include/dmlc/threadediter.h:409 (dmlc_uni
t_tests+0x1186b0)
    #3 dmlc::ThreadedIter<int>::Init(dmlc::ThreadedIter<int>::Producer*, bool) <null> (dmlc_unit_tests+0x11682b)
    #4 ThreadedIter_dmlc_exception_Test::TestBody() /home/ubuntu/dmlc-core/test/unittest/unittest_threaditer_exc_handling.cc:77 (dmlc_unit_tests+0x11c6fe)
    #5 void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/u
buntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2443 (dmlc_unit_tests+0x1597ea)
    #6 void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/ubun
tu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2479 (dmlc_unit_tests+0x15121b)
    #7 testing::Test::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2517 (dmlc_unit_tests+0x12805b)
    #8 testing::TestInfo::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2693 (dmlc_unit_tests+0x128c34)
    #9 testing::TestCase::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2811 (dmlc_unit_tests+0x1295be)
    #10 testing::internal::UnitTestImpl::RunAllTests() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:5177 (dmlc_unit_tests+0x1363f3)
    #11 bool testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (test
ing::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x15b18e)
    #12 bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing
::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x1526bd)
    #13 testing::UnitTest::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:4786 (dmlc_unit_tests+0x13479f)
    #14 RUN_ALL_TESTS() /home/ubuntu/dmlc-core/build/googletest-src/googletest/include/gtest/gtest.h:2341 (dmlc_unit_tests+0x8d1f6)
    #15 main /home/ubuntu/dmlc-core/test/unittest/unittest_main.cc:7 (dmlc_unit_tests+0x8d10a)

SUMMARY: ThreadSanitizer: thread leak (/usr/lib/x86_64-linux-gnu/libtsan.so.0+0x2bcee) in pthread_create