Closed balazsracz closed 7 years ago
Backtrace:
(gdb) bt
#0 0x20024e2c in resetblink (pattern=pattern@entry=2147536074) at /usr/local/google/home/bracz/train/openlcb/tcs_booster/targets/freertos.armv7m.cs_mini_reva/HwInit.cxx:247
#1 diewith (pattern=pattern@entry=2147536074) at /usr/local/google/home/bracz/train/openlcb/tcs_booster/targets/freertos.armv7m.cs_mini_reva/HwInit.cxx:264
#2 0x200249d0 in abort () at /usr/local/google/home/bracz/train/openlcb/openmrn/src/os/os.c:738
#3 0x20014c68 in Allocator<std::_Rb_tree_node<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::allocate (cnt=1, this=0x20033780)
at /usr/local/google/home/bracz/train/openlcb/openmrn/src/utils/Allocator.hxx:152
#4 std::allocator_traits<Allocator<std::_Rb_tree_node<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > > >::allocate (__n=1, __a=...)
at /usr/local/google/train/opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/alloc_traits.h:301
#5 std::_Rb_tree<unsigned short, std::pair<unsigned short const, openlcb::AliasCache::Metadata*>, std::_Select1st<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> >, std::less<unsigned short>, Allocator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::_M_get_node (this=0x20033780)
at /usr/local/google/train/opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/stl_tree.h:505
#6 std::_Rb_tree<unsigned short, std::pair<unsigned short const, openlcb::AliasCache::Metadata*>, std::_Select1st<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> >, std::less<unsigned short>, Allocator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::_M_create_node<std::piecewise_construct_t const&, std::tuple<unsigned short const&>, std::tuple<> >(std::piecewise_construct_t const&, std::tuple<unsigned short const&>&&, std::tuple<>&&) (this=0x20033780)
at /usr/local/google/train/opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/stl_tree.h:559
#7 std::_Rb_tree<unsigned short, std::pair<unsigned short const, openlcb::AliasCache::Metadata*>, std::_Select1st<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> >, std::less<unsigned short>, Allocator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::_M_emplace_hint_unique<std::piecewise_construct_t const&, std::tuple<unsigned short const&>, std::tuple<> >(std::_Rb_tree_const_iterator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> >, std::piecewise_construct_t const&, std::tuple<unsigned short const&>&&, std::tuple<>&&) (__pos=..., this=0x20033780) at /usr/local/google/train/opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/stl_tree.h:2196
#8 std::map<unsigned short, openlcb::AliasCache::Metadata*, std::less<unsigned short>, Allocator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::operator[] (
__k=@0x20036980: 1715, this=0x20033780) at /usr/local/google/train/opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/stl_map.h:483
#9 StlMap<unsigned short, openlcb::AliasCache::Metadata*>::operator[] (this=this@entry=0x20031360 <stack+696>, key=@0x20036980: 1715)
at /usr/local/google/home/bracz/train/openlcb/openmrn/src/utils/StlMap.hxx:121
#10 0x20015cb4 in openlcb::AliasCache::add (this=0x20031358 <stack+688>, id=<optimized out>, alias=<optimized out>, alias@entry=1715)
at /usr/local/google/home/bracz/train/openlcb/openmrn/src/openlcb/AliasCache.cxx:119
#11 0x2001dfee in openlcb::RemoteAliasCacheUpdater::entry (this=0x200339f0) at /usr/local/google/home/bracz/train/openlcb/openmrn/src/openlcb/IfCan.cxx:229
#12 0x2000b2d6 in StateFlowBase::run (this=0x200339f0) at /usr/local/google/home/bracz/train/openlcb/openmrn/src/executor/StateFlow.cxx:80
#13 0x2000a518 in ExecutorBase::entry (this=this@entry=0x200310ac <stack+4>) at /usr/local/google/home/bracz/train/openlcb/openmrn/src/executor/Executor.cxx:321
#14 0x20024cbe in Executor<5u>::thread_body (this=0x200310ac <stack+4>) at /home/bracz/train/openlcb/openmrn/src/executor/Executor.hxx:349
#15 openlcb::SimpleCanStackBase::loop_executor (delay_start=false, this=0x200310a8 <stack>) at /home/bracz/train/openlcb/openmrn/src/openlcb/SimpleStack.hxx:262
#16 appl_main (argc=argc@entry=1, argv=argv@entry=0x20036ae8) at /usr/local/google/home/bracz/train/openlcb/tcs_booster/booster_main/cs_mini_main.hxx:273
#17 0x20025706 in main_thread (arg=<optimized out>) at /usr/local/google/home/bracz/train/openlcb/openmrn/src/os/os.c:880
#18 0x20025f34 in prvSwitchTimerLists () at /opt/FreeRTOS/default/Source/timers.c:914
#19 0x20031d24 in pxReadyTasksLists ()
The AliasMap class:
(gdb) frame 10
#10 0x20015cb4 in openlcb::AliasCache::add (this=0x20031358 <stack+688>, id=<optimized out>, alias=<optimized out>, alias@entry=1715)
at /usr/local/google/home/bracz/train/openlcb/openmrn/src/openlcb/AliasCache.cxx:119
119 aliasMap[alias] = insert;
(gdb) p *this
$4 = {
static RESERVED_ALIAS_NODE_ID = 1,
pool = 0x20033638,
freeList = 0x0,
aliasMap = {
<StlMap<unsigned short, openlcb::AliasCache::Metadata*>> = {
mappingAllocator = 0x20033780,
mapping = 0x0
}, <No data fields>},
idMap = {
<StlMap<unsigned long long, openlcb::AliasCache::Metadata*>> = {
mappingAllocator = 0x200337b0,
mapping = 0x0
}, <No data fields>},
oldest = 0x20033758,
newest = 0x20033638,
seed = 0,
entries = 10,
removeCallback = 0x0,
context = 0x0
the currently selected freelist entry:
(gdb) p insert
$5 = (openlcb::AliasCache::Metadata *) 0x20033758
(gdb) p/x *insert
$7 = {
id = 0x90099dd0103,
alias = 0x6b3,
timestamp = 0x40faedd3a5dc,
{
prev = 0x20033718,
newer = 0x20033718
},
{
next = 0x0,
older = 0x0
}
}
the contents of the aliasMap are as follows. ignore the second component of the keys, they contain garbage due to alignment issues.
(gdb) pmap $10 uint16_t[2] void*
elem[0].left: $171 = {0x94, 0xd001}
elem[0].right: $172 = (void *) 0x200336f8
elem[1].left: $173 = {0x12e, 0xf8d5}
elem[1].right: $174 = (void *) 0x20033718
elem[2].left: $175 = {0x294, 0xea82}
elem[2].right: $176 = (void *) 0x200336d8
elem[3].left: $177 = {0x2d4, 0x66}
elem[3].right: $178 = (void *) 0x20033678
elem[4].left: $179 = {0x314, 0x47f0}
elem[4].right: $180 = (void *) 0x20033658
elem[5].left: $181 = {0x354, 0xd1f4}
elem[5].right: $182 = (void *) 0x200336b8
elem[6].left: $183 = {0x394, 0xc018}
elem[6].right: $184 = (void *) 0x20033698
elem[7].left: $185 = {0x773, 0x108c}
elem[7].right: $186 = (void *) 0x20033738
elem[8].left: $187 = {0x7e5, 0x7650}
elem[8].right: $188 = (void *) 0x20033758
elem[9].left: $189 = {0xfd5, 0x2003}
elem[9].right: $190 = (void *) 0x20033638
Map size = 10
idMap is the following:
(gdb) pmap $193 uint64_t void*
elem[0].left: $194 = 0x1
elem[0].right: $195 = (void *) 0x20033718
elem[1].left: $196 = 0x5010101143f
elem[1].right: $197 = (void *) 0x20033638
elem[2].left: $198 = 0x50101011442
elem[2].right: $199 = (void *) 0x200336f8
elem[3].left: $200 = 0x5010101144a
elem[3].right: $201 = (void *) 0x200336d8
elem[4].left: $202 = 0x5010101144b
elem[4].right: $203 = (void *) 0x20033678
elem[5].left: $204 = 0x5010101144c
elem[5].right: $205 = (void *) 0x20033658
elem[6].left: $206 = 0x5010101144d
elem[6].right: $207 = (void *) 0x200336b8
elem[7].left: $208 = 0x5010101144e
elem[7].right: $209 = (void *) 0x20033698
elem[8].left: $210 = 0x5010101181f
elem[8].right: $211 = (void *) 0x20033758
elem[9].left: $212 = 0x90099dd0104
elem[9].right: $213 = (void *) 0x20033738
Map size = 10
one strange thing to see: (gdb) p oldest $218 = (openlcb::AliasCache::Metadata *) 0x20033758
at the specific point oldest has already been supposedly rewritten. But oldest == insert can only happen if oldest == freelist (should not happen) or oldest == oldest->newer (also should not happen, that's an infinite list)
we can exclude the ejection codepath, because on the jection codepath oldest == insert can only happen if oldest == oldest->newer == insert which would also cause insert->newer==insert which is false. Thus we popped insert from the freelist.
Caught another one of these.
#0 0x0102f220 in resetblink (pattern=pattern@entry=2147536074)
at /home/bracz/train/openlcb/tcs_booster/targets/freertos.armv7m.cs_mini_reva_cc3220sf/HwInit.cxx:255
#1 diewith (pattern=pattern@entry=2147536074)
at /home/bracz/train/openlcb/tcs_booster/targets/freertos.armv7m.cs_mini_reva_cc3220sf/HwInit.cxx:272
#2 0x0102eda4 in abort () at /home/bracz/train/openlcb/openmrn/src/os/os.c:741
#3 0x0101fb18 in Allocator<std::_Rb_tree_node<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::allocate (cnt=1, this=0x20005460)
at /home/bracz/train/openlcb/openmrn/src/utils/Allocator.hxx:152
#4 std::allocator_traits<Allocator<std::_Rb_tree_node<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > > >::allocate (__n=1, __a=...)
at /opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/alloc_traits.h:301
#5 std::_Rb_tree<unsigned short, std::pair<unsigned short const, openlcb::AliasCache::Metadata*>, std::_Select1st<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> >, std::less<unsigned short>, Allocator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::_M_get_node (this=0x20005460)
at /opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/stl_tree.h:505
#6 std::_Rb_tree<unsigned short, std::pair<unsigned short const, openlcb::AliasCache::Metadata*>, std::_Select1st<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> >, std::less<unsigned short>, Allocator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::_M_create_node<std::piecewise_construct_t const&, std::tuple<unsigned short const&>, std::tuple<> >(std::piecewise_construct_t const&, std::tuple<unsigned short const&>&&, std::tuple<>&&) (
this=0x20005460)
at /opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/stl_tree.h:559
#7 std::_Rb_tree<unsigned short, std::pair<unsigned short const, openlcb::AliasCache::Metadata*>, std::_Select1st<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> >, std::less<unsigned short>, Allocator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::_M_---Type <return> to continue, or q <return> to quit---
emplace_hint_unique<std::piecewise_construct_t const&, std::tuple<unsigned short const&>, std::tuple<> >(std::_Rb_tree_const_iterator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> >, std::piecewise_construct_t const&, std::tuple<unsigned short const&>&&, std::tuple<>&&) (
__pos=..., this=0x20005460)
at /opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/stl_tree.h:2196
#8 std::map<unsigned short, openlcb::AliasCache::Metadata*, std::less<unsigned short>, Allocator<std::pair<unsigned short const, openlcb::AliasCache::Metadata*> > >::operator[] (
__k=@0x200085f8: 2772, this=0x20005460)
at /opt/armgcc/gcc-arm-none-eabi-6-2017-q1-update/arm-none-eabi/include/c++/6.3.1/bits/stl_map.h:483
#9 StlMap<unsigned short, openlcb::AliasCache::Metadata*>::operator[] (
this=this@entry=0x20001540 <stack+696>, key=@0x200085f8: 2772)
at /home/bracz/train/openlcb/openmrn/src/utils/StlMap.hxx:121
#10 0x01020b64 in openlcb::AliasCache::add (this=this@entry=0x20001538 <stack+688>,
id=<optimized out>, alias=<optimized out>, alias@entry=2772)
at /home/bracz/train/openlcb/openmrn/src/openlcb/AliasCache.cxx:119
#11 0x010290a2 in openlcb::RemoteAliasCacheUpdater::entry (this=0x200056d0)
at /home/bracz/train/openlcb/openmrn/src/openlcb/IfCan.cxx:229
#12 0x01015a5e in StateFlowBase::run (this=0x200056d0)
at /home/bracz/train/openlcb/openmrn/src/executor/StateFlow.cxx:80
#13 0x01014c20 in ExecutorBase::entry (this=this@entry=0x2000128c <stack+4>)
at /home/bracz/train/openlcb/openmrn/src/executor/Executor.cxx:321
#14 0x0102f0a8 in OSThread::inherit (this=0x2000128c <stack+4>)
at /home/bracz/train/openlcb/openmrn/src/os/OS.hxx:99
#15 Executor<5u>::thread_body (this=0x2000128c <stack+4>)
at /home/bracz/train/openlcb/openmrn/src/executor/Executor.hxx:351
#16 openlcb::SimpleCanStackBase::loop_executor (delay_start=false, this=0x20001288 <stack>)
at /home/bracz/train/openlcb/openmrn/src/openlcb/SimpleStack.hxx:262
#17 appl_main (argc=argc@entry=1, argv=argv@entry=0x20008760)
at /home/bracz/train/openlcb/tcs_booster/booster_main/cs_mini_main.hxx:277
One common theme is that the crash happened in RemoteAliasUpdater. Before this crash I had a lot of executions of the send_datagram utility.
$15 = {
id = 0x1,
alias = 0xad4,
timestamp = 0xe5a13e80cd78,
{
prev = 0x200053d8,
newer = 0x200053d8
},
{
next = 0x20005418,
older = 0x20005418
}
}
so the node ID == 1 which is really weird. It does seem to come form the CAN-bus message though:
(gdb) p/x *$8
$11 = {
<BufferBase> = {
<QMember> = {
next = 0x0
},
members of BufferBase:
size_ = 0x24,
count_ = 0x1,
pool_ = 0x20004d90,
done_ = 0x0
},
members of Buffer<CanMessageData>:
data_ = {
<can_frame> = {
can_id = 0x10701ad4,
can_dlc = 0x6,
can_rtr = 0x0,
can_eff = 0x1,
can_err = 0x0,
can_res = 0x1,
data = {0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x2e, 0x8}
},
members of CanMessageData:
static STANDARD_FRAME_BIT = 0x40000000,
static CAN_EXT_FRAME_FILTER = 0x0,
static CAN_EXT_FRAME_MASK = 0xe0000000,
static CAN_STD_FRAME_FILTER = 0x40000000,
static CAN_STD_FRAME_MASK = 0xfffff800,
unused = 0x2000bb54
}
}
surprisingly, freelist is not null, but there is definitely list corruption since freelist->next is insert.
(gdb) p freeList
$17 = (openlcb::AliasCache::Metadata *) 0x20005418
(gdb) p insert
$18 = (openlcb::AliasCache::Metadata *) 0x20005438
(gdb) p *freeList
$19 = {
id = 5501869955103,
alias = 2021,
timestamp = 170008784286004,
{
prev = 0x20005438,
newer = 0x20005438
},
{
next = 0x20005438,
older = 0x20005438
}
}
(gdb) p/x *freeList
$20 = {
id = 0x5010101181f,
alias = 0x7e5,
timestamp = 0x9a9f41323134,
{
prev = 0x20005438,
newer = 0x20005438
},
{
next = 0x20005438,
older = 0x20005438
}
}
(gdb) p/x *freeList->next
$21 = {
id = 0x1,
alias = 0xad4,
timestamp = 0xe5a13e80cd78,
{
prev = 0x200053d8,
newer = 0x200053d8
},
{
next = 0x20005418,
older = 0x20005418
}
}
Seen in production: an assertion failure during the remote alias map insert. Likely cause is that there was no free entry in the StlMap allocator for the AliasMap class even though there was exactly one entry on the freelist for the metadata list. During the next add the entry was taken from the freelist, but the StlMap allocator (which is a fixed-size allocator) crashed with an std::bad_alloc exception (turning into an assert).