ydb-platform / nbs

Network Block Store
Apache License 2.0
52 stars 21 forks source link

[NBS] blockstore-server crashed in TExternalVhostEndpointListener #312

Closed EvgeniyKozev closed 6 months ago

EvgeniyKozev commented 7 months ago
Core was generated by `/usr/bin/blockstore-server --domain pre-prod_vla --ic-port 29010 --mon-port 876'.
#0 TIntrusivePtr<TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > >, TStringPtrOps<TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > > > >::Get (this=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/ptr.h:560
[Current thread is 11716 (LWP 3461804)]

Thread 11716 (LWP 3461804):
#0 TIntrusivePtr<TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > >, TStringPtrOps<TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > > > >::Get (this=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/ptr.h +560
#1 TPointerCommon<TIntrusivePtr<TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > >, TStringPtrOps<TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > > > >, TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > > >::AsT (this=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/ptr.h +132
#2 TPointerBase<TIntrusivePtr<TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > >, TStringPtrOps<TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > > > >, TStdString<std::__y1::basic_string<char, std::__y1::char_traits<char>, std::__y1::allocator<char> > > >::operator* (this=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/ptr.h +148
#3 TBasicString<char, std::__y1::char_traits<char> >::StdStr (this=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/string.h +218
#4 TBasicString<char, std::__y1::char_traits<char> >::ConstRef (this=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/string.h +240
#5 TBasicString<char, std::__y1::char_traits<char> >::data (this=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/string.h +303
#6 TStringBase<TBasicString<char, std::__y1::char_traits<char> >, char, std::__y1::char_traits<char> >::Ptr (this=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/strbase.h +536
#7 TStringBase<TBasicString<char, std::__y1::char_traits<char> >, char, std::__y1::char_traits<char> >::data (this=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/strbase.h +128
#8 TBasicStringBuf<char, std::__y1::char_traits<char> >::TBasicStringBuf<TBasicString<char, std::__y1::char_traits<char> >, std::__y1::char_traits<char> > (str=..., this=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/strbuf.h +121
#9 THashTable<std::__y1::pair<TBasicString<char, std::__y1::char_traits<char> > const, std::__y1::shared_ptr<NCloud::NBlockStore::NServer::IExternalEndpoint> >, TBasicString<char, std::__y1::char_traits<char> >, THash<TBasicString<char, std::__y1::char_traits<char> > >, TSelect1st, TEqualTo<TBasicString<char, std::__y1::char_traits<char> > >, std::__y1::allocator<TBasicString<char, std::__y1::char_traits<char> > > >::bkt_num_key<TBasicString<char, std::__y1::char_traits<char> > >(TBasicString<char, std::__y1::char_traits<char> > const&, NPrivate::TReciprocalDivisor<unsigned int, unsigned long, NPrivate::TMulUnsignedUpper<unsigned long, unsigned __int128, 64ul> >) const (this=0x1702ff8057a0, key=..., n=...) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/hash_table.h +923
#10 THashTable<std::__y1::pair<TBasicString<char, std::__y1::char_traits<char> > const, std::__y1::shared_ptr<NCloud::NBlockStore::NServer::IExternalEndpoint> >, TBasicString<char, std::__y1::char_traits<char> >, THash<TBasicString<char, std::__y1::char_traits<char> > >, TSelect1st, TEqualTo<TBasicString<char, std::__y1::char_traits<char> > >, std::__y1::allocator<TBasicString<char, std::__y1::char_traits<char> > > >::bkt_num_key<TBasicString<char, std::__y1::char_traits<char> > > (this=0x1702ff8057a0, key=...) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/hash_table.h +913
#11 THashTable<std::__y1::pair<TBasicString<char, std::__y1::char_traits<char> > const, std::__y1::shared_ptr<NCloud::NBlockStore::NServer::IExternalEndpoint> >, TBasicString<char, std::__y1::char_traits<char> >, THash<TBasicString<char, std::__y1::char_traits<char> > >, TSelect1st, TEqualTo<TBasicString<char, std::__y1::char_traits<char> > >, std::__y1::allocator<TBasicString<char, std::__y1::char_traits<char> > > >::find<TBasicString<char, std::__y1::char_traits<char> > > (this=0x1702ff8057a0, key=...) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/hash_table.h +767
#12 THashMap<TBasicString<char, std::__y1::char_traits<char> >, std::__y1::shared_ptr<NCloud::NBlockStore::NServer::IExternalEndpoint>, THash<TBasicString<char, std::__y1::char_traits<char> > >, TEqualTo<TBasicString<char, std::__y1::char_traits<char> > >, std::__y1::allocator<TBasicString<char, std::__y1::char_traits<char> > > >::find<TBasicString<char, std::__y1::char_traits<char> > > (this=0x1702ff8057a0, key=...) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/generic/hash.h +213
#13 NCloud::NBlockStore::NServer::(anonymous namespace)::TExternalVhostEndpointListener::SwitchEndpoint (this=0x1702ff8056d0, request=..., volume=..., session=...) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/cloud/blockstore/libs/endpoints_vhost/external_vhost_server.cpp +675
#14 NCloud::NBlockStore::NServer::(anonymous namespace)::TEndpointManager::TrySwitchEndpoint (this=0x1702ff3c8620, diskId=...) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/cloud/blockstore/libs/endpoints/endpoint_manager.cpp +660
#15 NCloud::NBlockStore::NServer::(anonymous namespace)::TEndpointManager::OnVolumeConnectionEstablished(TBasicString<char, std::__y1::char_traits<char> > const&)::$_5::operator()() const (this=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/cloud/blockstore/libs/endpoints/endpoint_manager.cpp +676
#16 NCloud::TSimpleTask<NCloud::NBlockStore::NServer::(anonymous namespace)::TEndpointManager::OnVolumeConnectionEstablished(TBasicString<char, std::__y1::char_traits<char> > const&)::$_5>::Execute() (this=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/cloud/storage/core/libs/common/task_queue.h +61
#17 NCloud::(anonymous namespace)::TWorker::Execute (this=0x1703245722c0, c=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/cloud/storage/core/libs/coroutine/executor.cpp +43
#18 ContHelperMemberFunc<NCloud::(anonymous namespace)::TWorker, &NCloud::(anonymous namespace)::TWorker::Execute> (c=<optimized out>, arg=0x0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/library/cpp/coroutine/engine/impl.h +151
#19 TContExecutor::Create(void (*)(TCont*, void*), void*, char const*, TMaybe<unsigned int, NMaybe::TPolicyUndefinedExcept>)::$_3::operator()(TCont*) const (cont=0x1702ff8057a0, this=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/library/cpp/coroutine/engine/impl.cpp +249
#20 std::__y1::__invoke<TContExecutor::Create(void (*)(TCont*, void*), void*, char const*, TMaybe<unsigned int, NMaybe::TPolicyUndefinedExcept>)::$_3&, TCont*>(TContExecutor::Create(void (*)(TCont*, void*), void*, char const*, TMaybe<unsigned int, NMaybe::TPolicyUndefinedExcept>)::$_3&, TCont*&&) (__f=..., __args=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/contrib/libs/cxxsupp/libcxx/include/type_traits +3663
#21 std::__y1::__invoke_void_return_wrapper<void, true>::__call<TContExecutor::Create(void (*)(TCont*, void*), void*, char const*, TMaybe<unsigned int, NMaybe::TPolicyUndefinedExcept>)::$_3&, TCont*>(TContExecutor::Create(void (*)(TCont*, void*), void*, char const*, TMaybe<unsigned int, NMaybe::TPolicyUndefinedExcept>)::$_3&, TCont*&&) (__args=<optimized out>, __args=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/contrib/libs/cxxsupp/libcxx/include/__functional/invoke.h +61
#22 std::__y1::__function::__alloc_func<TContExecutor::Create(void (*)(TCont*, void*), void*, char const*, TMaybe<unsigned int, NMaybe::TPolicyUndefinedExcept>)::$_3, std::__y1::allocator<TContExecutor::Create(void (*)(TCont*, void*), void*, char const*, TMaybe<unsigned int, NMaybe::TPolicyUndefinedExcept>)::$_3>, void (TCont*)>::operator()(TCont*&&) (this=0x76c487d7bd98f408, __arg=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/contrib/libs/cxxsupp/libcxx/include/__functional/function.h +181
#23 std::__y1::__function::__func<TContExecutor::Create(void (*)(TCont*, void*), void*, char const*, TMaybe<unsigned int, NMaybe::TPolicyUndefinedExcept>)::$_3, std::__y1::allocator<TContExecutor::Create(void (*)(TCont*, void*), void*, char const*, TMaybe<unsigned int, NMaybe::TPolicyUndefinedExcept>)::$_3>, void (TCont*)>::operator()(TCont*&&) (this=0x76c487d7bd98f400, __arg=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/contrib/libs/cxxsupp/libcxx/include/__functional/function.h +355
#24 std::__y1::__function::__value_func<void (TCont*)>::operator()(TCont*&&) const (this=0x1702f13fe9d0, __args=@0x1702d20b8f58: 0x1702f13fe910) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/contrib/libs/cxxsupp/libcxx/include/__functional/function.h +508
#25 std::__y1::function<void (TCont*)>::operator()(TCont*) const (this=0x1702f13fe9d0, __arg=0x1702f13fe910) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/contrib/libs/cxxsupp/libcxx/include/__functional/function.h +1192
#26 NCoro::TTrampoline::DoRun (this=0x1702f13fe930) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/library/cpp/coroutine/engine/trampoline.cpp +30
#27 NCoro::TTrampoline::DoRunNaked (this=0x1702ff8057a0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/library/cpp/coroutine/engine/trampoline.cpp +46
#28 Run (arg=0x1702ff8057a0) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/system/context.cpp +47
#29 ContextTrampoLine (t1=0x1702f13fe930, t2=<optimized out>) at /opt/buildagent/work/4ec98910e7de170b/__FUSE/mount_path/util/system/context.cpp +124
#30 ?? ()
budevg commented 7 months ago

There is a race between a call to TrySwitchEndpoint from the VolumeClient actor and a call to StopEndpoint.

Locking protection exists only between StartEndpoint and StopEndpoint here.

But no such protection exists for the SwitchEndpoint call. Running it concurrently with StopEndpoint results in a dangling pointer dereference.