DOCGroup / ACE_TAO

ACE and TAO
https://www.dre.vanderbilt.edu/~schmidt/TAO.html
707 stars 381 forks source link

Hang in DTP when server thread limit is exhausted #1527

Open adi1831 opened 3 years ago

adi1831 commented 3 years ago

Version ACE+TAO-6.4.6

Host machine and operating system SUSE Linux Enterprise Server 11 (x86_64)

Compiler name and version (including patch level) gcc version 6.1.0

The problem affects: Execution. The server hangs after the system thread creation limit is hit, and the following message is displayed: DTP_Task::svc() failed to grow thread pool.

Description: Clients wait indefinitely for a response from the server when the thread creation limit is hit on the server. All the threads on the server have a similar backtrace, as follows:

Thread 9 (Thread 0x7fffeaffd710 (LWP 23528)):
#0  0x00007ffff48c642c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1  0x00007ffff5e120bf in ACE_OS::cond_wait (cv=0x72d110, external_mutex=0x72ce98)
    at /home/rdg/ACE_wrappers/ace/OS_NS_Thread.inl:398
#2  0x00007ffff5e12323 in ACE_Condition<ACE_Thread_Mutex>::wait (this=0x72d110)
    at /home/rdg/ACE_wrappers/ace/Condition_Thread_Mutex.cpp:86
#3  0x00007ffff6461079 in TAO_Condition<ACE_Thread_Mutex>::wait (this=0x72ced0) at ../../tao/Condition.inl:8
#4  0x00007ffff64610bb in TAO::CSD::TP_Synch_Helper::wait_while_pending (this=0x72ce98)
    at ../../tao/CSD_ThreadPool/CSD_TP_Synch_Helper.inl:28
#5  0x00007ffff646112f in TAO::CSD::TP_Collocated_Synch_Request::wait (this=0x72ce00)
    at ../../tao/CSD_ThreadPool/CSD_TP_Collocated_Synch_Request.inl:29
#6  0x00007ffff645e3fb in TAO_DTP_POA_Strategy::dispatch_collocated_request_i (this=0x669b10, server_request=...,
    object_id=..., poa=0x656820, operation=0x4143c7 "_get_poaName", servant=0x729d50)
    at /home/rdg/ACE_wrappers/TAO/tao/Dynamic_TP/DTP_POA_Strategy.cpp:269
#7  0x00007ffff57e3d34 in TAO::CSD::Strategy_Base::dispatch_request (this=0x669b10, server_request=..., upcall=...)
    at ../../tao/CSD_Framework/CSD_Strategy_Base.inl:26
#8  0x00007ffff57e3f96 in TAO::CSD::Strategy_Proxy::dispatch_request (this=0x669af0, server_request=..., upcall=...)
    at ../../tao/CSD_Framework/CSD_Strategy_Proxy.inl:31
#9  0x00007ffff57e3aae in TAO_CSD_Object_Adapter::do_dispatch (this=0x6542c0, req=..., upcall=...)
    at /home/rdg/ACE_wrappers/TAO/tao/CSD_Framework/CSD_Object_Adapter.cpp:35
#10 0x00007ffff5a901da in TAO_Object_Adapter::dispatch_servant (this=0x6542c0, key=..., req=..., forward_to=...)
    at /home/rdg/ACE_wrappers/TAO/tao/PortableServer/Object_Adapter.cpp:351
#11 0x00007ffff5a903b1 in TAO_Object_Adapter::dispatch (this=0x6542c0, key=..., request=..., forward_to=...)
    at /home/rdg/ACE_wrappers/TAO/tao/PortableServer/Object_Adapter.cpp:764
#12 0x00007ffff7a7c84c in TAO_Adapter_Registry::dispatch (this=0x63b6a8, key=..., request=..., forward_to=...)
    at /home/rdg/ACE_wrappers/TAO/tao/Adapter_Registry.cpp:107
#13 0x00007ffff7b11900 in TAO_Request_Dispatcher::dispatch (this=0x642260, orb_core=0x63b1f0, request=..., forward_to=...)
    at /home/rdg/ACE_wrappers/TAO/tao/Request_Dispatcher.cpp:19
#14 0x00007ffff7a8907b in TAO::Collocated_Invocation::invoke (this=0x7fffeaffc520, strat=TAO::TAO_CS_THRU_POA_STRATEGY)
    at /home/rdg/ACE_wrappers/TAO/tao/Collocated_Invocation.cpp:71
#15 0x00007ffff7ac9989 in TAO::Invocation_Adapter::invoke_collocated_i (this=0x7fffeaffc820, stub=0x72ca70, details=...,
    effective_target=..., strat=TAO::TAO_CS_THRU_POA_STRATEGY) at /home/rdg/ACE_wrappers/TAO/tao/Invocation_Adapter.cpp:180
#16 0x00007ffff7ac9c94 in TAO::Invocation_Adapter::invoke_i (this=0x7fffeaffc820, stub=0x72ca70, details=...)
    at /home/rdg/ACE_wrappers/TAO/tao/Invocation_Adapter.cpp:115
#17 0x00007ffff7ac9f1d in TAO::Invocation_Adapter::invoke (this=0x7fffeaffc820, ex_data=0x0, ex_count=0)
    at /home/rdg/ACE_wrappers/TAO/tao/Invocation_Adapter.cpp:47
#18 0x0000000000409b61 in MyPOA::poaName (this=0x72e3c0) at Hello_c.cc:400
#19 0x00000000004120fc in Hello_impl::GetMyWorldPOA (this=0x6567f0) at Hello_impl.cpp:86
#20 0x0000000000411e45 in Hello_impl::HelloEnd (this=0x6567f0) at Hello_impl.cpp:65
#21 0x000000000041136d in Hello_impl::HelloStart (this=0x6567f0) at Hello_impl.cpp:18

To reproduce the issue: on the server, limit the number of threads; we set ulimit -u 40.

cat Hello.idl

// Interface the client narrows to and invokes (Client2 calls HelloStart()).
// Both operations take no arguments and return nothing.
interface HelloIntf
{
        void HelloStart();
        void HelloEnd();
};

// Exposes a single read-only string attribute, poaName.
interface MyPOA
{
                readonly attribute string poaName;
};

// Derives from MyPOA (inheriting poaName) and adds the HelloWorld operation.
interface MyWorld : MyPOA {
                void HelloWorld();
};

cat Server.cpp

#include "Hello_impl.h"

int main(int argc, char* argv[])
{
        CORBA::ORB_var orb;
        try
        {
                orb = CORBA::ORB_init(argc, argv);
        }
        catch(const CORBA::Exception& e)
        {
                std::cerr << e << std::endl;
                return -1;
        }

        // Get a reference to the root POA
        CORBA::Object_var obj;
        try
        {
                obj = orb->resolve_initial_references("RootPOA");
        }
        catch(const CORBA::Exception& e)
        {
                std::cerr <<"Caught Corba Exception. Probably this is because you are not authorized to use specified port."<< std::endl;
                return -1;
        }
        PortableServer::POA_var rootPOA = PortableServer::POA::_narrow(obj);

        try
        {
                // Apply Dynamic Thread Pool strategy
                TAO_DTP_Definition dtp_config;
                dtp_config.min_threads_ = 4; // Set low water mark to 8 threads.
                dtp_config.init_threads_ = 4; // Start 8 threads to start.
                dtp_config.max_threads_ = -1; // Create threads as needed (no limit).
                dtp_config.queue_depth_ = -1; // Allow infinite queue depth.
                dtp_config.stack_size_ = ACE_DEFAULT_THREAD_STACKSIZE; // Stacksize of each thread
                dtp_config.timeout_ = ACE_Time_Value(10,0); // Expire thread that is idle for 10 seconds (default)
                TAO_Intrusive_Ref_Count_Handle<TAO_DTP_POA_Strategy> dtp_strategy = new TAO_DTP_POA_Strategy(&dtp_config, false);
                dtp_strategy->apply_to(rootPOA.in());

                // get the POA Manager
                PortableServer::POAManager_var poa_manager = rootPOA->the_POAManager();

                Hello_impl* pHelloImpl = new Hello_impl(orb);

                CORBA::Object_var table_obj = orb->resolve_initial_references("IORTable");
                IORTable::Table_var table = IORTable::Table::_narrow(table_obj.in());
                CORBA::Object_var HelloImplObj = pHelloImpl->_this();
                CORBA::String_var HelloImplString = orb->object_to_string(HelloImplObj.in());
                table->bind("Server", HelloImplString.in());

                // Activate the POA Manager
                poa_manager->activate();
        }
        catch(const CORBA::Exception& e)
        {
                std::cerr << e << std::endl;
                return -1;
        }

        // Wait for incoming requests
        std::cout << std::endl << "Server is ready ...... " << std::endl;
        orb->run();

        return 0;
}

cat Client2.cpp

#include <iostream>
#include <string>
#include "Corba.h"
#include "Hello_c.hh"

int main(int argc, char** argv)
{
        CORBA::ORB_var orb;
        try
        {
                orb = CORBA::ORB_init(argc, argv);
        }
        catch(const CORBA::Exception& e)
        {
                std::cerr << e << std::endl;
                return -1;
        }

        char* host = getenv("HOST");
        char* port = getenv("PORT");

        std::string url = std::string("corbaloc::") + std::string(host) + std::string(":") + std::string(port) + std::string("/Server");

        std::cout<<"################Client2 EXECUTING###############"<<std::endl;
        try
        {
                CORBA::Object_var HelloIntfObj = orb->string_to_object(url.c_str());

                HelloIntf_var HelloIntf = HelloIntf::_narrow(HelloIntfObj);

                HelloIntf->HelloStart();
        }
        catch(const CORBA::Exception& e)
        {
                std::cerr << e << std::endl;
                return -1;
        }
        std::cout<<"################Client2 FINISHED###############"<<std::endl;
        return 0;
}

cat Hello_impl.cpp

#include "Hello_impl.h"
#include <pthread.h>

// Initialises m_orb from the ORB pointer supplied by the caller.
Hello_impl::Hello_impl(CORBA::ORB_ptr orb)
        : m_orb(orb)
{
}

// Nothing to release explicitly here.
Hello_impl::~Hello_impl() {}

void Hello_impl::HelloStart()
{
        std::cout << "HelloStart BEGIN!!!" << std::endl;
        sleep(15);
        //HelloSecondary(); //Remote Call to secondary
        HelloEnd();
        std::cout << "HelloStart END!!!" << std::endl;
}

void Hello_impl::HelloEnd()
{
                GetMyWorldPOA();
        std::cout << "HelloEnd BEGIN!!!" << std::endl;
        //sleep(1);
        //sleep(15);
        std::cout << "HelloEnd END!!!" << std::endl;
}

// Creates a new MyWorld_impl servant, activates it in the servant's default
// POA and returns an object reference to it.
//
// The caller receives ownership of the returned reference (handed over via
// _retn()) and is responsible for releasing it.
// NOTE(review): every call activates a fresh object that is not deactivated
// in this function - confirm that is intended for the reproduction.
MyPOA* Hello_impl::GetMyWorldIntf()
{
        // The _var owns the reference obtained from `new`.
        PortableServer::ServantBase_var servant = new MyWorld_impl(m_orb);
        PortableServer::POA_var default_poa = servant->_default_POA();
        PortableServer::ObjectId_var oid = default_poa->activate_object(servant.in());
        CORBA::Object_var ref = default_poa->servant_to_reference(servant.in());
        MyWorld_var ret = MyWorld::_narrow(ref.in());
        return ret._retn();
}

void Hello_impl::GetMyWorldPOA()
{
                MyPOA* pMyPOA = GetMyWorldIntf();
                char* pPOAName = pMyPOA->poaName();
                std::cout << "POA Name is :: " << pPOAName << std::endl;
}

// Initialises m_orb from the ORB pointer supplied by the caller.
MyWorld_impl::MyWorld_impl(CORBA::ORB_ptr orb)
        : m_orb(orb)
{
}

// Nothing to release explicitly here.
MyWorld_impl::~MyWorld_impl() {}

// Prints a fixed greeting to stdout.
void MyWorld_impl::HelloWorld()
{
        std::cout << "Hello World!!!"
                  << std::endl;
}

// Accessor for the poaName attribute: waits 5 seconds, then returns a copy of
// "/" allocated with CORBA::string_dup.
char * MyWorld_impl::poaName()
{
        // Deliberate delay so the call stays in progress for a while.
        sleep(5);

        char* name = CORBA::string_dup("/");
        return name;
}

cat RunClientHere.sh

# Launches 25 Client2 processes in the background, each writing stdout and
# stderr to its own output2_1_<i> file.
export HOST1=$HOSTNAME
# Client2 looks up HOST (not HOST1) with getenv(); fall back to this machine's
# name when HOST is not already set in the environment.
export HOST="${HOST:-$HOSTNAME}"
export PORT=29860

for((i = 0; i < 25; i++)) # at least 25 client processes needed
do
./Client2 > output2_1_$i 2>&1 &
done
jwillemsen commented 3 years ago

This version is ancient; please upgrade to TAO 3.0.2. If that doesn't resolve your issue, please extend an existing test or add a new automated unit test under TAO/tests that reproduces this automatically.

Be aware that all support on GitHub is best effort; if you require a guaranteed response, consider hiring one of the commercial companies that provide support and services for ACE/TAO.

jwillemsen commented 3 years ago

Instead of limiting the threads at the OS level, have you tried limiting them by setting max_threads_?