boost-ext / sml

C++14 State Machine library
https://boost-ext.github.io/sml
Boost Software License 1.0
1.16k stars 179 forks source link

Segmentation fault in sml::back::sm_impl::process_queued_events when using a process_event call inside of the postponed event handling #465

Open akira1703 opened 3 years ago

akira1703 commented 3 years ago

Expected Behavior

No segmentation fault.

Actual Behavior

Segmentation fault in...

  template <class TDeps, class TSubs, class TDeferQueue, class... TEvents>
  bool process_queued_events(....)

...when using a process_event call inside of a postponed event handling (via "sml :: back :: process<....>").

Problem description + possible fix:

The segmentation fault seems to be caused by removing the postponed event from "sml :: back :: smimpl::process" after the event handling. So it's possible to recall this postponed event again when process_event is triggered in the postponed event handling. This results into removing the event twice from the queue which results into a segmentation fault. See the following code snipped on Compiler Explorer (godbolt.org):

#include <https://raw.githubusercontent.com/boost-ext/sml/v1.1.4/include/boost/sml.hpp>
#include <cassert>
#include <queue>
#include <memory>
#include <cstdio>

namespace sml = boost::sml;

namespace {
struct Logger {
  template <class SM, class TEvent>
  void log_process_event(const TEvent&) {
    printf("[process_event] %s\n", sml::aux::get_type_name<TEvent>());
  }

  template <class SM, class TAction, class TEvent>
  void log_action(const TAction&, const TEvent&) {
    printf("[action]        %s %s\n", sml::aux::get_type_name<TAction>(),
           sml::aux::get_type_name<TEvent>());
  }

  template <class SM, class TSrcState, class TDstState>
  void log_state_change(const TSrcState& src, const TDstState& dst) {
    printf("[transition]    %s -> %s\n", src.c_str(), dst.c_str());
  }
};
}

////////////////////////////////////////
// Events
struct e1 {};
struct e2 {};
struct e3 {};

////////////////////////////////////////
// class Sm
// comprises sml::sm
class Sm
{
private:
    struct action1
    {
        void operator()(sml::back::process<e2> aProcess)
        {
            // Enques e2 to sml::back::sm_impl::process_
            // for execution after s1-onentry
            aProcess(e2{});
        }
    };
    struct action2
    {
        // This action gets processed in context of the
        // event processing e2 which is inside of the 
        // while loop "while (!process_.empty())" in "process_queued_events" of SML.
        // Now this action calls process_event which directly executes
        // the event processing and calls at the end again "process_queued_events".
        // Issue:
        //      Now the event e2 is still in sml::back::sm_impl::process_ since
        //      the first "process_queued_events" will pop it out after processing.
        //      Finally the second "process_queued_events" triggers the e2 a
        //      second time, afterwards destroys it (destructor is called) and
        //      finishes the second "process_queued_events".
        //      Then the first "process_queued_events" tries to also pop the e2
        //      which will result into a segmentation fault.
        void operator()(Sm* aSm) { aSm->process_event(e3{}); }
    };
    struct action3 { void operator()() {} };
    struct onentry { void operator()() {} };
    struct onexit { void operator()() {} };

    ////////////////////////////////////////
    // state machine definition
    struct SmDef {
        auto operator()() const noexcept {
            using namespace sml;
            return make_transition_table(
            *"idle"_s + event<e1> / action1() = "s1"_s
            ,  "s1"_s + event<e2> / action2() = "s2"_s
            ,  "s2"_s + event<e3> / action3()

            ,  "s1"_s + on_entry<_> / onentry()
            ,  "s1"_s + on_exit<_>  / onexit()
            ,  "s2"_s + on_entry<_> / onentry()
            ,  "s2"_s + on_exit<_>  / onexit()
            );
        }
    };

    Logger mLogger;
    using SmlSm = sml::sm<
            SmDef,
            sml::defer_queue<std::deque>,
            sml::logger<Logger>,
            sml::process_queue<std::queue> >;
    std::unique_ptr<SmlSm> mSm;

public:
    Sm() : mSm( std::make_unique< SmlSm >( mLogger, this ) )
    {
    }

    template <class TEvent>
    bool process_event(const TEvent& aEvent)
    {
        return mSm->process_event(aEvent);
    }
};

int main() {
    Sm sm;   
    sm.process_event(e1{});
}

Possible fix

@krzysztof-jusiak and @redboltz Please can you check if this fix is suitable? Thanks! A possible fix (based on v1.1.4) might be the following which worked in my case:

/include/boost/sml/back/queue_handler.hpp

  queue_event &operator=(queue_event &&other) {
    dtor(data);

    id = other.id;
    dtor = other.dtor;
    move = other.move;
    move(data, static_cast<queue_event &&>(other));
    return *this;
  }
+
+  queue_event(queue_event &other) : id(other.id), dtor(other.dtor), move(other.move) {
+    move(data, static_cast<queue_event &&>(other));
+  }
+
  queue_event(const queue_event &) = delete;
  queue_event &operator=(const queue_event &) = delete;

  template <class T>
  queue_event(T object) {  // non explicit

include/boost/sml/back/state_machine.hpp


  template <class TDeps, class TSubs, class TDeferQueue, class... TEvents>
  bool process_queued_events(TDeps &deps, TSubs &subs, const aux::type<TDeferQueue> &, const aux::type_list<TEvents...> &) {
    using dispatch_table_t = bool (sm_impl::*)(TDeps &, TSubs &, const void *);
    const static dispatch_table_t dispatch_table[__BOOST_SML_ZERO_SIZE_ARRAY_CREATE(sizeof...(TEvents))] = {
        &sm_impl::process_event_no_queue<TDeps, TSubs, TEvents>...};
    bool wasnt_empty = !process_.empty();
    while (!process_.empty()) {
-      (this->*dispatch_table[process_.front().id])(deps, subs, process_.front().data);
+      // Copied the event before processing to avoid recursive re-process
+      // of process_ queue when someone calls process_event in the handling of the postponed event
+      auto event( process_.front() );
       process_.pop();
+      (this->*dispatch_table[event.id])(deps, subs, event.data);
    }
    return wasnt_empty;
  }

include/boost/sml.hpp

--- a/sml.hpp
+++ b/sml.hpp
@@ -633,6 +633,9 @@ class queue_event {
     move(data, static_cast<queue_event &&>(other));
     return *this;
   }
+  queue_event(queue_event &other) : id(other.id), dtor(other.dtor), move(other.move) {
+    move(data, static_cast<queue_event &&>(other));
+  }
   queue_event(const queue_event &) = delete;
   queue_event &operator=(const queue_event &) = delete;
   template <class T>
@@ -1632,8 +1635,11 @@ struct sm_impl : aux::conditional_t<aux::is_empty<typename TSM::sm>::value, aux:
         &sm_impl::process_event_no_queue<TDeps, TSubs, TEvents>...};
     bool wasnt_empty = !process_.empty();
     while (!process_.empty()) {
-      (this->*dispatch_table[process_.front().id])(deps, subs, process_.front().data);
+      // Copied the event before processing to avoid recursive re-process
+      // of process_ queue when someone calls process_event in the handling of the postponed event
+      auto event( process_.front() );
       process_.pop();
+      (this->*dispatch_table[event.id])(deps, subs, event.data);
     }
     return wasnt_empty;
   }

Specifications

mayurpatelgec commented 3 years ago

thanx for the workaround @akira1703 : we also had a crash/segmentation fault and then after we tried your changes mentioned in include/boost/sml.hpp and then it worked.

@krzysztof-jusiak : now the quetion is, is there any plan in near future to merge this in main/official release?

redboltz commented 3 years ago

@akira1703 , I noticed the issue now.

        void operator()(Sm* aSm) { aSm->process_event(e3{}); }

What is the expected behavior? If it means process event immediately, then I personally think it should be prohibited. UML 2.x defines event processing order. Events should be checked and processed after the series of transitions are finished. In other words, in the stable state. During a transition (e.g. in action2()), event can be posted to process later as action1() but can't be processed immediately. It breaks UML 2.x semantics.

redboltz commented 3 years ago

It breaks UML 2.x semantics.

Let me clarify.

I wrote UML state machine diagram from your code:

image http://www.plantuml.com/plantuml/uml/SoWkIImgAStDuIesLB1IICqhAQfKq5V8pmEpD3IXmXMP9GfWOH0B96g4NR4HDiNHMh4Akhfs2fafEQbS80BCWnXi25IOc5oIcPzdgA6fKAsG652KdvnQNAoHQbHTgwbG2xGVh5f10MAs4Loz4KHzSAwkNG54Jtng6T0X6gd649qG3SRw4EN6N0wfUIb0Zm80

@startuml
s1 : entry / onentry()
s1 : exit  / onexit()
s2 : entry / onentry()
s2 : exit  / onexit()
[*] --> idle
idle --> s1 : e1 / action1() { aProcess(e2{}) }
s1 -->   s2 : e2 / action2() { aSm->process_event(e3{}) }
s2 : e3/action3
@enduml

During the transition by e1, exit from idle and execute action1(), in action1() post e2, then transition to s1 and do s1::onentry(). Now, the statemachine has a chance to process event. So check the event queue and e2 is found. Then start the next transition by e2. First, execute s1::onexit() and then leave the s1 and execute action2(). Now the code tries to execute e3 immediately. But there is no current state. That means the current state has already exit from s2 but not enter s3 yet. So which state should process e3 is not decided. That means "It breaks UML 2.x semantics.".

UML defines s2 can process events the source state is s2 after on_entry processed. s2 might does some preparation for process other events in onentry(). onentry() is similar concept as C++'s constructor and onexit() is similar concept as C++'s destructor.

mayurpatelgec commented 3 years ago

@redboltz Thanks for the answer. Does that mean,

redboltz commented 3 years ago

@redboltz Thanks for the answer. Does that mean,

  • instead of firing the event in On_Entry directly, create an action first and fire there the event.
  • that means firing the event directly in On_Entry is prohibited. is that correct?

I think so because in the onentry handler, the transition is NOT finished. I'm not sure what you want to do. I think that POST event on action including OnEntry and OnExit can satisfies most cases. If you show me some concrete example case with your expected execution order, then I will show you how UML2.x StateMachine do that using only POST event in action.

redboltz commented 3 years ago

Here is an example how UML2.x state machine works.

image http://www.plantuml.com/plantuml/png/SoWkIImgAStDuIesLB1IICqhAQfKq5V8pmEpD3IXmXMP9GfWOH0B96g4NR4HLiN6s1KROrLiWbsn2JR5qLgn2hgwTWh5Xa1tWbaG5nW25IKcbsJcvnbfQ2fKAnJa5vSef1fLrohKmXH2R3S2EXd2DO5m5RWSKlDIWE410000

For example,

I use the following notation to describe the state machine status.

  1. Initial transition is started. The state machine become CANT
  2. s1::onentry() is invoked
  3. Initial transition is finished. The state machine become CAN
  4. event e1 post (from outside)
  5. Transition is started. The state machine become CANT
  6. s1::onexit() is processed
  7. action2() is processed. post e2.
  8. s2::onentry() is processed
  9. Transition is finished. The state machine become CAN
  10. e2 in the internal event queue is handled
  11. Transition is started. The state machine become CANT
  12. s2::onexit() is processed
  13. s4::onentry() is processed.
  14. Transition is finished. The state machine become CAN
mayurpatelgec commented 3 years ago

i simply want to fire an event when SM reaches that state and nothing else. (ex. as soon as machine goes to disconnected state -> fire connect event directly. On_entry makes it easy u have to do it once (for all transtions thats in that SM) when I want to do it with Action then I have to put same action for every transition that reaches that state. (ex. machine can go to disconnected state via diff transitions).

isn't this little bit critical?

redboltz commented 3 years ago

I don't 100% understand what you mean. I think that in your case, simply don't use on_entry, use transition action instead.

I need a concrete state machine and scenario to answer.

redboltz commented 3 years ago

Do you want to this? e_ prefix means event. connect() is action.

image http://www.plantuml.com/plantuml/png/SoWkIImgAStDuKh9B4xEpyjBJIv9JL6mKiZFYq_DAocgLD1NW0fhQ795QyKgwEhQAI2hHT48baKs9ZKUmajDAU62YyFCG5M8OgX3QbuAq5K0

mayurpatelgec commented 3 years ago

liked your example. Thanks. now there is one more state Interupted -> e_timeout -> Disconnected again your link leads to png only cant edit uml part myself

redboltz commented 3 years ago

liked your example. Thanks. now there is one more state Interupted -> e_timeout -> Disconnected again

Sorry, I don't understand what you mean. State machine is very difficult to explain in English text so state machine diagram is used :) You can modify the state machine diagram with very simple notation. (Click my comment's link)

@startuml
disconnected : on_entry / connect()
[*] --> disconnected
disconnected --> connected : e_connecedt
connected --> disconnected : e_disconnected
@enduml

It seems that in order to satisfy your case, process event in action (immediately) functionality is not needed.

mayurpatelgec commented 3 years ago

here I have added my state

http://www.plantuml.com/plantuml/uml/SoWkIImgAStDuKh9B4xEpyjBJIv9JL6mKiZFYq_DAocgLD1NW0fhQ795QyKgwEhQAI2hHT48baKs9ZKUmajDAU62YyFCG5M8OcXcNabgKMa1JiLWUP22-9BCtDJyqX8kXzIy5A0_0000

redboltz commented 3 years ago

Thanks! I've checked. (NOTE mouse right click on the state machine picture and "Copy Image" and then, paste here (comment text box) then you can paste the picture.

image

It seems that you can simply implement the statemachine using sml. You don't need to use sm->process_event(..) in action().

mayurpatelgec commented 3 years ago

Thanx for the info.

Now thats d main point. Now firing the new CONNECT event in that connect() function and according to your instructions thats prohibited. So now can you show me how to do this without Action()/OnEntryFunction()?

redboltz commented 3 years ago

There are two ways.

  1. Use some asynchronous mechanism like Boost.Asio (or future standard C++ network library) See https://stackoverflow.com/questions/64387248/boostext-sml-is-there-a-way-to-store-a-callback-to-later-process-an-event/64402640#64402640

    void memfun1() override {
        std::cout << __PRETTY_FUNCTION__ << std::endl;
        boost::asio::post(
            ioc, 
            [this] { 
                std::cout << "async callback is called. call process_event()" << std::endl;
                sm_.process_event(e2{});
    
                using namespace sml;
                assert(sm_.is("s1"_s));
            }
        );
    }

    memfun1() is action. sm_.process_event(e2{}); is NOT called in action directly. It is called in post(). In actual case post() is replaced with async_connect(). It works well with asynchronous network library. For example, async_connect() just requests connect and then callback is called asynchronously.

  2. Use posting event functionality. https://github.com/boost-ext/sml/issues/465#issue-931275651

    struct action1
    {
        void operator()(sml::back::process<e2> aProcess)
        {
            // Enques e2 to sml::back::sm_impl::process_
            // for execution after s1-onentry
            aProcess(e2{});
        }
    };

    aProcess is it.

    struct connect
    {
        void operator()(sml::back::process<e_connected> aProcess)
        {
            sync_connect(); // Sync API call
            // connection established here
            aProcess(e_connected{}); // Then post event
        }
    };

    See also https://stackoverflow.com/questions/54039909/boost-sml-violation-of-memory-protection-segmentation-fault/54650214#54650214

redboltz commented 3 years ago

Here is more compact example for 2 (post event). https://github.com/boost-ext/sml/issues/144#issuecomment-581370339

Demo: https://wandbox.org/permlink/V36TUw1PGMUA8gat

I personally think that the parameter name process_event should be post_event. It reflects the actual behavior. It is named after the type sml::back::process I guess.

redboltz commented 3 years ago

I wrote minimal and complete code to demonstrate the approach 2 (sync). It doesn't contain interrupt state because it is not an essential part of the issue.

https://wandbox.org/permlink/XGXErmOkMetLlYt9

#include <iostream>
#include <cassert>
#include <queue>

#include <boost/sml.hpp>

namespace sml = boost::sml;

struct e_connected {};
struct e_disconnected {};

struct app_base {
    virtual void connect() const = 0;
    virtual ~app_base() = default;
};

struct app_table {
    auto operator()() const noexcept {
        using namespace sml;
        return make_transition_table(

            // entry/exit
            *"disconnected"_s + sml::on_entry<_>
            / [](app_base& a, sml::back::process<e_connected> post_event) {
                  std::cout << "entry disconnected" << std::endl;
                  a.connect();
                  std::cout << "post e_conencted{}" << std::endl;
                  post_event(e_connected{});
              }
            ,"disconnected"_s + sml::on_exit<_>
            / [] (app_base&) {
                  std::cout << "exit disconnected" << std::endl;
              }

            ,"connected"_s + sml::on_entry<_>
            / [] (app_base&) {
                  std::cout << "entry connected" << std::endl;
              }
            ,"connected"_s + sml::on_exit<_>
            / [] (app_base&) {
                  std::cout << "exit connected" << std::endl;
              }

            // source           event                     target
            ,"disconnected"_s + event<e_connected>     = "connected"_s
            ,"connected"_s    + event<e_disconnected>  = "disconnected"_s
        );
    }
};

struct app : app_base {
    void connect() const override {
        std::cout << __PRETTY_FUNCTION__ << std::endl;
    }
    sml::sm<
        app_table,
        sml::process_queue<std::queue>
    > sm_ { static_cast<app_base&>(*this) };
};

int main() {
    using namespace sml;
    app a;
    assert(a.sm_.is("connected"_s));
    std::cout << "process_event(e_disconenct{}) from outside" << std::endl;
    a.sm_.process_event(e_disconnected{});
}

Output

entry disconnected
virtual void app::connect() const
post e_conencted{}
exit disconnected
entry connected
process_event(e_disconenct{}) from outside
exit connected
entry disconnected
virtual void app::connect() const
post e_conencted{}
exit disconnected
entry connected