BenHanson / parsertl14

C++14 version of parsertl
32 stars 4 forks source link

What's in the parser state machine table ? #17

Closed mingodad closed 8 months ago

mingodad commented 8 months ago

I need help understanding the content of basic_state_machine::_table.

While trying to dump the parser/lexer for grammars with parsertl/lexertl I found the function parsertl::serialise::save, and I'm using it as the base for my variant that saves to SQL. I'm testing with the Chapel grammar (which can be seen here https://mingodad.github.io/parsertl-playground/playground/, select Chapel parser from examples, then select Grammar parser state machine from Debug mode: and click Parser to see the grammar state machine dump), where there are 1284 states. Several states seem to have a huge number of entries in basic_state_machine::_table, like states 1282 and 1283, which have 73 entries each, but when looking at the above-mentioned dump there is only one entry for each of them.

state 1282

(598)  new_expr : TNEW TOWNED TLP expr TRP TLP opt_actual_ls TRP TQUESTION .

state 1283

(599)  new_expr : TNEW TSHARED TLP expr TRP TLP opt_actual_ls TRP TQUESTION .
create table parser_sm(
  id integer primary key,
  version integer,
  sizeof_id_type integer,
  sm_columns integer,
  sm_rows integre,
  sm_rules integer
);
insert into parser_sm(version, sizeof_id_type, sm_columns, sm_rows, sm_rules)
values(1,2,368,1284,720);
...
create table parser_sm_states(
  id integer primary key,
  size integer
);
create table parser_sm_states_entries(
  id integer primary key,
  state integer,
  token integer,
  action integer,
  param integer,
);
insert into parser_sm_states(size) values
...
(73),(73);
insert into parser_sm_states_entries(state, token, action, param) values
...
,(1282,10,2,598),(1282,11,2,598),(1282,17,2,598),(1282,25,2,598)
,(1282,29,2,598),(1282,30,2,598),(1282,32,2,598),(1282,34,2,598)
,(1282,45,2,598),(1282,48,2,598),(1282,51,2,598),(1282,58,2,598)
,(1282,70,2,598),(1282,71,2,598),(1282,74,2,598),(1282,76,2,598)
,(1282,81,2,598),(1282,87,2,598),(1282,88,2,598),(1282,90,2,598)
,(1282,91,2,598),(1282,100,2,598),(1282,103,2,598),(1282,107,2,598)
,(1282,113,2,598),(1282,116,2,598),(1282,118,2,598),(1282,121,2,598)
,(1282,122,2,598),(1282,123,2,598),(1282,124,2,598),(1282,125,2,598)
,(1282,126,2,598),(1282,127,2,598),(1282,128,2,598),(1282,131,2,598)
,(1282,132,2,598),(1282,133,2,598),(1282,134,2,598),(1282,136,2,598)
,(1282,137,2,598),(1282,139,2,598),(1282,140,2,598),(1282,142,2,598)
,(1282,143,2,598),(1282,144,2,598),(1282,145,2,598),(1282,146,2,598)
,(1282,147,2,598),(1282,148,2,598),(1282,149,2,598),(1282,150,2,598)
,(1282,151,2,598),(1282,152,2,598),(1282,153,2,598),(1282,154,2,598)
,(1282,155,2,598),(1282,156,2,598),(1282,157,2,598),(1282,158,2,598)
,(1282,159,2,598),(1282,160,2,598),(1282,161,2,598),(1282,162,2,598)
,(1282,163,2,598),(1282,164,2,598),(1282,165,2,598),(1282,166,2,598)
,(1282,168,2,598),(1282,169,2,598),(1282,171,2,598),(1282,173,2,598)
,(1282,175,2,598),(1283,10,2,599),(1283,11,2,599),(1283,17,2,599)
,(1283,25,2,599),(1283,29,2,599),(1283,30,2,599),(1283,32,2,599)
,(1283,34,2,599),(1283,45,2,599),(1283,48,2,599),(1283,51,2,599)
,(1283,58,2,599),(1283,70,2,599),(1283,71,2,599),(1283,74,2,599)
,(1283,76,2,599),(1283,81,2,599),(1283,87,2,599),(1283,88,2,599)
,(1283,90,2,599),(1283,91,2,599),(1283,100,2,599),(1283,103,2,599)
,(1283,107,2,599),(1283,113,2,599),(1283,116,2,599),(1283,118,2,599)
,(1283,121,2,599),(1283,122,2,599),(1283,123,2,599),(1283,124,2,599)
,(1283,125,2,599),(1283,126,2,599),(1283,127,2,599),(1283,128,2,599)
,(1283,131,2,599),(1283,132,2,599),(1283,133,2,599),(1283,134,2,599)
,(1283,136,2,599),(1283,137,2,599),(1283,139,2,599),(1283,140,2,599)
,(1283,142,2,599),(1283,143,2,599),(1283,144,2,599),(1283,145,2,599)
,(1283,146,2,599),(1283,147,2,599),(1283,148,2,599),(1283,149,2,599)
,(1283,150,2,599),(1283,151,2,599),(1283,152,2,599),(1283,153,2,599)
,(1283,154,2,599),(1283,155,2,599),(1283,156,2,599),(1283,157,2,599)
,(1283,158,2,599),(1283,159,2,599),(1283,160,2,599),(1283,161,2,599)
,(1283,162,2,599),(1283,163,2,599),(1283,164,2,599),(1283,165,2,599)
,(1283,166,2,599),(1283,168,2,599),(1283,169,2,599),(1283,171,2,599)
,(1283,173,2,599),(1283,175,2,599);

This is the code used to generate the SQL in parsertl::serialise:

    /// Dumps a parser state machine as a SQL script (table definitions plus
    /// multi-row inserts) so that it can be inspected with ordinary queries.
    ///
    /// Tables written, in order:
    ///  - parser_sm:                one row holding the dump version (1),
    ///                              sizeof(id_type), sm_._columns, sm_._rows and
    ///                              sm_._rules.size().
    ///  - parser_sm_rules_lhs:      one row per entry of sm_._rules (rule_.first).
    ///  - parser_sm_rules_rhs:      one row per element of rule_.second together
    ///                              with its position; a rule with an empty
    ///                              right hand side gets a single (lhs,NULL, 0)
    ///                              row.
    ///  - parser_sm_states:         one row per entry of sm_._table holding the
    ///                              number of entries stored for that state.
    ///  - parser_sm_states_entries: one row per (state, token, action, param)
    ///                              entry; state is the zero based index into
    ///                              sm_._table.
    ///
    /// None of the inserts supply the "id" column.
    /// NOTE(review): this presumably relies on the database assigning integer
    /// primary keys automatically (as SQLite does) - confirm for other engines.
    ///
    /// @param sm_     State machine to dump.
    /// @param stream_ Output stream; anything supporting operator<< for string
    ///                literals and integers.
    template <typename id_type, class stream>
    void save2sql(const basic_state_machine<id_type>& sm_, stream& stream_)
    {
        // Bookkeeping for the multi-row "insert ... values" statement that is
        // currently being written.
        const char* header_ = nullptr;
        std::size_t per_line_ = 1;
        std::size_t rows_ = 0;

        // Starts a new insert statement. The header is only written together
        // with the first row, so a statement without rows emits nothing at all
        // instead of the invalid "insert ... values ;".
        const auto open_insert = [&](const char* text_, const std::size_t width_)
        {
            header_ = text_;
            per_line_ = width_;
            rows_ = 0;
        };
        // Writes whatever has to precede a row (header or separator) and
        // opens the row.
        const auto open_row = [&]()
        {
            if (rows_ == 0) stream_ << header_;
            else stream_ << ",";

            stream_ << "(";
        };
        // Closes the row and wraps the line after every per_line_ rows.
        const auto close_row = [&]()
        {
            stream_ << ")";

            if ((++rows_ % per_line_) == 0) stream_ << "\n";
        };
        // Terminates the statement, provided anything was written for it.
        const auto close_insert = [&]()
        {
            if (rows_ != 0) stream_ << ";\n";
        };

        stream_ << "create table parser_sm(\n"
                "  id integer primary key,\n"
                "  version integer,\n"
                "  sizeof_id_type integer,\n"
                "  sm_columns integer,\n"
                "  sm_rows integer,\n"
                "  sm_rules integer\n"
                ");\n"
                "insert into parser_sm(version, sizeof_id_type, sm_columns, sm_rows, sm_rules)\n"
                "values(";
        // Version number
        stream_ << 1 << ",";
        stream_ << sizeof(id_type) << ",";
        stream_ << sm_._columns << ",";
        stream_ << sm_._rows << ",";
        stream_ << sm_._rules.size() << ");\n";

        stream_ << "create table parser_sm_rules_lhs(\n"
                "  id integer primary key,\n"
                "  lhs integer\n"
                ");\n";
        stream_ << "create table parser_sm_rules_rhs(\n"
                "  id integer primary key,\n"
                "  lhs integer,\n"
                "  rhs integer,\n"
                "  pos integer\n"
                ");\n";

        // Ids are widened to std::size_t before streaming (as was already done
        // for the action) so that a char sized id_type is written as a number
        // rather than as a character.
        open_insert("insert into parser_sm_rules_lhs(lhs) values\n", 6);

        for (const auto& rule_ : sm_._rules)
        {
            open_row();
            stream_ << static_cast<std::size_t>(rule_.first);
            close_row();
        }

        // The statement has to be terminated before the next one starts.
        close_insert();

        // NOTE(review): rows are keyed by rule_.first only, so two rules that
        // share the same lhs value cannot be told apart in this table -
        // confirm whether a rule index column is wanted.
        open_insert("insert into parser_sm_rules_rhs(lhs,rhs, pos) values\n", 4);

        for (const auto& rule_ : sm_._rules)
        {
            const std::size_t lhs_ = static_cast<std::size_t>(rule_.first);

            if (rule_.second.empty())
            {
                // Empty production: keep a row so that the rule still shows up.
                open_row();
                stream_ << lhs_ << ",NULL, 0";
                close_row();
                continue;
            }

            std::size_t pos_ = 0;

            for (const id_type id_ : rule_.second)
            {
                open_row();
                stream_ << lhs_ << "," << static_cast<std::size_t>(id_) <<
                    "," << pos_++;
                close_row();
            }
        }

        close_insert();

        // sm_._captures is not part of the SQL dump.

        stream_ << "create table parser_sm_states(\n"
                "  id integer primary key,\n"
                "  size integer\n"
                ");\n";
        // No comma after the last column: a trailing comma in the column list
        // is a syntax error.
        stream_ << "create table parser_sm_states_entries(\n"
                "  id integer primary key,\n"
                "  state integer,\n"
                "  token integer,\n"
                "  action integer,\n"
                "  param integer\n"
                ");\n";

        open_insert("insert into parser_sm_states(size) values\n", 6);

        for (const auto& vec_ : sm_._table)
        {
            open_row();
            stream_ << vec_.size();
            close_row();
        }

        close_insert();

        open_insert("insert into parser_sm_states_entries(state, token, action, param) values\n", 4);

        std::size_t state_no_ = 0;

        for (const auto& vec_ : sm_._table)
        {
            for (const auto& pair_ : vec_)
            {
                open_row();
                stream_ << state_no_ << "," <<
                    static_cast<std::size_t>(pair_.first) << ",";
                stream_ << static_cast<std::size_t>(pair_.second.action) << ",";
                stream_ << static_cast<std::size_t>(pair_.second.param);
                close_row();
            }

            ++state_no_;
        }

        close_insert();
    }

PS.: edited parser_sm_states_entries table replacing rule by token.

BenHanson commented 8 months ago

Sorry, I don't have time to wade through enormous brain dumps like this. If you can locate a bug and produce a minimal example that shows it, I will have a look.