yhirose / cpp-peglib

A single file C++ header-only PEG (Parsing Expression Grammars) library
MIT License
879 stars 112 forks source link

Inconsistent syntax error for the attached grammar #288

Closed priyankarghosh closed 5 months ago

priyankarghosh commented 6 months ago

I am observing some inconsistent behaviour with the following grammar. Both the working and non-working versions are provided below.

Working version

#include <cassert>
#include <cstdlib>

#include <iostream>
#include <string>
#include <vector>

#include <peglib.h>

using namespace peg;
using namespace std;

class parser_class {
public:
    parser_class() {
        create_parser();
    }
    void create_parser() {
        string         grammar(R"(
            LINE        <- EVAL_EXPR (',' EVAL_EXPR)*
            EVAL_EXPR   <- '%eval' EXPR / NUMBER / VAR
            EXPR        <- ATOM (OPERATOR ATOM)* {
                             precedence
                               L - +
                               L / *
                               L %
                               L ^
                           }
            ATOM        <- '(' EXPR ')' / NUMBER / VAR
            OPERATOR    <- < [-+/*%^] >
            NUMBER      <- < [0-9]+ >
            VAR         <- < ':' [a-z][a-z0-9_]* ':' >
            %whitespace <- [ \t\r\n]*
        )");
        my_parser = make_shared<peg::parser>();
        my_parser->set_logger([](size_t line, size_t col, const string& msg, const string &rule) {
                cout << line << ":" << col << ": " << msg << ", with rule: " << rule << "\n";
        });
        assert(my_parser->load_grammar(grammar) == true);
    }

    bool parse_str(const string& str) {
        bool ret = my_parser->parse(str);
        return ret;
    }
private:
    shared_ptr<peg::parser>  my_parser;
};

int main(int argc, const char **argv) {
    string str("%eval(4*:x:),%eval(2*:y:)");
    shared_ptr<parser_class> my_parser = make_shared<parser_class>();

    std::cout << "Simple parsing '" << str << "'" << '\n';
    if (my_parser->parse_str(str) == false) {
        std::cout << "syntax error..." << std::endl;
    } else {
        std::cout << "Parsing successful" << '\n';
    }

    return 0;
}

Version that throws syntax error

#include <cassert>
#include <cstdlib>

#include <iostream>
#include <string>
#include <vector>

#include <peglib.h>

using namespace peg;
using namespace std;

class parser_class {
public:
    parser_class();
    void create_parser();
    bool parse_str(const string& str);
private:
    shared_ptr<peg::parser>  my_parser;
};

/// Constructor: all setup is delegated to create_parser().
parser_class::parser_class() {
    this->create_parser();
}
void parser_class::create_parser() {
    string         grammar(R"(
        LINE        <- EVAL_EXPR (',' EVAL_EXPR)*
        EVAL_EXPR   <- '%eval' EXPR / NUMBER / VAR
        EXPR        <- ATOM (OPERATOR ATOM)* {
                         precedence
                           L - +
                           L / *
                           L %
                           L ^
                       }
        ATOM        <- '(' EXPR ')' / NUMBER / VAR
        OPERATOR    <- < [-+/*%^] >
        NUMBER      <- < [0-9]+ >
        VAR         <- < ':' [a-z][a-z0-9_]* ':' >
        %whitespace <- [ \t\r\n]*
    )");
    my_parser = make_shared<peg::parser>();
    my_parser->set_logger([](size_t line, size_t col, const string& msg, const string &rule) {
            cout << line << ":" << col << ": " << msg << ", with rule: " << rule << "\n";
    });
    assert(my_parser->load_grammar(grammar) == true);
}

/// Runs the wrapped parser over str.
/// @param str input text to parse
/// @return the value returned by peg::parser::parse for str
bool parser_class::parse_str(const std::string& str) {
    return my_parser->parse(str);
}

int main(int argc, const char **argv) {
    string str("%eval(4*:x:),%eval(2*:y:)");
    shared_ptr<parser_class> my_parser = make_shared<parser_class>();

    std::cout << "Simple parsing '" << str << "'" << '\n';
    if (my_parser->parse_str(str) == false) {
        std::cout << "syntax error..." << std::endl;
    } else {
        std::cout << "Parsing successful" << '\n';
    }

    return 0;
}

Also, if I try with -fsanitize=address, the following error is observed: SUMMARY: AddressSanitizer: heap-use-after-free ... The grammar and the string work fine in https://yhirose.github.io/cpp-peglib/. I am using g++ (GCC) 12.2.0 for compilation. My code-base is synced to 6201458, which is the latest.

Could you please take a look at the above test-case? It seems there is some memory corruption taking place with this grammar. Please let me know if you need any other details.

yhirose commented 5 months ago

@priyankarghosh if you make the grammar variable static, this problem will go away. Hope it helps.