hsutter / cppfront

A personal experimental C++ Syntax 2 -> Syntax 1 compiler
Other
5.53k stars 246 forks source link

[SUGGESTION] Grammar railroad diagram #910

Open mingodad opened 10 months ago

mingodad commented 10 months ago

If the syntax to describe the grammar is changed to an EBNF understood by https://www.bottlecaps.de/rr/ui we can have a nice navigable railroad diagram to help document/develop/debug the grammar/parser, see bellow:

//
// EBNF to be viewd at https://www.bottlecaps.de/rr/ui
//
// Copy and paste this at https://www.bottlecaps.de/rr/ui in the 'Edit Grammar' tab
// then click the 'View Diagram' tab.
//

translation_unit ::=
    declaration-seq?

declaration-seq ::=
    declaration
    | declaration-seq declaration

declaration ::=
    access-specifier? identifier "..."? unnamed-declaration
    |access-specifier? identifier alias

access-specifier ::=
     "public"
     | "protected"
     | "private"

alias ::=
    ':' template-parameter-declaration-list? "type" requires-clause? "==" type-id ';'
    | ':' "namespace" "==" id-expression ';'
    | ':' template-parameter-declaration-list? type-id? requires-clause? "==" expression ';'
//TODO  | ':' function-type "==" expression ';'
        // See commit 63efa6ed21c4d4f4f136a7a73e9f6b2c110c81d7 comment
        // for why I don't see a need to enable this yet

prefix-operator ::=
    '!' | '-' | '+'
    //| parameter-direction

postfix-operator ::=
    "++" | "--" | '*' | '&' | '~' | '$' | "..."

assignment-operator ::=
    '=' | "*=" | "/=" | "%=" | "+=" | "-=" | ">>=" | "<<=" | "&=" | "^=" | "|="

primary-expression ::=
    inspect-expression
    | id-expression
    | literal
    | '(' expression-list ')'
//TODO  | '{' expression-list '}'
    | unnamed-declaration

postfix-expression ::=
    primary-expression
    | postfix-expression postfix-operator     //[Note: without whitespace before the operator]
    | postfix-expression '[' expression-list? ']'
    | postfix-expression '(' expression-list? ')'
    | postfix-expression '.' id-expression

prefix-expression ::=
    postfix-expression
    | prefix-operator prefix-expression
//TODO     | await-expression
//TODO     | 'sizeof' '(' type-id ')'
//TODO     | 'sizeof' '...' ( identifier ')'
//TODO     | 'alignof' '(' type-id ')'
//TODO     | throws-expression

multiplicative-expression ::=
    is-as-expression
    | multiplicative-expression '*' is-as-expression
    | multiplicative-expression '/' is-as-expression
    | multiplicative-expression '%' is-as-expression

additive-expression ::=
    multiplicative-expression
    | additive-expression '+' multiplicative-expression
    | additive-expression '-' multiplicative-expression

shift-expression ::=
    additive-expression
    | shift-expression '<<' additive-expression
    | shift-expression '>>' additive-expression

compare-expression ::=
    shift-expression
    | compare-expression '<=>' shift-expression

relational-expression ::=
    compare-expression
    | relational-expression '<'  compare-expression
    | relational-expression '>'  compare-expression
    | relational-expression '<=' compare-expression
    | relational-expression '>=' compare-expression

equality-expression ::=
    relational-expression
    | equality-expression '==' relational-expression
    | equality-expression '!=' relational-expression

bit-and-expression ::=
    equality-expression
    | bit-and-expression '&' equality-expression

bit-xor-expression ::=
    bit-and-expression
    | bit-xor-expression '^' bit-and-expression

bit-or-expression ::=
    bit-xor-expression
    | bit-or-expression '|' bit-xor-expression

logical-and-expression ::=
    bit-or-expression
    | logical-and-expression '&&' bit-or-expression

//  constant-expression:    // don't need intermediate production, just use:
//  conditional-expression: // don't need intermediate production, just use:
logical-or-expression ::=
    logical-and-expression
    | logical-or-expression '||' logical-and-expression

assignment-expression ::=
    logical-or-expression
    | assignment-expression assignment-operator logical-or-expression

expression ::=               // eliminated 'condition:' - just use 'expression:'
    assignment-expression
//TODO    | try expression

expression-list ::=
    parameter-direction? expression
    | expression-list ',' parameter-direction? expression

type-id ::=
    type-qualifier-seq? qualified-id
    | type-qualifier-seq? unqualified-id

type-qualifier-seq ::=
    type-qualifier
    | type-qualifier-seq type-qualifier

type-qualifier ::=
    'const'
    | '*'

is-as-expression ::=
    prefix-expression
    | is-as-expression is-type-constraint
    | is-as-expression is-value-constraint
    | is-as-expression as-type-cast
//TODO     | type-id is-type-constraint

is-type-constraint ::=
    'is' type-id

is-value-constraint ::=
    'is' expression

as-type-cast ::=
    'as' type-id

unqualified-id ::=
    identifier
    | keyword
    | template-id
//TODO     | operator-function-id
    // ...

template-id ::=
    identifier '<' template-argument-list? '>'

template-argument-list ::=
    template-argument-list ',' template-argument

template-argument ::=
    // note: < > << >> are not allowed in expressions until new ( is opened
    'const' type-id
    | expression
    | type-id

qualified-id ::=
    nested-name-specifier unqualified-id
    | member-name-specifier unqualified-id

nested-name-specifier ::=
    '::'
    | unqualified-id '::'

member-name-specifier ::=
    unqualified-id '.'

id-expression ::=
    qualified-id
    | unqualified-id

literal ::=
    integer-literal ud-suffix?
    | character-literal ud-suffix?
    | floating-point-literal ud-suffix?
    | string-literal ud-suffix?
    | boolean-literal ud-suffix?
    | pointer-literal ud-suffix?
    | user-defined-literal ud-suffix?

expression-statement ::=
    expression ';'
    | expression

selection-statement ::=
    'if' 'constexpr'? logical-or-expression compound-statement
    | 'if' 'constexpr'? logical-or-expression compound-statement 'else' compound-statement

return-statement ::=
    "return" expression? ';'

iteration-statement ::=
    label? 'while' logical-or-expression next-clause? compound-statement
    | label? 'do' compound-statement next-clause? 'while' logical-or-expression ';'
    | label? 'for' expression next-clause? 'do' unnamed-declaration

label ::=
    identifier ':'

next-clause ::=
    'next' assignment-expression

alternative ::=
    alt-name? is-type-constraint '=' statement
    | alt-name? is-value-constraint '=' statement
    | alt-name? as-type-cast '=' statement

//TODO alt-name ::=
//  unqualified-id ':'

inspect-expression ::=
    'inspect' 'constexpr'? expression '{' alternative-seq? '}'
    | 'inspect' 'constexpr'? expression '->' type-id '{' alternative-seq? '}'

alternative-seq ::=
    alternative
    | alternative-seq alternative

jump-statement ::=
    'break' identifier? ';'
    | 'continue' identifier? ';'

using-statement ::=
    'using' id-expression ';'
    | 'using' 'namespace' id-expression ';'

statement ::=
    selection-statement
    | using-statement
    | inspect-expression
    | return-statement
    | jump-statement
    | iteration-statement
    | compound-statement
    | contract-statement
    | declaration
    | expression-statement

contract-statement ::=
    contract ';'

//TODO     try-block

compound-statement ::=
    '{' statement-seq? '}'

statement-seq ::=
    statement
    | statement-seq statement

parameter-declaration ::=
    this-specifier? parameter-direction? declaration

parameter-direction ::=
    'in' | 'copy' | 'inout' | 'out' | 'move' | 'forward'

this-specifier ::=
    'implicit'
    | 'virtual'
    | 'override'
    | 'final'

parameter-declaration-list ::=
    '(' parameter-declaration-seq? ')'

parameter-declaration-seq ::=
    parameter-declaration
    | parameter-declaration-seq ',' parameter-declaration

contract ::=
    contract-kind contract-group? ':' '(' logical-or-expression ')'
    | contract-kind contract-group? ':' '(' logical-or-expression ',' expression ')'

contract-group ::=
    '<' id-expression contract-flags?'>'

contract-flags ::=
    ',' id-expression contract-flags?

contract-kind ::=
    'pre' | 'post' | 'assert'

function-type ::=
    parameter-declaration-list throws-specifier? return-list? contract-seq?

throws-specifier ::=
    'throws'

return-list ::=
    expression-statement
    | '->' parameter-direction? type-id
    | '->' parameter-declaration-list

contract-seq ::=
    contract
    | contract-seq contract

unnamed-declaration ::=
    ':' meta-functions-list? template-parameter-declaration-list? function-type requires-clause? '=' statement
    | ':' meta-functions-list? template-parameter-declaration-list? function-type statement
    | ':' meta-functions-list? template-parameter-declaration-list? type-id? requires-clause? '=' statement
    | ':' meta-functions-list? template-parameter-declaration-list? type-id
    | ':' meta-functions-list? template-parameter-declaration-list? 'final'? 'type' requires-clause? '=' statement
    | ':' 'namespace' '=' statement

meta-functions-list ::=
    '@' id-expression
    | meta-functions-list '@' id-expression

requires-clause ::=
    // note: for aliases, == is not allowed in expressions until new ( is opened
    'requires' logical-or-expression

template-parameter-declaration-list ::=
    '<' parameter-declaration-seq '>'
mingodad commented 10 months ago

Also to help there is here https://github.com/robertoraggi/cplusplus/issues/281 the latest C++ draft standard grammar in an EBNF understood by https://www.bottlecaps.de/rr/ui .

hsutter commented 10 months ago

Thanks for the suggestion.

I know there are different grammar styles, EBNF being a popular one. I try to generally follow the way the current C++ grammar is specified, though, and I want to stick with that unless there's a compelling must-do reason to change.

Is there a compelling benefit to this change that we don't get with grep //G *.h ?

mingodad commented 10 months ago

The compelling reason for me is the ability to generate a railroad diagram and it's less ambiguous when one alternative is long and need to be split between more than one line (the way the current C++ manage such situations is by adding space indentation).

It seems that you've not tried visualize the railroad diagrams from your own grammar that I adapted here on the first message (https://github.com/hsutter/cppfront/issues/910#issue-2057282008) and also the latest C++ standard also adapted by me here https://github.com/robertoraggi/cplusplus/issues/281#issue-2050759937 or here https://github.com/cplusplus/draft/issues/6742#issue-2054070525 .

If you try then you'll probably will have a different opinion.

To try then only copy and paste then at https://www.bottlecaps.de/rr/ui in the 'Edit Grammar' tab then click the 'View Diagram' tab.

realgdman commented 10 months ago

Actually its not hard to write script, may be 100-200 lines long, converting current grammar to EBNF, using regex.

mingodad commented 10 months ago

I've just added the C++11, C++14, C++117, C++20, C++23-draft converted to the EBNF understood by https://www.bottlecaps.de/rr/ui and the navigable railroad diagrams read to view here https://mingodad.github.io/cpp-grammars/ .

hsutter commented 10 months ago

the navigable railroad diagrams read to view here https://mingodad.github.io/cpp-grammars/

Ah, now I see what you mean. Sorry, I misunderstood that the example in the initial comment was the railroad diagram. You're right, I've never used the graphical kind, just occasionally seen it.

it's less ambiguous when one alternative is long and need to be split between more than one line

I've tried to keep the productions under 110 characters, and just did a cleanup pass to make them all less than 90: 9585ecc34406cfe92800a62489d194613c4ed62a

So maybe I just haven't felt the pain because the productions are short and not complex.

I definitely see your point with horrible productions like cast-expression in this example: https://mingodad.github.io/cpp-grammars/c++23.ebnf.xhtml#cast-expression

There's nothing like that in my grammar fortunately. But, I admit I do like the ability to directly link to #cast-expression like that!

Let me take a quick poll of readers of this thread...

If you want to enable railroad diagrams and find them useful, please upvote this reply!

gregmarr commented 10 months ago

Looks like the bottlecaps.de site has gone offline. I've worked with the diagram like that for the JSON grammar, but had no idea what they were called. I would say that I have found it useful on occasion, and would probably find them more useful if I used them more regularly. I could imagine having them for the new syntax being very useful as we start using it more.

Also, since all my parsing work was done with lex and yacc (and yes, I have/had the O'Reilly bird book), the EBNF form is much more familiar to me.

mingodad commented 10 months ago

The software used by https://www.bottlecaps.de/rr/ui is here https://github.com/GuntherRademacher/rr and can be used offline (web/command line).

hsutter commented 7 months ago

Following up on this in the context of #1031: Above, I see 5 upvotes for this suggestion. So without rewriting the grammar, are there any blockers right now to mechanically generate EBNF grammar from the //G comment grammar I'm using now (and which likely still has a few oversight corrections needed)? If so is there anything incremental I can do to unblock that mechanical transformation?

I'll ask the same question on the sister issues.

Thanks!

JohelEGP commented 7 months ago

(and which likely still has a few oversight corrections needed)

387 has some comments on that.

mingodad commented 7 months ago

Here is one script that extract the grammar using Lua string pattern match (https://github.com/mingodad/squilu):

auto fname = "source/parse.h";
auto txt = readfile(fname);
//print(txt);

auto rule_lines = 0;
txt.gmatch(
    "//G[^\n]+",
    function(m)
    {
        //print(m);
        if(m.match("//G [^ \t\n]-:[^\n]*"))
        {
            auto rec = m.match("//G ([^ \t\n]-):([^\n]*)")
            print("\n" + rec[0] + " ::=" + rec[1]);
            rule_lines = 0;
        }
        else
        {
            auto line = m.match("//G%s+([^\n]+)")
            if(line)
            {
                if(line.match("one of "))
                {
                    line = line.match("one of%s+([^\n]+)");
                    line = "( " + line.gsub("%s+", " | ") + " )";
                }
                auto isComment = line[0] == '#';
                if(isComment)
                {
                    line = "//" + line;
                }
                if(rule_lines == 0)
                {
                    print("\t" + line);
                }
                else print("\t| " + line);
                if(isComment)
                {
                    --rule_lines;
                }
            }
            else
            {
                line = m.match("//GT%s+([^\n]+)")
                if(line)
                {
                    if(line[0] == '#') line = "//" + line;
                    print("\t\t" + line);
                }
            }
            ++rule_lines;
        }
        return true;
    }
);

Output:

prefix-operator ::=
    ( '!' | '-' | '+' )
        parameter-direction

postfix-operator ::=
    ( '++' | '--' | '*' | '&' | '~' | '$' | '...' )

assignment-operator ::=
    ( '=' | '*=' | '/=' | '%=' | '+=' | '-=' | '>>=' | '<<=' | '&=' | '^=' | '|=' )

primary-expression ::=
    inspect-expression
    | id-expression
    | literal
    | '(' expression-list ','? ')'
    | unnamed-declaration

postfix-expression ::=
    primary-expression
    | postfix-expression postfix-operator     [Note: without whitespace before the operator]
    | postfix-expression '[' expression-list? ','? ']'
    | postfix-expression '(' expression-list? ','? ')'
    | postfix-expression '.' id-expression

prefix-expression ::=
    postfix-expression
    | prefix-operator prefix-expression

multiplicative-expression ::=
    is-as-expression
    | multiplicative-expression '*' is-as-expression
    | multiplicative-expression '/' is-as-expression
    | multiplicative-expression '%' is-as-expression

additive-expression ::=
    multiplicative-expression
    | additive-expression '+' multiplicative-expression
    | additive-expression '-' multiplicative-expression

shift-expression ::=
    additive-expression
    | shift-expression '<<' additive-expression
    | shift-expression '>>' additive-expression

compare-expression ::=
    shift-expression
    | compare-expression '<=>' shift-expression

relational-expression ::=
    compare-expression
    | relational-expression '<'  compare-expression
    | relational-expression '>'  compare-expression
    | relational-expression '<=' compare-expression
    | relational-expression '>=' compare-expression

equality-expression ::=
    relational-expression
    | equality-expression '==' relational-expression
    | equality-expression '!=' relational-expression

bit-and-expression ::=
    equality-expression
    | bit-and-expression '&' equality-expression

bit-xor-expression ::=
    bit-and-expression
    | bit-xor-expression '^' bit-and-expression

bit-or-expression ::=
    bit-xor-expression
    | bit-or-expression '|' bit-xor-expression

logical-and-expression ::=
    bit-or-expression
    | logical-and-expression '&&' bit-or-expression

logical-or-expression ::=
    logical-and-expression
    | logical-or-expression '||' logical-and-expression

assignment-expression ::=
    logical-or-expression
    | assignment-expression assignment-operator logical-or-expression

expression ::=               // eliminated 'condition:' - just use 'expression:'
    assignment-expression

expression-list ::=
    parameter-direction? expression
    | expression-list ',' parameter-direction? expression

type-id ::=
    type-qualifier-seq? qualified-id
    | type-qualifier-seq? unqualified-id

type-qualifier-seq ::=
    type-qualifier
    | type-qualifier-seq type-qualifier

type-qualifier ::=
    'const'
    | '*'

is-as-expression ::=
    prefix-expression
    | is-as-expression is-type-constraint
    | is-as-expression is-value-constraint
    | is-as-expression as-type-cast

is-type-constraint ::=
    'is' type-id

is-value-constraint ::=
    'is' expression

as-type-cast ::=
    'as' type-id

unqualified-id ::=
    identifier
    | keyword
    | template-id
    | ...

template-id ::=
    identifier '<' template-arguments? '>'

template-arguments ::=
    template-arguments ',' template-argument

template-argument ::=
    //# note: < > << >> are not allowed in expressions until new ( is opened
    'const' type-id
    | expression
    | type-id

qualified-id ::=
    nested-name-specifier unqualified-id
    | member-name-specifier unqualified-id

nested-name-specifier ::=
    '::'
    | unqualified-id '::'

member-name-specifier ::=
    unqualified-id '.'

id-expression ::=
    qualified-id
    | unqualified-id

literal ::=
    integer-literal ud-suffix?
    | character-literal ud-suffix?
    | floating-point-literal ud-suffix?
    | string-literal ud-suffix?
    | boolean-literal ud-suffix?
    | pointer-literal ud-suffix?
    | user-defined-literal ud-suffix?

expression-statement ::=
    expression ';'
    | expression

selection-statement ::=
    'if' 'constexpr'? logical-or-expression compound-statement
    | 'if' 'constexpr'? logical-or-expression compound-statement 'else' compound-statement

return-statement ::=
    return expression? ';'

iteration-statement ::=
    label? 'while' logical-or-expression next-clause? compound-statement
    | label? 'do' compound-statement next-clause? 'while' logical-or-expression ';'
    | label? 'for' expression next-clause? 'do' unnamed-declaration

label ::=
    identifier ':'

next-clause ::=
    'next' assignment-expression

alternative ::=
    alt-name? is-type-constraint '=' statement
    | alt-name? is-value-constraint '=' statement
    | alt-name? as-type-cast '=' statement
    | unqualified-id ':'

inspect-expression ::=
    'inspect' 'constexpr'? expression '{' alternative-seq? '}'
    | 'inspect' 'constexpr'? expression '->' type-id '{' alternative-seq? '}'

alternative-seq ::=
    alternative
    | alternative-seq alternative

jump-statement ::=
    'break' identifier? ';'
    | 'continue' identifier? ';'

using-statement ::=
    'using' qualified-id ';'
    | 'using' 'namespace' id-expression ';'

statement ::=
    selection-statement
    | using-statement
    | inspect-expression
    | return-statement
    | jump-statement
    | iteration-statement
    | compound-statement
    | contract-statement
    | declaration
    | expression-statement

contract-statement ::=
    contract ';'

compound-statement ::=
    '{' statement-seq? '}'

statement-seq ::=
    statement
    | statement-seq statement

parameter-declaration ::=
    this-specifier? parameter-direction? declaration

parameter-direction ::= one of
    'in' 'copy' 'inout' 'out' 'move' 'forward'

this-specifier ::=
    'implicit'
    | 'virtual'
    | 'override'
    | 'final'

parameter-declaration-list ::=
    '(' parameter-declaration-seq? ','? ')'

parameter-declaration-seq ::=
    parameter-declaration
    | parameter-declaration-seq ',' parameter-declaration

contract ::=
    contract-kind contract-group? ':' '(' logical-or-expression ')'
    | contract-kind contract-group? ':' '(' logical-or-expression ',' expression ')'

contract-group ::=
    '<' id-expression contract-flags?'>'

contract-flags ::=
    ',' id-expression contract-flags?

contract-kind ::= one of
    'pre' 'post' 'assert'

function-type ::=
    parameter-declaration-list throws-specifier? return-list? contract-seq?

throws-specifier ::=
    'throws'

return-list ::=
    expression-statement
    | '->' parameter-direction? type-id
    | '->' parameter-declaration-list

contract-seq ::=
    contract
    | contract-seq contract

unnamed-declaration ::=
    ':' meta-functions? template-parameters? function-type requires-clause? '=' statement
    | ':' meta-functions? template-parameters? function-type statement
    | ':' meta-functions? template-parameters? type-id? requires-clause? '=' statement
    | ':' meta-functions? template-parameters? type-id
    | ':' meta-functions? template-parameters? 'final'? 'type' requires-clause? '=' statement
    | ':' 'namespace' '=' statement

meta-functions ::=
    '@' id-expression
    | meta-functions '@' id-expression

requires-clause ::=
    //# note: for aliases, == is not allowed in expressions until new ( is opened
    'requires' logical-or-expression

template-parameters ::=
    '<' parameter-declaration-seq '>'

alias ::=
    ':' template-parameters? 'type' requires-clause? '==' type-id ';'
    | ':' 'namespace' '==' id-expression ';'
    | ':' template-parameters? type-id? requires-clause? '==' expression ';'
        ':' function-type '==' expression ';'
        //# See commit 63efa6ed21c4d4f4f136a7a73e9f6b2c110c81d7 comment
        //# for why I don't see a need to enable this yet

declaration ::=
    access-specifier? identifier '...'? unnamed-declaration
    | access-specifier? identifier alias

access-specifier ::=
    public
    | protected
    | private

declaration-seq ::=
    declaration
    | declaration-seq declaration

translation-unit ::=
    declaration-seq?
hsutter commented 7 months ago

Great! How close is that to what you need for bottlecaps?

I'll have to get rid of that whitespace note...

mingodad commented 7 months ago

You don't need to remove the notes but only prefix then with "//" will be enough.

It's actually generating the railroad diagram, but for navigation purposes probably the script should turn then upside down to start with the top level translation-unit.

mingodad commented 7 months ago

I've just changed the script to invert the rules order (turn then upside down):

auto fname = "source/parse.h";
auto txt = readfile(fname);
//print(txt);

auto rule_list = [];
auto rule_buf = [];
auto rule_lines = 0;

function checkRuleBuf()
{
    if(rule_buf.size() > 0)
    {
        rule_list.append(rule_buf.join("\n"));
        rule_buf.clear();
    }
}

txt.gmatch(
    "//G[^\n]+",
    function(m)
    {
        //print(m);
        if(m.match("//G [^ \t\n]-:[^\n]*"))
        {
            checkRuleBuf();
            auto rec = m.match("//G ([^ \t\n]-):([^\n]*)")
            auto line = rec[0] + " ::=" + rec[1];
            rule_buf.append(line);
            //print(line);
            rule_lines = 0;
        }
        else
        {
            auto line = m.match("//G%s+([^\n]+)")
            if(line)
            {
                if(line.match("one of "))
                {
                    line = line.match("one of%s+([^\n]+)");
                    line = "( " + line.gsub("%s+", " | ") + " )";
                }
                auto isComment = line[0] == '#';
                if(isComment)
                {
                    line = "//" + line;
                }
                if(rule_lines == 0)
                {
                    line = "\t" + line;
                    rule_buf.append(line);
                    //print(line);
                }
                else
                {
                    line = "\t| " + line;
                    rule_buf.append(line);
                    //print(line);
                }
                if(isComment)
                {
                    --rule_lines;
                }
            }
            else
            {
                line = m.match("//GT%s+([^\n]+)")
                if(line)
                {
                    if(line[0] == '#') line = "//" + line;
                    line = "\t\t" + line;
                    rule_buf.append(line);
                    //print(line);
                }
            }
            ++rule_lines;
        }
        return true;
    }
);

checkRuleBuf();

for(auto i=rule_list.size()-1; i >= 0; --i)
{
    print("\n" + rule_list[i]);
}

Output:

translation-unit ::=
    declaration-seq?

declaration-seq ::=
    declaration
    | declaration-seq declaration

access-specifier ::=
    public
    | protected
    | private

declaration ::=
    access-specifier? identifier '...'? unnamed-declaration
    | access-specifier? identifier alias

alias ::=
    ':' template-parameters? 'type' requires-clause? '==' type-id ';'
    | ':' 'namespace' '==' id-expression ';'
    | ':' template-parameters? type-id? requires-clause? '==' expression ';'
        ':' function-type '==' expression ';'
        //# See commit 63efa6ed21c4d4f4f136a7a73e9f6b2c110c81d7 comment
        //# for why I don't see a need to enable this yet

template-parameters ::=
    '<' parameter-declaration-seq '>'

requires-clause ::=
    //# note: for aliases, == is not allowed in expressions until new ( is opened
    'requires' logical-or-expression

meta-functions ::=
    '@' id-expression
    | meta-functions '@' id-expression

unnamed-declaration ::=
    ':' meta-functions? template-parameters? function-type requires-clause? '=' statement
    | ':' meta-functions? template-parameters? function-type statement
    | ':' meta-functions? template-parameters? type-id? requires-clause? '=' statement
    | ':' meta-functions? template-parameters? type-id
    | ':' meta-functions? template-parameters? 'final'? 'type' requires-clause? '=' statement
    | ':' 'namespace' '=' statement

contract-seq ::=
    contract
    | contract-seq contract

return-list ::=
    expression-statement
    | '->' parameter-direction? type-id
    | '->' parameter-declaration-list

throws-specifier ::=
    'throws'

function-type ::=
    parameter-declaration-list throws-specifier? return-list? contract-seq?

contract-kind ::= one of
    'pre' 'post' 'assert'

contract-flags ::=
    ',' id-expression contract-flags?

contract-group ::=
    '<' id-expression contract-flags?'>'

contract ::=
    contract-kind contract-group? ':' '(' logical-or-expression ')'
    | contract-kind contract-group? ':' '(' logical-or-expression ',' expression ')'

parameter-declaration-seq ::=
    parameter-declaration
    | parameter-declaration-seq ',' parameter-declaration

parameter-declaration-list ::=
    '(' parameter-declaration-seq? ','? ')'

this-specifier ::=
    'implicit'
    | 'virtual'
    | 'override'
    | 'final'

parameter-direction ::= one of
    'in' 'copy' 'inout' 'out' 'move' 'forward'

parameter-declaration ::=
    this-specifier? parameter-direction? declaration

statement-seq ::=
    statement
    | statement-seq statement

compound-statement ::=
    '{' statement-seq? '}'

contract-statement ::=
    contract ';'

statement ::=
    selection-statement
    | using-statement
    | inspect-expression
    | return-statement
    | jump-statement
    | iteration-statement
    | compound-statement
    | contract-statement
    | declaration
    | expression-statement

using-statement ::=
    'using' qualified-id ';'
    | 'using' 'namespace' id-expression ';'

jump-statement ::=
    'break' identifier? ';'
    | 'continue' identifier? ';'

alternative-seq ::=
    alternative
    | alternative-seq alternative

inspect-expression ::=
    'inspect' 'constexpr'? expression '{' alternative-seq? '}'
    | 'inspect' 'constexpr'? expression '->' type-id '{' alternative-seq? '}'

alternative ::=
    alt-name? is-type-constraint '=' statement
    | alt-name? is-value-constraint '=' statement
    | alt-name? as-type-cast '=' statement
    | unqualified-id ':'

next-clause ::=
    'next' assignment-expression

label ::=
    identifier ':'

iteration-statement ::=
    label? 'while' logical-or-expression next-clause? compound-statement
    | label? 'do' compound-statement next-clause? 'while' logical-or-expression ';'
    | label? 'for' expression next-clause? 'do' unnamed-declaration

return-statement ::=
    return expression? ';'

selection-statement ::=
    'if' 'constexpr'? logical-or-expression compound-statement
    | 'if' 'constexpr'? logical-or-expression compound-statement 'else' compound-statement

expression-statement ::=
    expression ';'
    | expression

literal ::=
    integer-literal ud-suffix?
    | character-literal ud-suffix?
    | floating-point-literal ud-suffix?
    | string-literal ud-suffix?
    | boolean-literal ud-suffix?
    | pointer-literal ud-suffix?
    | user-defined-literal ud-suffix?

id-expression ::=
    qualified-id
    | unqualified-id

member-name-specifier ::=
    unqualified-id '.'

nested-name-specifier ::=
    '::'
    | unqualified-id '::'

qualified-id ::=
    nested-name-specifier unqualified-id
    | member-name-specifier unqualified-id

template-argument ::=
    //# note: < > << >> are not allowed in expressions until new ( is opened
    'const' type-id
    | expression
    | type-id

template-arguments ::=
    template-arguments ',' template-argument

template-id ::=
    identifier '<' template-arguments? '>'

unqualified-id ::=
    identifier
    | keyword
    | template-id
    | ...

as-type-cast ::=
    'as' type-id

is-value-constraint ::=
    'is' expression

is-type-constraint ::=
    'is' type-id

is-as-expression ::=
    prefix-expression
    | is-as-expression is-type-constraint
    | is-as-expression is-value-constraint
    | is-as-expression as-type-cast

type-qualifier ::=
    'const'
    | '*'

type-qualifier-seq ::=
    type-qualifier
    | type-qualifier-seq type-qualifier

type-id ::=
    type-qualifier-seq? qualified-id
    | type-qualifier-seq? unqualified-id

expression-list ::=
    parameter-direction? expression
    | expression-list ',' parameter-direction? expression

expression ::=               // eliminated 'condition:' - just use 'expression:'
    assignment-expression

assignment-expression ::=
    logical-or-expression
    | assignment-expression assignment-operator logical-or-expression

logical-or-expression ::=
    logical-and-expression
    | logical-or-expression '||' logical-and-expression

logical-and-expression ::=
    bit-or-expression
    | logical-and-expression '&&' bit-or-expression

bit-or-expression ::=
    bit-xor-expression
    | bit-or-expression '|' bit-xor-expression

bit-xor-expression ::=
    bit-and-expression
    | bit-xor-expression '^' bit-and-expression

bit-and-expression ::=
    equality-expression
    | bit-and-expression '&' equality-expression

equality-expression ::=
    relational-expression
    | equality-expression '==' relational-expression
    | equality-expression '!=' relational-expression

relational-expression ::=
    compare-expression
    | relational-expression '<'  compare-expression
    | relational-expression '>'  compare-expression
    | relational-expression '<=' compare-expression
    | relational-expression '>=' compare-expression

compare-expression ::=
    shift-expression
    | compare-expression '<=>' shift-expression

shift-expression ::=
    additive-expression
    | shift-expression '<<' additive-expression
    | shift-expression '>>' additive-expression

additive-expression ::=
    multiplicative-expression
    | additive-expression '+' multiplicative-expression
    | additive-expression '-' multiplicative-expression

multiplicative-expression ::=
    is-as-expression
    | multiplicative-expression '*' is-as-expression
    | multiplicative-expression '/' is-as-expression
    | multiplicative-expression '%' is-as-expression

prefix-expression ::=
    postfix-expression
    | prefix-operator prefix-expression

postfix-expression ::=
    primary-expression
    | postfix-expression postfix-operator     [Note: without whitespace before the operator]
    | postfix-expression '[' expression-list? ','? ']'
    | postfix-expression '(' expression-list? ','? ')'
    | postfix-expression '.' id-expression

primary-expression ::=
    inspect-expression
    | id-expression
    | literal
    | '(' expression-list ','? ')'
    | unnamed-declaration

assignment-operator ::=
    ( '=' | '*=' | '/=' | '%=' | '+=' | '-=' | '>>=' | '<<=' | '&=' | '^=' | '|=' )

postfix-operator ::=
    ( '++' | '--' | '*' | '&' | '~' | '$' | '...' )

prefix-operator ::=
    ( '!' | '-' | '+' )
        parameter-direction
mingodad commented 7 months ago

I've tried to also run the script against the source/lex.h but there is several trick problems, bellow is the grammar for the parser plus lexer with several manual fixes.

Why use { and } to group regex in:

                //G floating-point-literal:
                //G     digit { ''' | digit }* . digit ({ ''' | digit }*)? ([eE][-+]?digit { ' | digit }*) [fFlL]

Parser+lexer EBNF with manual fixes:

translation-unit ::=
    declaration-seq?

declaration-seq ::=
    declaration
    | declaration-seq declaration

access-specifier ::=
    public
    | protected
    | private

declaration ::=
    access-specifier? identifier '...'? unnamed-declaration
    | access-specifier? identifier alias

alias ::=
    ':' template-parameters? 'type' requires-clause? '==' type-id ';'
    | ':' 'namespace' '==' id-expression ';'
    | ':' template-parameters? type-id? requires-clause? '==' expression ';'
        ':' function-type '==' expression ';'
        //# See commit 63efa6ed21c4d4f4f136a7a73e9f6b2c110c81d7 comment
        //# for why I don't see a need to enable this yet

template-parameters ::=
    '<' parameter-declaration-seq '>'

requires-clause ::=
    //# note: for aliases, == is not allowed in expressions until new ( is opened
    'requires' logical-or-expression

meta-functions ::=
    '@' id-expression
    | meta-functions '@' id-expression

unnamed-declaration ::=
    ':' meta-functions? template-parameters? function-type requires-clause? '=' statement
    | ':' meta-functions? template-parameters? function-type statement
    | ':' meta-functions? template-parameters? type-id? requires-clause? '=' statement
    | ':' meta-functions? template-parameters? type-id
    | ':' meta-functions? template-parameters? 'final'? 'type' requires-clause? '=' statement
    | ':' 'namespace' '=' statement

contract-seq ::=
    contract
    | contract-seq contract

return-list ::=
    expression-statement
    | '->' parameter-direction? type-id
    | '->' parameter-declaration-list

throws-specifier ::=
    'throws'

function-type ::=
    parameter-declaration-list throws-specifier? return-list? contract-seq?

contract-kind ::= //one of
    'pre' | 'post' | 'assert'

contract-flags ::=
    ',' id-expression contract-flags?

contract-group ::=
    '<' id-expression contract-flags?'>'

contract ::=
    contract-kind contract-group? ':' '(' logical-or-expression ')'
    | contract-kind contract-group? ':' '(' logical-or-expression ',' expression ')'

parameter-declaration-seq ::=
    parameter-declaration
    | parameter-declaration-seq ',' parameter-declaration

parameter-declaration-list ::=
    '(' parameter-declaration-seq? ','? ')'

this-specifier ::=
    'implicit'
    | 'virtual'
    | 'override'
    | 'final'

parameter-direction ::= //one of
    'in' | 'copy' | 'inout' | 'out' | 'move' | 'forward'

parameter-declaration ::=
    this-specifier? parameter-direction? declaration

statement-seq ::=
    statement
    | statement-seq statement

compound-statement ::=
    '{' statement-seq? '}'

contract-statement ::=
    contract ';'

statement ::=
    selection-statement
    | using-statement
    | inspect-expression
    | return-statement
    | jump-statement
    | iteration-statement
    | compound-statement
    | contract-statement
    | declaration
    | expression-statement

using-statement ::=
    'using' qualified-id ';'
    | 'using' 'namespace' id-expression ';'

jump-statement ::=
    'break' identifier? ';'
    | 'continue' identifier? ';'

alternative-seq ::=
    alternative
    | alternative-seq alternative

inspect-expression ::=
    'inspect' 'constexpr'? expression '{' alternative-seq? '}'
    | 'inspect' 'constexpr'? expression '->' type-id '{' alternative-seq? '}'

alternative ::=
    alt-name? is-type-constraint '=' statement
    | alt-name? is-value-constraint '=' statement
    | alt-name? as-type-cast '=' statement
    | unqualified-id ':'

next-clause ::=
    'next' assignment-expression

label ::=
    identifier ':'

iteration-statement ::=
    label? 'while' logical-or-expression next-clause? compound-statement
    | label? 'do' compound-statement next-clause? 'while' logical-or-expression ';'
    | label? 'for' expression next-clause? 'do' unnamed-declaration

return-statement ::=
    return expression? ';'

selection-statement ::=
    'if' 'constexpr'? logical-or-expression compound-statement
    | 'if' 'constexpr'? logical-or-expression compound-statement 'else' compound-statement

expression-statement ::=
    expression ';'
    | expression

literal ::=
    integer-literal ud-suffix?
    | character-literal ud-suffix?
    | floating-point-literal ud-suffix?
    | string-literal ud-suffix?
    | boolean-literal ud-suffix?
    | pointer-literal ud-suffix?
    | user-defined-literal ud-suffix?

id-expression ::=
    qualified-id
    | unqualified-id

member-name-specifier ::=
    unqualified-id '.'

nested-name-specifier ::=
    '::'
    | unqualified-id '::'

qualified-id ::=
    nested-name-specifier unqualified-id
    | member-name-specifier unqualified-id

template-argument ::=
    //# note: < > << >> are not allowed in expressions until new ( is opened
    'const' type-id
    | expression
    | type-id

template-arguments ::=
    template-arguments ',' template-argument

template-id ::=
    identifier '<' template-arguments? '>'

unqualified-id ::=
    identifier
    | keyword
    | template-id
    | ...

as-type-cast ::=
    'as' type-id

is-value-constraint ::=
    'is' expression

is-type-constraint ::=
    'is' type-id

is-as-expression ::=
    prefix-expression
    | is-as-expression is-type-constraint
    | is-as-expression is-value-constraint
    | is-as-expression as-type-cast

type-qualifier ::=
    'const'
    | '*'

type-qualifier-seq ::=
    type-qualifier
    | type-qualifier-seq type-qualifier

type-id ::=
    type-qualifier-seq? qualified-id
    | type-qualifier-seq? unqualified-id

expression-list ::=
    parameter-direction? expression
    | expression-list ',' parameter-direction? expression

expression ::=               // eliminated 'condition:' - just use 'expression:'
    assignment-expression

assignment-expression ::=
    logical-or-expression
    | assignment-expression assignment-operator logical-or-expression

logical-or-expression ::=
    logical-and-expression
    | logical-or-expression '||' logical-and-expression

logical-and-expression ::=
    bit-or-expression
    | logical-and-expression '&&' bit-or-expression

bit-or-expression ::=
    bit-xor-expression
    | bit-or-expression '|' bit-xor-expression

bit-xor-expression ::=
    bit-and-expression
    | bit-xor-expression '^' bit-and-expression

bit-and-expression ::=
    equality-expression
    | bit-and-expression '&' equality-expression

equality-expression ::=
    relational-expression
    | equality-expression '==' relational-expression
    | equality-expression '!=' relational-expression

relational-expression ::=
    compare-expression
    | relational-expression '<'  compare-expression
    | relational-expression '>'  compare-expression
    | relational-expression '<=' compare-expression
    | relational-expression '>=' compare-expression

compare-expression ::=
    shift-expression
    | compare-expression '<=>' shift-expression

shift-expression ::=
    additive-expression
    | shift-expression '<<' additive-expression
    | shift-expression '>>' additive-expression

additive-expression ::=
    multiplicative-expression
    | additive-expression '+' multiplicative-expression
    | additive-expression '-' multiplicative-expression

multiplicative-expression ::=
    is-as-expression
    | multiplicative-expression '*' is-as-expression
    | multiplicative-expression '/' is-as-expression
    | multiplicative-expression '%' is-as-expression

prefix-expression ::=
    postfix-expression
    | prefix-operator prefix-expression

postfix-expression ::=
    primary-expression
    | postfix-expression postfix-operator     //[Note: without whitespace before the operator]
    | postfix-expression '[' expression-list? ','? ']'
    | postfix-expression '(' expression-list? ','? ')'
    | postfix-expression '.' id-expression

primary-expression ::=
    inspect-expression
    | id-expression
    | literal
    | '(' expression-list ','? ')'
    | unnamed-declaration

assignment-operator ::=
    ( '=' | '*=' | '/=' | '%=' | '+=' | '-=' | '>>=' | '<<=' | '&=' | '^=' | '|=' )

postfix-operator ::=
    ( '++' | '--' | '*' | '&' | '~' | '$' | '...' )

prefix-operator ::=
    ( '!' | '-' | '+' )
        parameter-direction

//Lexer

simple-escape-sequence ::=
    '\' "{ any member of the basic character set except u, U, or x }"

hexadecimal-escape-sequence ::=
    '\x' hexadecimal-digit
    | hexadecimal-escape-sequence hexadecimal-digit

universal-character-name ::=
    '\u' hex-quad
    | '\U' hex-quad hex-quad

hex-quad ::=
    hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit

escape-sequence ::=
    hexadecimal-escape-sequence
    | simple-escape-sequence

s-char ::=
    universal-character-name
    | escape-sequence
    | basic-s-char

basic-s-char ::=
    'any member of the basic source character set except " \ or new-line'

c-char ::=
    universal-character-name
    | escape-sequence
    | basic-c-char

basic-c-char ::=
    "any member of the basic source character set except ' \ or new-line"

keyword ::=
    "any Cpp1-and-Cpp2 keyword"
    | (/*one of:*/ 'import' | 'module' | 'export' | 'is' | 'as')

encoding-prefix ::= //one of
    'u8' | 'u' | 'uR' | 'u8R' | 'U' | 'UR' | 'L' | 'LR' | 'R'

token ::=
    identifier
    | keyword
    | literal
    | operator-or-punctuator

operator-or-punctuator ::=
    operator
    | punctuator

operator ::= //one of
    '/=' | '/'
    | '<<=' | '<<' | '<=>' | '<=' | '<'
    | '>>=' | '>>' | '>=' | '>'
    | '++' | '+=' | '+'
    | '--' | '-=' | '->' | '-'
    | '||=' | '||' | '|=' | '|'
    | '&&=' | '&&' | '&=' | '&'
    | '*=' | '*'
    | '%=' | '%'
    | '^=' | '^'
    | '~=' | '~'
    | '==' | '='
    | '!=' | '!'

punctuator ::= //one of
    '...' | '.'
    | '::' | ':'
    | '{' | '}' | '(' | ')' | '[' | ']' | ';' | ',' | '?' | '$'

literal ::=
    integer-literal
    | character-literal
    | floating-point-literal
    | string-literal

integer-literal ::=
    binary-literal
    | hexadecimal-literal
    | decimal-literal

binary-literal ::=
    '0b' binary-digit
    | '0B' binary-digit
    | binary-literal binary-digit
    | binary-literal "'" binary-digit

hexadecimal-literal ::=
    '0x' hexadecimal-digit
    | '0X' hexadecimal-digit
    | hexadecimal-literal hexadecimal-digit
    | hexadecimal-literal "'" hexadecimal-digit

decimal-literal ::=
    digit [uU][lL][lL]
    | decimal-literal digit [uU][lL][lL]
    | decimal-literal "'" digit [uU][lL][lL]

floating-point-literal ::=
    digit ( "'" | digit )* "." digit (( "'" | digit )*)? ([eE][-+]?digit ( "'" | digit )*) [fFlL]
    //| TODO full grammar & refactor to utility functions with their
    //| own unit test rather than inline everything here

string-literal ::=
    encoding-prefix? '"' s-char-seq? '"'
    | encoding-prefix? 'R"' d-char-seq? '(' s-char-seq? ')' d-char-seq? '"'

s-char-seq ::=
    interpolation? s-char
    | interpolation? s-char-seq s-char

d-char-seq ::=
    d-char

interpolation ::=
    '(' expression ')' '$'

character-literal ::=
    encoding-prefix? "'" c-char-seq? "'"

c-char-seq ::=
    c-char
    | c-char-seq c-char
hsutter commented 7 months ago

You don't need to remove the notes but only prefix then with "//" will be enough.

👍 OK, will be in the next commit.

(BTW, I intend to take a look at eliminating the need for that note soon, but for now at least the // should help this issue.)