exaloop / codon

A high-performance, zero-overhead, extensible Python compiler using LLVM
https://docs.exaloop.io/codon
Other
15.12k stars 522 forks source link

Small improvement in "peglib/codon" grammar #386

Open mingodad opened 1 year ago

mingodad commented 1 year ago

After mechanically extracting the peglib/codon grammar from peglib.h, I noticed 2 places that can be improved; see also this issue: https://github.com/yhirose/cpp-peglib/issues/275#issue-1720813202

----------------------------------- peglib.h -----------------------------------
index faf7b63..f37b789 100644
@@ -3388,7 +3388,7 @@ private:
             g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]);

     g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
-    g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
+    g["IdentCont"] <= tok(seq(g["IdentStart"], zom(g["IdentRest"])));

     const static std::vector<std::pair<char32_t, char32_t>> range = {
         {0x0080, 0xFFFF}};
@@ -3536,7 +3536,7 @@ private:

     // Codon: C++ code support
     g["CppInstr"] <= seq(g["CppCode"], g["Spacing"]);
-    g["CppCode"] <= seq(chr('{'), zom(g["CppChar"]), chr('}'));
+    g["CppCode"] <= tok(seq(chr('{'), zom(g["CppChar"]), chr('}')));
     g["CppChar"] <= cho(g["CppCode"], seq(npd(chr('{')), npd(chr('}')), dot()));

     // Set definition names

Below is the extracted grammar, already with the two mentioned enhancements:

    #// Setup PEG syntax parser
Grammar <- (Spacing  (Definition)+ EndOfFile)
    #// Codon: Support for name <- rule { cpp_code }
Definition <-
        ((Ignore IdentCont Parameters LEFTARROW
TopExpression  (Instruction)?) /
            (Ignore Identifier LEFTARROW TopExpression
                (Instruction)?) /
            (Ignore IdentCont Spacing CppInstr))
TopExpression <-
        ((SLASH)? TopChoice  ((SLASH TopChoice))*)
TopChoice <- (Sequence  (CppInstr)?)
Expression <- (Sequence  ((SLASH Sequence))*)
Sequence <- ((CUT /Prefix))*
Prefix <- (((AND /NOT))? SuffixWithLabel)
SuffixWithLabel <-
        (Suffix  ((LABEL Identifier))?)
Suffix <- (Primary  (Loop)?)
Loop <- (QUESTION /STAR /PLUS /Repetition)
Primary <-
        ((Ignore IdentCont Arguments
                !(LEFTARROW)) /
            (Ignore Identifier
                !(((Parameters)? LEFTARROW))) /
            (OPEN Expression CLOSE) /
            (BeginTok Expression EndTok) /CapScope /
            (BeginCap Expression EndCap) /BackRef /
LiteralI /Dictionary /Literal /NegatedClassI /
NegatedClass /ClassI /Class /DOT)

Identifier <- (IdentCont Spacing)
IdentCont <- <(IdentStart  (IdentRest)*)>

IdentStart <- (!("↑")  !("⇑")
                           ([a-zA-Z_%] / [0x0080-0xFFFF]))

IdentRest <- (IdentStart / [0-9])

Dictionary <- (LiteralD  ((PIPE LiteralD))+)

    lit_ope <- (([']  <(((!([']) Char))*)>
                           ['] Spacing) /
                       (["]  <(((!(["]) Char))*)>
                           ["] Spacing))
Literal <- lit_ope
LiteralD <- lit_ope

LiteralI <-
        (([']  <(((!([']) Char))*)>  "'i"
Spacing) /
            (["]  <(((!(["]) Char))*)>  "\"i"
Spacing))

    #// NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
Class <- ('['  !('^')
                      <(((!(']') Range))+)>  ']'
Spacing)
ClassI <- ('['  !('^')
                       <(((!(']') Range))+)>  "]i"
Spacing)

NegatedClass <- ("[^"
                             <(((!(']') Range))+)>  ']'
Spacing)
NegatedClassI <- ("[^"
                              <(((!(']') Range))+)>
                              "]i" Spacing)

    #// NOTE: This is different from The original Brian Ford's paper, and this
    #// modification allows us to specify `[+-]` as a valid char class.
Range <-
        ((Char  '-'  !(']') Char) /Char)

Char <-
        (('\\'  [fnrtv'"\[\]\\^]) /
            ('\\'  [0-3]  [0-7]  [0-7]) /
            ('\\'  [0-7]  ([0-7])?) /
            ("\\x"  [0-9a-fA-F]  ([0-9a-fA-F])?) /
            ("\\u"
                (((('0'  [0-9a-fA-F]) / "10")
                        ([0-9a-fA-F]{4,4})) /
                    ([0-9a-fA-F]{4,5}))) /
            (!('\\')  .))

Repetition <-
        (BeginBlacket RepetitionRange EndBlacket)
RepetitionRange <- ((Number COMMA Number) /
                                (Number COMMA) /Number /
                                (COMMA Number))
Number <- (([0-9])+ Spacing)

CapScope <- (BeginCapScope Expression EndCapScope)

LEFTARROW <- (("<-" / "←") Spacing)
~SLASH <- ('/' Spacing)
~PIPE <- ('|' Spacing)
AND <- ('&' Spacing)
NOT <- ('!' Spacing)
QUESTION <- ('?' Spacing)
STAR <- ('*' Spacing)
PLUS <- ('+' Spacing)
~OPEN <- ('(' Spacing)
~CLOSE <- (')' Spacing)
DOT <- ('.' Spacing)

CUT <- ('^' Spacing)    #// Codon: Change from ↑ to ^
~LABEL <- ('@' Spacing) #// Codon: Change from ⇑ to @

~Spacing <- ((Space /Comment))*
Comment <-
        ('#'  ((!(EndOfLine)  .))* EndOfLine)
Space <- (' ' / '\t' /EndOfLine)
EndOfLine <- ("\r\n" / '\n' / '\r')
EndOfFile <- !(.)

~BeginTok <- ('<' Spacing)
~EndTok <- ('>' Spacing)

~BeginCapScope <- ('$'  '(' Spacing)
~EndCapScope <- (')' Spacing)

BeginCap <- ('$'  <(IdentCont)>  '<' Spacing)
~EndCap <- ('>' Spacing)

BackRef <- ('$'  <(IdentCont)> Spacing)

IGNORE <- '~'

Ignore <- (IGNORE)?
Parameters <- (OPEN Identifier
                           ((COMMA Identifier))* CLOSE)
Arguments <- (OPEN Expression
                          ((COMMA Expression))* CLOSE)
~COMMA <- (',' Spacing)

    #// Instruction grammars
Instruction <-
        (BeginBlacket
            ((InstructionItem  ((InstructionItemSeparator
InstructionItem))*))?
EndBlacket)
InstructionItem <-
        (PrecedenceClimbing /ErrorMessage /NoAstOpt)
~InstructionItemSeparator <- (';' Spacing)

~SpacesZom <- (Space)*
~SpacesOom <- (Space)+
~BeginBlacket <- ('{' Spacing)
~EndBlacket <- ('}' Spacing)

    #// PrecedenceClimbing instruction
PrecedenceClimbing <-
        ("precedence" SpacesOom PrecedenceInfo
            ((SpacesOom PrecedenceInfo))* SpacesZom)
PrecedenceInfo <-
        (PrecedenceAssoc
            ((&(SpacesOom) PrecedenceOpe))+)
PrecedenceOpe <-
        (([']
                <(((!((Space / ['])) Char))*)>
                [']) /
            (["]
                <(((!((Space / ["])) Char))*)>
                ["]) /
            <(((!((PrecedenceAssoc /Space / '}'))
                        .))+)>)
PrecedenceAssoc <- [LR]

    #// Error message instruction
ErrorMessage <- ("error_message" SpacesOom
LiteralD SpacesZom)

    #// No Ast node optimization instruction
NoAstOpt <- ("no_ast_opt" SpacesZom)

    #// Codon: C++ code support
CppInstr <- (CppCode Spacing)
CppCode <- <('{'  (CppChar)*  '}')>
CppChar <- (CppCode / (!('{')  !('}')  .))

The grammar above can be used on https://yhirose.github.io/cpp-peglib/ to test/debug/develop. Here are the numbers with and without the enhancements when parsing grammar.peg.

Before the enhancements:

duration: 0.1445s (144500µs)

  id       total      %     success        fail  definition
          104603              50701       53902  Total counters
                              48.47       51.53  % success/fail

After the enhancements:

duration: 0.0802s (80200µs)

  id       total      %     success        fail  definition
           45677              14277       31400  Total counters
                              31.26       68.74  % success/fail
mingodad commented 1 year ago

And here is grammar.peg, manually converted to an EBNF understood by https://www.bottlecaps.de/rr/ui .

Copy and paste the EBNF shown below into https://www.bottlecaps.de/rr/ui on the tab Edit Grammar, then click on the tab View Diagram to see/download a navigable railroad diagram.

//# Copyright (C) 2022 Exaloop Inc. <https://exaloop.io>
//# Codon PEG grammar
//# Adopted from Python 3's PEG grammar (https://docs.python.org/3/reference/grammar.html)

//# TODO: nice docstrs

program ::= (statements ( /*_*/EOL)* | ( /*_*/EOL)*) EndOfFile
fstring ::= star_expressions /*_*/(':' format_spec)? /*_*/EndOfFile

//# Macros
//list(c, e)  ::= e ( /*_*/c /*_*/e)*
//tlist(c, e) ::= e ( /*_*/c /*_*/e)* ( /*_*/<c>)?

statements ::= (( /*_*/EOL)* statement)+
statement ::= SAMEDENT compound_stmt | SAMEDENT simple_stmt
simple_stmt ::= small_stmt (';' small_stmt)* /*_*/EOL
small_stmt ::=
  | assignment
  | 'pass' /*_*/AND /*_*/( /*SPACE*/ | ';' | EOL)
  | 'break' /*_*/AND /*_*/( /*SPACE*/ | ';' | EOL)
  | 'continue' /*_*/AND /*_*/( /*SPACE*/ | ';' | EOL)
  | global_stmt
  | nonlocal_stmt
  | yield_stmt /*_*/AND /*_*/( /*SPACE*/ | ';' | EOL)
  | assert_stmt
  | del_stmt
  | return_stmt /*_*/AND /*_*/( /*SPACE*/ | ';' | EOL)
  | raise_stmt /*_*/AND /*_*/( /*SPACE*/ | ';' | EOL)
  | print_stmt
  | import_stmt
  | expressions /*_*/AND /*_*/( /*_*/';' | /*_*/EOL)
  | custom_small_stmt

assignment ::=
  | id /*_*/':' /*_*/expression ( /*_*/'=' /*_*/star_expressions)?
  | (star_targets /*_*/(_NOT /*_*/'==' '=') /*_*/)+ star_expressions /*_*/NOT_( /*_*/'=')
  | star_expression /*_*/augassign '=' ^ /*_*/star_expressions
augassign ::=
  '+' | '-' | '**' | '*' | '@' | '//' | '/' | '%' | '&' | '|' | '^' | '<<' | '>>'

global_stmt ::= 'global'  /*SPACE*/ NAME (',' NAME)*
nonlocal_stmt ::= 'nonlocal'  /*SPACE*/ NAME (',' NAME)*
yield_stmt ::=
  | 'yield'  /*SPACE*/ 'from'  /*SPACE*/ expression
  | 'yield' ( /*SPACE*/ expressions)?
assert_stmt ::= 'assert'  /*SPACE*/ expression ( /*_*/',' /*_*/expression)?
//# TODO: do targets as in Python
del_stmt ::= 'del'  /*SPACE*/ expression (',' expression)*
return_stmt ::= 'return' ( /*SPACE*/ expressions)?
//# TODO: raise expression 'from' expression
raise_stmt ::= 'raise' ( /*SPACE*/ expression)?
print_stmt ::=
  | 'print'  /*SPACE*/ star_expression ( /*_*/',' /*_*/star_expression)* ( /*_*/',')?
  | 'print' /*_*/_AND /*_*/EOL
import_stmt ::= import_name | import_from
import_name ::= 'import'  /*SPACE*/ as_name (',' as_name)*
as_name ::= dot_name ( /*SPACE*/ 'as'  /*SPACE*/ NAME)?
import_from ::=
  | 'from'  /*SPACE*/ ( /*_*/'.')* ( /*_*/dot_name)?  /*SPACE*/ 'import'  /*SPACE*/ '*'
  | 'from'  /*SPACE*/ ( /*_*/'.')* ( /*_*/dot_name)?  /*SPACE*/ 'import'  /*SPACE*/
    (from_as_parens | from_as_items)
from_as_parens ::= '(' /*_*/from_as (',' from_as)* /*_*/')'
from_as_items ::= from_as (',' from_as)*
from_as ::= from_id ( /*SPACE*/ 'as'  /*SPACE*/ NAME)?
from_id ::=
  | dot_name /*_*/':' /*_*/expression
  | dot_name /*_*/from_params ( /*_*/'->' /*_*/expression)?
  | dot_name
dot_name ::= id ( /*_*/'.' /*_*/NAME)*
from_params ::= '(' /*_*/(from_param (',' from_param)*)? /*_*/')'
from_param ::= expression
//#TODO expand import logic | param

suite ::= (simple_stmt | ( /*_*/EOL)+ /*_*/AND /*_*/INDENT statements ( /*_*/EOL)* /*_*/AND /*_*/DEDENT)
compound_stmt ::=
  | function
  | if_stmt
  | class
  | with_stmt
  | for
  | try_stmt
  | while_stmt
  | match_stmt
  | custom_stmt
if_stmt ::= ('if'  /*SPACE*/ named_expression /*_*/':' /*_*/suite)
           (SAMEDENT 'elif'  /*SPACE*/ named_expression /*_*/':' /*_*/suite)*
           (SAMEDENT 'else' /*_*/':' /*_*/suite)?
while_stmt ::= ('while'  /*SPACE*/ named_expression /*_*/':' /*_*/suite)
              (SAMEDENT 'else' ( /*SPACE*/ 'not'  /*SPACE*/ 'break')*  /*_*/':' /*_*/suite)?
for ::= decorator? for_stmt
for_stmt ::= ('for'  /*SPACE*/ star_targets)
            ( /*SPACE*/ 'in'  /*SPACE*/ star_expressions /*_*/':' /*_*/suite)
            (SAMEDENT 'else' ( /*SPACE*/ 'not'  /*SPACE*/ 'break')* /*_*/':' /*_*/suite)?
with_stmt ::= 'with'  /*SPACE*/ (with_parens_item | with_item) /*_*/':' /*_*/suite
with_parens_item ::= '(' /*_*/as_item (',' as_item)* /*_*/')'
with_item ::= as_item (',' as_item)*
as_item ::=
  | expression  /*SPACE*/ 'as'  /*SPACE*/ id /*_*/AND_( /*_*/(',' | ')' | ':'))
  | expression
//# TODO: else block?
try_stmt ::=
  | ('try' /*_*/':' /*_*/suite)
    excepts
    (SAMEDENT 'finally' /*_*/':' /*_*/suite)?
  | ('try' /*_*/':' /*_*/suite) (SAMEDENT 'finally' /*_*/':' /*_*/suite)?
excepts ::= (SAMEDENT except_block)+
except_block ::=
  | 'except'  /*SPACE*/ expression ( /*SPACE*/ 'as'  /*SPACE*/ NAME)? /*_*/':' /*_*/suite
  | 'except' /*_*/':' /*_*/suite
function ::=
  | extern_decorators function_def ( /*_*/EOL)+ /*_*/AND /*_*/INDENT extern ( /*_*/EOL)* /*_*/AND /*_*/DEDENT
  | decorators? function_def /*_*/suite
extern ::= (empty_line* EXTERNDENT (_NOT /*_*/EOL .)* EOL empty_line*)+
empty_line ::= (' ' | '\t')* EOL
function_def ::=
  | 'def'  /*SPACE*/ NAME /*_*/generics /*_*/params ( /*_*/'->' /*_*/expression)? /*_*/':'
  | 'def'  /*SPACE*/ NAME /*_*/params ( /*_*/'->' /*_*/expression)? /*_*/':'
params ::= '(' /*_*/(param (',' param)*)? /*_*/')'
param ::=
  | param_name /*_*/':' /*_*/expression ( /*_*/'=' /*_*/expression)?
  | param_name ( /*_*/'=' /*_*/expression)?
param_name ::= ('**' | '*')? /*_*/NAME
generics ::= '[' /*_*/param (',' param)* /*_*/']'
decorators ::= decorator+
decorator ::= ('@' /*_*/_NOT /*_*/(('llvm' | 'python') /*_*/EOL) named_expression /*_*/EOL SAMEDENT)
extern_decorators ::=
  | decorators? ('@' /*_*/('llvm' | 'python') /*_*/EOL SAMEDENT) decorators?
class ::= decorators? class_def
base_class_args ::= '(' /*_*/(expression (',' expression)*)? /*_*/')'
class_args ::=
  | generics /*_*/base_class_args
  | generics
  | base_class_args
class_def ::= 'class'  /*SPACE*/ NAME /*_*/class_args? /*_*/':' /*_*/suite
match_stmt ::= 'match'  /*SPACE*/ expression /*_*/':' ( /*_*/EOL)+
              /*_*/AND /*_*/INDENT (SAMEDENT case)+ ( /*_*/EOL)* /*_*/AND /*_*/DEDENT
case ::=
  | 'case'  /*SPACE*/ expression  /*SPACE*/ 'if'  /*SPACE*/ pipe /*_*/':' /*_*/suite
  | 'case'  /*SPACE*/ expression /*_*/':' /*_*/suite
custom_stmt ::=
  | NAME  /*SPACE*/ expression /*_*/':' /*_*/suite
  | NAME /*_*/':' /*_*/suite
custom_stmt__PREDICATE ::=

custom_small_stmt ::= NAME  /*SPACE*/ expressions
custom_small_stmt__PREDICATE ::=

//########################################################################################
//# (2) Expressions
//########################################################################################

expressions ::= expression (',' expression)*
expression ::=
  | lambdef
  | disjunction  /*SPACE*/ 'if'  /*SPACE*/ disjunction  /*SPACE*/ 'else'  /*SPACE*/ expression
  | pipe
//# TODO: make it more pythonic
lambdef ::=
  | 'lambda'  /*SPACE*/ NAME (',' NAME)* /*_*/':' /*_*/expression
  | 'lambda' /*_*/':' /*_*/expression
pipe ::=
  | disjunction ( /*_*/('|>' | '||>') /*_*/disjunction)+
  | disjunction
disjunction ::=
  | conjunction ( /*SPACE*/ 'or'  /*SPACE*/ conjunction)+
  | conjunction
conjunction ::=
  | inversion ( /*SPACE*/ 'and'  /*SPACE*/ inversion)+
  | inversion
inversion ::=
  | 'not'  /*SPACE*/ inversion
  | comparison
comparison ::= bitwise_or compare_op_bitwise_or*
compare_op_bitwise_or ::=
  |  /*SPACE*/ 'not'  /*SPACE*/ 'in'  /*SPACE*/ bitwise_or
  |  /*SPACE*/ 'is'  /*SPACE*/ 'not'  /*SPACE*/ bitwise_or
  |  /*SPACE*/ ('in' | 'is')  /*SPACE*/ bitwise_or
  | /*_*/('==' | '!=' | '<=' | '<' | '>=' | '>') /*_*/bitwise_or
bitwise_or  ::= bitwise_xor ( /*_*/'|' /*_*/bitwise_xor)*
bitwise_xor ::= bitwise_and ( /*_*/'^' /*_*/bitwise_and)*
bitwise_and ::= shift_expr  ( /*_*/'&' /*_*/shift_expr )*
shift_expr ::= sum  ( /*_*/('<<' | '>>') /*_*/sum )*
sum        ::= term ( /*_*/('+' | '-')   /*_*/term)*
term ::= factor ( /*_*/('*' | '//' | '/' | '%' | '@') /*_*/factor)*
factor ::=
  | ('+' | '-' | '~') /*_*/factor
  | power
power ::=
  | primary /*_*/'**' /*_*/factor
  | primary
primary ::= atom ( /*_*/primary_tail)*
primary_tail ::=
  | '.' /*_*/NAME
  | genexp
  | arguments
  | slices
slices ::= '[' /*_*/slice (',' slice)* /*_*/']'
slice ::=
  | slice_part /*_*/':' /*_*/slice_part ( /*_*/':' /*_*/slice_part)?
  | expression
slice_part ::= expression?
atom ::=
  | STRING ( /*SPACE*/ STRING)*
  | id
  | 'True'
  | 'False'
  | 'None'
  | INT /*_*/'...' /*_*/INT
  | FLOAT NAME?
  | INT NAME?
  | parentheses
  | '...'
parentheses ::= (
  tuple | yield | named | genexp | listexpr | listcomp | dict | set | dictcomp | setcomp
)
tuple ::=
  | '(' /*_*/')'
  | '(' /*_*/star_named_expression (',' star_named_expression)* /*_*/')'
yield ::= '(' /*_*/'yield' /*_*/')'
named ::= '(' /*_*/named_expression /*_*/')'
genexp ::= '(' /*_*/named_expression  /*SPACE*/ for_if_clauses /*_*/')'
listexpr ::= '[' /*_*/(star_named_expression (',' star_named_expression)*)? /*_*/']'
listcomp ::= '[' /*_*/named_expression  /*SPACE*/ for_if_clauses /*_*/']'
set ::= '{' /*_*/star_named_expression (',' star_named_expression)* /*_*/'}'
setcomp ::= '{' /*_*/named_expression  /*SPACE*/ for_if_clauses /*_*/'}'
dict ::= '{' /*_*/(double_starred_kvpair (',' double_starred_kvpair)*)? /*_*/'}'
dictcomp ::= '{' /*_*/kvpair  /*SPACE*/ for_if_clauses /*_*/'}'
double_starred_kvpair ::=
  | '**' /*_*/bitwise_or
  | kvpair
kvpair ::= expression /*_*/':' /*_*/expression
for_if_clauses ::= for_if_clause ( /*SPACE*/ for_if_clause)*
for_if_clause ::= 'for'  /*SPACE*/ star_targets  /*SPACE*/ 'in'  /*SPACE*/ disjunction
                 ( /*SPACE*/ 'if'  /*SPACE*/ disjunction)*

star_targets ::= star_target (',' star_target)*
star_target ::=
  | '*' /*_*/_NOT /*_*/'*' star_target
  | star_parens
  | primary
star_parens ::=
  | '(' /*_*/star_target (',' star_target)* /*_*/')'
  | '[' /*_*/star_target (',' star_target)* /*_*/']'

star_expressions ::= star_expression (',' star_expression)*
star_expression ::=
  | '*' /*_*/bitwise_or
  | expression
star_named_expression ::=
  | '*' /*_*/bitwise_or
  | named_expression
named_expression ::=
  | NAME /*_*/':=' /*_*/^ expression
  | expression /*_*/NOT /*_*/( /*_*/':=')
arguments ::= '(' /*_*/(args (',' args)*)? /*_*/')'
args ::= (simple_args ( /*_*/',' /*_*/kwargs)? | kwargs)
simple_args ::= (starred_expression | named_expression /*_*/NOT /*_*/( /*_*/'=')) (',' (starred_expression | named_expression /*_*/NOT /*_*/( /*_*/'=')))*
starred_expression ::= '*' /*_*/expression
kwargs ::=
  | kwarg_or_starred (',' kwarg_or_starred)* /*_*/',' /*_*/kwarg_or_double_starred (',' kwarg_or_double_starred)*
  | kwarg_or_starred (',' kwarg_or_starred)*
  | kwarg_or_double_starred (',' kwarg_or_double_starred)*
kwarg_or_starred ::=
  | NAME /*_*/'=' /*_*/expression
  | starred_expression
kwarg_or_double_starred ::=
  | NAME /*_*/'=' /*_*/expression
  | '**' /*_*/expression
id ::= NAME
INT ::= (BININT | HEXINT | DECINT)
BININT ::= '0' [bB] [0-1] ('_'* [0-1])*
HEXINT ::= '0' [xX] [0-9a-fA-F] ('_'? [0-9a-fA-F])*
DECINT ::= [0-9] ('_'? [0-9])*
FLOAT ::= (EXPFLOAT | PTFLOAT)
PTFLOAT ::= DECINT? '.' DECINT | DECINT '.'
EXPFLOAT ::= (PTFLOAT | DECINT) [eE] ('+' | '-')? DECINT
NAME ::=
  | keyword [a-zA-Z_0-9]+
  | /*_*/NOT /*_*/keyword [a-zA-Z_] [a-zA-Z_0-9]*
STRING ::= NAME? STR
STRING__PREDICATE ::=
STR ::=
  '"""' (_NOT /*_*/'"""' CHAR)*       '"""'   |  "'''" (_NOT /*_*/"'''" CHAR)*     "'''" |
  '"'   (_NOT /*_*/('"' | EOL) CHAR)* '"'     |  "'"     (_NOT /*_*/("'" | EOL) CHAR)* "'"

CHAR ::= ('\\' . | .)
COMMENT ::= '#' (_NOT /*_*/EOL .)*
INDENT__NOPACKRAT ::= (' ' | '\t')*
INDENT__PREDICATE ::=
SAMEDENT__NOPACKRAT ::= (' ' | '\t')*
SAMEDENT__PREDICATE ::=
DEDENT__NOPACKRAT ::= (' ' | '\t')*
DEDENT__PREDICATE ::=
EXTERNDENT__NOPACKRAT ::= (' ' | '\t')*
EXTERNDENT__PREDICATE ::=
EOL ::= (('\r' | '\n') | '\n\r')
// SPACE ::= ((' ' | '\t')+ | COMMENT | NLP EOL) SPACE?
// _ ::= SPACE?

//# TODO: add async | await
keyword ::=
  'False' | 'else' | 'import' | 'pass' | 'None' | 'break' | 'except' | 'in' | 'raise' |
  'True' | 'class' | 'finally' | 'is' | 'return' | 'and' | 'continue' | 'for' | 'as'  |
  'lambda' | 'try' | 'def' | 'from' | 'while' | 'assert' | 'del' | 'global' | 'not' |
  'with' | 'elif' | 'if' | 'or' | 'yield'

//# https://docs.python.org/3/library/string.html#formatspec
format_spec ::= ([<>=^] | [^{}] [<>=^])? [+-]? 'z'? '#'? '0'? [0-9]* [_,]* ('.' [0-9]+)? [bcdeEfFgGnosxX%]?

//Added tokens for railroad generation
_NOT /*_*/::= '!'
_AND /*_*/::= '&'
EndOfFile ::= /*_*/NOT /*_*/ .
mingodad commented 1 year ago

I noticed that the resulting codon_rules.cpp doesn't preserve the order from codon/codon/parser/peg/grammar.peg, and I think this can cause surprises because in PEG the order is relevant.