Open mingodad opened 1 year ago
And here is grammar.peg, manually converted
to an EBNF
understood by https://www.bottlecaps.de/rr/ui.
Copy and paste the EBNF
shown below into https://www.bottlecaps.de/rr/ui on the tab Edit Grammar,
then click on the tab View Diagram
to see/download a navigable railroad diagram.
//# Copyright (C) 2022 Exaloop Inc. <https://exaloop.io>
//# Codon PEG grammar
//# Adopted from Python 3's PEG grammar (https://docs.python.org/3/reference/grammar.html)
//# TODO: nice docstrs
//# Entry points: a whole program, and the contents of an f-string replacement field
program ::=
    ( statements ( /*_*/EOL)*
    | ( /*_*/EOL)*
    )
    EndOfFile
fstring ::=
    star_expressions /*_*/(':' format_spec)? /*_*/EndOfFile
//# Macros (parameterised rules, kept here only as comments)
//list(c, e) ::= e ( /*_*/c /*_*/e)*
//tlist(c, e) ::= e ( /*_*/c /*_*/e)* ( /*_*/<c>)?
//# Statement sequencing: every statement is prefixed by SAMEDENT
statements ::=
    (( /*_*/EOL)* statement)+
statement ::=
    SAMEDENT compound_stmt
  | SAMEDENT simple_stmt
simple_stmt ::=
    small_stmt (';' small_stmt)* /*_*/EOL
//# One small statement (the unit separated by ';' inside simple_stmt).
//# The first alternative carries no leading '|': in this EBNF a leading '|'
//# would declare an additional empty alternative.
//# _AND ( ... ) marks a lookahead; _AND is the token defined as '&' for the diagram.
small_stmt ::=
    assignment
  | 'pass' _AND ( /*SPACE*/ | ';' | EOL)
  | 'break' _AND ( /*SPACE*/ | ';' | EOL)
  | 'continue' _AND ( /*SPACE*/ | ';' | EOL)
  | global_stmt
  | nonlocal_stmt
  | yield_stmt _AND ( /*SPACE*/ | ';' | EOL)
  | assert_stmt
  | del_stmt
  | return_stmt _AND ( /*SPACE*/ | ';' | EOL)
  | raise_stmt _AND ( /*SPACE*/ | ';' | EOL)
  | print_stmt
  | import_stmt
  | expressions _AND ( /*_*/';' | /*_*/EOL)
  | custom_small_stmt
//# Assignment forms: annotated, (chained) plain, and augmented.
//# _NOT is the token defined as '!' for the diagram; it marks a negative lookahead.
//# NOTE(review): the bare ^ in the augmented form is kept exactly as found —
//# confirm that the target EBNF tool accepts it.
assignment ::=
    id /*_*/':' /*_*/expression ( /*_*/'=' /*_*/star_expressions)?
  | (star_targets /*_*/(_NOT '==' '=') /*_*/)+ star_expressions _NOT ( /*_*/'=')
  | star_expression /*_*/augassign '=' ^ /*_*/star_expressions
//# Operator part of an augmented assignment (the '=' follows in assignment)
augassign ::=
    '+' | '-' | '**' | '*' | '@' | '//' | '/' | '%' | '&' | '|' | '^' | '<<' | '>>'
//# Keyword-introduced small statements.
//# Multi-alternative rules start directly with their first alternative
//# (a leading '|' would add an empty alternative in this EBNF).
global_stmt ::= 'global' /*SPACE*/ NAME (',' NAME)*
nonlocal_stmt ::= 'nonlocal' /*SPACE*/ NAME (',' NAME)*
yield_stmt ::=
    'yield' /*SPACE*/ 'from' /*SPACE*/ expression
  | 'yield' ( /*SPACE*/ expressions)?
assert_stmt ::= 'assert' /*SPACE*/ expression ( /*_*/',' /*_*/expression)?
//# TODO: do targets as in Python
del_stmt ::= 'del' /*SPACE*/ expression (',' expression)*
return_stmt ::= 'return' ( /*SPACE*/ expressions)?
//# TODO: raise expression 'from' expression
raise_stmt ::= 'raise' ( /*SPACE*/ expression)?
//# print with arguments (optional trailing comma), or bare print followed by EOL
print_stmt ::=
    'print' /*SPACE*/ star_expression ( /*_*/',' /*_*/star_expression)* ( /*_*/',')?
  | 'print' /*_*/_AND EOL
//# Import statements: 'import a.b as c, ...' and 'from x import ...'
import_stmt ::= import_name | import_from
import_name ::= 'import' /*SPACE*/ as_name (',' as_name)*
as_name ::= dot_name ( /*SPACE*/ 'as' /*SPACE*/ NAME)?
//# First alternative is the star import; the second takes a (possibly parenthesised) item list.
//# No leading '|' before the first alternative, so no empty alternative is declared.
import_from ::=
    'from' /*SPACE*/ ( /*_*/'.')* ( /*_*/dot_name)? /*SPACE*/ 'import' /*SPACE*/ '*'
  | 'from' /*SPACE*/ ( /*_*/'.')* ( /*_*/dot_name)? /*SPACE*/ 'import' /*SPACE*/
    (from_as_parens | from_as_items)
from_as_parens ::= '(' /*_*/from_as (',' from_as)* /*_*/')'
from_as_items ::= from_as (',' from_as)*
from_as ::= from_id ( /*SPACE*/ 'as' /*SPACE*/ NAME)?
//# An imported item: typed name, name with a parameter list (optional '->' result), or plain name
from_id ::=
    dot_name /*_*/':' /*_*/expression
  | dot_name /*_*/from_params ( /*_*/'->' /*_*/expression)?
  | dot_name
dot_name ::= id ( /*_*/'.' /*_*/NAME)*
from_params ::= '(' /*_*/(from_param (',' from_param)*)? /*_*/')'
from_param ::= expression
//#TODO expand import logic | param
//# Body of a compound statement: a simple statement on the same line, or an
//# indented run of statements. _AND (defined as '&') marks INDENT / DEDENT as lookaheads.
suite ::= (simple_stmt | ( /*_*/EOL)+ _AND INDENT statements ( /*_*/EOL)* _AND DEDENT)
//# Compound statements; the list starts directly with its first alternative
//# (a leading '|' would add an empty alternative in this EBNF).
compound_stmt ::=
    function
  | if_stmt
  | class
  | with_stmt
  | for
  | try_stmt
  | while_stmt
  | match_stmt
  | custom_stmt
//# if, followed by any number of elif clauses and an optional else
if_stmt ::=
    ('if' /*SPACE*/ named_expression /*_*/':' /*_*/suite)
    (SAMEDENT 'elif' /*SPACE*/ named_expression /*_*/':' /*_*/suite)*
    (SAMEDENT 'else' /*_*/':' /*_*/suite)?
//# while loop; the optional else clause may be written 'else not break'
while_stmt ::=
    ('while' /*SPACE*/ named_expression /*_*/':' /*_*/suite)
    (SAMEDENT 'else' ( /*SPACE*/ 'not' /*SPACE*/ 'break')* /*_*/':' /*_*/suite)?
//# for loop, optionally preceded by a decorator
for ::=
    decorator? for_stmt
for_stmt ::=
    ('for' /*SPACE*/ star_targets)
    ( /*SPACE*/ 'in' /*SPACE*/ star_expressions /*_*/':' /*_*/suite)
    (SAMEDENT 'else' ( /*SPACE*/ 'not' /*SPACE*/ 'break')* /*_*/':' /*_*/suite)?
//# with statement; items may be wrapped in parentheses
with_stmt ::= 'with' /*SPACE*/ (with_parens_item | with_item) /*_*/':' /*_*/suite
with_parens_item ::= '(' /*_*/as_item (',' as_item)* /*_*/')'
with_item ::= as_item (',' as_item)*
//# 'expr as id' is taken only when followed by ',', ')' or ':' (lookahead via _AND, defined as '&')
as_item ::=
    expression /*SPACE*/ 'as' /*SPACE*/ id _AND ( /*_*/(',' | ')' | ':'))
  | expression
//# TODO: else block?
//# try with one or more except blocks and optional finally, or try with optional finally only.
//# No leading '|' before the first alternative, so no empty alternative is declared.
try_stmt ::=
    ('try' /*_*/':' /*_*/suite)
    excepts
    (SAMEDENT 'finally' /*_*/':' /*_*/suite)?
  | ('try' /*_*/':' /*_*/suite) (SAMEDENT 'finally' /*_*/':' /*_*/suite)?
excepts ::= (SAMEDENT except_block)+
except_block ::=
    'except' /*SPACE*/ expression ( /*SPACE*/ 'as' /*SPACE*/ NAME)? /*_*/':' /*_*/suite
  | 'except' /*_*/':' /*_*/suite
//# Function definition: either an extern function (decorated with @llvm / @python,
//# body taken by the extern rule) or a regular function with a suite.
//# _AND / _NOT are the tokens defined as '&' / '!' and mark lookaheads.
function ::=
    extern_decorators function_def ( /*_*/EOL)+ _AND INDENT extern ( /*_*/EOL)* _AND DEDENT
  | decorators? function_def /*_*/suite
//# Extern body: lines starting with EXTERNDENT, each consumed up to its EOL
extern ::= (empty_line* EXTERNDENT (_NOT EOL .)* EOL empty_line*)+
empty_line ::= (' ' | '\t')* EOL
//# Header up to and including ':'; the first form carries a generics list
function_def ::=
    'def' /*SPACE*/ NAME /*_*/generics /*_*/params ( /*_*/'->' /*_*/expression)? /*_*/':'
  | 'def' /*SPACE*/ NAME /*_*/params ( /*_*/'->' /*_*/expression)? /*_*/':'
params ::= '(' /*_*/(param (',' param)*)? /*_*/')'
//# Parameter with optional annotation and optional default
param ::=
    param_name /*_*/':' /*_*/expression ( /*_*/'=' /*_*/expression)?
  | param_name ( /*_*/'=' /*_*/expression)?
param_name ::= ('**' | '*')? /*_*/NAME
generics ::= '[' /*_*/param (',' param)* /*_*/']'
decorators ::= decorator+
//# Any '@' decorator line except a bare @llvm / @python line
decorator ::= ('@' /*_*/_NOT (('llvm' | 'python') /*_*/EOL) named_expression /*_*/EOL SAMEDENT)
//# An @llvm / @python line, with optional ordinary decorators before and after it
extern_decorators ::=
    decorators? ('@' /*_*/('llvm' | 'python') /*_*/EOL SAMEDENT) decorators?
//# Class definition with optional decorators, generics and base-class arguments
class ::= decorators? class_def
base_class_args ::= '(' /*_*/(expression (',' expression)*)? /*_*/')'
class_args ::=
    generics /*_*/base_class_args
  | generics
  | base_class_args
class_def ::= 'class' /*SPACE*/ NAME /*_*/class_args? /*_*/':' /*_*/suite
//# match statement: header line, then an indented block of one or more case clauses.
//# _AND (defined as '&') marks INDENT / DEDENT as lookaheads.
match_stmt ::= 'match' /*SPACE*/ expression /*_*/':' ( /*_*/EOL)+
    _AND INDENT (SAMEDENT case)+ ( /*_*/EOL)* _AND DEDENT
//# case clause, optionally guarded by 'if'
case ::=
    'case' /*SPACE*/ expression /*SPACE*/ 'if' /*SPACE*/ pipe /*_*/':' /*_*/suite
  | 'case' /*SPACE*/ expression /*_*/':' /*_*/suite
//# Statements introduced by a NAME instead of a built-in keyword.
//# The *__PREDICATE rules are empty in this conversion.
custom_stmt ::=
    NAME /*SPACE*/ expression /*_*/':' /*_*/suite
  | NAME /*_*/':' /*_*/suite
custom_stmt__PREDICATE ::=
custom_small_stmt ::= NAME /*SPACE*/ expressions
custom_small_stmt__PREDICATE ::=
//########################################################################################
//# (2) Expressions
//########################################################################################
//# Expression layers from loosest to tightest: lambda / conditional / pipe,
//# then or, and, not. Each rule starts directly with its first alternative
//# (a leading '|' would add an empty alternative in this EBNF).
expressions ::= expression (',' expression)*
expression ::=
    lambdef
  | disjunction /*SPACE*/ 'if' /*SPACE*/ disjunction /*SPACE*/ 'else' /*SPACE*/ expression
  | pipe
//# TODO: make it more pythonic
lambdef ::=
    'lambda' /*SPACE*/ NAME (',' NAME)* /*_*/':' /*_*/expression
  | 'lambda' /*_*/':' /*_*/expression
pipe ::=
    disjunction ( /*_*/('|>' | '||>') /*_*/disjunction)+
  | disjunction
disjunction ::=
    conjunction ( /*SPACE*/ 'or' /*SPACE*/ conjunction)+
  | conjunction
conjunction ::=
    inversion ( /*SPACE*/ 'and' /*SPACE*/ inversion)+
  | inversion
inversion ::=
    'not' /*SPACE*/ inversion
  | comparison
//# Comparison chain over bitwise_or operands
comparison ::= bitwise_or compare_op_bitwise_or*
//# One comparison operator plus its right operand; word operators are listed
//# before symbolic ones. No leading '|', so no empty alternative is declared.
compare_op_bitwise_or ::=
    /*SPACE*/ 'not' /*SPACE*/ 'in' /*SPACE*/ bitwise_or
  | /*SPACE*/ 'is' /*SPACE*/ 'not' /*SPACE*/ bitwise_or
  | /*SPACE*/ ('in' | 'is') /*SPACE*/ bitwise_or
  | /*_*/('==' | '!=' | '<=' | '<' | '>=' | '>') /*_*/bitwise_or
//# Binary operator layers, each a repetition over the next tighter layer
bitwise_or ::= bitwise_xor ( /*_*/'|' /*_*/bitwise_xor)*
bitwise_xor ::= bitwise_and ( /*_*/'^' /*_*/bitwise_and)*
bitwise_and ::= shift_expr ( /*_*/'&' /*_*/shift_expr )*
shift_expr ::= sum ( /*_*/('<<' | '>>') /*_*/sum )*
sum ::= term ( /*_*/('+' | '-') /*_*/term)*
term ::= factor ( /*_*/('*' | '//' | '/' | '%' | '@') /*_*/factor)*
//# Unary prefix operators, then exponentiation (right operand is a factor)
factor ::=
    ('+' | '-' | '~') /*_*/factor
  | power
power ::=
    primary /*_*/'**' /*_*/factor
  | primary
//# An atom followed by any number of trailers (attribute, generator call, call, subscript)
primary ::= atom ( /*_*/primary_tail)*
primary_tail ::=
    '.' /*_*/NAME
  | genexp
  | arguments
  | slices
slices ::= '[' /*_*/slice (',' slice)* /*_*/']'
//# Either a start:stop(:step) slice with optional parts, or a plain expression
slice ::=
    slice_part /*_*/':' /*_*/slice_part ( /*_*/':' /*_*/slice_part)?
  | expression
slice_part ::= expression?
//# Atoms. The list starts directly with its first alternative
//# (a leading '|' would add an empty alternative in this EBNF).
atom ::=
    STRING ( /*SPACE*/ STRING)*
  | id
  | 'True'
  | 'False'
  | 'None'
  | INT /*_*/'...' /*_*/INT
  | FLOAT NAME?
  | INT NAME?
  | parentheses
  | '...'
//# All bracketed forms: tuples, parenthesised expressions, lists, dicts, sets and comprehensions
parentheses ::= (
tuple | yield | named | genexp | listexpr | listcomp | dict | set | dictcomp | setcomp
)
//# Bracketed literal and comprehension forms referenced from parentheses.
//# Multi-alternative rules start directly with their first alternative
//# (a leading '|' would add an empty alternative in this EBNF).
tuple ::=
    '(' /*_*/')'
  | '(' /*_*/star_named_expression (',' star_named_expression)* /*_*/')'
yield ::= '(' /*_*/'yield' /*_*/')'
named ::= '(' /*_*/named_expression /*_*/')'
genexp ::= '(' /*_*/named_expression /*SPACE*/ for_if_clauses /*_*/')'
listexpr ::= '[' /*_*/(star_named_expression (',' star_named_expression)*)? /*_*/']'
listcomp ::= '[' /*_*/named_expression /*SPACE*/ for_if_clauses /*_*/']'
set ::= '{' /*_*/star_named_expression (',' star_named_expression)* /*_*/'}'
setcomp ::= '{' /*_*/named_expression /*SPACE*/ for_if_clauses /*_*/'}'
dict ::= '{' /*_*/(double_starred_kvpair (',' double_starred_kvpair)*)? /*_*/'}'
dictcomp ::= '{' /*_*/kvpair /*SPACE*/ for_if_clauses /*_*/'}'
//# Dict entry: '**' unpacking or a key: value pair
double_starred_kvpair ::=
    '**' /*_*/bitwise_or
  | kvpair
kvpair ::= expression /*_*/':' /*_*/expression
//# Comprehension clauses: one or more 'for ... in ...', each with any number of 'if' filters
for_if_clauses ::= for_if_clause ( /*SPACE*/ for_if_clause)*
for_if_clause ::= 'for' /*SPACE*/ star_targets /*SPACE*/ 'in' /*SPACE*/ disjunction
( /*SPACE*/ 'if' /*SPACE*/ disjunction)*
//# Assignment / loop targets, optionally starred or bracketed.
//# _NOT is the token defined as '!' for the diagram; it marks a negative lookahead.
star_targets ::= star_target (',' star_target)*
star_target ::=
    '*' /*_*/_NOT '*' star_target
  | star_parens
  | primary
star_parens ::=
    '(' /*_*/star_target (',' star_target)* /*_*/')'
  | '[' /*_*/star_target (',' star_target)* /*_*/']'
//# Expressions that may carry a leading '*'
star_expressions ::= star_expression (',' star_expression)*
star_expression ::=
    '*' /*_*/bitwise_or
  | expression
star_named_expression ::=
    '*' /*_*/bitwise_or
  | named_expression
//# 'NAME := expression', or an expression not followed by ':='.
//# NOTE(review): the bare ^ after ':=' is kept exactly as found — confirm the
//# target EBNF tool accepts it.
named_expression ::=
    NAME /*_*/':=' /*_*/^ expression
  | expression _NOT ( /*_*/':=')
//# Call arguments: positional (possibly starred) arguments followed by keyword arguments.
//# _NOT (defined as '!') marks a negative lookahead: a positional argument must not be followed by '='.
arguments ::= '(' /*_*/(args (',' args)*)? /*_*/')'
args ::= (simple_args ( /*_*/',' /*_*/kwargs)? | kwargs)
simple_args ::= (starred_expression | named_expression _NOT ( /*_*/'=')) (',' (starred_expression | named_expression _NOT ( /*_*/'=')))*
starred_expression ::= '*' /*_*/expression
//# Keyword arguments; each rule starts directly with its first alternative
//# (a leading '|' would add an empty alternative in this EBNF).
kwargs ::=
    kwarg_or_starred (',' kwarg_or_starred)* /*_*/',' /*_*/kwarg_or_double_starred (',' kwarg_or_double_starred)*
  | kwarg_or_starred (',' kwarg_or_starred)*
  | kwarg_or_double_starred (',' kwarg_or_double_starred)*
kwarg_or_starred ::=
    NAME /*_*/'=' /*_*/expression
  | starred_expression
kwarg_or_double_starred ::=
    NAME /*_*/'=' /*_*/expression
  | '**' /*_*/expression
id ::= NAME
//# Integer literals: binary, hexadecimal or decimal, with '_' allowed between digits
INT ::=
    BININT
  | HEXINT
  | DECINT
BININT ::= '0' [bB] [0-1] ('_'* [0-1])*
HEXINT ::= '0' [xX] [0-9a-fA-F] ('_'? [0-9a-fA-F])*
DECINT ::= [0-9] ('_'? [0-9])*
//# Floating-point literals: exponent form or point form
FLOAT ::=
    EXPFLOAT
  | PTFLOAT
PTFLOAT ::=
    DECINT? '.' DECINT
  | DECINT '.'
EXPFLOAT ::=
    (PTFLOAT | DECINT) [eE] ('+' | '-')? DECINT
//# Identifier: either a keyword immediately extended by more identifier characters,
//# or an identifier that does not start with a keyword (_NOT, defined as '!', is a
//# negative lookahead). No leading '|', so no empty alternative is declared.
NAME ::=
    keyword [a-zA-Z_0-9]+
  | _NOT keyword [a-zA-Z_] [a-zA-Z_0-9]*
//# String literal with an optional NAME prefix
STRING ::=
    NAME? STR
STRING__PREDICATE ::=
//# Quoted bodies: the two triple-quoted forms, then the two single-line forms
//# (which also stop at EOL). _NOT is the token defined as '!'.
STR ::=
    '"""' (_NOT '"""' CHAR)* '"""'
  | "'''" (_NOT "'''" CHAR)* "'''"
  | '"' (_NOT ('"' | EOL) CHAR)* '"'
  | "'" (_NOT ("'" | EOL) CHAR)* "'"
//# A backslash followed by any character, or any single character
CHAR ::=
    '\\' .
  | .
//# '#' up to, but not including, the end of the line
COMMENT ::=
    '#' (_NOT EOL .)*
//# Indentation tokens: each matches a run of spaces / tabs; the paired
//# *__PREDICATE rules are empty in this conversion.
INDENT__NOPACKRAT ::=
    (' ' | '\t')*
INDENT__PREDICATE ::=
SAMEDENT__NOPACKRAT ::=
    (' ' | '\t')*
SAMEDENT__PREDICATE ::=
DEDENT__NOPACKRAT ::=
    (' ' | '\t')*
DEDENT__PREDICATE ::=
EXTERNDENT__NOPACKRAT ::=
    (' ' | '\t')*
EXTERNDENT__PREDICATE ::=
//# End-of-line token: a single '\r' or '\n', or the two-character literal '\n\r'.
//# NOTE(review): the two-character alternative is written LF CR rather than CR LF,
//# and it is listed after the single-character ones — confirm against grammar.peg.
EOL ::= (('\r' | '\n') | '\n\r')
//# SPACE and _ (optional SPACE) are left commented out; their uses elsewhere in
//# this grammar appear as /*SPACE*/ and /*_*/ comments.
// SPACE ::= ((' ' | '\t')+ | COMMENT | NLP EOL) SPACE?
// _ ::= SPACE?
//# TODO: add async | await
//# Reserved words; the NAME rule refers to this list.
keyword ::=
'False' | 'else' | 'import' | 'pass' | 'None' | 'break' | 'except' | 'in' | 'raise' |
'True' | 'class' | 'finally' | 'is' | 'return' | 'and' | 'continue' | 'for' | 'as' |
'lambda' | 'try' | 'def' | 'from' | 'while' | 'assert' | 'del' | 'global' | 'not' |
'with' | 'elif' | 'if' | 'or' | 'yield'
//# https://docs.python.org/3/library/string.html#formatspec
//# Format specification; the fstring rule uses it after ':'.
format_spec ::= ([<>=^] | [^{}] [<>=^])? [+-]? 'z'? '#'? '0'? [0-9]* [_,]* ('.' [0-9]+)? [bcdeEfFgGnosxX%]?
//Added tokens for railroad generation
//# _NOT and _AND stand in for the PEG predicate prefixes '!' and '&' so that
//# lookaheads remain visible in the diagram.
_NOT ::= '!'
_AND ::= '&'
//# End of input: negative lookahead on "any character"
EndOfFile ::= _NOT .
I noticed that the resulting codon_rules.cpp
doesn't preserve the order from codon/codon/parser/peg/grammar.peg
and I think that this can cause surprises, because in PEG
the order of rules and alternatives is significant.
After mechanically extracting the
peglib/codon
grammar from peglib.h
I noticed 2 places that can be improved; see also this issue https://github.com/yhirose/cpp-peglib/issues/275#issue-1720813202 . Below is the extracted grammar, already with the two mentioned enhancements:
The grammar above can be used on https://yhirose.github.io/cpp-peglib/ to test/debug/develop; here are the numbers with and without the enhancements when parsing
grammar.peg.
Before the enhancements:
After the enhancements: