thepowersgang / mrustc

Alternative rust compiler (re-implementation)
MIT License
2.17k stars 108 forks source link

Grammar railroad diagram #296

Closed mingodad closed 1 year ago

mingodad commented 1 year ago

Using some online tools like https://www.bottlecaps.de/rr/ui and https://www.bottlecaps.de/convert/ we can have a nice navigable railroad diagram.

Copy and paste the EBNF shown below into the Edit Grammar tab at https://www.bottlecaps.de/rr/ui, then click the View Diagram tab to see/download a navigable railroad diagram.

/* converted on Thu Feb 23, 2023, 13:14 (UTC+01) by bison-to-w3c v0.62 which is Copyright (c) 2011-2022 by Gunther Rademacher <grd@gmx.net> */

IDENT    ::= IDENT_
           | RWC_default
           | RWC_union
parse_root
         ::= module_root
module_root
         ::= super_attrs ( attrs ( item | macro_item ) )*
tt_list  ::= ( tt_subtree | tt_tok )*
tt_subtree
         ::= tt_paren
           | tt_brace
           | tt_square
tt_paren ::= '(' tt_list ')'
tt_brace ::= '{' tt_list '}'
tt_square
         ::= '[' tt_list ']'
super_attrs
         ::= super_attr*
opt_vis  ::= vis?
vis      ::= RWD_pub ( '(' use_path ')' )?
opt_comma
         ::= ','?
opt_unsafe
         ::= RWD_unsafe?
opt_lifetime
         ::= LIFETIME?
attrs_p  ::= attrs? attr
attrs    ::= attr*
super_attr
         ::= HASHBANG '[' meta_item ']'
           | SUPER_DOC_COMMENT
attr     ::= '#' '[' meta_item ']'
           | DOC_COMMENT
meta_item
         ::= IDENT ( tt_paren | '=' STRING )?
macro_braced
         ::= use_path '!' IDENT? tt_brace
macro_nonbrace
         ::= use_path '!' IDENT? ( tt_paren | tt_square )
macro_inv
         ::= macro_braced
           | macro_nonbrace ';'
macro_item
         ::= macro_inv
item     ::= opt_vis ( vis_item | RWD_extern RWD_crate extern_crate )
           | RWD_impl impl_def
           | vis RWD_unsafe unsafe_vis_item
           | RWD_unsafe unsafe_item
           | RWD_extern ( extern_block | RWD_crate extern_crate )
vis_item ::= RWD_mod module_def
           | RWD_type type_def
           | RWD_use use_def
           | RWD_static static_def
           | RWD_const const_def
           | RWD_struct struct_def
           | RWC_union union_def
           | RWD_enum enum_def
           | unsafe_vis_item
unsafe_vis_item
         ::= fn_qualifiers? RWD_fn fn_def
           | IDENT? RWD_trait trait_def
unsafe_item
         ::= unsafe_vis_item
           | RWD_impl impl_def
extern_crate
         ::= IDENT ( RWD_as IDENT )? ';'
extern_block
         ::= extern_abi '{' extern_item* '}'
extern_abi
         ::= STRING?
extern_item
         ::= attrs opt_vis ( RWD_fn fn_def_hdr | RWD_static opt_mut IDENT ':' type ) ';'
module_def
         ::= IDENT ( '{' module_root '}' | ';' )
fn_def   ::= fn_def_hdr code
fn_def_hdr
         ::= IDENT generic_def '(' fn_def_args ')' fn_def_ret where_clause
fn_def_hdr_PROTO
         ::= IDENT generic_def '(' fn_def_args_PROTO ')' fn_def_ret where_clause
fn_def_ret
         ::= ( THINARROW ( type | '!' ) )?
fn_def_args
         ::= ( fn_def_self ( ',' fn_def_arg_list opt_comma )? | fn_def_arg_list ( opt_comma | ',' TRIPLEDOT ) )?
fn_def_args_PROTO
         ::= ( fn_def_self ( ',' fn_def_arg_list_PROTO opt_comma )? | fn_def_arg_list_PROTO ( opt_comma | ',' TRIPLEDOT ) )?
fn_def_self
         ::= RWD_mut? RWD_self ( ':' type )?
           | '&' LIFETIME? RWD_mut? RWD_self
fn_def_arg_list
         ::= fn_def_arg ( ',' fn_def_arg )*
fn_def_arg
         ::= pattern ':' type
fn_def_arg_list_PROTO
         ::= fn_def_arg_PROTO ( ',' fn_def_arg_PROTO )*
fn_def_arg_PROTO
         ::= ( ( RWD_mut? IDENT | '_' ) ':' )? type
fn_qualifiers
         ::= RWD_extern extern_abi
           | RWD_const RWD_unsafe?
type_def ::= IDENT generic_def '=' type ';'
use_def  ::= ( use_path ( RWD_as ( IDENT | '_' ) | DOUBLECOLON ( '*' | '{' use_inners opt_comma '}' ) )? | '{' use_inners opt_comma '}' ) ';'
use_inners
         ::= use_inner_one ( ',' use_inner_one )*
use_inner_one
         ::= '*'
           | RWD_self
           | ident_chain ( RWD_as ( IDENT | '_' ) | DOUBLECOLON ( '*' | '{' use_inners opt_comma '}' ) )?
ident_chain
         ::= IDENT ( DOUBLECOLON IDENT )*
opt_mut  ::= RWD_mut?
static_def
         ::= opt_mut IDENT ':' type '=' const_value
const_def
         ::= IDENT ':' type '=' const_value
const_value
         ::= ( expr | error ) ';'
struct_def
         ::= IDENT generic_def ( where_clause ( ';' | '{' struct_def_items opt_comma '}' ) | '(' tuple_struct_def_item ( ',' tuple_struct_def_item )* opt_comma ')' where_clause ';' )
tuple_struct_def_item
         ::= attrs opt_vis type
struct_def_items
         ::= struct_def_item ( ',' struct_def_item )*
struct_def_item
         ::= attrs opt_vis IDENT ':' type
enum_def ::= IDENT generic_def where_clause '{' enum_variants '}'
enum_variants
         ::= ( enum_variant ( ',' enum_variant )* ','? )?
enum_variant
         ::= attrs enum_variant_
enum_variant_
         ::= IDENT ( '=' expr | '(' tuple_var_def_item ( ',' tuple_var_def_item )* opt_comma ')' | '{' struct_def_items opt_comma '}' )?
tuple_var_def_item
         ::= attrs type
union_def
         ::= IDENT generic_def where_clause '{' struct_def_items opt_comma '}'
trait_def
         ::= IDENT generic_def trait_bounds where_clause '{' ( attrs trait_item )* '}'
trait_bounds
         ::= ( ':' bound ( '+' bound )* )?
trait_item
         ::= ( RWD_type IDENT trait_bounds ( '=' type )? | RWD_const IDENT ':' type opt_assign_value ) ';'
           | opt_unsafe fn_qualifiers? RWD_fn fn_def_hdr_PROTO ( ';' | code )
opt_assign_value
         ::= ( '=' expr )?
impl_def ::= generic_def impl_def_line '{' super_attrs ( attrs impl_item )* '}'
impl_def_line
         ::= ( trait_path RWD_for ( type | DOUBLEDOT ) | '!' trait_path RWD_for type | type_noufcs ) where_clause
impl_item
         ::= macro_inv
           | ( RWC_default | opt_vis RWC_default? ) impl_item_nomac
impl_item_nomac
         ::= opt_unsafe fn_qualifiers? RWD_fn fn_def
           | RWD_type generic_def IDENT '=' type ';'
           | RWD_const ( const_def | RWD_unsafe? RWD_fn fn_def )
generic_def
         ::= ( '<' generic_def_one? ( ',' generic_def_one )* '>' )?
generic_def_one
         ::= attrs ( IDENT ( ':' bounds )? ( '=' type )? | LIFETIME ( ':' lifetime_bounds )? )
where_clause
         ::= ( RWD_where where_clause_ent ( ',' where_clause_ent )* ','? )?
where_clause_ent
         ::= hrlb_def type ':' bounds
           | LIFETIME ':' lifetime_bounds
lifetime_bounds
         ::= LIFETIME ( '+' LIFETIME )*
hrlb_def ::= ( RWD_for '<' LIFETIME ( ',' LIFETIME )* '>' )?
bounds   ::= bound ( '+' bound )*
bound    ::= LIFETIME
           | ( '?' | hrlb_def ) trait_path
use_path ::= ( DOUBLECOLON? IDENT | RWD_crate | RWD_super | RWD_self ) ( DOUBLECOLON IDENT )*
dlt      ::= DOUBLELT
type_args
         ::= dlt* '<' type_exprs ( '>' | DOUBLEGT )
expr_path
         ::= ( ( ufcs_path | RWD_self | RWD_super )? DOUBLECOLON )? expr_path_segs
expr_path_segs
         ::= IDENT ( DOUBLECOLON type_args )? ( DOUBLECOLON IDENT ( DOUBLECOLON type_args )? )*
expr_path_seg
         ::= IDENT ( DOUBLECOLON type_args )?
trait_path
         ::= ( ( RWD_super | RWD_self )? DOUBLECOLON type_path_segs | type_path_segs ( '(' type_list ')' fn_def_ret )? )?
type_path
         ::= ufcs_path DOUBLECOLON IDENT
           | trait_path
ufcs_path
         ::= ( '<' | DOUBLELT ufcs_path_tail DOUBLECOLON ) ufcs_path_tail
ufcs_path_tail
         ::= type ( '>' | RWD_as trait_path ( '>' | DOUBLEGT ) )
type_path_segs
         ::= type_path_seg ( DOUBLECOLON type_path_seg )*
type_path_seg
         ::= IDENT type_args?
type_exprs
         ::= type_arg ( ',' type_arg )*
type_arg ::= ( IDENT '=' )? type
           | LIFETIME
type     ::= ( RWD_dyn | RWD_impl )? trait_list
           | type_ele
type_noufcs
         ::= ( RWD_dyn | RWD_impl )? trait_list
           | type_nopath
type_ele ::= type_path
           | ( RWD_dyn | RWD_impl ) ( trait_path | '(' trait_list ')' )
           | type_nopath
type_nopath
         ::= hrlb_def opt_unsafe ( RWD_extern extern_abi )? RWD_fn '(' fn_def_arg_list_PROTO ')' fn_def_ret
           | '_'
           | ( ( '&' | DOUBLEAMP ) LIFETIME? RWD_mut? | '*' ( RWD_const | RWD_mut ) ) type_ele
           | '[' type ( ';' expr )? ']'
           | '(' ( type ( ',' type_list? )? )? ')'
           | '!'
trait_list
         ::= type_path ( '+' trait_list_ent )*
trait_list_ent
         ::= trait_path
           | LIFETIME
type_list
         ::= type ( ',' type )*
tuple_pattern
         ::= '(' ( pattern_list ','? )? ')'
struct_pattern
         ::= expr_path ( '{' struct_pattern_item ( ',' struct_pattern_item )* '}' | '(' pattern_list ')' )
struct_pattern_item
         ::= IDENT ( ':' pattern )?
           | ( RWD_ref RWD_mut? | RWD_mut ) IDENT
           | DOUBLEDOT
slice_pattern
         ::= '[' pattern_list ( ',' DOUBLEDOT )? ']'
pattern  ::= ( IDENT '@' )? nonbind_pattern
           | ( RWD_ref RWD_mut? | RWD_mut ) IDENT ( '@' nonbind_pattern )?
nonbind_pattern
         ::= '_'
           | DOUBLEDOT
           | struct_pattern
           | tuple_pattern
           | value_pattern ( ( TRIPLEDOT | DOUBLEDOTEQ ) value_pattern )?
           | slice_pattern
           | ( ( '&' | DOUBLEAMP ) RWD_mut? | RWD_box ) pattern
value_pattern
         ::= expr_path
           | '-'? INTEGER
           | CHARLIT
           | STRING
pattern_list
         ::= pattern ( ',' pattern )*
code     ::= block
block    ::= '{' block_contents '}'
block_contents
         ::= ( super_attr | ';' | attrs ( ( expr_na | RWD_let let_binding ) ';' | expr_blocks | block | item | use_path '!' ( IDENT? tt_brace | tt_paren ';' ) ) | LIFETIME ':' loop_block )* ( attrs expr_na )?
tail_expr
         ::= ( expr_noblock_NOBRACE | flow_control )?
flow_control
         ::= RWD_return expr_0?
           | ( RWD_break | RWD_continue ) opt_lifetime
opt_type_annotation
         ::= ( ':' type )?
let_binding
         ::= pattern opt_type_annotation ( '=' expr )?
expr_list
         ::= expr? ( ',' expr )*
struct_literal_ent
         ::= IDENT ( ':' expr )?
struct_literal_list
         ::= struct_literal_ent ( ',' struct_literal_ent )*
expr_na  ::= '{' block_contents '}'
           | expr_noblock
expr     ::= attrs_p? expr_na
expr_NOSTRLIT
         ::= block
           | attrs_p? expr_noblock_NOSTRLIT
expr_NOBRACE
         ::= block
           | attrs_p? expr_noblock_NOBRACE
expr_blocks
         ::= ( RWD_match expr_NOSTRLIT '{' match_arms | RWD_unsafe '{' block_contents ) '}'
           | RWD_if if_block_head ( RWD_else RWD_if if_block_head )* ( RWD_else code )?
           | flow_control
           | loop_block
loop_block
         ::= ( RWD_loop | ( RWD_for pattern RWD_in | RWD_while ( RWD_let pattern '=' )? ) expr_NOSTRLIT ) '{' block_contents '}'
if_block_head
         ::= ( RWD_let pattern '=' )? expr_NOSTRLIT code
match_arms
         ::= ( ( attrs ( match_arm ',' | match_arm_brace ) )* match_arm_last )?
match_arm_last
         ::= attrs match_arm ','?
match_pattern
         ::= pattern ( RWD_if expr_0 )?
match_patterns
         ::= match_pattern ( '|' match_pattern )*
match_arm
         ::= match_arm_brace
           | match_arm_expr
match_arm_brace
         ::= match_patterns FATARROW '{' block_contents '}'
match_arm_expr
         ::= match_patterns FATARROW tail_expr

//
// Tokens
//

//\("[^"]+"\)\s+{ return \(\S[^;]+\); }
RWD_mod ::= "mod"
RWD_impl ::= "impl"
RWD_use ::= "use"
RWD_type ::= "type"
RWD_static ::= "static"
RWD_const ::= "const"
RWD_struct ::= "struct"
RWD_trait ::= "trait"
RWD_enum ::= "enum"

RWD_fn ::= "fn"
RWD_as ::= "as"
RWD_in ::= "in"
RWD_mut ::= "mut"
RWD_pub ::= "pub"
RWD_where ::= "where"
RWD_extern ::= "extern"
RWD_crate ::= "crate"

RWD_let ::= "let"
RWD_ref ::= "ref"
RWD_move ::= "move"
RWD_box ::= "box"

RWD_self ::= "self"
RWD_super ::= "super"

RWD_match ::= "match"
RWD_if ::= "if"
RWD_else ::= "else"
RWD_loop ::= "loop"
RWD_while ::= "while"
RWD_for ::= "for"
RWD_unsafe ::= "unsafe"
RWD_return ::= "return"
RWD_break ::= "break"
RWD_continue ::= "continue"

RWD_dyn ::= "dyn"

RWC_default ::= "default"
RWC_union ::= "union"

DOUBLECOLON ::= "::"
THINARROW ::= "->"
FATARROW ::= "=>"

DOUBLEEQUAL ::= "=="
EXCLAMEQUAL ::= "!="
GTEQUAL ::= ">="
LTEQUAL ::= "<="
PLUSEQUAL ::= "+="
MINUSEQUAL ::= "-="
STAREQUAL ::= "*="
SLASHEQUAL ::= "/="
PERCENTEQUAL ::= "%="

PIPEEQUAL ::= "|="
AMPEQUAL ::= "&="
CARETEQUAL ::= "^="

DOUBLEAMP ::= "&&"
DOUBLEPIPE ::= "||"
DOUBLELT ::= "<<"
DOUBLEGT ::= ">>"
DOUBLELTEQUAL ::= "<<="
DOUBLEGTEQUAL ::= ">>="
DOUBLEDOT ::= ".."
TRIPLEDOT ::= "..."
DOUBLEDOTEQ ::= "..="

HASHBANG ::= "#!"
mingodad commented 1 year ago

See also the project at https://github.com/bleibig/rust-grammar/issues/16, which has a grammar with 0 conflicts, compared with this one, which reports:

cat rust.y > .gen/.rust.y
cpp -P rust_expr.y.h >> .gen/.rust.y
cpp -P rust_tts.y.h >> .gen/.rust.y
[BISON] .gen/rust.tab.cpp
.gen/.rust.y: warning: 748 shift/reduce conflicts [-Wconflicts-sr]
.gen/.rust.y: warning: 581 reduce/reduce conflicts [-Wconflicts-rr]
.gen/.rust.y:705.4-25: warning: rule useless in parser due to conflicts [-Wother]
  : '{' block_contents '}'
    ^^^^^^^^^^^^^^^^^^^^^^
thepowersgang commented 1 year ago

The BNF grammar in the mrustc repo is OLD code, not maintained or used at all. Should probably just delete it.

mingodad commented 1 year ago

I'm developing a Yacc/Lex-compatible online editor/tester, and I have added the grammar from https://github.com/bleibig/rust-grammar.git to it; it can parse a variety of Rust code (for now it needs the >> in generic parameters to be written with a space, > >, until a fix comes along).

It can be seen here: https://mingodad.github.io/parsertl-playground/playground/ (select the Rust parser, then click Parse to see a parse tree for the content of the Input source editor).

I think it can help to debug/develop/test/document grammars/parsers!

Any feedback is welcome !