adrian-thurston / colm

The Colm Programming Language
MIT License
163 stars 31 forks source link

Grammar railroad diagram #157

Open mingodad opened 11 months ago

mingodad commented 11 months ago

I am trying to convert the colm grammar for use on https://mingodad.github.io/parsertl-playground/playground/. I have an initial port that can generate an EBNF understood by https://www.bottlecaps.de/rr/ui, which produces a railroad diagram for colm. Follow the instructions shown below:

//
// EBNF to be viewed at https://www.bottlecaps.de/rr/ui
//
// Copy and paste this at https://www.bottlecaps.de/rr/ui in the 'Edit Grammar' tab
// then click the 'View Diagram' tab.
//

start::=
      start_1

start_1::=
      /*%empty*/
    | start_1 root_item

root_item::=
      rl_def
    | literal_def
    | token_def
    | ic_def
    | ignore_def
    | cfl_def
    | cfl_redef
    | region_def
    | struct_def
    | namespace_def
    | function_def
    | in_host_def
    | iter_def
    | statement
    | global_def
    | export_def
    | pre_eof_def
    | precedence_def
    | alias_def
    | _include
    | reduction_def

_include::=
      INCLUDE SQ _include_1 sq_lit_term

_include_1::=
      /*%empty*/
    | _include_1 sq_cons_data

precedence_def::=
      pred_type pred_token_list

pred_type::=
      LEFT
    | RIGHT
    | NONASSOC

pred_token_list::=
      pred_token_list COMMA pred_token
    | pred_token

pred_token::=
      region_qual id
    | region_qual backtick_lit

pre_eof_def::=
      PREEOF COPEN lang_stmt_list CCLOSE

alias_def::=
      ALIAS id type_ref

struct_item::=
      struct_var_def
    | literal_def
    | rl_def
    | token_def
    | ic_def
    | ignore_def
    | cfl_def
    | region_def
    | struct_def
    | function_def
    | in_host_def
    | iter_def
    | export_def
    | pre_eof_def
    | precedence_def
    | alias_def

export_def::=
      EXPORT var_def opt_def_init

global_def::=
      GLOBAL var_def opt_def_init

iter_def::=
      ITER id POPEN param_var_def_list PCLOSE COPEN lang_stmt_list CCLOSE

reference_type_ref::=
      REF LT type_ref GT

param_var_def_seq::=
      param_var_def COMMA param_var_def_seq
    | param_var_def

param_var_def_list::=
      param_var_def_seq
    | /*%empty*/

param_var_def::=
      id COLON type_ref
    | id COLON reference_type_ref

opt_export::=
      EXPORT
    | /*%empty*/

function_def::=
      opt_export type_ref id POPEN param_var_def_list PCLOSE COPEN lang_stmt_list CCLOSE

in_host_def::=
      opt_export type_ref id POPEN param_var_def_list PCLOSE EQUALS id

struct_var_def::=
      var_def

struct_key::=
      STRUCT
    | CONTEXT

struct_def::=
      struct_key id struct_def_1 END

struct_def_1::=
      /*%empty*/
    | struct_def_1 struct_item

literal_keyword::=
      LITERAL
    | TOKEN

literal_def::=
      literal_keyword literal_list

literal_list::=
      literal_list literal_item
    | literal_item

literal_item::=
      no_ignore_left backtick_lit no_ignore_right

no_ignore_left::=
      NI MINUS
    | /*%empty*/

no_ignore_right::=
      MINUS NI
    | /*%empty*/

reduction_def::=
      REDUCTION id reduction_def_1 END

reduction_def_1::=
      /*%empty*/
    | reduction_def_1 reduction_item

red_nonterm::=
      type_ref RED_OPEN red_nonterm_1 RED_CLOSE

red_nonterm_1::=
      /*%empty*/
    | red_nonterm_1 host_item

red_action::=
      type_ref COLON id RED_OPEN red_action_1 RED_CLOSE

red_action_1::=
      /*%empty*/
    | red_action_1 host_item

host_item::=
      red_id
    | red_lit
    | red_comment
    | red_ws
    | red_any
    | RED_LHS
    | RED_RHS_REF
    | RED_TREE_REF
    | RED_RHS_LOC
    | RED_RHS_NREF
    | RED_TREE_NREF
    | RED_RHS_NLOC
    | RED_OPEN host_item_1 RED_CLOSE

host_item_1::=
      /*%empty*/
    | host_item_1 host_item

reduction_item::=
      red_nonterm
    | red_action

namespace_def::=
      NAMESPACE id namespace_def_1 END

namespace_def_1::=
      /*%empty*/
    | namespace_def_1 namespace_item

namespace_item::=
      rl_def
    | literal_def
    | token_def
    | ic_def
    | ignore_def
    | cfl_def
    | region_def
    | struct_def
    | namespace_def
    | function_def
    | in_host_def
    | iter_def
    | pre_eof_def
    | precedence_def
    | alias_def
    | _include
    | global_def

opt_reduce_first::=
      REDUCEFIRST
    | /*%empty*/

cfl_def::=
      DEF id cfl_def_1 opt_reduce_first prod_list

cfl_def_1::=
      /*%empty*/
    | cfl_def_1 var_def

cfl_redef::=
      REDEF id cfl_redef_1 opt_reduce_first prod_list

cfl_redef_1::=
      /*%empty*/
    | cfl_redef_1 var_def

region_def::=
      LEX region_def_1 END

region_def_1::=
      /*%empty*/
    | region_def_1 root_item

rl_def::=
      RL id LEX_FSLASH lex_expr LEX_FSLASH

opt_lex_expr::=
      lex_expr
    | /*%empty*/

token_def::=
      TOKEN id token_def_1 no_ignore_left LEX_FSLASH opt_lex_expr LEX_FSLASH no_ignore_right opt_translate

token_def_1::=
      /*%empty*/
    | token_def_1 var_def

ic_def::=
      TOKEN id MINUS

opt_translate::=
      COPEN lang_stmt_list CCLOSE
    | /*%empty*/

opt_id::=
      id
    | /*%empty*/

ignore_def::=
      IGNORE opt_id LEX_FSLASH opt_lex_expr LEX_FSLASH

prod_sublist::=
      prod_sublist BAR prod_el_list
    | prod_el_list

prod_el::=
      opt_prod_el_name region_qual id opt_repeat
    | opt_prod_el_name region_qual backtick_lit opt_repeat
    | opt_prod_el_name POPEN prod_sublist PCLOSE opt_repeat

opt_prod_el_name::=
      id COLON
    | /*%empty*/

prod_el_list::=
      prod_el_list prod_el
    | /*%empty*/

opt_commit::=
      COMMIT
    | /*%empty*/

opt_prod_name::=
      COLON id
    | /*%empty*/

prod::=
      SQOPEN prod_el_list SQCLOSE opt_prod_name opt_commit opt_reduce
    | SQOPEN DOT_DOT_DOT SQCLOSE

opt_reduce::=
      COPEN lang_stmt_list CCLOSE
    | /*%empty*/

prod_list::=
      prod_list BAR prod
    | prod

case_clause::=
      CASE pattern block_or_single
    | CASE id block_or_single
    | CASE id pattern block_or_single

default_clause::=
      DEFAULT block_or_single

case_clause_list::=
      case_clause case_clause_list
    | case_clause
    | default_clause

bare_tok::=
      LT_LT
    | LARROW

statement::=
      print_stmt
    | var_def opt_def_init
    | FOR id COLON type_ref IN iter_call block_or_single
    | IF code_expr block_or_single elsif_list
    | SWITCH var_ref case_clause_list
    | SWITCH var_ref COPEN case_clause_list CCLOSE
    | WHILE code_expr block_or_single
    | var_ref EQUALS code_expr
    | YIELD var_ref
    | RETURN code_expr
    | BREAK
    | REJECT
    | var_ref POPEN call_arg_list PCLOSE
    | stmt_or_factor
    | bare_tok accumulate opt_eos

elsif_list::=
      elsif_clause elsif_list
    | optional_else

elsif_clause::=
      ELSIF code_expr block_or_single

optional_else::=
      ELSE block_or_single
    | /*%empty*/

call_arg_seq::=
      code_expr COMMA call_arg_seq
    | code_expr

call_arg_list::=
      call_arg_seq
    | /*%empty*/

iter_call::=
      E1 var_ref POPEN call_arg_list PCLOSE
    | E2 id
    | E3 code_expr

block_or_single::=
      COPEN lang_stmt_list CCLOSE
    | statement

require_pattern::=
      REQUIRE var_ref pattern

opt_require_stmt::=
      require_pattern lang_stmt_list
    | /*%empty*/

lang_stmt_list::=
      lang_stmt_list_1 opt_require_stmt

lang_stmt_list_1::=
      /*%empty*/
    | lang_stmt_list_1 statement

opt_def_init::=
      EQUALS code_expr
    | /*%empty*/

var_def::=
      id COLON type_ref

print_stmt::=
      PRINT POPEN call_arg_list PCLOSE
    | PRINTS POPEN var_ref COMMA call_arg_list PCLOSE
    | PRINT accumulate

code_expr::=
      code_expr AMP_AMP code_relational
    | code_expr BAR_BAR code_relational
    | code_relational

code_relational::=
      code_relational EQ_EQ code_additive
    | code_relational BANG_EQ code_additive
    | code_relational LT code_additive
    | code_relational GT code_additive
    | code_relational LT_EQ code_additive
    | code_relational GT_EQ code_additive
    | code_additive

code_additive::=
      code_additive PLUS code_multiplicitive
    | code_additive MINUS code_multiplicitive
    | code_multiplicitive

code_multiplicitive::=
      code_multiplicitive STAR code_unary
    | code_multiplicitive FSLASH code_unary
    | code_unary

code_unary::=
      BANG code_factor
    | DOLLAR code_factor
    | DOLLAR DOLLAR code_factor
    | CARET code_factor
    | AT code_factor
    | PERCENT code_factor
    | code_factor

opt_eos::=
      DOT
    | EOS
    | /*%empty*/

code_factor::=
      number
    | var_ref POPEN call_arg_list PCLOSE
    | var_ref
    | NIL
    | TRUE
    | FALSE
    | POPEN code_expr PCLOSE
    | string
    | type_ref IN var_ref
    | TYPEID LT type_ref GT
    | CAST LT type_ref GT code_factor
    | stmt_or_factor

type_ref::=
      region_qual id opt_repeat
    | INT
    | BOOL
    | VOID
    | PARSER LT type_ref GT
    | LIST LT type_ref GT
    | MAP LT type_ref COMMA type_ref GT
    | LIST_EL LT type_ref GT
    | MAP_EL LT type_ref COMMA type_ref GT

region_qual::=
      region_qual id DOUBLE_COLON
    | /*%empty*/

opt_repeat::=
      STAR
    | PLUS
    | QUESTION
    | LT STAR
    | LT PLUS
    | /*%empty*/

opt_capture::=
      id COLON
    | /*%empty*/

opt_field_init::=
      POPEN opt_field_init_1 PCLOSE
    | /*%empty*/

opt_field_init_1::=
      /*%empty*/
    | opt_field_init_1 field_init

field_init::=
      code_expr

stmt_or_factor::=
      PARSE opt_capture type_ref opt_field_init accumulate
    | PARSE_TREE opt_capture type_ref opt_field_init accumulate
    | PARSE_STOP opt_capture type_ref opt_field_init accumulate
    | REDUCE id type_ref opt_field_init accumulate
    | READ_REDUCE id type_ref opt_field_init accumulate
    | SEND var_ref accumulate opt_eos
    | SEND_TREE var_ref accumulate opt_eos
    | MAKE_TREE POPEN call_arg_list PCLOSE
    | MAKE_TOKEN POPEN call_arg_list PCLOSE
    | CONS opt_capture type_ref opt_field_init constructor
    | MATCH var_ref pattern
    | NEW opt_capture type_ref POPEN stmt_or_factor_1 PCLOSE

stmt_or_factor_1::=
      /*%empty*/
    | stmt_or_factor_1 field_init

opt_label::=
      id COLON
    | /*%empty*/

dq_lit_term::=
      LIT_DQ
    | LIT_DQ_NL

sq_lit_term::=
      CONS_SQ
    | CONS_SQ_NL

opt_tilde_data::=
      tilde_data
    | /*%empty*/

pattern_el_lel::=
      region_qual id opt_repeat
    | region_qual backtick_lit opt_repeat

pattern_el::=
      opt_label pattern_el_lel
    | DQ pattern_el_1 dq_lit_term
    | SQ pattern_el_2 sq_lit_term
    | TILDE opt_tilde_data TILDE_NL

pattern_el_2::=
      /*%empty*/
    | pattern_el_2 sq_cons_data

pattern_el_1::=
      /*%empty*/
    | pattern_el_1 litpat_el

litpat_el::=
      lit_dq_data
    | LIT_SQOPEN litpat_el_1 LIT_SQCLOSE

litpat_el_1::=
      /*%empty*/
    | litpat_el_1 pattern_el

pattern_top_el::=
      DQ pattern_top_el_1 dq_lit_term
    | SQ pattern_top_el_2 sq_lit_term
    | TILDE opt_tilde_data TILDE_NL

pattern_top_el_2::=
      /*%empty*/
    | pattern_top_el_2 sq_cons_data

pattern_top_el_1::=
      /*%empty*/
    | pattern_top_el_1 litpat_el

pattern_list::=
      pattern_top_el pattern_list
    | pattern_top_el

pattern::=
      pattern_list
    | SQOPEN pattern_1 SQCLOSE

pattern_1::=
      /*%empty*/
    | pattern_1 pattern_el

// E1, E2 and E3 are empty marker nonterminals: each one derives only the
// empty string. They appear as a prefix on alternatives of iter_call,
// cons_el, accum_el and string_el.
// NOTE(review): presumably carried over from the source grammar to tag
// alternatives; confirm before inlining or removing them.
E1::=
      /*%empty*/

E2::=
      /*%empty*/

E3::=
      /*%empty*/

cons_el::=
      E1 region_qual backtick_lit
    | E1 DQ cons_el_1 dq_lit_term
    | E1 SQ cons_el_2 sq_lit_term
    | E1 TILDE opt_tilde_data TILDE_NL
    | E2 code_expr

cons_el_2::=
      /*%empty*/
    | cons_el_2 sq_cons_data

cons_el_1::=
      /*%empty*/
    | cons_el_1 lit_cons_el

lit_cons_el::=
      lit_dq_data
    | LIT_SQOPEN lit_cons_el_1 LIT_SQCLOSE

lit_cons_el_1::=
      /*%empty*/
    | lit_cons_el_1 cons_el

cons_top_el::=
      DQ cons_top_el_1 dq_lit_term
    | SQ cons_top_el_2 sq_lit_term
    | TILDE opt_tilde_data TILDE_NL

cons_top_el_2::=
      /*%empty*/
    | cons_top_el_2 sq_cons_data

cons_top_el_1::=
      /*%empty*/
    | cons_top_el_1 lit_cons_el

cons_list::=
      cons_top_el cons_list
    | cons_top_el

constructor::=
      cons_list
    | SQOPEN constructor_1 SQCLOSE

constructor_1::=
      /*%empty*/
    | constructor_1 cons_el

accum_el::=
      E1 DQ accum_el_1 dq_lit_term
    | E1 SQ accum_el_2 sq_lit_term
    | E1 TILDE opt_tilde_data TILDE_NL
    | E2 code_expr

accum_el_2::=
      /*%empty*/
    | accum_el_2 sq_cons_data

accum_el_1::=
      /*%empty*/
    | accum_el_1 lit_accum_el

lit_accum_el::=
      lit_dq_data
    | LIT_SQOPEN lit_accum_el_1 LIT_SQCLOSE

lit_accum_el_1::=
      /*%empty*/
    | lit_accum_el_1 accum_el

accum_top_el::=
      DQ accum_top_el_1 dq_lit_term
    | SQ accum_top_el_2 sq_lit_term
    | TILDE opt_tilde_data TILDE_NL
    | SQOPEN accum_top_el_3 SQCLOSE

accum_top_el_3::=
      /*%empty*/
    | accum_top_el_3 accum_el

accum_top_el_2::=
      /*%empty*/
    | accum_top_el_2 sq_cons_data

accum_top_el_1::=
      /*%empty*/
    | accum_top_el_1 lit_accum_el

accum_list::=
      accum_top_el accum_list
    | accum_top_el

accumulate::=
      accum_list

string_el::=
      E1 DQ string_el_1 dq_lit_term
    | E1 SQ string_el_2 sq_lit_term
    | E1 TILDE opt_tilde_data TILDE_NL
    | E2 code_expr

string_el_2::=
      /*%empty*/
    | string_el_2 sq_cons_data

string_el_1::=
      /*%empty*/
    | string_el_1 lit_string_el

lit_string_el::=
      lit_dq_data
    | LIT_SQOPEN lit_string_el_1 LIT_SQCLOSE

lit_string_el_1::=
      /*%empty*/
    | lit_string_el_1 string_el

string_top_el::=
      DQ string_top_el_1 dq_lit_term
    | SQ string_top_el_2 sq_lit_term
    | TILDE opt_tilde_data TILDE_NL

string_top_el_2::=
      /*%empty*/
    | string_top_el_2 sq_cons_data

string_top_el_1::=
      /*%empty*/
    | string_top_el_1 lit_string_el

string_list::=
      string_top_el string_list
    | string_top_el

string::=
      string_list
    | SQOPEN string_1 SQCLOSE

string_1::=
      /*%empty*/
    | string_1 string_el

var_ref::=
      region_qual qual id

qual::=
      qual id DOT
    | qual id ARROW
    | /*%empty*/

lex_expr::=
      lex_expr LEX_BAR lex_term
    | lex_expr LEX_AMP lex_term
    | lex_expr LEX_DASH lex_term
    | lex_expr LEX_DASHDASH lex_term
    | lex_term

opt_lex_dot::=
      LEX_DOT
    | /*%empty*/

lex_term::=
      lex_term opt_lex_dot lex_factor_rep
    | lex_term LEX_COLON_GT lex_factor_rep
    | lex_term LEX_COLON_GTGT lex_factor_rep
    | lex_term LEX_LT_COLON lex_factor_rep
    | lex_factor_rep

lex_factor_rep::=
      lex_factor_rep LEX_STAR
    | lex_factor_rep LEX_STARSTAR
    | lex_factor_rep LEX_PLUS
    | lex_factor_rep LEX_QUESTION
    | lex_factor_rep COPEN lex_uint CCLOSE
    | lex_factor_rep COPEN COMMA lex_uint CCLOSE
    | lex_factor_rep COPEN lex_uint COMMA CCLOSE
    | lex_factor_rep COPEN lex_uint COMMA lex_uint CCLOSE
    | lex_factor_neg

lex_factor_neg::=
      LEX_CARET lex_factor_neg
    | lex_factor

lex_range_lit::=
      lex_lit
    | lex_num

lex_num::=
      lex_uint
    | lex_hex

lex_factor::=
      lex_lit
    | lex_id
    | lex_uint
    | lex_hex
    | lex_range_lit LEX_DOTDOT lex_range_lit
    | LEX_SQOPEN_POS reg_or_data RE_SQCLOSE
    | LEX_SQOPEN_NEG reg_or_data RE_SQCLOSE
    | LEX_POPEN lex_expr LEX_PCLOSE

reg_or_data::=
      reg_or_data reg_or_char
    | /*%empty*/

reg_or_char::=
      RE_CHAR
    | RE_CHAR RE_DASH RE_CHAR

// Lexer tokens: each terminal name used above mapped to its literal text
DEF ::= "def"
REDEF ::= "redef"
LEX ::= "lex"
END ::= "end"
TOKEN ::= "token"
RL ::= "rl"
IGNORE ::= "ignore"
PRINT ::= "print"
PRINTS ::= "prints"
PARSE ::= "parse"
REDUCE ::= "reduce"
READ_REDUCE ::= "read_reduce"
PARSE_TREE ::= "parse_tree"
PARSE_STOP ::= "parse_stop"
// CONS accepts either spelling of the keyword: "construct" or "cons".
// Both literals must sit on the right-hand side of ::= for the rule to be valid EBNF.
CONS ::= "construct" | "cons"
MATCH ::= "match"
REQUIRE ::= "require"
SEND ::= "send"
SEND_TREE ::= "send_tree"
NAMESPACE ::= "namespace"
REDUCTION ::= "reduction"
FOR ::= "for"
IF ::= "if"
YIELD ::= "yield"
WHILE ::= "while"
ELSIF ::= "elsif"
ELSE ::= "else"
IN ::= "in"
// PARSER accepts either spelling of the keyword: "parser" or "accum".
// Both literals must sit on the right-hand side of ::= for the rule to be valid EBNF.
PARSER ::= "parser" | "accum"
LIST ::= "list"
LIST_EL ::= "list_el"
MAP ::= "map"
MAP_EL ::= "map_el"
PTR ::= "ptr"
ITER ::= "iter"
REF ::= "ref"
EXPORT ::= "export"
RETURN ::= "return"
BREAK ::= "break"
REJECT ::= "reject"
REDUCEFIRST ::= "reducefirst"
ALIAS ::= "alias"
COMMIT ::= "commit"
NEW ::= "new"
PREEOF ::= "preeof"
GLOBAL ::= "global"
EOS ::= "eos"
CAST ::= "cast"
SWITCH ::= "switch"
CASE ::= "case"
DEFAULT ::= "default"
INT ::= "int"
BOOL ::= "bool"
VOID ::= "void"

MAKE_TOKEN ::= "make_token"
MAKE_TREE ::= "make_tree"

TYPEID ::= "typeid"

LITERAL ::= "literal"
CONTEXT ::= "context"
STRUCT ::= "struct"
NI ::= "ni"

NIL ::= "nil"
TRUE ::= "true"
FALSE ::= "false"

LEFT ::= "left"
RIGHT ::= "right"
NONASSOC ::= "nonassoc"

INCLUDE ::= "include"

SQOPEN ::= "["
SQCLOSE ::= "]"
BAR ::= "|"
FSLASH ::= "/"
COLON ::= ":"
DOUBLE_COLON ::= "::"
DOT ::= "."
ARROW ::= "->"
POPEN ::= "("
PCLOSE ::= ")"
COPEN ::= "{"
CCLOSE ::= "}"
STAR ::= "*"
QUESTION ::= "?"
EQUALS ::= "="
EQ_EQ ::= "=="
BANG_EQ ::= "!="
COMMA ::= ","
LT ::= "<"
GT ::= ">"
LT_EQ ::= "<="
GT_EQ ::= ">="
BANG ::= "!"
DOLLAR ::= "$"
CARET ::= "^"
AT ::= "@"
PERCENT ::= "%"
PLUS ::= "+"
MINUS ::= "-"
AMP_AMP ::= "&&"
BAR_BAR ::= "||"
DOT_DOT_DOT ::= "..."
LT_LT ::= "<<"
LARROW ::= "<-"

LIT_DQ ::= '"'
LIT_DQ_NL ::= "\n"
LIT_SQOPEN ::= "["
LIT_SQCLOSE ::= "]"

LEX_DOT ::= "."
LEX_BAR ::= "|"
LEX_AMP ::= "&"
LEX_DASH ::= "-"
LEX_POPEN ::= "("
LEX_PCLOSE ::= ")"
LEX_STAR ::= "*"
LEX_STARSTAR ::= "**"
LEX_QUESTION ::= "?"
LEX_PLUS ::= "+"
LEX_CARET ::= "^"
LEX_DOTDOT ::= ".."

LEX_FSLASH ::= "/"
LEX_DASHDASH ::= "--"
LEX_COLON_GT ::= ":>"
LEX_COLON_GTGT ::= ":>>"
LEX_LT_COLON ::= "<:"

RE_DASH ::= "-"
RE_SQCLOSE ::= "]"

RED_OPEN ::= "{"
RED_CLOSE ::= "}"
mingodad commented 11 months ago

It would be nice if colm could generate them, like I did here https://github.com/mingodad/lalr-parser-test for bison/byacc/lemon.