JasonSteving99 / claro-lang

Claro Lang
https://docs.clarolang.com
138 stars 10 forks source link

Grammar railroad diagram #46

Open mingodad opened 8 months ago

mingodad commented 8 months ago

Using a script that does a bit of regex search and replace, and manually adding the tokens from the lexer, we can get an EBNF understood by (IPV6) https://www.bottlecaps.de/rr/ui or (IPV4) https://rr.red-dove.com/ui to generate a nice navigable railroad diagram that can help document/develop/debug this project's grammar.

Follow the instructions shown below at the top:

//
// EBNF to be viewed at
//  (IPV6) https://www.bottlecaps.de/rr/ui
//  (IPV4) https://rr.red-dove.com/ui
//
// Copy and paste this at one url shown above in the 'Edit Grammar' tab
// then click the 'View Diagram' tab.
//

// This is where the grammar starts.
program ::=
    stmts_and_defs_list

// TODO(steving) Add procedure defs to this target so that they also can only be used at the top level.
stmts_and_defs_list ::=
    // One or more top-level items in any order; a file holding just a single one of them is accepted as well.
    (
        stmt
      | module_definition_stmt
      | contract_definition_stmt
      | contract_implementation_stmt
      | generic_function_definition_stmt
      | generic_consumer_function_definition_stmt
      | generic_provider_function_definition_stmt
    )+

stmt_list ::=
    // Any number of leading stmts, closed by either one last stmt or a debug_stmts entry.
    stmt* ( stmt | debug_stmts )

debug_stmts ::=
    DEBUG_DUMP_SCOPE LPAR RPAR SEMICOLON

stmt ::=
    print
  | show_type
  | identifier_declaration
  | identifier_assignment
  | trashcan_assignment
  | identifier_increment_stmt
  | identifier_decrement_stmt
  | struct_field_assignment_stmt
  | list_element_assignment
  | if_else_chain_stmt
  | match_stmt
  | while_stmt
  | for_loop_stmt
  | repeat_stmt
  | break_stmt
  | continue_stmt
  | function_definition_stmt
  | consumer_function_definition_stmt
  | provider_function_definition_stmt
  | consumer_function_call_stmt
  | graph_function_definition_stmt
  | graph_provider_definition_stmt
  | graph_consumer_definition_stmt
  | initializers_block_stmt
  | unwrappers_block_stmt
  | return_stmt
  | alias_stmt
  | atom_def_stmt
  | newtype_def_stmt
  | using_block_stmt
  | pipe_chain_stmt
  | http_service_def_stmt
  | endpoint_handlers_block_stmt
  | sleep
  | privileged_inline_java

module_definition_stmt ::=
    // The USING clause between the module name and its body is optional.
    MODULE IDENTIFIER ( USING LPAR identifier_list RPAR )? LCURLY bind_stmts_list RCURLY

bind_stmts_list ::=
    bind_stmt bind_stmts_list
  | bind_stmt

bind_stmt ::=
    BIND IDENTIFIER COLON builtin_type TO expr SEMICOLON

using_block_stmt ::=
    USING LPAR identifier_list RPAR LCURLY stmt_list RCURLY

print ::=
    PRINT LPAR expr RPAR SEMICOLON

show_type ::=
    TYPE LPAR expr RPAR SEMICOLON

unwrap_expr ::=
    UNWRAP LPAR expr RPAR

identifier_declaration ::=
    // Initializer operators: ASSIGNMENT is the plain form, BLOCKING_GET the blocking variant and
    // QUESTION_MARK_ASSIGNMENT the automatic error propagation variant.
    VAR IDENTIFIER
    (
        // Typed declaration: the initializer may be left out.
        COLON builtin_type ( ( ASSIGNMENT | BLOCKING_GET | QUESTION_MARK_ASSIGNMENT ) expr )?
        // Untyped declaration: the initializer is mandatory.
      | ( ASSIGNMENT | BLOCKING_GET | QUESTION_MARK_ASSIGNMENT ) expr
    )
    SEMICOLON

alias_stmt ::=
    ALIAS IDENTIFIER COLON builtin_type

atom_def_stmt ::=
    ATOM identifier

newtype_def_stmt ::=
    // The angle-bracketed identifier list after the type name is optional.
    NEWTYPE IDENTIFIER ( L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET )? COLON builtin_type

initializers_block_stmt ::=
    INITIALIZERS IDENTIFIER LCURLY initializers_or_unwrappers_proc_defs_list RCURLY

unwrappers_block_stmt ::=
    UNWRAPPERS IDENTIFIER LCURLY initializers_or_unwrappers_proc_defs_list RCURLY

  // TODO(steving) Add support for Generic Graph Procedures when available.
initializers_or_unwrappers_proc_defs_list ::=
    // One or more procedure definitions of any of the kinds below, in any order.
    (
        function_definition_stmt
      | provider_function_definition_stmt
      | consumer_function_definition_stmt
      | graph_function_definition_stmt
      | graph_provider_definition_stmt
      | generic_function_definition_stmt
      | generic_provider_function_definition_stmt
      | generic_consumer_function_definition_stmt
    )+

builtin_type ::=
    // A base type, optionally preceded by the MUT modifier.
    MUT? base_builtin_type_without_mutability_modifier

// Every type form that may appear where builtin_type is expected, without a leading MUT modifier.
base_builtin_type_without_mutability_modifier ::=
    INT_TYPE
  | LONG_TYPE
  | FLOAT_TYPE
  | DOUBLE_TYPE
  | BOOLEAN_TYPE
  | STRING_TYPE
  | CHAR_TYPE
  | LBRACKET builtin_type RBRACKET
  | LCURLY builtin_type RCURLY
  | LCURLY builtin_type COLON builtin_type RCURLY
    // Maps are defined like {string} to avoid a new keyword `map` since I'd like to not impair functional style
    // where map is a well known function.
    // NOTE(review): the `{string}` example matches the single-type curly form; the alternative directly above
    // takes `type COLON type`, so the example may have lost its value type -- confirm against the parser source.
  | ONEOF L_ANGLE_BRACKET builtin_types_list R_ANGLE_BRACKET
  | FUNCTION_TYPE L_ANGLE_BRACKET builtin_type ARROW builtin_type R_ANGLE_BRACKET
  | FUNCTION_TYPE L_ANGLE_BRACKET BAR builtin_types_list BAR ARROW builtin_type R_ANGLE_BRACKET
  | BLOCKING FUNCTION_TYPE L_ANGLE_BRACKET builtin_type ARROW builtin_type R_ANGLE_BRACKET
  | BLOCKING FUNCTION_TYPE L_ANGLE_BRACKET BAR builtin_types_list BAR ARROW builtin_type R_ANGLE_BRACKET
  | CONSUMER_FUNCTION_TYPE L_ANGLE_BRACKET builtin_types_list R_ANGLE_BRACKET
  | BLOCKING CONSUMER_FUNCTION_TYPE L_ANGLE_BRACKET builtin_types_list R_ANGLE_BRACKET
  | PROVIDER_FUNCTION_TYPE L_ANGLE_BRACKET builtin_type R_ANGLE_BRACKET
  | BLOCKING PROVIDER_FUNCTION_TYPE L_ANGLE_BRACKET builtin_type R_ANGLE_BRACKET
  | TUPLE_TYPE L_ANGLE_BRACKET builtin_types_list R_ANGLE_BRACKET
  | FUTURE L_ANGLE_BRACKET builtin_type R_ANGLE_BRACKET
  | IDENTIFIER
  | SCOPED_IDENTIFIER
  | IDENTIFIER L_ANGLE_BRACKET builtin_types_list R_ANGLE_BRACKET
  | SCOPED_IDENTIFIER L_ANGLE_BRACKET builtin_types_list R_ANGLE_BRACKET
  | STRUCT_TYPE LCURLY function_args_types_list RCURLY
    // Structs are defined like `struct{field1: int ... fieldN: string}`. This is intended to resemble a sequence of
    // variable declarations which will align with the initialization syntax `{field1 = 1 ... fieldN = "foo"}` which
    // in turn is intended to resemble a sequence of variable initializations. I appreciate the metaphor of a struct as
    // a bundle of variables.
  | HTTP_RESPONSE
  | HTTP_CLIENT L_ANGLE_BRACKET builtin_type R_ANGLE_BRACKET
  | HTTP_SERVER L_ANGLE_BRACKET builtin_type R_ANGLE_BRACKET
  | SYNTHETIC_JAVA_TYPE LPAR STRING RPAR
  | SYNTHETIC_JAVA_TYPE L_ANGLE_BRACKET builtin_types_list R_ANGLE_BRACKET LPAR STRING RPAR

builtin_types_list ::=
    backwards_builtin_types_list

backwards_builtin_types_list ::=
    builtin_type COMMA backwards_builtin_types_list
  | builtin_type

identifier_assignment ::=
    identifier ASSIGNMENT expr SEMICOLON

trashcan_assignment ::=
    // Assignment to `_`, using either the plain or the error-propagating assignment operator.
    UNDERSCORE ( ASSIGNMENT | QUESTION_MARK_ASSIGNMENT ) expr SEMICOLON

identifier_increment_stmt ::=
    identifier_increment SEMICOLON

identifier_increment ::=
    INCREMENT identifier
  | identifier INCREMENT

identifier_decrement_stmt ::=
    identifier_decrement SEMICOLON

identifier_decrement ::=
    DECREMENT identifier
  | identifier DECREMENT

list_element_assignment ::=
    // Assignment through a subscript, using either the plain or the error-propagating assignment operator.
    collection_subscript ( ASSIGNMENT | QUESTION_MARK_ASSIGNMENT ) expr SEMICOLON

list_remove_expr ::=
    REMOVE LPAR expr COMMA expr RPAR

// In order to support function calls w/in single-expr lambda bodies w/o making the grammar think that it has reached
// the end of the lambda at the first occurrence of `(` in the expr body I am making a separate grammar production for
// "all of the exprs including lambdas" and "all of the exprs not including lambdas" so that function application can be
// restricted to not work on lambdas right away. Solves the ambiguity in a minimally annoying way. To immediately call a
// lambda you'd need to wrap the lambda in parens or something.
expr ::=
    non_lambda_exprs
  | lambda_function_expr

non_lambda_exprs ::=
    MINUS expr
  | expr PLUS expr
  | identifier_increment
  | identifier_decrement
  | expr MINUS expr
  | expr MULTIPLY expr
  | expr DIVIDE expr
  | expr MODULUS expr
  | expr EXPONENTIATE expr
  | expr IN expr
  | parenthesized_expr
  | LOG_PREFIX float LPAR expr RPAR
  | LOG_PREFIX integer LPAR expr RPAR
  | NUMERIC_BOOL LPAR expr RPAR
  | input
  | list_remove_expr
  | unwrap_expr
  | is_input_ready
  | copy_expr
  | from_json_expr
  | list
  | map
  | list_comprehension_expr
  | set_comprehension_expr
  | map_comprehension_expr
  | collection_subscript
  | LEN LPAR expr RPAR
  | bool_expr
  | fmt_string
  | term
  | function_call_expr
  | typed_lambda_function_expr
  | provider_function_call_expr
  | tuple
  | set
  | struct
  | struct_field_access_expr
    // This is some unfortunate syntax for casting but this actually was necessary to avoid having casting create an
    // ambiguous grammar deciding whether `(i < len(l))` was a malformed cast (e.g. for some parameterized custom type
    // like `(MyType<foo>)`) or if it was actually a "less-than" operator within a parenthesized expression. From some
    // perspective this is an issue with JCUP being set to use too low of a lookahead....but I don't fully understand
    // the implications of trying to increase that so it'll stay this way.
  | CAST LPAR builtin_type COMMA expr RPAR
  | get_http_client_expr
  | get_basic_http_server_for_port_expr
  | error
    // TODO(steving) This is super helpful do the same thing for stmts.
    // There's been some syntax error in an expression somewhere.. meaning that this expression is getting thrown away
    // in order to continue checking for other errors in the input program. So "repair" the program by giving it some
    // arbitrary Expr that won't complain a second time during AST type checking.

parenthesized_expr ::=
    LPAR expr RPAR

sleep ::=
    SLEEP LPAR expr RPAR SEMICOLON

input ::=
    // The STRING argument is optional.
    INPUT LPAR STRING? RPAR

is_input_ready ::=
    IS_INPUT_READY LPAR RPAR

copy_expr ::=
    COPY LPAR expr RPAR

from_json_expr ::=
    FROM_JSON LPAR expr RPAR

list ::=
    // Optional MUT modifier; the bracketed element list may be empty.
    MUT? LBRACKET args_list? RBRACKET

map ::=
    // Optional MUT modifier; the key/value initializer list may be empty.
    MUT? LCURLY map_initializer_kv_list? RCURLY

map_initializer_kv_list ::=
    expr COLON expr COMMA map_initializer_kv_list
  | expr COLON expr

tuple ::=
    // At least one comma is required, which is what separates a tuple from a parenthesized expression.
    MUT? LPAR expr COMMA args_list RPAR

set ::=
    // The mandatory comma separates a set from a parenthesized expression; {} instead of () separates it from a tuple.
    MUT? LCURLY expr COMMA args_list RCURLY

args_list ::=
    expr COMMA args_list
  | expr

struct ::=
    // Curly-braced field initializers, optionally preceded by the MUT modifier.
    MUT? LCURLY struct_field_initializers_list RCURLY

// Note that we're again building this up backwards for convenience (remember that all guava collections respect ordering).
struct_field_initializers_list ::=
    IDENTIFIER ASSIGNMENT expr COMMA struct_field_initializers_list
  | IDENTIFIER ASSIGNMENT expr

struct_field_access_expr ::=
    expr DOT IDENTIFIER

struct_field_assignment_stmt ::=
    struct_field_access_expr ASSIGNMENT expr SEMICOLON

collection_subscript ::=
    expr LBRACKET expr RBRACKET

bool_expr ::=
    equality
  | inequality
  | bool_arithmetic
  | instanceof_expr

// This calculator can evaluate an equality check of expressions.
equality ::=
    expr EQUALS expr
  | expr NOT_EQUALS expr

if_else_chain_stmt ::=
    // An if_stmt, optionally continued by an else-if chain or by a lone else_stmt.
    if_stmt ( else_if_stmt_chain | else_stmt )?

if_stmt ::=
    IF LPAR expr RPAR LCURLY stmt_list RCURLY

else_if_stmt_chain ::=
    // One or more else-if branches, optionally closed by a final else_stmt.
    else_if_stmt+ else_stmt?

else_if_stmt ::=
    ELSE if_stmt

// We can simply return the StmtListNode itself because this is actually gonna be run by the IfStmt it's associated
// with.
else_stmt ::=
    ELSE LCURLY stmt_list RCURLY

match_stmt ::=
    MATCH LPAR expr RPAR LCURLY match_cases_list_stmt RCURLY

match_cases_list_stmt ::=
    // One or more cases; each case is headed by either a single pattern or a multi-expr case.
    ( CASE ( match_case_patterns | match_multi_expr_case ) ARROW stmt_list )+

match_case_patterns ::=
    primitive
  | MINUS INTEGER
  | identifier
  | identifier LPAR primitive RPAR
  | identifier LPAR match_case_patterns RPAR
  | scoped_identifier LPAR primitive RPAR
  | scoped_identifier LPAR match_case_patterns RPAR
  | LPAR match_case_patterns COMMA match_case_patterns_list RPAR
  | MUT LPAR match_case_patterns COMMA match_case_patterns_list RPAR
  | LCURLY match_case_pattern_struct_field_list RCURLY
  | MUT LCURLY match_case_pattern_struct_field_list RCURLY
  | UNDERSCORE COLON builtin_type
  | identifier COLON builtin_type
  | UNDERSCORE
    // TODO(steving) This should model `_` as an Expr to have something to point at in errors.

// TODO(steving) TESTING!! Make multi-expr-case support arbitrary patterns.
match_multi_expr_case ::=
    primitive BAR match_multi_expr_case
  | identifier LPAR primitive RPAR BAR match_multi_expr_case
  | primitive BAR primitive
  | identifier LPAR primitive RPAR BAR identifier LPAR primitive RPAR

match_case_patterns_list ::=
    match_case_patterns COMMA match_case_patterns_list
  | match_case_patterns

match_case_pattern_struct_field_list ::=
    identifier ASSIGNMENT match_case_patterns COMMA match_case_pattern_struct_field_list
  | identifier ASSIGNMENT match_case_patterns

while_stmt ::=
    WHILE LPAR expr RPAR LCURLY stmt_list RCURLY

for_loop_stmt ::=
    FOR LPAR identifier IN expr RPAR LCURLY stmt_list RCURLY

repeat_stmt ::=
    REPEAT LPAR expr RPAR LCURLY stmt_list RCURLY

break_stmt ::=
    BREAK SEMICOLON

continue_stmt ::=
    CONTINUE SEMICOLON

list_comprehension_expr ::=
    // Optional MUT modifier and optional WHERE clause.
    MUT? LBRACKET expr BAR identifier IN expr ( WHERE expr )? RBRACKET

set_comprehension_expr ::=
    // Optional MUT modifier and optional WHERE clause.
    MUT? LCURLY expr BAR identifier IN expr ( WHERE expr )? RCURLY

map_comprehension_expr ::=
    // Optional MUT modifier and optional WHERE clause; the head is a `key COLON value` pair.
    MUT? LCURLY expr COLON expr BAR identifier IN expr ( WHERE expr )? RCURLY

function_definition_stmt ::=
    // Optional USING clause listing injected keys.
    ( USING LPAR injected_keys_list RPAR )?
    (
        // Plain or BLOCKING function: ordinary typed args.
        BLOCKING? FUNCTION_TYPE IDENTIFIER LPAR function_args_types_list RPAR
        // Blocking-generic function: args may also use the maybe-blocking procedure types.
      | generic_blocking_on FUNCTION_TYPE IDENTIFIER LPAR procedure_args_w_generic_blocking RPAR
    )
    ARROW builtin_type LCURLY stmt_list RCURLY

generic_function_definition_stmt ::=
    // Optional REQUIRES clause listing the required contracts.
    ( REQUIRES LPAR required_contracts RPAR )?
    (
        // Plain or BLOCKING generic function: ordinary typed args.
        BLOCKING? FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR function_args_types_list RPAR
        // Blocking-generic function: args may also use the maybe-blocking procedure types.
      | generic_blocking_on FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR procedure_args_w_generic_blocking RPAR
    )
    ARROW builtin_type LCURLY stmt_list RCURLY

generic_blocking_on ::=
    BLOCKING COLON identifier_bar_sep_list

identifier_bar_sep_list ::=
    IDENTIFIER BAR identifier_bar_sep_list
  | IDENTIFIER

maybe_blocking_procedure_types ::=
    MAYBE_BLOCKING FUNCTION_TYPE L_ANGLE_BRACKET builtin_type ARROW builtin_type R_ANGLE_BRACKET
  | MAYBE_BLOCKING FUNCTION_TYPE L_ANGLE_BRACKET BAR builtin_types_list BAR ARROW builtin_type R_ANGLE_BRACKET
  | MAYBE_BLOCKING CONSUMER_FUNCTION_TYPE L_ANGLE_BRACKET builtin_types_list R_ANGLE_BRACKET
  | MAYBE_BLOCKING PROVIDER_FUNCTION_TYPE L_ANGLE_BRACKET builtin_type R_ANGLE_BRACKET

// TODO(steving) I need to enable generic functions to be generic over only a partial subset of the required
// TODO(steving) Contract's type params.
required_contracts ::=
    // Comma-separated contract references; each name is either plain or scoped.
    ( IDENTIFIER | scoped_identifier ) L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET ( COMMA required_contracts )?

lambda_function_expr ::=
    IDENTIFIER ARROW expr
  | IDENTIFIER ARROW LCURLY stmt_list RCURLY
  | LAMBDA LPAR IDENTIFIER COMMA identifier_list RPAR ARROW expr
  | LAMBDA LPAR IDENTIFIER COMMA identifier_list RPAR ARROW LCURLY stmt_list RCURLY
  | LPAR RPAR ARROW expr
  | LPAR RPAR ARROW LCURLY stmt_list RCURLY

// TODO(steving) Lambda syntax has been complicated by parsing constraints. I don't actually want the `lambda` keyword
// TODO(steving) EVER. Figure out a way to actually define all lambda forms w/o ever using `lambda` keyword.
typed_lambda_function_expr ::=
    LPAR function_args_types_list RPAR ARROW builtin_type LCURLY stmt_list RCURLY
  | LAMBDA LPAR function_args_types_list RPAR ARROW LCURLY stmt_list RCURLY
  | LAMBDA LPAR RPAR ARROW builtin_type LCURLY stmt_list RCURLY

identifier_list ::=
    IDENTIFIER COMMA identifier_list
  | IDENTIFIER

consumer_function_definition_stmt ::=
    // Optional USING clause listing injected keys.
    ( USING LPAR injected_keys_list RPAR )?
    (
        // Plain or BLOCKING consumer: ordinary typed args.
        BLOCKING? CONSUMER_FUNCTION_TYPE IDENTIFIER LPAR function_args_types_list RPAR
        // Blocking-generic consumer: args may also use the maybe-blocking procedure types.
      | generic_blocking_on CONSUMER_FUNCTION_TYPE IDENTIFIER LPAR procedure_args_w_generic_blocking RPAR
    )
    LCURLY stmt_list RCURLY

generic_consumer_function_definition_stmt ::=
    // Optional REQUIRES clause listing the required contracts.
    ( REQUIRES LPAR required_contracts RPAR )?
    (
        // Plain or BLOCKING generic consumer: ordinary typed args.
        BLOCKING? CONSUMER_FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR function_args_types_list RPAR
        // Blocking-generic consumer: args may also use the maybe-blocking procedure types.
      | generic_blocking_on CONSUMER_FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR procedure_args_w_generic_blocking RPAR
    )
    LCURLY stmt_list RCURLY

provider_function_definition_stmt ::=
    // Optional USING clause and optional BLOCKING modifier; providers take no args.
    ( USING LPAR injected_keys_list RPAR )?
    BLOCKING? PROVIDER_FUNCTION_TYPE IDENTIFIER LPAR RPAR ARROW builtin_type LCURLY stmt_list RCURLY

generic_provider_function_definition_stmt ::=
    // Optional REQUIRES clause and optional BLOCKING modifier; providers take no args.
    ( REQUIRES LPAR required_contracts RPAR )?
    BLOCKING? PROVIDER_FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR RPAR ARROW builtin_type LCURLY stmt_list RCURLY

injected_keys_list ::=
    // Comma-separated `name COLON type` keys; each key may carry an `AS IDENTIFIER` suffix.
    identifier COLON builtin_type ( AS IDENTIFIER )? ( COMMA injected_keys_list )?

// TODO(steving) Be less lazy and don't bother using a Map where we should just be using a List of pairs....but Java is so garbage it doesn't have tuples so.....
// Note that we're again building this up backwards for convenience (remember that all guava collections respect ordering).
function_args_types_list ::=
    IDENTIFIER COLON builtin_type COMMA function_args_types_list
  | IDENTIFIER COLON builtin_type

// LITERALLY THE ONLY DIFFERENCE BETWEEN THIS AND THE ABOVE `function_args_types_list` production is that this one gives
// IdentifierReferenceTerms instead of Strings. Useful for logging error messages. Only really applicable to Graphs for
// now since we don't actually have restrictions on procedure args in general.
// TODO(steving) Also use this production for blocking generic args so that we can indicate when blocking generics were used incorrectly.
function_args_types_list_identifiers ::=
    identifier COLON builtin_type COMMA function_args_types_list_identifiers
  | identifier COLON builtin_type

procedure_args_w_generic_blocking ::=
    IDENTIFIER COLON maybe_blocking_procedure_types COMMA procedure_args_w_generic_blocking
  | IDENTIFIER COLON builtin_type COMMA procedure_args_w_generic_blocking
  | IDENTIFIER COLON maybe_blocking_procedure_types
  | IDENTIFIER COLON builtin_type

return_stmt ::=
    RETURN expr SEMICOLON

// Just to make sure that graphs are somewhat consistent you must either choose between putting the root node as the
// first or last node definition in the graph body. Let's encourage ordering these sanely because it can be hard to
// trace declarative code without a pattern.
graph_function_definition_stmt ::=
    // Optional USING clause listing injected keys.
    ( USING LPAR injected_keys_list RPAR )?
    GRAPH FUNCTION_TYPE IDENTIFIER LPAR function_args_types_list_identifiers RPAR ARROW builtin_type
    // The root node must be either the first or the last node of the body.
    LCURLY ( root_node non_root_nodes_list | non_root_nodes_list root_node ) RCURLY

// Just to make sure that graphs are somewhat consistent you must either choose between putting the root node as the
// first or last node definition in the graph body. Let's encourage ordering these sanely because it can be hard to
// trace declarative code without a pattern.
graph_provider_definition_stmt ::=
    // Optional USING clause listing injected keys.
    ( USING LPAR injected_keys_list RPAR )?
    GRAPH PROVIDER_FUNCTION_TYPE IDENTIFIER LPAR RPAR ARROW builtin_type
    // The root node must be either the first or the last node of the body.
    LCURLY ( root_node non_root_nodes_list | non_root_nodes_list root_node ) RCURLY

graph_consumer_definition_stmt ::=
    // Optional USING clause listing injected keys.
    ( USING LPAR injected_keys_list RPAR )?
    GRAPH CONSUMER_FUNCTION_TYPE IDENTIFIER LPAR function_args_types_list_identifiers RPAR
    // The root node must be either the first or the last node of the body.
    LCURLY ( root_node non_root_nodes_list | non_root_nodes_list root_node ) RCURLY

root_node ::=
    ROOT IDENTIFIER LEFT_ARROW expr SEMICOLON

non_root_node ::=
    NODE IDENTIFIER LEFT_ARROW expr SEMICOLON

non_root_nodes_list ::=
    non_root_node non_root_nodes_list
  | non_root_node

node_reference ::=
    AT IDENTIFIER

contract_definition_stmt ::=
    // Inside the angle brackets, the `IMPLICATION_ARROW identifier_list` tail is optional.
    CONTRACT IDENTIFIER
    L_ANGLE_BRACKET identifier_list ( IMPLICATION_ARROW identifier_list )? R_ANGLE_BRACKET
    LCURLY contract_signature_defs_list RCURLY

contract_signature_defs_list ::=
    contract_procedure_signature_definition_stmt contract_signature_defs_list
  | contract_procedure_signature_definition_stmt

// A single procedure signature inside a contract body: a header terminated by SEMICOLON, with no body.
contract_procedure_signature_definition_stmt ::=
   // FUNCTIONS
   FUNCTION_TYPE IDENTIFIER LPAR function_args_types_list RPAR ARROW builtin_type SEMICOLON
 | BLOCKING FUNCTION_TYPE IDENTIFIER LPAR function_args_types_list RPAR ARROW builtin_type SEMICOLON
 | generic_blocking_on FUNCTION_TYPE IDENTIFIER LPAR procedure_args_w_generic_blocking RPAR ARROW builtin_type SEMICOLON
 | FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR function_args_types_list RPAR ARROW builtin_type SEMICOLON
   // NOTE(review): the next alternative pairs plain BLOCKING with procedure_args_w_generic_blocking, whereas the
   // non-generic BLOCKING alternative above uses function_args_types_list -- confirm against the parser source.
 | BLOCKING FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR procedure_args_w_generic_blocking RPAR ARROW builtin_type SEMICOLON
 | generic_blocking_on FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR procedure_args_w_generic_blocking RPAR ARROW builtin_type SEMICOLON
   // CONSUMERS
 | CONSUMER_FUNCTION_TYPE IDENTIFIER LPAR function_args_types_list RPAR SEMICOLON
 | BLOCKING CONSUMER_FUNCTION_TYPE IDENTIFIER LPAR function_args_types_list RPAR SEMICOLON
 | generic_blocking_on CONSUMER_FUNCTION_TYPE IDENTIFIER LPAR procedure_args_w_generic_blocking RPAR SEMICOLON
 | CONSUMER_FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR function_args_types_list RPAR SEMICOLON
   // NOTE(review): same mismatch as in the FUNCTIONS group -- plain BLOCKING with procedure_args_w_generic_blocking;
   // confirm against the parser source.
 | BLOCKING CONSUMER_FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR procedure_args_w_generic_blocking RPAR SEMICOLON
 | generic_blocking_on CONSUMER_FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR procedure_args_w_generic_blocking RPAR SEMICOLON
   // PROVIDERS
 | PROVIDER_FUNCTION_TYPE IDENTIFIER LPAR RPAR ARROW builtin_type SEMICOLON
 | BLOCKING PROVIDER_FUNCTION_TYPE IDENTIFIER LPAR RPAR ARROW builtin_type SEMICOLON
 | PROVIDER_FUNCTION_TYPE IDENTIFIER L_ANGLE_BRACKET identifier_list R_ANGLE_BRACKET LPAR RPAR ARROW builtin_type SEMICOLON

contract_implementation_stmt ::=
    // The implemented contract is named by either a plain or a scoped identifier.
    IMPLEMENT ( IDENTIFIER | scoped_identifier )
    L_ANGLE_BRACKET builtin_types_list R_ANGLE_BRACKET
    LCURLY contract_implementations_list RCURLY

contract_implementations_list ::=
    // One or more procedure definitions of any of the kinds below, in any order.
    (
        function_definition_stmt
      | consumer_function_definition_stmt
      | provider_function_definition_stmt
      | generic_function_definition_stmt
      | generic_consumer_function_definition_stmt
    )+

pipe_chain_stmt ::=
    expr pipe_chain

pipe_chain ::=
    PIPE_ARROW expr pipe_chain
  | PIPE_ARROW consumer_function_call_stmt
  | PIPE_ARROW identifier_assignment
  | PIPE_ARROW identifier_declaration
  | PIPE_ARROW list_element_assignment
  | PIPE_ARROW print

function_call_expr ::=
    non_lambda_exprs LPAR args_list RPAR
  | scoped_identifier LPAR args_list RPAR
  // Foo:(arg1 ... argN)

provider_function_call_expr ::=
    non_lambda_exprs LPAR RPAR
  | scoped_identifier LPAR RPAR

consumer_function_call_stmt ::=
    non_lambda_exprs LPAR args_list RPAR SEMICOLON
  | scoped_identifier LPAR args_list RPAR SEMICOLON

inequality ::=
    expr L_ANGLE_BRACKET expr
  | expr R_ANGLE_BRACKET expr
  | expr LTE expr
  | expr GTE expr

bool_arithmetic ::=
    expr AND expr
  | expr OR expr
  | NOT expr

instanceof_expr ::=
    expr INSTANCEOF builtin_type

fmt_string ::=
    FMT_STRING_PART expr fmt_string
  | FMT_STRING_PART expr STRING

http_service_def_stmt ::=
    HTTP_SERVICE identifier LCURLY http_endpoints_list RCURLY

http_endpoints_list ::=
    // Comma-separated `name COLON path` entries; the path is either a fmt_string or a plain STRING.
    identifier COLON ( fmt_string | STRING ) ( COMMA http_endpoints_list )?

endpoint_handlers_block_stmt ::=
    ENDPOINT_HANDLERS identifier LCURLY endpoint_handler_impl_graphs_list RCURLY

endpoint_handler_impl_graphs_list ::=
    // One or more graph function / graph provider definitions, in any order.
    ( graph_function_definition_stmt | graph_provider_definition_stmt )+

get_http_client_expr ::=
    GET_HTTP_CLIENT LPAR expr RPAR

get_basic_http_server_for_port_expr ::=
    GET_BASIC_HTTP_SERVER_FOR_PORT LPAR expr RPAR

privileged_inline_java ::=
    PRIVILEGED_INLINE_JAVA

// A term is a primitive, an identifier (plain or scoped), a node reference,
// or UP_ARROW (the pipe chain backreference).
term ::= primitive | identifier | scoped_identifier | node_reference | UP_ARROW

// Thin wrappers around the identifier tokens produced by the lexer.
identifier ::= IDENTIFIER

scoped_identifier ::= SCOPED_IDENTIFIER

// Primitive values: numeric, text and boolean alternatives.
primitive ::=
    float | DOUBLE | integer | LONG
  | STRING | CHAR
  | TRUE | FALSE

// Thin wrappers around the FLOAT and INTEGER tokens.
float ::= FLOAT

integer ::= INTEGER

// Tokens
// The definitions below were added by hand from the lexer.
// NOTE(review): the next line looks like the search regex used to pull the
// token names out of the lexer rules — confirm.
//\(\S+\)\s+{ return symbol(Tokens\.\([^, ]+\).*

/* Create a new parser symbol for the lexeme. */
// Operators, punctuation, boolean literals and control-flow keywords.
PLUS ::= "+"
INCREMENT ::= "++"
DECREMENT ::= "--"
MINUS ::= "-"
MULTIPLY ::= "*"
EXPONENTIATE ::= "**"
DIVIDE ::= "/"
MODULUS ::= "%"
LPAR ::= "("
RPAR ::= ")"
LCURLY ::= "{"
RCURLY ::= "}"
LBRACKET ::= "["
RBRACKET ::= "]"
EQUALS ::= "=="
NOT_EQUALS ::= "!="
L_ANGLE_BRACKET ::= "<"
R_ANGLE_BRACKET ::= ">"
LTE ::= "<="
GTE ::= ">="
OR ::= "or"
AND ::= "and"
NOT ::= "not"
ARROW ::= "->"
PIPE_ARROW ::= "|>"
IMPLICATION_ARROW ::= "=>"
TRUE ::= "true"
FALSE ::= "false"
VAR ::= "var"
ASSIGNMENT ::= "="
SEMICOLON ::= ";"
COLON ::= ":"
COMMA ::= ","
DOT ::= "."
BAR ::= "|"
IF ::= "if"
ELSE ::= "else"
MATCH ::= "match"
CASE ::= "case"
WHILE ::= "while"
FOR ::= "for"
REPEAT ::= "repeat"
BREAK ::= "break"
CONTINUE ::= "continue"
WHERE ::= "where"
RETURN ::= "return"
QUESTION_MARK_ASSIGNMENT ::= "?="

// Builtin functions are currently processed at the grammar level; maybe there's a better generalized way.
// Each builtin name is therefore its own token rather than an IDENTIFIER.
LOG_PREFIX ::= "log_"
PRINT ::= "print"
NUMERIC_BOOL ::= "numeric_bool"
INPUT ::= "input"
IS_INPUT_READY ::= "isInputReady"
LEN ::= "len"
TYPE ::= "type"
REMOVE ::= "remove"
IN ::= "in"
INSTANCEOF ::= "instanceof"
COPY ::= "copy"
FROM_JSON ::= "fromJson"
SLEEP ::= "sleep"

// DEBUGGING keywords that should be removed when we want a real release...
// DEBUG_DUMP_SCOPE is referenced by the debug_stmts production.
DEBUG_DUMP_SCOPE ::= "$dumpscope"

// This is an internal-only feature, reserved for implementing the stdlib.
// The commented-out line below is a fragment of a lexer rule, not an EBNF definition.
// NOTE(review): presumably that rule is what yields PRIVILEGED_INLINE_JAVA, which
// the privileged_inline_java production uses but which has no definition in
// this token list — confirm against the lexer.
//"$$BEGIN_JAVA\n"  { if (supportPrivilegedInlineJava) {
SYNTHETIC_JAVA_TYPE::= "$java_type"

// Builtin Types.
// Besides the type names, this group also holds the keywords lambda, alias,
// atom, newtype, unwrap, initializers and unwrappers.
INT_TYPE ::= "int"
LONG_TYPE ::= "long"
FLOAT_TYPE ::= "float"
DOUBLE_TYPE ::= "double"
BOOLEAN_TYPE ::= "boolean"
STRING_TYPE ::= "string"
CHAR_TYPE ::= "char"
TUPLE_TYPE ::= "tuple"
ONEOF ::= "oneof"
STRUCT_TYPE ::= "struct"
FUNCTION_TYPE ::= "function"
CONSUMER_FUNCTION_TYPE ::= "consumer"
PROVIDER_FUNCTION_TYPE ::= "provider"
LAMBDA ::= "lambda"
ALIAS ::= "alias"
ATOM ::= "atom"
NEWTYPE ::= "newtype"
UNWRAP ::= "unwrap"
INITIALIZERS ::= "initializers"
UNWRAPPERS ::= "unwrappers"

// Modifiers go here.
MUT ::= "mut"

// Module related bindings go here.
MODULE ::= "module"
BIND ::= "bind"
TO ::= "to"
AS ::= "as"
USING ::= "using"

// The `cast` keyword.
CAST ::= "cast"

// Graph related things go here.
FUTURE ::= "future"
GRAPH ::= "graph"
ROOT ::= "root"
NODE ::= "node"
BLOCKING ::= "blocking"
// The literal includes the trailing `?`, so it is a separate token from BLOCKING.
MAYBE_BLOCKING ::= "blocking?"
LEFT_ARROW ::= "<-"
AT ::= "@"
BLOCKING_GET ::= "<-|"

// This up arrow is used for the pipe chain backreference term.
// It appears as an alternative of the term production.
UP_ARROW ::= "^"

// Contract tokens.
CONTRACT ::= "contract"
IMPLEMENT ::= "implement"
REQUIRES ::= "requires"

// A lone underscore is its own token.
UNDERSCORE ::= "_"

// Symbols related to builtin HTTP support go here.
// TODO(steving) The Http related types should all also require http:: namespacing.
// TODO(steving) This `http` module should be completely reimplemented as a proper claro_module_internal() target once possible.
HTTP_SERVICE ::= "HttpService"
HTTP_CLIENT ::= "HttpClient"
GET_HTTP_CLIENT::= "http::getHttpClient"
GET_BASIC_HTTP_SERVER_FOR_PORT ::= "http::getBasicHttpServerForPort"
// This is a major hack that simply allows the detection of the synthetic http optional stdlib module for which extra java deps will need to be added to the build.
// The line below is left commented out: it is not a token definition (the
// literal is on the left-hand side), so it has no EBNF form here.
//"http::startServerAndAwaitShutdown" ::= IDENTIFIER
HTTP_RESPONSE ::= "HttpResponse"
HTTP_SERVER ::= "HttpServer"
ENDPOINT_HANDLERS ::= "endpoint_handlers"

Script using Lua string pattern matching:

// Reads the CUP grammar file and prints a stripped-down version of it (action
// code, labels and declarations removed), which is the starting point for the
// EBNF above. All patterns are Lua string patterns.
auto fname = "ClaroParser.cup";
auto txt = readfile(fname);
// Remove every `{: ... :}` span; `.-` makes each match as short as possible.
txt = txt.gsub("{:.-:}", "");
//txt = txt.gsub(".+parser code\n(.+)", "%1");
// `terminal <word> <names>;` -> `%token <names>`.
// NOTE: replace() is chained on the gsub result, so it removes every comma in
// the whole text, not only the ones inside the terminal lists.
txt = txt.gsub("\nterminal%s+%S+%s+([^;]+);", "\n%%token %1").replace(",", "");
// Drop the `nonterminal ...` declaration lines.
txt = txt.gsub("\nnonterminal [^\n]+", "");
// `precedence <rest>;` -> `%<rest>`.
txt = txt.gsub("\nprecedence ([^;]+);", "\n%%%1");
// Strip `:label` suffixes attached to grammar symbols (e.g. `expr:e` -> `expr`).
// A preceding word character is required and kept via %1; the previous pattern
// ":[%w_]+" also matched the second colon of a `::name` sequence and so
// mangled scoped names in comments (`Foo::bar(` became `Foo:(`).
txt = txt.gsub("([%w_]):[%w_]+", "%1");
// Collapse blank (or whitespace-only) lines.
txt = txt.gsub("\n[ \t]*\n+", "\n");
// A line holding only `;` ends a rule: turn it into an empty separator line.
txt = txt.gsub("\n[ \t]*;\n", "\n\n");
//txt = txt.gsub("\n[ \t]*;\n", "%1\n");
//txt = txt.replace("::=", ":");
print(txt);
mingodad commented 8 months ago

I've also just added this project's grammar to https://mingodad.github.io/parsertl-playground/playground/, a Yacc/Lex compatible online editor/tester (select "Claro-lang parser" from Examples, then click Parse to see a parse tree for the content in Input source).

Note that I replaced all occurrences of right recursion with left recursion because LALR parsers work better that way; see https://www.gnu.org/software/bison/manual/bison.html#Recursion.

JasonSteving99 commented 8 months ago

This is super cool! Thanks for sharing this :D. Lately I've actually been strongly considering a future parsing migration from JCup to ANTLR, and I get the feeling that even just having this simplified EBNF with all the production actions stripped out would already make that a lot easier.