ngs-lang / ngs

Next Generation Shell (NGS)
https://ngs-lang.org/
GNU General Public License v3.0
1.45k stars 41 forks source link

Grammar railroad diagram #572

Open mingodad opened 2 years ago

mingodad commented 2 years ago

Using a modified peg/leg from https://github.com/mingodad/peg, the grammar can be converted into an EBNF understood by https://www.bottlecaps.de/rr/ui, which generates a nice railroad diagram (https://en.wikipedia.org/wiki/Syntax_diagram) to help show and understand the syntax.

Command to generate the EBNF: leg -e syntax.leg (some minor manual fixes are still needed afterwards).

Copy and paste the EBNF shown below into https://www.bottlecaps.de/rr/ui on the "Edit Grammar" tab, then click on the "View Diagram" tab.

//To be viewed at https://www.bottlecaps.de/rr/ui

// Entry point: a command list followed by end of input.
start ::=
     commands eof

// Right-recursive list of commands joined by commands_separator.
// The trailing empty alternative allows the list to be empty.
commands ::=
     ( space? command commands_separator commands )
    | ( command commands_separator? space? )
    |

// _NOT_ is a placeholder token defined at the end of the grammar as '!'.
// NOTE(review): in the PEG source this is presumably the negative-lookahead
// "!." (no character left) — confirm against syntax.leg.
eof ::=
     _NOT_  .

// Whitespace terminals. Tab and newline are written as the character codes
// #x9 and #xA: this EBNF notation has no backslash escapes, so "\t" or "\n"
// inside a character class would denote the literal characters '\', 't', 'n'.

// Any run of blanks, tabs and newlines.
space ::=
     [ #x9#xA]+

// Horizontal whitespace only (blank or tab).
inline_space ::=
     [ #x9]+

// One or more newlines, each run optionally padded with horizontal whitespace.
newline_space ::=
     ( inline_space? [#xA]+ inline_space? )+

// One top-level command: a language construct, a comment, or an external
// command pipeline (last alternative).
// NOTE(review): the source is a PEG, so alternatives are presumably tried in
// the listed order — do not reorder without checking syntax.leg.
// _AND_ is a placeholder token defined at the end of the grammar as '&'
// (presumably PEG positive lookahead for "identifier." / "identifier::").
command ::=
     curly_code_block
    | assignment
    | inplace_assignment
    | function_definition
    | function_call
    | type
    | namespace
    | block
    | section
    | if
    | while
    | switch
    | for
    | ( _AND_  ( identifier ( "." | "::" ) ) expression )
    | comment
    | ( commands_pipeline )

// Separates commands: either line break(s) or a ';' with optional
// horizontal whitespace on both sides.
commands_separator ::=
     newline_space
    | ( inline_space? [;] inline_space? )

// Separates list items (parameters, arguments, array/hash items): either
// line break(s) or a ',' which may be followed by any whitespace.
items_separator ::=
     newline_space
    | ( inline_space? [,] space? )

// A comment starts with "#" or with the literal prefix "TEST " and runs to
// the end of the line. The newline is written as the character code #xA
// because this EBNF notation has no "\n" escape: "[^\n]" would exclude the
// characters '\' and 'n' instead of the line break.
comment ::=
     ( "#" [^#xA]+ )
    | ( "TEST " [^#xA]+ )

// Brace-delimited list of expressions; a single trailing ';' is allowed
// before the closing brace.
curly_code_block ::=
     space? '{' space? expressions space? ";"? space? '}'

// Plain assignment: identifier = expression.
assignment ::=
     identifier space? "=" space? expression

// Compound assignment: one of the inplace_assignment_op characters
// immediately followed by "=".
inplace_assignment ::=
     identifier space? inplace_assignment_op "=" space? expression

// Function definition: optional doc lines, the keyword "F", an optional
// name, a parenthesized parameter list and a body.
function_definition ::=
     optional_doc inline_space? "F" function_definition_name space? "(" space? function_definition_parameters space? ")" space? code_block

// Call: callee expression followed by a parenthesized argument list.
// Only horizontal whitespace may separate the two.
function_call ::=
     basic_expression inline_space? function_call_arguments

// Type declaration: optional doc lines, "type", a name and an optional
// argument list.
type ::=
     optional_doc inline_space? "type" space identifier inline_space? optional_function_call_arguments

// Namespace: "ns", optional parameters, then a (possibly empty) braced body.
namespace ::=
     "ns" namespace_optional_parameters ( curly_code_block | empty_curly_code_block )

// Named block: "block" NAME BODY.
block ::=
     "block" space identifier space code_block

// Section: "section" followed by a string literal and a body.
section ::=
     "section" space string space code_block

// if CONDITION [then] BODY [else-part]. The _NOT_ identifier_continuation
// guard follows the keyword; _NOT_ is defined at the end of the grammar as '!'.
// NOTE(review): presumably prevents matching identifiers that merely start
// with "if" — confirm against syntax.leg.
if ::=
     "if" _NOT_  identifier_continuation space? code_block ( space "then" )? space? code_block if_optional_else_block

// while CONDITION BODY, with the same keyword guard as "if".
while ::=
     "while" _NOT_  identifier_continuation space? code_block space? code_block

// switch-like construct: a keyword from any_switch, the subject, then one
// or more (expression, body) pairs inside braces.
switch ::=
     any_switch _NOT_  identifier_continuation space? code_block space? "{" ( space? expression space? code_block )+ space? "}"

// The three "for" forms; each is defined further down in the grammar.
for ::=
     for_i_n
    | for_start_cond_incr
    | for_in

// Identifier, in one of three spellings:
//  1. a word: letters/underscore, then letters, digits or underscore;
//  2. a quoted name between (' and ') built from the listed characters,
//     with "[" and "]" given as separate string alternatives;
//  3. a parenthesized operator name: "$()", a run of operator characters
//     (#x5b and #x5d are the character codes of '[' and ']'), "is not" or
//     "not in".
identifier ::=
     ( [_a-zA-Z]+ [_a-zA-Z0-9]* )
    | ( "('" ( [-|=!@?<>~+*/%()$a-zA-Z0-9.`: ] | "[" | "]" )+ "')" )
    | ( "(" ( "$()" | [-|=!@?~+*/%$<>.#x5b#x5d:]+ | "is not" | "not in" ) ")" )

// An expression is either a lone comment or a binary-operator expression
// optionally followed by a comment on the same line.
expression ::=
     comment
    | ( binop_expression ( inline_space comment )? )

// External command pipeline: optional pipeline options (identifier "::"),
// an optional leading pipe, one or more command_words groups joined by
// pipes, an optional trailing pipe, and an optional "&".
// The empty alternative inside "( ( commands_pipe ) | )" makes the leading
// pipe optional.
commands_pipeline ::=
     inline_space? commands_pipeline_node ( commands_pipeline_option inline_space? )* ( ( ( commands_pipe ) | ) inline_space? ) command_words ( commands_pipe inline_space? command_words inline_space? )* ( ( commands_pipe ) | ( inline_space? ) ) ( "&" inline_space? )?

// Right-recursive list of one or more expressions joined by
// expression_delimiter.
expressions ::=
     ( expression expression_delimiter expressions )
    | ( expression )

// Braces containing nothing but optional whitespace.
empty_curly_code_block ::=
     space? '{' space? '}'

// A body: either a braced block or a single expression.
code_block ::=
     curly_code_block
    | expression

// Separates expressions inside a block: line break(s) or ';'.
expression_delimiter ::=
     newline_space
    | ( inline_space? ";" space? )

// Pure alias of binop_expression_inner.
// NOTE(review): presumably the wrapper carried a semantic action in the
// original grammar — confirm against syntax.leg.
binop_expression ::=
     binop_expression_inner

// A chain_expression followed by any number of binary-operator tails or of
// the postfix keywords "returns", "breaks", "continues", "throws".
binop_expression_inner ::=
     chain_expression ( ( binop chain_expression ) | ( space "returns" ( inline_space expression )? ) | ( space "breaks" ) | ( space "continues" ) | ( space "throws" space expression ) )*

// A basic_expression followed by any number of postfix forms: call
// arguments, [index], compound/".="/plain assignment, ".super",
// ".(expr)", ".name", "::(expr)", "::name".
chain_expression ::=
     basic_expression ( ( inline_space? function_call_arguments ) | ( "[" space? expression space? "]" ) | ( inline_space? inplace_assignment_op "=" space? expression ) | ( inline_space? ".=" space? expression ) | ( inline_space? "=" space? expression ) | ( "." super ) | ( ".(" space? expression space? ")" ) | ( "." identifier ) | ( "::(" space? expression space? ")" ) | ( "::" identifier ) )*

// Binary operator: symbolic (binop1) or keyword (binop2).
binop ::=
     binop1
    | binop2

// Symbolic binary operators with optional surrounding whitespace. Longer
// operators are listed before their own prefixes ("===" before "==",
// "..." before "..", "+?" before "+", and so on).
// The last alternative is a single backslash. Strings in this EBNF notation
// have no escape sequences, so it is written "\"; "\\" would denote two
// backslash characters.
binop1 ::=
     inline_space? ( ( "|" ) | ( "===" ) | ( "!==" ) | ( "==" ) | ( "!=" ) | ( "=~" ) | ( "!~" ) | ( "->" ) | ( "<=" ) | ( "<" ) | ( ">=" ) | ( ">" ) | ( "~~" ) | ( "~" ) | ( "..." ) | ( ".." ) | ( "+?" ) | ( "+" ) | ( "-" ) | ( "*" ) | ( "%" ) | ( "/" ) | ( "?" ) | ( "\" ) ) space?

// Keyword binary operator; whitespace is mandatory on both sides.
binop2 ::=
     inline_space binop2op space

// Keyword operators. "is not" is listed before its prefix "is".
binop2op ::=
     ( "is not" )
    | ( "is" )
    | ( "and" )
    | ( "or" )
    | ( "in" )
    | ( "not in" )
    | ( "tand" )
    | ( "tor" )

// Single operator character that may precede "=" in a compound assignment.
inplace_assignment_op ::=
     [-+*/%]

// Primary expressions: control structures, literals, declarations,
// identifiers, collection literals and parenthesized expressions.
// NOTE(review): the source is a PEG, so the listed order is presumably
// significant — do not reorder without checking syntax.leg.
basic_expression ::=
     if
    | while
    | switch
    | cond
    | for
    | comment
    | ( "null" )
    | ( "true" )
    | ( "false" )
    | number
    | string
    | regexp
    | assignment
    | inplace_assignment
    | var_scope_declaration
    | function_definition
    | return
    | break
    | continue
    | namespace
    | collector
    | block
    | section
    | guard
    | try_catch
    | throw
    | type
    | subshell
    | super
    | PRINT_AST
    | identifier
    | array_literal
    | imm_array_literal
    | hash_literal
    | abc_function_literal
    | imm_hash_literal
    | tree_literal
    | ( "(" space? expression space? ")" )

// Parenthesized argument list, optionally followed by "external" arguments
// written after the closing parenthesis.
function_call_arguments ::=
     "(" space? function_call_arguments_without_parens space? ")" ( space function_call_external_argument )*

// The keyword "super".
super ::=
     "super"

// cond-like construct: keyword, then one or more (condition, body) pairs
// inside braces.
cond ::=
     any_cond space? "{" ( space? code_block space? code_block )+ space? "}"

// Numeric literal with optional sign: hexadecimal ("0x..."), decimal with a
// fractional part, or integer. Each form ends with a _NOT_ guard (_NOT_ is
// defined at the end of the grammar as '!') over a following letter or
// ".digit".
number ::=
     ( [-+]? "0x" [0-9a-fA-F]+ _NOT_  ( [a-zA-Z] | ( "." [0-9a-fA-F] ) ) )
    | ( [-+]? [0-9]+ [.] [0-9]+ _NOT_  ( [a-zA-Z] | ( "." [0-9] ) ) )
    | ( [-+]? [0-9]+ _NOT_  ( [a-zA-Z] | ( "." [0-9] ) ) )

// String literal: single-quoted or double-quoted.
string ::=
     string_sq
    | string_dq

// Regular expression literal: /.../ followed by flag characters.
regexp ::=
     "/" str_comps_node ( ( regexp_imm ) | ( regexp_escape ) )* "/" regexp_flags

// Scoped variable declaration: scope keyword, then one or more
// comma-separated items.
var_scope_declaration ::=
     var_scope_declaration_keyword space var_scope_declaration_item ( space? "," space? var_scope_declaration_item )*

// "return" with an optional value on the same line.
return ::=
     "return" return_node ( inline_space expression )?

// The keyword "break".
break ::=
     "break"

// The keyword "continue".
continue ::=
     "continue"

// "collector", optional "/" initial-value expression, then a body.
collector ::=
     "collector" collector_init space code_block

// "guard" followed by a body.
guard ::=
     "guard" space code_block

// "try" body followed by zero or more "catch (parameters) body" clauses.
try_catch ::=
     "try" space try_catch_node code_block ( space? "catch" space? "(" space? function_definition_parameters space? ")" space? code_block )*

// "throw" followed by the expression to throw.
throw ::=
     "throw" space expression

// Command pipeline embedded in an expression; four delimiter styles.
// The double-backtick form is listed before the single-backtick form.
subshell ::=
     ( "$(" commands_pipeline ")" )
    | ( "%(" commands_pipeline ")" )
    | ( "``" commands_pipeline "``" )
    | ( "`" commands_pipeline "`" )

// "PRINT_AST" followed by an expression.
PRINT_AST ::=
     "PRINT_AST" space expression

// Array literal: empty, or items with an optional trailing comma.
array_literal ::=
     ( "[" space? "]" )
    | ( "[" space? array_items space? ","? space? "]" )

// %[ ... ] array literal whose elements are whitespace-separated words.
imm_array_literal ::=
     ( "%[" space? "]" )
    | ( "%[" imm_array_words space? "]" )

// Hash literal: empty, or items with an optional trailing comma.
hash_literal ::=
     ( "{" space? "}" )
    | ( "{" space? hash_items space? ","? space? "}" )

// Same syntax as curly_code_block.
// NOTE(review): listed after hash_literal in basic_expression, so
// presumably only tried when the braces are not a hash — confirm.
abc_function_literal ::=
     curly_code_block

// %{ ... } hash literal whose keys and values are whitespace-separated words.
imm_hash_literal ::=
     ( "%{" space? "}" )
    | ( "%{" imm_hash_words space? "}" )

// Tree literal: a single backslash, a node name, zero or more "name = value"
// attributes and an optional array literal.
// The leading token is one backslash character. Strings in this EBNF
// notation have no escape sequences, so it is written "\"; "\\" would
// denote two backslashes.
tree_literal ::=
     "\" identifier_as_text tree_node ( inline_space? identifier_as_text inline_space? "=" inline_space? expression )* ( inline_space? array_literal )?

// Optional documentation lines; the empty alternative means "no doc".
optional_doc ::=
     ( doc_lines )
    |

// Optional function name after "F": whitespace followed by an identifier,
// a single-quoted name, or a run of operator characters. The empty
// alternative allows a definition with no name.
function_definition_name ::=
     ( space+ ( ( identifier ) | ( ( ( "'" ( [-|=!@?<>~+*/%()$a-zA-Z0-9.`": ] | "[" | "]" )+ "'" ) | ( [-|=!@?~+*/%$.`\\]+ ) ) ) ) )
    |

// Parameter list contents: parameters joined by items_separator; may be empty.
function_definition_parameters ::=
     params_node ( function_definition_parameter ( items_separator function_definition_parameter )* )*

// Empty production.
// NOTE(review): presumably a placeholder left where the original grammar had
// a semantic action (actions were stripped) — confirm against syntax.leg.
params_node ::=

// One parameter: "**name", "*name", or "name" with optional ": type" and
// optional "= default".
function_definition_parameter ::=
     ( "**" space? identifier )
    | ( "*" space? identifier )
    | ( identifier function_definition_parameter_type function_definition_default_value )

// Optional ": type" annotation.
function_definition_parameter_type ::=
     ( space? ":" space? identifier )
    |

// Optional "= expression" default value.
function_definition_default_value ::=
     ( space? "=" space? expression )
    |

// Optional parenthesized parameter list for "ns".
namespace_optional_parameters ::=
     ( space? "(" space? function_definition_parameters space? ")" )
    |

// Identifier-like word; unlike identifier, the first run may contain '%'.
identifier_as_text ::=
     [%_a-zA-Z]+ [_a-zA-Z0-9]*

// Rest of the current line, including its terminating newline. The newline
// is written as the character code #xA because this EBNF notation has no
// "\n" escape: "[^\n]" would exclude '\' and 'n' instead of the line break.
text_till_eol ::=
     [^#xA]+ [#xA]

// One documentation line, in four forms:
//  - "doc NAME -" with nothing after the dash,
//  - "doc NAME - text",
//  - "doc text",
//  - a bare "doc".
doc_line ::=
     ( inline_space? "doc " identifier_as_text " -" inline_space? [#xA] )
    | ( inline_space? "doc " identifier_as_text " - " text_till_eol )
    | ( inline_space? "doc " text_till_eol )
    | ( inline_space? "doc" inline_space? [#xA] )

// One or more consecutive doc_line entries.
doc_lines ::=
     doc_node ( doc_line )+

// Empty production.
// NOTE(review): this and the other empty "*_node" rules below are presumably
// placeholders left where the original grammar had semantic actions
// (actions were stripped) — confirm against syntax.leg.
doc_node ::=

// Argument list contents: arguments joined by items_separator; may be empty.
function_call_arguments_without_parens ::=
     args_node ( function_call_argument ( items_separator function_call_argument )* )*

// Argument written after the closing parenthesis of a call:
// "with ARG", "do ARG", or "name => expression".
function_call_external_argument ::=
     ( ( "with" | "do" ) space function_call_argument )
    | ( identifier space? "=>" space? expression )

// One call argument: "name = expr", "**expr", "*expr", or a plain expression.
function_call_argument ::=
     ( identifier space? "=" space? expression )
    | ( "**" space? expression )
    | ( "*" space? expression )
    | ( expression )

// Empty production (see note on doc_node).
args_node ::=

// One or more array items joined by items_separator.
array_items ::=
     array_literal_node array_item ( items_separator array_item )*

// Empty production (see note on doc_node).
array_literal_node ::=

// Array item: "*expr" or a plain expression.
array_item ::=
     ( "*" space? expression )
    | expression

// One or more whitespace-separated words inside %[ ... ].
imm_array_words ::=
     command_words_node ( space? ( array_command_word ) )+

// Empty production (see note on doc_node).
command_words_node ::=

// Element of a %[ ... ] literal.
array_command_word ::=
     number
    | command_word
    | imm_array_literal
    | imm_hash_literal

// One or more hash items joined by items_separator.
hash_items ::=
     hash_literal_node hash_item ( items_separator hash_item )*

// Empty production (see note on doc_node).
hash_literal_node ::=

// Hash item: "**expr" or "key : value".
hash_item ::=
     ( "**" space? expression )
    | ( expression space? ":" space? expression )

// One or more "key value" word pairs inside %{ ... }.
imm_hash_words ::=
     command_words_node ( space? ( hash_command_word ) space ( hash_command_word ) )+

// Key or value word of a %{ ... } literal.
hash_command_word ::=
     number
    | command_word
    | imm_array_literal
    | imm_hash_literal
    | ( "$**" code_block )

// Empty production (see note on doc_node).
tree_node ::=

// for ( NAME ; LIMIT ) BODY
for_i_n ::=
     "for" space? "(" space? identifier space? ";" space? code_block space? ")" space? code_block

// for ( START ; CONDITION ; INCREMENT ) BODY
for_start_cond_incr ::=
     "for" space? "(" code_block ";" space? code_block space? ";" space? code_block space? ")" space? code_block

// for NAME in CONTAINER BODY
for_in ::=
     "for" space? identifier space "in" space code_block space? code_block

// Characters that could continue an identifier; used after _NOT_ in the
// keyword rules (if, while, switch).
identifier_continuation ::=
     [_a-zA-Z0-9]+

// Optional else part of "if"; the "else" keyword itself is optional.
if_optional_else_block ::=
     ( ( space? "else" space? )? inline_space? code_block )
    |

// Keywords accepted by the switch rule.
any_switch ::=
     switch_keyword
    | eswitch_keyword
    | match_keyword
    | ematch_keyword

// Keywords accepted by the cond rule.
any_cond ::=
     cond_keyword
    | econd_keyword

switch_keyword ::=
     "switch"

eswitch_keyword ::=
     "eswitch"

match_keyword ::=
     "match"

ematch_keyword ::=
     "ematch"

cond_keyword ::=
     "cond"

econd_keyword ::=
     "econd"

// Scope keywords for var_scope_declaration.
var_scope_declaration_keyword ::=
     ( "local" )
    | ( "upvar" )
    | ( "global" )

// Declared name with an optional initial value.
var_scope_declaration_item ::=
     identifier var_scope_declaration_item_optional_value

// Optional "= expression" initializer.
var_scope_declaration_item_optional_value ::=
     ( space? "=" space? expression )
    |

// Single-quoted string: literal runs and escapes.
string_sq ::=
     "'" str_comps_node ( ( string_sq_imm ) | ( string_escape ) )* "'"

// Double-quoted string: literal runs, escapes and "$" expansions.
string_dq ::=
     '"' str_comps_node ( ( string_dq_imm ) | ( string_escape ) | ( string_dq_dollar_expansion ) )* '"'

// Empty production (see note on doc_node).
str_comps_node ::=

// Literal run inside a single-quoted string: anything except a quote or
// a backslash.
string_sq_imm ::=
     [^\'\\]+

// Escape sequence inside a string: one backslash followed by one of the
// listed characters (a b e f n r t, backslash, either quote, or "$").
// Strings in this EBNF notation have no escape sequences of their own, so a
// single backslash is written "\"; "\\" would denote two backslashes.
string_escape ::=
     "\" ( ( "a" ) | ( "b" ) | ( "e" ) | ( "f" ) | ( "n" ) | ( "r" ) | ( "t" ) | ( "\" ) | ( "'" ) | ( '"' ) | ( "$" ) )

// Literal run inside a double-quoted string: anything except "$", a double
// quote or a backslash.
string_dq_imm ::=
     [^$\"\\]+

// "$" expansions allowed inside double-quoted strings and unquoted command
// words: a subshell, "$name", "${...}", "$*name", "$*{...}".
string_dq_dollar_expansion ::=
     ( subshell )
    | ( "$" identifier )
    | ( "$" curly_code_block )
    | ( "$*" identifier )
    | ( "$*" curly_code_block )

// Literal run inside a regexp: anything except "/" or a backslash.
regexp_imm ::=
     [^/\\]+

// Escape inside a regexp literal: one backslash followed by "/" or by any
// other single character.
// Strings in this EBNF notation have no escape sequences of their own, so a
// single backslash is written "\"; "\\" would denote two backslashes.
regexp_escape ::=
     "\" ( ( "/" ) | ( . ) )

// Zero or more flag characters following the closing "/" of a regexp.
regexp_flags ::=
     [a-zA-Z0-9]*

// Empty production.
// NOTE(review): this and the other empty "*_node" rules below are presumably
// placeholders left where the original grammar had semantic actions
// (actions were stripped) — confirm against syntax.leg.
return_node ::=

// Optional "/" initial-value expression for "collector".
collector_init ::=
     ( "/" expression )
    |

// Empty production (see note on return_node).
try_catch_node ::=

// Optional call argument list.
optional_function_call_arguments ::=
     function_call_arguments
    |

// Empty production (see note on return_node).
commands_pipeline_node ::=

// Pipeline-level option: NAME "::" with an optional value expression.
commands_pipeline_option ::=
     identifier "::" ( ( expression ) | )

// The pipe character between commands.
commands_pipe ::=
     "|"

// One external command: optional command options, then one or more
// redirects or words.
command_words ::=
     command_words_node ( command_option inline_space? )* ( ( ( command_redirect ) | ( command_word ) ) inline_space? )+

// Command-level option: NAME ":" with an optional value expression.
command_option ::=
     identifier ":" ( ( expression ) | )

// Redirection: optional file-descriptor number, a marker, and a target word.
command_redirect ::=
     redir_fd redir_marker inline_space? command_word

// One command word: "$*" splice, "${...}", "$name", or a literal word.
command_word ::=
     ( "$*" code_block )
    | ( "$" curly_code_block )
    | ( "$" identifier )
    | basic_command_word

// Empty production (see note on return_node).
unquoted_basic_command_word_node ::=

// Unquoted word: one or more runs of plain characters, escapes or "$"
// expansions.
unquoted_basic_command_word ::=
     unquoted_basic_command_word_node ( ( [-+a-zA-Z0-9/=,._@:]+ ) | ( string_escape ) | ( string_dq_dollar_expansion ) )+

// Literal command word: a quoted string or an unquoted word.
basic_command_word ::=
     string
    | unquoted_basic_command_word

// Optional file-descriptor number before a redirection marker.
redir_fd ::=
     ( [0-9]+ )
    |

// Redirection markers; two-character forms are listed before their
// one-character prefixes.
redir_marker ::=
     ( ">>" | ">" | "<<" | "<" )

//Added tokens for railroad generation
// They are placeholders rendered as the literal characters '!' and '&'.
// NOTE(review): they presumably stand for PEG's not-predicate and
// and-predicate (lookahead that consumes no input) — confirm against
// syntax.leg.
_NOT_ ::= '!'
_AND_ ::= '&'
ilyash-b commented 2 years ago

Thanks for the report! Any particular part of syntax which is not clear? Maybe we could also improve the documentation.

mingodad commented 2 years ago

The manual fixes are due to my extension to peg/leg not handling conversion of some escape sequences to match what https://www.bottlecaps.de/rr/ui expects.

Notice that this is basically the grammar without actions, with the rule/definition separator changed from <- to ::= and the alternative/choice separator changed from / to |.

For the ngs-lang end user documentation probably a cleanup of all space tokens will make it more useful.

ilyash-b commented 2 years ago

OK. Thanks!