colis-anr / morbig

A static parser for POSIX Shell
Other
190 stars 8 forks source link

Grammar railroad diagram #132

Closed mingodad closed 2 years ago

mingodad commented 2 years ago

There is a tool that produces railroad diagrams (https://en.wikipedia.org/wiki/Syntax_diagram), and with the help of a custom parser we can get a nice diagram that can be used to document, understand, and modify the grammar.

Copy and paste the EBNFs below (one at a time) into the Edit Grammar tab at https://www.bottlecaps.de/rr/ui, then click the View Diagram tab to see a navigable XHTML railroad diagram (which can also be generated offline or downloaded).

src/parser.mly.ebnf:

// Grammar rules of src/parser.mly.ebnf.
// entry_point is not referenced by any other rule in this listing: it is a
// whole program followed by EOF.
entry_point ::=  program EOF

// A program is either a sequence of complete commands surrounded by optional
// line breaks, or nothing but an optional line break.
program ::=  linebreak complete_commands linebreak
    | linebreak

// Command lists: complete commands are separated by newline_list; within one
// complete command, and_or chains are separated by separator_op.
complete_commands ::=  complete_commands newline_list complete_command
    | complete_command

complete_command ::=  clist separator_op
    | clist

clist ::=  clist separator_op and_or
    | and_or

// Pipelines chained with AND_IF / OR_IF; a line break is allowed after the operator.
and_or ::=  pipeline
    | and_or AND_IF linebreak pipeline
    | and_or OR_IF linebreak pipeline

// A pipe sequence, optionally preceded by Bang.
pipeline ::=  pipe_sequence
    | Bang pipe_sequence

pipe_sequence ::=  command
    | pipe_sequence Pipe linebreak command

// Only compound commands take a trailing redirect_list at this level; simple
// commands carry their redirections through cmd_prefix / cmd_suffix.
command ::=  simple_command
    | compound_command
    | compound_command redirect_list
    | function_definition

// Compound commands.
compound_command ::=  brace_group
    | subshell
    | for_clause
    | case_clause
    | if_clause
    | while_clause
    | until_clause

subshell ::=  Lparen compound_list Rparen

// term plays the same role as clist but uses separator, which also accepts
// newlines (see separator vs. separator_op).
compound_list ::=  linebreak term
    | linebreak term separator

term ::=  term separator and_or
    | and_or

// The four alternatives differ in whether the "In" part and the word list are present.
for_clause ::=  For name do_group
    | For name sequential_sep do_group
    | For name linebreak cin sequential_sep do_group
    | For name linebreak cin wordlist sequential_sep do_group

name ::=  NAME

cin ::=  In

wordlist ::=  wordlist word
    | word

// A case clause may hold a list of terminated items, a list whose last item is
// unterminated (case_list_ns), or no items at all.
case_clause ::=  Case word linebreak cin linebreak case_list Esac
    | Case word linebreak cin linebreak case_list_ns Esac
    | Case word linebreak cin linebreak Esac

// case_item_ns may only appear as the last item.
case_list_ns ::=  case_list case_item_ns
    | case_item_ns

case_list ::=  case_list case_item
    | case_item

// Case item without the trailing DSEMI (compare case_item); the leading Lparen is optional.
case_item_ns ::=  pattern Rparen linebreak
    | pattern Rparen compound_list
    | Lparen pattern Rparen linebreak
    | Lparen pattern Rparen compound_list

// Case item terminated by DSEMI; the leading Lparen is optional.
case_item ::=  pattern Rparen linebreak DSEMI linebreak
    | pattern Rparen compound_list DSEMI linebreak
    | Lparen pattern Rparen linebreak DSEMI linebreak
    | Lparen pattern Rparen compound_list DSEMI linebreak

// One or more words separated by Pipe.
pattern ::=  word
    | pattern Pipe word

if_clause ::=  If compound_list Then compound_list else_part Fi
    | If compound_list Then compound_list Fi

// Any number of Elif branches, optionally ended by a single Else branch.
else_part ::=  Elif compound_list Then compound_list
    | Elif compound_list Then compound_list else_part
    | Else compound_list

while_clause ::=  While compound_list do_group

until_clause ::=  Until compound_list do_group

// Function definitions: the body is a compound command with optional redirections.
function_definition ::=  fname Lparen Rparen linebreak function_body

function_body ::=  compound_command
    | compound_command redirect_list

fname ::=  NAME

brace_group ::=  Lbrace compound_list Rbrace

do_group ::=  Do compound_list Done

// Simple commands: either a prefix (assignments / redirections) optionally
// followed by a command word and suffix, or a command name with an optional suffix.
simple_command ::=  cmd_prefix cmd_word cmd_suffix
    | cmd_prefix cmd_word
    | cmd_prefix
    | cmd_name cmd_suffix
    | cmd_name

cmd_name ::=  word

cmd_word ::=  word

// One or more io_redirect / ASSIGNMENT_WORD items, in any order.
cmd_prefix ::=  io_redirect
    | cmd_prefix io_redirect
    | ASSIGNMENT_WORD
    | cmd_prefix ASSIGNMENT_WORD

// One or more io_redirect / word items, in any order.
cmd_suffix ::=  io_redirect
    | cmd_suffix io_redirect
    | word
    | cmd_suffix word

// Redirections: file redirections and here-documents, each optionally
// preceded by IO_NUMBER.
redirect_list ::=  io_redirect
    | redirect_list io_redirect

io_redirect ::=  io_file
    | IO_NUMBER io_file
    | io_here
    | IO_NUMBER io_here

io_file ::=  LESS filename
    | LESSAND filename
    | GREAT filename
    | GREATAND filename
    | DGREAT filename
    | LESSGREAT filename
    | CLOBBER filename

filename ::=  word

io_here ::=  DLESS here_end
    | DLESSDASH here_end

here_end ::=  word

// Newlines and separators.
newline_list ::=  NEWLINE
    | newline_list NEWLINE

// Optional newline_list: the second alternative is empty.
linebreak ::=  newline_list
    | /* empty */

separator_op ::=  Uppersand
    | Semicolon

separator ::=  separator_op linebreak
    | newline_list

sequential_sep ::=  Semicolon linebreak
    | newline_list

// A word is either a WORD or a NAME token.
word ::=  WORD
    | NAME

// NOTE(review): no other rule in this listing references intended_error.
intended_error ::=  INTENDED_ERROR

// Terminal symbols of src/parser.mly.ebnf, each defined as a quoted string literal.
// NOTE(review): the pattern on the second line below looks like the search
// expression used to derive these definitions from a token printer — confirm.
//Tokens
//| \(\S+\) -> \("[^"]+"\)

EOF ::= "EOF"
AND_IF ::= "AND_IF"
OR_IF ::= "OR_IF"
DSEMI ::= "DSEMI"
// NOTE(review): DLESS and DLESSDASH are used by io_here but are not defined as
// terminals in this listing; only the commented-out lines below mention them.
//| DLESS wr -> Printf.sprintf "DLESS(%s)" (string_of_hd !wr)
//| DLESSDASH wr -> Printf.sprintf "DLESSDASH(%s)" (string_of_hd !wr)
// NOTE(review): CLOBBER is spelled as operator text while the other redirection
// operators (LESSAND, DGREAT, ...) are spelled as their token names — confirm this is intended.
CLOBBER ::= ">|"
If ::= "If"
Then ::= "Then"
Else ::= "Else"
Fi ::= "Fi"
Elif ::= "Elif"
LESSGREAT ::= "LESSGREAT"
LESSAND ::= "LESSAND"
DGREAT ::= "DGREAT"
GREATAND ::= "GREATAND"
// NOTE(review): WORD, ASSIGNMENT_WORD, NAME and IO_NUMBER are used by the
// grammar rules but are not defined as terminals in this listing; only the
// commented-out lines below mention them.
//| WORD w -> Printf.sprintf "WORD(%s)" (unWord w)
//| ASSIGNMENT_WORD w -> Printf.sprintf "ASSIGNMENT_WORD(%s)" (string_of_assignment_word w)
//| NAME w -> Printf.sprintf "NAME(%s)" (unName w)
//| IO_NUMBER io -> Printf.sprintf "IONUMBER(%s)" (string_of_io_number io)
Do ::= "Do"
Done ::= "Done"
Case ::= "Case"
Esac ::= "Esac"
While ::= "While"
Until ::= "Until"
For ::= "For"
Lbrace ::= "Lbrace"
Rbrace ::= "Rbrace"
Bang ::= "Bang"
In ::= "In"
Pipe ::= "Pipe"
Lparen ::= "Lparen"
Rparen ::= "Rparen"
LESS ::= "LESS"
GREAT ::= "GREAT"
Uppersand ::= "Uppersand"
Semicolon ::= "Semicolon"
NEWLINE ::= "Newline"
INTENDED_ERROR ::= "INTENDED_ERROR"

src/REBracketExpressionParser.mly.ebnf:

// Grammar rules of src/REBracketExpressionParser.mly.ebnf.
// A bracket expression is LBRACKET, a matching or nonmatching list, RBRACKET,
// and then EOF.
bracket_expression ::=  LBRACKET matching_list RBRACKET EOF
    | LBRACKET nonmatching_list RBRACKET EOF

matching_list ::=  bracket_list

// Same as matching_list but preceded by HAT.
nonmatching_list ::=  HAT bracket_list

// A follow_list, optionally ended by a MINUS.
bracket_list ::=  follow_list
    | follow_list MINUS

// One or more expression terms.
follow_list ::=  expression_term
    | follow_list expression_term

expression_term ::=  single_expression
    | range_expression

single_expression ::=  end_range
    | character_class
    | equivalence_class

// A range is start_range (an end_range followed by MINUS) followed by either
// another end_range or a MINUS.
range_expression ::=  start_range end_range
    | start_range MINUS

start_range ::=  end_range MINUS

end_range ::=  COLL_ELEM_SINGLE
    | collating_symbol

// Open_dot ... Dot_close around a single element, a multi element, or a meta_char.
collating_symbol ::=  Open_dot COLL_ELEM_SINGLE Dot_close
    | Open_dot COLL_ELEM_MULTI Dot_close
    | Open_dot meta_char Dot_close

// Open_equal ... Equal_close around a single or multi element.
equivalence_class ::=  Open_equal COLL_ELEM_SINGLE Equal_close
    | Open_equal COLL_ELEM_MULTI Equal_close

// Open_colon ... Colon_close around a class name.
character_class ::=  Open_colon class_name Colon_close

meta_char ::=  MINUS
    | HAT
    | RBRACKET

// One or more COLL_ELEM_SINGLE.
class_name ::=  COLL_ELEM_SINGLE+

// Terminal symbols of src/REBracketExpressionParser.mly.ebnf, each defined as
// a quoted string literal.
// NOTE(review): COLL_ELEM_SINGLE, COLL_ELEM_MULTI and EOF are used by the
// bracket-expression rules but are not defined in this token list.
// NOTE(review): the pattern on the second line below looks like the search
// expression used to derive these definitions from lexer rules — confirm.
//Tokens
//| \("[^"]+"\)\s+{ \(\S+\).+

MINUS ::= "-"
// NOTE(review): HAT is "!" here rather than "^" — presumably because shell
// patterns negate with "!"; confirm against the lexer.
HAT ::= "!"
LBRACKET ::= "["
RBRACKET ::= "]"
Open_equal ::= "[="
Equal_close ::= "=]"
Open_dot ::= "[."
Dot_close ::= ".]"
Open_colon ::= "[:"
Colon_close ::= ":]"