Grammar railroad diagram

mingodad commented 3 years ago

I've done a experimental tool to convert bison grammars to a kind of EBNF understood by https://www.bottlecaps.de/rr/ui to generate railroad diagrams see bellow the converted src/kernel_internals/parsing/cparser.mly and with some hand made changes to allow view it at https://www.bottlecaps.de/rr/ui the order of the rules could be changed to a better view of the railroad diagrams. Copy and paste the EBNF bellow on https://www.bottlecaps.de/rr/ui tab Edit Grammar then switch to the tab View Diagram.

/*
From https://raw.githubusercontent.com/Frama-C/Frama-C-snapshot/master/src/kernel_internals/parsing/cparser.mly
*/

interpret ::=  file
file ::=  globals EOF
globals ::=  /* empty */ | global globals | ghost_glob_begin ghost_globals globals | SEMICOLON globals
ghost_glob_begin ::=  LGHOST
ghost_globals ::=  declaration ghost_globals | function_def ghost_globals | RGHOST
global ::=  DECL | CUSTOM_ANNOT | declaration | function_def | EXTERN string_constant declaration | EXTERN string_constant LBRACE globals RBRACE | ASM LPAREN string_constant RPAREN SEMICOLON | pragma | IDENT LPAREN old_parameter_list_ne RPAREN old_pardef_list SEMICOLON | IDENT LPAREN RPAREN SEMICOLON
id_or_typename_as_id ::=  IDENT | NAMED_TYPE
id_or_typename ::=  id_or_typename_as_id
maybecomma ::=  /* empty */ | COMMA
primary_expression ::=  IDENT | constant | paren_comma_expression | LPAREN block RPAREN
postfix_expression ::=  primary_expression | postfix_expression bracket_comma_expression | postfix_expression LPAREN arguments RPAREN ghost_arguments_opt | BUILTIN_VA_ARG LPAREN expression COMMA type_name RPAREN | BUILTIN_TYPES_COMPAT LPAREN type_name COMMA type_name RPAREN | BUILTIN_OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN | postfix_expression DOT id_or_typename | postfix_expression ARROW id_or_typename | postfix_expression PLUS_PLUS | postfix_expression MINUS_MINUS | LPAREN type_name RPAREN LBRACE initializer_list_opt RBRACE
offsetof_member_designator ::=  id_or_typename | offsetof_member_designator DOT IDENT | offsetof_member_designator bracket_comma_expression
unary_expression ::=  postfix_expression | PLUS_PLUS unary_expression | MINUS_MINUS unary_expression | SIZEOF unary_expression | SIZEOF LPAREN type_name RPAREN | ALIGNOF unary_expression | ALIGNOF LPAREN type_name RPAREN | PLUS cast_expression | MINUS cast_expression | STAR cast_expression | AND cast_expression | EXCLAM cast_expression | TILDE cast_expression | AND_AND id_or_typename_as_id
cast_expression ::=  unary_expression | LPAREN type_name RPAREN cast_expression
multiplicative_expression ::=  cast_expression | multiplicative_expression STAR cast_expression | multiplicative_expression SLASH cast_expression | multiplicative_expression PERCENT cast_expression
additive_expression ::=  multiplicative_expression | additive_expression PLUS multiplicative_expression | additive_expression MINUS multiplicative_expression
shift_expression ::=  additive_expression | shift_expression INF_INF additive_expression | shift_expression SUP_SUP additive_expression
relational_expression ::=  shift_expression | relational_expression INF shift_expression | relational_expression SUP shift_expression | relational_expression INF_EQ shift_expression | relational_expression SUP_EQ shift_expression
equality_expression ::=  relational_expression | equality_expression EQ_EQ relational_expression | equality_expression EXCLAM_EQ relational_expression
bitwise_and_expression ::=  equality_expression | bitwise_and_expression AND equality_expression
bitwise_xor_expression ::=  bitwise_and_expression | bitwise_xor_expression CIRC bitwise_and_expression
bitwise_or_expression ::=  bitwise_xor_expression | bitwise_or_expression PIPE bitwise_xor_expression
logical_and_expression ::=  bitwise_or_expression | logical_and_expression AND_AND bitwise_or_expression
logical_or_expression ::=  logical_and_expression | logical_or_expression PIPE_PIPE logical_and_expression
conditional_expression ::=  logical_or_expression | logical_or_expression QUEST opt_expression COLON conditional_expression
assignment_expression ::=  conditional_expression | cast_expression EQ assignment_expression | cast_expression PLUS_EQ assignment_expression | cast_expression MINUS_EQ assignment_expression | cast_expression STAR_EQ assignment_expression | cast_expression SLASH_EQ assignment_expression | cast_expression PERCENT_EQ assignment_expression | cast_expression AND_EQ assignment_expression | cast_expression PIPE_EQ assignment_expression | cast_expression CIRC_EQ assignment_expression | cast_expression INF_INF_EQ assignment_expression | cast_expression SUP_SUP_EQ assignment_expression
expression ::=  assignment_expression
constant ::=  CST_INT | CST_FLOAT | CST_CHAR | CST_WCHAR | string_constant | wstring_list
string_constant ::=  string_list
string_list ::=  one_string | string_list one_string
wstring_list ::=  CST_WSTRING | wstring_list one_string | wstring_list CST_WSTRING | string_list CST_WSTRING
one_string ::=  CST_STRING | FUNCTION__ | PRETTY_FUNCTION__
init_expression ::=  expression | LBRACE initializer_list_opt RBRACE
initializer_list ::=  initializer_single | initializer_single COMMA initializer_list_opt
initializer_list_opt ::=  /* empty */ | initializer_list
initializer_single ::=  init_designators eq_opt init_expression | gcc_init_designators init_expression | init_expression
eq_opt ::=   /* empty */ | EQ
init_designators ::=  DOT id_or_typename init_designators_opt | LBRACKET expression RBRACKET init_designators_opt | LBRACKET expression ELLIPSIS expression RBRACKET
init_designators_opt ::=  /* empty */ | init_designators
gcc_init_designators ::=  id_or_typename COLON
ghost_arguments_opt ::=  /* empty */ | ghost_arguments
ghost_arguments ::=  LGHOST LPAREN arguments RPAREN RGHOST
arguments ::=  /* empty */ | comma_expression
opt_expression ::=  /* empty */ | comma_expression
comma_expression ::=  expression | expression COMMA comma_expression
comma_expression_opt ::=  /* empty */ | comma_expression
paren_comma_expression ::=  LPAREN comma_expression RPAREN
bracket_comma_expression ::=  LBRACKET comma_expression RBRACKET
block ::=  block_begin local_labels block_attrs block_element_list RBRACE
block_begin ::=  LBRACE
block_attrs ::=  /* empty */ | BLOCKATTRIBUTE paren_attr_list_ne
block_element_list ::=  annot_list_opt | annot_list_opt declaration block_element_list | annot_list_opt statement block_element_list | annot_list_opt pragma block_element_list | annot_list_opt id_or_typename_as_id COLON
annot_list_opt ::=  /* empty */ | annot_list
annot_list ::=  CODE_ANNOT annot_list_opt | LGHOST block_element_list RGHOST annot_list_opt
local_labels ::=  /* empty */ | LABEL__ local_label_names SEMICOLON local_labels
local_label_names ::=  id_or_typename_as_id | id_or_typename_as_id COMMA local_label_names
annotated_statement ::=  statement | annot_list statement
statement ::=  SEMICOLON | SPEC annotated_statement | comma_expression SEMICOLON | block | IF paren_comma_expression annotated_statement | IF paren_comma_expression annotated_statement ELSE annotated_statement | SWITCH paren_comma_expression annotated_statement | opt_loop_annotations WHILE paren_comma_expression annotated_statement | opt_loop_annotations DO annotated_statement WHILE paren_comma_expression SEMICOLON | opt_loop_annotations FOR LPAREN for_clause opt_expression SEMICOLON opt_expression RPAREN annotated_statement | id_or_typename_as_id COLON attribute_nocv_list annotated_statement | CASE expression COLON annotated_statement | CASE expression ELLIPSIS expression COLON annotated_statement | DEFAULT COLON annotated_statement | RETURN SEMICOLON | RETURN comma_expression SEMICOLON | BREAK SEMICOLON | CONTINUE SEMICOLON | GOTO id_or_typename_as_id SEMICOLON | GOTO STAR comma_expression SEMICOLON | ASM GOTO asmattr LPAREN asmtemplate asmoutputs RPAREN SEMICOLON | ASM asmattr LPAREN asmtemplate asmoutputs RPAREN SEMICOLON | MSASM | TRY block EXCEPT paren_comma_expression block | TRY block FINALLY block
opt_loop_annotations ::=  /* empty */ | loop_annotations
loop_annotations ::=  loop_annotation
loop_annotation ::=  LOOP_ANNOT
for_clause ::=  opt_expression SEMICOLON | declaration
ghost_parameter_opt ::=  /* empty */ | ghost_parameter
ghost_parameter ::=  LGHOST parameter_list_startscope rest_par_list RPAREN RGHOST
declaration ::=  decl_spec_list init_declarator_list SEMICOLON | decl_spec_list SEMICOLON | SPEC decl_spec_list init_declarator_list SEMICOLON | SPEC decl_spec_list SEMICOLON
init_declarator_list ::=  init_declarator | init_declarator COMMA init_declarator_attr_list
init_declarator_attr_list ::=  init_declarator_attr | init_declarator_attr COMMA init_declarator_attr_list
init_declarator_attr ::=  attribute_nocv_list init_declarator
init_declarator ::=  declarator | declarator EQ init_expression
decl_spec_wo_type ::=  TYPEDEF | EXTERN | STATIC | AUTO | REGISTER | INLINE | cvspec | attribute_nocv
decl_spec_list ::=  decl_spec_wo_type decl_spec_list_opt | type_spec decl_spec_list_opt_no_named
decl_spec_list_no_named ::=  decl_spec_wo_type decl_spec_list_opt_no_named | type_spec decl_spec_list_opt_no_named
decl_spec_list_opt ::=  /* empty */ | decl_spec_list
decl_spec_list_opt_no_named ::=  /* empty */ | decl_spec_list_no_named
type_spec ::=  VOID | CHAR | BOOL | SHORT | INT | LONG | INT64 | FLOAT | DOUBLE | SIGNED | UNSIGNED | STRUCT id_or_typename | STRUCT just_attributes id_or_typename | STRUCT id_or_typename LBRACE struct_decl_list RBRACE | STRUCT LBRACE struct_decl_list RBRACE | STRUCT just_attributes id_or_typename LBRACE struct_decl_list RBRACE | STRUCT just_attributes LBRACE struct_decl_list RBRACE | UNION id_or_typename | UNION id_or_typename LBRACE struct_decl_list RBRACE | UNION LBRACE struct_decl_list RBRACE | UNION just_attributes id_or_typename LBRACE struct_decl_list RBRACE | UNION just_attributes LBRACE struct_decl_list RBRACE | ENUM id_or_typename | ENUM id_or_typename LBRACE enum_list maybecomma RBRACE | ENUM LBRACE enum_list maybecomma RBRACE | ENUM just_attributes id_or_typename LBRACE enum_list maybecomma RBRACE | ENUM just_attributes LBRACE enum_list maybecomma RBRACE | NAMED_TYPE | TYPEOF LPAREN expression RPAREN | TYPEOF LPAREN type_name RPAREN
struct_decl_list ::=  /* empty */ | decl_spec_list SEMICOLON struct_decl_list | SEMICOLON struct_decl_list | decl_spec_list field_decl_list SEMICOLON struct_decl_list | pragma struct_decl_list
field_decl_list ::=  field_decl | field_decl COMMA field_decl_list
field_decl ::=  declarator | declarator COLON expression attributes | COLON expression
enum_list ::=  enumerator | enum_list COMMA enumerator
enumerator ::=  IDENT | IDENT EQ expression
declarator ::=  pointer_opt direct_decl attributes_with_asm
attributes_or_static ::=  attributes comma_expression_opt | attribute attributes STATIC comma_expression | STATIC attributes comma_expression
direct_decl ::=  id_or_typename | LPAREN attributes declarator RPAREN | direct_decl LBRACKET attributes_or_static RBRACKET | direct_decl LPAREN RPAREN ghost_parameter_opt | direct_decl parameter_list_startscope rest_par_list RPAREN ghost_parameter_opt
parameter_list_startscope ::=  LPAREN
rest_par_list ::=  parameter_decl rest_par_list1
rest_par_list1 ::=  /* empty */ | COMMA ELLIPSIS | COMMA parameter_decl rest_par_list1
parameter_decl ::=  decl_spec_list declarator | decl_spec_list abstract_decl | decl_spec_list | LPAREN parameter_decl RPAREN
old_proto_decl ::=  pointer_opt direct_old_proto_decl
direct_old_proto_decl ::=  direct_decl LPAREN old_parameter_list_ne RPAREN old_pardef_list
old_parameter_list_ne ::=  IDENT | IDENT COMMA old_parameter_list_ne
old_pardef_list ::=  /* empty */ | decl_spec_list old_pardef SEMICOLON ELLIPSIS | decl_spec_list old_pardef SEMICOLON old_pardef_list
old_pardef ::=  declarator | declarator COMMA old_pardef
pointer ::=  STAR attributes pointer_opt
pointer_opt ::=  /* empty */ | pointer
type_name ::=  decl_spec_list abstract_decl | decl_spec_list
abstract_decl ::=  pointer_opt abs_direct_decl attributes | pointer
abs_direct_decl ::=  LPAREN attributes abstract_decl RPAREN | abs_direct_decl_opt LBRACKET comma_expression_opt RBRACKET | abs_direct_decl parameter_list_startscope rest_par_list RPAREN | abs_direct_decl LPAREN RPAREN
abs_direct_decl_opt ::=   /* empty */ | abs_direct_decl
function_def ::=  SPEC function_def_start block | function_def_start block
function_def_start ::=  decl_spec_list declarator | decl_spec_list old_proto_decl | IDENT parameter_list_startscope rest_par_list RPAREN ghost_parameter_opt | IDENT LPAREN old_parameter_list_ne RPAREN old_pardef_list | IDENT LPAREN RPAREN ghost_parameter_opt
cvspec ::=  CONST | VOLATILE | RESTRICT | ATTRIBUTE_ANNOT
attributes ::=  /* empty */ | attribute attributes
attributes_with_asm ::=  /* empty */ | attribute attributes_with_asm | ASM LPAREN string_constant RPAREN attributes
attribute_nocv ::=  ATTRIBUTE LPAREN paren_attr_list RPAREN | DECLSPEC paren_attr_list_ne | MSATTR | THREAD
attribute_nocv_list ::=  /* empty */ | attribute_nocv attribute_nocv_list
attribute ::=  attribute_nocv | CONST | RESTRICT | VOLATILE | ATTRIBUTE_ANNOT
just_attribute ::=  ATTRIBUTE LPAREN paren_attr_list RPAREN | DECLSPEC paren_attr_list_ne
just_attributes ::=  just_attribute | just_attribute just_attributes
pragma ::=  PRAGMA PRAGMA_EOL | PRAGMA attr PRAGMA_EOL | PRAGMA attr SEMICOLON PRAGMA_EOL | PRAGMA_LINE
var_attr ::=  IDENT | NAMED_TYPE | DEFAULT COLON CST_INT | CONST | VOLATILE | CST_INT COLON CST_INT
basic_attr ::=  CST_INT | CST_FLOAT | var_attr
basic_attr_list_ne ::=  basic_attr | basic_attr basic_attr_list_ne
parameter_attr_list_ne ::=  basic_attr_list_ne | basic_attr_list_ne string_constant | basic_attr_list_ne string_constant parameter_attr_list_ne
param_attr_list_ne ::=  parameter_attr_list_ne | string_constant
primary_attr ::=  basic_attr | LPAREN attr RPAREN | string_constant
postfix_attr ::=  primary_attr | id_or_typename_as_id paren_attr_list_ne | id_or_typename_as_id LPAREN RPAREN | basic_attr param_attr_list_ne | postfix_attr ARROW id_or_typename | postfix_attr DOT id_or_typename | postfix_attr LBRACKET attr RBRACKET
unary_attr ::=  postfix_attr | SIZEOF unary_expression | SIZEOF LPAREN type_name RPAREN | ALIGNOF unary_expression | ALIGNOF LPAREN type_name RPAREN | PLUS cast_attr | MINUS cast_attr | STAR cast_attr | AND cast_attr | EXCLAM cast_attr | TILDE cast_attr
cast_attr ::=  unary_attr
multiplicative_attr ::=  cast_attr | multiplicative_attr STAR cast_attr | multiplicative_attr SLASH cast_attr | multiplicative_attr PERCENT cast_attr
additive_attr ::=  multiplicative_attr | additive_attr PLUS multiplicative_attr | additive_attr MINUS multiplicative_attr
shift_attr ::=  additive_attr | shift_attr INF_INF additive_attr | shift_attr SUP_SUP additive_attr
relational_attr ::=  shift_attr | relational_attr INF shift_attr | relational_attr SUP shift_attr | relational_attr INF_EQ shift_attr | relational_attr SUP_EQ shift_attr
equality_attr ::=  relational_attr | equality_attr EQ_EQ relational_attr | equality_attr EXCLAM_EQ relational_attr
bitwise_and_attr ::=  equality_attr | bitwise_and_attr AND equality_attr
bitwise_xor_attr ::=  bitwise_and_attr | bitwise_xor_attr CIRC bitwise_and_attr
bitwise_or_attr ::=  bitwise_xor_attr | bitwise_or_attr PIPE bitwise_xor_attr
logical_and_attr ::=  bitwise_or_attr | logical_and_attr AND_AND bitwise_or_attr
logical_or_attr ::=  logical_and_attr | logical_or_attr PIPE_PIPE logical_and_attr
conditional_attr ::=  logical_or_attr | logical_or_attr QUEST attr_test conditional_attr COLON2 conditional_attr
assign_attr ::=  conditional_attr | conditional_attr EQ conditional_attr
attr_test ::=  /* empty */
attr ::=  assign_attr
attr_list_ne ::=  attr | attr COMMA attr_list_ne
attr_list ::=  /* empty */ | attr_list_ne
paren_attr_list_ne ::=  LPAREN attr_list_ne RPAREN
paren_attr_list ::=  LPAREN attr_list RPAREN
asmattr ::=  /* empty */ | VOLATILE asmattr | CONST asmattr
asmtemplate ::=  one_string | one_string asmtemplate
asmoutputs ::=  /* empty */ | COLON asmoperands asminputs
asmoperands ::=  /* empty */ | asmoperandsne
asmoperandsne ::=  asmoperand | asmoperandsne COMMA asmoperand
asmoperand ::=  asmopname string_constant LPAREN expression RPAREN
asminputs ::=  /* empty */ | COLON asmoperands asmclobber
asmopname ::=  /* empty */ | LBRACKET IDENT RBRACKET
asmclobber ::=  /* empty */ | COLON asmlabels | COLON asmcloberlst_ne asmlabels
asmcloberlst_ne ::=  string_constant | string_constant COMMA asmcloberlst_ne
asmlabels ::=  /* empty */ | COLON local_label_names

// Tokens

AUTO ::= "auto"
CONST ::= "const"
CONST ::= "__const"
CONST ::= "__const__"
STATIC ::= "static"
EXTERN ::= "extern"
LONG ::= "long"
SHORT ::= "short"
REGISTER ::= "register"
SIGNED ::= "signed"
SIGNED ::= "__signed"
UNSIGNED ::= "unsigned"
VOLATILE ::= "volatile"
VOLATILE ::= "__volatile"
/* WW: see /usr/include/sys/cdefs.h for why __signed and __volatile * are accepted GCC-isms */
CHAR ::= "char"
BOOL ::= "_Bool"
INT ::= "int"
FLOAT ::= "float"
DOUBLE ::= "double"
VOID ::= "void"
ENUM ::= "enum"
STRUCT ::= "struct"
TYPEDEF ::= "typedef"
UNION ::= "union"
BREAK ::= "break"
CONTINUE ::= "continue"
GOTO ::= "goto"
RETURN ::= "return"
SWITCH ::= "switch"
CASE ::= "case"
DEFAULT ::= "default"
WHILE ::= "while"
DO ::= "do"
FOR ::= "for"
IF ::= "if"
ELSE ::= "else"
/*** Implementation specific keywords ***/
SIGNED ::= "__signed__"
INLINE ::= "__inline__" | "inline" | "__inline" | "_inline" | __forceinline /* !! we turn forceinline
             * into inline */
ATTRIBUTE ::= "__attribute__"
ATTRIBUTE ::= "__attribute"
BLOCKATTRIBUTE ::= "__blockattribute__" | "__blockattribute"
ASM ::= "__asm__"
ASM ::= "asm"
TYPEOF ::= "__typeof__"
TYPEOF ::= "__typeof"
TYPEOF ::= "typeof"
ALIGNOF ::= "__alignof"
ALIGNOF ::= "__alignof__"
VOLATILE ::= "__volatile__"
VOLATILE ::= "__volatile"
FUNCTION__ ::= "__FUNCTION__"
FUNCTION__ ::= "__func__" /* ISO 6.4.2.2 */
PRETTY_FUNCTION__ ::= "__PRETTY_FUNCTION__"
LABEL__ ::= "__label__"
/*** weimer: GCC arcana ***/
RESTRICT ::= "__restrict"
RESTRICT ::= "restrict"
/*      ("__extension__", EXTENSION); */
/**** MS VC ***/
INT64 ::= "__int64"
INT ::= "__int32"
/*
("_cdecl",  fun _ -> MSATTR ("_cdecl", currentLoc ()));
("__cdecl", fun _ -> MSATTR ("__cdecl", currentLoc ()));
("_stdcall", fun _ -> MSATTR ("_stdcall", currentLoc ()));
("__stdcall", fun _ -> MSATTR ("__stdcall", currentLoc ()));
("_fastcall", fun _ -> MSATTR ("_fastcall", currentLoc ()));
("__fastcall", fun _ -> MSATTR ("__fastcall", currentLoc ()));
("__w64", fun _ -> MSATTR("__w64", currentLoc ()));
*/
DECLSPEC ::= "__declspec"
TRY ::= "__try"
EXCEPT ::= "__except"
FINALLY ::= "__finally"
/* weimer: some files produced by 'GCC -E' expect this type to be * defined */
NAMED_TYPE ::= "__builtin_va_list"
BUILTIN_VA_ARG ::= "__builtin_va_arg"
BUILTIN_TYPES_COMPAT ::= "__builtin_types_compatible_p"
BUILTIN_OFFSETOF ::= "__builtin_offsetof"
/* On some versions of GCC __thread is a regular identifier */
THREAD ::= "__thread"

ELLIPSIS ::= "..."
PLUS_EQ ::= "+="
MINUS_EQ ::= "-="
STAR_EQ ::= "*="
SLASH_EQ ::= "/="
PERCENT_EQ ::= "%="
PIPE_EQ ::= "|="
AND_EQ ::= "&="
CIRC_EQ ::= "^="
INF_INF_EQ ::= "<<="
SUP_SUP_EQ ::= ">>="
INF_INF ::= "<<"
SUP_SUP ::= ">>"
EQ_EQ ::= "=="
EXCLAM_EQ ::= "!="
INF_EQ ::= "<="
SUP_EQ ::= ">="
EQ ::= "="
INF ::= "<"
SUP ::= ">"
PLUS_PLUS  ::= "++"
MINUS_MINUS  ::= "--"
ARROW ::= "->"
PLUS  ::= '+'
MINUS  ::= '-'
STAR ::= '*'
SLASH ::= "/"
PERCENT ::= '%'
EXCLAM  ::= '!'
AND_AND  ::= "&&"
PIPE_PIPE ::= "||"
AND  ::= '&'
PIPE ::= '|'
CIRC ::= '^'
QUEST ::= '?'
COLON ::= ':'
TILDE  ::= '~'

LBRACE ::= '{'  | "<%"
RBRACE ::= '}'  | "%>"
LBRACKET ::= '['
RBRACKET ::= ']'
LBRACKET ::= "<:"
RBRACKET ::= ":>"
LPAREN ::= '('
RPAREN ::= ')'
SEMICOLON ::= ';'
COMMA ::= ','
DOT ::= '.'
SIZEOF ::= "sizeof"
MSASM ::= "__asm"

mingodad commented 3 years ago

The above EBNF is not correct somenone pointed out that my naive interpretation of menhir grammar like bison grammar is not correct (mainly rules starting with | doesn't mean /*empty*/).

I'm working to fix it.

mingodad commented 3 years ago

I have edited the EBNF from the first message to fix several issues with the tool I used to create it.

pbaudin commented 3 years ago

Thanks to introduce your tool, it recalled to me a tool I used several years ago (maybe ebnf2ps between 1995 and 2000). Instead of postscript figures your tool gives a more usable output, that is a good point.

But, it is difficult to use in our development such a tool until there is no automatic tool chain from the original grammar file (used by parser generators such as yacc, menhir, ...). At least, that requires a translator of these source formats into the format required by your tool to cope with the modification of the source file.

Tags inserted as comments into the source file should also be usable by the translator to control the final output. It could be a way to specify the rules to inline in order to simplify the final figures and retrieve something closer to the grammar figures of the ACSL document. For the same reason, theses tags should allow the renaming of some nodes (because the source file may not use the same names than the ACSL document).

Finally, the same kind of feature could be helpful for the lexical definitions of the grammar tokens.

What do you suggest to solve these engineering difficulties?

mingodad commented 3 years ago

Ask or modify menhir to ouptut EBNF . Use a custom parser like I did (see bellow) using one of https://github.com/mingodad/CocoR-CPP , https://github.com/mingodad/CocoR-CSharp , https://github.com/mingodad/CocoR-Java and download https://www.bottlecaps.de/rr/download/rr-1.63-java8.zip to use offline.

Here is the custom parser I created for menhir grammars:

/*
Need to check and fix tail like ' | /* empty */'
*/

#include "Scanner.nut"

COMPILER Menhir

CHARACTERS
    letter    = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_".
    digit     = "0123456789".
    cr        = '\r'.
    lf        = '\n'.
    tab       = '\t'.
    ff        = '\f'.
    stringCh  = ANY - '"' - '\\' - cr - lf.
    charCh    = ANY - '\'' - '\\' - cr - lf.
    printable = '\u0020' .. '\u007e'.
    hex       = "0123456789abcdef".

TOKENS
    ID     = (letter | '.') { letter | digit | '.' | '-'}.
    INT_LITERAL    = digit { digit }.
    STRING    = '"' { stringCh | '\\' printable } '"'.
    badString = '"' { stringCh | '\\' printable } (cr | lf).
    //CHAR_LITERAL      = '\'' ( charCh | '\\' printable { hex } ) '\''.

    LEFT_BRACE = '{'.
    RIGHT_BRACE = '}'.
    LEFT_PAREN = '('.
    RIGHT_PAREN= ')'.
    LEFT_ANGLEB = '<'.
    RIGHT_ANGLEB = '>'.
    ARROW = "->".

PRAGMAS

COMMENTS FROM "(*" TO "*)" NESTED
COMMENTS FROM "/*" TO "*/" NESTED
COMMENTS FROM "//" TO lf

IGNORE cr + lf + tab + ff

/*-------------------------------------------------------------------------*/

PRODUCTIONS

Menhir =
    prologue_declarations "%%" grammar [epilogue] EOF
    .

epilogue =
    "%%" {ANY}
    .

prologue_declarations =
    prologue_declaration {prologue_declaration}
    .

prologue_declaration =
    "%{"  {ANY}  "%}"
    | "%token" [tag] token_decls
    | ("%left" | "%right" | "%nonassoc") token_decls
    | "%start" [tag] token_decls
    | "%type" [tag] token_decls
    | "%parameter" tag
    .

token_decls =
    token_id {token_id}
    .

token_id =
    ID [params]
    | STRING
    .

grammar =
    rule {rule}
    .

rule =
    ["%inline" | "%public"] ID  (. printf("%s ::= ", t.val); .) [params]
        ':' ['|'] rule_id_list {'|' (. printf("\n\t|"); .) rule_id_list} [';']
        (. printf("\n\n"); .)
    .

rule_id_list =
    rule_id_elm {rule_id_elm} [sema]
    | sema ["%prec" ID] (. printf(" /* empty */"); .)
    | "%prec" ID sema (. printf(" /* empty */"); .)
    .

rule_id_elm = (. string the_id; .)
    rule_id<out the_id> ['=' rule_id<out the_id>]
        ["%prec" ID] [';']
        (. printf(" %s", the_id); .)
    .

rule_id<out string the_id> =
    "option" '(' (
        ("terminated" | "preceded") '('
            ID (. the_id = "(" + t.val; .)
            ','
            ID (. the_id += " " + t.val + ")?"; .)
            ')'
        | ID (. the_id = t.val + "?"; .)
        ) ')'
    | "separated_nonempty_list" '('
        ID (. the_id = "(" + t.val; .)
        ','
        ID (. the_id += " " + t.val + ")+"; .)
        ')'
    | "separated_list" '('
        ID (. the_id = "(" + t.val; .)
        ','
        ID (. the_id += " " + t.val + ")*"; .)
        ')'
    |(
        ID (. the_id = t.val; .) //when ID == "option" we need see inside "()"
        | STRING (. the_id = t.val; .)
    ) [params] [('*' | '?' | '+') (. the_id += t.val; .)]
    .

params =
    '(' (. SkipNested(ParserTokens._LEFT_PAREN, ParserTokens._RIGHT_PAREN); .) ')'
    .

sema =
    '{'  (. SkipNested(ParserTokens._LEFT_BRACE, ParserTokens._RIGHT_BRACE); .) '}'
    .

tag =
    '<'  (. SkipNested(ParserTokens._LEFT_ANGLEB, ParserTokens._RIGHT_ANGLEB); .) '>'
    .

END Menhir.

Frama-C / Frama-C-snapshot

Grammar railroad diagram #44