Grammar railroad diagram

mingodad commented 2 years ago

Based on the debug.c:dump_productions function, I created a debug.c:dump_ebnf function that outputs an EBNF grammar understood by https://www.bottlecaps.de/rr/ui to generate railroad diagrams (https://en.wikipedia.org/wiki/Syntax_diagram). As an example, the one for parse.par is shown below.

Copy and paste the EBNF shown below into https://www.bottlecaps.de/rr/ui on the "Edit Grammar" tab, then switch to the "View Diagram" tab.

//EBNF parse to be viewed at https://www.bottlecaps.de/rr/ui
grammar_spec ::= fixed_directive_def* segment* 
fixed_directive_def ::= "#!" fixed_directive ';' 
fixed_directive ::= "mode" mode_type 
fixed_directive ::= "language" string_or_ident 
mode_type ::= "context-sensitive" 
mode_type ::= "sensitive" 
mode_type ::= "scannerless" 
mode_type ::= "context-insensitive" 
mode_type ::= "insensitive" 
mode_type ::= "scanner" 
segment ::= code 
segment ::= definition ';' 
segment ::= unfixed_directive ';' 
unfixed_directive ::= '#' directive_parms 
userparam ::= code 
userparam ::= string 
userparam ::= boolean 
userparam ::= 
directive_parms ::= "whitespaces" symbol_list 
directive_parms ::= "lexeme" symbol_list 
directive_parms ::= "fixate" symbol_list 
directive_parms ::= "left" symbol_list 
directive_parms ::= "right" symbol_list 
directive_parms ::= "nonassoc" symbol_list 
directive_parms ::= "prefix" string 
directive_parms ::= "default action" code_opt 
directive_parms ::= "default epsilon action" code_opt 
directive_parms ::= "default value type" type 
directive_parms ::= "lexeme separation" boolean_opt 
directive_parms ::= "case insensitive strings" boolean_opt 
directive_parms ::= "reserve terminals" boolean_opt 
directive_parms ::= "prologue" code 
directive_parms ::= "epilogue" code 
directive_parms ::= "pcb" code 
directive_parms ::= "extends" string 
directive_parms ::= option? line_number /*@*/USERDEF userparam 
boolean_opt ::= boolean 
boolean_opt ::= 
boolean ::= "on" 
boolean ::= "off" 
symbol_list ::= symbol_list sym 
symbol_list ::= sym 
lhs ::= identifier 
alt_lhs_list ::= alt_lhs_list lhs 
alt_lhs_list ::= lhs 
alt_regex_sym ::= alt_regex_sym regex_sym 
alt_regex_sym ::= regex_sym 
regex_sym ::= identifier 
defines ::= "->" 
defines ::= ':' 
defines ::= "=>" 
defines ::= ":=" 
definition ::= lhs goal_mark alt_lhs_list? type defines productions 
definition ::= '@' alt_regex_sym type regex code_opt ast_node sym_option* 
sym_option ::= '#' "greedy" 
sym_option ::= '#' "non-greedy" 
sym_option ::= '#' option? line_number /*@*/USERDEF userparam 
goal_mark ::= '$' 
goal_mark ::= 
productions ::= productions '|' production 
productions ::= production 
ast_node ::= '=' identifier 
ast_node ::= '=' string 
ast_node ::= 
production ::= line_number rhs_opt code_opt_dup ast_node prod_directives* 
rhs_opt ::= rhs 
rhs_opt ::= 
prod_directives ::= '#' "precedence" terminal 
rhs ::= rhs symbol access_name 
rhs ::= symbol access_name 
symbol ::= sym modifier 
symbol ::= "&error" 
symbol ::= "&eof" 
sym ::= terminal 
sym ::= identifier 
sym ::= type '(' stack_cur_prod productions ')' 
stack_cur_prod ::= 
terminal ::= ccl 
terminal ::= kw 
terminal ::= '@' identifier 
modifier ::= '*' 
modifier ::= '+' 
modifier ::= '?' 
modifier ::= 
access_name ::= ':' identifier 
access_name ::= ':' string_single 
access_name ::= 
regex ::= re_alt 
re_alt ::= re_alt '|' re_expr 
re_alt ::= re_expr 
re_expr ::= re_expr re_modifier 
re_expr ::= re_modifier 
re_modifier ::= re_factor '*' 
re_modifier ::= re_factor '+' 
re_modifier ::= re_factor '?' 
re_modifier ::= re_factor 
re_factor ::= ccl 
re_factor ::= kw 
re_factor ::= '.' 
re_factor ::= '(' regex ')' 
string ::= string_single+ 
string_single ::= ccl_string 
string_single ::= kw 
ccl ::= ccl_string 
ccl ::= '!' ccl_string 
ccl_string ::= ''' ccl_str ''' 
ccl_str ::= ccl_str ccl_char 
ccl_str ::= 
ccl_char ::= '\x0-&(-[]-\uFFFF' 
ccl_char ::= '\\' '\x1-\uFFFF' 
kw ::= '"' kw_str '"' 
kw_str ::= kw_str kw_char 
kw_str ::= 
kw_char ::= '\x0-!#-[]-\uFFFF' 
kw_char ::= '\\' '\x1-\uFFFF' 
type ::= '<' type_str '>' 
type ::= 
type_str ::= type_str '\x0-=?-\uFFFF' 
type_str ::= 
identifier ::= identifier_start identifier_follow 
identifier_start ::= 'A-Z_a-z' 
identifier_follow ::= identifier_follow '0-9A-Z_a-z' 
identifier_follow ::= 
string_or_ident ::= string 
string_or_ident ::= identifier 
integer ::= integer '0-9' 
integer ::= '0-9' 
code_opt_dup ::= code_opt 
code_opt ::= code 
code_opt ::= 
code ::= code_begin inner_code_opt "*]" 
code_begin ::= "[*" 
inner_code_opt ::= inner_code 
inner_code_opt ::= 
inner_code ::= inner_code anychar 
inner_code ::= anychar 
whitespace ::= ' ' 
whitespace ::= '\t' 
whitespace ::= "/*" comment? "*/" 
whitespace ::= "//" scomment? '\n' 
whitespace ::= '\r' 
whitespace ::= '\n' 
comment ::= comment anychar 
comment ::= anychar 
anychar ::= '\x1-\uFFFF' 
scomment ::= scomment '\x0-\t\v-\uFFFF' 
scomment ::= '\x0-\t\v-\uFFFF' 
line_number ::=

/*
 * Writes a single grammar symbol to `stream` in the notation used by the
 * EBNF dump:
 *
 *   - SYM_CCL_TERMINAL:              pccl_to_str( sym->ccl, TRUE ) enclosed
 *                                    in single quotes
 *   - SYM_REGEX_TERMINAL, keyword:   sym->name enclosed in double quotes
 *   - SYM_REGEX_TERMINAL, otherwise: sym->name prefixed with the marker
 *                                    comment "/*@*" "/"
 *   - any other symbol type:         sym->name, unchanged
 *
 * stream   Output stream; stderr is used when this is NULL.
 * sym      Symbol to be written; must not be NULL.
 */
static void print_ebnf_symbol( FILE* stream, SYMBOL* sym )
{
    FILE*   out = stream ? stream : stderr;

    if( sym->type == SYM_CCL_TERMINAL )
    {
        fprintf( out, "'%s'", pccl_to_str( sym->ccl, TRUE ) );
        return;
    }

    if( sym->type == SYM_REGEX_TERMINAL )
    {
        if( sym->keyword )
            fprintf( out, "\"%s\"", sym->name );
        else
            fprintf( out, "/*@*/%s", sym->name );

        return;
    }

    /* System terminals and every remaining symbol type are written by
        their plain name. */
    fprintf( out, "%s", sym->name );
}

/*
 * Writes one production to `stream` as a single EBNF rule of the form
 *
 *   lhs [lhs ...] ::= sym sym ...
 *
 * followed by a newline. Only left-hand sides that are not flagged as
 * `generated` are printed. Right-hand side symbols are printed with
 * print_ebnf_symbol(), each followed by a blank.
 *
 * A production is skipped when it has no left-hand side at all, or when
 * the last entry of p->all_lhs is a generated symbol. The skip decision is
 * taken before anything is written, so a skipped production leaves no
 * partial output behind that would run into the next rule.
 *
 * stream   Output stream; stderr is used when this is NULL.
 * p        Production to be dumped; must not be NULL.
 *
 * Returns 1 if a rule was written, 0 if the production was skipped.
 */
static int dump_ebnf_production( FILE* stream, PROD* p )
{
    plistel*    e;
    SYMBOL*     sym;
    SYMBOL*     last_lhs    = (SYMBOL*)NULL;

    if( !stream )
        stream = stderr;

    /* Determine the last left-hand side up-front; it decides whether the
        production is dumped at all. Starting from NULL also covers an empty
        list, where no symbol exists to inspect. */
    plist_for( p->all_lhs, e )
        last_lhs = (SYMBOL*)plist_access( e );

    if( !last_lhs || last_lhs->generated )
        return 0;

    /* Left-hand side(s) */
    plist_for( p->all_lhs, e )
    {
        sym = (SYMBOL*)plist_access( e );

        if( !sym->generated )
            fprintf( stream, "%s ", sym->name );
    }

    fprintf( stream, "::= " );

    /* Right-hand side */
    plist_for( p->rhs, e )
    {
        print_ebnf_symbol( stream, (SYMBOL*)plist_access( e ) );
        fprintf( stream, " " );
    }

    fprintf( stream, "\n" );
    return 1;
}

/*
 * Dumps all productions of `parser` as an EBNF grammar that can be pasted
 * into https://www.bottlecaps.de/rr/ui.
 *
 * The output starts with a comment line naming parser->p_basename (or an
 * empty name if it is not set). Every production written by
 * dump_ebnf_production() is followed by an additional empty line;
 * productions it skips produce no output.
 *
 * If the file-level flag `first_progress` is set, one extra newline is
 * written before the header. The flag is cleared once the dump is done.
 *
 * stream   Output stream; stderr is used when this is NULL.
 * parser   Parser whose productions are dumped; must not be NULL.
 */
void dump_ebnf( FILE* stream, PARSER* parser )
{
    FILE*       out     = stream ? stream : stderr;
    plistel*    item;
    const char* title;

    if( first_progress )
        fprintf( out, "\n" );

    title = parser->p_basename ? parser->p_basename : "";
    fprintf( out,
        "\n//EBNF %s to be viewed at https://www.bottlecaps.de/rr/ui\n\n",
        title );

    plist_for( parser->productions, item )
    {
        /* Skipped productions must not leave an empty line behind */
        if( !dump_ebnf_production( out, (PROD*)plist_access( item ) ) )
            continue;

        fprintf( out, "\n" );
    }

    first_progress = FALSE;
}

phorward commented 2 years ago

Hello @mingodad, thanks for creating this issue, it is an interesting feature.

Maybe the same output could also be generated by providing a target language description file? One could think of a way to turn a unicc parser into an EBNF format for this website by just specifying unicc -l ebnf-bottlecaps grammar.par.

mingodad commented 2 years ago

It seems to me that using https://github.com/phorward/unicc/tree/master/targets would require more code and be more complicated.

phorward commented 2 years ago

It seems to me that using https://github.com/phorward/unicc/tree/master/targets would require more code and be more complicated.

You're partly right. The features currently available there are not sufficient to implement your proposal.

Well, the feature is probably useful, but I think it shouldn't make it into unicc, as unicc is not actively developed right now and I currently have no plans to change that.

You should either bring this feature to unicc2, as this is the only candidate for an improved version of unicc, or create a fork of unicc-v1 with the features you would like to add.

mingodad commented 8 months ago

I've just added the unicc grammar to https://mingodad.github.io/parsertl-playground/playground/, a Yacc/Lex-compatible online editor/tester (select "Unicc parser" from Examples, then click Parse to see a parse tree for the content in "Input source").

phorward commented 8 months ago

I've just added the unicc grammar to https://mingodad.github.io/parsertl-playground/playground/, a Yacc/Lex-compatible online editor/tester (select "Unicc parser" from Examples, then click Parse to see a parse tree for the content in "Input source").

Hello @mingodad, thanks for sharing, looks fine and works!

phorward / unicc

Grammar railroad diagram #23