MPLLang / mpl

The MaPLe compiler for efficient and scalable parallel functional programming
Other
306 stars 18 forks source link

Grammar railroad diagram #176

Closed mingodad closed 4 months ago

mingodad commented 1 year ago

Using a bit of search and replace on the grammar, and adding some of the tokens from the lexer, we can have a nice navigable railroad diagram.

Copy and paste the EBNF shown below into https://www.bottlecaps.de/rr/ui on the tab Edit Grammar, then click on the tab View Diagram to see/download a navigable railroad diagram.

// From: https://github.com/MPLLang/mpl/blob/master/mlton/front-end/ml.grm

/* Start symbol. */
program::= expsAndTopdecs

/* Either an expression with an optional ";" continuation, or a run of top-level declarations. */
expsAndTopdecs::=
    exp expsAndTopdecs_prime
  | topdecs

/* Optional continuation: empty, or ";" followed by further expressions/declarations. */
expsAndTopdecs_prime::=
      /*empty*/
  | SEMICOLON expsAndTopdecs

/* Zero or more topdec items, finished by the optional ";" continuation. */
topdecs::=
    topdec topdecs
  | expsAndTopdecs_prime

topdec ::= topdecnode

/* A top-level declaration: a structure-level declaration, SIGNATURE bindings, or FUNCTOR bindings. */
topdecnode
   ::= strdec
   | SIGNATURE sigbinds
   | FUNCTOR funbinds

/*---------------------------------------------------*/
/*                    Structures                     */
/*---------------------------------------------------*/

/* Sequence of structure-level declarations; SEMICOLON separators are optional and may repeat. */
strdecs ::= strdecsnode

strdecsnode ::=
            | SEMICOLON strdecs
            | strdec strdecs

strdec ::= strdecnode

/* One structure-level declaration: STRUCTURE bindings, a LOCAL block, a decnolocal declaration, or SHOW_BASIS. */
strdecnode
   ::= STRUCTURE strbinds
   | LOCAL strdecs IN strdecs END
   | decnolocal
   | SHOW_BASIS

/* Structure bindings "strid sigconst = body", chained with AND. */
/* The strbinds_prime* helpers accept the body and, after a strexp1 body, optional WHERE/AND type equations before the next AND strbinds. */
strbinds ::= strid sigconst EQUALOP strbinds_prime

strbinds_prime ::= strexp1 strbinds_prime1
          | strexp2 strbinds_prime2

strbinds_prime1 ::= strbinds_prime2
           | WHERE whereeqn strbinds_prime1_prime

strbinds_prime1_prime ::= strbinds_prime1
            | AND whereeqn strbinds_prime1_prime

strbinds_prime2 ::=               /*empty*/
           | AND strbinds

/* Structure expressions: strexp1 is the signature-constrained form, strexp2 the unconstrained forms. */
strexp ::= strexpnode

strexpnode
  ::= strexp1
  | strexp1 whereeqns
  | strexp2node

/* A structure expression constrained with COLON or COLONGT. */
strexp1 ::= strexp COLON sigexp_prime
        | strexp COLONGT sigexp_prime

strexp2 ::= strexp2node

/* Unconstrained forms: a structure path, STRUCT ... END, a functor application, or LET ... IN ... END. */
strexp2node
        ::= longstrid
        | STRUCT strdecs END
        | fctid arg_fct
        | LET strdecs IN strexp END

/* Functor application argument: a parenthesized structure expression or parenthesized declarations. */
arg_fct ::= LPAREN strexp RPAREN
        | LPAREN strdecs RPAREN

/*---------------------------------------------------*/
/*                    Signatures                     */
/*---------------------------------------------------*/

/* Signature expression: a basic signature (sigexp_prime), optionally followed by where-equations. */
sigexp
  ::= sigexp_prime
  | sigexp_prime whereeqns

whereeqns ::= whereeqns_prime

/* One or more type equations: the first is introduced by WHERE, each later one by WHERE or AND. */
whereeqns_prime
  ::= WHERE whereeqn
  | WHERE whereeqn whereeqns_prime
  | WHERE whereeqn whereandeqns

whereandeqns
  ::= AND whereeqn
  | AND whereeqn whereandeqns
  | AND whereeqn whereeqns_prime

/* Signature bindings "sigid = sigexp_prime", each optionally followed by WHERE/AND type equations, chained with AND. */
sigbinds::= sigid EQUALOP sigexp_prime sigbinds_prime

sigexp_prime ::= sigexp_primenode

/* Basic signature: a signature identifier or SIG specs END. */
sigexp_primenode ::= sigid
            | SIG specs END

sigbinds_prime::=
         | AND sigbinds
         | WHERE whereeqn sigbinds_prime_prime

sigbinds_prime_prime ::= sigbinds_prime
           | AND whereeqn sigbinds_prime_prime

/* A single type equation. */
whereeqn  ::= TYPE tyvars longtycon EQUALOP ty

/* Optional signature constraint, introduced by COLON or COLONGT. */
sigconst ::=
         | COLON sigexp
         | COLONGT sigexp

/* Sequence of specifications; SEMICOLON separators are optional and may repeat. */
specs  ::=
       | SEMICOLON specs
       | spec specs

spec ::= specnode

/* The individual specification forms allowed between SIG and END. */
specnode ::= VAL valdescs
         | TYPE typdescs
         | TYPE typBind
         | EQTYPE typdescs
         | DATATYPE datatypeRhs
         | EXCEPTION exndescs
         | STRUCTURE strdescs
         | INCLUDE sigexp
         | INCLUDE sigid sigids /* p. 59 */
         | sharespec

/* Sharing constraints over type constructors (SHARING TYPE) or over structures (SHARING). */
sharespec ::= SHARING TYPE longtyconeqns
          | SHARING longstrideqns

/* Chains of two or more identifiers joined by EQUALOP. */
longstrideqns ::= longstrid EQUALOP longstrid
              | longstrid EQUALOP longstrideqns

longtyconeqns ::= longtycon EQUALOP longtycon
              | longtycon EQUALOP longtyconeqns

/* Structure descriptions "strid : sigexp_prime", each optionally followed by WHERE/AND type equations, chained with AND. */
strdescs ::= strid COLON sigexp_prime strdescs_prime

strdescs_prime ::=
          | AND strdescs
          | WHERE whereeqn strdescs_prime_prime

strdescs_prime_prime ::= strdescs_prime
           | AND whereeqn strdescs_prime_prime

/* AND-separated lists of type, value and exception descriptions. */
typdescs ::= typdesc
         | typdesc AND typdescs

typdesc ::= tyvars tycon

valdescs ::= valdesc
         | valdesc AND valdescs

valdesc ::= vid COLON ty

exndescs ::= exndesc
         | exndesc AND exndescs

exndesc ::= con tyOpt

/* Optional "OF ty" argument type. */
tyOpt ::=
      | OF ty

/*---------------------------------------------------*/
/*                     Functors                      */
/*---------------------------------------------------*/

/* Functor bindings "fctid ( fctarg ) sigconst = body", chained with AND. */
/* The funbinds_prime* helpers have the same shape as the strbinds_prime* helpers used for structure bindings. */
funbinds ::= fctid LPAREN fctarg RPAREN sigconst EQUALOP funbinds_prime

funbinds_prime ::= strexp1 funbinds_prime1
          | strexp2 funbinds_prime2

funbinds_prime1 ::= funbinds_prime2
           | WHERE whereeqn funbinds_prime1_prime

funbinds_prime2 ::=               /*empty*/
           | AND funbinds

funbinds_prime1_prime ::= funbinds_prime1
            | AND whereeqn funbinds_prime1_prime

/* Functor parameter: either "strid : sigexp" or a list of specs. */
fctarg ::= strid COLON sigexp
       | specs

/*---------------------------------------------------*/
/*                   Declarations                    */
/*---------------------------------------------------*/

/* Sequence of declarations; SEMICOLON separators are optional and may repeat. */
decs ::=
     | dec decs
     | SEMICOLON decs

dec ::= decnode

/* A declaration is either a decnolocal form or a LOCAL ... IN ... END block. */
decnode ::= decnolocal
        | LOCAL decs IN decs END

/* Every declaration form except the LOCAL block (which decnode adds). */
decnolocal
        ::= VAL valbindTop
        | VAL tyvarseq valbindTop
        | DO exp
        | FUN funs
        | FUN tyvarseq funs
        | TYPE typBind
        | DATATYPE datatypeRhs
        | ABSTYPE datBind WITH decs END
        | EXCEPTION ebs
        | OPEN longstrids
        | fixity vids
        | OVERLOAD priority vid COLON ty AS longvidands

valbindTop ::= valbind

/* Value bindings "pat = exp" chained with AND; REC switches to rvalbind. */
valbind ::= pat EQUALOP exp
        | pat EQUALOP exp AND valbind
        | REC rvalbind

/* Bindings after REC: every right-hand side is an FN match. */
rvalbind ::= REC rvalbind
         | pat EQUALOP FN match
         | pat EQUALOP FN match AND rvalbind

/* Optional ": ty" annotation. */
constraint ::=
           | COLON ty

/* Function bindings: AND-separated functions, each a BAR-separated list of clauses with an optional leading BAR. */
funs    ::= clausesTop
        | clausesTop AND funs

clausesTop::= clauses
          | optbar_prime clauses

clauses ::= clause
        | clause BAR clauses

clause  ::= apats constraint EQUALOP exp

/* Type abbreviations "tyvars tycon = ty", chained with AND. */
typBind ::= tbs

tbs ::= tbs_prime

tbs_prime ::= tb
     | tb AND tbs_prime

tb ::= tyvars tycon EQUALOP ty

/* Optional type-variable sequence: a single tyvar or a parenthesized comma-separated list. */
tyvars  ::= tyvarseq
        |

tyvarseq::= tyvar
        | LPAREN tyvar_pc RPAREN

tyvar_pc::= tyvar
        | tyvar COMMA tyvar_pc

/* BAR-separated constructors, each with an optional "OF ty" argument and an optional OP prefix. */
constrs ::= constr
        | constr BAR constrs

constr  ::= opcon
        | opcon OF ty

opcon   ::= con
        | OP con

/* Exception bindings chained with AND. */
ebs     ::= eb
        | eb AND ebs

eb      ::= opcon ebrhs

ebrhs ::= ebrhsnode

/* Right-hand side of an exception binding: nothing, "OF ty", or "= longcon" with an optional OP. */
ebrhsnode   ::=
            | OF ty
            | EQUALOP longcon
            | EQUALOP OP longcon

/* Fixity directives; INFIX and INFIXR take an optional digit. */
fixity  ::= INFIX
        | INFIX digit
        | INFIXR
        | INFIXR digit
        | NONFIX

/* Optional digit used by the OVERLOAD declaration. */
priority ::=
         | digit

int ::= INT

word ::= WORD

/* digit and numericField are both written as the INT token in this grammar. */
digit ::= INT

numericField ::= INT

/* Right-hand side of DATATYPE: a replication (repl) or a datatype binding (datBind). */
datatypeRhs
   ::= datatypeRhsnode

datatypeRhsnode
   ::= repl
   | datBind

repl ::= tyvars tycon EQUALOP DATATYPE longtycon

/* Datatype bindings chained with AND, optionally followed by a WITHTYPE clause. */
datBind
   ::= dbs
   | dbs withtypes

dbs ::= dbs_prime

dbs_prime ::= db
     | db AND dbs_prime

db ::= tyvars tycon EQUALOP optbar constrs

withtypes ::= WITHTYPE typBind

/* AND-separated list of long value identifiers (used by OVERLOAD). */
longvidands ::= longvid
            | longvid AND longvidands

/* A match is a BAR-separated list of rules with an optional leading BAR. */
match ::= optbar rules

rules ::= rule
      | rule BAR rules

rule    ::= pat DARROW exp

/* Record expression field: "field = exp", or an idField with an optional ": ty" annotation. */
elabel  ::= field EQUALOP exp
        | idField constraint

/* One or more comma-separated record fields. */
elabels ::= elabel COMMA elabels
        | elabel

/* One or more SEMICOLON-separated expressions, with an optional trailing SEMICOLON. */
exp_ps  ::= exp optsemicolon
        | exp SEMICOLON exp_ps

exp ::= expnode

/* Expression forms. HANDLE, ORELSE, ANDALSO and ":" are written as directly recursive alternatives; this EBNF does not encode their relative precedence. */
expnode ::= exp HANDLE match
        | exp ORELSE exp
        | exp ANDALSO exp
        | exp COLON ty
        | app_exp
        | FN match
        | CASE exp OF match
        | WHILE exp DO exp
        | IF exp THEN exp ELSE exp
        | RAISE exp

/* A non-empty juxtaposition of atomic expressions and long value identifiers. */
app_exp ::= aexp
        | aexp app_exp
        | longvid
        | longvid app_exp

/* Atomic expressions. The alternatives from ADDRESS through SYMBOL start with one of the underscore-prefixed keyword tokens and end with SEMICOLON. */
aexp    ::= OP longvid
        | const
        | HASH field
        | HASHLBRACKET exp_list RBRACKET
        | HASHLBRACKET RBRACKET
        | LBRACE elabels RBRACE
        | LBRACE RBRACE
        | LPAREN RPAREN
        | LPAREN exp_ps RPAREN
        | LPAREN exp_2c RPAREN
        | LBRACKET exp_list RBRACKET
        | LBRACKET RBRACKET
        | LET decs IN exp_ps END
        | ADDRESS string symattributes COLON ty SEMICOLON
        | BUILD_CONST string COLON ty SEMICOLON
        | COMMAND_LINE_CONST string COLON ty EQUALOP constOrBool SEMICOLON
        | CONST string COLON ty SEMICOLON
        | EXPORT string ieattributes COLON ty SEMICOLON
        | IMPORT string ieattributes COLON ty SEMICOLON
        | IMPORT ASTERISK ieattributes COLON ty SEMICOLON
        | PRIM string COLON ty SEMICOLON
        | SYMBOL string symattributes COLON ty SEMICOLON
        | SYMBOL ASTERISK COLON ty SEMICOLON

/* Zero or more shortAlphanumId attributes. */
ieattributes
   ::=
     /*empty*/
   | shortAlphanumId ieattributes

/* Zero or more shortAlphanumId attributes. */
symattributes
   ::=
     /*empty*/
   | shortAlphanumId symattributes

/* Two or more comma-separated expressions. */
exp_2c  ::= exp COMMA exp_2c
        | exp COMMA exp

/* One or more comma-separated expressions. */
exp_list ::= exp
         | exp COMMA exp_list

/*---------------------------------------------------*/
/*                     Patterns                      */
/*---------------------------------------------------*/

/* A pattern is one or more cpat alternatives separated by BAR. */
pat ::= cpat BAR barcpats
    | cpat

cpat ::= cpatnode

/* A cpat is an AS pattern, a ": ty" constrained pattern, or a juxtaposition of atomic patterns. */
cpatnode ::= cpat AS cpat
         | cpat COLON ty
         | apats

/* One or more atomic patterns written side by side. */
apats ::= apat
      | apat apats

apat
  ::= apatnode

/* Atomic patterns. Note that a bare identifier must be a longvidNoEqual, while "OP longvid" accepts any longvid. */
apatnode
  ::= longvidNoEqual
  | OP longvid
  | const
  | WILD
  | LPAREN pats RPAREN
  | LBRACKET pats RBRACKET
  | HASHLBRACKET pats RBRACKET
  | LBRACE RBRACE
  | LBRACE patitems RBRACE

/* Zero or more comma-separated patterns. */
pats ::= /*empty*/
     | pat commapats

barcpats ::= cpat
         | cpat BAR barcpats

commapats ::= /*empty*/
          | COMMA pat commapats

/* Comma-separated record pattern items; DOTDOTDOT can only appear as the final item. */
patitems ::= patitem COMMA patitems
         | patitem
         | DOTDOTDOT

/* Record pattern item: "field = pat", or a vid with optional ": ty" and optional "AS pat". */
patitem
   ::= field EQUALOP pat
   | vid constraint opaspat

/* Optional "AS pat" suffix. */
opaspat ::=
        | AS pat

/*---------------------------------------------------*/
/*                       Types                       */
/*---------------------------------------------------*/

ty ::= tynode

/* A type is a tuple type, an ARROW type, or one of the ty_primenode forms. */
tynode  ::= tuple_ty
        | ty ARROW ty
        | ty_primenode

ty_prime ::= ty_primenode

/* Type variables, record types, parenthesized types, and type-constructor applications (argument(s) written before the longtycon). */
ty_primenode ::= tyvar
        | LBRACE tlabels RBRACE
        | LBRACE RBRACE
        | LPAREN ty0_pc RPAREN longtycon
        | LPAREN ty RPAREN
        | ty_prime longtycon
        | longtycon

/* Record type field "field : ty". */
tlabel  ::= field COLON ty

/* One or more comma-separated record type fields. */
tlabels ::= tlabel COMMA tlabels
        | tlabel

/* Two or more ty_prime components separated by ASTERISK. */
tuple_ty ::= ty_prime ASTERISK tuple_ty
         | ty_prime ASTERISK ty_prime

/* Two or more comma-separated types. */
ty0_pc  ::= ty COMMA ty
        | ty COMMA ty0_pc

/*---------------------------------------------------*/
/*                       Atoms                       */
/*---------------------------------------------------*/

/* Optional BAR. */
optbar
   ::= /* empty */
   | optbar_prime

optbar_prime
   ::= BAR

/* Optional SEMICOLON. */
optsemicolon
   ::= /* empty */
   | SEMICOLON

/* A constant or a shortAlphanumId (used after EQUALOP in the COMMAND_LINE_CONST expression). */
constOrBool
   ::= const
   | shortAlphanumId

const   ::= const_prime

/* Literal constants. */
const_prime  ::= int
        | word
        | REAL
        | STRING
        | CHAR

string ::= STRING

/* Thin wrappers around the four identifier tokens. */
shortAlphanumId
  ::= SHORTALPHANUMID

shortSymId
  ::= SHORTSYMID

longAlphanumId
  ::= LONGALPHANUMID

longSymId
  ::= LONGSYMID

/* Value identifiers are split into NoEqual and Equal variants; vidEqual is just EQUALOP, and some rules (e.g. apatnode) accept only the NoEqual variant. */
vidNoEqual ::= shortAlphanumId
           | shortSymId
           | ASTERISK
vidEqual ::= EQUALOP
vid ::= vidNoEqual
    | vidEqual
longvidNoEqual ::= vidNoEqual
               | longAlphanumId
               | longSymId
longvidEqual ::= vidEqual
longvid ::= longvidNoEqual
        | longvidEqual

/* Constructors use the same syntax as value identifiers. */
con ::= vid
longcon ::= longvid

tyvar ::= TYVAR

/* Type constructor names; unlike vidNoEqual, ASTERISK is not an alternative here. */
tycon ::= shortAlphanumId
      | shortSymId
longtycon ::= tycon
          | longAlphanumId

/* Record field labels: an identifier-like label or a numericField. */
idField ::= shortAlphanumId
        | shortSymId
        | ASTERISK
field ::= idField
      | numericField

/* Structure, signature and functor identifiers are all alphanumeric. */
strid ::= shortAlphanumId
longstrid ::= strid
          | longAlphanumId

sigid ::= shortAlphanumId
fctid ::= shortAlphanumId

/* Non-empty, whitespace-separated identifier lists. */
vids ::= vid
     | vid vids

sigids ::= sigid
       | sigid sigids

longstrids ::= longstrid
           | longstrid longstrids

//Tokens
//<INITIAL>\("[^"]+"\) => (tok (Tokens.\([^,]+\),.+
/* Terminals used by the grammar above that get no literal in this list: ASTERISK, SHOW_BASIS, INT, WORD, REAL, STRING, CHAR, TYVAR, SHORTALPHANUMID, SHORTSYMID, LONGALPHANUMID, LONGSYMID. */
/* Underscore-prefixed keywords. */
ADDRESS ::= "_address"
BUILD_CONST ::= "_build_const"
COMMAND_LINE_CONST ::= "_command_line_const"
CONST ::= "_const"
EXPORT ::= "_export"
IMPORT ::= "_import"
OVERLOAD ::= "_overload"
PRIM ::= "_prim"
SYMBOL ::= "_symbol"

/* Punctuation. */
HASH ::= "#"
HASHLBRACKET ::= "#["
LPAREN ::= "("
RPAREN ::= ")"
COMMA ::= ","
ARROW ::= "->"
DOTDOTDOT ::= "..."
COLON ::= ":"
COLONGT ::= ":>"
SEMICOLON ::= ";"
EQUALOP ::= "="
DARROW ::= "=>"
LBRACKET ::= "["
RBRACKET ::= "]"
WILD ::= "_"
LBRACE ::= "{"
BAR ::= "|"
RBRACE ::= "}"

/* Alphabetic keywords. */
ABSTYPE ::= "abstype"
AND ::= "and"
ANDALSO ::= "andalso"
AS ::= "as"
CASE ::= "case"
DATATYPE ::= "datatype"
DO ::= "do"
ELSE ::= "else"
END ::= "end"
EQTYPE ::= "eqtype"
EXCEPTION ::= "exception"
FN ::= "fn"
FUN ::= "fun"
FUNCTOR ::= "functor"
HANDLE ::= "handle"
IF ::= "if"
IN ::= "in"
INCLUDE ::= "include"
INFIX ::= "infix"
INFIXR ::= "infixr"
LET ::= "let"
LOCAL ::= "local"
NONFIX ::= "nonfix"
OF ::= "of"
OP ::= "op"
OPEN ::= "open"
ORELSE ::= "orelse"
RAISE ::= "raise"
REC ::= "rec"
SHARING ::= "sharing"
SIG ::= "sig"
SIGNATURE ::= "signature"
STRUCT ::= "struct"
STRUCTURE ::= "structure"
THEN ::= "then"
TYPE ::= "type"
VAL ::= "val"
WHERE ::= "where"
WHILE ::= "while"
WITH ::= "with"
WITHTYPE ::= "withtype"

Search and replace with Lua patterns in SquiLu (https://github.com/mingodad/squilu):

// Converts grammar-file text held in `txt` into the EBNF shown above and prints it.
// `txt` is not defined in this snippet; it is assumed to already hold the .grm file contents.

// Drop everything up to and including the first "%%" marker...
txt = txt.slice(txt.indexOf("%%")+2);
// ...and then up to and including the next one, keeping only what follows
// (presumably the rules section of the .grm file; confirm against the file layout).
txt = txt.slice(txt.indexOf("%%")+2);
// Rewrite the literal "([])" as an /*empty*/ marker.
// NOTE(review): this and the following replace() calls rely on replace() substituting every occurrence; confirm SquiLu semantics.
txt = txt.replace("([])", "/*empty*/");
// Convert "(*" ... "*)" comment delimiters to "/*" ... "*/".
txt = txt.replace("(*", "/*");
txt = txt.replace("*)", "*/");
// Remove a balanced "( ... )" group that ends a line (%b() matches balanced parentheses);
// presumably these are the semantic actions attached to each alternative.
txt = txt.gsub("%b()\n", "\n");
// Turn ":" into the EBNF definition operator "::=".
txt = txt.replace(":", "::=");
// Rename primed identifiers (x' -> x_prime). The pattern needs an alphanumeric character
// right before the quote, so a doubled prime (x'') is only fully rewritten by the second pass.
txt = txt.gsub("(%w)'", "%1_prime");
txt = txt.gsub("(%w)'", "%1_prime");
// Remove a blank (spaces-only) line that precedes an indented "|" alternative.
// In Lua patterns "|" is a literal character, not alternation, so the capture is whitespace followed by "|".
txt = txt.gsub("\n *\n(%s+|)", "\n%1");

// Emit the converted grammar.
print(txt);
mingodad commented 1 year ago

And here is the EBNF for mlb.grm:

// From: https://github.com/MPLLang/mpl/blob/master/mlton/front-end/mlb.grm

/* Start symbol: a sequence of basis declarations. */
mlb ::= basdecs

/* Sequence of basis declarations; SEMICOLON separators are optional and may repeat. */
basdecs ::= basdecsnode

basdecsnode ::=
            | SEMICOLON basdecs
            | basdec basdecs

basdec ::= basdecnode

/* One basis declaration: bindings, a LOCAL block, OPEN, a FILE / STRING / PRIM token, or an ANN ... IN ... END block. */
basdecnode
   ::= FUNCTOR fctbinds
   | SIGNATURE sigbinds
   | STRUCTURE strbinds
   | BASIS basbinds
   | LOCAL basdecs IN basdecs END
   | OPEN basids
   | FILE
   | STRING
   | PRIM
   | ANN annPlus IN basdecs END

/* Functor bindings: "fctid = fctid" or a bare fctid, chained with AND. sigbinds and strbinds below have the same shape. */
fctbinds ::= fctid EQUALOP fctbinds_prime
         | fctid fctbinds_prime_prime

fctbinds_prime ::= fctid fctbinds_prime_prime

fctbinds_prime_prime ::=               /*empty*/
           | AND fctbinds

sigbinds ::= sigid EQUALOP sigbinds_prime
         | sigid sigbinds_prime_prime

sigbinds_prime ::= sigid sigbinds_prime_prime

sigbinds_prime_prime ::=               /*empty*/
           | AND sigbinds

strbinds ::= strid EQUALOP strbinds_prime
         | strid strbinds_prime_prime

strbinds_prime ::= strid strbinds_prime_prime

strbinds_prime_prime ::=               /*empty*/
           | AND strbinds

/* Basis bindings "basid = basexp", chained with AND; unlike the bindings above, the "= basexp" part is required. */
basbinds ::= basid EQUALOP basbinds_prime

basbinds_prime ::= basexp basbinds_prime_prime

basbinds_prime_prime ::=               /*empty*/
           | AND basbinds

basexp ::= basexpnode

/* Basis expression: BAS ... END, a basis identifier, or LET ... IN ... END. */
basexpnode ::= BAS basdecs END
           | basid
           | LET basdecs IN basexp END

/* All identifier classes reduce to the single ID token. */
basid ::= id
basids ::= basid
       | basid basids
fctid ::= id
sigid ::= id
strid ::= id
id ::= ID

ann ::= STRING

/* One or more annotation strings. */
annPlus ::= ann annStar

annStar ::=          /*empty*/
        | annPlus

// Tokens
//<INITIAL>\("[^"]+"\) => (tok (Tokens.\([^,]+\),.+
/* Terminals used by the mlb rules above that get no literal in this list: FILE, STRING, ID. */
PRIM ::= "_prim"

/* COMMA is listed here but is not referenced by any mlb rule above. */
COMMA ::= ","
SEMICOLON ::= ";"
EQUALOP ::= "="

/* Alphabetic keywords. */
AND ::= "and"
ANN ::= "ann"
BAS ::= "bas"
BASIS ::= "basis"
END ::= "end"
FUNCTOR ::= "functor"
IN ::= "in"
LET ::= "let"
LOCAL ::= "local"
OPEN ::= "open"
SIGNATURE ::= "signature"
STRUCTURE ::= "structure"
shwestrick commented 4 months ago

Thanks for sharing. This is cool but doesn't seem like an issue so I'm going to close for now.