minoki / LunarML

The Standard ML compiler that produces Lua/JavaScript
MIT License
349 stars 10 forks source link

Grammar railroad diagram #2

Open mingodad opened 2 years ago

mingodad commented 2 years ago

Using a bit of Lua-like string pattern replacement, and adding the tokens from the lexer manually, we can obtain an EBNF grammar understood by https://www.bottlecaps.de/rr/ui and get a nice railroad diagram (https://en.wikipedia.org/wiki/Syntax_diagram).

Copy and paste the EBNF shown below into the "Edit Grammar" tab at https://www.bottlecaps.de/rr/ui, then switch to the "View Diagram" tab.

START ::= Program

IntConst ::= PosInt
         | ZNIntConst

SCon ::= IntConst
     | WordConst
     | RealConst
     | StringConst
     | CharacterConst

/* type variable */
TyVar ::= PrimeIdent

/* value identifiers */
VId ::= BoundVId
    | EQUALS /* ? */

/* value identifiers, excluding '=' */
BoundVId ::= AlnumIdent
         | SymbolicIdent
         | ASTERISK

/* structure identifier */
StrId ::= AlnumIdent

/* signature identifier */
SigId ::= AlnumIdent

/* functor identifier */
FunId ::= AlnumIdent

/* record labels */
Lab ::= AlnumIdent
    | SymbolicIdent
    | ASTERISK
    | PosInt /* numeric labels */

/* type constructors; "*" is not included */
TyCon ::= AlnumIdent
      | SymbolicIdent

/* long value identifiers, including '=' */
LongVId ::= VId
        | QualifiedAlnumIdent
        | QualifiedSymbolicIdent

/* long value identifiers, excluding unqualified '=' */
LongBoundVId ::= BoundVId
             | QualifiedAlnumIdent
             | QualifiedSymbolicIdent

/* long type constructors */
LongTyCon ::= TyCon
          | QualifiedAlnumIdent
          | QualifiedSymbolicIdent

/* long structure identifiers */
LongStrId ::= StrId
          | QualifiedAlnumIdent

Opt_OP ::= /* empty */
       | OP

/* atomic patterns */
AtPat ::= UNDERSCORE /* wildcard */
      | SCon /* special constant */
      | OP LongVId /* value identifier, including 'op =' */
      | LongBoundVId /* value identifier */
      | LBRACE RBRACE /* empty record */
      | LBRACE PatRow RBRACE /* record */
      | LPAREN RPAREN /* [derived] empty tuple */
      | LPAREN Pat RPAREN /* parenthesized pattern */
      | LPAREN Pat COMMA Pat PatSeqRest RPAREN /* [derived] tuple pattern */
      | LBRACK RBRACK /* [derived] empty list */
      | LBRACK Pat PatSeqRest RBRACK /* [derived] list pattern */
      | HASHLBRACK RBRACK /* [extension] vector pattern (empty) */
      | HASHLBRACK ELLIPSIS RBRACK /* [extension] vector pattern (ellipsis only) */
      | HASHLBRACK Pat PatSeqRestEllipsis RBRACK /* [extension] vector pattern */

/* one or more atomic patterns */
AtPats ::= AtPat
       | AtPat AtPats

TypedPat ::= AtPats
         | TypedPat COLON Ty /* typed */

/* patterns: a typed pattern, optionally followed by "as" and another pattern */
Pat ::= TypedPat
    /* | OP VId COLON Ty AS Pat */ /* disabled alternative, kept for reference */
    | TypedPat AS Pat /* layered or [Successor ML] conjunctive */

TypedPatPun ::= BoundVId
            | BoundVId COLON Ty

PatPun ::= TypedPatPun
       | TypedPatPun AS Pat

/* pattern rows */
PatRow ::= ELLIPSIS PatRowRest /* wildcard */
       | ELLIPSIS EQUALS Pat PatRowRest /* [Successor ML] ellipses */
       | Lab EQUALS Pat PatRowRest /* pattern row */
       | PatPun PatRowRest /* [derived] punning */

PatRowRest ::= COMMA PatRow
           |

PatSeqRest ::= COMMA Pat PatSeqRest
           |

PatSeqRestEllipsis ::= COMMA Pat PatSeqRestEllipsis
                   | COMMA ELLIPSIS
                   |

AtTy ::= TyVar
     | LBRACE RBRACE /* record type expression */
     | LBRACE TyRow RBRACE /* record type expression */
     | LPAREN Ty RPAREN

LongTyConOrPrimTyCon ::= LongTyCon
                     | PRIMTYPE StringConst

ConTy ::= AtTy
      | ConTy LongTyConOrPrimTyCon /* type construction */
      | LongTyConOrPrimTyCon /* type construction */
      | LPAREN Ty COMMA Ty TySeqRest RPAREN LongTyConOrPrimTyCon /* type construction */

TupTy ::= ConTy
      | ConTy ASTERISK TupTy

/* type expressions */
Ty ::= TupTy
   | TupTy ARROW Ty /* function type expression */
   /* tuple type */

TySeqRest ::= COMMA Ty TySeqRest /* */
          | /* empty */

TyVarSeq ::= TyVar /* singleton sequence */
         | /* empty sequence */
         | LPAREN TyVar TyVarSeqRest RPAREN /* sequence */

TyVarSeqRest ::= COMMA TyVar TyVarSeqRest /* */
             | /* empty */

/* type-expression rows */
TyRow ::= Lab COLON Ty COMMA TyRow /* type-expression row */
      | Lab COLON Ty /* type-expression row */
      | ELLIPSIS COLON Ty COMMA_TyRow_NoELLIPSIS /* [Successor ML] ellipses */

COMMA_TyRow_NoELLIPSIS ::= COMMA Lab COLON Ty COMMA_TyRow_NoELLIPSIS /* type-expression row */
                       | /* type-expression row */

/* atomic expressions */
AtExp ::= SCon /* special constant */
      | OP LongVId /* value identifier */
      | LongBoundVId /* value identifier */
      | EQUALS /* unqualified '=' (not covered by LongBoundVId) */
      | LBRACE RBRACE /* empty record */
      | LBRACE ExpRow RBRACE /* record */
      | LBRACE AtExp WHERE RBRACE /* [Successor ML] record update */
      | LBRACE AtExp WHERE ExpRow RBRACE /* [Successor ML] record update */
      | LET Decs IN Exp END /* local declaration */
      | LPAREN Exp RPAREN /* parenthesized expression */
      | LPAREN RPAREN /* [derived] 0-tuple */
      | LPAREN Exp COMMA Exp ExpCSeqRest RPAREN /* [derived] tuple: {1=Exp1,...,n=Expn} */
      | LPAREN Exp SEMICOLON Exp ExpSSeqRest RPAREN /* [derived] sequential execution */
      | LET Decs IN Exp SEMICOLON Exp ExpSSeqRest END /* [derived] local declaration */
      | LBRACK RBRACK /* [derived] list: Exp1 :: ... :: Expn :: nil */
      | LBRACK Exp ExpCSeqRest RBRACK /* [derived] list: Exp1 :: ... :: Expn :: nil */
      | HASHLBRACK RBRACK /* [extension] vector expression */
      | HASHLBRACK Exp ExpCSeqRest RBRACK /* [extension] vector expression */
      | HASH Lab /* [derived] projection: fn {Lab=VId,...} => VId */
      | PRIMVAL StringConst /* [extension] _primVal "name" */
      | PRIMCALL StringConst LBRACK Ty TySeqRest RBRACK LPAREN RPAREN /* [extension] _primCall "name" [tyargs] */
      | PRIMCALL StringConst LBRACK Ty TySeqRest RBRACK LPAREN Exp ExpCSeqRest RPAREN /* [extension] _primCall "name" [tyargs] */
      | PRIMCALL StringConst LPAREN RPAREN /* [extension] _primCall "name" */
      | PRIMCALL StringConst LPAREN Exp ExpCSeqRest RPAREN /* [extension] _primCall "name" */

/* comma-separated list of expressions */
ExpCSeqRest ::= COMMA Exp ExpCSeqRest
            |

/* semicolon-separated list of expressions */
ExpSSeqRest ::= SEMICOLON Exp ExpSSeqRest
            |

/* expression rows */
ExpRow ::= Lab EQUALS Exp ExpRowRest
       | ELLIPSIS EQUALS Exp ExpRowRest /* [Successor ML] ellipses */

ExpRowRest ::= COMMA ExpRow
           |

/*
AppExp ::= AtExp
       | AppExp AtExp
InfExp ::= AppExp
       | InfExp VId InfExp
*/

AppOrInfExp ::= AtExp AppOrInfExp /* atomic */
            | AtExp

TypedExp ::= AppOrInfExp
         | TypedExp COLON Ty /* typed */

AndalsoExp ::= TypedExp
           | TypedExp ANDALSO AndalsoExp /* [derived] conjunction */
           | TypedExp ANDALSO HeadExp /* [derived] conjunction */

AndalsoExp_NoHead ::= TypedExp
                  | TypedExp ANDALSO AndalsoExp_NoHead /* [derived] conjunction */

AndalsoExp_NoMatch ::= TypedExp
                   | TypedExp ANDALSO AndalsoExp_NoMatch /* [derived] conjunction */
                   | TypedExp ANDALSO HeadExp_NoMatch /* [derived] conjunction */

OrelseExp ::= AndalsoExp
          | AndalsoExp_NoHead ORELSE OrelseExp /* [derived] disjunction */
          | AndalsoExp_NoHead ORELSE HeadExp /* [derived] disjunction */

OrelseExp_NoHead ::= AndalsoExp_NoHead
                 | AndalsoExp_NoHead ORELSE OrelseExp_NoHead /* [derived] disjunction */

OrelseExp_NoMatch ::= AndalsoExp_NoMatch
                  | AndalsoExp_NoHead ORELSE OrelseExp_NoMatch /* [derived] disjunction */
                  | AndalsoExp_NoHead ORELSE HeadExp_NoMatch /* [derived] disjunction */

/* expression with a starting token */
HeadExp ::= RAISE Exp /* raise exception */
        | IF Exp THEN Exp ELSE Exp /* [derived] conditional */
        | WHILE Exp DO Exp /* [derived] iteration */
        | CASE Exp OF MatchClauses /* [derived] pattern match */
        | FN MatchClauses /* function */

HeadExp_NoMatch ::= RAISE Exp_NoMatch /* raise exception */
                | IF Exp THEN Exp ELSE Exp_NoMatch
                | WHILE Exp DO Exp_NoMatch /* [derived] iteration */

/* expression */
Exp ::= OrelseExp
    | OrelseExp_NoHead HANDLE MatchClauses /* handle exception */
    | HeadExp

Exp_NoMatch ::= OrelseExp_NoMatch
            | HeadExp_NoMatch

/* matches */
MatchClauses ::= Pat DARROW Exp_NoMatch BAR MatchClauses
             | Pat DARROW Exp

/* a declaration, excluding local-in-end */
Dec_NoLocal ::= VAL ValBind /* value declaration */
            | VAL TyVar ValBind /* value declaration */
            | VAL LPAREN TyVar TyVarSeqRest RPAREN ValBind /* value declaration */
            | VAL REC ValBind /* value declaration */
            | VAL REC TyVar ValBind /* [Successor ML-style] value declaration */
            | VAL REC LPAREN TyVar TyVarSeqRest RPAREN ValBind /* [Successor ML-style] value declaration */
            | VAL TyVar REC ValBind /* [SML97-style] value declaration */
            | VAL LPAREN TyVar TyVarSeqRest RPAREN REC ValBind /* [SML97-style] value declaration */
            | FUN FValBind /* [derived] function declaration */
            | FUN TyVar FValBind /* [derived] function declaration */
            | FUN LPAREN TyVar TyVarSeqRest RPAREN FValBind /* [derived] function declaration */
            | TYPE TypBind /* type declaration */
            /* | DATATYPE DatBind */ /* disabled; expanded into the three DATATYPE alternatives below */
            | DATATYPE TyCon EQUALS ConBind DatBindRest Withtype /* datatype declaration */
            | DATATYPE TyVar TyCon EQUALS ConBind DatBindRest Withtype /* datatype declaration */
            | DATATYPE LPAREN TyVar TyVarSeqRest RPAREN TyCon EQUALS ConBind DatBindRest Withtype /* datatype declaration */
            | DATATYPE TyCon EQUALS DATATYPE LongTyCon /* datatype replication */
            | ABSTYPE DatBind Withtype WITH Decs END /* abstype declaration */
            | EXCEPTION ExBind /* exception declaration */
            | OPEN LongStrIds /* open declaration */
            | INFIX IntConst VIds /* infix directive */
            | INFIX VIds /* infix directive */
            | INFIXR IntConst VIds /* infix directive */
            | INFIXR VIds /* infix directive */
            | NONFIX VIds /* nonfix directive */
            | OVERLOAD StringConst LBRACK LongTyCon RBRACK LBRACE OverloadSpecs RBRACE /* [extension] _overload "class" [ty] { + = ..., - = ..., ... } */

OverloadSpec ::= AlnumIdent EQUALS Exp
             | SymbolicIdent EQUALS Exp
             | ASTERISK EQUALS Exp

OverloadSpecs ::= OverloadSpec COMMA OverloadSpecs
              | OverloadSpec

Dec ::= Dec_NoLocal
    | LOCAL Decs IN Decs END /* local declaration */

/* declarations */
Decs ::= Dec Decs
     | SEMICOLON Decs /* sequential declaration */
     | /* empty declaration */

/* LongStrId[1] ... LongStrId[n] */
LongStrIds ::= LongStrId LongStrIds
           | LongStrId

/* VId[1] ... VId[n] */
VIds ::= VId VIds
     | VId

/* value bindings */
ValBind ::= Pat EQUALS Exp AND ValBind
        | Pat EQUALS Exp
        /* | REC ValBind */

FValBind ::= FMatch
         | FMatch AND FValBind

FMatch ::= FMRule
       | FMRule_NoMatch BAR FMatch

FMRule ::= FPat COLON Ty EQUALS Exp
       | FPat EQUALS Exp

FMRule_NoMatch ::= FPat COLON Ty EQUALS Exp_NoMatch
               | FPat EQUALS Exp_NoMatch

FPat ::= AtPats
/*
       OP BoundVId AtPats
     | BoundVId AtPats
     | AtPat BoundVId AtPat
     | LPAREN AtPat BoundVId AtPat RPAREN
     | LPAREN AtPat BoundVId AtPat RPAREN AtPats
*/

/* type bindings */
TypBind ::= TyVarSeq TyCon EQUALS Ty AND TypBind
        | TyVarSeq TyCon EQUALS Ty

/* datatype bindings */
DatBind ::= TyVarSeq TyCon EQUALS ConBind DatBindRest

/* datatype bindings */
DatBindRest ::= AND DatBind
            |

Withtype ::= WITHTYPE TypBind
         |

/* constructor bindings */
ConBind ::= Opt_OP BoundVId OF Ty ConBindRest
        | Opt_OP BoundVId ConBindRest

ConBindRest ::= BAR ConBind
            |

/* exception bindings */
ExBind ::= Opt_OP BoundVId OF Ty ExBindRest
       | Opt_OP BoundVId ExBindRest
       | Opt_OP BoundVId EQUALS Opt_OP LongVId ExBindRest

ExBindRest ::= AND ExBind
           |

AtStrExp ::= STRUCT StrDecs END
         | LongStrId
         | FunId LPAREN StrExp RPAREN
         | FunId LPAREN StrDecs RPAREN /* derived form */
         | LET StrDecs IN StrExp END

StrExp ::= AtStrExp
       | StrExp COLON SigExp /* transparent constraint */
       | StrExp COLONGT SigExp /* opaque constraint */

/* equivalent to 'StrExp AND' */
StrExp_AND ::= AtStrExp AND
           | StrExp COLON SigExp_AND /* transparent constraint */
           | StrExp COLONGT SigExp_AND /* opaque constraint */

ProperStrDec ::= STRUCTURE StrBind
             | LOCAL StrDecs IN StrDecs END

StrDecs ::= Dec_NoLocal StrDecs
        | ProperStrDec StrDecs
        | SEMICOLON StrDecs
        |

StrBind ::= StrId SigConstraint EQUALS StrExp_AND StrBind
        | StrId SigConstraint EQUALS StrExp

Spec ::= VAL ValDesc
     | TYPE TyVarSeq TyCon TypDescRest
     | TYPE TyVarSeq TyCon EQUALS Ty TypDescEQRest
     | EQTYPE TypDesc
     /* | DATATYPE DatDesc */
     | DATATYPE TyCon EQUALS ConDesc DatDescRest Withtype
     | DATATYPE TyVar TyCon EQUALS ConDesc DatDescRest Withtype
     | DATATYPE LPAREN TyVar TyVarSeqRest RPAREN TyCon EQUALS ConDesc DatDescRest Withtype
     | DATATYPE TyCon EQUALS DATATYPE LongTyCon
     | EXCEPTION ExDesc
     | STRUCTURE StrDesc
     | INCLUDE SigExp_NoSigId
     | INCLUDE SigIds

ValDesc ::= VId COLON Ty
        | VId COLON Ty AND ValDesc

TypDesc ::= TyVarSeq TyCon TypDescRest

TypDescRest ::= AND TyVarSeq TyCon TypDescRest
            |

TypDescEQRest ::= AND TyVarSeq TyCon EQUALS Ty TypDescEQRest
              |

DatDescRest ::= AND TyVarSeq TyCon EQUALS ConDesc DatDescRest
            |

ConDesc ::= VId ConDescRest
        | VId OF Ty ConDescRest

ConDescRest ::= BAR ConDesc
            |

ExDesc ::= VId ExDescRest
       | VId OF Ty ExDescRest

ExDescRest ::= AND ExDesc
           |

StrDesc ::= StrId COLON SigExp
        | StrId COLON SigExp_AND StrDesc

SigIds ::= SigId SigIds
       | SigId

Specs_NoSharing ::=
                | Spec Specs_NoSharing
                | Spec SEMICOLON Specs_NoSharing

Specs ::=
      | Spec Specs_NoSharing
      | Spec SEMICOLON Specs_NoSharing
      | Specs SHARING TYPE LongTyCon EQUAL_LongTyCons Specs_NoSharing
      | Specs SHARING LongStrId EQUAL_LongStrIds Specs_NoSharing

EQUAL_LongTyCons ::= EQUALS LongTyCon EQUAL_LongTyCons
                 | EQUALS LongTyCon

EQUAL_LongStrIds ::= EQUALS LongStrId EQUAL_LongStrIds
                 | EQUALS LongStrId

SigExp_NoSigId ::= SIG Specs END
               | SigExp WHERE TYPE TyVarSeq LongTyCon EQUALS Ty TypeRealisationRest

SigExp ::= SigId
       | SigExp_NoSigId

/* equivalent to 'SigExp AND' */
SigExp_AND ::= SIG Specs END AND
           | SigId AND
           | SigExp WHERE TYPE TyVarSeq LongTyCon EQUALS Ty TypeRealisationRest_AND

TypeRealisationRest ::= AND TYPE TyVarSeq LongTyCon EQUALS Ty TypeRealisationRest /* [removed in Successor ML] */
                    |

/* equivalent to 'TypeRealisationRest AND' */
TypeRealisationRest_AND ::= AND TYPE TyVarSeq LongTyCon EQUALS Ty TypeRealisationRest_AND /* [removed in Successor ML] */
                        | AND

SigBinds ::= SigId EQUALS SigExp
         | SigId EQUALS SigExp_AND SigBinds

SigDec ::= SIGNATURE SigBinds

SigConstraint ::=
              | COLON SigExp
              | COLONGT SigExp

FunDec ::= FUNCTOR FunBind

FunBind ::= FunId LPAREN StrId COLON SigExp RPAREN SigConstraint EQUALS StrExp_AND FunBind
        | FunId LPAREN Specs RPAREN SigConstraint EQUALS StrExp_AND FunBind
        | FunId LPAREN StrId COLON SigExp RPAREN SigConstraint EQUALS StrExp
        | FunId LPAREN Specs RPAREN SigConstraint EQUALS StrExp

TopDecs ::= Dec_NoLocal TopDecs /* strdec */
        | ProperStrDec TopDecs /* strdec */
        | SigDec TopDecs
        | FunDec TopDecs
        |

Program ::= TopDecs SEMICOLON Program
        | Exp SEMICOLON Program /* val it = Exp */
        | TopDecs /* topdecs without semicolon */

//Tokens
//| \("[^"]+"\)\s*=>\s*(Tokens.\([^,]+\).+

LPAREN ::= "("
RPAREN ::= ")"
LBRACK ::= "["
RBRACK ::= "]"
LBRACE ::= "{"
RBRACE ::= "}"
COMMA ::= ","
SEMICOLON ::= ";"
ELLIPSIS ::= "..."
DOT ::= "."

UNDERSCORE ::= "_"
PRIMTYPE ::= "_primType"
PRIMVAL ::= "_primVal"
PRIMCALL ::= "_primCall"
OVERLOAD ::= "_overload"
ABSTYPE ::= "abstype"
AND ::= "and"
ANDALSO ::= "andalso"
AS ::= "as"
CASE ::= "case"
DATATYPE ::= "datatype"
DO ::= "do"
ELSE ::= "else"
END ::= "end"
EQTYPE ::= "eqtype"
EXCEPTION ::= "exception"
FN ::= "fn"
FUN ::= "fun"
FUNCTOR ::= "functor"
HANDLE ::= "handle"
IF ::= "if"
IN ::= "in"
INCLUDE ::= "include"
INFIX ::= "infix"
INFIXR ::= "infixr"
LET ::= "let"
LOCAL ::= "local"
NONFIX ::= "nonfix"
OF ::= "of"
OP ::= "op"
OPEN ::= "open"
ORELSE ::= "orelse"
RAISE ::= "raise"
REC ::= "rec"
SHARING ::= "sharing"
SIG ::= "sig"
SIGNATURE ::= "signature"
STRUCT ::= "struct"
STRUCTURE ::= "structure"
THEN ::= "then"
TYPE ::= "type"
VAL ::= "val"
WITH ::= "with"
WITHTYPE ::= "withtype"
WHERE ::= "where"
WHILE ::= "while"
COLON ::=  ":"
BAR ::= "|"
EQUALS ::= "="
DARROW ::= "=>"
ARROW ::= "->"
HASH ::= "#"
COLONGT ::= ":>"
ASTERISK ::= "*"

Script that transformed the grammar:

// Convert the grammar file read below into EBNF text and print it.
// The patterns use Lua-style syntax, where "%" is the escape character.
auto txt = readfile(
    "LunarML/src/syntax.grm"
    );

// Keep only the text after a "%%" marker ("%%%%" is two escaped percent signs).
// The call is repeated on purpose: each one drops everything up to and including
// the next "%%". Presumably the input has two "%%" separators before the rules
// section -- TODO confirm against syntax.grm.
txt = txt.match("%%%%(.+)");
txt = txt.match("%%%%(.+)");
// Rewrite "(* ... *)" comments as "/* ... */". The lazy ".-" stops at the first "*)".
// NOTE(review): nested comments in the input are therefore not handled, and together
// with the next step this can leave an unterminated "/*" in the output.
txt = txt.gsub("%(%*(.-)%*%)", "/*%1*/");
// Delete every balanced "( ... )" group together with the whitespace before it;
// presumably these are the semantic actions attached to each production.
txt = txt.gsub("%s*%b()", "");
// Replace every ":" with the EBNF definition operator "::=".
// NOTE(review): this also rewrites colons inside comments (e.g. "list:" becomes "list::=").
txt = txt.gsub(":", "::=");
// Replace an apostrophe that follows a non-space character with "L". The second pass
// catches an apostrophe sitting directly after one replaced in the first pass.
// NOTE(review): this also rewrites closing quotes inside comments ('=' becomes '=L).
txt = txt.gsub("(%S)'", "%1L");
txt = txt.gsub("(%S)'", "%1L");
print(txt);
mingodad commented 3 months ago

I've also added the LunarML grammar to https://mingodad.github.io/parsertl-playground/playground/, a Yacc/Lex compatible online editor/tester (select "LunaML parser (partially working)" from the examples, then click "Parse" to view a parse tree for the content in "Input source").

Some lexer definitions are still missing; pull requests are welcome!