Closed mingodad closed 3 years ago
Nice catch, thank you. I've fixed it in the above commit.
Also it seems that the <otherwise>
nonterminal is not referenced on <match>
, bellow is what I suspect is your intention:
datatype ::= "datatype" "(" ( derive_clause "," )? datatype_name ( "," variant )+ ")"
record ::= "record" "(" ( derive_clause "," )? record_name ( "," field )* ")"
datatype_name ::= ident
record_name ::= ident
variant ::= "(" variant_name ( "," type )* ")"
field ::= "(" type "," field_name ")"
variant_name ::= ident
field_name ::= ident
derive_clause ::= "derive" "(" deriver_name ( "," deriver_name )* ")"
deriver_name ::= ident
match ::= "match" "(" lvalue ")" of+ otherwise?
matches ::= "MATCHES" "(" expr "," ident ")"
if_let ::= "ifLet" "(" lvalue "," variant_name "," ident ( "," ident )* ")" stmt
of ::= "of" "(" variant_name ( "," ident )* ")" stmt
otherwise ::= "otherwise" stmt
And here I'm also including the _
for the of
:
Obs: edited to fix misplacement.
datatype ::= "datatype" "(" ( derive_clause "," )? datatype_name ( "," variant )+ ")"
record ::= "record" "(" ( derive_clause "," )? record_name ( "," field )* ")"
datatype_name ::= ident
record_name ::= ident
variant ::= "(" variant_name ( "," type )* ")"
field ::= "(" type "," field_name ")"
variant_name ::= ident
field_name ::= ident
derive_clause ::= "derive" "(" deriver_name ( "," deriver_name )* ")"
deriver_name ::= ident
match ::= "match" "(" lvalue ")" of+ otherwise?
matches ::= "MATCHES" "(" expr "," ident ")"
if_let ::= "ifLet" "(" lvalue "," variant_name "," ident ( "," ident )* ")" stmt
of ::= "of" "(" variant_name ( "," (ident | "_") )* ")" stmt
otherwise ::= "otherwise" stmt
It seems that the _
also apply to if_let
, doesn't it ?
Yes, it does. _
can be used as a binding anywhere. I'll fix it now.
And here I'm also including the _ for the of:
This grammar is ambiguous: <ident>
may be _
as well. So we can either define our special <ident>
that is not equal to _
or just use an informal description. I think the latter is more convenient.
Here is another one with a match_ident
nonterminal that seems to accomplish the intended usage:
datatype ::= "datatype" "(" ( derive_clause "," )? datatype_name ( "," variant )+ ")"
record ::= "record" "(" ( derive_clause "," )? record_name ( "," field )* ")"
datatype_name ::= ident
record_name ::= ident
variant ::= "(" variant_name ( "," type )* ")"
field ::= "(" type "," field_name ")"
variant_name ::= ident
field_name ::= ident
derive_clause ::= "derive" "(" deriver_name ( "," deriver_name )* ")"
deriver_name ::= ident
match ::= "match" "(" lvalue ")" of+ otherwise?
matches ::= "MATCHES" "(" expr "," ident ")"
if_let ::= "ifLet" "(" lvalue "," variant_name "," ( "," match_ident )+ ")" stmt
of ::= "of" "(" variant_name ( "," match_ident)+ ")" stmt
otherwise ::= "otherwise" stmt
match_ident ::= ident | "_"
Here, match_ident
cannot be parsed unambiguously because again it might result in both an identifier and _
. I.e., if we see _
, we cannot understand whether it is "_"
or ident
.
Here is a simple LL1 parser (https://github.com/mingodad/CocoR-CPP, https://github.com/mingodad/CocoR-Java, https://github.com/mingodad/CocoR-CSharp, ...) for the grammar you are describing :
#include "Scanner.nut"
COMPILER Datatype99
CHARACTERS
letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_".
oct = '0'..'7'.
digit = '0'..'9'.
nzdigit = '1'..'9'.
cr = '\r'.
lf = '\n'.
tab = '\t'.
stringCh = ANY - '"' - '\\' - cr - lf.
charCh = ANY - '\'' - '\\' - cr - lf.
longStringCh = ANY - "]]".
printable = '\u0020' .. '\u007e'.
hex = digit + 'a'..'f' + 'A'..'F'.
TOKENS
ident = letter { letter | digit }.
TOK_NUMBER = ( '.' digit {digit} [('e'|'E') ['+'|'-'] digit {digit}]
| digit {digit} '.' {digit} [('e'|'E') ['+'|'-'] digit {digit}]
| digit {digit} [('e'|'E') ['+'|'-'] digit {digit}]
)
.
/*
TK_FLT = ( '.' digit {digit}
| digit {digit} ['.' {digit}]
) [('e'|'E') ['+'|'-'] digit {digit}]
.
TK_INT = ( /*nz*/ digit {digit}
//| '0' {oct}
| ("0x"|"0X") hex {hex}
)
.
*/
TOK_STRING = '"' {stringCh | '\\' (printable | ['\r'] '\n')} '"'
| "'" { charCh | '\\' (printable | ['\r'] '\n')} "'"
| "[[" {longStringCh} "]]"
.
badString = '"' {stringCh | '\\' printable} (cr | lf)
| "'" {charCh | '\\' printable } (cr | lf)
.
COMMENTS FROM "/*" TO "*/" NESTED
COMMENTS FROM "//" TO lf
IGNORE cr + lf + tab
PRODUCTIONS
Datatype99 =
{declarations} EOF
.
declarations =
datatype
| record
| matches
| match
| if_let
.
datatype =
"datatype" "(" [ derive_clause "," ] datatype_name comma_variant {comma_variant} ")" ";"
.
comma_variant =
"," variant
.
record =
"record" "(" [ derive_clause "," ] record_name { "," field } ")" ";"
.
variant =
"(" variant_name { "," type } ")"
.
field =
"(" type "," field_name ")"
.
derive_clause =
"derive" "(" deriver_name { "," deriver_name } ")"
.
match =
"match" "(" lvalue ")" "{" {of} [otherwise] "}"
.
matches =
"MATCHES" "(" expr "," ident ")"
.
if_let =
"ifLet" "(" lvalue "," variant_name comma_match_ident {comma_match_ident} ")" (stmt | ";")
.
of =
"of" "(" variant_name {comma_match_ident} ")" stmt
.
comma_match_ident =
"," match_ident
.
otherwise =
"otherwise" stmt
.
match_ident =
ident | "_"
.
type =
ident {ident} {"*"}
.
datatype_name =
ident
.
record_name =
ident
.
variant_name =
ident
.
field_name =
ident
.
deriver_name =
ident
.
lvalue =
{"*"} ident
.
expr =
{"*"} ident
.
stmt =
ident (
{ident}
| "(" {ANY} ")"
| "=" (TOK_NUMBER | ident)
) ';'
| "return" ANY {ANY} ";"
| match
| if_let
| "{" {stmt} "}"
.
END Datatype99.
And here is the generated EBNF that can be viewed at https://www.bottlecaps.de/rr/ui :
//
// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui
//
//
// productions
//
Datatype99 ::= ( declarations )* EOF
declarations ::= ( datatype | record | matches | match | if_let )
datatype ::= "datatype" "(" ( derive_clause "," )? datatype_name comma_variant ( comma_variant )* ")" ";"
record ::= "record" "(" ( derive_clause "," )? record_name ( "," field )* ")" ";"
matches ::= "MATCHES" "(" expr "," ident ")"
match ::= "match" "(" lvalue ")" "{" ( of )* ( otherwise )? "}"
if_let ::= "ifLet" "(" lvalue "," variant_name comma_match_ident ( comma_match_ident )* ")" ( stmt | ";" )
derive_clause ::= "derive" "(" deriver_name ( "," deriver_name )* ")"
datatype_name ::= ident
comma_variant ::= "," variant
variant ::= "(" variant_name ( "," type )* ")"
record_name ::= ident
field ::= "(" type "," field_name ")"
variant_name ::= ident
type ::= ident ( ident )* ( "*" )*
field_name ::= ident
deriver_name ::= ident
lvalue ::= ( "*" )* ident
of ::= "of" "(" variant_name ( comma_match_ident )* ")" stmt
otherwise ::= "otherwise" stmt
expr ::= ( "*" )* ident
comma_match_ident ::= "," match_ident
stmt ::= ( ident ( ( ident )* | "(" ( ANY )* ")" | "=" ( TOK_NUMBER | ident ) ) ";" | "return" ANY ( ANY )* ";" | match | if_let | "{" ( stmt )* "}" )
match_ident ::= ( ident | "_" )
//
// tokens
//
And here is the test file:
// clang-format off
datatype(
derive(Menu),
UserCommand,
(SendMessage, MessageContent, UserId),
(SubscribeToChannel, ChannelId),
(DeleteAccount)
);
// clang-format on
// clang-format off
datatype(
derive(Metadata),
Num,
(Char, char),
(Int, int),
(Double, double)
);
// clang-format on
// clang-format off
datatype(
derive(Print),
MyType,
(Foo, const char *),
(Bar, int, int)
);
// clang-format on
// clang-format off
record(
derive(Metadata),
Point,
(int, x),
(int, y)
);
// clang-format on
ifLet(expr, C, str, x) {
assert(str == &expr.data.C._0);
assert(x == &expr.data.C._1);
goto end_if_let;
}
ifLet(expr, B, _) FAIL;
ifLet(b, B, _);
ifLet(b, B, _);
ifLet(expr, B, _) {}
datatype(
Foo,
(MkFoo, MyArray)
);
// clang-format off
datatype(
Expr,
(Const, double),
(Add, Expr *, Expr *),
(Sub, Expr *, Expr *),
(Mul, Expr *, Expr *),
(Div, Expr *, Expr *)
);
// clang-format on
match(*expr) {
of(Const, number) return *number;
of(Add, lhs, rhs) return eval(*lhs) + eval(*rhs);
of(Sub, lhs, rhs) return eval(*lhs) - eval(*rhs);
of(Mul, lhs, rhs) return eval(*lhs) * eval(*rhs);
of(Div, lhs, rhs) return eval(*lhs) / eval(*rhs);
}
// clang-format off
datatype(
BinaryTree,
(Leaf, int),
(Node, BinaryTree *, int, BinaryTree *)
);
// clang-format on
match(*expr) {
of(Const, number) return *number;
of(Add, lhs, rhs) return eval(*lhs) + eval(*rhs);
of(Sub, lhs, rhs) return eval(*lhs) - eval(*rhs);
of(Mul, lhs, rhs) return eval(*lhs) * eval(*rhs);
of(Div, lhs, rhs) return eval(*lhs) / eval(*rhs);
}
match(*tree) {
of(Leaf, x) {
free(tree);
return;
}
of(Node, lhs, x, rhs) {
destroy_tree(*lhs);
destroy_tree(*rhs);
free(tree);
}
}
match(*tree) {
of(Leaf, x) return *x;
of(Node, lhs, x, rhs) return sum(*lhs) + *x + sum(*rhs);
}
// clang-format off
datatype(
Token,
(Ident, const char *),
(Int, int),
(LParen),
(RParen),
(Plus)
);
// clang-format on
match(token) {
of(Ident, ident) printf("%s", *ident);
of(Int, x) printf("%d", *x);
of(LParen) printf("(");
of(RParen) printf(")");
of(Plus) printf(" + ");
}
datatype(Trivial1, (Trivial1A));
datatype(Trivial2, (Trivial2A));
datatype(Trivial3, (Trivial3A), (Trivial3B), (Trivial3C));
// clang-format off
datatype(
Complex,
(A),
(B, int),
(C, const char *, long double),
(D, char, unsigned, long long, int *)
);
// clang-format on
MATCHES(a, A)
match(a) {
otherwise {
foo = 123;
}
}
// Test a nested `match`.
match(a) {
of(A) {
match(b) {
of(B) {
foo = 34;
}
otherwise assert(false);
}
}
otherwise assert(false);
}
match(expr) {
of(A) {
assert(false);
}
of(B, _) {
assert(false);
}
of(C, _, x) {
assert(124.1404 == *x);
}
of(D, c, _, _, ptr) {
}
}
With the transformed EBNF shown bellow from the README, we can copy and paste it on https://www.bottlecaps.de/rr/ui in the tab
Edit Grammar
then switch to the tabView Diagram
to see the railroad diagram.In doing so I noticed that the
<arm>
nonterminal is not defined.