BosqueLanguage / BosqueCore

Other
144 stars 5 forks source link

Online grammar editor #72

Open mingodad opened 9 months ago

mingodad commented 9 months ago

I'm trying to add this project's grammar to https://mingodad.github.io/parsertl-playground/playground/ but noticed that the grammar in src/bsqon/parser/lb/bsqon.y is far from complete (see below the grammar/lexer adapted for use at https://mingodad.github.io/parsertl-playground/playground/).

I think that https://mingodad.github.io/parsertl-playground/playground/ can help with fast iteration when developing/debugging/documenting the grammar.

Any feedback is welcome !

//From: https://github.com/BosqueLanguage/BosqueCore/blob/3c94cb556b783a36576a1060aaabe9a6dd9f3e03/src/bsqon/parser/lb/bsqon.y

/*Tokens*/
// Terminal declarations. The SYM_*, KW_* and TOKEN_* names are produced by the
// lexer rules in the last section of this file; the quoted single characters
// are the bracket terminals used directly in the productions.
// Declarations prefixed with // are disabled; their lexer rules are commented
// out as well.
%token SYM_BAR
%token SYM_AMP
%token SYM_COLON
%token SYM_COMMA
%token KW_NONE
%token KW_NOTHING
%token KW_TRUE
%token KW_FALSE
%token KW_SOMETHING
%token KW_OK
%token KW_ERR
%token SYM_DOUBLE_COLON
//%token SYM_ELLIPSIS
%token SYM_ENTRY
//%token SYM_BANG
%token SYM_EQUALS
//%token SYM_DOT
//%token SYM_AT
%token SYM_UNDERSCORE
//%token KW_SOME
%token KW_SRC
//%token KW_LET
//%token KW_IN
%token TOKEN_NAT
%token TOKEN_INT
%token TOKEN_BIG_NAT
%token TOKEN_BIG_INT
%token TOKEN_RATIONAL
%token TOKEN_FLOAT
%token TOKEN_DOUBLE
%token TOKEN_NUMBERINO
%token TOKEN_BYTE_BUFFER
%token TOKEN_UUID_V4
%token TOKEN_UUID_V7
%token TOKEN_SHA_HASH
%token TOKEN_STRING
%token TOKEN_ASCII_STRING
%token TOKEN_REGEX
%token TOKEN_PATH_ITEM
%token TOKEN_DATE_TIME
%token TOKEN_UTC_DATE_TIME
%token TOKEN_PLAIN_DATE
%token TOKEN_PLAIN_TIME
%token TOKEN_LOGICAL_TIME
%token TOKEN_TICK_TIME
%token TOKEN_TIMESTAMP
%token TOKEN_IDENTIFIER
%token TOKEN_TYPE_COMPONENT
%token TOKEN_UNSPEC_IDENTIFIER
%token '<'
%token '>'
%token '['
%token ']'
%token '{'
%token '}'
%token '('
%token ')'

// Operator precedence for type expressions, lowest first: SYM_BAR ("|") binds
// looser than SYM_AMP ("&"); both are left-associative. These declarations
// resolve the ambiguity of the two binary alternatives of bsqontype.
%left /*1*/ SYM_BAR
%left /*2*/ SYM_AMP

// Entry point of the grammar: bsqonroot is a single bsqonval.
%start bsqonroot

%%

// Comma-terminated prefix of a type list: one or more "bsqontype ," entries
// (left-recursive). The final, comma-less element is supplied by the rule
// that uses this list (bsqontermslist and bsqontupletype).
bsqontypel :
    bsqontypel bsqontypel_entry
    | bsqontypel_entry
    ;

// One list element together with the comma that follows it.
bsqontypel_entry :
    bsqontype SYM_COMMA
    //| error SYM_COMMA
    ;

// Comma-terminated prefix of a record field list: one or more
// "name : type ," entries (left-recursive). The last field, which has no
// trailing comma, is spelled out in bsqonrecordtype.
bsqonnametypel :
    bsqonnametypel bsqonnametypel_entry
    | bsqonnametypel_entry
    ;

// One "name : type" field together with the comma that follows it.
bsqonnametypel_entry :
    TOKEN_IDENTIFIER SYM_COLON bsqontype SYM_COMMA
    //| TOKEN_IDENTIFIER SYM_COLON error SYM_COMMA
    ;

// A nominal (named) type: a type component, optionally followed by a generic
// argument list, or an existing nominal type extended with "::" and another
// component.
// Note: the lexer pattern for TOKEN_TYPE_COMPONENT already accepts a whole
// "A::B::C" sequence as a single token, so the SYM_DOUBLE_COLON alternative is
// needed for cases the lexer cannot join, e.g. after a terms list
// ("Foo<Bar>::Baz").
bsqonnominaltype :
    TOKEN_TYPE_COMPONENT
    | TOKEN_TYPE_COMPONENT bsqontermslist
    | bsqonnominaltype SYM_DOUBLE_COLON TOKEN_TYPE_COMPONENT
    ;

// Generic argument list in angle brackets. At least one type is required;
// additional leading types come from bsqontypel (each followed by a comma),
// so a trailing comma before '>' is not accepted.
bsqontermslist :
    '<' bsqontype '>'
    | '<' bsqontypel bsqontype '>'
    //| '<' error '>'
    //| '<' bsqontypel error '>'
    ;

// Tuple type in square brackets: empty, a single type, or a comma-separated
// list of types (no trailing comma before ']').
bsqontupletype :
    '[' ']'
    | '[' bsqontype ']'
    | '[' bsqontypel bsqontype ']'
    //| '[' error ']'
    //| '[' bsqontypel error ']'
    ;

// Record type in braces: empty, a single "name : type" field, or a
// comma-separated list of such fields (no trailing comma before '}').
bsqonrecordtype :
    '{' '}'
    | '{' TOKEN_IDENTIFIER SYM_COLON bsqontype '}'
    | '{' bsqonnametypel TOKEN_IDENTIFIER SYM_COLON bsqontype '}'
    //| '{' TOKEN_IDENTIFIER SYM_COLON error '}'
    //| '{' bsqonnametypel TOKEN_IDENTIFIER SYM_COLON error '}'
    ;

// Full type expression: a nominal, tuple or record type, combined with the
// binary operators "&" and "|" or grouped with parentheses.
// The binary alternatives are ambiguous on their own; the %left declarations
// for SYM_BAR and SYM_AMP in the header make both left-associative, with "&"
// binding tighter than "|". The /*2L*/ and /*1L*/ markers appear to record
// that precedence level and associativity.
bsqontype :
    bsqonnominaltype
    | bsqontupletype
    | bsqonrecordtype
    | bsqontype SYM_AMP /*2L*/ bsqontype
    | bsqontype SYM_BAR /*1L*/ bsqontype
    | '(' bsqontype ')'
    //| '(' error ')'
    ;

// Restricted type form used inside '<' ... '>' by bsqontypedvalue: the same
// first three alternatives as bsqontype, without the "&", "|" and
// parenthesized forms.
bsqontspec :
    bsqonnominaltype
    | bsqontupletype
    | bsqonrecordtype
    ;

//bsqontyperoot :
//  bsqontype
//  ;

// Any single-token literal value: the keyword constants and every literal
// token produced by the lexer.
// TOKEN_NUMBERINO is not listed here; in this grammar it only appears in
// bsqontypeliteral.
bsqonliteral :
    KW_NONE
    | KW_NOTHING
    | KW_TRUE
    | KW_FALSE
    | TOKEN_NAT
    | TOKEN_INT
    | TOKEN_BIG_NAT
    | TOKEN_BIG_INT
    | TOKEN_RATIONAL
    | TOKEN_FLOAT
    | TOKEN_DOUBLE
    | TOKEN_BYTE_BUFFER
    | TOKEN_UUID_V4
    | TOKEN_UUID_V7
    | TOKEN_SHA_HASH
    | TOKEN_STRING
    | TOKEN_ASCII_STRING
    | TOKEN_PATH_ITEM
    | TOKEN_REGEX
    | TOKEN_DATE_TIME
    | TOKEN_UTC_DATE_TIME
    | TOKEN_PLAIN_DATE
    | TOKEN_PLAIN_TIME
    | TOKEN_LOGICAL_TIME
    | TOKEN_TICK_TIME
    | TOKEN_TIMESTAMP
    ;

// Wrapper around the TOKEN_UNSPEC_IDENTIFIER token (lexed as "_$" followed by
// an identifier).
bsqonunspecvar :
    TOKEN_UNSPEC_IDENTIFIER
    ;

// A plain identifier, or the "$src" keyword (KW_SRC).
bsqonidentifier :
    KW_SRC
    | TOKEN_IDENTIFIER
    ;

// An identifier qualified by a nominal type. Note that the separator in this
// grammar is the single colon (SYM_COLON), not "::".
bsqonscopedidentifier :
    bsqonnominaltype SYM_COLON TOKEN_IDENTIFIER
    ;

// A string or ASCII-string literal immediately followed by a nominal type.
bsqonstringof :
    TOKEN_STRING bsqonnominaltype
    | TOKEN_ASCII_STRING bsqonnominaltype
    ;

// A path-item literal immediately followed by a nominal type.
bsqonpath :
    TOKEN_PATH_ITEM bsqonnominaltype
    ;

// A literal tagged with a nominal type: "<literal> _ <type>".
// KW_NONE and KW_NOTHING are also alternatives of bsqonliteral, so any input
// matching the second or third alternative also matches the last one.
// NOTE(review): that overlap presumably yields shift/reduce conflicts on
// SYM_UNDERSCORE; confirm whether the explicit KW_NONE / KW_NOTHING
// alternatives are still wanted now that the semantic actions are removed.
bsqontypeliteral :
    TOKEN_NUMBERINO SYM_UNDERSCORE bsqonnominaltype
    | KW_NONE SYM_UNDERSCORE bsqonnominaltype
    | KW_NOTHING SYM_UNDERSCORE bsqonnominaltype
    | bsqonliteral SYM_UNDERSCORE bsqonnominaltype
    ;

// All non-composite values: literals, identifiers (plain, scoped and
// unspecified), typed strings/paths and type-tagged literals.
bsqonterminal :
    bsqonliteral
    | bsqonunspecvar
    | bsqonidentifier
    | bsqonscopedidentifier
    | bsqonstringof
    | bsqonpath
    | bsqontypeliteral
    ;

// A map entry "key => value"; both sides are arbitrary values.
bsqon_mapentry :
    bsqonval SYM_ENTRY bsqonval
    //| error SYM_ENTRY bsqonval
    //| bsqonval SYM_ENTRY error
    //| error SYM_ENTRY error
    ;

// Comma-terminated prefix of a bracketed value list: one or more
// "element ," entries (left-recursive). The final element is supplied by
// bsqonbracketvalue.
bsqonvall :
    bsqonvall bsqonl_entry
    | bsqonl_entry
    ;

// One element (a value or a map entry) together with the comma after it.
bsqonl_entry :
    bsqon_braceval SYM_COMMA
    //| error SYM_COMMA
    ;

// Square-bracketed value list: empty, a single value, or a comma-separated
// list (no trailing comma before ']').
// As written, elements followed by a comma may be map entries (through
// bsqonl_entry), but the last element must be a plain bsqonval.
bsqonbracketvalue :
    '[' ']'
    | '[' bsqonval ']'
    | '[' bsqonvall bsqonval ']'
    //| '[' error ']'
    //| '[' bsqonvall error ']'
    ;

// Comma-terminated prefix of a brace value list: one or more entries, each
// ending in a comma (left-recursive). The final element is supplied by
// bsqonbracevalue.
bsqonnamevall :
    bsqonnamevall bsqonnameval_entry
    | bsqonnameval_entry
    ;

// A positional element inside brackets or braces: a value or a map entry.
bsqon_braceval :
    bsqonval
    | bsqon_mapentry
    ;

// One brace-list element followed by a comma: either a named "name = value"
// field or a positional element.
bsqonnameval_entry :
    TOKEN_IDENTIFIER SYM_EQUALS bsqonval SYM_COMMA
    //| TOKEN_IDENTIFIER SYM_EQUALS error SYM_COMMA
    | bsqon_braceval SYM_COMMA
    //| error SYM_COMMA
    ;

// Brace-delimited value: empty, or a comma-separated list whose elements are
// named fields ("name = value") and/or positional elements (values or map
// entries). The last element has no trailing comma.
bsqonbracevalue :
    '{' '}'
    | '{' TOKEN_IDENTIFIER SYM_EQUALS bsqonval '}'
    | '{' bsqonnamevall TOKEN_IDENTIFIER SYM_EQUALS bsqonval '}'
    //| '{' TOKEN_IDENTIFIER SYM_EQUALS error '}'
    //| '{' bsqonnamevall TOKEN_IDENTIFIER SYM_EQUALS error '}'
    | '{' bsqon_braceval '}'
    | '{' bsqonnamevall bsqon_braceval '}'
    //| '{' error '}'
    //| '{' bsqonnamevall error '}'
    ;

// Either kind of composite literal body: "[...]" or "{...}".
bsqonbracketbracevalue :
    bsqonbracketvalue
    | bsqonbracevalue
    ;

// A composite literal prefixed with its type: either "<typespec>" in angle
// brackets or a bare nominal type directly before the body.
bsqontypedvalue :
    '<' bsqontspec '>' bsqonbracketbracevalue
    | bsqonnominaltype bsqonbracketbracevalue
    //| '<' error '>' bsqonbracketbracevalue
    //| error bsqonbracketbracevalue
    ;

// A composite value, with or without a type prefix.
bsqonstructvalue :
    bsqonbracketbracevalue
    | bsqontypedvalue
    ;

// Special constructors: "something(v)", "ok(v)" and "err(v)", each wrapping
// exactly one value.
bsqonspecialcons :
    KW_SOMETHING '(' bsqonval ')'
    | KW_OK '(' bsqonval ')'
    | KW_ERR '(' bsqonval ')'
    ;

// Any value: a terminal, a special constructor, or a composite value.
bsqonval :
    bsqonterminal
    | bsqonspecialcons
    | bsqonstructvalue
    ;

//bsqonletexp :
//  '(' KW_LET bsqonidentifier SYM_COLON bsqontype SYM_EQUALS bsqonval KW_IN bsqonval ')'
//  ;

// Start symbol (see %start): the whole input is a single value.
bsqonroot :
    bsqonval
    //| error
    ;

%%

%x STRING
%x ASCII_STRING
%x REGEX
%x PATH_ITEM

NON_ZERO_INTEGRAL ([1-9][0-9]*)
EXP ([Ee][-+]?[0-9]+)

DATE ([0-9]{4})-([0-9]{2})-([0-9]{2})
TIME ([0-9]{2}):([0-9]{2}):([0-9]{2})
TZNAME ((\{[a-zA-Z0-9/, _-]+\})|[A-Z]+)

UUID ([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})

IDENTIFIER ([_]?[a-z][a-zA-Z0-9_]*)
TYPE_IDENTIFIER ([A-Z][a-zA-Z0-9_]*)

%%

 /* comments */
"//".*    skip() /* line comment */

[ \t\n\r]+   skip() /* ignore white space */

 /* standard parens */
"("     '('
")"     ')'
"{"     '{'
"}"     '}'
"["     '['
"]"     ']'
"<"     '<'
">"     '>'

 /* symbols */
//"..."       SYM_ELLIPSIS

"::"        SYM_DOUBLE_COLON
"=>"        SYM_ENTRY

":"         SYM_COLON
","         SYM_COMMA
"&"         SYM_AMP
"|"         SYM_BAR
//"!"         SYM_BANG
"="         SYM_EQUALS
//"."         SYM_DOT
//"@"         SYM_AT
"_"         SYM_UNDERSCORE

 /* keywords */
// These rules are listed before the {IDENTIFIER} rule further down, which
// also matches these spellings (except "$src").
// NOTE(review): assuming lex-style matching (longest match, earliest rule on
// ties), a longer word such as "none_x" is lexed as TOKEN_IDENTIFIER rather
// than as a keyword followed by more input — confirm this is intended.
"something" KW_SOMETHING

"nothing"   KW_NOTHING

"false"     KW_FALSE

"$src"      KW_SRC
"none"      KW_NONE
//"some"      KW_SOME
"true"      KW_TRUE

"err"       KW_ERR
//"let"       KW_LET

//"in"        KW_IN
"ok"        KW_OK

 /* numbers */
// Integer-like literals are distinguished by their suffix letter:
//   n -> TOKEN_NAT, i -> TOKEN_INT, N -> TOKEN_BIG_NAT, I -> TOKEN_BIG_INT,
//   R -> TOKEN_RATIONAL (optionally written as numerator/denominator).
// The Nat patterns accept only an optional '+' sign; the Int and Rational
// patterns accept '+' or '-'. A bare 0 carries no sign in any of them, and a
// rational denominator must be a non-zero integral.
(0|[+]?{NON_ZERO_INTEGRAL})n    TOKEN_NAT
(0|[+-]?{NON_ZERO_INTEGRAL})i   TOKEN_INT
(0|[+]?{NON_ZERO_INTEGRAL})N    TOKEN_BIG_NAT
(0|[+-]?{NON_ZERO_INTEGRAL})I   TOKEN_BIG_INT

(0|[+-]?{NON_ZERO_INTEGRAL})R                         TOKEN_RATIONAL
((0|[+-]?{NON_ZERO_INTEGRAL})\/{NON_ZERO_INTEGRAL})R  TOKEN_RATIONAL

//[-]0[niNIR]                                      { yyerror("Zero cannot be negative"); return YYerror; }
//[-]{NON_ZERO_INTEGRAL}[nN]                       { yyerror("Cannot have negative natural number"); return YYerror; }
//[+]?0+{NON_ZERO_INTEGRAL}[nN]                    { yyerror("Leading zero is not allowed"); return YYerror; }
//[+-]?0+{NON_ZERO_INTEGRAL}[iIR]                  { yyerror("Leading zero is not allowed"); return YYerror; }
//[+-]?0+{NON_ZERO_INTEGRAL}\/{NON_ZERO_INTEGRAL}R { yyerror("Leading zero is not allowed"); return YYerror; }
//[+]?00+[nN]                                      { yyerror("Leading zero is not allowed"); return YYerror; }
//[+-]?00+[iIR]                                    { yyerror("Leading zero is not allowed"); return YYerror; }
//((0|[+-]?{NON_ZERO_INTEGRAL})\/0)R               { yyerror("Zero as divisor"); return YYerror; }

[+-]?"0."[0-9]+{EXP}?f                    TOKEN_FLOAT
[+-]?{NON_ZERO_INTEGRAL}"."[0-9]+{EXP}?f  TOKEN_FLOAT

[+-]?"0."[0-9]+{EXP}?d                    TOKEN_DOUBLE
[+-]?{NON_ZERO_INTEGRAL}"."[0-9]+{EXP}?d  TOKEN_DOUBLE

//[+-]?0+"0."[0-9]+{EXP}?[fd]                   { yyerror("Redundant leading zero is not allowed"); return YYerror; }
//[+-]?0+{NON_ZERO_INTEGRAL}"."[0-9]+{EXP}?[fd] { yyerror("Leading zero is not allowed"); return YYerror; }
//[+-]?"."[0-9]+{EXP}?[fd]                      { yyerror("Leading decimal is not allowed"); return YYerror; }

 /* numberinos */
0|{NON_ZERO_INTEGRAL}        TOKEN_NUMBERINO

//[+-](0|{NON_ZERO_INTEGRAL}) { yyerror("Cannot have sign on indexer"); return YYerror; }
//0+{NON_ZERO_INTEGRAL}       { yyerror("Leading zero is not allowed"); return YYerror; }
//00+                         { yyerror("Leading zero is not allowed"); return YYerror; }

 /* strings */

0x"["[a-fA-F0-9]*"]"     TOKEN_BYTE_BUFFER
uuid4"{"{UUID}"}"        TOKEN_UUID_V4
uuid7"{"{UUID}"}"        TOKEN_UUID_V7
sha3"{"[a-f0-9]{64}"}"   TOKEN_SHA_HASH

//0x"["[^\]]*"]"        { yyerror("Invalid buffer contents"); return YYerror; }
//uuid(4|7)"{"[^}]*"}" { yyerror("Invalid UUID value"); return YYerror; }
//sha3"{"[^}]*"}"      { yyerror("Invalid SHA3 value"); return YYerror; }

//0x"["[a-zA-Z0-9]*          { yyerror("Missing close bracket"); return YYerror; }
//uuid(4|7)"{"[a-zA-Z0-9-]+  { yyerror("Missing close bracket"); return YYerror; }
//sha3"{"[a-zA-Z0-9]+        { yyerror("Missing close bracket"); return YYerror; }

// Double-quoted string. The opening quote switches to the exclusive STRING
// state; the closing quote switches back to INITIAL and yields TOKEN_STRING.
// Rules ending in <.> keep the lexer in the current state.
// There is no escape handling here: the first '"' always ends the string.
\"<STRING>
<STRING>{
    \"<INITIAL>  TOKEN_STRING
    //<<EOF>>       { BEGIN(INITIAL); yyerror("Unclosed String"); return YYEOF; }
    \n[ \t\r]*\\<.> /*ignore whitespace till alignment on multi-line string*/
    [^"]<.>
}

// Single-quoted ASCII string. The opening quote switches to the exclusive
// ASCII_STRING state; the closing quote switches back to INITIAL and yields
// TOKEN_ASCII_STRING. Only printable ASCII ('!'..'~'), space and tab are
// accepted as content; the multi-line continuation rule is disabled here.
'<ASCII_STRING>
<ASCII_STRING>{
    '<INITIAL>  TOKEN_ASCII_STRING
    //<<EOF>>       { BEGIN(INITIAL); yyerror("Unclosed ASCIIString"); return YYEOF; }
    //\n[ \t\r]*\\<.>  /*ignore whitespace till alignment on multi-line string*/
    [!-~ \t]<.>
    //.             { yyerror("Non-printable character (ignoring)"); return YYerror; }
}

// Regex literal delimited by '/'. The opening slash switches to the exclusive
// REGEX state; the closing slash switches back to INITIAL and yields
// TOKEN_REGEX.
// The content rule must stay enabled: without it the state accepts nothing
// but an immediate closing slash, and "//" is already consumed as a line
// comment in INITIAL, so no regex literal could ever be lexed. The closing
// slash rule is listed first, mirroring the ASCII_STRING and PATH_ITEM states
// whose content classes also include their own delimiter.
"/"<REGEX>
<REGEX>{
    "/"<INITIAL>  TOKEN_REGEX
    //<<EOF>>     { BEGIN(INITIAL); yyerror("Unclosed Regex"); return YYEOF; }
    //\n          { yyerror("Newline in Regex"); return YYerror; }
    [!-~ \t]<.>   /* printable ASCII, space or tab: stay in REGEX */
    //.           { yyerror("Invalid character (ignoring)"); return YYerror;}
}

// Back-quoted path item, optionally prefixed with 'g' or 'f'. The opening
// back-quote switches to the exclusive PATH_ITEM state; the closing one
// switches back to INITIAL and yields TOKEN_PATH_ITEM. Only printable ASCII
// ('!'..'~') is accepted as content, so whitespace is not allowed inside.
[gf]?"`"<PATH_ITEM>
<PATH_ITEM>{
    "`"<INITIAL>  TOKEN_PATH_ITEM
    //<<EOF>>     { BEGIN(INITIAL); yyerror("Unclosed Path Item"); return YYEOF; }
    //[ \t\n\r]   { yyerror("Whitespace in Path Item"); return YYerror; }
    [!-~]<.>
    //.           { yyerror("Invalid character (ignoring)"); return YYerror;}
}

 /* dates */
// Date/time literals, built from the DATE (YYYY-MM-DD shaped), TIME (HH:MM:SS
// shaped) and TZNAME macros. The patterns only check digit counts, not value
// ranges.
//   DATE 'T' TIME '@' TZNAME -> TOKEN_DATE_TIME
//   DATE 'T' TIME ['Z']      -> TOKEN_UTC_DATE_TIME
// Tick and logical times are a non-negative integer with suffix 't' or 'l'.
{DATE}T{TIME}@{TZNAME}  TOKEN_DATE_TIME
{DATE}T{TIME}Z?         TOKEN_UTC_DATE_TIME
{DATE}                  TOKEN_PLAIN_DATE
{TIME}                  TOKEN_PLAIN_TIME

(0|{NON_ZERO_INTEGRAL})t  TOKEN_TICK_TIME
(0|{NON_ZERO_INTEGRAL})l  TOKEN_LOGICAL_TIME

//[+-]0[tl]                                        { yyerror("Cannot have sign/negative time"); return YYerror; }
//[+-]{NON_ZERO_INTEGRAL}[tl]                      { yyerror("Cannot have sign/negative time"); return YYerror; }
//0+{NON_ZERO_INTEGRAL}[tl]                        { yyerror("Leading zero is not allowed"); return YYerror; }
//[+-]?00+[tl]                                     { yyerror("Leading zero is not allowed"); return YYerror; }

{DATE}T{TIME}("."[0-9]{3})Z  TOKEN_TIMESTAMP

 /* names */
// {IDENTIFIER}: optional leading '_' followed by a lowercase letter.
// Unspecified identifiers are written "_$" followed by an {IDENTIFIER}.
// A type component starts with an uppercase letter; a run of components
// joined by "::" (no whitespace) is returned as ONE TOKEN_TYPE_COMPONENT.
{IDENTIFIER}  TOKEN_IDENTIFIER

_"$"{IDENTIFIER}  TOKEN_UNSPEC_IDENTIFIER

{TYPE_IDENTIFIER}(::{TYPE_IDENTIFIER})* TOKEN_TYPE_COMPONENT

//. { return yytext[0]; } /* catch all and error in parser */

%%
BosqueLanguage commented 8 months ago

Looks like a neat project for working with flex/bison grammars. The current grammar there is just for a Bosque Literal Object Notation language, which is (mostly) a subset of the Bosque language grammar; that is why the file looks incomplete in some ways.

Maybe I missed it but I didn't see the grammar in the playground dropdown list.

mingodad commented 8 months ago

It's not there because I thought that grammar was for the language itself, and I could not get it to parse any example I found. But the grammar is in the first comment: copy and paste it into https://mingodad.github.io/parsertl-playground/playground/ and you can test it there.