Closed AJamesPhillips closed 2 months ago
Here you go James. It is an ANTLR v3 grammar.
If you do anything interesting with it, please share.
// ANTLR v3 combined (lexer + parser) grammar named "Formula".
grammar Formula;
// Generator options:
//   language=JavaScript  - emit the recognizer for the JavaScript target
//   output=AST           - build an AST; the `->` rewrites and the `^` / `!`
//                          operators in the parser rules below shape it
//   ASTLabelType         - node type used for the generated tree labels
//   backtrack=true       - let the parser backtrack where lookahead alone
//                          cannot choose between alternatives
options
{
language=JavaScript;
output=AST;
ASTLabelType=CommonTree;
backtrack=true;
}
// Imaginary token types: none of these has a lexer rule. They are used as node
// labels in the AST rewrites of the parser rules below, e.g. ^(LINES ...),
// ^(ASSIGN ...), ^(FUNCALL ...).
// NOTE(review): NUMBER and ELSE are not referenced by any rule in this grammar;
// they may be used by tree-walking code elsewhere - confirm before removing.
tokens
{
NEGATE;
ASSIGN;
FUNCALL;
MATERIAL;
UNIT;
POWER;
DEFAULTS;
PARAMS;
UNITCLUMP;
ARRAY;
LINES;
WHILE;
IFTHENELSE;
ELSE;
FOR;
FORIN;
FUNCTION;
ANONFUNCTION;
NUMBER;
LABEL;
RANGE;
INNER;
ASSIGNED;
SELECTOR;
DOTSELECTOR;
NEW;
TRYCATCH;
THROW;
}
// Whole-input rule (the only rule that requires EOF): optional leading
// NEWLINEs, then expressions each terminated by one or more NEWLINEs or by EOF.
// Every matched expression becomes a child of a single LINES node; the `*` in
// the rewrite lets an input without expressions produce an empty LINES node.
lines
: NEWLINE* (expression (NEWLINE+|EOF))* EOF -> ^(LINES expression*)
;
// Line terminator: LF, optionally preceded by CR.
NEWLINE           : ('\r'? '\n') ;

// Keyword tokens. Each keyword is spelled with the single-letter fragments
// (A .. Z) declared later in this grammar; every fragment accepts both the
// lower-case and the upper-case form of its letter.
WHILESTATEMENT    : W H I L E ;
FORSTATEMENT      : F O R ;
FROMSTATEMENT     : F R O M ;
INSTATEMENT       : I N ;
TOSTATEMENT       : T O ;
BYSTATEMENT       : B Y ;
LOOPSTATEMENT     : L O O P ;
IFSTATEMENT       : I F ;
THENSTATEMENT     : T H E N ;
ELSESTATEMENT     : E L S E ;
FUNCTIONSTATEMENT : F U N C T I O N ;
ENDBLOCK          : E N D ;
RETURNSTATEMENT   : R E T U R N ;
NEWSTATEMENT      : N E W ;
TRYSTATEMENT      : T R Y ;
CATCHSTATEMENT    : C A T C H ;
THROWSTATEMENT    : T H R O W ;
// One statement: an assignment, a plain expression, a control-flow construct,
// a function definition, a return, a try/catch or a throw.
expression
: assignment
| logicalExpression
| whileLoop
| forLoop
| forInLoop
| ifThenElse
| functionDef
| returnExp
| tryCatch
| throwExp
;
// `return <expr>`. The `^` suffix makes the RETURNSTATEMENT token the root of
// the resulting subtree, with the expression as its child.
returnExp
:
RETURNSTATEMENT^ logicalExpression
;
// Body of a block construct (loops, if/else, functions, try/catch): zero or
// more expressions, each followed by at least one NEWLINE, gathered under a
// LINES node.
// The rewrite uses `expression*` (as the `lines` rule does) because the body
// may legitimately be empty: with `expression+`, a body that matched zero
// expressions raises RewriteEarlyExitException at tree-construction time
// instead of yielding an empty LINES node.
innerBlock
    : (expression (NEWLINE+))* -> ^(LINES expression*)
    ;
// while <cond> NEWLINE+ <body> end loop  ->  ^(WHILE cond body)
whileLoop
: WHILESTATEMENT logicalExpression NEWLINE+ innerBlock ENDBLOCK LOOPSTATEMENT -> ^(WHILE logicalExpression innerBlock)
;
// for <ident> from <a> to <b> [by <c>] NEWLINE+ <body> end loop
// The PARAMS child holds the from / to / optional by expressions in source order.
forLoop
: FORSTATEMENT IDENT FROMSTATEMENT logicalExpression TOSTATEMENT logicalExpression (BYSTATEMENT logicalExpression)? NEWLINE+ innerBlock ENDBLOCK LOOPSTATEMENT -> ^(FOR IDENT ^(PARAMS logicalExpression*) innerBlock)
;
// for <ident> in <expr> NEWLINE+ <body> end loop  ->  ^(FORIN ident expr body)
forInLoop
: FORSTATEMENT IDENT INSTATEMENT logicalExpression NEWLINE+ innerBlock ENDBLOCK LOOPSTATEMENT -> ^(FORIN IDENT logicalExpression innerBlock)
;
// if <cond> then ... { else if <cond> then ... } [ else ... ] end if
// The first PARAMS child collects every condition (the `if` and each `else if`);
// the second PARAMS child collects every block in source order, i.e. one per
// condition followed by the final `else` block when present.
ifThenElse
: IFSTATEMENT logicalExpression THENSTATEMENT NEWLINE+ innerBlock (ELSESTATEMENT IFSTATEMENT logicalExpression THENSTATEMENT NEWLINE+ innerBlock)* (ELSESTATEMENT NEWLINE+ innerBlock)? ENDBLOCK IFSTATEMENT -> ^(IFTHENELSE ^(PARAMS logicalExpression+) ^(PARAMS innerBlock+))
;
// function <name>(<params>) NEWLINE+ <body> end function
// Parameter list: a first IDENT that either carries `= default` or is followed
// by further plain IDENTs, then any number of `, IDENT = default` pairs.
// The rewrite's `IDENT*` is unlabelled, so the PARAMS child contains every IDENT
// matched by the rule - the function name first, then the parameter names.
functionDef
: FUNCTIONSTATEMENT IDENT '(' (IDENT (EQUALS defaultValue | (',' IDENT )*) (',' IDENT EQUALS defaultValue )*)? ')' NEWLINE+ innerBlock ENDBLOCK FUNCTIONSTATEMENT -> ^(FUNCTION ^(PARAMS IDENT*) ^(DEFAULTS defaultValue*) innerBlock)
;
// try NEWLINE+ <body> catch <ident> NEWLINE+ <handler> end try
// Tree: both blocks in source order, followed by the catch identifier.
tryCatch
: TRYSTATEMENT NEWLINE+ innerBlock CATCHSTATEMENT IDENT NEWLINE+ innerBlock ENDBLOCK TRYSTATEMENT -> ^(TRYCATCH innerBlock* IDENT)
;
// throw <primaryExpression>  ->  ^(THROW primaryExpression)
throwExp
: THROWSTATEMENT primaryExpression -> ^(THROW primaryExpression)
;
// Anonymous function: same parameter list as functionDef but without a name.
// The body is either a block closed by `end function` or a single expression
// on the same line; the rewrite emits whichever of the two was matched.
anonFunctionDef
: FUNCTIONSTATEMENT '(' (IDENT (EQUALS defaultValue | (',' IDENT )*) (',' IDENT EQUALS defaultValue )*)? ')' ( (NEWLINE+ innerBlock ENDBLOCK FUNCTIONSTATEMENT) | expression) -> ^(ANONFUNCTION ^(PARAMS IDENT*) ^(DEFAULTS defaultValue*) innerBlock? expression?)
;
// Two forms, both using the `<-` operator:
//   1. <name>(<params>) <- <expr>   short function definition; rewritten to a
//      FUNCTION node whose body is the expression (PARAMS again receives every
//      IDENT of the alternative, starting with the function name).
//   2. <target> {, <target>} <- <expr>   where each target is a PRIMITIVE or an
//      `assigned`. The rewrite lists all PRIMITIVE targets before all `assigned`
//      targets, so their relative source order is not preserved in the tree.
assignment
:
IDENT '(' (IDENT (EQUALS defaultValue | (',' IDENT )*) (',' IDENT EQUALS defaultValue )*)? ')' '<-' logicalExpression -> ^(FUNCTION ^(PARAMS IDENT*) ^(DEFAULTS defaultValue*) logicalExpression) |
(PRIMITIVE | assigned) (',' (PRIMITIVE | assigned))* '<-' logicalExpression -> ^(ASSIGN PRIMITIVE* assigned* logicalExpression)
;
// Assignment target: an identifier with an optional selector.
assigned
: IDENT selector? -> ^(ASSIGNED IDENT selector?)
;
// Binary operator ladder, loosest binding first:
//   OR -> XOR -> AND -> equality -> relational -> additive -> multiplicative.
// Each level is `<next level> (OP^ <next level>)*`; the `^` suffix makes the
// operator token the root of the subtree built so far.
// Word operators (or, xor, and, mod) are spelled with the letter fragments and
// therefore match in either case.
logicalExpression
: booleanXORExpression (OR^ booleanXORExpression)*
;
OR : '||' | O R;
booleanXORExpression
: booleanAndExpression (XOR^ booleanAndExpression)*
;
XOR : X O R;
booleanAndExpression
: equalityExpression (AND^ equalityExpression)*
;
AND : '&&' | A N D;
equalityExpression
: relationalExpression ((EQUALS|NOTEQUALS)^ relationalExpression)*
;
// Both '=' and '==' are the same EQUALS token; EQUALS is also used for
// parameter defaults in the function rules.
EQUALS
: '=' | '==';
NOTEQUALS
: '!=' | '<>';
relationalExpression
: additiveExpression ( (LT|LTEQ|GT|GTEQ)^ additiveExpression )*
;
LT : '<';
LTEQ : '<=';
GT : '>';
GTEQ : '>=';
additiveExpression
: multiplicativeExpression ( (PLUS|MINUS)^ multiplicativeExpression )*
;
PLUS : '+';
MINUS : '-';
multiplicativeExpression
: arrayExpression ( (MULT|DIV|MOD)^ arrayExpression )*
;
MULT : '*';
DIV : '/';
MOD : '%' | M O D;
// Range expression: one operand optionally followed by up to two more
// ':'-separated operands. The result is always wrapped in a RANGE node, even
// when only a single operand was matched.
// Each `{ ... }?` is a semantic predicate that reads the token stored just
// before the current stream index and requires its text to differ from a
// single space; whitespace tokens stay in the stream because WS is sent to the
// hidden channel rather than dropped.
// NOTE(review): this looks intended to forbid whitespace around ':'. A WS token
// made of several spaces or a tab has a text other than " " and would pass the
// check - confirm whether that is intended.
arrayExpression
: negationExpression ({ this.input.get(this.input.index()-1).getText()!=" "}? COLON { this.input.get(this.input.index()-1).getText()!=" "}? negationExpression)? ( { this.input.get(this.input.index()-1).getText()!=" "}? COLON { this.input.get(this.input.index()-1).getText()!=" "}? negationExpression)? -> ^(RANGE negationExpression*)
;
// Leading unary minus applied to a whole power expression -> ^(NEGATE ...).
negationExpression
: MINUS powerExpression -> ^(NEGATE powerExpression) |
powerExpression
;
// base ^ exp ^ exp ...; always wrapped in a POWER node whose first child is the
// base, followed by zero or more exponents.
powerExpression
: unaryExpression (POW unaryOrNegate )* -> ^(POWER unaryExpression unaryOrNegate*)
;
// Exponent operand: a unary expression, optionally negated.
unaryOrNegate
: unaryExpression |
MINUS unaryExpression -> ^(NEGATE unaryExpression)
;
POW : '^';
// Logical not ('!' or the word `not`); the NOT token becomes the subtree root.
unaryExpression
: NOT^ innerPrimaryExpression
| innerPrimaryExpression
;
NOT : '!' | N O T;
// Wraps every selection expression in an INNER node.
innerPrimaryExpression
: selectionExpression -> ^(INNER selectionExpression)
;
// A primary expression followed by any number of selectors or call argument
// lists. The predicate is the same previous-token check used in
// arrayExpression, applied before each selector / call.
selectionExpression
: primaryExpression ({ this.input.get(this.input.index()-1).getText()!=" "}? (selector|funCall))*
;
// Call argument list: '(' [expr {, expr}] ')'  ->  ^(FUNCALL expr*)
funCall :
'(' ( logicalExpression (',' logicalExpression)* )? ')' -> ^(FUNCALL logicalExpression*)
;
// Parenthesised expression or a value. The `!` suffix keeps the parentheses
// out of the AST.
primaryExpression
: '('! logicalExpression ')'!
| value
;
// Every kind of operand the language accepts.
value
: number
| bool
| string
| material
| IDENT
| primitive
| array
| anonFunctionDef
| newObject
;
// new <ident> [ (args) ]  ->  ^(NEW ident funCall?)
newObject
: NEWSTATEMENT IDENT funCall? -> ^(NEW IDENT funCall?);
// Values allowed as a parameter default in function definitions.
defaultValue
: negnumber
| number
| bool
| string
| array
;
// Array literal delimited by LARR/RARR or by curly braces: a comma-separated
// list of labels, with NEWLINEs permitted around every element and comma.
// Both delimiter forms produce the same ^(ARRAY label*) tree.
array
:
LARR NEWLINE* (label NEWLINE* (',' NEWLINE* label NEWLINE*)*)? NEWLINE* RARR -> ^(ARRAY label*)
| LCURL NEWLINE* (label NEWLINE*(',' NEWLINE* label NEWLINE*)*)? NEWLINE* RCURL -> ^(ARRAY label*)
;
// Element access: either a bracketed index list or a chain of dot accesses.
selector
: (minarray | dotselector) -> ^(SELECTOR minarray? dotselector?)
;
// Bracketed index list; every index is an expression or `*`. The `!` suffixes
// drop the delimiters and commas from the AST.
minarray
:
LARR! (logicalExpression|MULT) (COMMA! (logicalExpression|MULT) )* RARR!
| LCURL! (logicalExpression|MULT) (COMMA! (logicalExpression|MULT) )* RCURL!
;
// One or more `.name` accesses  ->  ^(DOTSELECTOR name+)
dotselector
: ('.' arrayName)+ -> ^(DOTSELECTOR arrayName+)
;
// A key: identifier, string literal or `*`.
arrayName
: IDENT|STRING|MULT;
// Array element with an optional `key :` prefix. In the LABEL node the
// expression comes first and the key, when present, second.
label :
(arrayName NEWLINE* COLON)? NEWLINE* logicalExpression -> ^(LABEL logicalExpression arrayName?)
;
// Array delimiters: guillemets (U+00AB / U+00BB) or their ASCII spellings.
LARR : '\u00AB' | '<<' ;
RARR : '\u00BB' | '>>' ;
LCURL : '{';
RCURL : '}';
number : INTEGER|FLOAT;
// Negative numeric literal, used by defaultValue  ->  ^(NEGATE number)
negnumber : '-' number -> ^(NEGATE number);
// Digits with an optional exponent part introduced by a lower-case 'e'.
// NOTE(review): the exponent digits use `*`, so "1e" and "1e+" also match as
// INTEGER - confirm that the consumer of the token text copes with that.
INTEGER
: ('0'..'9')+ ('e' ('+'|'-')? ('0'..'9')* )?
;
// Optional integer part, a '.', at least one fractional digit, and the same
// optional exponent part as INTEGER.
FLOAT
: ('0'..'9')* '.' ('0'..'9')+ ('e' ('+'|'-')? ('0'..'9')* )?
;
// Boolean literal; TRUE and FALSE are spelled with the letter fragments and so
// match in either case.
bool
: TRUE
| FALSE
;
TRUE
: T R U E
;
FALSE
: F A L S E
;
// Quantity with units: '{' <additiveExpression> <units> '}'. In the MATERIAL
// node the unit expression comes first and the numeric expression second.
// The predicate requires the token stored just before the current stream index
// to have the text " " (exactly one space) between the number and the units.
// NOTE(review): a separator made of several spaces or a tab would fail this
// check - confirm whether that is intended.
material: LCURL additiveExpression { this.input.get(this.input.index()-1).getText()==" "}? unitMultiplicativeExpression RCURL -> ^(MATERIAL unitMultiplicativeExpression additiveExpression)
;
PER
: P E R
;
// Unit expression ladder: `per` binds loosest, then '*' and '/', then a unit
// clump with optional `cubed` / `squared` suffixes, then '^' powers.
unitMultiplicativeExpression
: unitInnerMultiplicativeExpression ( PER^ unitInnerMultiplicativeExpression ) *
;
unitInnerMultiplicativeExpression
: unitClump ( (MULT|DIV)^ unitClump ) *
;
// The first alternative matches an `INTEGER /` prefix; its rewrite discards the
// prefix and inserts an imaginary NEGATE node after the unit expression.
unitClump
: (INTEGER DIV) unitPowerExpression CUBED? SQUARED? -> ^(UNITCLUMP unitPowerExpression NEGATE CUBED* SQUARED*)
| unitPowerExpression CUBED? SQUARED? -> ^(UNITCLUMP unitPowerExpression CUBED* SQUARED*)
;
SQUARED : S Q U A R E D;
CUBED : C U B E D;
// unit ^ [-] number ...; the POW token becomes the subtree root.
unitPowerExpression
: unit ( POW^ MINUS? (INTEGER|FLOAT) )*
;
// A unit name made of one or more identifiers, or a parenthesised unit expression.
unit : IDENT (IDENT)* -> ^(UNIT IDENT+)
| '(' unitMultiplicativeExpression ')' -> ^(UNITCLUMP unitMultiplicativeExpression)
;
// Identifier: an ASCII letter followed by letters, digits or underscores.
IDENT
: ('a'..'z' | 'A'..'Z') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
;
primitive
: PRIMITIVE
;
// Bracketed reference, in single-bracket `[...]` or double-bracket `[[...]]`
// form. In the single-bracket form the first character after '[' must not be
// another '['.
PRIMITIVE
: LBRACKET (~'[') .* RBRACKET
| LBRACKET LBRACKET .* RBRACKET RBRACKET
;
LBRACKET
: '[';
RBRACKET
: ']';
// Block comment; routed to the hidden channel so the parser does not see it.
COMMENT
: '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
;
// Line comment starting with '//' or '#', running up to (not including) the
// line break; also routed to the hidden channel.
LINE_COMMENT
: ('//'|'#') (~('\n'|'\r'))* {$channel=HIDDEN;}
;
COLON
: ':'
;
/* Ignore white spaces */
// Spaces, tabs and form feeds go to the hidden channel: parser rules never
// match them, but they remain in the token stream, which is what the semantic
// predicates in this grammar inspect.
WS
: (' '|'\t'|'\u000C')+ {$channel=HIDDEN;}
;
// Case-insensitive letter fragments. Each one matches the lower-case or the
// upper-case form of a single letter; keyword and word-operator tokens are
// spelled by concatenating them (e.g. W H I L E).
fragment A : ('a'|'A') ;
fragment B : ('b'|'B') ;
fragment C : ('c'|'C') ;
fragment D : ('d'|'D') ;
fragment E : ('e'|'E') ;
fragment F : ('f'|'F') ;
fragment G : ('g'|'G') ;
fragment H : ('h'|'H') ;
fragment I : ('i'|'I') ;
fragment J : ('j'|'J') ;
fragment K : ('k'|'K') ;
fragment L : ('l'|'L') ;
fragment M : ('m'|'M') ;
fragment N : ('n'|'N') ;
fragment O : ('o'|'O') ;
fragment P : ('p'|'P') ;
fragment Q : ('q'|'Q') ;
fragment R : ('r'|'R') ;
fragment S : ('s'|'S') ;
fragment T : ('t'|'T') ;
fragment U : ('u'|'U') ;
fragment V : ('v'|'V') ;
fragment W : ('w'|'W') ;
fragment X : ('x'|'X') ;
fragment Y : ('y'|'Y') ;
fragment Z : ('z'|'Z') ;
// String literal operand; the STRING token itself is the subtree root.
string
: STRING^
;
COMMA
: ','
;
// Two literal forms:
//   '...'  single-quoted: any characters between the quotes; no escape
//          sequence is defined for this form
//   "..."  double-quoted: a backslash-escaped quote (\") may appear inside
STRING :
'\'' .* '\''
| '"' ('\\\"'|~'\"')* '"';
Thank you! I look forward to experimenting with this and I'll definitely post back here with any news.
Hi @scottfr, I noticed the package is now using ANTLR v4. Would it be possible to share the v4 grammar file as well, please? I did have a look at adapting the existing v3 grammar file, and I'll be taking another look over the next couple of months. If you'd be interested in chatting further, I'd be grateful for the opportunity to get your thoughts on what I'm planning.
Here you go. I would love to hear about what you are working on.
// ANTLR v4 combined (lexer + parser) grammar named "Formula".
grammar Formula;
// language        - emit the recognizer for the JavaScript target
// caseInsensitive - the lexer matches letters regardless of case, so the
//                   lower-case literals below also accept upper-case input
options
{
language = JavaScript;
caseInsensitive = true;
}
// Text placed at the top of each generated file: type-check / lint
// suppressions and the import of the ANTLR runtime from a vendored path.
@header {
// @ts-nocheck
/* eslint-disable */
import antlr4 from "../../../vendor/antlr4-all.js";
}
// Block comment; skipped by the lexer together with any horizontal whitespace
// (R_) directly before or after it.
COMMENT: R_? '/*' (.)*? '*/' R_? -> skip;
// Line comment starting with '//' or '#', running up to (not including) the
// line break; skipped together with horizontal whitespace directly before it.
LINE_COMMENT: R_? ('//' | '#') (~('\n' | '\r'))* R_? -> skip;
// R__: a whitespace run that contains at least one line break.
R__: SPACE* (NEWLINES SPACE*)+;
// R_: a whitespace run without any line break.
R_: SPACE+;
// Whitespace is NOT skipped by the lexer; the parser rules place it explicitly:
//   optional whitespace that may contain newlines:  ((R__ | R_)*)
//   optional whitespace that contains no newlines:  ((R_)*)
// Note that using parser rules for these slows things down 5% or so
// NOTE(review): NEWLINES is referenced only from R__ in this grammar and is not
// declared as a fragment; a lone line break still lexes as R__ because R__ is
// listed first - confirm whether `fragment` was intended.
NEWLINES: '\n' | '\r';
// Whole-input rule: optional surrounding whitespace and zero or more
// expressions, where consecutive expressions must be separated by at least one
// whitespace run containing a line break (R__). Must consume input up to EOF.
lines: ((R__ | R_)*) (
expression (R__+ expression)* ((R__ | R_)*)
)? EOF;
// Keyword tokens. Matching ignores case because the grammar sets
// `caseInsensitive = true` in its options.
WHILESTATEMENT    : 'while';
FORSTATEMENT      : 'for';
FROMSTATEMENT     : 'from';
INSTATEMENT       : 'in';
TOSTATEMENT       : 'to';
BYSTATEMENT       : 'by';
LOOPSTATEMENT     : 'loop';
IFSTATEMENT       : 'if';
THENSTATEMENT     : 'then';
ELSESTATEMENT     : 'else';
FUNCTIONSTATEMENT : 'function';
ENDBLOCK          : 'end';
RETURNSTATEMENT   : 'return';
NEWSTATEMENT      : 'new';
TRYSTATEMENT      : 'try';
CATCHSTATEMENT    : 'catch';
THROWSTATEMENT    : 'throw';
// One statement: an assignment, a plain expression, a control-flow construct,
// a function definition, a return, a try/catch or a throw.
expression:
assignment
| logicalExpression
| whileLoop
| forLoop
| forInLoop
| ifThenElse
| functionDef
| returnExp
| tryCatch
| throwExp;
// return <expr>; horizontal whitespace after the keyword is optional.
returnExp: RETURNSTATEMENT (R_*) logicalExpression;
// Body of a block construct: same shape as `lines` without the EOF - optional
// surrounding whitespace and zero or more expressions separated by whitespace
// runs that contain a line break. The body may be empty.
innerBlock: ((R__ | R_)*) (
expression (R__+ expression)* ((R__ | R_)*)
)?;
// while <condition> <line break> [body] <line break> end loop
// The body group is optional, exactly as in forLoop and forInLoop, so a loop
// whose `end loop` directly follows the condition line parses. With a mandatory
// `(R__ innerBlock)` the rule needed two separate R__ tokens before `end`,
// which a single run of line breaks cannot provide.
// Tree consumers should expect the innerBlock child to be absent for such an
// empty loop, as is already the case for the other loop rules.
whileLoop:
    WHILESTATEMENT (R_*) logicalExpression (R__ innerBlock)? R__ ENDBLOCK R_ LOOPSTATEMENT;
// for <ident> from <a> to <b> [by <c>] <line break> [body] <line break> end loop
// Whitespace is mandatory (R_) around the identifier and between `end` and
// `loop`, and optional (R_*) around the bound expressions.
forLoop:
FORSTATEMENT R_ IDENT R_ FROMSTATEMENT (R_*) logicalExpression (
R_*
) TOSTATEMENT (R_*) logicalExpression (
(R_*) BYSTATEMENT (R_*) logicalExpression
)? (R__ innerBlock)? R__ ENDBLOCK R_ LOOPSTATEMENT;
// for <ident> in <expr> <line break> [body] <line break> end loop
forInLoop:
FORSTATEMENT R_ IDENT R_ INSTATEMENT (R_*) logicalExpression (
R__ innerBlock
)? R__ ENDBLOCK R_ LOOPSTATEMENT;
// if <cond> [then] [body] { else if <cond> [then] [body] } [ else [body] ] end if
// The `then` keyword is optional and may be separated from the condition by
// whitespace that includes line breaks. Each `else` must be preceded by a
// whitespace run containing a line break (R__), and every body is optional.
ifThenElse:
IFSTATEMENT (R_*) logicalExpression ((R__ | R_)*) THENSTATEMENT? (
R__ innerBlock
)? (
R__ ELSESTATEMENT R_ IFSTATEMENT (R_*) logicalExpression (
(R__ | R_)*
) THENSTATEMENT? (R__ innerBlock)?
)* (R__ ELSESTATEMENT (R__ innerBlock)?)? R__ ENDBLOCK R_ IFSTATEMENT;
// function <name>(<params>) <line break> [body] <line break> end function
// Parameter list: a first IDENT that either carries `= default` or is followed
// by further plain IDENTs, then any number of `, IDENT = default` pairs.
// Horizontal whitespace is optional around every identifier, comma and '='.
functionDef:
FUNCTIONSTATEMENT R_ IDENT (R_*) '(' (
(R_*) IDENT (
(R_*) EQUALS (R_*) defaultValue
| ((R_*) ',' (R_*) IDENT)*
) ((R_*) ',' (R_*) IDENT (R_*) EQUALS (R_*) defaultValue)*
)? (R_*) ')' (R__ innerBlock)? R__ ENDBLOCK R_ FUNCTIONSTATEMENT;
// try [body] catch <ident> [handler] end try; both bodies are optional.
tryCatch:
TRYSTATEMENT (R__ innerBlock)? R__ CATCHSTATEMENT R_ IDENT (
R__ innerBlock
)? R__ ENDBLOCK R_ TRYSTATEMENT;
// throw <primaryExpression>; whitespace after the keyword is mandatory.
throwExp: THROWSTATEMENT R_ primaryExpression;
// Anonymous function: same parameter list as functionDef but without a name.
// The body is either a block closed by `end function` or a single expression
// on the same line.
anonFunctionDef:
FUNCTIONSTATEMENT (R_*) '(' (
(R_*) IDENT (
(R_*) EQUALS (R_*) defaultValue
| ( (R_*) ',' (R_*) IDENT)*
) ((R_*) ',' (R_*) IDENT (R_*) EQUALS (R_*) defaultValue)*
)? (R_*) ')' (
((R__ innerBlock)? R__ ENDBLOCK R_ FUNCTIONSTATEMENT)
| (R_*) expression
);
// Two forms, both using the `<-` operator:
//   1. <name>(<params>) <- <expr>        short function definition
//   2. <target> {, <target>} <- <expr>   assignment to one or more targets,
//                                        each a PRIMITIVE or an `assigned`
// The parameter list has the same shape as in functionDef / anonFunctionDef,
// including the optional whitespace before the comma of each
// `, IDENT = default` pair, so `f(a = 1 , b = 2) <- ...` is accepted here just
// as it is in a `function ... end function` definition.
assignment:
    IDENT (R_*) '(' (
        (R_*) IDENT (
            (R_*) EQUALS (R_*) defaultValue
            | ((R_*) ',' (R_*) IDENT)*
        ) ((R_*) ',' (R_*) IDENT (R_*) EQUALS (R_*) defaultValue)*
    )? (R_*) ')' (R_*) '<-' (R_*) logicalExpression
    | (PRIMITIVE | assigned) (
        (R_*) ',' (R_*) (PRIMITIVE | assigned)
    )* (R_*) '<-' (R_*) logicalExpression;
// Assignment target: an identifier with an optional selector directly attached
// (the rule allows no whitespace between them).
assigned: IDENT selector?;
// Binary operator ladder, loosest binding first:
//   OR -> XOR -> AND -> equality -> relational -> additive -> multiplicative.
// Each level is `<next level> ( ws OP ws <next level> )*` with optional
// horizontal whitespace (R_*) on both sides of the operator.
logicalExpression:
booleanXORExpression ((R_*) OR (R_*) booleanXORExpression)*;
OR: '||' | 'or';
booleanXORExpression:
booleanAndExpression ((R_*) XOR (R_*) booleanAndExpression)*;
XOR: 'xor';
booleanAndExpression:
equalityExpression ((R_*) AND (R_*) equalityExpression)*;
AND: '&&' | 'and';
equalityExpression:
relationalExpression (
(R_*) (EQUALS | NOTEQUALS) (R_*) relationalExpression
)*;
// Both '=' and '==' are the same EQUALS token; EQUALS is also used for
// parameter defaults in the function rules.
EQUALS: '=' | '==';
NOTEQUALS: '!=' | '<>';
relationalExpression:
additiveExpression (
(R_*) (LT | LTEQ | GT | GTEQ) (R_*) additiveExpression
)*;
LT: '<';
LTEQ: '<=';
GT: '>';
GTEQ: '>=';
additiveExpression:
multiplicativeExpression (
(R_*) (PLUS | MINUS) (R_*) multiplicativeExpression
)*;
PLUS: '+';
MINUS: '-';
multiplicativeExpression:
arrayExpression (
(R_*) (MULT | DIV | MOD) (R_*) arrayExpression
)*;
MULT: '*';
DIV: '/';
MOD: '%' | 'mod';
// Range expression: one operand optionally followed by up to two more
// ':'-separated operands. The rule contains no whitespace tokens, and
// whitespace is not skipped by the lexer, so the ':' must touch both operands.
arrayExpression:
negationExpression (':' negationExpression)? (
':' negationExpression
)?;
// Optional leading unary minus applied to a whole power expression.
negationExpression:
MINUS (R_*) powerExpression
| powerExpression;
// base ^ exp ^ exp ...; each exponent may itself be negated.
powerExpression:
unaryExpression ((R_*) POW (R_*) unaryOrNegate)*;
unaryOrNegate: MINUS (R_*) unaryExpression | unaryExpression;
POW: '^';
// Logical not, written '!' or `not`.
unaryExpression:
('!' | 'not') (R_*) innerPrimaryExpression
| innerPrimaryExpression;
innerPrimaryExpression: selectionExpression;
// A primary expression followed by any number of selectors or call argument
// lists; no whitespace is allowed in front of a selector or an opening '('.
selectionExpression: primaryExpression ( selector | funCall)*;
// Call argument list: '(' [expr {, expr}] ')' with optional horizontal
// whitespace around the arguments and commas.
funCall:
'(' (
(R_*) logicalExpression (
(R_*) ',' (R_*) logicalExpression
)*
)? (R_*) ')';
// Parenthesised expression (whitespace inside the parentheses may include line
// breaks) or a value.
primaryExpression:
'(' ((R__ | R_)*) logicalExpression ((R__ | R_)*) ')'
| value;
// Every kind of operand the language accepts.
value:
number
| BOOL
| string
| material
| IDENT
| PRIMITIVE
| array
| anonFunctionDef
| newObject;
// Quantity with units: '{' <additiveExpression> <ws> <units> '}'. At least one
// horizontal whitespace token (R_) is required between the number and the
// units; whitespace just inside the braces is optional.
material:
LCURL (R_*) additiveExpression R_ unitMultiplicativeExpression (
R_*
) RCURL;
// Array literal delimited by LARR/RARR or by curly braces. Four alternatives:
// for each delimiter pair, either a list of `key: value` labels or a list of
// plain expressions. Elements are comma-separated and the surrounding
// whitespace may include line breaks; the element list may be empty.
array:
LARR (
((R__ | R_)*) label (
((R__ | R_)*) ',' ((R__ | R_)*) label
)*
)? ((R__ | R_)*) RARR
| LCURL (
((R__ | R_)*) label (
((R__ | R_)*) ',' ((R__ | R_)*) label
)*
)? ((R__ | R_)*) RCURL
| LARR (
((R__ | R_)*) logicalExpression (
((R__ | R_)*) ',' ((R__ | R_)*) logicalExpression
)*
)? ((R__ | R_)*) RARR
| LCURL (
((R__ | R_)*) logicalExpression (
((R__ | R_)*) ',' ((R__ | R_)*) logicalExpression
)*
)? ((R__ | R_)*) RCURL;
// new <ident> [ (args) ]; the argument list, when present, directly follows
// the identifier.
newObject: NEWSTATEMENT R_ IDENT funCall?;
// Values allowed as a parameter default in function definitions.
defaultValue: negnumber | number | BOOL | string | array;
// Element access: either a bracketed index list or a chain of dot accesses.
selector: (minarray | dotselector);
// Bracketed index list; every index is an expression or `*`. Only horizontal
// whitespace (no line breaks) is allowed inside the brackets.
minarray:
LARR (R_*) (logicalExpression | MULT) (
(R_*) ',' (R_*) (logicalExpression | MULT)
)* (R_*) RARR
| LCURL (R_*) (logicalExpression | MULT) (
(R_*) ',' (R_*) (logicalExpression | MULT)
)* (R_*) RCURL;
// One or more `.name` accesses.
dotselector: ('.' arrayName)+;
// A key: identifier, string literal or `*`.
arrayName: IDENT | STRING | MULT;
// Keyed array element `key : value`; the key is mandatory in this rule, and
// unkeyed elements are handled by the expression alternatives of `array`.
label:
arrayName ((R__ | R_)*) ':' ((R__ | R_)*) logicalExpression;
// Array delimiters: guillemets (U+00AB / U+00BB) or their ASCII spellings.
LARR: '\u00AB' | '<<';
RARR: '\u00BB' | '>>';
LCURL: '{';
RCURL: '}';
number: INTEGER | FLOAT;
// Negative numeric literal, used by defaultValue.
negnumber: '-' number;
// Digits with an optional exponent part introduced by 'e'.
// NOTE(review): the exponent digits use `*`, so "1e" and "1e+" also match as
// INTEGER - confirm that the consumer of the token text copes with that.
INTEGER: ('0' ..'9')+ ('e' ('+' | '-')? ('0' ..'9')*)?;
// Either [digits] '.' digits or digits '.', followed by the same optional
// exponent part as INTEGER.
FLOAT: (('0' ..'9')* '.' ('0' ..'9')+ | ('0' ..'9')+ '.') (
'e' ('+' | '-')? ('0' ..'9')*
)?;
BOOL: 'true' | 'false';
// Unit separator keyword used by unitMultiplicativeExpression.
PER: 'per';
// Unit expression ladder: `per` binds loosest (whitespace is mandatory on both
// sides of it), then '*' and '/', then a unit clump with optional `cubed` /
// `squared` suffixes, then '^' powers.
unitMultiplicativeExpression:
unitInnerMultiplicativeExpression (
R_ PER R_ unitInnerMultiplicativeExpression
)*;
unitInnerMultiplicativeExpression:
unitClump ((R_*) (MULT | DIV) (R_*) unitClump)*;
// The first alternative accepts an `INTEGER /` prefix in front of the unit.
unitClump: (INTEGER (R_*) DIV) (R_*) unitPowerExpression (
R_ CUBED
)? (R_ SQUARED)?
| unitPowerExpression (R_ CUBED)? (R_ SQUARED)?;
SQUARED: 'squared';
CUBED: 'cubed';
// unit ^ [-] number, repeatable.
unitPowerExpression:
unit ((R_*) POW ((R_*) MINUS)? (R_*) (INTEGER | FLOAT))*;
// A unit name made of one or more whitespace-separated identifiers, or a
// parenthesised unit expression.
unit:
IDENT (R_ IDENT)*
| '(' (R_*) unitMultiplicativeExpression (R_*) ')';
// Identifier: a letter followed by letters, digits or underscores. Only
// lower-case ranges are written; upper-case letters are covered by the
// grammar's `caseInsensitive = true` option.
IDENT: ('a' .. 'z') ('a' .. 'z' | '_' | '0' .. '9')*;
// Bracketed reference, in single-bracket `[...]` or double-bracket `[[...]]`
// form, containing one or more characters other than '[' and ']'.
PRIMITIVE:
LBRACKET (~('[' | ']'))+? RBRACKET
| LBRACKET LBRACKET (~('[' | ']'))+? RBRACKET RBRACKET;
LBRACKET: '[';
RBRACKET: ']';
// A single horizontal whitespace character: space, tab or form feed.
// NOTE(review): SPACE is referenced only from R_ and R__ in this grammar and is
// not declared as a fragment; a lone space still lexes as R_ because R_ is
// listed earlier - confirm whether `fragment` was intended.
SPACE: (' ' | '\t' | '\u000C');
string: STRING;
// Two literal forms:
//   '...'  single-quoted: shortest run of characters up to the next quote; no
//          escape sequence is defined for this form
//   "..."  double-quoted: a backslash-escaped quote (\") may appear inside
STRING: '\'' .*? '\'' | '"' ('\\"' | ~'"')* '"';
Amazing, thanks @scottfr! It's a project based around simulations (obviously!) to help support and improve our democratic communication and sense-making. I'm at \<email address redacted>; it'd be great to send you a description via email or share it over a video call.
Hi @scottfr, I was hoping to play with this code and extend the grammar to include different ways of referencing other values. Would it be possible to see the Formula.g file that the ANTLR 3 FormulaLexer and FormulaParser were generated from, please? Thank you! James