Open SchrodingerZhu opened 3 years ago
This part is really tedious to do without a lexer... I will mimic the approach of the shunting-yard algorithm. @ICEYSELF
[WIP]
grammar Selfish;
// Entry rule: one statement list followed by end of input.
// Any number of WS / LINE_ENDING tokens may surround the statement list.
// LINE_COMMENT is skipped but the line ending after it is still a token, so a
// file that opens with several blank or comment lines must be able to consume
// more than one LINE_ENDING here (hence `*`, not `?`).
module: (WS | LINE_ENDING)* stmt_list (WS | LINE_ENDING)* EOF;
// Single-quoted string. A quote character inside the string is written as two
// consecutive quotes (''); no other escape form is recognised by this rule.
SINGLE_STRING: '\'' ( ~'\'' | '\'\'')* '\'';
// Double-quoted string. The body, including backslash escapes, is described by
// the DBSTR_BODY fragment.
DOUBLE_STRING: '"' DBSTR_BODY '"';
// Backtick-delimited block. A backtick inside the block is written as two
// consecutive backticks (``). Consumed by the `fblock` parser rule.
FOREIGN_BLOCK: '`' ( ~ '`' | '``')* '`';
// Integer bodies for each radix. Digits may be grouped with single underscores
// between digit runs (e.g. 1_000, 0xFF_FF); a leading or trailing underscore is
// not accepted.
fragment DEC_INTEGER: DEC_DIGIT+ ( '_' DEC_DIGIT+)*;
fragment HEX_INTEGER: '0x' HEX_DIGIT+ ( '_' HEX_DIGIT+)*;
fragment BIN_INTEGER: '0b' BIN_DIGIT+ ( '_' BIN_DIGIT+)*;
fragment OCT_INTEGER: '0o' OCT_DIGIT+ ( '_' OCT_DIGIT+)*;
// Optionally signed integer literal in any of the four radices.
// The binary/octal alternatives must reference the *_INTEGER fragments (with
// their 0b / 0o prefixes), not the single-digit fragments.
INTEGER: ('+' | '-')? (DEC_INTEGER | HEX_INTEGER | BIN_INTEGER | OCT_INTEGER);
// Optionally signed floating-point literal. The alternatives are parenthesised
// so that the sign applies to both the plain and the scientific form.
FLOAT: ('+' | '-')? (NORM_FLOAT | SCI_FLOAT);
// Keyword tokens. They are declared before IDENTIFIER, so when a keyword and
// IDENTIFIER match the same text the keyword token is the one produced.
RETURN: 'return';
// 'fn' introduces a lambda, 'fun' a named function (see `lambda` / `function`).
LAMBDA_HEAD: 'fn';
FUNCTION_HEAD: 'fun';
MODULE_HEAD: 'module';
IMPORT_HEAD: 'import';
THROW_HEAD: 'throw';
TRY: 'try';
CATCH: 'catch';
FINALLY: 'finally';
WHILE: 'while';
BREAK: 'break';
IF: 'if';
ELSE: 'else';
ELIF: 'elif';
TRUE: 'true';
FALSE: 'false';
DO: 'do';
FOR: 'for';
IN : 'in';
// 'open' is an optional prefix of an import statement; 'as' introduces an alias
// in import/export statements.
OPEN_HEAD: 'open';
AS: 'as';
// 'public' is an optional prefix of a function definition; 'export' starts an
// export statement.
PUBLIC_DEC: 'public';
EXPORT_DEC: 'export';
// Redirection symbols, consumed by the `redirect` parser rule.
RD_SYM: ('<>' | '<' | '>' );
// Operator characters; accepted as part of a `bareword`.
// NOTE(review): '/' is also used as a literal inside `bareword`, which creates
// an implicit token for the same text -- confirm which token the lexer emits.
ARITH_OP: ('+' | '-' | '*' | '/' | '^' | '%');
NIL : 'nil';
// Sigil that starts a variable reference (see `varname`).
VAR_START : '$';
// Identifier: the first character is a Unicode letter, a Unicode number or '_';
// later characters may additionally be from the Other_Symbol category.
IDENTIFIER : [\p{Letter}\p{Number}_][\p{Letter}\p{Number}\p{General_Category=Other_Symbol}_]*;
// Plain decimal float: digits on both sides of the dot are required.
fragment NORM_FLOAT: DEC_INTEGER '.' DEC_INTEGER;
// Scientific notation: optional fractional part, mandatory exponent with an
// optional sign.
fragment SCI_FLOAT : DEC_INTEGER ('.' DEC_INTEGER)? ('E' | 'e') ('+' | '-')? DEC_INTEGER;
// Single-digit character classes for each radix.
fragment BIN_DIGIT: '0' .. '1';
fragment OCT_DIGIT: '0' .. '7';
fragment DEC_DIGIT: '0' .. '9';
fragment HEX_DIGIT: '0' .. '9' | 'a' .. 'f' | 'A' .. 'F';
// Escape payloads; each follows a backslash inside a double-quoted string.
// Octal escape: exactly three octal digits.
fragment OCT_ESC : OCT_DIGIT OCT_DIGIT OCT_DIGIT;
// Hex escape: 'x' followed by exactly two hex digits.
fragment HEX_ESC : 'x' HEX_DIGIT HEX_DIGIT;
// Body of a double-quoted string: any character except '"' and '\', or a
// backslash followed by one of the escape payloads. HEX_ESC is included so the
// \xNN form declared above is actually accepted.
fragment DBSTR_BODY: ( ~ ["\\] | '\\' (OCT_ESC | HEX_ESC | SMALL_ESC | LARGE_ESC | SPECIAL_ESC ))*;
// 'u' followed by exactly four hex digits.
fragment SMALL_ESC : 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
// 'U' followed by exactly eight hex digits.
fragment LARGE_ESC : 'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
// Single-character escapes, plus escaped backslash and double quote.
fragment SPECIAL_ESC : [abfnrtv\\"];
// Thin parser-rule wrappers around single tokens.
single_string: SINGLE_STRING;
double_string: DOUBLE_STRING;
// NOTE(review): rule name is spelled `identifer` (sic); it is referenced under
// this spelling throughout the grammar.
identifer: IDENTIFIER;
integer: INTEGER;
bool: TRUE | FALSE;
floating: FLOAT;
// A bareword is one or more adjacent identifier / number / operator /
// punctuation tokens with no whitespace token between them.
bareword: (IDENTIFIER | INTEGER | FLOAT | ARITH_OP | ':'| '.' | '/' | '~' | '=' | '@' | '!' | '?')+;
// Variable reference: '$' followed by a (possibly ':'-qualified) name.
varname: VAR_START varpart;
// One or more identifiers separated by ':' (e.g. a:b:c).
varpart: (identifer ':')* identifer;
string : single_string | double_string ;
// Smallest value-like units of an expression.
atomic : bool | integer | floating | bareword | varname | compound | nil;
// A string immediately followed by any number of further parts (parenthesised
// expressions, variables, member accesses, indexings, strings) with no
// whitespace token in between.
compound : string (paren_expr | varname | member | indexing | string)* ;
// Parenthesised expression; at most one WS or LINE_ENDING token is allowed on
// each side of the inner expression.
paren_expr : '(' ( (WS | LINE_ENDING)? expr ) (WS | LINE_ENDING)? ')';
// Redirection: optional leading INTEGER, a redirect symbol, then one of
//   '-'            a literal dash,
//   '&' integer    an ampersand followed by an integer,
//   a primitive or parenthesised expression (optionally preceded by WS).
redirect : INTEGER? RD_SYM ('-' | '&' integer | WS? ( primitive | paren_expr ));
// Parameter list of a lambda or function: zero or more identifiers separated
// by commas, enclosed in parentheses. Whitespace and line endings are allowed
// after '(', around each ',' and before ')'.
// The leading filler is (WS | LINE_ENDING)*, matching the trailing one; letting
// identifiers match there would make the rule ambiguous and would accept
// parameter lists without commas.
arglist: '(' (WS | LINE_ENDING)* ( identifer ((WS | LINE_ENDING)* ',' (WS | LINE_ENDING)* identifer)* )? (WS | LINE_ENDING)* ')';
// Anonymous function: 'fn', a parameter list, then a block body.
lambda: LAMBDA_HEAD (WS | LINE_ENDING)* arglist (WS | LINE_ENDING)* block;
// Brace-delimited, possibly empty, statement list.
block: '{' (WS | LINE_ENDING)* stmt_list? (WS | LINE_ENDING)* '}';
// Foreign block: a bracketed tag followed by a backtick-delimited FOREIGN_BLOCK.
fblock: tag (WS | LINE_ENDING)? FOREIGN_BLOCK;
// Assignment: the target is a plain (non-'$') name, an indexing or a member
// access; the value is an expression.
assignment : (varpart | indexing | member) (WS | LINE_ENDING)? '=' (WS | LINE_ENDING)? expr;
// 'return' with an optional value.
return_stmt: RETURN (WS expr)?;
// 'module <name>'.
module_stmt: MODULE_HEAD WS identifer;
// '[open] import <path> [as <alias>]'.
import_stmt: (OPEN_HEAD WS)? IMPORT_HEAD WS varpart (WS AS WS identifer)?;
// 'export <path> [as <alias>]'.
export_stmt: EXPORT_DEC WS varpart (WS AS WS identifer)?;
// Named function: optional 'public', 'fun', a bareword name, a parameter list,
// and either a normal block or a foreign block as the body.
function: (PUBLIC_DEC WS)? FUNCTION_HEAD (WS | LINE_ENDING)* bareword (WS | LINE_ENDING)* arglist (WS | LINE_ENDING)* (block | fblock);
// try block with an optional catch block and an optional finally block
// (in that order).
try_stmt: TRY (WS | LINE_ENDING)*
block
((WS | LINE_ENDING)* catch_block)?
((WS | LINE_ENDING)* final_block)?;
// 'throw <expr>'.
throw_stmt: THROW_HEAD (WS | LINE_ENDING)* expr;
// if / elif* / else? chain; every branch body is a block.
if_stmt: IF (WS | LINE_ENDING)* expr (WS | LINE_ENDING)* block
((WS | LINE_ENDING)* elif_block)*
((WS | LINE_ENDING)* else_block)?;
elif_block: ELIF (WS | LINE_ENDING)* expr (WS | LINE_ENDING)* block;
else_block: ELSE (WS | LINE_ENDING)* block;
// 'catch <identifier> { ... }'.
catch_block: CATCH (WS | LINE_ENDING)* identifer (WS | LINE_ENDING)* block;
final_block: FINALLY (WS | LINE_ENDING)* block;
// 'break' with an optional identifier operand.
break_stmt: BREAK ((WS | LINE_ENDING)* identifer)?;
// while loop: condition, optional bracketed tag, body, optional else block.
while_stmt: WHILE (WS | LINE_ENDING)* expr ((WS | LINE_ENDING)* tag)? (WS | LINE_ENDING)* block
((WS | LINE_ENDING)* else_block)?;
// for loop: 'for <identifier> in <expr>', optional bracketed tag, body.
for_stmt: FOR (WS | LINE_ENDING)* identifer (WS | LINE_ENDING)* IN (WS | LINE_ENDING)* expr ((WS | LINE_ENDING)* tag)? (WS | LINE_ENDING)* block;
// Bracketed identifier; used by loops and by foreign blocks.
tag: '[' (WS | LINE_ENDING)* identifer (WS | LINE_ENDING)* ']';
nil: NIL;
// One or more statements. Consecutive statements are separated by at least one
// ';' or LINE_ENDING (with optional WS around each separator); trailing
// separators are allowed.
stmt_list: stmt ( WS? ((';' | LINE_ENDING) WS?)+ stmt)* (';' | LINE_ENDING)*;
// A single statement. `expr` is listed first, ahead of `assignment` and the
// keyword-introduced statements.
stmt
: expr
| assignment
| block
| fblock
| return_stmt
| function
| module_stmt
| import_stmt
| export_stmt
| try_stmt
| throw_stmt
| if_stmt
| break_stmt
| while_stmt
| for_stmt
;
// 'do { ... }': a block usable in expression position (it is one of the
// alternatives of `primitive`).
do_expr: DO (WS | LINE_ENDING)* block;
// Operand of an expression. `member` is tried before `indexing`, and both
// before `atomic`, because a member access starts with a variable or an
// indexing and an indexing starts with a variable.
// Each alternative appears exactly once (a duplicated `indexing` alternative
// adds nothing but ambiguity).
primitive: member
| indexing
| atomic
| lambda
| paren_expr
| do_expr;
// Subscript on a variable: $name[expr], with optional whitespace / line
// endings inside the brackets.
indexing : varname WS? '[' (WS | LINE_ENDING)*? expr (WS | LINE_ENDING)*? ']';
// One or more '.name' accesses applied to a variable or an indexing.
member: (varname | indexing) ('.' varpart)+;
// Expression.
//   1st alternative: primitives separated by whitespace or a backslash-newline
//                    continuation (INLINE_WS), followed by any number of redirects.
//   2nd alternative: expr '|' expr
//   3rd alternative: expr '&&' expr
//   4th alternative: expr '&' followed by zero or more further expressions
// The binary alternatives are left-recursive; in ANTLR4 the earlier-listed
// alternative binds tighter.
expr : primitive ( (WS | INLINE_WS)+ primitive )* (WS? redirect)*
| expr ((WS | INLINE_WS)+)? '|' ((WS | INLINE_WS)+)? expr
| expr ((WS | INLINE_WS)+)? '&&' ((WS | INLINE_WS)+)? expr
| expr ((WS | INLINE_WS)+)? '&' (((WS | INLINE_WS)+)? expr)*;
// Backslash immediately followed by a line ending; `expr` accepts it wherever
// it accepts WS, so an expression can continue on the next line.
INLINE_WS: '\\' ('\r\n' | '\r' | '\n' );
// Line endings are real tokens (not skipped): they act as statement separators.
LINE_ENDING : ('\r\n' | '\r' | '\n' );
// Horizontal whitespace is also a real token; the parser rules place it
// explicitly.
WS : [ \t]+;
// '#' up to (not including) the end of the line; discarded by the lexer.
LINE_COMMENT: '#' ~( '\r' | '\n' )* -> skip;
@ICEYSELF @ice1000 Control flow remains to be determined. I need to prepare for my Monday midterm now. Please have a look, or give me a patch directly if you have time.
Also, there are bugs: for example, `*`
is actually causing a conflict... Would changing it to `mul`
be a good idea?
now we have something like
module test
import a:b:c as d
export internal:api as interface
print "welcome to use selfish language"
public fun a ( x ) { return x; }
f = fn (x) { return + $x 1 }
export f as add
Third update; I think it is already quite complete:
module test
for x in $arg [loop] {
print $x
if (= $x $env[+ 2 (* 4 5)]) {
print $x
break loop
}
}
while true {
try {
throw exception "some-exception"
} catch e {
# print message
print $e.message
} finally {
print "done"
}
}
Another example:
do { cat 1.txt } \
| fn (x) { return length x }
I am going to handle parsing the eval expressions.
Precedence:
- `<redirects>`
- `|` (pipeline)
- `&&` (continuous job)
- `a & b` form

This follows from fish and bash.
Question:
Could we also introduce something like a parallel pipeline? A pipeline is already parallel according to bash. I guess the pipeline will be one of the hardest things to implement later. Basically, it mixes up our functions and commands. What's worse, we want the pipeline to pass our language objects. I initially wanted to let a command return an object containing some fields like
We may need to think carefully about the semantics of all this mess.
Maybe we can consider something like Future and combine the things in an async environment