Closed simplelife963 closed 5 years ago
Are you using the latest version (v0.7.0)?
Can you try it with the latest commit on master? I just made a big change to how the standalone generator works.
I re-generated the stand-alone parser by using the latest commit on master and it solved my problem and works perfectly. Really appreciated!
Update: it fails to parse 'if' statements, for example:
# Minimal input: an `if` statement whose body is a single indented assignment.
simple_stmts = """
if a > 2:
    a = 1
"""
The error message:
UnexpectedCharacters: No terminal defined for 'a' at line 2 col 4
if a > 2:
Expecting: {'__ANON_16', '__ANON_4', 'FROM', '__ANON_1', 'OR', 'STAR', '__IGNORE_1', '__ANON_11', '__ANON_6', '__ANON_5', 'SLASH', '__ANON_9', 'MINUS', 'LSQB', '__ANON_22', 'ASYNC', 'FOR', '__ANON_18', 'COMMA', 'RPAR', '__ANON_2', 'AT', '__ANON_13', '__IGNORE_0', 'IS', 'IF', 'DOT', 'LESSTHAN', 'IN', '__ANON_17', 'SEMICOLON', 'COMMENT', '__ANON_21', '__ANON_12', 'MORETHAN', '__ANON_8', 'AS', '__ANON_19', 'RSQB', 'AMPERSAND', 'LPAR', 'ELSE', '__ANON_14', 'CIRCUMFLEX', 'AND', '__ANON_15', 'NOT', '__ANON_7', 'RBRACE', 'EQUAL', '__ANON_10', 'PLUS', 'VBAR', 'PERCENT', '__ANON_3', '_NEWLINE', 'COLON', '__ANON_20'}
Can you post a minimal example that works with a Lark instance, but fails as a standalone?
from lark.indenter import Indenter
# v001.py is generated by using the generator in version 0.7.0
from v001 import Lark_StandAlone as LS1
# v002.py is generated by using the latest commit on master
from v002 import Lark_StandAlone as LS2
class PythonIndenter(Indenter):
    """Indenter subclass handed to the stand-alone parsers as their ``postlex``."""

    # Names of the newline / indent / dedent terminals; they show up as token
    # types (_NEWLINE, _INDENT, _DEDENT) in the printed parse trees.
    NL_type = '_NEWLINE'
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'

    # Opening and closing bracket terminals.
    # NOTE(review): presumably indentation is not tracked while inside these
    # brackets -- confirm against lark's Indenter documentation.
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']

    # NOTE(review): presumably the number of columns a tab character counts
    # for -- confirm against lark's Indenter documentation.
    tab_len = 8
# Minimal input: an `if` statement whose body is a single indented assignment.
simple_stmts = """
if a > 2:
    a = 1
"""
# Constructor keyword arguments reused for every parser built in this script;
# note that they all receive the same PythonIndenter instance as `postlex`.
kwargs = dict(postlex=PythonIndenter())
def run_ls1():
    """Parse ``simple_stmts`` with the v0.7.0-generated parser and print the tree."""
    parser = LS1(**kwargs)
    print(parser.parse(simple_stmts))
def run_ls2():
    """Parse ``simple_stmts`` with the master-generated parser and print the tree."""
    parser = LS2(**kwargs)
    print(parser.parse(simple_stmts))
def run_ls1_twice():
    """Call ``run_ls1`` twice in the same process so the two printed trees can be compared."""
    run_ls1()
    run_ls1()
if __name__ == '__main__':
    # Parses properly.
    run_ls1()
    # Uncomment to reproduce: prints two different ASTs for the same input.
    # run_ls1_twice()
    # Uncomment to reproduce: raises UnexpectedCharacters.
    # run_ls2()
run_ls1() returns
Tree(file_input, [Tree(compound_stmt, [Tree(if_stmt, [Tree(comparison, [Tree(var, [Token(NAME, 'a')]), Token(MORETHAN, '>'), Tree(number, [Token(DEC_NUMBER, '2')])]), Tree(suite, [Tree(expr_stmt, [Tree(var, [Token(NAME, 'a')]), Tree(number, [Token(DEC_NUMBER, '1')])])])])])])
run_ls1_twice() returns
Tree(file_input, [Tree(compound_stmt, [Tree(if_stmt, [Tree(comparison, [Tree(var, [Token(NAME, 'a')]), Token(MORETHAN, '>'), Tree(number, [Token(DEC_NUMBER, '2')])]), Tree(suite, [Tree(expr_stmt, [Tree(var, [Token(NAME, 'a')]), Tree(number, [Token(DEC_NUMBER, '1')])])])])])])
Tree(_cb4_NonTerminal('file_input'), [Tree(_cb448_NonTerminal('__anon_star_0'), [Tree(_cb447_NonTerminal('__anon_star_0'), [Token(_NEWLINE, '\n')]), Tree(_cb142_NonTerminal('stmt'), [Tree(_cb237_NonTerminal('compound_stmt'), [Tree(_cb244_NonTerminal('if_stmt'), [Token(IF, 'if'), Tree(_cb263_NonTerminal('test'), [Tree(_cb272_NonTerminal('or_test'), [Tree(_cb274_NonTerminal('and_test'), [Tree(_cb277_NonTerminal('not_test'), [Tree(_cb279_NonTerminal('comparison'), [Tree(_cb281_NonTerminal('expr'), [Tree(_cb284_NonTerminal('xor_expr'), [Tree(_cb286_NonTerminal('and_expr'), [Tree(_cb287_NonTerminal('shift_expr'), [Tree(_cb289_NonTerminal('arith_expr'), [Tree(_cb292_NonTerminal('term'), [Tree(_cb293_NonTerminal('factor'), [Tree(_cb318_NonTerminal('power'), [Tree(_cb320_NonTerminal('await_expr'), [Tree(_cb323_NonTerminal('atom_expr'), [Tree(_cb338_NonTerminal('atom'), [Token(NAME, 'a')])])])])])])])])])])]), Tree(_cb493_NonTerminal('__anon_star_19'), [Tree(_cb308_NonTerminal('_comp_op'), [Token(MORETHAN, '>')]), Tree(_cb281_NonTerminal('expr'), [Tree(_cb284_NonTerminal('xor_expr'), [Tree(_cb286_NonTerminal('and_expr'), [Tree(_cb287_NonTerminal('shift_expr'), [Tree(_cb289_NonTerminal('arith_expr'), [Tree(_cb292_NonTerminal('term'), [Tree(_cb293_NonTerminal('factor'), [Tree(_cb318_NonTerminal('power'), [Tree(_cb320_NonTerminal('await_expr'), [Tree(_cb323_NonTerminal('atom_expr'), [Tree(_cb340_NonTerminal('atom'), [Tree(_cb440_NonTerminal('number'), [Token(DEC_NUMBER, '2')])])])])])])])])])])])])])])])])])]), Token(COLON, ':'), Tree(_cb262_NonTerminal('suite'), [Token(_NEWLINE, '\n '), Token(_INDENT, ' '), Tree(_cb487_NonTerminal('__anon_plus_16'), [Tree(_cb141_NonTerminal('stmt'), [Tree(_cb146_NonTerminal('simple_stmt'), [Tree(_cb150_NonTerminal('small_stmt'), [Tree(_cb157_NonTerminal('expr_stmt'), [Tree(_cb162_NonTerminal('testlist_star_expr'), [Tree(_cb263_NonTerminal('test'), [Tree(_cb272_NonTerminal('or_test'), [Tree(_cb274_NonTerminal('and_test'), 
[Tree(_cb277_NonTerminal('not_test'), [Tree(_cb278_NonTerminal('comparison'), [Tree(_cb281_NonTerminal('expr'), [Tree(_cb284_NonTerminal('xor_expr'), [Tree(_cb286_NonTerminal('and_expr'), [Tree(_cb287_NonTerminal('shift_expr'), [Tree(_cb289_NonTerminal('arith_expr'), [Tree(_cb292_NonTerminal('term'), [Tree(_cb293_NonTerminal('factor'), [Tree(_cb318_NonTerminal('power'), [Tree(_cb320_NonTerminal('await_expr'), [Tree(_cb323_NonTerminal('atom_expr'), [Tree(_cb338_NonTerminal('atom'), [Token(NAME, 'a')])])])])])])])])])])])])])])])])]), Tree(_cb463_NonTerminal('__anon_star_6'), [Token(EQUAL, '='), Tree(_cb162_NonTerminal('testlist_star_expr'), [Tree(_cb263_NonTerminal('test'), [Tree(_cb272_NonTerminal('or_test'), [Tree(_cb274_NonTerminal('and_test'), [Tree(_cb277_NonTerminal('not_test'), [Tree(_cb278_NonTerminal('comparison'), [Tree(_cb281_NonTerminal('expr'), [Tree(_cb284_NonTerminal('xor_expr'), [Tree(_cb286_NonTerminal('and_expr'), [Tree(_cb287_NonTerminal('shift_expr'), [Tree(_cb289_NonTerminal('arith_expr'), [Tree(_cb292_NonTerminal('term'), [Tree(_cb293_NonTerminal('factor'), [Tree(_cb318_NonTerminal('power'), [Tree(_cb320_NonTerminal('await_expr'), [Tree(_cb323_NonTerminal('atom_expr'), [Tree(_cb340_NonTerminal('atom'), [Tree(_cb440_NonTerminal('number'), [Token(DEC_NUMBER, '1')])])])])])])])])])])])])])])])])])])])])]), Token(_NEWLINE, '\n\n')])])]), Token(_DEDENT, '')])])])])])])
run_ls2() returns
v002.UnexpectedCharacters: No terminal defined for 'a' at line 2 col 4
if a > 2:
Expecting: {'OR', 'AT', '__ANON_1', '__ANON_4', 'DOT', '__ANON_19', 'FOR', 'COMMENT', 'IS', '__ANON_14', '__ANON_3', '__ANON_10', '__ANON_20', 'ASYNC', 'CIRCUMFLEX', 'COLON', 'PERCENT', 'RSQB', '__ANON_15', 'ELSE', 'IN', 'EQUAL', '__ANON_17', 'MINUS', 'PLUS', 'COMMA', 'FROM', '_NEWLINE', 'AND', '__ANON_5', '__ANON_9', '__ANON_11', 'SLASH', 'LESSTHAN', 'STAR', '__ANON_12', 'IF', 'RPAR', '__IGNORE_1', '__ANON_6', 'RBRACE', 'MORETHAN', '__ANON_7', '__ANON_16', '__ANON_22', 'AS', '__ANON_8', 'LPAR', '__IGNORE_0', 'AMPERSAND', 'LSQB', 'NOT', '__ANON_2', 'SEMICOLON', '__ANON_21', '__ANON_18', '__ANON_13', 'VBAR'}
Thanks Erezsh
We experimented and found a workaround for v0.7.0 that keeps parsing results consistent in stand-alone mode. Each time after building the AST, the program needs to remove the imported stand-alone module from the sys.modules cache and dynamically re-import it before instantiating another Lark_StandAlone object. Hopefully this workaround helps others.
from lark.indenter import Indenter
import sys
import importlib
# v001.py is generated by using the generator in version 0.7.0
# from v001 import Lark_StandAlone as LS1
class PythonIndenter(Indenter):
    """Indenter subclass handed to the stand-alone parser as its ``postlex``."""

    # Names of the newline / indent / dedent terminals used by the grammar.
    NL_type = '_NEWLINE'
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'

    # Opening and closing bracket terminals.
    # NOTE(review): presumably indentation is not tracked while inside these
    # brackets -- confirm against lark's Indenter documentation.
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']

    # NOTE(review): presumably the number of columns a tab character counts
    # for -- confirm against lark's Indenter documentation.
    tab_len = 8
# Minimal input: an `if` statement whose body is a single indented assignment.
simple_stmts = """
if a > 2:
    a = 1
"""
# Constructor keyword arguments reused on every run_ls1() call; note that each
# freshly imported parser receives the same PythonIndenter instance as `postlex`.
kwargs = dict(postlex=PythonIndenter())
def run_ls1():
    """Parse ``simple_stmts`` with a freshly imported stand-alone parser module.

    Workaround for the v0.7.0 generator: the generated module is imported
    dynamically and removed from ``sys.modules`` afterwards, so the next call
    re-executes the module instead of reusing the cached one.
    """
    module_name = 'v001'
    # Dynamically import the stand-alone module (re-executed if not cached).
    module = importlib.import_module(module_name)
    try:
        # Instantiate the Lark_StandAlone object and parse the input string.
        parser = module.Lark_StandAlone(**kwargs)
        print(parser.parse(simple_stmts))
    finally:
        # Drop the cached module even when parsing raises, so a later call
        # still starts from a fresh import.
        del sys.modules[module_name]
def run_ls1_twice():
    """Call ``run_ls1`` twice in the same process so the two printed trees can be compared."""
    run_ls1()
    run_ls1()
if __name__ == '__main__':
    # Both runs are expected to print the same tree.
    run_ls1_twice()
Thus, the results are consistent and the parser works perfectly.
Tree(file_input, [Tree(compound_stmt, [Tree(if_stmt, [Tree(comparison, [Tree(var, [Token(NAME, 'a')]), Token(MORETHAN, '>'), Tree(number, [Token(DEC_NUMBER, '2')])]), Tree(suite, [Tree(expr_stmt, [Tree(var, [Token(NAME, 'a')]), Tree(number, [Token(DEC_NUMBER, '1')])])])])])])
Tree(file_input, [Tree(compound_stmt, [Tree(if_stmt, [Tree(comparison, [Tree(var, [Token(NAME, 'a')]), Token(MORETHAN, '>'), Tree(number, [Token(DEC_NUMBER, '2')])]), Tree(suite, [Tree(expr_stmt, [Tree(var, [Token(NAME, 'a')]), Tree(number, [Token(DEC_NUMBER, '1')])])])])])])
I know what the problem is. I hope to fix it in the upcoming days.
Everything should work now in the latest master. Please check and let me know!
The parsing results are now consistent when using the latest code on master. We really appreciate your work, Erezsh!
Two parsers are both instantiated from the Lark_StandAlone class and parse the same string; however, the results are two different ASTs, as shown below. Could you please look into it? Thanks.