Chevrotain / chevrotain

Parser Building Toolkit for JavaScript
https://chevrotain.io
Apache License 2.0
2.48k stars 204 forks source link

Parsing ends prematurely #1868

Open yanny7 opened 1 year ago

yanny7 commented 1 year ago

I encountered a problem with error recovery: in a specific case, recovery stops at an error and cannot continue (it fails inside an OR alternative).

working example:

/**
 * Playground reproduction for the recovery problem described in this issue.
 *
 * Builds a lexer and a CST parser (with `recoveryEnabled: true`) for a tiny
 * line-oriented language and returns them in the shape the playground expects.
 *
 * Grammar, as defined by the rules below:
 *   esl        := actions*
 *   actions    := if | label | assignment | EOL
 *   if         := IF (THEN | GOTO ObjName) EOL
 *   label      := ObjName Colon EOL
 *   assignment := ObjName Assign EOL
 *
 * NOTE(review): the "json" names (function, token list, lexer, parser) do not
 * describe this grammar; presumably they are left over from the playground's
 * JSON template.
 */
(function jsonGrammarOnlyExample() {
  // ----------------- Lexer -----------------
  const createToken = chevrotain.createToken;
  const Lexer = chevrotain.Lexer;

  // Keywords are wrapped in \b so they only match whole words: in the input
  // "THENs" the THEN pattern does not match, and ObjName is the only pattern
  // below that matches that text.
  const IF = createToken({name: "IF", pattern: /\bIF\b/});
  const THEN = createToken({name: "THEN", pattern: /\bTHEN\b/});
  const GOTO = createToken({name: "GOTO", pattern: /\bGOTO\b/});
  // NOTE(review): this token's name is the literal ":=" while every other
  // token uses an identifier-like name; confirm this is intended.
  const Assign = createToken({name: ":=", pattern: /:=/});
  const Colon = createToken({name: "Colon", pattern: /:/});
  // One token per line terminator.
  // NOTE(review): because "\n" is the first alternative, the "\n\r" alternative
  // can never be chosen; the explicit sticky ("y") flag also looks unusual here
  // - confirm both against the Chevrotain lexer documentation.
  const EOL = createToken({name: "EOL", pattern: /\n|\n\r|\r\n|\r/y, line_breaks: true});
  const ObjName = createToken({name: "ObjName", pattern: /[a-zA-Z_]+/});
  // Spaces and tabs are placed in the SKIPPED group (line terminators are not
  // part of this pattern; they are the separate EOL token above).
  const WhiteSpace = createToken({
    name: "WhiteSpace",
    pattern: /[ \t]+/,
    group: Lexer.SKIPPED
  });

  // NOTE(review): the keywords are listed before ObjName and Assign before
  // Colon, presumably so the more specific pattern takes priority when two
  // patterns match at the same offset - confirm against the Chevrotain docs.
  const jsonTokens = [WhiteSpace, IF, THEN, GOTO, EOL, Assign, Colon, ObjName];

  const JsonLexer = new Lexer(jsonTokens, {
    // Less position info tracked, reduces verbosity of the playground output.
    positionTracking: "onlyStart"
  });

  // ----------------- parser -----------------
  const CstParser = chevrotain.CstParser;

  class JsonParser extends CstParser {
    constructor() {
      // Error recovery is switched on explicitly; it is the feature this
      // reproduction exercises.
      super(jsonTokens, {
        recoveryEnabled: true
      })

      const $ = this;

      // Entry rule (returned as `defaultRule` below): zero or more actions.
      $.RULE("esl", () => {
        $.MANY(() => $.SUBRULE($.actions));
      });

      // One action per alternative. Both `label` and `assignment` start with an
      // ObjName token, so two of the four alternatives share the same first token.
      $.RULE("actions", () => {
        $.OR([
          {ALT: () => $.SUBRULE($.if)},
          {ALT: () => $.SUBRULE($.label)},
          {ALT: () => $.SUBRULE($.assignment)},
          {ALT: () => $.CONSUME(EOL)}
        ]);
      });

      // IF followed by either THEN or "GOTO <ObjName>", terminated by EOL.
      $.RULE("if", () => {
        $.CONSUME(IF);
        $.OR1([
          {ALT: () => $.CONSUME(THEN)},
          {ALT: () => {
            $.CONSUME(GOTO);
            $.CONSUME(ObjName);
          }}
        ]);
        $.CONSUME(EOL);
      });

      // "<ObjName> :" terminated by EOL.
      $.RULE("label", () => {
        $.CONSUME(ObjName);
        $.CONSUME(Colon);
        $.CONSUME(EOL);
      });

      // "<ObjName> :=" terminated by EOL (no right-hand side in this reduced grammar).
      $.RULE("assignment", () => {
        $.CONSUME(ObjName);
        $.CONSUME(Assign);
        $.CONSUME(EOL);
      });

      // very important to call this after all the rules have been setup.
      // otherwise the parser may not work correctly as it will lack information
      // derived from the self analysis.
      this.performSelfAnalysis();
    }

  }

  // for the playground to work the returned object must contain these fields
  return {
    lexer: JsonLexer,
    parser: JsonParser,
    defaultRule: "esl"
  };
}())

When you type the following input (don't forget the newline after the last line):

IF THEN
IF THENs
IF THEN

Recovery stops on the second line.

The problem is that RecognizerEngine fails to find a path (in the orInternal function), and the parser is therefore considered stuck in manyInternalLogic.

Maybe it should skip the next token and try to recover again. Or, in my case, I would like to tell recovery to skip up to the next newline and continue parsing from there.

yanny7 commented 1 year ago

Note: after removing either the label or the assignment alternative from the actions rule, recovery succeeds and the parser continues correctly.