antlr / antlr4

ANTLR (ANother Tool for Language Recognition) is a powerful parser generator for reading, processing, executing, or translating structured text or binary files.
http://antlr.org
BSD 3-Clause "New" or "Revised" License
17.19k stars 3.29k forks source link

Problem with lexer #1387

Closed ikeas68 closed 7 years ago

ikeas68 commented 7 years ago

with a simple grammar

/**
 * Combined lexer/parser grammar named Test.
 *
 * The parser accepts a single expression followed by EOF. The lexer defines
 * three fixed keywords (A, B, C), two identifier-like tokens (MNEMONIQUE and
 * PILOT) that share the same character pattern and are gated by semantic
 * predicates, and a catch-all token SK.
 */
grammar Test;

// Java package declaration emitted into the generated sources.
@header
{
    package x.calcul.antlr;
}

// Channel numbers added to the generated lexer. EXTRA is used by WS below;
// COMMENTS is declared but not used anywhere in this grammar.
@lexer::members {
    public static final int EXTRA = 1;
    public static final int COMMENTS = 2;
}

// Parser rule: one expr followed by end of input (alternative labelled "line").
 compute
  : expr EOF        # line
  ;

// Whitespace
// NOTE(review): in the disabled NEWLINE rule below, 'r' was presumably meant to be '\r' -- confirm before re-enabling.
//NEWLINE           : '\r\n' | 'r' | '\n';          
// Runs of tabs and spaces are routed to the EXTRA channel instead of the default one.
WS                  : [\t ]+ -> channel(EXTRA);

// Fixed keywords. C ('abcde') is the concatenation of A ('abc') and B ('de').
A                   : 'abc';
B                   : 'de' ;
C                   : 'abcde' ;
// MNEMONIQUE and PILOT match exactly the same characters; only the trailing
// semantic predicate can tell them apart. As written, both predicates are the
// constant false, so they act as placeholders: the real decision has to come
// from a lexer subclass that overrides sempred().
MNEMONIQUE          : [A-Za-z\.0-9_]+ { false }? ; 
PILOT               : [A-Za-z\.0-9_]+ { false }? ;

// NOTE(review): inside [...] the quote, '|' and '[' characters are literal set
// members, so this set excludes ' | [ in addition to space, comma and '='.
// Presumably ~[ ,=]+ was intended -- confirm.
SK                  : ~[' ' | ',' [ '=' ]+ ;

// Parser rule with three labelled alternatives: A '=' (fa), A ',' B (fb), C (fc).
expr    : A '='         # fa
        | A ',' B       # fb
        | C             # fc
        ;

and an overriding lexer class

/**
 * Lexer subclass that resolves the MNEMONIQUE and PILOT semantic predicates at
 * run time by looking the current lexeme up in caller-supplied name lists.
 *
 * <p>Callers fill the lists through {@link #getMnemonics()} and
 * {@link #getPilots()} before tokenizing.
 */
public class TestExtendLexer extends TestLexer {

    /** Names for which the MNEMONIQUE predicate succeeds. */
    List<String> mnemonics = new ArrayList<>();

    /** Names for which the PILOT predicate succeeds. */
    List<String> pilots = new ArrayList<>();

    /**
     * Creates a lexer reading from the given character stream.
     *
     * @param input the characters to tokenize
     */
    public TestExtendLexer(final CharStream input) {
        super(input);
    }

    /**
     * Evaluates the semantic predicate of the lexer rule identified by
     * {@code ruleIndex}.
     *
     * <p>{@code ruleIndex} is an index into {@link #getRuleNames()}, not a token
     * type. The constants {@code TestLexer.MNEMONIQUE} and {@code TestLexer.PILOT}
     * are token types and are numbered differently, so using them as case labels
     * makes the wrong rule's check run. The rule is therefore identified by name.
     *
     * @param _localctx rule context supplied by the runtime; passed through unchanged
     * @param ruleIndex index of the lexer rule that owns the predicate
     * @param predIndex index of the predicate; only used when delegating
     * @return {@code true} if the text matched so far is acceptable for the rule
     */
    @Override
    public boolean sempred(final RuleContext _localctx, final int ruleIndex, final int predIndex) {
        final String[] ruleNames = getRuleNames();
        if (ruleIndex >= 0 && ruleIndex < ruleNames.length) {
            switch (ruleNames[ruleIndex]) {
            case "MNEMONIQUE":
                return checkMnemonic(getText());
            case "PILOT":
                return checkPilot(getText());
            default:
                break;
            }
        }
        // Any other predicate keeps the behaviour generated from the grammar.
        return super.sempred(_localctx, ruleIndex, predIndex);
    }

    /**
     * Returns the live, mutable list of accepted mnemonic names.
     *
     * @return the list consulted by the MNEMONIQUE predicate
     */
    public List<String> getMnemonics() {
        return mnemonics;
    }

    /**
     * Returns the live, mutable list of accepted pilot names.
     *
     * @return the list consulted by the PILOT predicate
     */
    public List<String> getPilots() {
        return pilots;
    }

    /**
     * Traces and reports whether {@code name} is a registered mnemonic.
     *
     * @param name candidate lexeme
     * @return {@code true} if {@code name} is in {@link #mnemonics}
     */
    private boolean checkMnemonic(final String name) {
        final boolean known = mnemonics.contains(name);
        System.out.println(String.format("MNEMONIC %-20.20s = %s", name, known));
        return known;
    }

    /**
     * Traces and reports whether {@code name} is a registered pilot.
     *
     * @param name candidate lexeme
     * @return {@code true} if {@code name} is in {@link #pilots}
     */
    private boolean checkPilot(final String name) {
        final boolean known = pilots.contains(name);
        System.out.println(String.format("PILOTES  %-20.20s = %s", name, known));
        return known;
    }

}

and test

// Create the extended lexer through a typed local so the name lists can be
// filled in without downcasting, then publish it through the existing variable.
final TestExtendLexer lx = new TestExtendLexer(input);
lexer = lx;

// Register the names the PILOT and MNEMONIQUE predicates should accept.
lx.getPilots().add("p0");
lx.getPilots().add("p1");
lx.getMnemonics().add("alpha");
lx.getMnemonics().add("beta");

// Feed the lexer's tokens to the generated parser.
final CommonTokenStream tokens = new CommonTokenStream(lexer);
parser = new TestParser(tokens);

the token is not correctly recognized

sample :

CHECK        = "alpha "
MNEMONIC a              = false
PILOTES  a                    = false
MNEMONIC al             = false
PILOTES  al                   = false
MNEMONIC alp           = false
PILOTES  alp                 = false
MNEMONIC alph          = false
PILOTES  alph                = false
**MNEMONIC alpha        = true**
PILOTES  alpha              = false
TOKENS       = PILOT
parrt commented 7 years ago

please simplify this example. I have no idea what's going on here. For example, get rid of the subclass. I need to see something that is specifically wrong with ANTLR.

ikeas68 commented 7 years ago

I have found a solution: adding `protected boolean checkMnemonic(){ return false; }` and `protected boolean checkPilot(){ return false; }` to the lexer members

and changing the predicates of the two rules to `MNEMONIQUE : [A-Za-z\.0-9_]+ { checkMnemonic() }? ;` and `PILOT : [A-Za-z\.0-9_]+ { checkPilot() }? ;`

/**
 * Combined lexer/parser grammar named Test (revised version).
 *
 * The MNEMONIQUE and PILOT predicates now call hook methods declared in the
 * lexer members, so a lexer subclass can override those hooks to decide which
 * token type an identifier-like lexeme gets.
 */
grammar Test;

// Java package declaration emitted into the generated sources.
@header
{
    package x.calcul.antlr;
}

// Members added to the generated lexer: channel numbers (EXTRA is used by WS;
// COMMENTS is not used in this grammar) and two overridable predicate hooks
// that reject everything by default.
@lexer::members {
    public static final int EXTRA = 1;
    public static final int COMMENTS = 2;

    protected boolean checkMnemonic(){ return false; }
    protected boolean checkPilot(){ return false; }
}

// Parser rule: one expr followed by end of input (alternative labelled "line").
 compute
  : expr EOF        # line
  ;

// Whitespace
// NOTE(review): in the disabled NEWLINE rule below, 'r' was presumably meant to be '\r' -- confirm before re-enabling.
//NEWLINE           : '\r\n' | 'r' | '\n';          
// Runs of tabs and spaces are routed to the EXTRA channel instead of the default one.
WS                  : [\t ]+ -> channel(EXTRA);

// Fixed keywords. C ('abcde') is the concatenation of A ('abc') and B ('de').
A                   : 'abc';
B                   : 'de' ;
C                   : 'abcde' ;
// Same character pattern for both rules; each is accepted only when its hook
// method returns true for the lexer's current state.
MNEMONIQUE      : [A-Za-z\.0-9_]+ { checkMnemonic() }? ; 
PILOT               : [A-Za-z\.0-9_]+ { checkPilot() }? ;

// Previous attempt at SK, kept for reference.
//SK                    : ~[' ' | ',' | '=' ]+ ;
// Catch-all: any run of characters other than a space.
// NOTE(review): this also matches ',' and '=', which expr uses as literals -- confirm that overlap is intended.
SK                  : ~[ ]+ ;

// Parser rule with five labelled alternatives; the last two accept a single
// MNEMONIQUE or PILOT token.
expr    : A '='     # fa
        | A ',' B       # fb
        | C         # fc
        | MNEMONIQUE     # mnemonic
        | PILOT         # pilot
        ;

and finally change the extended class

/**
 * Lexer subclass that implements the {@code checkMnemonic()} and
 * {@code checkPilot()} hooks by looking the lexer's current text up in
 * caller-supplied name lists.
 */
public class TestExtendLexer extends TestLexer {

    /** Names accepted by {@link #checkMnemonic()}. */
    List<String> mnemonics = new ArrayList<>();

    /** Names accepted by {@link #checkPilot()}. */
    List<String> pilots = new ArrayList<>();

    /**
     * Creates a lexer reading from the given character stream.
     *
     * @param input the characters to tokenize
     */
    public TestExtendLexer(final CharStream input) {
        super(input);
    }

    /** @return the live, mutable list of accepted mnemonic names */
    public List<String> getMnemonics() {
        return mnemonics;
    }

    /** @return the live, mutable list of accepted pilot names */
    public List<String> getPilots() {
        return pilots;
    }

    /**
     * Hook for the MNEMONIQUE predicate.
     *
     * @return {@code true} if the lexer's current text is a registered mnemonic
     */
    @Override
    protected boolean checkMnemonic() {
        return traceLookup("MNEMONIC %-20.20s = %s", mnemonics);
    }

    /**
     * Hook for the PILOT predicate.
     *
     * @return {@code true} if the lexer's current text is a registered pilot
     */
    @Override
    protected boolean checkPilot() {
        return traceLookup("PILOTES  %-20.20s = %s", pilots);
    }

    /**
     * Looks the lexer's current text up in {@code known}, prints one trace line
     * and returns the lookup result.
     *
     * @param traceFormat format string taking the text and the lookup result
     * @param known       list of accepted names
     * @return {@code true} if the current text is contained in {@code known}
     */
    private boolean traceLookup(final String traceFormat, final List<String> known) {
        final String lexeme = getText();
        final boolean found = known.contains(lexeme);
        System.out.println(String.format(traceFormat, lexeme, found));
        return found;
    }
}

and it works now