parboiled1 / parboiled-core

Parboiled1; core mechanics, without Scala support
Apache License 2.0
0 stars 0 forks source link

NullPointer Exception in ProxyMatcher #1

Open chbussler opened 10 years ago

chbussler commented 10 years ago

Hi, this is a follow-up from this: http://users.parboiled.org/NullPointer-Exception-in-ProxyMatcher-td4024292.html

This is

To reproduce, I'll include two files in the following. A file that has the parser, and a file that contains a main, that creates/uses the parser. Running the main will result in a prompt; type anything to see the parser throw the NullPointer exception.

Note that the parser contains a comment /* Alternative 1 / and / Alternative 2 */. Alternative 1 does not work, but Alternative 2 does. The difference is that the rule chain is shortened, but nothing else. Not sure if the parser hits a sizing limit internally, as aside from the depth of the rules there is no difference between Alternative 1 and 2.

Thanks a lot (and great initiative to keep parboiled for Java alive!),

Christoph

Parser.java

package repro.abc;

import org.parboiled.BaseParser;
import org.parboiled.Rule;
import org.parboiled.annotations.BuildParseTree;

@BuildParseTree
public class Parser extends BaseParser<Object> {

    public Rule InputLine() {
        return Sequence(
                R01(),
                EOI);
    }

    public Rule R01() {
        return Sequence(
                R60(),
                R02(),
                EOI);
    }

    public Rule R02() {
        return Sequence(
                R56(),
                Optional(
                        R03()
                ),
                Optional(
                        R04()
                ),
                R05(),
                R21()
        );
    }

    public Rule R03() {
        return R51();
    }

    public Rule R04() {
        return R55();
    }

    public Rule R05() {
        return Sequence(
                R06(),
                ZeroOrMore(
                        Sequence(
                                R44(),
                                R06()
                        )
                )
        );
    }

    public Rule R06() {
        return FirstOf(
                R17(),
                R07(),
                R16(),
                R15()
        );
    }

    public Rule R07() {
        return R08();
    }

    public Rule R08() {
        return R09();
    }

    public Rule R09() {
        return R10();
    }

    public Rule R10() {
        return R11();
    }

    public Rule R11() {
        return R12();
    }

    public Rule R12() {
        return R13();
    }

    public Rule R13() {
        return R14();
    }

    public Rule R14() {
        return FirstOf(
                Sequence(
                        R18(),
                        R45(),
                        R36(),
                        R60()
                ),
                Sequence(
                        R36(),
                        R60()
                )
        );
    }

    public Rule R15() {
        return Sequence(
                R19(),
                R60(),
                Sequence(
                        FirstOf(
                                R17(),
                                R07(),
                                R16()
                        ),
                        ZeroOrMore(
                                R44(),
                                FirstOf(
                                        R17(),
                                        R07(),
                                        R16()
                                )
                        )
                ),
                R20(),
                R60()
        );
    }

    public Rule R16() {
        return R43();
    }

    public Rule R17() {
        return Sequence(
                R18(),
                R45(),
                R43()
        );
    }

    public Rule R18() {
        return FirstOf(
                R24(),
                R41()
        );
    }

    public Rule R19() {
        return R49();

    }

    public Rule R20() {
        return R50();
    }

    public Rule R21() {
        return R22();
    }

    public Rule R22() {
        return Sequence(
                R53(),
                R23()
        );
    }

    public Rule R23() {
        return FirstOf(
                Sequence(
                        R24(),
                        Optional(
                                R26()
                        )),
                Sequence(
                        R27(),
                        R26()
                )
        );
    }

    public Rule R24() {
        return R25();
    }

    public Rule R25() {
        return Sequence(
                R39(),
                R45(),
                R40(),
                R60()
        );
    }

    public Rule R26() {
        return Sequence(
                Optional(
                        R52(),
                        R60()
                ),
                R41()
        );
    }

    public Rule R27() {
        return R28();
    }

    public Rule R28() {
        return R29();
    }

    public Rule R29() {
        return Sequence(
                R47(),
                R30(),
                R48()
        );
    }

    public Rule R30() {
        return R31();
    }

    public Rule R31() {
        return Sequence(
                R30(),
                R57(),
                R32()
        );
    }

    /* Alternative 1 */
    public Rule R32() {
        return R33();
    }

    /* Alternative 2 */
    /*
    public Rule R32() {
        return R02();
    }
    */

    public Rule R33() {
        return R34();
    }

    public Rule R34() {
        return R35();
    }

    public Rule R35() {
        return R02();
    }

    public Rule R36() {
        return FirstOf(
                Sequence(
                        R37(),
                        OneOrMore(
                                Sequence(
                                        R45(), R37()
                                )
                        )
                ),
                R37()
        );
    }

    public Rule R37() {
        return FirstOf(
                Sequence(
                        R46(),
                        OneOrMore(R65()),
                        OneOrMore(R38()),
                        R46()),
                Sequence(
                        R46(),
                        OneOrMore(R65()),
                        R46()),
                Sequence(
                        OneOrMore(R65()),
                        OneOrMore(R38())
                ),
                OneOrMore(R65())
        );
    }

    public Rule R38() {
        return Sequence(
                Ch('['),
                R42(),
                Ch(']')
        );
    }

    public Rule R39() {
        return R40();
    }

    public Rule R40() {
        return FirstOf(
                Sequence(
                        R46(),
                        OneOrMore(R66()),
                        R46()
                ),
                OneOrMore(R65()));
    }

    public Rule R41() {
        return R65();
    }

    public Rule R42() {
        return OneOrMore(R64());
    }

    public Rule R43() {
        return R59('*');
    }

    public Rule R44() {
        return R59(',');
    }

    public Rule R45() {
        return Ch('.');
    }

    public Rule R46() {
        return Ch('\'');
    }

    public Rule R47() {
        return Ch('(');
    }

    public Rule R48() {
        return Ch(')');
    }

    public Rule R49() {
        return Ch('{');
    }

    public Rule R50() {
        return Ch('}');
    }

    public Rule R51() {
        return R58("A1");
    }

    public Rule R52() {
        return R58("A2");
    }

    public Rule R53() {
        return R58("F");
    }

    public Rule R54() {
        return R58("I");
    }

    public Rule R55() {
        return R58("P");
    }

    public Rule R56() {
        return R58("S");
    }

    public Rule R57() {
        return R58("U");
    }

    public Rule R58(String string) {
        return Sequence(
                IgnoreCase(string),
                R60());
    }

    public Rule R59(char c) {
        return Sequence(
                Ch(c),
                R60());
    }

    public Rule R60() {
        return ZeroOrMore(
                FirstOf(
                        R62(),
                        R61()));
    }

    public Rule R61() {
        return FirstOf(
                Ch(' '),
                Ch('\t'),
                Ch('\f'),
                R63());
    }

    public Rule R62() {
        return Sequence(
                "/*",
                ZeroOrMore(FirstOf(R61(), R67())),
                "*/");
    }

    public Rule R63() {
        return AnyOf("\n\r");
    }

    public Rule R64() {
        return CharRange('0', '9');
    }

    public Rule R65() {
        return R67();
    }

    public Rule R66() {
        return FirstOf(
                R67(),
                R45());
    }

    public Rule R67() {
        return FirstOf(
                CharRange('a', 'z'),
                CharRange('A', 'Z'),
                CharRange('0', '9'),
                Ch('_'),
                Ch('$')
        );
    }
}

ParserTest.java

package repro.abc;

import org.parboiled.Parboiled;
import org.parboiled.common.Formatter;
import org.parboiled.common.StringUtils;
import org.parboiled.parserunners.RecoveringParseRunner;
import org.parboiled.support.ParsingResult;
import org.parboiled.support.ToStringFormatter;
import org.parboiled.trees.GraphNode;
import org.parboiled.trees.GraphUtils;

import java.util.Scanner;

import static org.parboiled.errors.ErrorUtils.printParseErrors;
import static org.parboiled.support.ParseTreeUtils.printNodeTree;

public class ParserTest {

    @SuppressWarnings("unchecked")
    public static void main(String[] args) {
        repro.abc.Parser parser = Parboiled.createParser(repro.abc.Parser.class);
        Boolean done = false;

        while (!done) {
            System.out.print("test> ");
            String input = new Scanner(System.in).nextLine();
            if (StringUtils.isEmpty(input)) {
                System.out.println("Empty input, exiting.");
                done = true;
            } else {
                ParsingResult<?> result = new RecoveringParseRunner(parser.InputLine()).run(input);

                if (result.hasErrors()) {
                    System.out.println("\nParse Errors:\n" + printParseErrors(result));
                }

                Object value = result.parseTreeRoot.getValue();
                if (value != null) {
                    String str = value.toString();
                    int ix = str.indexOf('|');
                    if (ix >= 0) str = str.substring(ix + 2); // extract value part of AST node toString()
                    System.out.println("Input: " + input + " = " + str + '\n');
                }
                if (value instanceof GraphNode) {
                    Formatter<GraphNode> f = new ToStringFormatter<>(null);
                    System.out.println("\nAbstract Syntax Tree:\n" +
                            GraphUtils.printTree((GraphNode) value, f) + '\n');
                } else {
                    System.out.println("\nParse Tree:\n" + printNodeTree(result) + '\n');
                }
            }
        }
    }
}
fge commented 10 years ago

Hello,

OK, I can reproduce; even a random input provokes the bug.

I need to figure out the call chain first! There are quite a lot of rules there.

chbussler commented 10 years ago

Hi,

yes, I tried to reduce the number of rules to make the reproduction easier. And that's how I came across the Alternative 1 vs. 2, meaning, shortening the rules by just a few in a linear rule dependency makes the bug disappear.

fge commented 10 years ago

OK, so, first, it seems you are using Java 7 (diamond), but here I use Java 6 and the issue is the same.

Also, I could reproduce the bug by replacing the call to R33() by a call to R34(). I also tried to change to a BasicParseRunner and a TracingParseRunner, no change. It therefore seems to really come from the deep, lower levels.

chbussler commented 10 years ago

I tried to debug a little myself and this line

Rule inner = unwrap(target);

expects a non-null value. The 'target' is a ProxyMatcher and it contains a property called 'target' that unwrap() retrieves. That is null, but should not be. But that's all I can figure as I don't really understand how this all works.

fge commented 10 years ago

Well, I am discovering like you here ;)

It appears that R35() is the culprit here; for some reason its target is null; it should be R02...

chbussler commented 10 years ago

Yes, and in Alternative 2 it actually is, I think (meaning R32 has the correct value, not null).

fge commented 10 years ago

OK, still no progress to speak of, except that upgrading to asm 4.2 doesn't change anything...

fge commented 10 years ago

Well, it is buried deep in ASM code it seems.

It is one RuleMethodProcessor which seems to do the job wrong. If we look at the stack trace we see that the first lines are:

    at org.parboiled.matchers.ProxyMatcher.label(ProxyMatcher.java:156)
    at repro.abc.Parser$$parboiled.R33(Unknown Source)
    at repro.abc.Parser.R32(Parser.java:239)
    at repro.abc.Parser$$parboiled.R32(Unknown Source)

It seems this is this repro.abc.Parser$$parboiled.R33 which doesn't do things right here.

So, now, we must find which RuleMethodProcessor doesn't do its job as it should do...

@sirthias, any hint?

fge commented 10 years ago

OK, one more thing: when I remove the call to R32() in R31() the bug disappears. But this changes the grammar, so of course this is a no-no.

I could also obtain the bytecode of the generated class... Trying and analyzing it at the moment.

fge commented 10 years ago

OK, so, I have dumped the whole bytecode of the generated parser class and can now debug using it...

And the problem here is with R35. It calls back R02, but at the time it does, R02 is a ProxyMatcher, with no target. And this should not happen.

I am not sure whether this is an indication of an illegal grammar or a bug with parboiled however...

And I see this up the call chain:

    public Rule R30() {
        return R31(); // <-- HERE
    }

    public Rule R31() {
        return Sequence(
            R30(), // <-- AND HERE
            R57(),
            R32()
        );
    }
chbussler commented 10 years ago

Thanks; your second remark about the call chain: that's a left-recursion and in the meanwhile I have experienced that one, too (as an endless loop), and changed it so that there is no left recursion anymore (I am in active development). Not sure if you want to have the changes; let me know and I'll post them.

As for R35, yes, that's confirming what I have seen not knowing about at all what ProxyMatchers are. I don't think this is a grammar bug as it is a simple recursion in the grammar. And, as I mentioned before, if you enable Alternative 2, then it works perfectly. Alternative 2 cuts short the last two rules (for testing only).

You say that R02 is a ProxyMatcher with no target, but it must have a target Alternative 2. I am curious why that is?

This grammar is given to me and about 5% of what it is going to be in the end. I implement it feature-by-feature in order to make sure that I implement the processing correctly. Many of the rules you see are in their simplest form and will become more complex over time mostly with alternatives; and there will be many more rules. This way it is easier to debug for me (and you also). If I had implemented the full grammar first, it probably would be a nightmare to debug.

fge commented 10 years ago

@sirthias the problem here is with the unwrap method or ProxyMatcher; it is said to return the innermost matcher which IS NOT a ProxyMatcher, but here target is null; what should be done in that case?

fge commented 10 years ago

@chbussler do you also see an NPE with your new grammar? If yes, do post, please. Maybe with some more samples the bug can be more easily tracked down.

fge commented 10 years ago

@chbussler here is the thread dump right before the NPE occurs::

"main@1" prio=5 tid=0x1 nid=NA runnable
  java.lang.Thread.State: RUNNABLE
      at org.parboiled.matchers.ProxyMatcher.unwrap(ProxyMatcher.java:236)
      at org.parboiled.matchers.ProxyMatcher.label(ProxyMatcher.java:155)
      at repro.abc.Parser$$parboiled.R33(Parser$$parboiled.java:1074) // L1
      at repro.abc.Parser.R32(Parser.java:239)
      at repro.abc.Parser$$parboiled.R32(Parser$$parboiled.java:1052)
      at repro.abc.Parser.R31(Parser.java:230)
      at repro.abc.Parser$$parboiled.R31(Parser$$parboiled.java:1033)
      at repro.abc.Parser.R30(Parser.java:226)
      at repro.abc.Parser$$parboiled.R30(Parser$$parboiled.java:1014)
      at repro.abc.Parser.R29(Parser.java:218)
      at repro.abc.Parser$$parboiled.R29(Parser$$parboiled.java:995)
      at repro.abc.Parser.R28(Parser.java:214)
      at repro.abc.Parser$$parboiled.R28(Parser$$parboiled.java:976)
      at repro.abc.Parser.R27(Parser.java:210)
      at repro.abc.Parser$$parboiled.R27(Parser$$parboiled.java:957)
      at repro.abc.Parser.R23(Parser.java:173)
      at repro.abc.Parser$$parboiled.R23(Parser$$parboiled.java:881)
      at repro.abc.Parser.R22(Parser.java:166)
      at repro.abc.Parser$$parboiled.R22(Parser$$parboiled.java:862)
      at repro.abc.Parser.R21(Parser.java:162)
      at repro.abc.Parser$$parboiled.R21(Parser$$parboiled.java:843)
      at repro.abc.Parser.R02(Parser.java:24)
      at repro.abc.Parser$$parboiled.R02(Parser$$parboiled.java:482) // L2
      at repro.abc.Parser.R01(Parser.java:17)
      at repro.abc.Parser$$parboiled.R01(Parser$$parboiled.java:463)
      at repro.abc.Parser.InputLine(Parser.java:11)
      at repro.abc.Parser$$parboiled.InputLine(Parser$$parboiled.java:308)
      at repro.abc.ParserTest.main(ParserTest.java:32)

At this point, R33 has no value; at line L1 of the thread dump, it has called R34 -> R35 -> R02, and when R02 is called, cache$R02 in Parser$$parboiled is a ProxyMatcher with no target, which is the result of L2. Since the result is not null, a label will be attempted to be put on the rule; but since target is null there, it tries and unwrap. Which is where the bug occurs: if the matcher is an instance of ProxyMatcher, it returns the target, which is still null.

chbussler commented 10 years ago

I still get the error with the removal of the left recursion, here the change in rule:

    public Rule R31() {
        return FirstOf(
                R33(),
                Sequence(
                        R30(),
                        R57(),
                        R32()
                )
        );
    }
fge commented 10 years ago

@chbussler is that the only change in the whole grammar?

chbussler commented 10 years ago

At this point, yes, but I'll keep working on it. So maybe I should automatically post changes as I make them.

fge commented 10 years ago

OK, I have a solution which is working but I am not convinced it is the correct thing to do.

I simply changed the unwrap() method, making it lying: if the target is itself a proxy but it has no delegate, return the target itself.

The problem is, I am not sure the labels are fully correct... All tests still pass however; both -core and -java.

If you want to test, you'll have to build branch issue1 of -core. Note that branch core-issue1 of -java contains the test code I used to debug this problem; it includes a full dump of your original grammar.

chbussler commented 10 years ago

But why does it work in Alternative 2?

Just realized that that has to change because of the left recursion:

    /* Alternative 1 */
    /*
    public Rule R33() {
        return R34();
    }
    */

    /* Alternative 2 */

    public Rule R33() {
        return R02();
    }
fge commented 10 years ago

@chbussler I'll need to generate the grammar code, but my first guess is that R02 has the time to "complete" in this scenario. Just a guess...

I'll test and keep you informed

fge commented 10 years ago

OK, let us do this way: can you share all grammars you have tested which fail? Maybe on gists or something? I'll test them here.

Also, I have to write an SSCCE demonstrating the problem. Your grammar demonstrates it but it is quite the piece, and for a test it is a little large ;)

chbussler commented 10 years ago

You have the grammar that fails and also the latest change with the left-recursion removed. This is the first time I ran into such a problem and posted it right away. All the previous versions of the grammar were smaller and had no problem at all. But the grammar will change/increase after you have the bug fix and I will post additional problems (if they come up at all).

As promised, any change I make I'll communicate so that you are up-to-date on what I am doing.

As for the size, it looks to me from the distance that size matters here, especially with the Alternative 1/2 difference. Maybe that will give a hint once you figured what makes that difference.

fge commented 10 years ago

OK, so, how do you want to proceed from there? Note that nothing has been set up for uploads/etc, which means at the moment I cannot publish any artifacts. The only place where it is available is branch issue1 on -core, and if you build that branch you'll get a 1.1.7-SNAPSHOT jar. And that's it, really.

This organisation is not even three days old, but I believe I'll have to accelerate the tempo if things go on like this :p

chbussler commented 10 years ago

I fully appreciate your efforts and that get up and going is hard; so no worries there at all. Continuing with parboiled on Java is very important in my opinion (see also the various comments on the parboiled2 group).

As for the general direction, I think having artifacts on a maven repository was good and I would certainly wish that it stays that way (like what parboiled2 is doing: http://search.maven.org/#search|ga|1|parboiled).

As for the specific issue at hand, I can try and build that branch, however, if something else comes up we might face the problem of difference in environment for the build process. To avoid environment uncertainties, maybe you could share the jar you generated over Google docs or dropbox or even git itself (you could add a target directory to the branch 'issue1' that contains the jar).

fge commented 10 years ago

@chbussler OK, I can do that (make the jar available on the issue1 branch). No new dependencies exist so that should be OK.

What Java version do you use? For now everything is either compiled for Java 6 or 7 but not both -- mixed tests have not been done.

chbussler commented 10 years ago

Thanks; I am on Java 7.

fge commented 10 years ago

OK, I have uploaded a jar compiled with Java 6. If you use 1.1.6 you'll probably have a jar compiled with that version, and while you may probably use -java 1.7 with a 1.6 compiled core, I doubt the reverse is true.

See the BIN directory at the top of branch issue1. According to your build system, this may, or may not, require that you excude -core dependency from your -java dependency.

Note that one change made is that all generated bytecode is 1.6; it is 1.5 in -core 1.1.6 (yes, with all these 5, 6 and 7 around this can become confusing).

chbussler commented 10 years ago

Did you push it? It is not yet visible on the 'issue1' branch.

fge commented 10 years ago

@chbussler

Did you push it?

Oh dear... No I didn't. Now it is there.

Sorry for the inconvenience :/

fge commented 10 years ago

@chbussler can you share your other failing/succeeding grammars? I am afraid I still haven't had the time to dump and analyze the other failing and working ones. Maybe make an archive out of it and upload it somewhere?

chbussler commented 10 years ago

Thanks for the jar; I'll try it out and let you know, of course.

As for the grammars, I'll post an update as soon as I tried the jar.

chbussler commented 10 years ago

OK, it takes time to exclude the maven dependencies and load jars directly. I took the -core jar you uploaded to the issue 1 branch and the -java jar as it came through maven.

Here a first problem:

java.lang.NoClassDefFoundError: org/objectweb/asm/ClassVisitor
    at org.parboiled.Parboiled.createParser(Parboiled.java:54)

I'll be offline for a while, so I might not respond right away. Thanks.

fge commented 10 years ago

Uuuh... OK. I think I'll go the whole way, require that a new groupId be created at Sonatype and upload there...

Stay tuned.

fge commented 10 years ago

https://issues.sonatype.org/browse/OSSRH-9519 <-- request sent to SonaType. Not sure yet what the published version will be... I need to have a good, hard look at http://semver.org I guess.

chbussler commented 10 years ago

I see, this way it is probably the best way forward.

semver.org makes a lot of sense, I think. Not sure if parboiled used it this way so far, but going forward it seems to fit the bill.

chbussler commented 10 years ago

Just downloaded both jars from Bintray and I am getting the same error as above:

java.lang.NoClassDefFoundError: org/objectweb/asm/ClassVisitor
    at org.parboiled.Parboiled.createParser(Parboiled.java:54)
fge commented 10 years ago

That means you don't have the asm jars; you'd need to also download asm, asm-tree, asm-utils and asm-analysis version 4.2.

chbussler commented 10 years ago

OK, thanks; I'll do. Was not required with 1.1.16, so that's probably why I did not realize that those need to be downloaded also.

chbussler commented 10 years ago

OK, works! Great! Thanks!

BTW, there are different entries for asm on the maven repository. If you want to add a note in the project description, this is the one to look for: http://mvnrepository.com/artifact/org.ow2.asm

fge commented 10 years ago

Uh, 1.1.6 does use ASM also: http://mvnrepository.com/artifact/org.parboiled/parboiled-java/1.1.6

Not sure what your environment is but it looks strange that you didn't have those already in your classpath...

Note that you have a new method to dump the bytecode; it can help for debugging.

fge commented 10 years ago

Note: 1.1.7-beta.2 has been released; the only difference with .1 is that it is now available on maven!

Unfortunately I still have not been able to reproduce your problem with a more simple grammar...

sirthias commented 10 years ago

Sorry, guys, for not reacting earlier. This is a long discussion, that I don't have the capacity to fully follow. I think the first thing would be to write a test case that is as short as possible and provokes the bug/problem.

fge commented 10 years ago

@sirthias the problem is with the .unwrap() method of a ProxyMatcher; in method:

public Rule label(String label)

if the target of the proxy is not null, it calls .unwrap(); this method checks whether the target is itself a ProxyMatcher and if so, returns this proxy's target.

The problem here is that the "inner" ProxyMatcher does not have yet a target at this time; as a result, when calling:

        target = (Matcher) inner.label(label); // since relabelling might change the instance we have to update it

an NPE is thrown since inner is null.

And yes, I need to write an SSCCE...

The fix I made is in unwrap(): if the target is a proxy with no target yet, I return the proxy itself.

sirthias commented 10 years ago

Ok, I see the problem. Your fix appears ok, however it does change the semantic of unwrap, which I'd therefor rename to tryUnwrap for cleanliness.

fge commented 10 years ago

@sirthias OK, I'll complete the fix with that. Do you wish that I send a pull request to org.parboiled with this fix? Before of after I have written an SSCCE?

sirthias commented 10 years ago

Once you have a test case provoking the problem it'd be great if you could open a PR for backporting the fix, yes. Thanks for all your effort around this! It's great to see someone reenergizing the project!

fge commented 10 years ago

@sirthias that is exactly what I feared; my "fix" doesn't really fix anything but the NPE.

Minimal test case is here but it is so short that I can paste it here, with includes and all:

package repro.abc;

import org.parboiled.BaseParser;
import org.parboiled.Rule;
import org.parboiled.annotations.BuildParseTree;

@BuildParseTree
public class Parser extends BaseParser<Object> {
    public Rule R31() {
        return R34();
    }

    public Rule R34() {
        return R35();
    }

    public Rule R35() {
        return R31();
    }
}

There is clearly an infinite recursion here but the NPE triggers before.

Now, there are a couple interesting things:

# With `@BuildParseTree`
Exception in thread "main" java.lang.NullPointerException
    at org.parboiled.matchers.ProxyMatcher.label(ProxyMatcher.java:156)
    at repro.abc.Parser$$parboiled.R31(Unknown Source)
    at repro.abc.ParserTest.main(ParserTest.java:12)

# Without `@BuildParseTree`
Exception in thread "main" java.lang.NullPointerException
    at org.parboiled.matchers.ProxyMatcher.suppressNode(ProxyMatcher.java:170)
    at repro.abc.Parser$$parboiled.R34(Unknown Source)
    at repro.abc.Parser.R31(Parser.java:8)
    at repro.abc.Parser$$parboiled.R31(Unknown Source)
    at repro.abc.ParserTest.main(ParserTest.java:12)

It looks like "proxy unrolling", for lack of a better name, has problems here.

I'll investigate further by decompiling (see here; note that it requires Java 7; also, CacheGenerator should be made public for better code generation wrt its .Arguments), but if you have an idea so that I don't go the wrong way...

fge commented 10 years ago

OK well, in .suppressNodes() the culprit is the same; the line before the NPE there is also a call to .unwrap().

fge commented 10 years ago

@sirthias not much progress, just the name that changes; I wonder whether it would be possible to actually detect a "pure recursive" grammar like this one and fail at parser build time -- with a proper exception, not an NPE!

sirthias commented 10 years ago

Ok. First of all the infinite recursion in your example above should not be experienced at parser creation time but only at parser execution time. The proxy/caching logic I described in yesterday's email should effectively prevent infinite recursions during parser construction.

With regard to the unwrapping logic: It is clearly not currently prepared to deal with nested proxies properly, which might be the problem you are seeing.