alecthomas / participle

A parser library for Go
MIT License
3.45k stars 186 forks source link

Issue with a nested structure #381

Closed gwd closed 5 months ago

gwd commented 10 months ago

First, thank you so much for this library -- made my first grammar on Friday and I think the setup really helped make things straightforward.

I'm using participle v2.10.0, and trying to write a parser for strings like the following:

    // Inputs the grammar should accept: single versions (a bare string or
    // "<project> <version>") and ranges of two versions joined by " - ".
    tests := []string{
        "Xen 4.18.x",
        "Linux 4.7.x",
        "QEMU 4.7-RC series",
        "Linux 4.7.x - Linux 4.9.x",
        "xen-unstable",
        "Xen 4.18.x - xen-unstable",
        "QEMU 4.7-RC series - QEMU 4.10.x",
    }

i.e., a version specification that is either a single version or a range (two versions separated by " - "), where each version is either a single bare string or <project> <version number>.

I wrote the following participle structures:

// ProjectVersion is the grammar node for "<project> <version>": a Project
// token, then a Space token (matched but not captured), then either a
// VersionString or an RCVersionString token captured into Version.
type ProjectVersion struct {
    Project string `parser:"@Project"`
    Version string `parser:"Space @(VersionString | RCVersionString)"`
}

// VersionSingle is one version. The "|" at the start of the second tag makes
// the two fields alternatives: a bare TipVersion token, or a nested
// ProjectVersion node.
type VersionSingle struct {
    TipVersion     *string         `parser:"@TipVersion"`
    ProjectVersion *ProjectVersion `parser:"| @@"`
}

// VersionRange is a pair of versions: a VersionSingle (From), a VersionDash
// token, and a second VersionSingle (To). Both ends are required.
type VersionRange struct {
    From VersionSingle `parser:"@@"`
    To   VersionSingle `parser:"VersionDash @@"`
}

// Version is the root grammar node. Range is the first alternative and
// Single the second.
//
// NOTE(review): both alternatives begin with a VersionSingle, so the parser
// has to get past a whole VersionSingle before it can tell them apart. This
// is presumably tied to the parser's lookahead setting; confirm against the
// participle documentation.
type Version struct {
    Range  *VersionRange  `parser:"@@"`
    Single *VersionSingle `parser:"| @@"`
}

var rulesCommon []lexer.SimpleRule{ 
    {"Project", `Xen|Linux|QEMU|xapi`},
    {"TipVersion", `xen-unstable`},
    {"RCVersionString", `\d+\.\d+-RC series`},
    {"VersionString", `\d+\.\d+\.x`},
    {"VersionDash", " - "},
    {"Space", ` `},
}

Unfortunately, I get errors like the following:

Parsing Xen 4.18.x: 1:11: unexpected token "<EOF>" (expected <versiondash> VersionSingle)
Parsing Linux 4.7.x: 1:12: unexpected token "<EOF>" (expected <versiondash> VersionSingle)
Parsing QEMU 4.7-RC series: 1:19: unexpected token "<EOF>" (expected <versiondash> VersionSingle)

In other words, it's somehow getting stuck on parsing something as a VersionRange, and not backing out and parsing it simply as a VersionSingle.

But this only happens if both Version and VersionSingle have at least two ways to be interpreted. If I replace the ProjectVersion struct with a single regexp that matches the same string, it works (here replacing the Project lexer token with a ProjectVersion token whose regexp matches the whole "<project> <version>" string).

// With the range, without the "project version"
// VersionSingleNoProjectVersion is one version, captured as a single token
// either way: a TipVersion token or a ProjectVersion token. Unlike
// VersionSingle, neither alternative is a nested struct.
type VersionSingleNoProjectVersion struct {
    TipVersion     *string `parser:"@TipVersion"`
    ProjectVersion *string `parser:"| @ProjectVersion"`
}

// VersionRangeNoProjectVersion is a pair of single-token versions: From, a
// VersionDash token, then To.
type VersionRangeNoProjectVersion struct {
    From VersionSingleNoProjectVersion `parser:"@@"`
    To   VersionSingleNoProjectVersion `parser:"VersionDash @@"`
}

// VersionNoProjectVersion is the root node of the single-token variant:
// Range is the first alternative and Single the second.
type VersionNoProjectVersion struct {
    Range  *VersionRangeNoProjectVersion  `parser:"@@"`
    Single *VersionSingleNoProjectVersion `parser:"| @@"`
}

The problem for single items goes away if I get rid of the VersionRange; but then of course you can't parse ranges:

// With "project version", without range
// VersionNoRange is a root node that accepts exactly one VersionSingle and
// has no range alternative.
type VersionNoRange struct {
    Single *VersionSingle `parser:"@@"`
}

Any idea what's going on?

For completeness, here's a complete testing function you can use to trigger the issue:

package participletest_test

import (
    "testing"

    "github.com/alecthomas/participle/v2"
    "github.com/alecthomas/participle/v2/lexer"
)

// What I'd like:
// ProjectVersion is the grammar node for "<project> <version>": a Project
// token, then a Space token (matched but not captured), then either a
// VersionString or an RCVersionString token captured into Version.
type ProjectVersion struct {
    Project string `parser:"@Project"`
    Version string `parser:"Space @(VersionString | RCVersionString)"`
}

// VersionSingle is one version. The "|" at the start of the second tag makes
// the two fields alternatives: a bare TipVersion token, or a nested
// ProjectVersion node.
type VersionSingle struct {
    TipVersion     *string         `parser:"@TipVersion"`
    ProjectVersion *ProjectVersion `parser:"| @@"`
}

// VersionRange is a pair of versions: a VersionSingle (From), a VersionDash
// token, and a second VersionSingle (To). Both ends are required.
type VersionRange struct {
    From VersionSingle `parser:"@@"`
    To   VersionSingle `parser:"VersionDash @@"`
}

// Version is the root grammar node. Range is the first alternative and
// Single the second.
//
// NOTE(review): both alternatives begin with a VersionSingle, so the parser
// has to get past a whole VersionSingle before it can tell them apart. This
// is presumably tied to the parser's lookahead setting; confirm against the
// participle documentation.
type Version struct {
    Range  *VersionRange  `parser:"@@"`
    Single *VersionSingle `parser:"| @@"`
}

// With "project version", without range
// VersionNoRange is a root node that accepts exactly one VersionSingle and
// has no range alternative.
type VersionNoRange struct {
    Single *VersionSingle `parser:"@@"`
}

// With the range, without the "project version"
// VersionSingleNoProjectVersion is one version, captured as a single token
// either way: a TipVersion token or a ProjectVersion token. Unlike
// VersionSingle, neither alternative is a nested struct.
type VersionSingleNoProjectVersion struct {
    TipVersion     *string `parser:"@TipVersion"`
    ProjectVersion *string `parser:"| @ProjectVersion"`
}

// VersionRangeNoProjectVersion is a pair of single-token versions: From, a
// VersionDash token, then To.
type VersionRangeNoProjectVersion struct {
    From VersionSingleNoProjectVersion `parser:"@@"`
    To   VersionSingleNoProjectVersion `parser:"VersionDash @@"`
}

// VersionNoProjectVersion is the root node of the single-token variant:
// Range is the first alternative and Single the second.
type VersionNoProjectVersion struct {
    Range  *VersionRangeNoProjectVersion  `parser:"@@"`
    Single *VersionSingleNoProjectVersion `parser:"| @@"`
}

// rulesCommon holds the lexer rules shared by both lexers built in
// TestVersion. It deliberately has no project rule: each lexer appends its
// own (Project or ProjectVersion) to a copy of this list.
//
// NOTE(review): VersionDash is listed before Space, whose pattern would
// otherwise match the leading blank of " - "; rule order presumably matters
// to the lexer, so confirm before reordering.
var rulesCommon = []lexer.SimpleRule{
    {"TipVersion", `xen-unstable`},
    {"RCVersionString", `\d+\.\d+-RC series`},
    {"VersionString", `\d+\.\d+\.x`},
    {"VersionDash", " - "},
    {"Space", ` `},
}

// TestVersion runs the same inputs through three grammar variants:
//
//   - Version: single or range, with a structured project/version node;
//   - VersionNoRange: single only;
//   - VersionNoProjectVersion: single or range, with project and version
//     lexed as one token.
//
// Every parse failure is reported with t.Errorf and every success is logged,
// so one run shows which variant rejects which input.
func TestVersion(t *testing.T) {
    simpletests := []string{
        "Xen 4.18.x",
        "Linux 4.7.x",
        "QEMU 4.7-RC series",
        "xen-unstable",
    }

    rangetests := []string{
        "Linux 4.7.x - Linux 4.9.x",
        "Xen 4.18.x - xen-unstable",
        "QEMU 4.7-RC series - QEMU 4.10.x",
    }

    // simpletests has no spare capacity, so append yields a fresh slice that
    // can be built once and shared by the loops that need every input.
    alltests := append(simpletests, rangetests...)

    // Two lexers over the shared rules: one with a bare Project token, one
    // where project and version form a single ProjectVersion token.
    projectRule := lexer.SimpleRule{"Project", `Xen|Linux|QEMU|xapi`}
    lexProject := lexer.MustSimple(append(rulesCommon, projectRule))

    projectVersionRule := lexer.SimpleRule{"ProjectVersion", `(Xen|Linux|QEMU|xapi) (\d+\.\d+\.x|\d+\.\d+-RC series)`}
    lexProjectVersion := lexer.MustSimple(append(rulesCommon, projectVersionRule))

    pVersion := participle.MustBuild[Version](participle.Lexer(lexProject))

    t.Log("Testing pVersion with simple and range")
    for _, input := range alltests {
        parsed, err := pVersion.ParseString("", input)
        if err != nil {
            t.Errorf("ERROR: Parsing %v: %v", input, err)
            continue
        }
        t.Logf("Parsing %v resulted in %v", input, parsed)
    }

    pVersionNoRange := participle.MustBuild[VersionNoRange](participle.Lexer(lexProject))

    t.Log("Testing pVersionNoRange with simple only")
    for _, input := range simpletests {
        parsed, err := pVersionNoRange.ParseString("", input)
        if err != nil {
            t.Errorf("ERROR: Parsing %v: %v", input, err)
            continue
        }
        t.Logf("Parsing %v resulted in %v", input, parsed)
    }

    pVersionNoProjectVersion := participle.MustBuild[VersionNoProjectVersion](participle.Lexer(lexProjectVersion))

    t.Log("Testing pVersionNoProjectVersion with simple and range")
    for _, input := range alltests {
        parsed, err := pVersionNoProjectVersion.ParseString("", input)
        if err != nil {
            t.Errorf("ERROR: Parsing %v: %v", input, err)
            continue
        }
        t.Logf("Parsing %v resulted in %v", input, parsed)
    }
}
gwd commented 10 months ago

BTW, the following works for all tests:

// VersionSlice is a root node that collects one VersionSingle followed by
// zero or more "VersionDash VersionSingle" repetitions into Range. A single
// version and a range therefore share one rule with no alternation, and the
// grammar also admits more than two dash-separated versions.
type VersionSlice struct {
    Range []VersionSingle `parser:"@@ ( VersionDash @@ )*"`
}

...
    // Build a parser for the slice-based grammar on the Project lexer and run
    // every input, single and range, through it.
    pVersionSlice := participle.MustBuild[VersionSlice](participle.Lexer(lexProject))

    t.Log("Testing pVersionSlice with simple and range")
    for _, input := range append(simpletests, rangetests...) {
        parsed, err := pVersionSlice.ParseString("", input)
        if err != nil {
            t.Errorf("ERROR: Parsing %v: %v", input, err)
            continue
        }
        t.Logf("Parsing %v resulted in %v", input, parsed)
    }
...

So I think I have a work-around for now; but if the first version is supposed to work, it would be good to track down what's going on.