kswoll / npeg

This parser is an implementation of a Packrat parser with support for left recursion. The algorithm for left recursion is a modified version of the one described in the paper "Packrat Parsers Can Support Left Recursion".
MIT License
16 stars 5 forks source link

Capture doesn't always capture #5

Open xanatos opened 5 years ago

xanatos commented 5 years ago

I've tried the fix for the capture. Now `.Capture()` sometimes doesn't capture (that is, it doesn't write to the object). The example below is self-contained. There is a FIXME comment: the uncommented line of code works, but if it is swapped with the commented-out one, the string is still parsed while the LcName element remains empty.

using PEG;
using PEG.SyntaxTree;
using System;
using System.Collections.Generic;
using System.Linq;

namespace KlcImporter
{
    /// <summary>
    /// Console entry point of the reproduction: parses a single hard-coded
    /// cross-reference line and prints the resulting <c>CrossRef</c> object.
    /// </summary>
    public class Program
    {
        static void Main(string[] args)
        {
            // Build the grammar first, then a parser rooted at its CrossRef rule.
            var grammar = CrossRefGrammar.Create();
            var parser = new PegParser<CrossRef>(grammar, grammar.CrossRef());

            // NOTE(review): the grammar's Tab() rule matches '\t' characters, but this
            // literal starts with spaces as shown here — possibly a tab that was lost
            // when the snippet was pasted/rendered. Confirm against the original input.
            string input = "      x (latin small letter ae - 00E6)\r";

            CrossRef crossRef;
            int consumed;

            // The return value and the consumed count are not inspected; only the
            // populated result object is printed (via CrossRef.ToString()).
            var outcome = parser.Parse(input, out crossRef, out consumed);

            Console.WriteLine(crossRef);
        }
    }

    /// <summary>
    /// Result object for one parsed cross-reference line. The property names
    /// coincide with rule/capture names used by <c>CrossRefGrammar</c>.
    /// </summary>
    public class CrossRef
    {
        public string Tab { get; set; }
        public string Char { get; set; }
        public LcName LcName { get; set; }
        public string OpenAngularBracket { get; set; }
        public string Dash { get; set; }
        public LcName LazyLcNameForCrossRef { get; set; }

        /// <summary>
        /// Renders the cross reference as text. The layout depends on which of
        /// the optional parts (name, "&lt;" marker, dash) are present.
        /// </summary>
        public override string ToString()
        {
            // Neither name property is set: only the code is printed.
            if (LcName == null && LazyLcNameForCrossRef == null)
            {
                return $"{Tab}x {Char}";
            }

            // A non-null Dash selects the parenthesised "(name - code)" layout.
            bool hasDash = Dash != null;

            if (OpenAngularBracket != null)
            {
                // Angle-bracketed name; only LcName is consulted in this branch.
                return hasDash
                    ? $"{Tab}x (<{LcName}> - {Char})"
                    : $"{Tab}x {Char} <{LcName}>";
            }

            // Plain name; the lazy variant is used as a fallback only in the dash layout.
            return hasDash
                ? $"{Tab}x ({LcName ?? LazyLcNameForCrossRef} - {Char})"
                : $"{Tab}x {Char} {LcName}";
        }
    }

    /// <summary>
    /// A name plus an optional list of <c>CharElement</c> items that follow it.
    /// </summary>
    public class LcName
    {
        public string Name { get; set; }
        public List<CharElement> CharContainer { get; set; }

        /// <summary>
        /// Joins <c>Name</c> and every element of <c>CharContainer</c> with "-".
        /// A null <c>CharContainer</c> is treated as an empty list.
        /// </summary>
        public override string ToString()
        {
            IEnumerable<object> trailing = CharContainer ?? Enumerable.Empty<object>();
            IEnumerable<object> parts = new object[] { Name }.Concat(trailing);

            return string.Join("-", parts);
        }
    }

    /// <summary>
    /// Holder for a single string value exposed through <c>Char</c>.
    /// </summary>
    public class CharElement
    {
        public string Char { get; set; }

        /// <summary>Returns the <c>Char</c> value unchanged (null when unset).</summary>
        public override string ToString()
        {
            return Char;
        }
    }

    /// <summary>
    /// Grammar for a single cross-reference line, written with the npeg expression DSL.
    /// The entry rule is <c>CrossRef()</c>; the remaining rules are its building blocks.
    /// </summary>
    /// <remarks>
    /// Capture names are produced with <c>nameof</c> against the result types declared in
    /// the <c>KlcImporter</c> namespace. They are namespace-qualified because, inside this
    /// class, the simple names <c>CrossRef</c> and <c>LcName</c> refer to the rule methods.
    /// NOTE(review): the operator meanings mentioned in the comments below (unary + =
    /// one-or-more, unary - = zero-or-more, ~ = optional, ! = negative lookahead,
    /// | = ordered choice, binary + = sequence) are assumed from npeg's conventions —
    /// confirm against the library.
    /// </remarks>
    public class CrossRefGrammar : Grammar<CrossRefGrammar>
    {
        // Shared tail of the parenthesised forms: " - " + code + ")" + line ending.
        // The dash is captured under the name "Dash".
        private Expression CrossRefBaseExpression() => Sp() + "-"._().Capture(nameof(KlcImporter.CrossRef.Dash)) + Sp() + Char() + ")"._() + Lf();

        // Entry rule. The alternatives are tried in the order written, so their order matters.
        public virtual Expression CrossRef() =>
            (Tab() + "x"._() + Sp() + Char() + Sp() + LcName() + Lf()) |
            (Tab() + "x"._() + Sp() + Char() + Sp() + "<"._().Capture(nameof(KlcImporter.CrossRef.OpenAngularBracket)) + LcName() + ">"._() + Lf()) |

            // FIXME 
            // Reported problem: with the commented-out alternative below (explicit capture
            // into "LcName") the input still parses, but LcName stays empty on the result.
            //(Tab() + "x"._() + Sp() + "("._() + LazyLcNameForCrossRef().Capture(nameof(KlcImporter.CrossRef.LcName)) + CrossRefBaseExpression()) |
            (Tab() + "x"._() + Sp() + "("._() + LazyLcNameForCrossRef() + CrossRefBaseExpression()) |

            (Tab() + "x"._() + Sp() + "("._() + "<"._().Capture(nameof(KlcImporter.CrossRef.OpenAngularBracket)) + LcName() + ">"._() + CrossRefBaseExpression()) |
            (Tab() + "x"._() + Sp() + Char() + Lf());

        // One name character: a-z, 0-9, a space, or a "-" that is not followed by a code.
        private Expression BaseLcName() => 'a'.To('z') | '0'.To('9') | " "._() | ("-"._() + !Char());

        // A run of name characters captured as "Name", followed by the char container.
        public virtual Expression LcName() => (+BaseLcName()).Capture(nameof(KlcImporter.LcName.Name)) + CharContainer();

        // Like LcName(), but each name character is only taken when the shared
        // " - code)" tail does not start at the current position.
        public virtual Expression LazyLcNameForCrossRef() => (+(!CrossRefBaseExpression() + BaseLcName())).Capture(nameof(KlcImporter.LcName.Name)) + CharContainer();

        // Repetition of "-code" elements (presumably zero or more — see the class remarks).
        public virtual Expression CharContainer() => -CharElement();

        // A "-" followed by a code.
        public virtual Expression CharElement() => ("-"._() + Char());

        // A code: X() repeated with bounds 4 and 6.
        public virtual Expression Char() => X().Repeat(4, 6);

        // One upper-case hexadecimal digit.
        public virtual Expression X() => '0'.To('9') | 'A'.To('F');

        // Leading tab characters.
        public virtual Expression Tab() => +'\t'._();

        // A single space.
        public virtual Expression Sp() => " "._();

        // Line ending: "\r" with a trailing "\n", or "\n" with a trailing "\r"
        // (the trailing character presumably optional — see the class remarks).
        public virtual Expression Lf() => ('\r'._() + ~'\n'._()) | ('\n'._() + ~'\r'._());
    }
}