joemalle / limn

A tiny parser designed to compile quickly
Boost Software License 1.0
2 stars 1 forks source link

use custom Token class for input stream #10

Open asmwarrior opened 1 year ago

asmwarrior commented 1 year ago

This is test code showing that the parser can support both char-based parsing and custom Token-based parsing.

See the demo code below:

#include <iostream>
#include <string_view>
#include <vector>
#include <span>

// std::span extended with remove_prefix(), the operation std::string_view
// already offers, so both view types can be consumed through one interface.
template <typename T>
class Span : public std::span<T> {
public:
    // Make every std::span constructor available on Span.
    using std::span<T>::span;

    // Drop the first n elements from the view (counterpart of
    // std::string_view::remove_prefix). The narrowed view is produced by
    // std::span::subspan and assigned back onto the base sub-object.
    constexpr void remove_prefix(std::size_t n) {
        std::span<T>::operator=(this->subspan(n));
    }
};

// One element of a token stream.
//
// No special member functions are declared: the compiler-generated copy/move
// operations copy both members, and the default member initializer guarantees
// `type` never holds an indeterminate value.
struct Token
{
    // Text of the token. This is a non-owning view, so the referenced
    // characters must outlive the Token.
    std::string_view lexeme;
    // Integer tag of the token (presumably its kind; nothing in this snippet
    // reads it). Defaults to 0.
    int type{0};
    // Equality compares the lexeme only; `type` does not take part.
    bool operator==(const Token& other) const noexcept {
        return lexeme == other.lexeme;
    }
};

// Trait that maps an element type to the view type used to walk a sequence
// of such elements. The primary template is only declared, so an element
// type without a specialization below fails to compile.
template <class Elem>
struct Viewer;

// Token streams are walked through Span<Token>.
template <>
struct Viewer<Token> {
    using type = Span<Token>;  // std::span or derived class
};

// Character input is walked through std::string_view.
template <>
struct Viewer<char> {
    using type = std::string_view;
};

// Shorthand for Viewer<Elem>::type.
template <class Elem>
using ViewerT = typename Viewer<Elem>::type;

// Skeleton parser type parameterised on the element type. It only names the
// matching view type (privately) and has no other members in this snippet.
template <class Elem>
class Parser {
    using v = ViewerT<Elem>;
};

// a simple parser demo

// Common base of the parsers. The first parameter receives the deriving
// parser type (char_ passes itself) and is not used inside the base; the
// second is the element type of the input.
template <class Derived, class Elem>
struct parser_base {
    // View type a parser over Elem consumes.
    using v = ViewerT<Elem>;

    // Declared only: no definition is provided in this snippet, so it must
    // not be called.
    constexpr auto operator[](v& output) const noexcept;
};

// Parser that matches exactly one input element equal to a stored value.
// T is the element type (char, Token, ...).
template <typename T>
struct char_ final : public parser_base<char_<T>, T> {
    using v = ViewerT<T>;

    // Store a copy of the element to match.
    constexpr explicit char_(const T ch) noexcept : ch(ch) {}

    // Try to consume one element from the front of `sv`.
    // Returns true and advances `sv` by one element when the view is
    // non-empty and its first element compares equal to the stored value;
    // otherwise returns false and leaves `sv` unchanged.
    constexpr inline bool visit(v& sv) const& noexcept {
        const bool matched = !sv.empty() && sv.front() == ch;
        if (matched) {
            sv.remove_prefix(1);
        }
        return matched;
    }

private:
    T ch;  // element to compare against
};

// Run `parser` against `input` and report whether it matched; `input` is
// advanced by whatever the parser consumed.
// T only appears inside ViewerT<T>, which is a non-deduced context, so callers
// have to spell out both template arguments.
template <typename Parser, typename T>
constexpr auto parse(ViewerT<T>& input, Parser const& parser) noexcept -> bool {
    return parser.visit(input);
}

int main()
{
    // Token based parsing
    Token kw_class;
    kw_class.lexeme = "a";
    std::vector<Token> token_stream;
    token_stream.push_back(kw_class);
    token_stream.push_back(kw_class);
    token_stream.push_back(kw_class);

    // the first Token is "a", so it will match the pattern
    token_stream[0].lexeme = "a";
    Span<Token> token_stream_view{&token_stream[0], 3};
    // the parser try to match "a"
    auto p = char_(kw_class);
    bool result = parse<decltype(p), Token>(token_stream_view, p);
    std::cout << result << std::endl;

    // char based parsing
    auto p2 = char_('a');
    std::string_view input = "aaa";
    result  = parse<decltype(p2), char>(input, p2);
    std::cout << result << std::endl;

    return 0;
}

Note that the code contains both kinds of parsers.

I got a lot of help from the Stack Overflow site, especially from the question "What is the way to remove the first element from a std::span?".

asmwarrior commented 1 year ago

The code can be simplified — at least the parse function call can be — thanks to help from the Stack Overflow site:

#include <iostream>
#include <string_view>
#include <vector>
#include <span>

// std::span extended with remove_prefix(), the operation std::string_view
// already offers, so both view types can be consumed through one interface.
template <typename T>
class Span : public std::span<T> {
public:
    // Make every std::span constructor available on Span.
    using std::span<T>::span;

    // Drop the first n elements from the view (counterpart of
    // std::string_view::remove_prefix). The narrowed view is produced by
    // std::span::subspan and assigned back onto the base sub-object.
    constexpr void remove_prefix(std::size_t n) {
        std::span<T>::operator=(this->subspan(n));
    }
};

// One element of a token stream.
//
// No special member functions are declared: the compiler-generated copy/move
// operations copy both members, and the default member initializer guarantees
// `type` never holds an indeterminate value.
struct Token
{
    // Text of the token. This is a non-owning view, so the referenced
    // characters must outlive the Token.
    std::string_view lexeme;
    // Integer tag of the token (presumably its kind; nothing in this snippet
    // reads it). Defaults to 0.
    int type{0};
    // Equality compares the lexeme only; `type` does not take part.
    bool operator==(const Token& other) const noexcept {
        return lexeme == other.lexeme;
    }
};

// Trait that maps an element type to the view type used to walk a sequence
// of such elements. The primary template is only declared, so an element
// type without a specialization below fails to compile.
template <class Elem>
struct Viewer;

// Token streams are walked through Span<Token>.
template <>
struct Viewer<Token> {
    using type = Span<Token>;  // std::span or derived class
};

// Character input is walked through std::string_view.
template <>
struct Viewer<char> {
    using type = std::string_view;
};

// Shorthand for Viewer<Elem>::type.
template <class Elem>
using ViewerT = typename Viewer<Elem>::type;

// Skeleton parser type parameterised on the element type. It only names the
// matching view type (privately) and has no other members in this snippet.
template <class Elem>
class Parser {
    using v = ViewerT<Elem>;
};

// a simple parser demo

// Common base of the parsers. The first parameter receives the deriving
// parser type (char_ passes itself) and is not used inside the base; the
// second is the element type of the input.
template <class Derived, class Elem>
struct parser_base {
    // View type a parser over Elem consumes.
    using v = ViewerT<Elem>;

    // Declared only: no definition is provided in this snippet, so it must
    // not be called.
    constexpr auto operator[](v& output) const noexcept;
};

// Parser that matches exactly one input element equal to a stored value.
// T is the element type (char, Token, ...).
template <typename T>
struct char_ final : public parser_base<char_<T>, T> {
    using v = ViewerT<T>;

    // Store a copy of the element to match.
    constexpr explicit char_(const T ch) noexcept : ch(ch) {}

    // Try to consume one element from the front of `sv`.
    // Returns true and advances `sv` by one element when the view is
    // non-empty and its first element compares equal to the stored value;
    // otherwise returns false and leaves `sv` unchanged.
    constexpr inline bool visit(v& sv) const& noexcept {
        const bool matched = !sv.empty() && sv.front() == ch;
        if (matched) {
            sv.remove_prefix(1);
        }
        return matched;
    }

private:
    T ch;  // element to compare against
};

// Run `parser` against `input` and report whether it matched; `input` is
// advanced by whatever the parser consumed.
// Both template arguments are deduced from the call, so callers simply write
// parse(input, parser).
template <typename Parser, typename View>
constexpr auto parse(View& input, Parser const& parser) noexcept -> bool {
    return parser.visit(input);
}

int main()
{
    // Token based parsing
    Token kw_class;
    kw_class.lexeme = "a";
    std::vector<Token> token_stream;
    token_stream.push_back(kw_class);
    token_stream.push_back(kw_class);
    token_stream.push_back(kw_class);

    // the first Token is "a", so it will match the pattern
    token_stream[0].lexeme = "a";
    Span<Token> token_stream_view{&token_stream[0], 3};
    // the parser try to match "a"
    auto p = char_(kw_class);
    bool result = parse(token_stream_view, p);
    std::cout << result << std::endl;

    // char based parsing
    auto p2 = char_('a');
    std::string_view input = "aaa";
    result  = parse(input, p2);
    std::cout << result << std::endl;

    return 0;
}
asmwarrior commented 1 year ago

Now, a simple sequence parser works:

#include <iostream>
#include <span>
#include <string_view>
#include <utility>
#include <vector>

// std::span extended with remove_prefix(), the operation std::string_view
// already offers, so both view types can be consumed through one interface.
template <typename T>
class Span : public std::span<T> {
public:
    // Make every std::span constructor available on Span.
    using std::span<T>::span;

    // Drop the first n elements from the view (counterpart of
    // std::string_view::remove_prefix). The narrowed view is produced by
    // std::span::subspan and assigned back onto the base sub-object.
    constexpr void remove_prefix(std::size_t n) {
        std::span<T>::operator=(this->subspan(n));
    }
};

// One element of a token stream.
//
// No special member functions are declared: the compiler-generated copy/move
// operations copy both members, and the default member initializer guarantees
// `type` never holds an indeterminate value.
struct Token
{
    // Text of the token. This is a non-owning view, so the referenced
    // characters must outlive the Token.
    std::string_view lexeme;
    // Integer tag of the token (presumably its kind; nothing in this snippet
    // reads it). Defaults to 0.
    int type{0};
    // Equality compares the lexeme only; `type` does not take part.
    bool operator==(const Token& other) const noexcept {
        return lexeme == other.lexeme;
    }
};

// Trait that maps an element type to the view type used to walk a sequence
// of such elements. The primary template is only declared, so an element
// type without a specialization below fails to compile.
template <class Elem>
struct Viewer;

// Token streams are walked through Span<Token>.
template <>
struct Viewer<Token> {
    using type = Span<Token>;  // std::span or derived class
};

// Character input is walked through std::string_view.
template <>
struct Viewer<char> {
    using type = std::string_view;
};

// Shorthand for Viewer<Elem>::type.
template <class Elem>
using ViewerT = typename Viewer<Elem>::type;

// Skeleton parser type parameterised on the element type. It only names the
// matching view type (privately) and has no other members in this snippet.
template <class Elem>
class Parser {
    using v = ViewerT<Elem>;
};

// a simple parser demo

// Common base of the parsers. The first parameter receives the deriving
// parser type (char_ and seq_ pass themselves) and is not used inside the
// base; the second is the element type of the input.
template <class Derived, class Elem>
struct parser_base {
    // View type a parser over Elem consumes.
    using v = ViewerT<Elem>;
    // Element type, published so combinators such as seq_ can read it back
    // from an operand.
    using charType = Elem;

    // Declared only: no definition is provided in this snippet, so it must
    // not be called.
    constexpr auto operator[](v& output) const noexcept;
};

// Parser that matches exactly one input element equal to a stored value.
// T is the element type (char, Token, ...).
template <typename T>
struct char_ final : public parser_base<char_<T>, T> {
    using v = ViewerT<T>;

    // Store a copy of the element to match.
    constexpr explicit char_(const T ch) noexcept : ch(ch) {}

    // Try to consume one element from the front of `sv`.
    // Returns true and advances `sv` by one element when the view is
    // non-empty and its first element compares equal to the stored value;
    // otherwise returns false and leaves `sv` unchanged.
    constexpr inline bool visit(v& sv) const& noexcept {
        const bool matched = !sv.empty() && sv.front() == ch;
        if (matched) {
            sv.remove_prefix(1);
        }
        return matched;
    }

private:
    T ch;  // element to compare against
};

// Sequence combinator: succeeds when `left` matches and then `right` matches
// on the input that remains. The element type is taken from Left::charType.
template <typename Left, typename Right>
struct seq_ final : public parser_base<seq_<Left, Right>, typename Left::charType> {
    // Take ownership of both sub-parsers.
    constexpr explicit seq_(Left&& left, Right&& right) noexcept
        : left(std::forward<Left>(left)), right(std::forward<Right>(right)) {}

    // Run both sub-parsers in order on `sv`.
    // `right` is attempted only after `left` succeeded (&& short-circuits).
    // Whatever `left` consumed is NOT restored when `right` then fails.
    template <typename View>
    constexpr inline bool visit(View& sv) const& noexcept {
        return left.visit(sv) && right.visit(sv);
    }

private:
    Left left;
    Right right;
};

template <typename Left, typename Right>
constexpr inline auto operator>>(Left&& left, Right&& right) noexcept {
    return seq_<Left, Right>(
        std::forward<Left>(left),
        std::forward<Right>(right)
    );
}

// Run `parser` against `input` and report whether it matched; `input` is
// advanced by whatever the parser consumed.
// Both template arguments are deduced from the call, so callers simply write
// parse(input, parser).
template <typename Parser, typename View>
constexpr auto parse(View& input, Parser const& parser) noexcept -> bool {
    return parser.visit(input);
}

int main()
{
    // Token based parsing
    Token kw_a;
    kw_a.lexeme = "a";
    Token kw_b;
    kw_b.lexeme = "b";
    std::vector<Token> token_stream;
    token_stream.push_back(kw_a);
    token_stream.push_back(kw_b);
    token_stream.push_back(kw_a);

    // the first Token is "a", so it will match the pattern
    token_stream[0].lexeme = "a";
    token_stream[1].lexeme = "b";
    Span<Token> token_stream_view{&token_stream[0], 3};
    // the parser try to match "a"
    auto p = char_(kw_a) >> char_(kw_b);
    bool result = parse(token_stream_view, p);
    std::cout << result << std::endl;

    // char based parsing
    auto p2 = char_('a') >> char_('b');
    std::string_view input = "aba";
    result  = parse(input, p2);
    std::cout << result << std::endl;

    return 0;
}
asmwarrior commented 1 year ago

I posted another question on the Stack Overflow site; it's here:

c++ - How to deduce template argument from a function type? - Stack Overflow

I had some issues with deducing the template arguments, and I got some good answers.

#include <iostream>
#include <type_traits>
#include <concepts>
#include <span>
#include <string_view>
#include <vector>

// Traits exposing the return type and the single parameter type of a pointer
// to a unary function.
//
// The primary template is only declared, so using the trait with any type
// that is not matched by a specialization below is a compile-time error.
template <typename T> struct function;
template <typename ReType, typename Arg> struct function<ReType(*)(Arg)> {
    using ret_type = ReType;
    using arg_type = Arg;
};
// Since C++17 `noexcept` is part of the function type, so a pointer to a
// noexcept function does not match the specialization above; forward it to
// the plain one so both spellings yield the same members.
template <typename ReType, typename Arg>
struct function<ReType(*)(Arg) noexcept> : function<ReType(*)(Arg)> {};
// Parameter type of the unary function pointer type FunctionPtr.
template <typename FunctionPtr> using function_arg_t = typename function<FunctionPtr>::arg_type;

// Placeholder token type for this example; it carries no data.
struct Token {};
// Declared only; nothing in this snippet defines or uses it.
template <class Elem> struct Viewer;

// Returns a value-initialised object of the view type that belongs to Type:
// std::string_view for char, std::span<Token> for Token. For any other Type
// neither branch is taken, so the deduced return type is void.
template <typename Type> constexpr auto helper() noexcept {
    constexpr bool wants_chars  = std::is_same_v<Type, char>;
    constexpr bool wants_tokens = std::is_same_v<Type, Token>;
    if constexpr (wants_chars) {
        return std::string_view{};
    } else if constexpr (wants_tokens) {
        return std::span<Token>{};
    }
}
// View type associated with element type T: the return type of helper<T>().
template <typename T> using ViewerT = decltype(helper<T>());

// Common base of the parsers. The first parameter receives the deriving
// parser type (action_ passes itself) and is not used inside the base; the
// second is the element type of the input.
template <class Derived, class Elem>
struct parser_base {
    // View type a parser over Elem consumes.
    using v = ViewerT<Elem>;
    // Element type, published for code that needs to read it back.
    using charType = Elem;
};

template <typename Func, typename T>
struct action_ final : public parser_base<action_<Func, T>, T> {
    constexpr explicit action_(Func&& func) noexcept
        : func(std::forward<Func>(func))
    {}

    template<typename V, typename Skipper>
    constexpr inline bool visit(V& sv, [[maybe_unused]] Skipper& skipper) const& noexcept {
        return func(sv); // func returns false to fail the parse
    }

private:
    Func func;
};
// deduction guide
template <typename Func, typename T = function_arg_t<Func>>
action_(Func) -> action_<Func, T>;

// char-based parsing: the action receives the std::string_view being parsed.
using myTestAction = auto(std::string_view&) -> bool;
// Sample action that ignores its argument and always reports success.
constexpr auto testAction(std::string_view&) -> bool
{
    return true;
}

// Token-based parsing: the action receives the std::span<Token> being parsed.
using myTestActionSpan = auto(const std::span<Token>&) -> bool;
// Sample action that ignores its argument and always reports success.
constexpr auto funcSpan(const std::span<Token>&) -> bool
{
    return true;
}

int main()
{
    // T == char
    [[maybe_unused]] auto p1 = action_<myTestAction*, char>(&testAction);
    [[maybe_unused]] auto p2 = action_(&testAction);

    // T == Token
    [[maybe_unused]] auto p3 = action_<myTestActionSpan*, Token>(funcSpan);
    [[maybe_unused]] auto p4 = action_(&funcSpan);
    return 0;
}
joemalle commented 1 year ago

Thanks for sharing the link; using a deduction guide sounds right to me