AlexiaChen / AlexiaChen.github.io

My Blog https://github.com/AlexiaChen/AlexiaChen.github.io/issues
87 stars 11 forks source link

用了递归下降分析法写了一个LL(1)文法的表达式计算器 #108

Open AlexiaChen opened 4 years ago

AlexiaChen commented 4 years ago

算是复习了下大学的《编译原理》了,最近要给我们的链做一个合约语言,正在捡起一些东西。主要就是左递归的文法不满足LL(1)文法的定义,需要消除左递归的文法产生式。然后每个非终结符的产生式编写一个同名的对应的解析函数,直到遇到终结符开始识别和匹配。LL(1)意思就是Left 从左往右线性无回溯的扫描,第二个Left就是使用最左推导,1的意思是向前多看一个token是什么,这样才可以根据那个符号选择正确的产生式。

LL文法是不能解决左递归的,但是自然而然可以右递归,一般说到的递归下降分析就是指LL文法,自顶向下分析,有LL(1), LL(2)到LL(k),甚至到LL(∞)。LR文法反过来,其实左递归和右递归都能处理,只是左递归在规约时栈深度保持常数,更自然也更高效,所以LR文法通常写成左递归;它是自底向上分析,或者说叫规约更贴切。后面我会写一个文章专门梳理下Parser的体系。

/*

Author: MathxH Chen
E-mail: brainfvck@foxmail.com

LL(1) grammar for calc:

expr -> expr add term | term
term -> term mul factor | factor
factor -> number | ( expr )
add -> + | -
mul -> * | /
number -> [0...9]+

Remove Left Recursive:

A —> Aα | β
A —> βA'
A' —> αA' | ε

expr -> expr add term | term
expr -> term expr'
expr' -> add term expr' | ε

term -> term mul factor | factor
term -> factor term'
term' -> mul factor term' | ε

factor -> number | ( expr )
add -> + | -
mul -> * | /
number -> [0...9]+

Compile:

g++ calc.cpp -Wall -o calc

*/

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <regex>
#include <string>
#include <vector>

// Lexical scanner: splits an expression string into tokens and hands
// them out one at a time, keeping one token of look-ahead available
// through get_current_token().
class tokenizer
{
public:
    tokenizer():_index(0)
    {

    }
    tokenizer(const std::string& expr_string)
    {
        make(expr_string);
    }

    // (Re)tokenizes expr_string. Tokens are integer literals and the
    // single-character symbols + - * / ( ). Whitespace is skipped, so
    // both "1 + 2" and "1+2" tokenize identically.
    void make(const std::string& expr_string)
    {
        _index = 0;
        _current_token.clear();
        _tokens.clear();

        // Match the tokens themselves (submatch 0) instead of splitting
        // on whitespace (submatch -1). Splitting produced a spurious
        // empty first token when the input had leading whitespace, and
        // could not separate operators glued to their operands ("1+2").
        auto const re = std::regex{R"(\d+|[()+\-*/])"};
        _tokens = std::vector<std::string>(
            std::sregex_token_iterator{std::begin(expr_string), std::end(expr_string), re, 0},
            std::sregex_token_iterator{});
    }

    // The look-ahead token: whatever get_token() returned last.
    std::string get_current_token() const
    {
        return _current_token;
    }

    // Advances the cursor and records the new token as current.
    // Returns the empty string once the input is exhausted.
    std::string get_token() const
    {
        _current_token = next();
        return _current_token;
    }

    // Returns the next raw token, or "" at end of input.
    std::string next() const
    {
        if(_index < _tokens.size())
        {
            return _tokens[_index++];
        }
        return std::string();
    }

    // Debug helper: dumps all tokens, space-separated, to stdout.
    void print_tokens() const
    {
        for(const auto& v : _tokens)
        {
            std::cout << v << " ";
        }
    }

private:
    std::vector<std::string> _tokens;
    // mutable: get_token()/next() advance the scan from const methods;
    // this class is a single-threaded cursor over the token list.
    mutable std::string _current_token;
    mutable std::size_t _index;
};

// Shared scanner instance used by every parsing function below.
static tokenizer my_tokenizer;

// One parsing function per non-terminal of the left-recursion-free
// grammar (see the header comment); each returns the evaluated value
// of the sub-expression it recognized.
int term();
int term_(int left_value);
int expr();
int expr_(int left_value);
int factor();
// Consumes the current token if it equals `token`; aborts otherwise.
void match(const std::string& token);
// True iff s is non-empty and consists solely of decimal digits.
bool is_number(const std::string& s);

// expr -> term expr'
int expr()
{
    int left_value = term();
    return expr_(left_value);
}

// term -> factor term'
int term()
{
    int left_value = factor();
    return term_(left_value);
}

// expr' -> add term expr'
int expr_(int left_value)
{
    std::string current_token = my_tokenizer.get_current_token();
    // add -> + | -
    if(current_token == "+")
    {
        match("+");
        int right_value = term();
        int value = left_value + right_value;
        return expr_(value);
    }
    else if(current_token == "-")
    {
        match("-");
        int right_value = term();
        int value = left_value - right_value;
        return expr_(value);
    }
    else
    {
        return left_value;
    }
}

// term' -> mul factor term'
int term_(int left_value)
{
    std::string current_token = my_tokenizer.get_current_token();

    // mul -> * | /
    if(current_token == "*")
    {
        match("*");
        int right_value = factor();
        int value = left_value * right_value;
        return term_(value);
    }
    else if(current_token == "/")
    {
        match("/");
        int right_value = factor();
        int value = left_value / right_value;
        return term_(value);
    }
    else
    {
        return left_value;
    }

}

// factor -> number | ( expr )
int factor()
{
    std::string current_token = my_tokenizer.get_current_token();
    int value = 0;
    if(is_number(current_token))
    {
        value = std::atoi(current_token.c_str());
        match(current_token);
        return value;
    }
    else if(current_token == "(")
    {
        match("(");
        int value = expr();
        match(")");
        return value;
    }
    else
    {
        std::cout << "Not match <factor>: " << current_token << std::endl;
        exit(-1);
    }

}

// Consumes the current token when it equals `token` and advances the
// scanner; otherwise reports the expected token and aborts.
void match(const std::string& token)
{
    if (my_tokenizer.get_current_token() == token)
    {
        my_tokenizer.get_token(); // advance the look-ahead
        return;
    }
    std::cout << "expected token:" << token << std::endl;
    exit(-1);
}

// True iff s is non-empty and made up entirely of decimal digits.
// The unsigned char cast keeps std::isdigit well-defined for bytes
// outside the ASCII range.
bool is_number(const std::string& s)
{
    if (s.empty())
    {
        return false;
    }
    return std::all_of(s.begin(), s.end(),
                       [](unsigned char c) { return std::isdigit(c) != 0; });
}

// REPL: read one expression per line, evaluate it, print the result.
// Loop ends on EOF (Ctrl-D / Ctrl-Z); syntax errors exit the process
// from inside the parser.
// Fix: the original called expr_str.clear() at the bottom of the loop;
// std::getline already erases the string before reading, so the call
// was redundant and has been removed.
int main()
{
    std::string expr_str;
    std::cout << ">";
    while (std::getline(std::cin, expr_str))
    {
        my_tokenizer.make(expr_str);
        my_tokenizer.get_token(); // prime the one-token look-ahead
        std::cout << expr() << std::endl;
        std::cout << ">";
    }
    return 0;
}