we-like-parsers / cpython

Here we work on integrating pegen into CPython; use branch 'pegen'
https://github.com/gvanrossum/pegen
Other
1 stars 0 forks source link

Don't use longjmp and check for the error indicator on every rule #119

Closed pablogsal closed 4 years ago

pablogsal commented 4 years ago
gvanrossum commented 4 years ago

Sorry the line I linked to isn't clear in the review. It's at line 10109 in the generated parse.c (and at many other places too). I believe generated at L387 in c_generator.py. (By self.out_of_memory_return().)

gvanrossum commented 4 years ago

There's also a leak in generated code like this. The return NULL; following p->error_indicator = 1; leaks children.

// _loop0_4: ';' small_stmt
static asdl_seq *
_loop0_4_rule(Parser *p)
{
    if (p->error_indicator){
        return NULL;
    }
    void *res = NULL;
    int mark = p->mark;
    int start_mark = p->mark;
    void **children = PyMem_Malloc(sizeof(void *));
    if (!children) {
        PyErr_Format(PyExc_MemoryError, "Parser out of memory");
        return NULL;
    }
    ssize_t children_capacity = 1;
    ssize_t n = 0;
    { // ';' small_stmt
        stmt_ty elem;
        void *literal;
        while (
            (literal = _PyPegen_expect_token(p, 13))
            &&
            (elem = small_stmt_rule(p))
        )
        {
            res = elem;
            if (res == NULL && PyErr_Occurred()) {
                p->error_indicator = 1;
                return NULL;  // <================ LEAK HERE? ===============
            }
            if (n == children_capacity) {
                children_capacity *= 2;
                children = PyMem_Realloc(children, children_capacity*sizeof(void *));
                if (!children) {
                    PyErr_Format(PyExc_MemoryError, "realloc None");
                    return NULL;
                }
            }
            children[n++] = res;
            mark = p->mark;
        }
        p->mark = mark;
    }
    asdl_seq *seq = _Py_asdl_seq_new(n, p->arena);
    if (!seq) {
        PyErr_Format(PyExc_MemoryError, "asdl_seq_new _loop0_4");
        return NULL;  // <================== LEAK HERE? ==================
    }
    for (int i = 0; i < n; i++) asdl_seq_SET(seq, i, children[i]);
    PyMem_Free(children);
    _PyPegen_insert_memo(p, start_mark, _loop0_4_type, seq);
    return seq;
}
pablogsal commented 4 years ago

After 023ed5e now the function you mention looks like this:

_loop0_4_rule(Parser *p)
{
    if (p->error_indicator) {
        return NULL;
    }
    void *res = NULL;
    int mark = p->mark;
    int start_mark = p->mark;
    void **children = PyMem_Malloc(sizeof(void *));
    if (!children) {
        PyErr_Format(PyExc_MemoryError, "Parser out of memory");
        return NULL;
    }
    ssize_t children_capacity = 1;
    ssize_t n = 0;
    { // ';' small_stmt
        stmt_ty elem;
        void *literal;
        while (
            (literal = _PyPegen_expect_token(p, 13))
            &&
            (elem = small_stmt_rule(p))
        )
        {
            res = elem;
            if (res == NULL && PyErr_Occurred()) {
                p->error_indicator = 1;
                PyMem_Free(children);
                return NULL;
            }
            if (n == children_capacity) {
                children_capacity *= 2;
                children = PyMem_Realloc(children, children_capacity*sizeof(void *));
                if (!children) {
                    PyErr_Format(PyExc_MemoryError, "realloc None");
                    return NULL;
                }
            }
            children[n++] = res;
            mark = p->mark;
        }
        p->mark = mark;
    }
    asdl_seq *seq = _Py_asdl_seq_new(n, p->arena);
    if (!seq) {
        PyErr_Format(PyExc_MemoryError, "asdl_seq_new _loop0_4");
        PyMem_Free(children);
        return NULL;
    }
    for (int i = 0; i < n; i++) asdl_seq_SET(seq, i, children[i]);
    PyMem_Free(children);
    _PyPegen_insert_memo(p, start_mark, _loop0_4_type, seq);
    return seq;
}