oridb / mc

Myrddin Compiler
MIT License
387 stars 34 forks source link

6m segfault at compilation on linux/amd64 #203

Closed henesy closed 3 years ago

henesy commented 4 years ago

Building

The file that crashes 6m, this is not minimized, apologies:

use std
use bio

pkg adb =
    const ingest    : (f : byte[:]  -> std.result(db,           byte[:]))

    /* Types */

    // An attribute=value pair
    type attribute = struct
        key     : byte[:]
        value   : byte[:]
    ;;

    // A tuple composed of attributes
    type tuple = struct
        pairs : attribute[256]
    ;;

    // An individual record composed of tuples
    type record = struct
        tuples : tuple[256]
    ;;

    // A parsed attrdb database
    type db = struct
        records : record[256]
    ;;

    // The results from a query
    type query = union
        `Some
        `None
    ;;

    /* State */

    var chatty = false
;;

/* ???

    Are traits in myr powerful enough to implement find(@f) as :: findable on top of:

        {tuple[:], record[:], attribute[:]}

    ???
*/

// Open and parse a file
const ingest = {path : byte[:]
    var f, db

    match bio.open(path, bio.Rd)
    | `std.Ok   file:
        f = file

    | `std.Err  e:
        -> `std.Err e
    ;;

    // States for parsing state machine
    type states = union
    `SingleQuote    uint64      // The starting character point for the quote
    `DoubleQuote    uint64      // ^
    `Comment
    `Newline
    `Key
    `Value
    ;;

    // Current state
    var state   = `Newline

    // Last state
    var last    = state

    // Current line for errors - tick by passing \n
    var line    = 1

    // Current character for errors - tick each iteration
    var cn      = 0

    // String builder buffer
    var buf     = std.mksb()    // Memory leak? - need init for compiler to ☺

    // Index trackers for slices
    var ki = 0, vi = 0, pi = 0, ti = 0, ri = 0

    // Parse into db
    for c : bio.bychar(f)
        last = state

        cn++

        if c == '\n'
            line++
            cn = 0
        ;;

        if chatty
            // Nice printing of each character
            std.put("→ ")
            match c
            | '\n':
                std.put("\\n")
            | '\t':
                std.put("\\t")
            | ' ':
                std.put("_")
            | _:
                std.put("{}", c)
            ;;
            std.put("               ({})\n", state)
        ;;

        match state
        | `Comment:
            // Read through until end of line
            if c == '\n'
                state = `Newline
            ;;

            continue
        | _:
            ;
        ;;

        match c
        | '#':
            // Allow comments at the beginning of lines and after values
            match state
            | `Newline:
                ;
            | `Value:
                ;
            | _:
                -> `std.Err std.fmt("invalid comment beginning at {}:{}:{}", path, line, cn)
            ;;

            state = `Comment

        | '\n':
            var str = std.sbpeek(buf)
            std.sbtrim(buf, 0)

            if str.len < 1
                // Empty string, discard value
                ;
            else
                std.put("» finished value (\\n): {}\n", str)
                db.records[ri].tuples[ti].pairs[pi].value = str
            ;;

            state = `Newline

        | '\'':
            match state
            | `SingleQuote origin:
                // In a quote already

                match bio.peekc(f)
                | `std.Ok   next:
                    match next
                    | '\'':
                        // A '' in a quote
                        if ! std.sbputc(buf, '\'')
                            -> `std.Err std.fmt("could not insert ' at {}:{}:{}", path, line, cn)
                        ;;

                        // Consume the next quote
                        bio.getc(f)
                    | _:
                        // Ending a quote
                        var str = std.sbpeek(buf)
                        std.sbtrim(buf, 0)

                        if str.len < 1
                            // Empty string, discard value
                        else
                            std.put("» finished value ('): {}\n", str)

                            db.records[ri].tuples[ti].pairs[pi].value = str
                        ;;

                        std.put("»» committing previous pair (')\n")

                        //tuple.pairs = std.slpush(&tuple.pairs, pair)

                        pi++

                        state = `Key
                        buf = std.mksb()
                    ;;
                | `std.Err  e:
                    -> `std.Err std.fmt("single quote peek failed at {}:{}:{}", path, line, cn)
                ;;

            | `Value:
                // Beginning a quote
                state = `SingleQuote cn

            | `DoubleQuote origin:
                std.sbputc(buf, c)

            | _:
                -> `std.Err std.fmt("unexpected single quote at {}:{}:{}", path, line, cn)
            ;;

        | '\"':
            match state
            | `DoubleQuote origin:
                // In a quote already

                match bio.peekc(f)
                | `std.Ok   next:
                    match next
                    | '\"':
                        // A "" in a quote
                        if ! std.sbputc(buf, '\"')
                            -> `std.Err std.fmt("could not insert \" at {}:{}:{}", path, line, cn)
                        ;;

                        // Consume the next quote
                        bio.getc(f)
                    | _:
                        // Ending a quote
                        var str = std.sbpeek(buf)
                        std.sbtrim(buf, 0)

                        if str.len < 1
                            // Empty string, discard value
                        else
                            std.put("» finished value (\"): {}\n", str)
                            db.records[ri].tuples[ti].pairs[pi].value = str
                        ;;

                        std.put("»» committing previous pair (\")\n")

                        //tuple.pairs = std.slpush(&tuple.pairs, pair)

                        pi++

                        state = `Key
                        buf = std.mksb()
                    ;;
                | `std.Err  e:
                    -> `std.Err std.fmt("double quote peek failed at {}:{}:{}", path, line, cn)
                ;;

            | `Value:
                // Beginning a quote
                state = `DoubleQuote cn

            | `SingleQuote origin:
                std.sbputc(buf, c)

            | _:
                -> `std.Err std.fmt("unexpected double quote at {}:{}:{}", path, line, cn)
            ;;

        | ' ':

            match state
            | `Newline:
                // This is necessary, apparently
                // Start a new sub-tuple of a record
                std.put("»» committing previous tuple to record (_)\n")

                /*
                record.tuples = std.slpush(&record.tuples, tuple)
                tuple.pairs = std.slzalloc(0)
                */

                ti++
                pi = 0

                std.put("»» start sub-tuple (_)\n")
                state = `Key
                buf = std.mksb()

            | `Key:
                // Allow to passthrough, might be a mistake?
                ;

            | `Value:
                var str = std.sbpeek(buf)
                std.sbtrim(buf, 0)

                if str.len < 1
                    // Empty string, discard value
                else
                    std.put("» finished value (_): {}\n", str)
                    db.records[ri].tuples[ti].pairs[pi].value = str
                ;;

                std.put("»» committing previous pair (_)\n")

                //tuple.pairs = std.slpush(&tuple.pairs, pair)
                pi++

                state = `Key
                buf = std.mksb()

            | `SingleQuote origin:
                std.sbputc(buf, c)

            | `DoubleQuote origin:
                std.sbputc(buf, c)

            | _:
                -> `std.Err std.fmt("invalid space at {}:{}:{}", path, line, cn)
            ;;

        | '\t':
            match state
            | `Newline:
                // This is necessary, apparently
                // Start a new sub-tuple of a record
                state = `Key
                buf = std.mksb()

                std.put("»» committing previous tuple (\\t)\n")

                /*
                record.tuples = std.slpush(&record.tuples, tuple)
                tuple.pairs = std.slzalloc(0)
                */

                ti++
                pi = 0

                std.put("»» start sub-tuple (\\t)\n")

            | `Key:
                // Allow to passthrough, might be a mistake?
                ;

            | `Value:
                var str = std.sbpeek(buf)
                std.sbtrim(buf, 0)

                if str.len < 1
                    // Empty string, discard value
                else
                    std.put("» finished value (\\t): {}\n", str)
                    db.records[ri].tuples[ti].pairs[pi].value = str
                ;;

                std.put("»» committing previous pair (\\t)\n")

                //tuple.pairs = std.slpush(&tuple.pairs, pair)

                pi++

                state = `Key
                buf = std.mksb()

            | `SingleQuote origin:
                std.sbputc(buf, c)

            | `DoubleQuote origin:
                std.sbputc(buf, c)

            | _:
                -> `std.Err std.fmt("invalid tab at {}:{}:{}", path, line, cn)
            ;;

        | '=':
            var str = std.sbpeek(buf)
            std.sbtrim(buf, 0)

            if str.len < 1
                // Empty string, disallowed for keys
                -> `std.Err std.fmt("empty key at {}:{}:{}", path, line, cn)
            else
                std.put("» finished key (=): {}\n", str)
                db.records[ri].tuples[ti].pairs[pi].key = str
            ;;

            state = `Value
            buf = std.mksb()
        | _:
            // A key or value text segment
            match state
            | `Key:
                if ! std.sbputc(buf, c)
                    -> `std.Err std.fmt("could not insert key char at {}:{}:{}", path, line, cn)
                ;;

            | `Value:
                std.sbputc(buf, c)

            | `SingleQuote  origin:
                std.sbputc(buf, c)

            | `DoubleQuote  origin:
                std.sbputc(buf, c)

            | `Newline:
                // Start a new record
                // TODO - do we need to push the last pair and tuple as well?
                if ti > 0
                    std.put("»» committing previous record\n")
                    //db.records = std.slpush(&db.records, record)

                    std.put("»» # tuples in rec = {}\n", ti)

                    ri++
                    ti = 0
                    pi = 0

                ;;

                //record.tuples = std.slzalloc(0)
                //tuple.pairs = std.slzalloc(0)
                ti = 0
                pi = 0

                std.put("»» start record\n")
                std.put("»» start master tuple\n")

                state = `Key

                std.sbfree(buf)
                buf = std.mksb()
                std.sbputc(buf, c)

            | _:
                -> `std.Err std.fmt("unexpected character at {}:{}:{}", path, line, cn)
            ;;
        ;;
    ;;

    // TODO - if there is a tuple (or key= only pair) dangling, be sure to append it
    // Check key.len?
    if ti > 0
        // TODO - do we need to push the last pair and tuple as well?
        std.put("»» committing dangling record\n")
        //db.records = std.slpush(&db.records, record)
        ri++
    ;;

    var nr = 0, nt = 0, np = 0
    for r : db.records
        nr++
        std.put("nr = {}\n", nr)
        for t : r.tuples
            nt++
            std.put("nt = {}\n", nt)
            for p : t.pairs
                std.put("np = {}\n", np)
                np++

                if p.key.len < 1
                    std.put("¡! 0 length key at {}/{}/{}\n", nr, nt, np)
                elif p.value.len < 1
                    std.put("¡! 0 length value at {}/{}/{}\n", nr, nt, np)
                else
                    std.put("   {}={}\n", p.key, p.value)
                ;;
            ;;
        ;;
    ;;

    /* Correct:

        nr = 4

        nt = 12

        np = 17
    */
    std.put("# nr = {}\n# nt = {}\n# np = {}\n", nr, nt, np)

    -> `std.Ok db
}

Segfaulting

Compilation:

; 6m -O obj adb.myr
segmentation violation--core dumped
; 

Running and interrupting valgrind with SIGINT:

; valgrind 6m -O obj adb.myr
==24845== Memcheck, a memory error detector
==24845== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==24845== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==24845== Command: 6m -O obj adb.myr
==24845== 

^?==24845== 
==24845== Process terminating with default action of signal 2 (SIGINT)
==24845==    at 0x483DF50: strlen (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==24845==    by 0x4914532: strdup (strdup.c:41)
==24845==    by 0x13A409: mklbl (node.c:270)
==24845==    by 0x13A505: genlbl (node.c:291)
==24845==    by 0x15050E: nextnode (match.c:172)
==24845==    by 0x150C26: addwildrec (match.c:326)
==24845==    by 0x150D95: addwildrec (match.c:344)
==24845==    by 0x150C7F: addwildrec (match.c:330)
==24845==    by 0x150D95: addwildrec (match.c:344)
==24845==    by 0x15104F: addwild (match.c:392)
==24845==    by 0x152166: addpat (match.c:662)
==24845==    by 0x152901: genonematch (match.c:788)
==24845== 
==24845== HEAP SUMMARY:
==24845==     in use at exit: 18,349,457 bytes in 232,547 blocks
==24845==   total heap usage: 459,939 allocs, 227,392 frees, 28,815,981,120 bytes allocated
==24845== 
==24845== LEAK SUMMARY:
==24845==    definitely lost: 167,279 bytes in 3,197 blocks
==24845==    indirectly lost: 68,377 bytes in 1,385 blocks
==24845==      possibly lost: 72 bytes in 1 blocks
==24845==    still reachable: 18,113,729 bytes in 227,964 blocks
==24845==         suppressed: 0 bytes in 0 blocks
==24845== Rerun with --leak-check=full to see details of leaked memory
==24845== 
==24845== For lists of detected and suppressed errors, rerun with: -s
==24845== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

; 
typeless commented 4 years ago

@henesy I suspect you were using an older compiler. The latest does not have functions such as addwildrec.

typeless commented 4 years ago

@henesy I gave the code a try. The compiler wouldn't crash in the match compiler using the latest. But the type db is of the size 32256256*256 = 536,870,912‬ bytes. The backend would try to generate instructions for each word of the object, which thus explodes the compiler.

See https://github.com/oridb/mc/blob/master/6/simp.c#L1243 and https://github.com/oridb/mc/blob/master/6/isel.c#L450-L455 where the compiler is generating the code for -> std.Err e (line 60) in adb.myr.

henesy commented 4 years ago

Thanks :)

This explanation makes sense, thank you.

It would be nice if the compiler handled this kind of overflow sort of thing more gracefully, but on the other hand the data structure was a little absurd :)