ktye / i

interpret
100 stars 16 forks source link

pack data section #51

Closed ktye closed 11 months ago

ktye commented 11 months ago

currently the data section is (char-classes + z.k + ..):

1856 zz (uncompressed)
1143 zz.gz
1150 zz.zst (zstd -19)
1574 zz.lz4 (lz4  -12)

adding a decompressor for the lz4 block format looks like this:

// u8 returns the byte stored at address x as a value in the range 0..255.
// The mask discards everything above the low 8 bits of what I8 returns
// (presumably I8 sign-extends the byte; confirm against its definition).
func u8(x int32) int32 {
    b := I8(x)
    return b & 0xff
}
// lz decodes one lz4 block.
//
// p is the address of the first compressed byte, e is the address just past
// the last compressed byte and d is the address the decoded bytes are written
// to. The result is the address just past the last byte written.
//
// The input is not validated: the block is expected to end with a
// literals-only sequence, which is the only place the loop terminates.
func lz(p, e, d int32) int32 {
    for {
        // token: high nibble is the literal count, low nibble the match length minus 4
        tok := u8(p)
        p++

        n := tok >> 4
        if n == 15 {
            // a nibble of 15 is extended by the following bytes;
            // each byte is added, and a byte of 255 means one more follows
            b := int32(255)
            for b == 255 {
                b = u8(p)
                p++
                n += b
            }
        }

        // literals are taken verbatim from the input
        Memorycopy(d, p, n)
        p += n
        d += n
        if p >= e {
            break
        }

        // 2 byte little-endian distance back into the already decoded output
        lo, hi := u8(p), u8(p+1)
        off := hi<<8 | lo
        p += 2

        n = tok&15 + 4
        if n == 19 {
            b := int32(255)
            for b == 255 {
                b = u8(p)
                p++
                n += b
            }
        }

        // copy bytewise: the match may overlap the bytes being written
        for end := d + n; d < end; d++ {
            SetI8(d, u8(d-off))
        }
    }
    return d
}

this costs 244 bytes, but compression saves only 1856-(1574-19) = 301 bytes (the 19 bytes are the lz4 frame wrapper, which does not need to be stored).

test code:

// TestZ writes the uncompressed data section, Bytes[132:280+1708] (1856
// bytes), to the file "zz" in the current directory so that it can be fed to
// external compressors. It fails if the file cannot be written.
func TestZ(t *testing.T) { // write zz (uncompressed)
    newtest()
    defer reset() // still runs when t.Fatal aborts the test
    if err := os.WriteFile("zz", Bytes[132:280+1708], 0744); err != nil {
        t.Fatal(err)
    }
}
// TestLz decodes the lz4-compressed data section from the file "zz.lz12" with
// lz and compares the result with the uncompressed section taken from a fresh
// test image.
func TestLz(t *testing.T) {
    packed, err := os.ReadFile("zz.lz12")
    if err != nil {
        t.Fatal(err)
    }
    // strip lz4 frame wrapper: 11 leading and 8 trailing bytes
    packed = packed[11 : len(packed)-8]

    // reference: the data section as the initialised image holds it
    newtest()
    want := append([]byte(nil), Bytes[132:280+1708]...)
    reset()

    // fresh memory: compressed input at 132, decoded output from 2048
    Bytes = make([]byte, 8192)
    src, dst := int32(132), int32(2048)
    copy(Bytes[src:], packed)
    end := lz(src, src+int32(len(packed)), dst)

    got := Bytes[dst:end]
    if string(want) != string(got) {
        t.Fatal("lz4 mismatch")
    }
}
ktye commented 11 months ago

or precompute the initial memory image, which would remove some of kinit, e.g. minit(). packing the first 5000 bytes with lz4 gives 1742 bytes (including z.k, but before evaluating it); the remaining bytes are 0.