pierrec / lz4

LZ4 compression and decompression in pure Go
BSD 3-Clause "New" or "Revised" License
878 stars 142 forks source link

Compression ratio seems to always be the same for level 1 to level 9 #219

Open july2993 opened 3 months ago

july2993 commented 3 months ago

test code:

package lz4

import (
    "fmt"
    "io"
    "os"

    "github.com/pierrec/lz4/v4"
    "github.com/spf13/cobra"
)

// NewLZ4Command builds the "lz4" cobra command. It compresses the input file
// into an LZ4 frame and prints the compressed size as a percentage of the
// input size.
//
// Flags:
//   - -d/--decompress: accepted, but decompression is not implemented and the
//     command returns an error.
//   - -i/--level: compression level 1-9; when 0 (the default) no level option
//     is applied to the writer.
//   - -B/--block: block size selector 4-7; when 0 (the default) no block size
//     option is applied to the writer.
//
// The first positional argument is the input path. The optional second one is
// the output path, which defaults to the input path with ".lz4" appended.
func NewLZ4Command() *cobra.Command {
    var decompress bool
    var level int
    var block int

    command := cobra.Command{
        Use: "lz4 [arg] [input] [output]",
        RunE: func(cmd *cobra.Command, args []string) error {
            if len(args) == 0 {
                return cmd.Usage()
            }

            input := args[0]
            var output string
            if len(args) >= 2 {
                output = args[1]
            }

            if decompress {
                return fmt.Errorf("unimplemented")
            }

            // start handle compress
            if output == "" {
                output = input + ".lz4"
            }

            // Validate the flags before touching the file system: os.Create
            // truncates an existing output file, so a bad flag value must be
            // rejected first.
            //
            // Other options that could be added here:
            // lz4.ConcurrencyOption(concurrency),
            // lz4.BlockChecksumOption(blockChecksum),
            // lz4.ChecksumOption(streamChecksum),
            var options []lz4.Option
            if level > 0 {
                clv, err := toLevel(level)
                if err != nil {
                    return err
                }
                options = append(options, lz4.CompressionLevelOption(clv))
            }

            if block > 0 {
                cblock, err := toBlockSize(block)
                if err != nil {
                    return err
                }
                options = append(options, lz4.BlockSizeOption(cblock))
            }

            infile, err := os.Open(input)
            if err != nil {
                return err
            }
            defer infile.Close()

            outfile, err := os.Create(output)
            if err != nil {
                return err
            }
            // Safety net so the handle is released on every early return.
            // On the success path the explicit Close below reports the real
            // error; the second Close from this defer then only returns an
            // "already closed" error, which is deliberately ignored.
            defer outfile.Close()

            zw := lz4.NewWriter(outfile)

            if len(options) > 0 {
                if err := zw.Apply(options...); err != nil {
                    return err
                }
            }

            if _, err := io.Copy(zw, infile); err != nil {
                return fmt.Errorf("failed to compress: %w", err)
            }

            // Closing the writer flushes any pending data; it must happen
            // before the file is closed and its size is measured.
            if err := zw.Close(); err != nil {
                return fmt.Errorf("failed close lz4 writer: %w", err)
            }
            if err := outfile.Close(); err != nil {
                return fmt.Errorf("failed close file: %w", err)
            }

            outStat, err := os.Stat(output)
            if err != nil {
                return fmt.Errorf("failed to stat %q: %w", output, err)
            }

            inStat, err := infile.Stat()
            if err != nil {
                return err
            }

            // An empty input would make the ratio a division by zero.
            if inStat.Size() == 0 {
                fmt.Println("Compress: n/a (empty input)")
                return nil
            }

            fmt.Printf("Compress: %0.2f\n", float64(outStat.Size())*100.0/float64(inStat.Size()))

            return nil
        },
    }

    command.Flags().BoolVarP(&decompress, "decompress", "d", false, "decompression (default for .lz4 extension)")
    command.Flags().IntVarP(&level, "level", "i", 0, "level 1-9")
    command.Flags().IntVarP(&block, "block", "B", 0, "block 4-7")

    return &command
}

// toLevel converts a numeric level in the range 1-9 into the matching
// lz4.CompressionLevel constant and prints the selected level to stdout.
// Any value outside 1-9 yields an error.
//
// The named constants are looked up directly instead of being recomputed
// with a bit shift, so the function does not depend on how the library
// encodes its levels and cannot hit a negative shift amount.
func toLevel(lv int) (lz4.CompressionLevel, error) {
    levels := [...]lz4.CompressionLevel{
        lz4.Level1,
        lz4.Level2,
        lz4.Level3,
        lz4.Level4,
        lz4.Level5,
        lz4.Level6,
        lz4.Level7,
        lz4.Level8,
        lz4.Level9,
    }

    if lv < 1 || lv > len(levels) {
        return 0, fmt.Errorf("unsupported level: %v", lv)
    }

    clv := levels[lv-1]
    fmt.Println("level lz4.CompressionLevel: ", clv)

    return clv, nil
}

// toBlockSize maps the command-line block selector (4-7) onto the
// corresponding lz4.BlockSize constant:
//
//	4 -> lz4.Block64Kb
//	5 -> lz4.Block256Kb
//	6 -> lz4.Block1Mb
//	7 -> lz4.Block4Mb
//
// Any other selector yields an error and a zero BlockSize.
func toBlockSize(block int) (lz4.BlockSize, error) {
    sizes := map[int]lz4.BlockSize{
        4: lz4.Block64Kb,
        5: lz4.Block256Kb,
        6: lz4.Block1Mb,
        7: lz4.Block4Mb,
    }

    if size, ok := sizes[block]; ok {
        return size, nil
    }
    return 0, fmt.Errorf("unsupported block size: %v", block)
}

I tested a file with the default setting and with levels 1 to 9:

#! /bin/bash
# Compress the same file once with the default level and then once per
# level 1-9, always with block size selector 4, so the ratios can be compared.

file=./20230628171044.xml
# file=./binlog.bk

# -f: do not complain when no previous output exists (e.g. on the first run).
rm -f "$file.lz4"
go run main.go lz4 -B 4 "$file"

for (( i = 1; i < 10; i++ )); do
  rm -f "$file.lz4"
  go run main.go lz4 -i "$i" -B 4 "$file"
done

The compression ratio seems to always be the same for levels 1 to 9:

Compress: 7.66
level lz4.CompressionLevel:  Level1
Compress: 6.44
level lz4.CompressionLevel:  Level2
Compress: 6.44
level lz4.CompressionLevel:  Level3
Compress: 6.44
level lz4.CompressionLevel:  Level4
Compress: 6.44
level lz4.CompressionLevel:  Level5
Compress: 6.44
level lz4.CompressionLevel:  Level6
Compress: 6.44
level lz4.CompressionLevel:  Level7
Compress: 6.44
level lz4.CompressionLevel:  Level8
Compress: 6.44
level lz4.CompressionLevel:  Level9
Compress: 6.44