mholt / archiver

Easily create & extract archives, and compress & decompress files of various formats
https://pkg.go.dev/github.com/mholt/archiver/v4
MIT License
4.37k stars 387 forks source link

open zip : invalid argument. There are folders or file names that are not in UTF-8 format #416

Open imkos opened 3 months ago

imkos commented 3 months ago

test code:

package main

import (
    "context"
    "fmt"
    "io"
    "os"

    "github.com/mholt/archiver/v4"
)

// extractArchiverInsideFile returns the in-archive names of all non-directory
// entries of the archive stored at fPath.
//
// The file is opened and handed to archiver.Identify to detect its format.
// If the detected format implements archiver.Extractor, the archive is walked
// and each regular entry's NameInArchive is collected; otherwise an empty
// list is returned. Names are returned exactly as the archive reports them.
//
// An error is returned if the file cannot be opened, the format cannot be
// identified, or walking the archive fails.
func extractArchiverInsideFile(fPath string) ([]string, error) {
    reader, err := os.Open(fPath)
    if err != nil {
        return nil, err
    }
    defer reader.Close()

    format, input, err := archiver.Identify(fPath, reader)
    if err != nil {
        return nil, err
    }

    fileList := make([]string, 0, 1000)
    ex, ok := format.(archiver.Extractor)
    if !ok {
        // Not an extractable format: nothing to list.
        return fileList, nil
    }

    // A nil path filter asks Extract to visit every entry in the archive.
    err = ex.Extract(context.Background(), input, nil, func(ctx context.Context, f archiver.File) error {
        fi, err := f.Stat()
        if err != nil {
            return err
        }
        if !fi.IsDir() {
            fileList = append(fileList, f.NameInArchive)
        }
        return nil
    })
    if err != nil {
        // Previously this error was dropped, so a truncated or corrupt
        // archive produced a partial list with a nil error.
        return nil, fmt.Errorf("listing entries of %q: %w", fPath, err)
    }
    return fileList, nil
}

// main reproduces the issue: it lists the entries of a sample zip archive,
// then opens each listed entry through archiver.FileSystem and drains it,
// printing the number of bytes read. It stops at the first failure and
// prints what went wrong.
func main() {
    fn := "/home/longw/test/YS8293EN_EA9732.zip"
    fl, err := extractArchiverInsideFile(fn)
    if err != nil {
        fmt.Println("extractArchiverInsideFile err,", err)
        return
    }
    fmt.Println("fl:", fl)

    afs, err := archiver.FileSystem(context.Background(), fn)
    if err != nil {
        fmt.Println("archiver.FileSystem err,", err)
        return
    }
    for _, f1 := range fl {
        f, err := afs.Open(f1)
        if err != nil {
            fmt.Printf("archiver open file %s err, %v\n", f1, err)
            return
        }
        n, err := io.Copy(io.Discard, f)
        // Close immediately rather than with defer: a defer inside the loop
        // would keep every entry open until main returns.
        f.Close()
        if err != nil {
            fmt.Printf("archiver read file %s err, %v\n", f1, err)
            return
        }
        fmt.Printf("f: %s, size: %d \n", f1, n)
    }
}

Issue: fileList contains file names with non-UTF-8 characters, and those files cannot be opened.

In the archiver package (zip.go), TextEncoding cannot be specified for the registered Zip format:

// init passes a Zip{} literal to RegisterFormat. The value is created inline,
// so no handle to it is exposed here through which a caller could set fields
// such as TextEncoding. (Excerpt; the rest of the body is elided.)
func init() {
    RegisterFormat(Zip{})
.....
}

Proposed change, which would let callers write archiver.ZipFormat().TextEncoding = "gb18030":

// defaultZip is the package-level *Zip that init passes to RegisterFormat.
var defaultZip = new(Zip)

// ZipFormat returns the package-level default *Zip so that callers can adjust
// its fields, e.g. ZipFormat().TextEncoding = "gb18030".
func ZipFormat() *Zip {
    return defaultZip
}

// init passes the package-level defaultZip pointer to RegisterFormat instead
// of an inline Zip{} literal. (Excerpt; the rest of the body is elided.)
func init() {
    RegisterFormat(defaultZip)
.....
}

Sample file: see the attached YS8293EN_EA9732.zip

mholt commented 2 months ago

Sorry for my late reply.

I see, so you want a way to specify a default text encoding for methods that abstract away the creation of the format and just open the file immediately.

I like your suggestion, though I might tweak it a bit for the final implementation.