Open lonok opened 1 year ago
I have an image like this:
Name One <-- MANY SPACES --> Name Two
URL of image: https://ibb.co/VMHKyY3
Wrong result: "Name One Name Two" (both names returned in a single result). Expected result: "Name One", "Name Two" (two separate results).
client := gosseract.NewClient() defer client.Close() client.SetLanguage("eng") client.SetPageSegMode(gosseract.PSM_AUTO) // <--- I TRY WITH ALL, SAME RESULT rows, cols := img.Rows(), img.Cols() scale := 2 to_rows := rows * scale to_cols := cols * scale double := gocv.NewMatWithSize(to_rows, to_cols, gocv.MatTypeCV8UC3) gocv.Resize(*img, &double, image.Pt(to_cols, to_rows), 0, 0, gocv.InterpolationCubic) // get bytes buf, _ := gocv.IMEncode(gocv.PNGFileExt, double) bytes = buf.GetBytes() client.SetWhitelist("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ \t") client.SetImageFromBytes(bytes) boxes, _ := client.GetBoundingBoxes(gosseract.RIL_TEXTLINE) for _, box := range boxes { fmt.Printf("%#v\n", box) }
gosseract.BoundingBox{Box:image.Rectangle{Min:image.Point{X:188, Y:808}, Max:image.Point{X:1464, Y:839}}, Word:"Name One Name Two\n", Confidence:95.23932647705078, BlockNum:0, ParNum:0, LineNum:0, WordNum:0}
Linux dell-g15 6.2.0-33-generic #33~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Sep 7 10:33:52 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
GO111MODULE='' GOARCH='amd64' GOBIN='' GOCACHE='/home/lonok/.cache/go-build' GOENV='/home/lonok/.config/go/env' GOEXE='' GOEXPERIMENT='' GOFLAGS='' GOHOSTARCH='amd64' GOHOSTOS='linux' GOINSECURE='' GOMODCACHE='/home/lonok/go/pkg/mod' GONOPROXY='' GONOSUMDB='' GOOS='linux' GOPATH='/home/lonok/go' GOPRIVATE='' GOPROXY='https://proxy.golang.org,direct' GOROOT='/usr/lib/go' GOSUMDB='sum.golang.org' GOTMPDIR='' GOTOOLCHAIN='auto' GOTOOLDIR='/usr/lib/go/pkg/tool/linux_amd64' GOVCS='' GOVERSION='go1.21.1' GCCGO='gccgo' GOAMD64='v1' AR='ar' CC='gcc' CXX='g++' CGO_ENABLED='1' GOMOD='/dev/null' GOWORK='' CGO_CFLAGS='-O2 -g' CGO_CPPFLAGS='' CGO_CXXFLAGS='-O2 -g' CGO_FFLAGS='-O2 -g' CGO_LDFLAGS='-O2 -g' PKG_CONFIG='pkg-config' GOGCCFLAGS='-fPIC -m64 -pthread -Wl,--no-gc-sections -fmessage-length=0 -ffile-prefix-map=/tmp/go-build3605953866=/tmp/go-build -gno-record-gcc-switches'
go version
go version go1.21.1 linux/amd64
tesseract 5.3.2-17-gce93 leptonica-1.82.0 libgif 5.1.9 : libjpeg 8d (libjpeg-turbo 2.1.1) : libpng 1.6.37 : libtiff 4.3.0 : zlib 1.2.11 : libwebp 1.2.2 : libopenjp2 2.4.0 Found AVX512BW Found AVX512F Found AVX512VNNI Found AVX2 Found AVX Found FMA Found SSE4.1 Found OpenMP 201511 Found libarchive 3.6.0 zlib/1.2.11 liblzma/5.2.5 bz2lib/1.0.8 liblz4/1.9.3 libzstd/1.4.8 Found libcurl/7.81.0 OpenSSL/3.0.2 zlib/1.2.11 brotli/1.0.9 zstd/1.4.8 libidn2/2.3.2 libpsl/0.21.0 (+libidn2/2.3.2) libssh/0.9.6/openssl/zlib nghttp2/1.43.0 librtmp/2.3 OpenLDAP/2.5.16
Summary
I have an image like this:
URL of image: https://ibb.co/VMHKyY3
Wrong result: "Name One Name Two" (both names returned in a single result). Expected result: "Name One", "Name Two" (two separate results).
Reproducibility
Reproducibility Frequency
Reproducible
output
Environment
go version go1.21.1 linux/amd64