Open gedw99 opened 2 years ago
We can replace https://github.com/serhack/pdf-diff/blob/5535f71841530a024fd0b8b1f823de03df9a0fc1/main.go#L48 with the following program, if we want:
// Command main renders every page of a PDF with go-fitz and writes each page
// into a target directory as a JPEG image, a plain-text file and an HTML file.
package main

import (
	"flag"
	"fmt"
	"image/jpeg"
	"os"
	"path/filepath"

	"github.com/gen2brain/go-fitz"
)

// main parses the -source and -target flags, echoes them, and runs the
// extraction. Any failure is printed to stderr and the process exits with
// status 1.
func main() {
	// Flags for the source file (pdf) and the output dir (jpg, txt, html).
	sourceFile := flag.String("source", ".", "source file pdf")
	targetDir := flag.String("target", ".", "target dir")
	flag.Parse()

	fmt.Println("sourceFile:", *sourceFile)
	fmt.Println("targetDir:", *targetDir)

	// os.Exit is called only after run has returned, so run's deferred
	// doc.Close has already executed by the time we exit.
	if err := run(*sourceFile, *targetDir); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run opens sourceFile, creates targetDir if needed, and writes
// testNNN.jpg, testNNN.txt and testNNN.html for every page (NNN is the
// zero-based page index). It returns the first error encountered, wrapped
// with the operation and page that failed.
func run(sourceFile, targetDir string) error {
	doc, err := fitz.New(sourceFile)
	if err != nil {
		return fmt.Errorf("opening %q: %w", sourceFile, err)
	}
	defer doc.Close()

	if err := os.MkdirAll(targetDir, os.ModePerm); err != nil {
		return fmt.Errorf("creating target dir %q: %w", targetDir, err)
	}

	pages := doc.NumPage()

	// Extract pages as images.
	for n := 0; n < pages; n++ {
		img, err := doc.Image(n)
		if err != nil {
			return fmt.Errorf("rendering page %d as image: %w", n, err)
		}
		path := filepath.Join(targetDir, fmt.Sprintf("test%03d.jpg", n))
		err = writeFile(path, func(f *os.File) error {
			// Keyed field: unkeyed literals of imported structs break
			// when the struct gains a field and are flagged by go vet.
			return jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
		})
		if err != nil {
			return fmt.Errorf("writing %q: %w", path, err)
		}
	}

	// Extract pages as text.
	for n := 0; n < pages; n++ {
		text, err := doc.Text(n)
		if err != nil {
			return fmt.Errorf("extracting text of page %d: %w", n, err)
		}
		path := filepath.Join(targetDir, fmt.Sprintf("test%03d.txt", n))
		// 0o666 (before umask) is the same mode os.Create uses.
		if err := os.WriteFile(path, []byte(text), 0o666); err != nil {
			return fmt.Errorf("writing %q: %w", path, err)
		}
	}

	// Extract pages as html.
	for n := 0; n < pages; n++ {
		html, err := doc.HTML(n, true)
		if err != nil {
			return fmt.Errorf("extracting html of page %d: %w", n, err)
		}
		path := filepath.Join(targetDir, fmt.Sprintf("test%03d.html", n))
		if err := os.WriteFile(path, []byte(html), 0o666); err != nil {
			return fmt.Errorf("writing %q: %w", path, err)
		}
	}

	return nil
}

// writeFile creates (or truncates) the file at path, passes it to write, and
// closes it. The Close error is returned when write itself succeeded, so a
// failure while finishing the file is not silently dropped.
func writeFile(path string, write func(f *os.File) error) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()
	return write(f)
}
This will build on all operating systems because the prebuilt libraries for every OS are included at https://github.com/gen2brain/go-fitz/tree/master/libs
It works for me on Mac; it should still be tested on Windows and Linux.
It would replace poppler, which is very heavy IMHO, and make the Go binary fully self-contained in a single file.
We can replace https://github.com/serhack/pdf-diff/blob/5535f71841530a024fd0b8b1f823de03df9a0fc1/main.go#L48 with this if we want. ....
This will build on all operating systems because the prebuilt libraries for every OS are included at https://github.com/gen2brain/go-fitz/tree/master/libs
It works for me on Mac; it should still be tested on Windows and Linux.
It would replace poppler, which is very heavy IMHO, and make the Go binary fully self-contained in a single file.