101arrowz / fflate

High performance (de)compression in an 8kB package
https://101arrowz.github.io/fflate
MIT License
2.27k stars 79 forks source link

The 2nd+ JPG images in a zip stream being created are corrupt for some reason? #202

Closed lancejpollard closed 8 months ago

lancejpollard commented 8 months ago

I have this React.js / Next.js page which uses imagemagick-wasm to convert multiple images in the browser, and then zip them up and download the zip.

I have tested a few situations to narrow down the bug, which is why I think it is fflate and not imagemagick-wasm:

  1. Converting/downloading 1 image and not zipping always works.
  2. Converting/downloading 1 image with zipping always works.
  3. Converting/downloading 2+ images without zipping always works.
  4. The only thing that doesn't work is converting/downloading 2+ images with zipping.

Here is what I'm seeing:

2024-02-06 19 23 09

And here is my code (sorry, it would take a lot of work to boil this down to a fully reproducible example):

'use client'

import forge from 'node-forge'
import * as fflate from 'fflate'
import bytes from 'bytes'
import React, {
  useCallback,
  useContext,
  useEffect,
  useMemo,
  useRef,
  useState,
} from 'react'
import { BookSidebarLayout, Footer } from '~/components'
import ToolListFooter from '~/components/ToolListFooter'
import { scrollToElementCallback } from '~/utilities/client/element'
import Field from '~/components/Field'
import Label from '~/components/Label'
import _ from 'lodash'
import InputError from '~/components/InputError'
import { scrollIntoViewIfVisible } from '~/utilities/element'
import NativeSelect from '~/components/NativeSelect'
import FileDropzone, { MimeType } from '~/components/FileDropzone'
import { isTooLarge } from '~/utilities/byte'
import Psd from '@webtoon/psd'
import mimeType from 'mime-types'
import {
  IMAGE_MAGICK_READING_FORMAT,
  IMAGE_MAGICK_WRITING_FORMAT,
  convertImageWithImageMagick,
} from '~/utilities/imageMagick'
import { MagickFormat } from '~/utilities/imagemagick/index.js'
import useImageMagick from '~/hooks/useImageMagick'
import { downloadBlob, replaceFileExtension } from '~/utilities/file'
import BufferImage from '~/components/BufferImage'
import { generateUuidv5 } from '~/utilities/uuid'

export const PIXEL =
  'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=='

export const INPUT_IMAGE_FORMAT = [
  '3fr',
  'arw',
  'avif',
  'bmp',
  'cr2',
  'cr3',
  'crw',
  'dcr',
  'dng',
  'eps',
  'erf',
  'gif',
  'heic',
  'heif',
  'icns',
  'ico',
  'jfif',
  'jpeg',
  'jpg',
  'mos',
  'mrw',
  'nef',
  'odd',
  'odg',
  'orf',
  'pef',
  'png',
  'ppm',
  'ps',
  'psd',
  'raf',
  'raw',
  'rw2',
  'tif',
  'tiff',
  'webp',
  'x3f',
  'xcf',
  'xps',
]

export const OUTPUT_IMAGE_FORMAT = [
  'bmp',
  'eps',
  'gif',
  'ico',
  'jpg',
  'odd',
  'png',
  'ps',
  'psd',
  'tiff',
  'webp',
]

// export const SERVER_OUTPUT_IMAGE_FORMAT = ['odd']

export type ImageConverterCast = {
  input: string
  output: string
  onChange: (i: string, o: string) => void
}

export default function ImageConverter({
  input,
  output,
  onChange,
}: ImageConverterCast) {
  useImageMagick()

  return (
    <ImageContextProvider>
      <Body
        onChange={onChange}
        inputFormat={input}
        outputFormat={output}
      />
    </ImageContextProvider>
  )
}

export type ImageContextDataCast = {
  files: Record<string, File>
}

export type ImageContextCast = {
  data: ImageContextDataCast
  set: React.Dispatch<React.SetStateAction<ImageContextDataCast>>
}

export const ImageContext = React.createContext<ImageContextCast>({
  data: {
    files: {},
  },
  set: () => {},
})

export type ImageContextProviderCast = {
  children: React.ReactNode
}

export function ImageContextProvider({
  children,
}: ImageContextProviderCast) {
  const [value, setValue] = useState<ImageContextDataCast>({
    files: {},
  })
  const state = useMemo(
    () => ({
      data: value,
      set: setValue,
    }),
    [value],
  )

  return (
    <ImageContext.Provider value={state}>
      {children}
    </ImageContext.Provider>
  )
}

export type BodyCast = {
  inputFormat: string
  outputFormat: string
  onChange: (i: string, o: string) => void
}

export type FileRecordCast = {
  id: string
  file: File
}

export type PreviewImageCast = {
  id: string
  content: ArrayBuffer
}

export function PreviewImage({ id, content }: PreviewImageCast) {
  const editState = useContext(ImageContext)

  const clearImage = () => {
    editState.set({
      ...editState.data,
      files: _.omit(editState.data.files, [id]),
    })
  }

  return (
    <div className="relative">
      <div className="p-8 w-fit bg-slate-50 rounded-sm">
        <BufferImage
          className="max-h-192 max-w-192"
          content={content}
        />
      </div>
      <div className="flex items-center justify-center p-8">
        <span
          className="group hover:opacity-50 opacity-70 transition-all inline-block w-24 h-24"
          onClick={clearImage}
        >
          <CloseIcon colorClassName="fill-slate-800 dark:fill-slate-100 group-hover:fill-rose-400" />
        </span>
      </div>
    </div>
  )
}

function Body({ inputFormat, outputFormat, onChange }: BodyCast) {
  const [inputError, setInputError] = useState<string>()
  const inputMimeType = mimeType.lookup(inputFormat)
  const [previews, setPreviews] = useState<
    Array<{ id: string; element: React.ReactNode }>
  >([])
  const editState = useContext(ImageContext)

  const handleSelectFiles = async (files: Array<File>) => {
    const previews: Array<{ id: string; element: React.ReactNode }> = []
    const images: Record<string, File> = {}
    let i = 0
    for (const file of files) {
      const uuid = generateUuidv5('foo')
      images[uuid] = file

      const arrayBuffer = await file.arrayBuffer()
      switch (inputFormat) {
        case 'png':
        case 'ico':
        case 'jpg':
        case 'gif':
        case 'webp':
          // we can display directly with BufferImage
          previews.push({
            id: uuid,
            element: (
              <PreviewImage
                id={uuid}
                content={arrayBuffer}
                key={uuid}
              />
            ),
          })
          break
        default: {
          const out = await convertImageWithImageMagick(
            new Uint8Array(arrayBuffer),
            IMAGE_MAGICK_FORMAT[inputFormat],
            MagickFormat.Jpeg,
          )
          previews.push({
            id: uuid,
            element: (
              <PreviewImage
                id={uuid}
                content={out}
                key={uuid}
              />
            ),
          })
        }
      }
      i++
    }
    setPreviews(p => [...p, ...previews])
    editState.set({
      ...editState.data,
      files: _.merge(images, editState.data.files),
    })
  }

  const handleInputFormat = (val: string) => {
    let out = val === outputFormat ? inputFormat : outputFormat
    onChange(val, out)
  }

  const handleOutputFormat = (val: string) => {
    onChange(inputFormat, val)
  }

  const handleConvert = async () => {
    const files = Object.values(editState.data.files)
    if (files.length === 1) {
      const file = files[0]
      const arrayBuffer = await file.arrayBuffer()
      const out = await convertImageWithImageMagick(
        new Uint8Array(arrayBuffer),
        IMAGE_MAGICK_FORMAT[inputFormat],
        IMAGE_MAGICK_FORMAT[outputFormat],
      )
      const name = replaceFileExtension(file.name, outputFormat)
      const blob = new Blob([out], {
        type:
          mimeType.lookup(`.${outputFormat}`) ||
          'application/octet-stream',
      })
      downloadBlob(blob, name)
    } else {
      const streams = await convertAndZipImages(
        editState.data.files,
        inputFormat,
        outputFormat,
      )
      const md = forge.md.md5.create()
      const decoder = new TextDecoder()
      for (const stream of streams) {
        const str = decoder.decode(stream)
        md.update(str)
      }
      const hash = md.digest().toHex()
      const blob = new Blob(streams, {
        type: 'application/octet-stream',
      })
      downloadBlob(blob, `image.${hash}.${outputFormat}.zip`)
    }
  }

  return (
    <>
      <div
        id="tool"
        className="relative p-16 flex flex-col gap-16"
      >
        <Field>
          <Label color="purple">Input files</Label>
          <FileDropzone
            accept={[inputFormat]}
            topped
            multiple
            className="1-3-screen-minus-nav flex items-center bg-violet-50 hover:bg-violet-100"
            borderClassName="border-violet-300"
            onDrop={handleSelectFiles}
            bottomed={Boolean(inputError)}
          >
            <P className="text-center">Drop file here</P>
          </FileDropzone>
          {inputError && <InputError>{inputError}</InputError>}
        </Field>
        <div className="p-16 flex justify-center items-center">
          <Button onClick={handleConvert}>Convert</Button>
        </div>
        <div className="p-16 flex justify-center flex-wrap gap-16 items-end">
          {previews
            .filter(x => x.id in editState.data.files)
            .map(x => x.element)}
        </div>
        <Grid
          maxColumns={2}
          minWidth={192}
          gap={16}
        >
          <Field>
            <Label color="base">Input format</Label>
            <NativeSelect
              color="base"
              value={inputFormat}
              onChange={handleInputFormat}
            >
              {INPUT_IMAGE_FORMAT.map(format => (
                <option
                  key={format}
                  value={format}
                >
                  {format}
                </option>
              ))}
            </NativeSelect>
          </Field>
          <Field>
            <Label color="base">Output format</Label>
            <NativeSelect
              color="base"
              value={outputFormat}
              onChange={handleOutputFormat}
            >
              {OUTPUT_IMAGE_FORMAT.filter(x => x !== inputFormat).map(
                format => (
                  <option
                    key={format}
                    value={format}
                  >
                    {format}
                  </option>
                ),
              )}
            </NativeSelect>
          </Field>
        </Grid>
      </div>
    </>
  )
}

export const IMAGE_MAGICK_FORMAT: Record<string, MagickFormat> = {
  jpg: MagickFormat.Jpeg,
  png: MagickFormat.Png,
  bmp: MagickFormat.Bmp,
  eps: MagickFormat.Eps,
  gif: MagickFormat.Gif,
  ico: MagickFormat.Ico,
  ps: MagickFormat.Ps,
  psd: MagickFormat.Psd,
  tiff: MagickFormat.Tiff,
  webp: MagickFormat.Webp,
}

export async function convertAndZipImages(
  files: Record<string, File>,
  inputFormat: string,
  outputFormat: string,
): Promise<Array<ArrayBuffer>> {
  return new Promise(async (res, rej) => {
    const streams: Array<any> = []

    const zip = new fflate.Zip()
    zip.ondata = (err: any, dat: any, final: any) => {
      if (!err) {
        // output of the streams
        streams.push(dat)
        if (final) {
          res(streams)
        }
      } else {
        return rej(err)
      }
    }

    const values = Object.values(files)

    for (const file of values) {
      const arrayBuffer = await file.arrayBuffer()
      const out = await convertImageWithImageMagick(
        new Uint8Array(arrayBuffer),
        IMAGE_MAGICK_FORMAT[inputFormat],
        IMAGE_MAGICK_FORMAT[outputFormat],
      )
      const name = replaceFileExtension(file.name, outputFormat)
      const zipFile = new fflate.ZipPassThrough(name)

      // Always add streams to ZIP archives before pushing to those streams
      zip.add(zipFile)

      zipFile.push(out, true)
    }

    zip.end()
  })
}

See toward the end, the fflate stuff, straight from the docs.

What am I missing? Is there any reason you can think of why this might happen?

Thank you so much for your time/help.

lancejpollard commented 8 months ago

I tried the fflate.zipSync and same error. Also, the files which are corrupt are random it seems, it changes on each download. Maybe I'm doing something wrong?

101arrowz commented 8 months ago

By any chance have you tried this with fflate v0.8.2? I released it just a few hours ago and this issue seems very similar to something I fixed in that release.

lancejpollard commented 8 months ago

I'm using 0.8.1, let me try 0.8.2 then :) Thanks.

lancejpollard commented 8 months ago

Ah now with 0.8.2 I'm getting this when I create the zip with 2 files:

Screenshot 2024-02-06 at 7 37 24 PM
101arrowz commented 8 months ago

Can you send me a repo with this code or at least some examples of data you're compressing here? The corrupted ZIP would help too.

lancejpollard commented 8 months ago

@101arrowz okay I whipped together this demo, which seems to be demonstrating the latest error of the unopenable archive.

https://github.com/lancejpollard/zip-test-with-fflate

I use pnpm install && pnpm dev to start it, then upload 2 or 3 PNGs and it will convert them to JPG with imagemagick-wasm.

Here are the PNGs I used if it makes a difference.

Archive.zip

I noticed before I added imagemagick-wasm just now, if I simply selected PNGs and zipped them without conversion, it worked. SO it could also be something with imagemagick, I'm not sure. Oh I also tried UZIP.js and it worked for more files but still resulted in corrupted images, so if that is also failing, it could also be something I'm doing, not sure.

Thanks for taking a look.

lancejpollard commented 8 months ago

I just tried once using this:

zipFile.push(out.slice(0), true);

And it seems to have not corrupted the zipped images? Could it be something to do with the error you sometimes get with array buffers like:

Cannot perform %TypedArray%.prototype.set on a detached ArrayBuffer
101arrowz commented 8 months ago

I haven't tried cloning your repo yet but based on that error message I'm pretty sure I know why you're having issues. A "detached ArrayBuffer" is essentially an ArrayBuffer (i.e. the underlying block of memory for a Uint8Array) that has either been invalidated, or been moved to a separate thread or process.

In the context of WebAssembly, this usually happens when you construct a Uint8Array that points to a location in the WASM memory (likely as an output pointer for your WASM code to write the converted image to), but then you have to grow the WASM memory to do some temporary heap allocations. When you grow the WASM memory all pointers to the old memory are invalidated, and their ArrayBuffers detached.

In this case I'd guess that the WASM executed by convertImageWithImageMagick is allocating a new output buffer for the image each time it's called. Since you aren't manually freeing the memory it returns (you'd probably have to call some release function on the returned Uint8Array), the allocator is running out of memory space and is therefore growing the entire WASM memory on each call. In other words, every time you call convertImageWithImageMagick, all its previous return values become detached and therefore break.

This issue gets resolved if you immediately copy the data out of the WASM memory into JavaScript-owned memory (e.g. with out.slice(0), as you observed) because then you're no longer keeping a long-term reference to the WASM memory. But when you use ZipPassThrough directly, fflate directly moves your input to the output stream with zero copies, and here, that input is a Uint8Array that points to WASM memory that gets invalidated after future calls to convertImageWithImageMagick. So when you try to concatenate all the output buffers and download them with new Blob(streams), you're still using those invalid WASM pointers.

My recommendation here is to modify your convertImageWithImageMagick function to return a copy of the WASM memory, rather than a view into WASM memory (e.g. if it's currently returning origBuffer, return origBuffer.slice() instead), and deallocate the original view back using your WASM binary's allocator (maybe your bindings have some sort of free(origBuffer) function?). This should fix your current problem and any future unexpected behavior you could encounter due to that function returning a "temporary" buffer.

Let me know if you need any more clarification or if you have any further issues!

lancejpollard commented 8 months ago

Wow thank you for that! It really clears things up, I was just about to ask about that on the web. This is a really great explanation and makes total sense. I will move the .slice() to inside convertImageWithImageMagick, so it's abstracted away I guess. Thanks again!