jimmywarting / StreamSaver.js

StreamSaver writes stream to the filesystem directly asynchronous
https://jimmywarting.github.io/StreamSaver.js/example.html
MIT License
4.01k stars 417 forks source link

Added ZIP64 Support to zip-stream.js #259

Open magnus-rattlehead opened 2 years ago

magnus-rattlehead commented 2 years ago

Feel free to test. I used the Sintel movie and a 10GB file created by fallocate. Testing shows that the zip works. For some reason on my zip archiver (Ark from KDE), it complains about a corrupted zip, but is able to read and extract successfully. zip -T archive.zip confirms that it is valid. Feel free to test it.

magnus-rattlehead commented 2 years ago

Nevermind. archiving software doesn't complain about corrupted zips.

FallingHazard commented 2 years ago

It would be awesome if this was built intro a transform stream like in: https://github.com/transcend-io/conflux/blob/master/src/write.js

PidgeyBE commented 6 months ago

FYI, the solution here gave me corrupt zip files. I asked ChatGPT to rewrite the original one and did a bit of cleanup to end up on a working file... Tested with 10.000 files which result in a total zip file of 9GB.

zip-stream64.js
``` class Crc32 { constructor() { this.crc = -1 } append(data) { var crc = this.crc | 0; var table = this.table for (var offset = 0, len = data.length | 0; offset < len; offset++) { crc = (crc >>> 8) ^ table[(crc ^ data[offset]) & 0xFF] } this.crc = crc } get() { return ~this.crc } } Crc32.prototype.table = (() => { var i; var j; var t; var table = [] for (i = 0; i < 256; i++) { t = i for (j = 0; j < 8; j++) { t = (t & 1) ? (t >>> 1) ^ 0xEDB88320 : t >>> 1 } table[i] = t } return table })() const getDataHelper = byteLength => { var uint8 = new Uint8Array(byteLength) return { array: uint8, view: new DataView(uint8.buffer) } } const pump = zipObj => zipObj.reader.read().then(chunk => { if (chunk.done) return zipObj.writeFooter() const outputData = chunk.value zipObj.crc.append(outputData) zipObj.uncompressedLength += outputData.length zipObj.compressedLength += outputData.length zipObj.ctrl.enqueue(outputData) }) /** * [createWriter description] * @param {Object} underlyingSource [description] * @return {Boolean} [description] */ function createWriter(underlyingSource) { const files = Object.create(null) const filenames = [] const encoder = new TextEncoder() let offset = 0 let activeZipIndex = 0 let ctrl let activeZipObject, closed function next() { activeZipIndex++ activeZipObject = files[filenames[activeZipIndex]] if (activeZipObject) processNextChunk() else if (closed) closeZip() } var zipWriter = { enqueue(fileLike) { if (closed) throw new TypeError('Cannot enqueue a chunk into a readable stream that is closed or has been requested to be closed') let name = fileLike.name.trim() const date = new Date(typeof fileLike.lastModified === 'undefined' ? Date.now() : fileLike.lastModified) if (fileLike.directory && !name.endsWith('/')) name += '/' if (files[name]) throw new Error('File already exists.') const nameBuf = encoder.encode(name) filenames.push(name) const zipObject = files[name] = { level: 0, ctrl, directory: !!fileLike.directory, nameBuf, comment: encoder.encode(fileLike.comment || ''), compressedLength: 0, uncompressedLength: 0, extraArray: null, writeHeader() { var header = getDataHelper(26) var data = getDataHelper(30 + nameBuf.length) zipObject.header = header zipObject.offset = offset if (zipObject.level !== 0 && !zipObject.directory) { header.view.setUint16(4, 0x0800) } header.view.setUint32(0, 0x14000808) header.view.setUint16(0, 45, true) header.view.setUint16(6, (((date.getHours() << 6) | date.getMinutes()) << 5) | date.getSeconds() / 2, true) header.view.setUint16(8, ((((date.getFullYear() - 1980) << 4) | (date.getMonth() + 1)) << 5) | date.getDate(), true) header.view.setUint16(22, nameBuf.length, true) data.view.setUint32(0, 0x504b0304) data.array.set(header.array, 4) data.array.set(nameBuf, 30) offset += data.array.length ctrl.enqueue(data.array) }, writeFooter() { zipObject.header.view.setUint16(0, 45) var footer = getDataHelper(24) footer.view.setUint32(0, 0x504b0708) if (zipObject.crc) { zipObject.header.view.setUint32(10, zipObject.crc.get(), true) footer.view.setUint32(4, zipObject.crc.get(), true) } let zip64Extra = getDataHelper(28) zipObject.header.view.setUint32(14, 0xffffffff, true) zipObject.header.view.setUint32(18, 0xffffffff, true) footer.view.setBigUint64(8, BigInt(zipObject.compressedLength), true) footer.view.setBigInt64(16, BigInt(zipObject.uncompressedLength), true) zip64Extra.view.setUint16(0, 0x0001, true) zip64Extra.view.setUint16(2, 24, true) zip64Extra.view.setBigUint64(4, BigInt(zipObject.uncompressedLength), true) zip64Extra.view.setBigUint64(12, BigInt(zipObject.compressedLength), true) zip64Extra.view.setBigUint64(20, BigInt(files[name].offset), true) zipObject.extraArray = zip64Extra.array ctrl.enqueue(footer.array) offset += zipObject.compressedLength + footer.array.length next() }, fileLike } if (!activeZipObject) { activeZipObject = zipObject processNextChunk() } }, close() { if (closed) throw new TypeError('Cannot close a readable stream that has already been requested to be closed') if (!activeZipObject) closeZip() closed = true } } function closeZip() { var length = 0 var index = 0 var indexFilename, file, cdOffset, totalEntries = filenames.length for (indexFilename = 0; indexFilename < totalEntries; indexFilename++) { file = files[filenames[indexFilename]] length += 46 + file.nameBuf.length + file.comment.length if (file.extraArray) { length += file.extraArray.length } } cdOffset = offset const data = getDataHelper(length + 56 + 20 + 22) for (indexFilename = 0; indexFilename < totalEntries; indexFilename++) { file = files[filenames[indexFilename]] data.view.setUint32(index, 0x504b0102) data.view.setUint16(index + 4, 0x1400) data.array.set(file.header.array, index + 6) if (file.extraArray) { data.view.setUint16(index + 30, file.extraArray.length, true) } data.view.setUint16(index + 32, file.comment.length, true) if (file.directory) { data.view.setUint8(index + 38, 0x10) } if (file.offset >= 0xffffffff) data.view.setUint32(index + 42, 0xffffffff, true) else data.view.setUint32(index + 42, file.offset, true) data.array.set(file.nameBuf, index + 46) var extraLength = 0 if (file.extraArray) { extraLength = file.extraArray.length data.array.set(file.extraArray, index + 46 + file.nameBuf.length) } data.array.set(file.comment, index + 46 + file.nameBuf.length + extraLength) index += 46 + file.nameBuf.length + file.comment.length + extraLength } // Zip64 End of Central Directory record // 0: Signature data.view.setUint32(index, 0x504b0606); // 4: Size of zip64 EOCD data.view.setBigUint64(index + 4, BigInt(44), true); // 12: Version made By data.view.setUint16(index + 12, 45, true); // 14: version needed to extract data.view.setUint16(index + 14, 45, true); // 16: number of this disk // 20: number of the disk with the start of CD // 24: total number of entries in the central directory on this disk data.view.setBigUint64(index + 24, BigInt(totalEntries), true); // 32: total number of entries in the central directory data.view.setBigUint64(index + 32, BigInt(totalEntries), true); // 40: size of the central directory data.view.setBigUint64(index + 40, BigInt(length), true); // 48: Offset of start of central directory data.view.setBigUint64(index + 48, BigInt(cdOffset), true); index += 56 // Zip64 End of Central Directory locator // 0: Signature data.view.setUint32(index, 0x504b0607); // 4: number of the disk with the zip64 EOCD // 8: Offset of the zip64 EOCD data.view.setBigUint64(index + 8, BigInt(cdOffset + length), true); // 16: total number of disks data.view.setUint32(index + 16, 1, true); index += 20 // EOCD must set these values to 0xffff and 0xffffffff when using ZIP64 format totalEntries = 0xffff; cdOffset = 0xffffffff; data.view.setUint32(index, 0x504b0506) data.view.setUint16(index + 8, totalEntries, true) data.view.setUint16(index + 10, totalEntries, true) data.view.setUint32(index + 12, length, true) data.view.setUint32(index + 16, cdOffset, true) ctrl.enqueue(data.array) ctrl.close() } function processNextChunk() { if (!activeZipObject) return if (activeZipObject.directory) return activeZipObject.writeFooter(activeZipObject.writeHeader()) if (activeZipObject.reader) return pump(activeZipObject) if (activeZipObject.fileLike.stream) { activeZipObject.crc = new Crc32() activeZipObject.reader = activeZipObject.fileLike.stream().getReader() activeZipObject.writeHeader() } else next() } return new ReadableStream({ start: c => { ctrl = c underlyingSource.start && Promise.resolve(underlyingSource.start(zipWriter)) }, pull() { return processNextChunk() || ( underlyingSource.pull && Promise.resolve(underlyingSource.pull(zipWriter)) ) } }) } window.ZIP = createWriter ```
aleimu commented 1 month ago

The file is damaged because the variable zip64 is not switched to true in a timely manner, resulting in some files being in zip32 format and some in zip64 format. However, unzip cannot correctly recognize this. To solve the file damage and "Header error", I set zip64 to be true by default. This way, it's okay!