With the help of @MonicaLemay @romayalon we saw that running with hashCheck=true fails with ArrayIndexOutOfBoundsException when the total stream size exceeds 2 GB.
I found the problem in RandomInputStream (which is pretty old code) - casting the gap variable to int overflows in those cases. I tested that simply defining gap as long and then casting it to int only in the end when it is used for arraycopy seems to fix this issue.
Here is my standalone reproduction code:
/**
 * Standalone reproduction harness for the RandomInputStream 2 GB overflow bug
 * (hashCheck=true streams larger than 2 GB failed with
 * ArrayIndexOutOfBoundsException).
 *
 * Root cause: the "gap" between the bytes processed so far and the start of
 * the trailing hash region was computed as a long expression but immediately
 * cast to int. For sizes above 2 GB the long value (e.g. -2147483649) wraps
 * around on the int cast, turning a large negative gap into a large positive
 * one; length then underflows and System.arraycopy throws.
 *
 * Fix: keep gap as a long and narrow to int only at the final arraycopy,
 * where it is provably small (0 < gap <= hashLen).
 */
public class TestRandomInputStream {

    private static final long KB = 1024L;
    private static final long MB = KB * KB;
    private static final long GB = KB * KB * KB;
    /** Chunk size used by the read loop (1 MB per processBytes call). */
    private static final long IO_SIZE = MB;
    /** Internal copy-segment size (4 KB). */
    private static final int SIZE = 4096; // 4 KB
    /** Length of the hex hash string appended at the end of the stream. */
    private static final int hashLen = 40;

    /** Source of stream payload bytes; zero-filled in this repro. */
    private final byte[] buffer = new byte[SIZE];
    /** When true, the last hashLen bytes of the stream are the hash string. */
    private final boolean hashCheck;
    /** Set lazily once the stream reaches the hash region. */
    private byte[] hashBytes;
    /** Total logical stream size in bytes (payload + trailing hash). */
    private final long size;
    /** Payload bytes emitted so far (excludes the trailing hash). */
    private long processed = 0;

    public TestRandomInputStream(long size, boolean hashCheck) {
        this.hashCheck = hashCheck;
        this.size = size;
    }

    public static void main(String[] args) {
        // Largest size the old int-cast code could survive: one more byte
        // pushes the initial gap below Integer.MIN_VALUE and used to wrap.
        final long MAX_SIZE = (2 * GB) + IO_SIZE + hashLen;
        System.out.println("--- testMaxSize ---");
        TestRandomInputStream testMaxSize = new TestRandomInputStream(MAX_SIZE, true);
        testMaxSize.readEntireSize();
        System.out.println("--- testOverMax ---");
        TestRandomInputStream testOverMax = new TestRandomInputStream(MAX_SIZE + 1, true);
        try {
            testOverMax.readEntireSize();
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.out.println("--- all done ---");
    }

    /** Drives processBytes over the whole stream in IO_SIZE-sized chunks. */
    public void readEntireSize() {
        long pos = 0;
        while (pos < size) {
            int n = (int) Math.min(size - pos, IO_SIZE);
            byte[] bytes = new byte[n];
            processBytes(bytes, 0, n);
            pos += n;
        }
    }

    /**
     * Fills bytes[offset .. offset+length) with stream content. When
     * hashCheck is enabled, the final hashLen bytes of the stream are the
     * hash string; "gap" measures how far this call overlaps that region.
     */
    public void processBytes(byte[] bytes, int offset, int length) {
        if (!hashCheck) {
            // Plain payload: copy zero-filled buffer in SIZE-byte segments.
            do {
                int segment = length > SIZE ? SIZE : length;
                System.arraycopy(buffer, 0, bytes, offset, segment);
                length -= segment;
                offset += segment;
            } while (length > 0); // data copy completed
        } else {
            System.out.println("--- processBytes#1: length=" + length + " processed=" + processed);
            if (length <= hashLen) {
                // Entire request lies inside the hash tail: serve the suffix.
                System.arraycopy(hashBytes, hashLen - length, bytes, 0, length);
                return;
            }
            // BUG FIX: gap stays a long. The old code cast this expression to
            // int; for size > 2 GB the first chunk's gap is below
            // Integer.MIN_VALUE and wrapped to a huge positive int, making
            // length negative and crashing the arraycopy below.
            long gap = (processed + length) - (size - hashLen);
            if (gap > 0) // partial hash needs append in gap area.
                length -= Math.toIntExact(gap); // gap <= length here, so int-safe
            processed += length;
            System.out.println("--- processBytes#2: length=" + length + " processed=" + processed + " gap=" + gap);
            do {
                int segment = length > SIZE ? SIZE : length;
                System.arraycopy(buffer, 0, bytes, offset, segment);
                // util.update(buffer, 0, segment);
                length -= segment;
                offset += segment;
            } while (length > 0); // data copy completed
            if ((gap <= hashLen) && (gap >= 0)) {
                // append md5 hash
                // String hashString = util.calculateHash();
                String hashString = "!!!!" + "00112233445566778899aabbccddeeff" + "!!!!";
                assert hashString.length() == hashLen;
                // UTF-8 is always present; avoids the old checked-exception
                // dance with a platform-charset fallback.
                hashBytes = hashString.getBytes(java.nio.charset.StandardCharsets.UTF_8);
                assert hashBytes.length == hashLen;
                if (gap > 0)
                    System.arraycopy(hashBytes, 0, bytes, offset, (int) gap); // 0 < gap <= 40
            }
        }
    }
}
Hi
This problem was found while debugging this issue https://github.com/noobaa/noobaa-core/issues/6934.
With the help of @MonicaLemay @romayalon we saw that running with
hashCheck=true
fails with ArrayIndexOutOfBoundsException
when the total stream size exceeds 2 GB. I found the problem in
RandomInputStream
(which is pretty old code) - casting the gap
variable to int overflows in those cases. I tested that simply defining gap
as long and then casting it to int only in the end when it is used for arraycopy seems to fix this issue. Here is my standalone reproduction code:
The (abbreviated) output is: