intel-cloud / cosbench

a benchmark tool for cloud object storage service
Other
573 stars 242 forks source link

RandomInputStream with hashCheck=true gap calculation overflows when size > 2GB #425

Open guymguym opened 2 years ago

guymguym commented 2 years ago

Hi

This problem was found while debugging this issue https://github.com/noobaa/noobaa-core/issues/6934.

With the help of @MonicaLemay @romayalon we saw that running with hashCheck=true fails with ArrayIndexOutOfBoundsException when the total stream size exceeds 2 GB.

I found the problem in RandomInputStream (which is pretty old code) - casting the gap variable to int overflows in those cases. I tested that simply defining gap as long and then casting it to int only in the end when it is used for arraycopy seems to fix this issue.

Here is my standalone reproduction code:

public class TestRandomInputStream {

    public static void main(String[] args) {
        final long MAX_SIZE = (2 * GB) + IO_SIZE + hashLen;

        System.out.println("--- testMaxSize ---");
        TestRandomInputStream testMaxSize = new TestRandomInputStream(MAX_SIZE, true);
        testMaxSize.readEntireSize();

        System.out.println("--- testOverMax ---");
        TestRandomInputStream testOverMax = new TestRandomInputStream(MAX_SIZE + 1, true);
        try {
            testOverMax.readEntireSize();
        } catch (Exception e) {
            e.printStackTrace();
        }

        System.out.println("--- all done    ---");
    }

    private static final long KB = 1024L;
    private static final long MB = KB * KB;
    private static final long GB = KB * KB * KB;
    private static final long IO_SIZE = MB;

    private static final int SIZE = 4096; // 4 KB
    private final byte[] buffer = new byte[SIZE];
    private final boolean hashCheck;
    private static final int hashLen = 40;
    private byte[] hashBytes;
    private final long size;
    private long processed = 0;

    public TestRandomInputStream(long size, boolean hashCheck) {
        this.hashCheck = hashCheck;
        this.size = size;
    }

    public void readEntireSize() {
        long pos = 0;
        while (pos < size) {
            int n = (int) Math.min(size - pos, IO_SIZE);
            byte[] bytes = new byte[n];
            processBytes(bytes, 0, n);
            pos += n;
        }
    }

    public void processBytes(byte[] bytes, int offset, int length) {

        if (!hashCheck) {
            do {
                int segment = length > SIZE ? SIZE : length;
                System.arraycopy(buffer, 0, bytes, offset, segment);

                length -= segment;
                offset += segment;
            } while (length > 0); // data copy completed

        } else {

            System.out.println("--- processBytes#1: length=" + length + " processed=" + processed);

            if (length <= hashLen) {
                System.arraycopy(hashBytes, hashLen - length, bytes, 0, length);

                return;
            }

            int gap = (int) ((processed + length) - (size - hashLen));
            if (gap > 0) // partial hash needs append in gap area.
                length -= gap;

            processed += length;

            System.out.println("--- processBytes#2: length=" + length + " processed=" + processed + " gap=" + gap);

            do {
                int segment = length > SIZE ? SIZE : length;
                System.arraycopy(buffer, 0, bytes, offset, segment);
                // util.update(buffer, 0, segment);

                length -= segment;
                offset += segment;
            } while (length > 0); // data copy completed

            if ((gap <= hashLen) && (gap >= 0)) {
                // append md5 hash
                // String hashString = util.calculateHash();
                String hashString = "!!!!" + "00112233445566778899aabbccddeeff" + "!!!!";
                assert hashString.length() == hashLen;

                try {
                    hashBytes = hashString.getBytes("UTF-8");
                } catch (Exception e) {
                    e.printStackTrace();
                    hashBytes = hashString.getBytes();
                }
                assert hashBytes.length == hashLen;

                if (gap > 0)
                    System.arraycopy(hashBytes, 0, bytes, offset, gap);
            }

        }
    }
}

The (abbreviated) output is:

javac TestRandomInputStream.java
java -ea TestRandomInputStream
--- testMaxSize ---
--- processBytes#1: length=1048576 processed=0
--- processBytes#2: length=1048576 processed=1048576 gap=-2147483648
--- processBytes#1: length=1048576 processed=1048576
--- processBytes#2: length=1048576 processed=2097152 gap=-2146435072
...
... (abbreviated)
...
--- processBytes#1: length=1048576 processed=2146435072
--- processBytes#2: length=1048576 processed=2147483648 gap=-1048576
--- processBytes#1: length=1048576 processed=2147483648
--- processBytes#2: length=1048576 processed=2148532224 gap=0
--- processBytes#1: length=40 processed=2148532224
--- testOverMax ---
--- processBytes#1: length=1048576 processed=0
--- processBytes#2: length=-2146435071 processed=-2146435071 gap=2147483647
java.lang.ArrayIndexOutOfBoundsException: arraycopy: length -2146435071 is negative
        at java.base/java.lang.System.arraycopy(Native Method)
        at TestRandomInputStream.processBytes(TestRandomInputStream.java:80)
        at TestRandomInputStream.readEntireSize(TestRandomInputStream.java:44)
        at TestRandomInputStream.main(TestRandomInputStream.java:13)
--- all done    ---