Currently, the abstract class BytePacker has the following method:
/**
 * Deprecated byte[] overload. Wraps {@code input} with {@code ByteBuffer.wrap} and forwards to
 * the ByteBuffer overload of {@code unpack8Values}; {@code inPos}, {@code output} and
 * {@code outPos} are passed through unchanged.
 */
@Deprecated
public void unpack8Values(final byte[] input, final int inPos, final int[] output, final int outPos) {
unpack8Values(ByteBuffer.wrap(input), inPos, output, outPos);
}
I don't understand why the byte[] is wrapped in a ByteBuffer here; going through ByteBuffer performs worse than accessing the array directly.
I suggest using
// Proposed signature: an abstract byte[] overload, so the array is no longer wrapped in a ByteBuffer.
public abstract void unpack8Values(final byte[]input, final int inPos, final int[] output, final int outPos);
to replace
/**
 * Existing deprecated implementation: each call wraps {@code input} via {@code ByteBuffer.wrap}
 * and delegates to the ByteBuffer overload with the same positions and output array.
 */
@Deprecated
public void unpack8Values(final byte[] input, final int inPos, final int[] output, final int outPos) {
unpack8Values(ByteBuffer.wrap(input), inPos, output, outPos);
}
In my tests, the byte-array API performs better than the ByteBuffer API.
My test result is:
[Unpack8ValuesByteArray spent time] 80 ms
[Unpack8ValuesByteBuffer spent time] 133 ms
public class ByteBufferTest {
private static final BytePacker bytePacker = Packer.LITTLE_ENDIAN.newBytePacker(7);
private static final int COUNT = 100000;
public static void main(String[] args) {
byte [] in = new byte[1008];
int [] out = new int[1152];
int [] out1 = new int[1152];
int [] out2 = new int[1152];
int res = 0;
for(int i = 0; i < in.length; i++) {
in[i] = (byte) i;
}
for(int i = 0; i < COUNT; i++) {
res += unpack8ValuesBytes(in, out, i % out.length);
}
res = 0;
long t1 = System.currentTimeMillis();
for(int i = 0; i < COUNT; i++) {
res += unpack8ValuesBytes(in, out1, i % out.length);
}
long t2 = System.currentTimeMillis();
System.out.println("[Unpack8ValuesByteArray spent time] " + (t2-t1) + " ms");
ByteBuffer byteBuffer = ByteBuffer.wrap(in);
for(int i = 0; i < COUNT; i++) {
res += unpack8ValuesByteBuffer(byteBuffer, out, i % out.length);
}
res = 0;
long t3 = System.currentTimeMillis();
for(int i = 0; i < COUNT; i++) {
res += unpack8ValuesByteBuffer(byteBuffer, out2, i % out.length);
}
long t4 = System.currentTimeMillis();
System.out.println("[Unpack8ValuesByteBuffer spent time] " + (t4-t3) + " ms");
private static int unpack8ValuesBytes(byte [] in, int [] out, int ctr) {
for(int i = 0, j = 0; i < in.length; i+=7, j+=8) {
bytePacker.unpack8Values(in, i, out, j);
}
return out[ctr];
}
private static int unpack8ValuesByteBuffer(ByteBuffer in, int [] out, int ctr) {
for(int i = 0, j = 0; i < in.capacity(); i+=7, j+=8) {
bytePacker.unpack8Values(in, i, out, j);
}
return out[ctr];
}
}
Currently, the abstract class BytePacker has the following method:
// Deprecated byte[] overload: wraps input with ByteBuffer.wrap and forwards to the ByteBuffer
// overload, passing inPos, output and outPos through unchanged.
@Deprecated public void unpack8Values(final byte[] input, final int inPos, final int[] output, final int outPos) { unpack8Values(ByteBuffer.wrap(input), inPos, output, outPos);
}
I don't understand why the byte[] is wrapped in a ByteBuffer here; going through ByteBuffer performs worse than accessing the array directly.
I suggest using
// Proposed signature: an abstract byte[] overload, so the array is no longer wrapped in a ByteBuffer.
public abstract void unpack8Values(final byte[]input, final int inPos, final int[] output, final int outPos);
to replace
// Existing deprecated implementation: each call wraps input via ByteBuffer.wrap and delegates
// to the ByteBuffer overload with the same positions and output array.
@Deprecated public void unpack8Values(final byte[] input, final int inPos, final int[] output, final int outPos) { unpack8Values(ByteBuffer.wrap(input), inPos, output, outPos);
}
In my tests, the byte-array API performs better than the ByteBuffer API.
My test result is:
[Unpack8ValuesByteArray spent time] 80 ms
[Unpack8ValuesByteBuffer spent time] 133 ms
My test code is:
package org.apache.parquet.column.values.bitpacking;
import java.nio.ByteBuffer;
/**
 * Timing harness for the two {@code unpack8Values} entry points of {@code BytePacker}: one taking
 * a {@code byte[]}, the other a {@code ByteBuffer} wrapped around the same bytes.
 *
 * <p>For each entry point it performs {@code COUNT} untimed rounds, then {@code COUNT} timed
 * rounds, and prints the elapsed milliseconds. Finally it prints every index at which the two
 * timed outputs differ.
 */
public class ByteBufferTest {
  private static final BytePacker bytePacker = Packer.LITTLE_ENDIAN.newBytePacker(7);
  private static final int COUNT = 100000;

  public static void main(String[] args) {
    final byte[] packed = new byte[1008];
    final int[] scratch = new int[1152];     // written by the untimed rounds
    final int[] fromArray = new int[1152];   // written by the timed byte[] rounds
    final int[] fromBuffer = new int[1152];  // written by the timed ByteBuffer rounds
    int res = 0;                             // accumulated only; never printed

    for (int k = 0; k < packed.length; k++) {
      packed[k] = (byte) k;
    }

    // byte[] variant: untimed rounds, then timed rounds.
    for (int round = 0; round < COUNT; round++) {
      res += unpack8ValuesBytes(packed, scratch, round % scratch.length);
    }
    res = 0;
    final long arrayStart = System.currentTimeMillis();
    for (int round = 0; round < COUNT; round++) {
      res += unpack8ValuesBytes(packed, fromArray, round % scratch.length);
    }
    final long arrayElapsed = System.currentTimeMillis() - arrayStart;
    System.out.println("[Unpack8ValuesByteArray spent time] " + arrayElapsed + " ms");

    // ByteBuffer variant: wrap once, untimed rounds, then timed rounds.
    final ByteBuffer wrapped = ByteBuffer.wrap(packed);
    for (int round = 0; round < COUNT; round++) {
      res += unpack8ValuesByteBuffer(wrapped, scratch, round % scratch.length);
    }
    res = 0;
    final long bufferStart = System.currentTimeMillis();
    for (int round = 0; round < COUNT; round++) {
      res += unpack8ValuesByteBuffer(wrapped, fromBuffer, round % scratch.length);
    }
    final long bufferElapsed = System.currentTimeMillis() - bufferStart;
    System.out.println("[Unpack8ValuesByteBuffer spent time] " + bufferElapsed + " ms");

    // Cross-check the two timed outputs.
    for (int k = 0; k < fromArray.length; k++) {
      if (fromArray[k] == fromBuffer[k]) {
        continue;
      }
      System.out.println("diff: " + fromArray[k] + " " + fromBuffer[k]);
    }
  }

  /** Unpacks {@code in} 7 bytes at a time into groups of 8 ints, then returns {@code out[ctr]}. */
  private static int unpack8ValuesBytes(byte[] in, int[] out, int ctr) {
    int src = 0;
    int dst = 0;
    while (src < in.length) {
      bytePacker.unpack8Values(in, src, out, dst);
      src += 7;
      dst += 8;
    }
    return out[ctr];
  }

  /** ByteBuffer counterpart of the loop above, bounded by the buffer's capacity. */
  private static int unpack8ValuesByteBuffer(ByteBuffer in, int[] out, int ctr) {
    int src = 0;
    int dst = 0;
    while (src < in.capacity()) {
      bytePacker.unpack8Values(in, src, out, dst);
      src += 7;
      dst += 8;
    }
    return out[ctr];
  }
}
Reporter: jiangjiguang0719 / @jiangjiguang
Note: This issue was originally created as PARQUET-2189. Please see the migration documentation for further details.