51zero / eel-sdk

Big Data Toolkit for the JVM
Apache License 2.0
145 stars 35 forks source link

Parquet DecimalWriter yields fixed binary error #344

Closed hannesmiller closed 7 years ago

hannesmiller commented 7 years ago
 IllegalArgumentException: Fixed Binary size 16 does not match field type length 14
             java.lang.IllegalArgumentException: Fixed Binary size 16 does not match field type length 14
    at org.apache.parquet.column.values.plain.FixedLenByteArrayPlainValuesWriter.writeBytes(FixedLenByteArrayPlainValuesWriter.java:57)
    at org.apache.parquet.column.impl.ColumnWriterV1.write(ColumnWriterV1.java:199)
    at org.apache.parquet.io.MessageColumnIO$MessageColumnIORecordConsumer.addBinary(MessageColumnIO.java:463)
    at org.apache.parquet.io.ValidatingRecordConsumer.addBinary(ValidatingRecordConsumer.java:219)
    at io.eels.component.parquet.DecimalWriter.write(RecordWriter.scala:144)
    at io.eels.component.parquet.StructRecordWriter$$anonfun$write$1.apply$mcVI$sp(RecordWriter.scala:114)
    at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
    at io.eels.component.parquet.StructRecordWriter.write(RecordWriter.scala:107)
    at io.eels.component.parquet.RowWriter.write(RowWriteSupport.scala:44)
    at io.eels.component.parquet.RowWriteSupport.write(RowWriteSupport.scala:35)
    at io.eels.component.parquet.RowWriteSupport.write(RowWriteSupport.scala:16)
    at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:123)
    at org.apache.parquet.hadoop.ParquetWriter.write(ParquetWriter.java:292)
    at io.eels.component.hive.dialect.ParquetHiveDialect$$anon$3.write(ParquetHiveDialect.scala:73)
    at io.eels.component.hive.HiveSinkWriter.write(HiveSinkWriter.scala:65)
    at io.eels.datastream.SinkAction$$anonfun$1$$anon$2$$anonfun$run$1$$anonfun$apply$1.apply(SinkAction.scala:62)
    at io.eels.datastream.SinkAction$$anonfun$1$$anon$2$$anonfun$run$1$$anonfun$apply$1.apply(SinkAction.scala:61)
    at scala.collection.Iterator$class.foreach(Iterator.scala:891)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
    at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
    at io.eels.datastream.SinkAction$$anonfun$1$$anon$2$$anonfun$run$1.apply(SinkAction.scala:61)
    at io.eels.datastream.SinkAction$$anonfun$1$$anon$2$$anonfun$run$1.apply(SinkAction.scala:60)
    at scala.collection.Iterator$class.foreach(Iterator.scala:891)
    at com.sksamuel.exts.collection.BlockingQueueConcurrentIterator.foreach(BlockingQueueConcurrentIterator.scala:10)
    at io.eels.datastream.SinkAction$$anonfun$1$$anon$2.run(SinkAction.scala:60)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
sksamuel commented 7 years ago

This error occurs when the original schema declared a decimal precision that is too low: the fixed-length binary written for the value (16 bytes) does not match the field length derived from the schema's precision (14 bytes).