This error was observed while running Scalding 0.17.3. We do not yet know the cause.
at cascading.flow.hadoop.FlowMapper.run(FlowMapper.java:148)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:460)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:344)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:174)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:168)
Caused by: cascading.flow.stream.DuctException: internal error: ['com.twitter.scalding.serialization.Boxed0@6a6c7f42', 'XXXXXX (redacted)']
at cascading.flow.hadoop.stream.HadoopCoGroupGate.receive(HadoopCoGroupGate.java:101)
at cascading.flow.hadoop.stream.HadoopCoGroupGate.receive(HadoopCoGroupGate.java:43)
at cascading.flow.stream.SourceStage.map(SourceStage.java:102)
at cascading.flow.stream.SourceStage.run(SourceStage.java:58)
at cascading.flow.hadoop.FlowMapper.run(FlowMapper.java:130)
... 7 more
Caused by: java.lang.ArrayIndexOutOfBoundsException
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer$Buffer.write(MapTask.java:1464)
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer$Buffer.write(MapTask.java:1360)
at java.io.DataOutputStream.write(DataOutputStream.java:88)
at java.io.DataOutputStream.writeByte(DataOutputStream.java:153)
at org.apache.hadoop.io.WritableUtils.writeVLong(WritableUtils.java:273)
at org.apache.hadoop.io.WritableUtils.writeVInt(WritableUtils.java:253)
at cascading.tuple.hadoop.io.HadoopTupleOutputStream.writeIntInternal(HadoopTupleOutputStream.java:155)
at cascading.tuple.hadoop.io.HadoopTupleOutputStream.writeIndexTuple(HadoopTupleOutputStream.java:160)
at cascading.tuple.hadoop.io.IndexTupleSerializer.serialize(IndexTupleSerializer.java:37)
at cascading.tuple.hadoop.io.IndexTupleSerializer.serialize(IndexTupleSerializer.java:28)
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1158)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:617)
at cascading.tap.hadoop.util.MeasuredOutputCollector.collect(MeasuredOutputCollector.java:69)
at cascading.flow.hadoop.stream.HadoopCoGroupGate.receive(HadoopCoGroupGate.java:88)
... 11 more
This error was observed while running Scalding 0.17.3. We do not yet know the cause.
It looks like the copy at this line is what is failing: https://github.com/apache/hadoop/blob/release-3.0.0-RC0/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java#L1464