The following command generates an error. Other spark programs work when specifying hdfs://scc/user/farrell/adsp/bams/SRR990385.bam as the input. It seems to be having a problem with testing for the presence of the SRR990385.bai file which is present. I tried running the command with hdfs://scc:8020/user/farrell/adsp/bams/SRR990385.bam and that works.
[December 3, 2017 2:56:35 PM EST] org.broadinstitute.hellbender.tools.genome.SparkGenomeReadCounts done. Elapsed time: 0.57 minutes.
Runtime.totalMemory()=982515712
org.apache.spark.SparkException: Job aborted due to stage failure: Task 12 in stage 0.0 failed 4 times, most recent failure: Lost task 12.3 in stage 0.0 (TID 14, scc-q09.scc.bu.edu, executor 1): java.lang.IllegalArgumentException: Wrong FS: hdfs://scc:8020/user/farrell/adsp/bams/SRR990385.bai, expected: hdfs://scc
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:645)
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:193)
at org.apache.hadoop.hdfs.DistributedFileSystem.access$000(DistributedFileSystem.java:105)
at org.apache.hadoop.hdfs.DistributedFileSystem$3.doCall(DistributedFileSystem.java:302)
at org.apache.hadoop.hdfs.DistributedFileSystem$3.doCall(DistributedFileSystem.java:298)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:298)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:766)
at org.seqdoop.hadoop_bam.util.WrapSeekable.openPath(WrapSeekable.java:60)
at org.seqdoop.hadoop_bam.BAMRecordReader.initialize(BAMRecordReader.java:142)
at org.seqdoop.hadoop_bam.BAMInputFormat.createRecordReader(BAMInputFormat.java:121)
at org.seqdoop.hadoop_bam.AnySAMInputFormat.createRecordReader(AnySAMInputFormat.java:190)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:178)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.(NewHadoopRDD.scala:177)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:134)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:69)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
The following command generates an error. Other spark programs work when specifying hdfs://scc/user/farrell/adsp/bams/SRR990385.bam as the input. It seems to be having a problem with testing for the presence of the SRR990385.bai file which is present. I tried running the command with hdfs://scc:8020/user/farrell/adsp/bams/SRR990385.bam and that works.
/share/pkg/gatk/4.beta.5/install/bin/gatk-launch SparkGenomeReadCounts -I hdfs://scc/user/farrell/adsp/bams/SRR990385.bam -o SRR990385.ReadCounts -R /restricted/projectnb/genpro/bundle/2.8/b37/human_g1k_v37.fasta --verbosity ERROR -- --sparkRunner SPARK --sparkMaster yarn --num-executors 1 --executor-memory 4G --executor-cores 3
[December 3, 2017 2:56:35 PM EST] org.broadinstitute.hellbender.tools.genome.SparkGenomeReadCounts done. Elapsed time: 0.57 minutes. Runtime.totalMemory()=982515712 org.apache.spark.SparkException: Job aborted due to stage failure: Task 12 in stage 0.0 failed 4 times, most recent failure: Lost task 12.3 in stage 0.0 (TID 14, scc-q09.scc.bu.edu, executor 1): java.lang.IllegalArgumentException: Wrong FS: hdfs://scc:8020/user/farrell/adsp/bams/SRR990385.bai, expected: hdfs://scc at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:645) at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:193) at org.apache.hadoop.hdfs.DistributedFileSystem.access$000(DistributedFileSystem.java:105) at org.apache.hadoop.hdfs.DistributedFileSystem$3.doCall(DistributedFileSystem.java:302) at org.apache.hadoop.hdfs.DistributedFileSystem$3.doCall(DistributedFileSystem.java:298) at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:298) at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:766) at org.seqdoop.hadoop_bam.util.WrapSeekable.openPath(WrapSeekable.java:60) at org.seqdoop.hadoop_bam.BAMRecordReader.initialize(BAMRecordReader.java:142) at org.seqdoop.hadoop_bam.BAMInputFormat.createRecordReader(BAMInputFormat.java:121) at org.seqdoop.hadoop_bam.AnySAMInputFormat.createRecordReader(AnySAMInputFormat.java:190) at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:178) at org.apache.spark.rdd.NewHadoopRDD$$anon$1.(NewHadoopRDD.scala:177)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:134)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:69)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)