Hi,
I'm trying to submit the SparkPi example using SparkRDMA. I have SparkRDMA configured in my spark-defaults.conf (roughly as shown after the stack trace below), but I see the following errors:

./bin/spark-submit --deploy-mode client --master spark://dgx03:7077 --class org.apache.spark.examples.SparkPi ./examples/target/scala-2.11/jars/spark-examples_2.11-2.4.0.jar
...
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
19/02/15 12:12:35 INFO SparkContext: Running Spark version 2.4.0
19/02/15 12:12:35 INFO SparkContext: Submitted application: Spark Pi
19/02/15 12:12:35 INFO SecurityManager: Changing view acls to: akven
19/02/15 12:12:35 INFO SecurityManager: Changing modify acls to: akven
19/02/15 12:12:35 INFO SecurityManager: Changing view acls groups to:
19/02/15 12:12:35 INFO SecurityManager: Changing modify acls groups to:
19/02/15 12:12:35 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(akven); groups with view permissions: Set(); users with modify permissions: Set(akven); groups with modify permissions: Set()
19/02/15 12:12:36 INFO Utils: Successfully started service 'sparkDriver' on port 36411.
19/02/15 12:12:36 INFO SparkEnv: Registering MapOutputTracker
19/02/15 12:12:36 INFO disni: creating RdmaProvider of type 'nat'
19/02/15 12:12:36 INFO disni: jverbs jni version 32
19/02/15 12:12:36 INFO disni: sock_addr_in size mismatch, jverbs size 28, native size 16
19/02/15 12:12:36 INFO disni: IbvRecvWR size match, jverbs size 32, native size 32
19/02/15 12:12:36 INFO disni: IbvSendWR size mismatch, jverbs size 72, native size 128
19/02/15 12:12:36 INFO disni: IbvWC size match, jverbs size 48, native size 48
19/02/15 12:12:36 INFO disni: IbvSge size match, jverbs size 16, native size 16
19/02/15 12:12:36 INFO disni: Remote addr offset match, jverbs size 40, native size 40
19/02/15 12:12:36 INFO disni: Rkey offset match, jverbs size 48, native size 48
19/02/15 12:12:36 INFO disni: createEventChannel, objId 47303849051008
19/02/15 12:12:36 INFO disni: createId, id 47303849066960
19/02/15 12:12:36 INFO disni: bindAddr, address dgx03/A.B.C.D:3037
19/02/15 12:12:36 INFO RdmaNode: Failed to bind to port 3037 on iteration 0
19/02/15 12:12:36 INFO disni: bindAddr, address dgx03/A.B.C.D:3038
19/02/15 12:12:36 INFO RdmaNode: Failed to bind to port 3038 on iteration 1
...
19/02/15 12:12:36 INFO RdmaNode: Failed to bind to port 3052 on iteration 15
19/02/15 12:12:36 ERROR RdmaNode: Failed in RdmaNode constructor
^ This seems to be the main error
19/02/15 12:12:36 INFO disni: destroyCmId, id 0
19/02/15 12:12:36 INFO disni: destroyEventChannel, channel 0
19/02/15 12:12:36 ERROR SparkContext: Error initializing SparkContext.
java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.SparkEnv$.instantiateClass$1(SparkEnv.scala:264)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:323)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:175)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:257)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:424)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:935)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:926)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at org.apache.spark.examples.SparkPi$.main(SparkPi.scala:31)
at org.apache.spark.examples.SparkPi.main(SparkPi.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.io.IOException: Failed to bind. Make sure your NIC supports RDMA
at org.apache.spark.shuffle.rdma.RdmaNode.<init>(RdmaNode.java:87)
at org.apache.spark.shuffle.rdma.RdmaShuffleManager.<init>(RdmaShuffleManager.scala:137)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.SparkEnv$.instantiateClass$1(SparkEnv.scala:264)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:323)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:175)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:257)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:424)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:935)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:926)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at org.apache.spark.examples.SparkPi$.main(SparkPi.scala:31)
at org.apache.spark.examples.SparkPi.main(SparkPi.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
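For context, the RDMA-related part of my spark-defaults.conf is along these lines (the jar path below is a placeholder for wherever the spark-rdma jar actually lives on my nodes):

spark.driver.extraClassPath     /path/to/spark-rdma-<version>.jar
spark.executor.extraClassPath   /path/to/spark-rdma-<version>.jar
spark.shuffle.manager           org.apache.spark.shuffle.rdma.RdmaShuffleManager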
Any thoughts on what I need to change to fix this? Thanks in advance.
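(In case it's relevant to the "Make sure your NIC supports RDMA" message: I assume the way to sanity-check RDMA device visibility on the node is something like the commands below, from rdma-core / libibverbs-utils; happy to post their output if that helps.)

ibv_devices
ibv_devinfo
rdma link show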