Open silenceli opened 4 years ago
docker-registry.qiyi.virtual/jarvis-image/spark-with-rapids:3.0.0
# syntax=docker/dockerfile:1
FROM gcr.io/spark-operator/spark:v3.0.0

# Root is required to write into /opt/spark/jars below.
# NOTE(review): the RAPIDS plugin needs the CUDA runtime on the executor
# image at run time — this base may need to be CUDA-enabled (e.g. derived
# from nvidia/cuda); confirm before shipping.
USER root

# Put the cuDF jar, the RAPIDS plugin jar, and the GPU discovery script
# directly on Spark's default classpath so no extraClassPath config is needed.
COPY cudf-0.14-cuda10-1.jar getGpusResources.sh rapids-4-spark_2.12-0.1.0.jar /opt/spark/jars/

# Locations consumed by the RAPIDS getting-started scripts/configs.
ENV SPARK_RAPIDS_DIR=/opt/spark/jars \
    SPARK_CUDF_JAR=/opt/spark/jars/cudf-0.14-cuda10-1.jar \
    SPARK_RAPIDS_PLUGIN_JAR=/opt/spark/jars/rapids-4-spark_2.12-0.1.0.jar
[root@mesos-gpu-online001-bjdxt9 rapids]# ls -al
total 149104
-rw-r--r-- 1 root root 145993287 Jul 7 11:29 cudf-0.14-cuda10-1.jar
-rw-r--r-- 1 root root 348 Jul 7 20:53 Dockerfile
-rwxr-xr-x 1 root root 1754 Jul 7 11:15 getGpusResources.sh
-rwxr-xr-x 1 root root 6676958 Jul 7 11:09 rapids-4-spark_2.12-0.1.0.jar
docker build -t docker-registry.qiyi.virtual/jarvis-image/spark-with-rapids:3.0.0 .
# SparkApplication running SparkPi with the RAPIDS Accelerator enabled.
# NOTE: indentation reconstructed to valid YAML; the CRD field is
# `imagePullPolicy` (lowercase i) — the capitalized form is ignored.
apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: spark-pi
  namespace: default
spec:
  type: Scala
  mode: cluster
  image: "docker-registry.qiyi.virtual/jarvis-image/spark-with-rapids:3.0.0"
  imagePullPolicy: IfNotPresent
  mainClass: org.apache.spark.examples.SparkPi
  mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.12-3.0.0.jar"
  sparkVersion: "3.0.0"
  sparkConf:
    # Enable the RAPIDS SQL plugin and wire up GPU resource discovery.
    "spark.plugins": "com.nvidia.spark.SQLPlugin"
    "spark.executor.resource.gpu.discoveryScript": "/opt/spark/jars/getGpusResources.sh"
    "spark.executor.resource.gpu.vendor": "nvidia.com"
    "spark.driver.resource.gpu.discoveryScript": "/opt/spark/jars/getGpusResources.sh"
    "spark.driver.resource.gpu.vendor": "nvidia.com"
    "spark.rapids.sql.concurrentGpuTasks": "1"
    "spark.driver.resource.gpu.amount": "1"
    "spark.task.resource.gpu.amount": "1"
    "spark.executor.resource.gpu.amount": "1"
  deps:
    jars:
      - local:///opt/spark/jars/cudf-0.14-cuda10-1.jar
      - local:///opt/spark/jars/rapids-4-spark_2.12-0.1.0.jar
    files:
      - local:///opt/spark/jars/getGpusResources.sh
  restartPolicy:
    type: Never
  volumes:
    - name: "test-volume"
      hostPath:
        path: "/tmp"
        type: Directory
  driver:
    cores: 1
    coreLimit: "1200m"
    memory: "512m"
    # Request one NVIDIA GPU for the driver pod.
    gpu:
      name: "nvidia.com/gpu"
      quantity: 1
    labels:
      version: 3.0.0
    serviceAccount: spark
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
  executor:
    cores: 1
    instances: 1
    memory: "512m"
    # Request one NVIDIA GPU per executor pod.
    gpu:
      name: "nvidia.com/gpu"
      quantity: 1
    labels:
      version: 3.0.0
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
Sorry if I'm missing something, but shouldn't the image be based on nvidia/cuda for this to work? https://github.com/NVIDIA/spark-xgboost-examples/blob/spark-3/Dockerfile
This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.
Spark 3.0.0 supports the RAPIDS Accelerator, which provides a set of plugins for Apache Spark that leverage GPUs to accelerate processing via the RAPIDS libraries and UCX.
Do you have any plans to support this feature?
Looking forward to your reply — thanks!