#!/usr/bin/env bash
# Set up tooling (AWS CLI, eksctl, Spark with Kubernetes support) and create
# an EKS cluster for running Spark jobs on Kubernetes.
set -euo pipefail

# Install the AWS CLI, then configure credentials interactively.
sudo pip install awscli
aws configure

# Download the latest eksctl release for this platform and install it.
curl --silent --location \
  "https://github.com/weaveworks/eksctl/releases/download/latest_release/eksctl_$(uname -s)_amd64.tar.gz" \
  | tar xz -C /tmp
sudo mv /tmp/eksctl /usr/local/bin

# Build Spark with Kubernetes support and install PySpark from the source tree.
git clone https://github.com/apache/spark
cd spark
./build/mvn -DskipTests=true -Pkubernetes package
cd python
python setup.py sdist
sudo pip install dist/*.tar.gz
cd ../..

# Create a 4-node EKS cluster; the kubeconfig is written to the current directory.
eksctl create cluster \
  --name=computable-spark-test \
  --nodes=4 \
  --kubeconfig=./kubeconfig.spark-test.yaml \
  --node-ami=auto

# Point kubectl at the new cluster and verify that the nodes are Ready.
export KUBECONFIG="$(pwd)/kubeconfig.spark-test.yaml"
kubectl get nodes
Update core-site.xml to include your AWS access key ID and secret access key.
Run grant-api-role.sh to allow the default service account to launch additional pods for Spark.
Update the Spark job script sql.py to run the desired query.
Push the SQL script to S3:
aws s3 cp sql.py --acl public-read s3://computable-spark/sql.py
export KUBE_MASTER=k8s://https://xxxxxxxx.amazonaws.com
HADOOP_CONF_DIR=pwd
spark-submit --deploy-mode cluster --master $KUBE_MASTER --conf spark.kubernetes.container.image=tnachen/spark-py:latest2 s3a://computable-spark/sql.py
kubectl get pods
kubectl logs