Closed Chancebair closed 5 years ago
From baidriver
ks apply kubeflow --component mpi-operator --verbose --dry-run && \
ks apply kubeflow --component mxnet-operator
)
==> Installing kubeflow operators
+ _check_kubeflow_app_config
Kubeflow is not initialized yet
+ local kubeflow_config_path=/root/.bai/kubeflow-ks-app/ks_app/app.yaml
+ [[ ! -f /root/.bai/kubeflow-ks-app/ks_app/app.yaml ]]
+ echo 'Kubeflow is not initialized yet'
+ rm -rf /root/.bai/kubeflow-ks-app
+ return 0
+ mkdir /root/.bai/kubeflow-ks-app
+ cd /root/.bai/kubeflow-ks-app
+ export KUBECONFIG=/root/.bai/kubeconfig
+ KUBECONFIG=/root/.bai/kubeconfig
+ ks init ks_app
level=info msg="Using context "eks_benchmark-cluster" from kubeconfig file "/root/.bai/kubeconfig""
level=info msg="Creating environment "default" with namespace "default", pointing to "version:v1.13.7" cluster at address "https://84F650F9D0584E4A8C6F85E24FF42FCD.sk1.us-west-2.eks.amazonaws.com""
level=info msg="Generating ksonnet-lib data at path '/root/.bai/kubeflow-ks-app/ks_app/lib/ksonnet-lib/v1.13.7'"
+ cd ks_app
+ echo '-> Creating kubeflow namespace'
-> Creating kubeflow namespace
+ kubectl --kubeconfig=/root/.bai/kubeconfig apply -f
+ __create_kubeflow_namespace
+ cat
namespace/kubeflow unchanged
+ echo '-> Creating kubeflow environment'
-> Creating kubeflow environment
+ ks registry describe kubeflow
level=error msg="registry "kubeflow" doesn't exist"
+ ks registry add kubeflow https://github.com/kubeflow/kubeflow/tree/v0.4.1/kubeflow
+ ks env describe kubeflow
level=error msg="environment "kubeflow" was not found"
+ ks env add kubeflow --namespace kubeflow
level=info msg="Using context "eks_benchmark-cluster" from kubeconfig file "/root/.bai/kubeconfig""
level=info msg="Creating environment "kubeflow" with namespace "kubeflow", pointing to "version:v1.13.7" cluster at address "https://84F650F9D0584E4A8C6F85E24FF42FCD.sk1.us-west-2.eks.amazonaws.com""
+ echo '-> Applying kubeflow stuff'
-> Applying kubeflow stuff
+ ks pkg list --installed -o table
+ grep mpi-job
+ xargs
+ awk '{print $2}'
+ ks pkg install kubeflow/mpi-job
level=info msg="Retrieved 6 files"
+ awk '{print $2}'
+ ks pkg list --installed -o table
+ xargs
+ grep mxnet-job
+ ks pkg install kubeflow/mxnet-job
level=info msg="Retrieved 5 files"
+ ks component list
+ xargs
+ awk '{print $1}'
+ grep mpi-operator
+ ks generate mpi-operator mpi-operator
level=info msg="Writing component at '/root/.bai/kubeflow-ks-app/ks_app/components/mpi-operator.jsonnet'"
+ awk '{print $1}'
+ grep mxnet-operator
+ xargs
+ ks component list
+ ks generate mxnet-operator mxnet-operator
level=info msg="Writing component at '/root/.bai/kubeflow-ks-app/ks_app/components/mxnet-operator.jsonnet'"
+ ks apply kubeflow --component mpi-operator --verbose --dry-run
level=debug msg="setting log verbosity" verbosity-level=1
level=debug msg="loading application configuration from /root/.bai/kubeflow-ks-app/ks_app"
level=debug msg="loading schema version 0.3.0"
level=debug msg="loading overrides from /root/.bai/kubeflow-ks-app/ks_app"
level=debug msg="loading overrides from /root/.bai/kubeflow-ks-app/ks_app"
level=debug msg="Validating deployment at 'kubeflow' with server '[https://84f650f9d0584e4a8c6f85e24ff42fcd.sk1.us-west-2.eks.amazonaws.com]'"
level=debug msg="Overwriting --cluster flag with 'eks_benchmark-cluster'"
level=debug msg="Overwriting --namespace flag with 'kubeflow'"
level=debug msg="creating ks pipeline for environment "kubeflow""
level=debug msg="building objects" action=pipeline module-name=/
level=debug msg="jsonnet evaluate snippet" elapsed="395.162µs" name=params.libsonnet
level=debug msg="jsonnet evaluate snippet" elapsed=1.669682ms name=applyGlobals
level=debug msg="jsonnet evaluate snippet" elapsed=79.860162ms name=modularize-params
level=debug msg="jsonnet evaluate snippet" elapsed=2.299804ms name=params-for-module
level=debug msg="jsonnet evaluate snippet" elapsed="279.929µs" name=/root/.bai/kubeflow-ks-app/ks_app/environments/kubeflow/params.libsonnet
level=debug msg="preparing package /root/.bai/kubeflow-ks-app/ks_app/vendor/kubeflow/mpi-job@797bcb7407a589bacc35b9624120f51f36a83468->/tmp/ksvendor531599041/kubeflow/mpi-job" action=env.revendorPackages
level=debug msg="preparing package /root/.bai/kubeflow-ks-app/ks_app/vendor/kubeflow/mxnet-job@797bcb7407a589bacc35b9624120f51f36a83468->/tmp/ksvendor531599041/kubeflow/mxnet-job" action=env.revendorPackages
/work/baictl/drivers/aws/baidriver: line 234: 552 Killed ks apply kubeflow --component mpi-operator --verbose --dry-run
[ERROR] Failed with exit code: 137
+ local exit_code=137
+ [[ 137 != 0 ]]
+ echo '[ERROR] Failed with exit code: 137'
+ return 1
+ return 1
Usage: baictl [verb] [object] [options]
When I run the ks commands locally, I get:
ERROR handle object: patching object from cluster: merging object with existing state: unable to recognize "/var/folders/n6/lkbqfq0506vdyvqp6qbmyrsrmtf9fx/T/ksonnet-mergepatch148807747": no matches for kind "CustomResourceDefinition" in version "apiextensions.k8s.io/v1beta1"
But I'm not sure why that isn't showing up in the logs.
When the script is run locally, the error does not occur. Somehow it only happens when the script is run in a container.
We will no longer use ECS; we are switching to the CodeBuild pipeline approach instead.
When running ./baictl-infrastructure create on any account in us-west-2