glorysdj opened this issue 3 years ago
I hit a similar problem when running customloss.py. After changing the local optimizer to DistriOptimizer, it fails with a new error: `Task 3 in stage 3.0 failed 4 times, most recent failure: Lost task 3.3 in stage 3.0 (TID 26) (172.30.27.4 executor 1): java.lang.IllegalArgumentException: requirement failed: firstIndex(3) out of range [0, 3)`
Full log:

```
2021-10-25 04:51:41 INFO DistriOptimizer$:824 - caching training rdd ...
2021-10-25 04:51:48 INFO DistriOptimizer$:650 - Cache thread models...
2021-10-25 04:51:49 ERROR TaskSetManager:73 - Task 3 in stage 3.0 failed 4 times; aborting job
2021-10-25 04:51:49 ERROR TaskSetManager:73 - Task 3 in stage 3.0 failed 4 times; aborting job
Traceback (most recent call last):
  File "/opt/bigdl-0.14.0-SNAPSHOT/examples/dllib/autograd/custom.py", line 59, in <module>
    distributed=True)
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/dllib/keras/engine/topology.py", line 239, in fit
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/dllib/utils/file_utils.py", line 164, in callZooFunc
  File "/opt/bigdl-0.14.0-SNAPSHOT/python/bigdl-friesian-spark_3.1.2-0.14.0-SNAPSHOT-python-api.zip/bigdl/dllib/utils/file_utils.py", line 158, in callZooFunc
  File "/opt/work/spark-3.1.2/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__
  File "/opt/work/spark-3.1.2/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o61.zooFit.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 3 in stage 3.0 failed 4 times, most recent failure: Lost task 3.3 in stage 3.0 (TID 26) (172.30.27.4 executor 1): java.lang.IllegalArgumentException: requirement failed: firstIndex(3) out of range [0, 3)
	at scala.Predef$.require(Predef.scala:281)
	at com.intel.analytics.bigdl.dllib.tensor.DenseTensor$.narrow(DenseTensor.scala:2618)
	at com.intel.analytics.bigdl.dllib.tensor.DenseTensor.narrow(DenseTensor.scala:444)
	at com.intel.analytics.bigdl.dllib.optim.parameters.AllReduceParameter.init(AllReduceParameter.scala:164)
	at com.intel.analytics.bigdl.dllib.optim.DistriOptimizer$.$anonfun$initThreadModels$2(DistriOptimizer.scala:635)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:863)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:863)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
```
Try increasing the batch size. BigDL's DistriOptimizer requires the global batch size to be divisible by the total number of executor cores (num-executors × executor-cores), and a batch size that is too small for the partition count appears to trigger this out-of-range failure when `AllReduceParameter.init` narrows the parameter tensor.
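As a minimal sketch of that constraint, the helper below rounds a desired batch size up to the nearest multiple of the total core count. The executor/core numbers are hypothetical placeholders, not values from this job:

```python
def min_valid_batch_size(desired, num_executors, cores_per_executor):
    """Round a desired global batch size up to a multiple of the total
    executor core count, which BigDL's DistriOptimizer requires so that
    each mini-batch splits evenly across cores."""
    total_cores = num_executors * cores_per_executor
    # Ceiling-divide, then scale back up to the next valid multiple.
    return ((desired + total_cores - 1) // total_cores) * total_cores

# Example: with 4 executors of 4 cores each, a desired batch of 100
# rounds up to 112 (the next multiple of 16).
print(min_valid_batch_size(100, 4, 4))  # -> 112
```

You would then pass the adjusted value as `batch_size` to `fit(..., distributed=True)`.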