eaplatanios / tensorflow_scala

TensorFlow API for the Scala Programming Language
http://platanios.org/tensorflow_scala/
Apache License 2.0
939 stars 95 forks source link

datasetFromOutputSlices method not working with Estimator: FailedPreconditionException #165

Open mandar2812 opened 5 years ago

mandar2812 commented 5 years ago

This issue is possibly related to #157

Motivation

The main reason I need to use tf.data.datasetFromOutputSlices is that the TensorFlow Python docs mention that tf.data.Dataset.from_tensor_slices() is not ideal when loading large data sets.

So I thought it's better to use tf.data.datasetFromOutputSlices instead. But using it in any Model/Estimator-based API leads to the ShapeRefiner-based FailedPreconditionException.

Example

The following is a minimal reproduction of the issue:


import _root_.java.nio.file.Paths
import org.platanios.tensorflow.api._
import org.platanios.tensorflow.data.image.MNISTLoader

// Load the MNIST data set from the relative directory `datasets/MNIST`.
val dataSet = MNISTLoader.load(Paths.get("datasets/MNIST"))

// Build one dataset per tensor with `datasetFromOutputSlices`. Each tensor is
// first converted with `.toOutput`, and each element is then cast
// (`toFloat` for images, `toLong` for labels).
// These calls run at the top level of the script, before the estimator below
// is constructed.
// NOTE(review): the reported exception names input 'Constant' of
// 'Evaluator/TensorSlicesDataset'; presumably that constant originates from
// one of these `.toOutput` calls -- confirm.
val trainImages =
  tf.data.datasetFromOutputSlices(dataSet.trainImages.toOutput).map(_.toFloat)
val trainLabels =
  tf.data.datasetFromOutputSlices(dataSet.trainLabels.toOutput).map(_.toLong)
val testImages =
  tf.data.datasetFromOutputSlices(dataSet.testImages.toOutput).map(_.toFloat)
val testLabels =
  tf.data.datasetFromOutputSlices(dataSet.testLabels.toOutput).map(_.toLong)

// Training pipeline: images zipped with labels, then `repeat()`,
// `shuffle(10000)`, `batch(256)` and `prefetch(10)` applied in that order.
val trainData =
  trainImages
    .zip(trainLabels)
    .repeat()
    .shuffle(10000)
    .batch(256)
    .prefetch(10)

// Evaluation pipelines: zipped (images, labels) pairs with `batch(1000)` and
// `prefetch(10)`; no repeat or shuffle is applied. These values are the ones
// handed to the `Evaluator` hook further down in the script.
val evalTrainData = trainImages.zip(trainLabels).batch(1000).prefetch(10)
val evalTestData  = testImages.zip(testLabels).batch(1000).prefetch(10)

// NOTE(review): the message says "logistic regression", but the `layer` value
// below chains four `Linear` layers with `ReLU` layers in between.
logger.info("Building the logistic regression model.")

// Model input placeholder: FLOAT32 with a leading dimension of -1 and the
// remaining two dimensions taken from the shape of the training images.
val input = tf.learn.Input(
  FLOAT32,
  Shape(-1, dataSet.trainImages.shape(1), dataSet.trainImages.shape(2))
)
// Training-input (label) placeholder: INT64 with shape (-1).
val trainInput = tf.learn.Input(INT64, Shape(-1))
// Layer stack composed with `>>`: Flatten, then Linear(128), Linear(64) and
// Linear(32), each followed by a ReLU, and finally Linear(10).
// NOTE(review): the `0.1f` passed to each `ReLU` is presumably a leaky-ReLU
// slope -- confirm against the `tf.learn.ReLU` signature.
val layer = tf.learn.Flatten[Float]("Input/Flatten") >>
  tf.learn.Linear[Float]("Layer_0/Linear", 128) >> tf.learn
  .ReLU[Float]("Layer_0/ReLU", 0.1f) >>
  tf.learn.Linear[Float]("Layer_1/Linear", 64) >> tf.learn
  .ReLU[Float]("Layer_1/ReLU", 0.1f) >>
  tf.learn.Linear[Float]("Layer_2/Linear", 32) >> tf.learn
  .ReLU[Float]("Layer_2/ReLU", 0.1f) >>
  tf.learn.Linear[Float]("OutputLayer/Linear", 10)
// Loss chain: sparse softmax cross-entropy, then a mean, then a scalar
// summary registered under the tag "Loss".
val loss = tf.learn.SparseSoftmaxCrossEntropy[Float, Long, Float](
  "Loss/CrossEntropy"
) >>
  tf.learn.Mean[Float]("Loss/Mean") >>
  tf.learn.ScalarSummary[Float]("Loss/Summary", "Loss")
// YellowFin optimizer constructed with its default arguments.
val optimizer = tf.train.YellowFin()

// Assemble the supervised model from the pieces above; gradients are clipped
// with `ClipGradientsByGlobalNorm(5.0f)`.
val model = tf.learn.Model.simpleSupervised(
  input = input,
  trainInput = trainInput,
  layer = layer,
  loss = loss,
  optimizer = optimizer,
  clipGradients = tf.learn.ClipGradientsByGlobalNorm(5.0f)
)

logger.info("Training the linear regression model.")
// Directory used for summaries, checkpoints and the TensorBoard configuration below.
val summariesDir = Paths.get("temp/mnist-mlp")
// Accuracy metric wrapped in a `MapMetric`: the mapping function takes the
// argmax of `v._1` along axis -1 (as INT64, then cast to float) and pairs it
// with `v._2._2` cast to float.
// NOTE(review): presumably `v._1` is the model output and `v._2` the
// (image, label) pair -- confirm. The label type is declared as `Output[Int]`
// here while the label datasets in this script are mapped with `.toLong`;
// verify that the declared type is the intended one.
val accMetric =
  tf.metrics.MapMetric((v: (Output[Float], (Output[Float], Output[Int]))) => {
    (tf.argmax(v._1, -1, INT64).toFloat, v._2._2.toFloat)
  }, tf.metrics.Accuracy("Accuracy"))
// Build the in-memory estimator together with its hooks.
// The stack trace in this report shows the FailedPreconditionException being
// raised from within `InMemoryEstimator.apply` (via `Evaluator.begin`), i.e.
// while this value is being constructed and before `estimator.train` runs.
val estimator = tf.learn.InMemoryEstimator(
  model,
  tf.learn.Configuration(Some(summariesDir)),
  tf.learn.StopCriteria(maxSteps = Some(100000)),
  Set(
    // Loss logging, triggered with `StepHookTrigger(100)`.
    tf.learn.LossLogger(trigger = tf.learn.StepHookTrigger(100)),
    // Evaluation hook over the "Train" and "Test" datasets, triggered with
    // `StepHookTrigger(1000)`. The dataset thunks return the already-built
    // `evalTrainData` / `evalTestData` values rather than building new ones.
    tf.learn.Evaluator(
      log = true,
      datasets =
        Seq(("Train", () => evalTrainData), ("Test", () => evalTestData)),
      metrics = Seq(accMetric),
      trigger = tf.learn.StepHookTrigger(1000),
      name = "Evaluator"
    ),
    // Step-rate hook with `log = false`, writing to `summariesDir`.
    tf.learn.StepRateLogger(
      log = false,
      summaryDir = summariesDir,
      trigger = tf.learn.StepHookTrigger(100)
    ),
    // Summary and checkpoint savers, both targeting `summariesDir`.
    tf.learn.SummarySaver(summariesDir, tf.learn.StepHookTrigger(100)),
    tf.learn.CheckpointSaver(summariesDir, tf.learn.StepHookTrigger(1000))
  ),
  tensorBoardConfig =
    tf.learn.TensorBoardConfig(summariesDir, reloadInterval = 1)
)
// Train on `trainData` with a stop criterion of `maxSteps = Some(10000)`
// (the estimator itself was constructed with `maxSteps = Some(100000)`).
estimator.train(
  () => trainData,
  tf.learn.StopCriteria(maxSteps = Some(10000))
)

/** Computes the fraction of `images` whose predicted class equals the
  * corresponding entry in `labels`.
  *
  * The images are cast to float and passed to `estimator.infer`; the argmax of
  * the predictions along axis 1 is cast to `UByte` and compared element-wise
  * with `labels`. The comparison result is cast to float and averaged.
  *
  * @param images Images to classify.
  * @param labels Labels to compare the predicted classes against.
  * @return Mean of the element-wise matches, extracted as a scalar.
  */
def accuracy(images: Tensor[UByte], labels: Tensor[UByte]): Float = {
  val predictions = estimator.infer(() => images.toFloat)
  // Predicted class per example, cast so it can be compared with the labels.
  val predictedClasses = predictions.argmax(1).toUByte
  // One float per example: the result of the equality test against the label.
  val matches = predictedClasses.equal(labels).toFloat
  matches.mean().scalar
}

// Report the accuracy on the training split, then on the test split.
// Each message calls `accuracy` (defined earlier in this script) on the raw
// image and label tensors held by `dataSet`.
logger.info(
  s"Train accuracy = ${accuracy(dataSet.trainImages, dataSet.trainLabels)}"
)
logger.info(
  s"Test accuracy = ${accuracy(dataSet.testImages, dataSet.testLabels)}"
)

Stack Trace


2019-05-09 14:54:35.812 [main] INFO  MNIST Data Loader - Extracting images from file '/Users/mandar/datasets/MNIST/train-images-idx3-ubyte.gz'.
2019-05-09 14:54:37.756 [main] INFO  MNIST Data Loader - Extracting labels from file '/Users/mandar/datasets/MNIST/train-labels-idx1-ubyte.gz'.
2019-05-09 14:54:37.760 [main] INFO  MNIST Data Loader - Extracting images from file '/Users/mandar/datasets/MNIST/t10k-images-idx3-ubyte.gz'.
2019-05-09 14:54:37.843 [main] INFO  MNIST Data Loader - Extracting labels from file '/Users/mandar/datasets/MNIST/t10k-labels-idx1-ubyte.gz'.
2019-05-09 14:54:37.844 [main] INFO  MNIST Data Loader - Finished loading the MNIST dataset.
2019-05-09 14:54:38.074001: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.2 AVX AVX2 FMA
org.platanios.tensorflow.jni.FailedPreconditionException: Input 0 ('Constant') for 'Evaluator/TensorSlicesDataset' was not previously added to ShapeRefiner.
  org.platanios.tensorflow.jni.Op$.finish(Native Method)
  org.platanios.tensorflow.api.ops.Op$Builder.$anonfun$build$1(Op.scala:2646)
  org.platanios.tensorflow.api.utilities.package$.using(package.scala:31)
  org.platanios.tensorflow.api.ops.Op$Builder.build(Op.scala:2589)
  org.platanios.tensorflow.api.ops.data.Data$$anon$4.createHandle(Data.scala:210)
  org.platanios.tensorflow.api.ops.data.Dataset$$anon$9.createHandle(Dataset.scala:455)
  org.platanios.tensorflow.api.ops.data.Dataset$$anon$21.createHandle(Dataset.scala:1230)
  org.platanios.tensorflow.api.ops.data.Dataset$$anon$14.createHandle(Dataset.scala:857)
  org.platanios.tensorflow.api.ops.data.Dataset$$anon$17.createHandle(Dataset.scala:1026)
  org.platanios.tensorflow.api.ops.data.DatasetIterator.createInitializer(DatasetIterator.scala:87)
  org.platanios.tensorflow.api.learn.hooks.Evaluator.$anonfun$begin$4(Evaluator.scala:95)
  scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:234)
  scala.collection.immutable.List.foreach(List.scala:389)
  scala.collection.TraversableLike.map(TraversableLike.scala:234)
  scala.collection.TraversableLike.map$(TraversableLike.scala:227)
  scala.collection.immutable.List.map(List.scala:295)
  org.platanios.tensorflow.api.learn.hooks.Evaluator.$anonfun$begin$1(Evaluator.scala:93)
  scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
  scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
  org.platanios.tensorflow.api.ops.Op$.createWith(Op.scala:2043)
  org.platanios.tensorflow.api.learn.hooks.Evaluator.begin(Evaluator.scala:90)
  org.platanios.tensorflow.api.learn.hooks.Hook.internalBegin(Hook.scala:129)
  org.platanios.tensorflow.api.learn.hooks.Hook.internalBegin$(Hook.scala:129)
  org.platanios.tensorflow.api.learn.hooks.TriggeredHook.internalBegin(TriggeredHook.scala:52)
  org.platanios.tensorflow.api.learn.MonitoredSession$.$anonfun$apply$2(SessionWrapper.scala:441)
  org.platanios.tensorflow.api.learn.MonitoredSession$.$anonfun$apply$2$adapted(SessionWrapper.scala:441)
  scala.collection.Iterator.foreach(Iterator.scala:929)
  scala.collection.Iterator.foreach$(Iterator.scala:929)
  scala.collection.AbstractIterator.foreach(Iterator.scala:1417)
  scala.collection.IterableLike.foreach(IterableLike.scala:71)
  scala.collection.IterableLike.foreach$(IterableLike.scala:70)
  scala.collection.AbstractIterable.foreach(Iterable.scala:54)
  org.platanios.tensorflow.api.learn.MonitoredSession$.apply(SessionWrapper.scala:441)
  org.platanios.tensorflow.api.learn.estimators.Estimator$.monitoredTrainingSession(Estimator.scala:353)
  org.platanios.tensorflow.api.learn.estimators.InMemoryEstimator.$anonfun$session$1(InMemoryEstimator.scala:171)
  scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
  org.platanios.tensorflow.api.ops.Op$.createWith(Op.scala:2043)
  org.platanios.tensorflow.api.learn.estimators.InMemoryEstimator.<init>(InMemoryEstimator.scala:156)
  org.platanios.tensorflow.api.learn.estimators.InMemoryEstimator$.apply(InMemoryEstimator.scala:412)
  ammonite.$file.scripts.mnist_test$.<init>(mnist_test.sc:61)
  ammonite.$file.scripts.mnist_test$.<clinit>(mnist_test.sc)