Microsoft.ML.Transforms.TensorFlow.TFException: "Expected image (JPEG, PNG, or GIF), got unknown format starting with '2552162552240167' [[{{node map/while/DecodePng}}]]" when trying to use Attention-OCR model #4038
Getting a Microsoft.ML.Transforms.TensorFlow.TFException: "Expected image (JPEG, PNG, or GIF)" error when trying to predict with an OCR model created with the Attention-OCR GitHub repository (written in TensorFlow).
Source code / logs
ModelUtils.cs:
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using OCRWithMLNET.ImageDataStructures;
using System.Diagnostics;
using System.IO;
using System.Drawing;
using System.Drawing.Imaging;
namespace OCRWithMLNET
{
/// <summary>
/// Helpers for wrapping a frozen TensorFlow graph in an ML.NET pipeline and
/// running predictions over a list of image files.
/// </summary>
public static class ModelUtils
{
    /// <summary>
    /// Image-related constants. None of them is referenced by the other members of this class.
    /// </summary>
    public struct ImageSettings
    {
        public const int imageHeight = 224;
        public const int imageWidth = 224;
        public const float mean = 117;
        public const bool channelsLast = true;
    }

    /// <summary>
    /// Names of the graph tensors that the scoring pipeline reads from and writes to.
    /// </summary>
    public struct TensorFlowModelSettings
    {
        // input tensor name
        public const string inputTensorName = "input_image_as_bytes";
        // output tensor name
        public const string outputTensorName = "prediction";
    }

    /// <summary>
    /// Builds the scoring pipeline around the TensorFlow model stored at
    /// <paramref name="modelLocation"/> and returns a prediction engine for it.
    /// </summary>
    /// <param name="modelLocation">Path passed to <c>LoadTensorFlowModel</c>.</param>
    /// <returns>A prediction engine mapping <see cref="ImageInputData"/> to <see cref="ImageLabelPredictions"/>.</returns>
    public static PredictionEngine<ImageInputData, ImageLabelPredictions> loadModel(string modelLocation)
    {
        MLContext mlContext = new MLContext();

        // The pipeline is fitted on an empty data view: only the schema is needed.
        IDataView schemaOnlyData = CreateEmptyDataView(mlContext);

        // Use the shared constants so the column names cannot drift from the
        // [ColumnName] binding on ImageLabelPredictions.
        var pipeline = mlContext.Transforms
            .Concatenate(
                TensorFlowModelSettings.inputTensorName,
                new[] { nameof(ImageInputData.input_image) })
            .Append(mlContext.Model.LoadTensorFlowModel(modelLocation)
                .ScoreTensorFlowModel(
                    outputColumnNames: new[] { TensorFlowModelSettings.outputTensorName },
                    inputColumnNames: new[] { TensorFlowModelSettings.inputTensorName },
                    addBatchDimensionInput: false));

        ITransformer model = pipeline.Fit(schemaOnlyData);
        return mlContext.Model.CreatePredictionEngine<ImageInputData, ImageLabelPredictions>(model);
    }

    /// <summary>
    /// Creates an empty data view of <see cref="ImageInputData"/>; it carries the
    /// schema only, which is all that is needed to call Fit().
    /// </summary>
    private static IDataView CreateEmptyDataView(MLContext mlContext)
    {
        IEnumerable<ImageInputData> noRows = new List<ImageInputData>();
        return mlContext.Data.LoadFromEnumerable(noRows);
    }

    /// <summary>
    /// Runs one prediction per image path and writes the elapsed time of each
    /// prediction, in milliseconds, to the console. The prediction result itself
    /// is currently discarded.
    /// </summary>
    /// <param name="model">Prediction engine, e.g. one returned by <see cref="loadModel"/>.</param>
    /// <param name="imgPaths">Paths of the image files to read.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="model"/> or <paramref name="imgPaths"/> is null.
    /// </exception>
    public static void makePredictions(PredictionEngine<ImageInputData, ImageLabelPredictions> model, string[] imgPaths)
    {
        if (model == null)
        {
            throw new ArgumentNullException(nameof(model));
        }

        if (imgPaths == null)
        {
            throw new ArgumentNullException(nameof(imgPaths));
        }

        foreach (string path in imgPaths)
        {
            // NOTE(review): string.Join over a byte[] concatenates the decimal text of
            // every byte (e.g. 255, 216, 255 -> "255216255"); the result is not the raw
            // file content. Confirm which encoding the graph's input tensor expects.
            // Convert.ToBase64String(File.ReadAllBytes(path)) was tried as an alternative.
            ImageInputData sample = new ImageInputData
            {
                input_image = string.Join("", File.ReadAllBytes(path))
            };

            Stopwatch timer = Stopwatch.StartNew();
            model.Predict(sample);
            timer.Stop();

            Console.WriteLine("Elapsed={0}", timer.Elapsed.TotalMilliseconds);
        }
    }

    /// <summary>
    /// Serializes <paramref name="image"/> in its own raw format and returns the
    /// bytes as a Base64 string.
    /// </summary>
    /// <param name="image">Image to encode; may be null.</param>
    /// <returns>The Base64 text, or an empty string when <paramref name="image"/> is null.</returns>
    public static string ImageToString(this Image image)
    {
        if (image == null)
        {
            return String.Empty;
        }

        // Dispose the buffer deterministically instead of leaving it to the GC.
        using (var stream = new MemoryStream())
        {
            image.Save(stream, image.RawFormat);
            return Convert.ToBase64String(stream.ToArray());
        }
    }
}
}
ImageInputData.cs:
using Microsoft.ML.Data;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace OCRWithMLNET.ImageDataStructures
{
// Input row type for the prediction engine: a single text column.
// ModelUtils.loadModel refers to this column by the name "input_image",
// and ModelUtils.makePredictions fills it with text built from the image file's bytes.
public class ImageInputData
{
// Mapped to column index 0 when rows are loaded from a file.
[LoadColumn(0)]
public string input_image;
}
}
ImageLabelPredictions.cs
using Microsoft.ML.Data;
namespace OCRWithMLNET.ImageDataStructures
{
// Output row type for the prediction engine.
public class ImageLabelPredictions
{
// Bound to the pipeline column named by TensorFlowModelSettings.outputTensorName ("prediction").
// NOTE(review): presumably the graph emits a vector of strings here - confirm against the model's output tensor type.
[ColumnName(ModelUtils.TensorFlowModelSettings.outputTensorName)]
public string[] PredictedLabels;
}
}
Programm.cs
using System;
using Microsoft.ML;
using OCRWithMLNET.ImageDataStructures;
namespace OCRWithMLNET
{
class Program
{
    /// <summary>
    /// Entry point: loads the frozen graph and runs a prediction for a single image file.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // Images to score; currently one fixed file.
        var imagePaths = new[] { "img.png" };

        var engine = ModelUtils.loadModel("<path>/frozen_graph.pb");
        ModelUtils.makePredictions(engine, imagePaths);
    }
}
}
Additional Information
I suspect that it has to do with the way I load the image. I also tried loading it with Base64 encoding, but this didn't work either.
In Python I can load the image in like this:
# Read the image file's raw bytes (binary mode, no decoding).
with open(filename, 'rb') as img_file:
    img_file_data = img_file.read()
The issue was that I trained the model with TensorFlow v1.14 but used v1.13 with ML.NET. After switching to TensorFlow v1.13 for training, everything worked as expected.
System information
Issue
Getting a
Microsoft.ML.Transforms.TensorFlow.TFException: "Expected image (JPEG, PNG, or GIF)"
error when trying to predict with an OCR model created with the Attention-OCR GitHub repository (written in TensorFlow).
Source code / logs
ModelUtils.cs:
ImageInputData.cs:
ImageLabelPredictions.cs
Programm.cs
Additional Information
I suspect that it has to do with the way I load the image. I also tried loading it with Base64 encoding, but this didn't work either.
In Python I can load the image in like this: