Open JakeRadMSFT opened 1 year ago
Fix in the PR:
The following code works as of 11:15PM 4/29/2023
csproj dependencies
<PackageReference Include="Microsoft.ML" Version="3.0.0-preview.23229.1" />
<PackageReference Include="Microsoft.ML.ImageAnalytics" Version="3.0.0-preview.23229.1" />
<PackageReference Include="Microsoft.ML.TorchSharp" Version="0.21.0-preview.23229.1" />
<PackageReference Include="TorchSharp-cuda-windows" Version="0.99.5" />
*.consumption.cs
// This file was auto-generated by ML.NET Model Builder.
using Microsoft.ML;
using Microsoft.ML.Data;
using System;
using System.Linq;
using System.IO;
using System.Collections.Generic;
namespace CatOD2_ConsoleApp2
{
public partial class CatOD2
{
/// <summary>
/// Model input class for CatOD2.
/// </summary>
#region model input class
public class ModelInput
{
    /// <summary>
    /// Class labels for the annotated objects in the image (one entry per box in <see cref="Box"/>).
    /// </summary>
    [LoadColumn(0)]
    [ColumnName(@"Labels")]
    public string[] Labels { get; set; }

    /// <summary>
    /// The image to train or predict on.
    /// </summary>
    [LoadColumn(1)]
    [ColumnName(@"Image")]
    // ImageTypeAttribute's constructor takes (height, width) in that order, so the
    // training constants are passed height-first to stay consistent with the
    // ResizeImages step of the training pipeline.
    [Microsoft.ML.Transforms.Image.ImageType(TrainingImageHeight, TrainingImageWidth)]
    public MLImage Image { get; set; }

    /// <summary>
    /// Path of the image file on disk; the training data loader reads <see cref="Image"/> from it.
    /// </summary>
    [ColumnName(@"ImagePath")]
    public string ImagePath { get; set; }

    /// <summary>
    /// Flattened bounding boxes, four values per label: x0, y0, x1, y1,
    /// expressed in the coordinate space of the resized training image.
    /// </summary>
    [LoadColumn(2)]
    [ColumnName(@"Box")]
    public float[] Box { get; set; }
}
#endregion
/// <summary>
/// Model output class for CatOD2.
/// </summary>
#region model output class
public class ModelOutput
{
/// <summary>
/// Predicted labels, bound to the "PredictedLabel" column that the training
/// pipeline maps back from keys to their original values.
/// </summary>
[ColumnName(@"PredictedLabel")]
public string[] PredictedLabel { get; set; }
/// <summary>
/// Values of the "score" column (the trainer's configured score column name).
/// </summary>
[ColumnName(@"score")]
public float[] Score { get; set; }
/// <summary>
/// Flattened box coordinates bound to the "PredictedBoundingBoxes" column.
/// Predict treats even indices as x values and odd indices as y values;
/// presumably four values (x0, y0, x1, y1) per detection - TODO confirm against the trainer.
/// </summary>
[ColumnName(@"PredictedBoundingBoxes")]
public float[] PredictedBoundingBoxes { get; set; }
}
#endregion
/// <summary>
/// Absolute path of the trained model file that <see cref="CreatePredictEngine"/> loads.
/// </summary>
private static readonly string MLNetModelPath = @"C:\Users\jakerad\source\repos\LocalObjectDetectionTest\CatOD2_ConsoleApp2\CatOD2.mlnet";

/// <summary>
/// Lazily created prediction engine shared by all callers of <see cref="Predict"/>.
/// The lazy wrapper only makes the one-time creation thread-safe.
/// NOTE(review): ML.NET documents PredictionEngine itself as not thread-safe - confirm that
/// callers do not invoke it concurrently.
/// </summary>
public static readonly Lazy<PredictionEngine<ModelInput, ModelOutput>> PredictEngine = new Lazy<PredictionEngine<ModelInput, ModelOutput>>(() => CreatePredictEngine(), true);

/// <summary>
/// Loads the model from <see cref="MLNetModelPath"/> and wraps it in a prediction engine.
/// </summary>
/// <returns>A prediction engine for the loaded model.</returns>
private static PredictionEngine<ModelInput, ModelOutput> CreatePredictEngine()
{
    // GPU device 0 is requested with CPU fallback switched off.
    var mlContext = new MLContext
    {
        GpuDeviceId = 0,
        FallbackToCpu = false,
    };

    // The input schema stored with the model is not needed here, so it is discarded.
    ITransformer mlModel = mlContext.Model.Load(MLNetModelPath, out _);
    return mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(mlModel);
}
/// <summary>
/// Use this method to predict on <see cref="ModelInput"/>.
/// </summary>
/// <param name="input">Model input; its <see cref="ModelInput.Image"/> must be set.</param>
/// <returns>
/// The <see cref="ModelOutput"/> for <paramref name="input"/>, with the predicted box
/// coordinates mapped from the padded training-image space back onto the original image.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="input"/> has no image.</exception>
public static ModelOutput Predict(ModelInput input)
{
    if (input is null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    if (input.Image is null)
    {
        throw new ArgumentException("The model input must contain an image.", nameof(input));
    }

    var output = PredictEngine.Value.Predict(input);

    var boxes = output.PredictedBoundingBoxes;
    if (boxes is null || boxes.Length == 0)
    {
        return output;
    }

    // Recompute the scale and padding that the resize step applied to this image
    // so that it can be undone on the predicted coordinates.
    CalculateAspectAndOffset(input.Image.Width, input.Image.Height, TrainingImageWidth, TrainingImageHeight, out float xOffset, out float yOffset, out float aspect);

    // Coordinates alternate x, y. They are rewritten in place; the bound keeps
    // the (i + 1) access inside the array even if the length were odd.
    for (int i = 0; i + 1 < boxes.Length; i += 2)
    {
        boxes[i] = (boxes[i] - xOffset) / aspect;
        boxes[i + 1] = (boxes[i + 1] - yOffset) / aspect;
    }

    return output;
}
}
}
*.training.cs
// This file was auto-generated by ML.NET Model Builder.
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.ML.Data;
using Microsoft.ML.TorchSharp;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms.Image;
using Microsoft.ML;
using Microsoft.ML.TorchSharp.AutoFormerV2;
using System.Text.Json.Nodes;
using System.Text.Json;
namespace CatOD2_ConsoleApp2
{
public partial class CatOD2
{
// Default dataset file used by Train when no inputDataFilePath is supplied.
public const string RetrainFilePath = @"C:\dev\Scratch\ObjectDetective\dataset\cats-export.json";
// Width and height that images are resized to in the training pipeline; the same
// values are used to scale bounding boxes to and from that image space.
public const int TrainingImageWidth = 800;
public const int TrainingImageHeight = 600;
/// <summary>
/// Train a new model with the provided dataset and save it to disk.
/// </summary>
/// <param name="outputModelPath">File path for saving the model. Should be similar to "C:\YourPath\ModelName.mlnet"</param>
/// <param name="mlContext">
/// Context to train with; a new one is created when null. Its GPU settings are
/// overwritten (device 0, no CPU fallback) even when it is supplied by the caller.
/// </param>
/// <param name="inputDataFilePath">Path to the VoTT JSON export used for training.</param>
/// <exception cref="ArgumentNullException"><paramref name="outputModelPath"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="outputModelPath"/> is empty.</exception>
public static void Train(string outputModelPath, MLContext? mlContext = null, string inputDataFilePath = RetrainFilePath)
{
    // Reject an unusable output path up front instead of discovering it only
    // when the model is saved, after the whole training run has finished.
    if (outputModelPath is null)
    {
        throw new ArgumentNullException(nameof(outputModelPath));
    }

    if (outputModelPath.Length == 0)
    {
        throw new ArgumentException("The output model path must not be empty.", nameof(outputModelPath));
    }

    mlContext ??= new MLContext();
    mlContext.GpuDeviceId = 0;
    mlContext.FallbackToCpu = false;

    var data = LoadIDataViewFromVOTTFile(mlContext, inputDataFilePath);
    var model = RetrainModel(mlContext, data);
    SaveModel(mlContext, model, data, outputModelPath);
}
/// <summary>
/// Builds the training IDataView from a VoTT JSON export and attaches the image-loading step.
/// </summary>
/// <param name="mlContext">The common context for all ML.NET operations.</param>
/// <param name="inputDataFilePath">Path to the VoTT JSON export used for training.</param>
/// <returns>IDataView whose "Image" column is loaded from the "ImagePath" column.</returns>
public static IDataView LoadIDataViewFromVOTTFile(MLContext mlContext, string inputDataFilePath)
{
    // One row per asset parsed from the export file.
    IDataView rawView = mlContext.Data.LoadFromEnumerable(LoadFromVott(inputDataFilePath));

    // No image folder is given, so each "ImagePath" value is used as it stands.
    var imageLoader = mlContext.Transforms.LoadImages("Image", null, "ImagePath");
    var fittedLoader = imageLoader.Fit(rawView);

    return fittedLoader.Transform(rawView);
}
/// <summary>
/// Writes a trained model, together with its input schema, to a file.
/// </summary>
/// <param name="mlContext">The common context for all ML.NET operations.</param>
/// <param name="model">Model to save.</param>
/// <param name="data">IDataView used to train the model; its schema is stored with the model.</param>
/// <param name="modelSavePath">File path for saving the model. Should be similar to "C:\YourPath\ModelName.mlnet".</param>
public static void SaveModel(MLContext mlContext, ITransformer model, IDataView data, string modelSavePath)
{
    // Read the schema before touching the file system so that a bad data view
    // never leaves a freshly created (empty) model file behind.
    var trainingSchema = data.Schema;

    using var modelStream = File.Create(modelSavePath);
    mlContext.Model.Save(model, trainingSchema, modelStream);
}
/// <summary>
/// Fits the pipeline returned by <see cref="BuildPipeline"/> to the given training data.
/// </summary>
/// <param name="mlContext">The common context for all ML.NET operations.</param>
/// <param name="trainData">Data to fit the pipeline on.</param>
/// <returns>The fitted model.</returns>
public static ITransformer RetrainModel(MLContext mlContext, IDataView trainData)
    => BuildPipeline(mlContext).Fit(trainData);
/// <summary>
/// Assembles the training pipeline produced by Model Builder: label-to-key mapping,
/// image resizing, the object detection trainer, and key-to-label mapping of the predictions.
/// Use this function to retrain the model.
/// </summary>
/// <param name="mlContext">The common context for all ML.NET operations.</param>
/// <returns>The unfitted estimator chain.</returns>
public static IEstimator<ITransformer> BuildPipeline(MLContext mlContext)
{
    // Turn the text labels into keys for the trainer.
    var labelsToKeys = mlContext.Transforms.Conversion.MapValueToKey(
        outputColumnName: @"Labels",
        inputColumnName: @"Labels",
        addKeyValueAnnotationsAsText: false);

    // Resize every image to the training size, padding (IsoPad) around a centered image.
    var resizeImages = mlContext.Transforms.ResizeImages(
        outputColumnName: @"Image",
        inputColumnName: @"Image",
        imageHeight: TrainingImageHeight,
        imageWidth: TrainingImageWidth,
        cropAnchor: ImageResizingEstimator.Anchor.Center,
        resizing: ImageResizingEstimator.ResizingKind.IsoPad);

    var trainerOptions = new ObjectDetectionTrainer.Options()
    {
        LabelColumnName = @"Labels",
        PredictedLabelColumnName = @"PredictedLabel",
        BoundingBoxColumnName = @"Box",
        ImageColumnName = @"Image",
        ScoreColumnName = @"score",
        MaxEpoch = 5,
        InitLearningRate = 1,
        WeightDecay = 0,
    };
    var detectionTrainer = mlContext.MulticlassClassification.Trainers.ObjectDetection(trainerOptions);

    // Map the predicted keys back to the original label text.
    var keysToLabels = mlContext.Transforms.Conversion.MapKeyToValue(
        outputColumnName: @"PredictedLabel",
        inputColumnName: @"PredictedLabel");

    return labelsToKeys
        .Append(resizeImages)
        .Append(detectionTrainer)
        .Append(keysToLabels);
}
/// <summary>
/// Computes the uniform scale factor and the padding offsets that fit a source
/// rectangle, centered, inside a destination rectangle without distorting it.
/// </summary>
/// <param name="sourceWidth">Width of the source rectangle.</param>
/// <param name="sourceHeight">Height of the source rectangle.</param>
/// <param name="destinationWidth">Width of the destination rectangle.</param>
/// <param name="destinationHeight">Height of the destination rectangle.</param>
/// <param name="xOffset">Horizontal padding on each side; 0 unless the height is the limiting dimension.</param>
/// <param name="yOffset">Vertical padding on each side; 0 when the height is the limiting dimension.</param>
/// <param name="aspect">The smaller of the two destination/source ratios.</param>
private static void CalculateAspectAndOffset(float sourceWidth, float sourceHeight, float destinationWidth, float destinationHeight, out float xOffset, out float yOffset, out float aspect)
{
    float scaleX = destinationWidth / sourceWidth;
    float scaleY = destinationHeight / sourceHeight;

    // When the height ratio is strictly smaller, height limits the scale and
    // the leftover space is horizontal; otherwise width limits it.
    bool heightLimited = scaleY < scaleX;

    aspect = heightLimited ? scaleY : scaleX;
    xOffset = heightLimited ? (destinationWidth - (sourceWidth * aspect)) / 2 : 0;
    yOffset = heightLimited ? 0 : (destinationHeight - (sourceHeight * aspect)) / 2;
}
/// <summary>
/// Reads a VoTT JSON export and converts every asset in it into a <see cref="ModelInput"/>.
/// Bounding boxes are scaled and offset into the coordinate space of the resized training image.
/// </summary>
/// <param name="inputDataFilePath">Path to the VoTT JSON export.</param>
/// <returns>One <see cref="ModelInput"/> per asset, with <c>ImagePath</c>, <c>Labels</c> and <c>Box</c> set.</returns>
/// <exception cref="InvalidDataException">The file does not contain an "assets" object.</exception>
private static IEnumerable<ModelInput> LoadFromVott(string inputDataFilePath)
{
    const string fileSchemePrefix = "file:";

    // File.ReadAllText opens, reads and closes the file in a single call.
    var root = JsonSerializer.Deserialize<JsonNode>(File.ReadAllText(inputDataFilePath));
    var assets = root?["assets"] as JsonObject;
    if (assets is null)
    {
        throw new InvalidDataException($"'{inputDataFilePath}' does not contain an \"assets\" object; a VoTT JSON export is expected.");
    }

    var imageData = new List<ModelInput>();
    foreach (KeyValuePair<string, JsonNode> asset in assets)
    {
        var assetInfo = asset.Value["asset"];
        var labelList = new List<string>();
        var boxList = new List<float>();

        var sourceWidth = assetInfo["size"]["width"].GetValue<float>();
        var sourceHeight = assetInfo["size"]["height"].GetValue<float>();
        CalculateAspectAndOffset(sourceWidth, sourceHeight, TrainingImageWidth, TrainingImageHeight, out float xOffset, out float yOffset, out float aspect);

        foreach (var region in asset.Value["regions"].AsArray())
        {
            // A region carrying several tags yields one label and one copy of
            // its box per tag, which keeps labels and boxes aligned.
            foreach (var tag in region["tags"].AsArray())
            {
                labelList.Add(tag.GetValue<string>());

                var boundingBox = region["boundingBox"];
                var left = boundingBox["left"].GetValue<float>();
                var top = boundingBox["top"].GetValue<float>();
                var width = boundingBox["width"].GetValue<float>();
                var height = boundingBox["height"].GetValue<float>();

                // Stored as x0, y0, x1, y1 in resized-image coordinates.
                boxList.Add(xOffset + (left * aspect));
                boxList.Add(yOffset + (top * aspect));
                boxList.Add(xOffset + ((left + width) * aspect));
                boxList.Add(yOffset + ((top + height) * aspect));
            }
        }

        // Strip only a leading "file:" scheme. Replacing every occurrence would
        // also corrupt paths that merely contain that text (e.g. "profile:").
        var imagePath = assetInfo["path"].GetValue<string>();
        if (imagePath.StartsWith(fileSchemePrefix, StringComparison.Ordinal))
        {
            imagePath = imagePath.Substring(fileSchemePrefix.Length);
        }

        imageData.Add(new ModelInput()
        {
            ImagePath = imagePath,
            Labels = labelList.ToArray(),
            Box = boxList.ToArray(),
        });
    }

    return imageData;
}
}
}
Program.cs
// This file was auto-generated by ML.NET Model Builder.
using CatOD2_ConsoleApp2;
using Microsoft.ML;
using Microsoft.ML.Data;
var mlContext = new MLContext();

// Echo every ML.NET log message to the console while training runs.
mlContext.Log += MlContext_Log;
void MlContext_Log(object? sender, LoggingEventArgs e)
{
    Console.WriteLine(e.Message);
}

CatOD2.Train(@"C:\Users\jakerad\source\repos\LocalObjectDetectionTest\CatOD2_ConsoleApp2\CatOD2.mlnet", mlContext);

// Create a single instance of sample data from one image of the dataset.
// MLImage is disposable, so it is released when the program ends.
using var image = MLImage.CreateFromFile(@"C:\dev\Scratch\ObjectDetective\dataset\IMG_8984.jpg");
CatOD2.ModelInput sampleData = new CatOD2.ModelInput()
{
    Image = image,
};

// Make a single prediction on the sample data and print results.
var predictionResult = CatOD2.Predict(sampleData);
Console.WriteLine("\n\nPredicted Boxes:\n");

// ModelOutput does not override ToString, so printing the object itself would
// only show its type name; print the individual detections instead.
// Assumes four coordinates (x0, y0, x1, y1) per detection, mirroring the layout
// of the training boxes - TODO confirm against the trainer's output.
var boxes = predictionResult.PredictedBoundingBoxes ?? Array.Empty<float>();
var labels = predictionResult.PredictedLabel ?? Array.Empty<string>();
var scores = predictionResult.Score ?? Array.Empty<float>();
for (int i = 0; i + 3 < boxes.Length; i += 4)
{
    int detection = i / 4;
    string label = detection < labels.Length ? labels[detection] : "<unknown>";
    string score = detection < scores.Length ? scores[detection].ToString("F3") : "n/a";
    Console.WriteLine($"{label} (score {score}): [{boxes[i]}, {boxes[i + 1]}, {boxes[i + 2]}, {boxes[i + 3]}]");
}
We might need to add <PlatformTarget>x64</PlatformTarget> to the csproj
for Torch-based scenarios.
@JakeRadMSFT Object detection is also available in the mlnet CLI now. Maybe we can close this issue as completed?
Required
Testing
Code Gen
ML.NET and AutoML
Other
Nice to Have
Training Options