dotnet / machinelearning

ML.NET is an open source and cross-platform machine learning framework for .NET.
https://dot.net/ml
MIT License
8.94k stars 1.86k forks source link

Schema mismatch for label column 'Label': expected Single, got Vector<Single> (Parameter 'labelCol') #6795

Open ooples opened 11 months ago

ooples commented 11 months ago

I'm trying out this NuGet package and experimenting with the different normalization options for regression and prediction trainers. My code works perfectly when I use Single values (float), but I saw that there were a bunch of normalization options that only seemed to work with a vector of Single values, so I changed all of my float values to float arrays, and now I get the error shown in the title. Am I just missing something obvious?

FYI I'm using ML.NET 3.0.0-preview.23266.6 for this example


// Builds one ModelInput row per index of the value series, trains an
// OnlineGradientDescent regression model on the first `trainingCount` rows,
// and scores both the training rows and the remaining rows.
var trainingCount = 50;
var modelInputList = new List<ModelInput>();
var estCount = valuesList.Any() ? valuesList.First().ValueList.Count : 0;

for (int j = 0; j < estCount; j++)
{
    var modelInput = new ModelInput();

    // The final row is given a label of 0; every other row uses actualList[j].
    var actual = j < estCount - 1 ? actualList[j] : 0;

    // The label must be a scalar float. Storing it as float[] makes the
    // "Label" column Vector<Single>, and the trainer then fails with
    // "Schema mismatch for label column 'Label': expected Single, got Vector<Single>".
    modelInput.Actual = Convert.ToSingle(actual);

    for (int k = 0; k < valueCount; k++)
    {
        var rvItem = Convert.ToSingle(valuesList[k].ValueList[j]);

        // Only the first three value series are mapped to input columns;
        // any further series are ignored.
        switch (k)
        {
            case 0:
                modelInput.Input1 = new float[] { rvItem };
                break;
            case 1:
                modelInput.Input2 = new float[] { rvItem };
                break;
            case 2:
                modelInput.Input3 = new float[] { rvItem };
                break;
            default:
                break;
        }
    }

    modelInputList.Add(modelInput);
}

var firstHalf = mlContext.Data.LoadFromEnumerable(modelInputList.Take(trainingCount));
var secondHalf = mlContext.Data.LoadFromEnumerable(modelInputList.Skip(trainingCount));

// Copy the scalar Actual column to "Label", normalize each input column in
// place, then concatenate the three inputs into the "Features" vector.
var dataProcessPipeline = mlContext.Transforms
                    .CopyColumns("Label", nameof(ModelInput.Actual))
                    .Append(mlContext.Transforms.NormalizeBinning(outputColumnName: nameof(ModelInput.Input1)))
                    .Append(mlContext.Transforms.NormalizeBinning(outputColumnName: nameof(ModelInput.Input2)))
                    .Append(mlContext.Transforms.NormalizeBinning(outputColumnName: nameof(ModelInput.Input3)))
                    .Append(mlContext.Transforms.Concatenate("Features", nameof(ModelInput.Input1),
                        nameof(ModelInput.Input2), nameof(ModelInput.Input3)));
var trainer = mlContext.Regression.Trainers.OnlineGradientDescent();
var trainingPipeline = dataProcessPipeline.Append(trainer);
// Fit previously threw the schema-mismatch exception while Label was a vector.
var trainedModel = trainingPipeline.Fit(firstHalf);
var trainingData = trainedModel.Transform(firstHalf);
var predictions = trainedModel.Transform(secondHalf);

/// <summary>
/// One training/scoring row: a scalar regression target plus three
/// single-element feature vectors.
/// </summary>
public class ModelInput
{
    /// <summary>
    /// Regression target. Kept as a scalar <see cref="float"/> (no
    /// <c>VectorType</c>) so the copied "Label" column has type Single.
    /// </summary>
    [LoadColumn(0)]
    public float Actual { get; set; }

    /// <summary>First input value, stored as a length-1 vector.</summary>
    [LoadColumn(1)]
    [VectorType(1)]
    public float[] Input1 { get; set; }

    /// <summary>Second input value, stored as a length-1 vector.</summary>
    [LoadColumn(2)]
    [VectorType(1)]
    public float[] Input2 { get; set; }

    /// <summary>Third input value, stored as a length-1 vector.</summary>
    [LoadColumn(3)]
    [VectorType(1)]
    public float[] Input3 { get; set; }
}