Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic.ModelOperations
{
public static class OnnxConversion
{
/// <summary>
/// Row type used to read scores back from the output of the ML.NET transformers
/// through <c>mlContext.Data.CreateEnumerable&lt;ScoreValue&gt;</c>.
/// </summary>
private class ScoreValue
{
// Scalar score of one row; presumably bound by name to the "Score" output column — TODO confirm.
public float Score { get; set; }
}

/// <summary>
/// Row type used to read scores back from the output of the ONNX transformers
/// through <c>mlContext.Data.CreateEnumerable&lt;OnnxScoreValue&gt;</c>.
/// </summary>
private class OnnxScoreValue
{
// Score exposed as a vector buffer; the printing helper only reads the item at index 0.
public VBuffer<float> Score { get; set; }
}

/// <summary>
/// Writes the score of the first <paramref name="numRows"/> rows to the console,
/// one line per row, as a "Score" label followed by the value, each left-aligned
/// in a 10-character column.
/// </summary>
/// <param name="values">Scored rows to print.</param>
/// <param name="numRows">Maximum number of rows to print.</param>
private static void PrintScore(IEnumerable<ScoreValue> values, int numRows)
{
    const string rowFormat = "{0, -10} {1, -10}";

    IEnumerable<ScoreValue> firstRows = values.Take(numRows);
    foreach (ScoreValue row in firstRows)
    {
        Console.WriteLine(rowFormat, "Score", row.Score);
    }
}

/// <summary>
/// Writes the score of the first <paramref name="numRows"/> ONNX output rows to the
/// console, one line per row, as a "Score" label followed by the value, each
/// left-aligned in a 10-character column. Only the item at index 0 of each score
/// buffer is printed.
/// </summary>
/// <param name="values">ONNX-scored rows to print.</param>
/// <param name="numRows">Maximum number of rows to print.</param>
private static void PrintScore(IEnumerable<OnnxScoreValue> values, int numRows)
{
    const string rowFormat = "{0, -10} {1, -10}";

    IEnumerable<OnnxScoreValue> firstRows = values.Take(numRows);
    foreach (OnnxScoreValue row in firstRows)
    {
        Console.WriteLine(rowFormat, "Score", row.Score.GetItemOrDefault(0));
    }
}

public static void Example()
{
Copy link
Contributor

@harishsk harishsk Jun 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this sample is getting more complicated than necessary. Can we simplify it just to demonstrate basic Onnx export and exporting to a different opset? The scenarios of using the whole pipeline versus partial pipeline is more advanced and can be left out of this sample. #Resolved

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure


In reply to: 436857134 [](ancestors = 436857134)

// Create the ML.NET context with a fixed seed (0).
var mlContext = new MLContext(seed: 0);

// Get the datasets.
// Load the raw dataset.
var originalData = Microsoft.ML.SamplesUtils.DatasetUtils
.LoadRawAdultDataset(mlContext);
// Load the featurized dataset.
// Conceptually: featurizedData = featurizationPipeline.Transform(originalData)
var featurizedData = Microsoft.ML.SamplesUtils.DatasetUtils
.LoadFeaturizedAdultDataset(mlContext);

// Partition the datasets.
// Partition the original dataset. Leave out 30% of the data for testing.
var trainTestOriginalData = mlContext.Data
.TrainTestSplit(originalData, testFraction: 0.3);
// Partition the featurized dataset. Leave out 30% of the data for testing.
var trainTestFeaturizedData = mlContext.Data
.TrainTestSplit(featurizedData, testFraction: 0.3);

// Define the training pipelines (wholePipeline = featurizationPipeline + binaryRegressionpipeline).
var featurizationPipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
// Convert categorical features to one-hot vectors.
.Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
// Combine all features into one feature vector.
.Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
"capital-gain", "capital-loss", "hours-per-week"))
// Min-max normalize all the features.
.Append(mlContext.Transforms.NormalizeMinMax("Features"));
// Despite the variable name, this is a binary classification trainer (averaged perceptron).
var binaryRegressionpipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
// Concatenate the two pipelines into one.
var wholePipeline = featurizationPipeline.Append(binaryRegressionpipeline);

// Fit the pipeline to get a transformer that knows how to score new data.
// There are two ways to generate the transformer:
// 1. Fit the whole pipeline with the original data.
var transformer1 = wholePipeline.Fit(trainTestOriginalData.TrainSet);
// 2. Fit the partial (second half) pipeline with the featurized data.
var transformer2 = binaryRegressionpipeline.Fit(trainTestFeaturizedData.TrainSet);

// To convert an ML.NET model to an ONNX model you need a transformer and input data.
// By default, the ONNX conversion generates the ONNX file with the latest OpSet version.
// There are two ways to do the ONNX conversion:
// 1. Use the transformer generated by fitting the whole pipeline, together with the original dataset.
using (var stream = File.Create("sample_onnx_conversion_1.onnx"))
mlContext.Model.ConvertToOnnx(transformer1, originalData, stream);
// 2. Use the transformer generated by fitting the second-half pipeline, together with the featurized dataset.
using (var stream = File.Create("sample_onnx_conversion_2.onnx"))
mlContext.Model.ConvertToOnnx(transformer2, featurizedData, stream);
// According to the sample author, the two methods above generate the exact same ONNX file.

// You can also specify a custom OpSet version, as shown below.
// Currently, OpSet version 9 is supported for most transformers, but certain transformers require a higher OpSet version.
// Please refer to the following link for the most up-to-date information on which OpSet versions are supported:
// https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.OnnxConverter/OnnxExportExtensions.cs
int customOpSetVersion = 9;
using (var stream = File.Create("sample_onnx_conversion_3.onnx"))
mlContext.Model.ConvertToOnnx(transformer1, originalData, customOpSetVersion, stream);

// Create the pipeline from the ONNX file.
// NOTE(review): the path below is a placeholder and differs from the
// "sample_onnx_conversion_1.onnx" file created above; point it at the real file before running.
var onnxModelPath1 = "your_path_to_sample_onnx_conversion_1.onnx";
var onnxEstimator1 = mlContext.Transforms.ApplyOnnxModel(onnxModelPath1);
var onnxTransformer1 = onnxEstimator1.Fit(trainTestOriginalData.TrainSet);
// You may be tempted to create the transformer with onnxTransformer2 = onnxEstimator2.Fit(trainTestFeaturizedData.TrainSet).
// According to the sample author that is wrong: only the original data can be applied to the ONNX model,
// which is a different concept from the ML.NET model.
// Always use the original dataset to fit the ONNX estimator and get the ONNX transformer.
// NOTE(review): this path is a placeholder as well (the file created above is "sample_onnx_conversion_2.onnx").
var onnxModelPath2 = "your_path_to_sample_onnx_conversion_2.onnx";
var onnxEstimator2 = mlContext.Transforms.ApplyOnnxModel(onnxModelPath2);
var onnxTransformer2 = onnxEstimator2.Fit(trainTestOriginalData.TrainSet);

// Run inference on the test set.
var output1 = transformer1.Transform(trainTestOriginalData.TestSet);
var output2 = transformer2.Transform(trainTestFeaturizedData.TestSet);
// Always use the original dataset when running inference with the ONNX transformers.
var onnxOutput1 = onnxTransformer1.Transform(trainTestOriginalData.TestSet);
var onnxOutput2 = onnxTransformer2.Transform(trainTestOriginalData.TestSet);

// Get the output scores.
var outScores1 = mlContext.Data.CreateEnumerable<ScoreValue>(output1, reuseRowObject: false);
var outScores2 = mlContext.Data.CreateEnumerable<ScoreValue>(output2, reuseRowObject: false);
var onnxOutScores1 = mlContext.Data.CreateEnumerable<OnnxScoreValue>(onnxOutput1, reuseRowObject: false);
var onnxOutScores2 = mlContext.Data.CreateEnumerable<OnnxScoreValue>(onnxOutput2, reuseRowObject: false);

// Print the first 5 scores from each of the four outputs.
PrintScore(outScores1, 5);
PrintScore(outScores2, 5);
PrintScore(onnxOutScores1, 5);
PrintScore(onnxOutScores2, 5);
//Expected same results for the above 4 methods
//Score - 0.09044361
//Score - 9.105377
//Score - 11.049
//Score - 3.061928
//Score - 6.375817
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
<ProjectReference Include="..\..\..\src\Microsoft.ML.TimeSeries\Microsoft.ML.TimeSeries.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.DnnImageFeaturizer.ResNet18\Microsoft.ML.DnnImageFeaturizer.ResNet18.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.OnnxConverter\Microsoft.ML.OnnxConverter.csproj" />

<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FastTreeNative" />
Expand Down
55 changes: 34 additions & 21 deletions src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,38 @@ public static IDataView LoadHousingRegressionDataset(MLContext mlContext)
/// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
/// </remarks>
public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
{
// Create data featurizing pipeline
var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
// Convert categorical features to one-hot vectors
Copy link
Contributor

@harishsk harishsk Jun 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you still need these changes? #Resolved

Copy link
Contributor Author

@wangyems wangyems Jun 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, because the input data for the pipeline is raw data instead of featurizedData, and the onnx model can only be fed with raw data. So I refactor the SamplesDatasetUtils.cs to have a specific function to get raw data #Resolved

.Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
// Combine all features into one feature vector
.Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
"capital-gain", "capital-loss", "hours-per-week"))
// Min-max normalize all the features
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

var data = LoadRawAdultDataset(mlContext);
var featurizedData = pipeline.Fit(data).Transform(data);
return featurizedData;
}

/// <summary>
/// Loads the raw (unfeaturized) Adult UCI dataset.
/// </summary>
/// <param name="mlContext"><see cref="MLContext"/> used for data loading and processing.</param>
/// <returns>Raw dataset.</returns>
/// <remarks>
/// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
/// </remarks>
public static IDataView LoadRawAdultDataset(MLContext mlContext)
{
// Obtains the path to the file
string dataFile = GetAdultDataset();
Expand All @@ -103,33 +135,14 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
new TextLoader.Column("capital-gain", DataKind.Single, 10),
new TextLoader.Column("capital-loss", DataKind.Single, 11),
new TextLoader.Column("hours-per-week", DataKind.Single, 12),
new TextLoader.Column("native-country", DataKind.Single, 13),
new TextLoader.Column("native-country", DataKind.String, 13),
new TextLoader.Column("IsOver50K", DataKind.Boolean, 14),
},
separatorChar: ',',
hasHeader: true
);

// Create data featurizing pipeline
var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
// Convert categorical features to one-hot vectors
.Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
// Combine all features into one feature vector
.Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
"capital-gain", "capital-loss", "hours-per-week"))
// Min-max normalize all the features
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

var data = loader.Load(dataFile);
var featurizedData = pipeline.Fit(data).Transform(data);
return featurizedData;
return loader.Load(dataFile);
}

/// <summary>
Expand Down