-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Add a sample for Onnx conversion #5195
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
6e2b2b4
4719f1f
0b1a59c
7d7df23
918b671
b863595
ae1cf37
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.IO; | ||
| using System.Linq; | ||
| using Microsoft.ML; | ||
| using Microsoft.ML.Data; | ||
|
|
||
| namespace Samples.Dynamic.ModelOperations | ||
| { | ||
| public static class OnnxConversion | ||
| { | ||
| private class ScoreValue | ||
| { | ||
| public float Score { get; set; } | ||
| } | ||
|
|
||
| private class OnnxScoreValue | ||
| { | ||
| public VBuffer<float> Score { get; set; } | ||
| } | ||
|
|
||
| private static void PrintScore(IEnumerable<ScoreValue> values, int numRows) | ||
| { | ||
| foreach (var value in values.Take(numRows)) | ||
| Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score); | ||
| } | ||
|
|
||
| private static void PrintScore(IEnumerable<OnnxScoreValue> values, int numRows) | ||
| { | ||
| foreach (var value in values.Take(numRows)) | ||
| Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score.GetItemOrDefault(0)); | ||
| } | ||
|
|
||
| public static void Example() | ||
| { | ||
| var mlContext = new MLContext(seed: 0); | ||
|
|
||
| //Get dataset | ||
| // Download the raw dataset. | ||
| var originalData = Microsoft.ML.SamplesUtils.DatasetUtils | ||
| .LoadRawAdultDataset(mlContext); | ||
| // Download the featurized dataset. | ||
| // featurizedData = featurizationPipeline.Transform(originalData) | ||
| var featurizedData = Microsoft.ML.SamplesUtils.DatasetUtils | ||
| .LoadFeaturizedAdultDataset(mlContext); | ||
|
|
||
| //Dataset partition | ||
| // Partition the original dataset. Leave out 10% of data for testing. | ||
| var trainTestOriginalData = mlContext.Data | ||
| .TrainTestSplit(originalData, testFraction: 0.3); | ||
| // Partition the featurized dataset. Leave out 10% of data for testing. | ||
| var trainTestFeaturizedData = mlContext.Data | ||
| .TrainTestSplit(featurizedData, testFraction: 0.3); | ||
|
|
||
wangyems marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // Define training pielines(wholePipeline = featurizationPipeline + binaryRegressionpipeline) | ||
| var featurizationPipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K") | ||
| // Convert categorical features to one-hot vectors | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) | ||
| // Combine all features into one feature vector | ||
| .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", | ||
| "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", | ||
| "capital-gain", "capital-loss", "hours-per-week")) | ||
| // Min-max normalize all the features | ||
| .Append(mlContext.Transforms.NormalizeMinMax("Features")); | ||
| var binaryRegressionpipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(); | ||
| // Concatenate two pipelines into one | ||
| var wholePipeline = featurizationPipeline.Append(binaryRegressionpipeline); | ||
|
|
||
| // Fit the pipeline, and get a transformer that knows how to score new data | ||
| // There are two ways to generate the transformer | ||
| // 1. Fit the whole pipeline with original data | ||
| var transformer1 = wholePipeline.Fit(trainTestOriginalData.TrainSet); | ||
| // 2. Fit the partial(second half) pipeline with featurizedData | ||
| var transformer2 = binaryRegressionpipeline.Fit(trainTestFeaturizedData.TrainSet); | ||
|
|
||
| //What you need to convert an ML.NET model to an onnx model is a transformer and input data | ||
| //By default, the onnx conversion will generate the onnx file with the latest OpSet version | ||
| //There are two ways to do the onnx conversion | ||
| //1. Apply the transformer(Generated by fitting the whole pipeline) and original dataset | ||
| using (var stream = File.Create("sample_onnx_conversion_1.onnx")) | ||
| mlContext.Model.ConvertToOnnx(transformer1, originalData, stream); | ||
| //2. Apply the transformer(Generated by fitting the second half pipeline) and featurized dataset | ||
| using (var stream = File.Create("sample_onnx_conversion_2.onnx")) | ||
| mlContext.Model.ConvertToOnnx(transformer2, featurizedData, stream); | ||
| //Please note that the above two methods generate the exact same onnx file. | ||
|
|
||
| //However, you can also specify a custom OpSet version by using the following code | ||
| //Currently, we support OpSet versions 9 for most transformers, but there are certain transformers that require a higher OpSet version | ||
| //Please refer to the following link for most update information of what OpSet version we support | ||
| //https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.OnnxConverter/OnnxExportExtensions.cs | ||
| int customOpSetVersion = 9; | ||
| using (var stream = File.Create("sample_onnx_conversion_3.onnx")) | ||
| mlContext.Model.ConvertToOnnx(transformer1, originalData, customOpSetVersion, stream); | ||
|
|
||
| //Create the pipeline using onnx file. | ||
| var onnxModelPath1 = "your_path_to_sample_onnx_conversion_1.onnx"; | ||
| var onnxEstimator1 = mlContext.Transforms.ApplyOnnxModel(onnxModelPath1); | ||
| var onnxTransformer1 = onnxEstimator1.Fit(trainTestOriginalData.TrainSet); | ||
| //You may want to create the transformer by using onnxTransformer2 = onnxEstimator2.Fit(trainTestFeaturizedData.TrainSet) | ||
| //It's wrong because you can only apply original data to onnx model. This is a different concept from ML.NET model | ||
| //Please always use the orignal dataset to fit onnx estimator to get onnx transformer | ||
| var onnxModelPath2 = "your_path_to_sample_onnx_conversion_2.onnx"; | ||
| var onnxEstimator2 = mlContext.Transforms.ApplyOnnxModel(onnxModelPath2); | ||
| var onnxTransformer2 = onnxEstimator2.Fit(trainTestOriginalData.TrainSet); | ||
|
|
||
| //Inference the testset | ||
| var output1 = transformer1.Transform(trainTestOriginalData.TestSet); | ||
| var output2 = transformer2.Transform(trainTestFeaturizedData.TestSet); | ||
| //Always use original dataset to inference onnx transformer | ||
| var onnxOutput1 = onnxTransformer1.Transform(trainTestOriginalData.TestSet); | ||
| var onnxOutput2 = onnxTransformer2.Transform(trainTestOriginalData.TestSet); | ||
|
|
||
| //Get the outScores | ||
| var outScores1 = mlContext.Data.CreateEnumerable<ScoreValue>(output1, reuseRowObject: false); | ||
| var outScores2 = mlContext.Data.CreateEnumerable<ScoreValue>(output2, reuseRowObject: false); | ||
| var onnxOutScores1 = mlContext.Data.CreateEnumerable<OnnxScoreValue>(onnxOutput1, reuseRowObject: false); | ||
| var onnxOutScores2 = mlContext.Data.CreateEnumerable<OnnxScoreValue>(onnxOutput2, reuseRowObject: false); | ||
|
|
||
| PrintScore(outScores1, 5); | ||
| PrintScore(outScores2, 5); | ||
| PrintScore(onnxOutScores1, 5); | ||
| PrintScore(onnxOutScores2, 5); | ||
| //Expected same results for the above 4 methods | ||
| //Score - 0.09044361 | ||
| //Score - 9.105377 | ||
| //Score - 11.049 | ||
| //Score - 3.061928 | ||
| //Score - 6.375817 | ||
| } | ||
| } | ||
wangyems marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -82,6 +82,38 @@ public static IDataView LoadHousingRegressionDataset(MLContext mlContext) | |
| /// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. | ||
| /// </remarks> | ||
| public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) | ||
| { | ||
| // Create data featurizing pipeline | ||
| var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K") | ||
| // Convert categorical features to one-hot vectors | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you still need these changes? #Resolved
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, because the input data for the pipeline is raw data instead of featurizedData, and the onnx model can only be fed with raw data. So I refactor the SamplesDatasetUtils.cs to have a specific function to get raw data #Resolved |
||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) | ||
| // Combine all features into one feature vector | ||
| .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", | ||
| "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", | ||
| "capital-gain", "capital-loss", "hours-per-week")) | ||
| // Min-max normalize all the features | ||
| .Append(mlContext.Transforms.NormalizeMinMax("Features")); | ||
|
|
||
| var data = LoadRawAdultDataset(mlContext); | ||
| var featurizedData = pipeline.Fit(data).Transform(data); | ||
| return featurizedData; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Returns the path to the Adult UCI dataset and featurizes it to be suitable for classification tasks. | ||
| /// </summary> | ||
| /// <param name="mlContext"><see cref="MLContext"/> used for data loading and processing.</param> | ||
| /// <returns>Raw dataset.</returns> | ||
| /// <remarks> | ||
| /// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. | ||
| /// </remarks> | ||
| public static IDataView LoadRawAdultDataset(MLContext mlContext) | ||
| { | ||
| // Obtains the path to the file | ||
| string dataFile = GetAdultDataset(); | ||
|
|
@@ -103,33 +135,14 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) | |
| new TextLoader.Column("capital-gain", DataKind.Single, 10), | ||
| new TextLoader.Column("capital-loss", DataKind.Single, 11), | ||
| new TextLoader.Column("hours-per-week", DataKind.Single, 12), | ||
| new TextLoader.Column("native-country", DataKind.Single, 13), | ||
| new TextLoader.Column("native-country", DataKind.String, 13), | ||
| new TextLoader.Column("IsOver50K", DataKind.Boolean, 14), | ||
| }, | ||
| separatorChar: ',', | ||
| hasHeader: true | ||
| ); | ||
|
|
||
| // Create data featurizing pipeline | ||
| var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K") | ||
| // Convert categorical features to one-hot vectors | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) | ||
| // Combine all features into one feature vector | ||
| .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", | ||
| "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", | ||
| "capital-gain", "capital-loss", "hours-per-week")) | ||
| // Min-max normalize all the features | ||
| .Append(mlContext.Transforms.NormalizeMinMax("Features")); | ||
|
|
||
| var data = loader.Load(dataFile); | ||
| var featurizedData = pipeline.Fit(data).Transform(data); | ||
| return featurizedData; | ||
| return loader.Load(dataFile); | ||
| } | ||
|
|
||
| /// <summary> | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this sample is getting more complicated than necessary. Can we simplify it just to demonstrate basic Onnx export and exporting to a different opset? The scenarios of using the whole pipeline versus partial pipeline is more advanced and can be left out of this sample. #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure
In reply to: 436857134 [](ancestors = 436857134)