-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Add a sample for Onnx conversion #5195
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6e2b2b4
4719f1f
0b1a59c
7d7df23
918b671
b863595
ae1cf37
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,106 @@ | ||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.IO; | ||
| using System.Linq; | ||
| using Microsoft.ML; | ||
| using Microsoft.ML.Data; | ||
|
|
||
| namespace Samples.Dynamic.ModelOperations | ||
| { | ||
| public static class OnnxConversion | ||
| { | ||
| private class ScoreValue | ||
| { | ||
| public float Score { get; set; } | ||
| } | ||
|
|
||
| private class OnnxScoreValue | ||
| { | ||
| public VBuffer<float> Score { get; set; } | ||
| } | ||
|
|
||
| private static void PrintScore(IEnumerable<ScoreValue> values, int numRows) | ||
| { | ||
| foreach (var value in values.Take(numRows)) | ||
| Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score); | ||
| } | ||
|
|
||
| private static void PrintScore(IEnumerable<OnnxScoreValue> values, int numRows) | ||
| { | ||
| foreach (var value in values.Take(numRows)) | ||
| Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score.GetItemOrDefault(0)); | ||
| } | ||
|
|
||
| public static void Example() | ||
| { | ||
| var mlContext = new MLContext(seed: 0); | ||
|
|
||
| //Get dataset | ||
| // Download the raw dataset. | ||
| var originalData = Microsoft.ML.SamplesUtils.DatasetUtils | ||
| .LoadRawAdultDataset(mlContext); | ||
|
|
||
| //Dataset partition | ||
| // Partition the original dataset. Leave out 30% of data for testing. | ||
| var trainTestOriginalData = mlContext.Data | ||
| .TrainTestSplit(originalData, testFraction: 0.3); | ||
|
|
||
wangyems marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // Define the training pipeline (wholePipeline = featurization pipeline + binary classification trainer) | ||
| var wholePipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K") | ||
| // Convert categorical features to one-hot vectors | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) | ||
| // Combine all features into one feature vector | ||
| .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", | ||
| "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", | ||
| "capital-gain", "capital-loss", "hours-per-week")) | ||
| // Min-max normalize all the features | ||
| .Append(mlContext.Transforms.NormalizeMinMax("Features")) | ||
| .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron()); | ||
|
|
||
| // Fit the pipeline, and get a transformer that knows how to score new data | ||
| var transformer = wholePipeline.Fit(trainTestOriginalData.TrainSet); | ||
|
|
||
| //What you need to convert an ML.NET model to an onnx model is a transformer and input data | ||
| //By default, the onnx conversion will generate the onnx file with the latest OpSet version | ||
| using (var stream = File.Create("sample_onnx_conversion_1.onnx")) | ||
| mlContext.Model.ConvertToOnnx(transformer, originalData, stream); | ||
|
|
||
| //However, you can also specify a custom OpSet version by using the following code | ||
| //Currently, we support OpSet version 9 for most transformers, but there are certain transformers that require a higher OpSet version | ||
| //Please refer to the following link for the most up-to-date information on which OpSet versions we support | ||
| //https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.OnnxConverter/OnnxExportExtensions.cs | ||
| int customOpSetVersion = 9; | ||
| using (var stream = File.Create("sample_onnx_conversion_2.onnx")) | ||
| mlContext.Model.ConvertToOnnx(transformer, originalData, customOpSetVersion, stream); | ||
|
|
||
| //Create the pipeline using onnx file. | ||
| var onnxModelPath = "your_path_to_sample_onnx_conversion_1.onnx"; | ||
| var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(onnxModelPath); | ||
| var onnxTransformer = onnxEstimator.Fit(trainTestOriginalData.TrainSet); | ||
|
|
||
| //Run inference on the test set | ||
| var output = transformer.Transform(trainTestOriginalData.TestSet); | ||
| var onnxOutput = onnxTransformer.Transform(trainTestOriginalData.TestSet); | ||
|
|
||
| //Get the outScores | ||
| var outScores = mlContext.Data.CreateEnumerable<ScoreValue>(output, reuseRowObject: false); | ||
| var onnxOutScores = mlContext.Data.CreateEnumerable<OnnxScoreValue>(onnxOutput, reuseRowObject: false); | ||
|
|
||
| PrintScore(outScores, 5); | ||
| PrintScore(onnxOutScores, 5); | ||
| //Both PrintScore calls above are expected to print the same scores | ||
| //Score - 0.09044361 | ||
| //Score - 9.105377 | ||
| //Score - 11.049 | ||
| //Score - 3.061928 | ||
| //Score - 6.375817 | ||
| } | ||
| } | ||
wangyems marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -82,6 +82,38 @@ public static IDataView LoadHousingRegressionDataset(MLContext mlContext) | |
| /// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. | ||
| /// </remarks> | ||
| public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) | ||
| { | ||
| // Create data featurizing pipeline | ||
| var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K") | ||
| // Convert categorical features to one-hot vectors | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you still need these changes? #Resolved
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, because the input data for the pipeline is raw data instead of featurizedData, and the onnx model can only be fed with raw data. So I refactor the SamplesDatasetUtils.cs to have a specific function to get raw data #Resolved |
||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) | ||
| // Combine all features into one feature vector | ||
| .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", | ||
| "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", | ||
| "capital-gain", "capital-loss", "hours-per-week")) | ||
| // Min-max normalize all the features | ||
| .Append(mlContext.Transforms.NormalizeMinMax("Features")); | ||
|
|
||
| var data = LoadRawAdultDataset(mlContext); | ||
| var featurizedData = pipeline.Fit(data).Transform(data); | ||
| return featurizedData; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Loads the raw Adult UCI dataset without applying any featurization. | ||
| /// </summary> | ||
| /// <param name="mlContext"><see cref="MLContext"/> used for data loading and processing.</param> | ||
| /// <returns>Raw dataset.</returns> | ||
| /// <remarks> | ||
| /// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. | ||
| /// </remarks> | ||
| public static IDataView LoadRawAdultDataset(MLContext mlContext) | ||
| { | ||
| // Obtains the path to the file | ||
| string dataFile = GetAdultDataset(); | ||
|
|
@@ -103,33 +135,14 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) | |
| new TextLoader.Column("capital-gain", DataKind.Single, 10), | ||
| new TextLoader.Column("capital-loss", DataKind.Single, 11), | ||
| new TextLoader.Column("hours-per-week", DataKind.Single, 12), | ||
| new TextLoader.Column("native-country", DataKind.Single, 13), | ||
| new TextLoader.Column("native-country", DataKind.String, 13), | ||
| new TextLoader.Column("IsOver50K", DataKind.Boolean, 14), | ||
| }, | ||
| separatorChar: ',', | ||
| hasHeader: true | ||
| ); | ||
|
|
||
| // Create data featurizing pipeline | ||
| var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K") | ||
| // Convert categorical features to one-hot vectors | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) | ||
| .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) | ||
| // Combine all features into one feature vector | ||
| .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", | ||
| "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", | ||
| "capital-gain", "capital-loss", "hours-per-week")) | ||
| // Min-max normalize all the features | ||
| .Append(mlContext.Transforms.NormalizeMinMax("Features")); | ||
|
|
||
| var data = loader.Load(dataFile); | ||
| var featurizedData = pipeline.Fit(data).Transform(data); | ||
| return featurizedData; | ||
| return loader.Load(dataFile); | ||
| } | ||
|
|
||
| /// <summary> | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this sample is getting more complicated than necessary. Can we simplify it just to demonstrate basic Onnx export and exporting to a different opset? The scenarios of using the whole pipeline versus partial pipeline is more advanced and can be left out of this sample. #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure
In reply to: 436857134 [](ancestors = 436857134)