Skip to content

Commit 9244e68

Browse files
authored
Add a sample for Onnx conversion (#5195)
* initial checkin * remove unused using directives * temp * resolve comments * more comments * update * remove complicated sample
1 parent c0eeea9 commit 9244e68

File tree

3 files changed

+141
-21
lines changed

3 files changed

+141
-21
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using Microsoft.ML;
6+
using Microsoft.ML.Data;
7+
8+
namespace Samples.Dynamic.ModelOperations
9+
{
10+
public static class OnnxConversion
11+
{
12+
/// <summary>
/// Row type used to read the <c>Score</c> column produced by the ML.NET model.
/// </summary>
private sealed class ScoreValue
{
    /// <summary>Raw score for a single row.</summary>
    public float Score { get; set; }
}
16+
17+
/// <summary>
/// Row type used to read the <c>Score</c> column produced by the ONNX model.
/// </summary>
private sealed class OnnxScoreValue
{
    /// <summary>
    /// Score declared as a vector; the sample prints only its first element.
    /// </summary>
    public VBuffer<float> Score { get; set; }
}
21+
22+
/// <summary>
/// Writes the score of each of the first <paramref name="numRows"/> rows to the console.
/// </summary>
/// <param name="values">Scored rows produced by the ML.NET model.</param>
/// <param name="numRows">Maximum number of rows to print.</param>
private static void PrintScore(IEnumerable<ScoreValue> values, int numRows)
{
    var firstRows = values.Take(numRows);
    foreach (var row in firstRows)
    {
        Console.WriteLine("{0, -10} {1, -10}", "Score", row.Score);
    }
}
27+
28+
/// <summary>
/// Writes the first element of the score vector of each of the first
/// <paramref name="numRows"/> rows to the console.
/// </summary>
/// <param name="values">Scored rows produced by the ONNX model.</param>
/// <param name="numRows">Maximum number of rows to print.</param>
private static void PrintScore(IEnumerable<OnnxScoreValue> values, int numRows)
{
    var firstRows = values.Take(numRows);
    foreach (var row in firstRows)
    {
        // Only slot 0 of the score vector is printed.
        var firstScore = row.Score.GetItemOrDefault(0);
        Console.WriteLine("{0, -10} {1, -10}", "Score", firstScore);
    }
}
33+
34+
/// <summary>
/// Trains a binary classifier on the Adult dataset, exports the fitted model to ONNX
/// (once with the default OpSet version and once with a custom one), loads the exported
/// file back as an ONNX transformer, and prints the first few scores from both models.
/// </summary>
public static void Example()
{
    var mlContext = new MLContext(seed: 0);

    // Load the raw (unfeaturized) Adult dataset.
    var originalData = Microsoft.ML.SamplesUtils.DatasetUtils
        .LoadRawAdultDataset(mlContext);

    // Partition the original dataset. Leave out 30% of the data for testing.
    var trainTestOriginalData = mlContext.Data
        .TrainTestSplit(originalData, testFraction: 0.3);

    // Define the training pipeline
    // (wholePipeline = featurization pipeline + binary classification trainer).
    var wholePipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
        // Convert categorical features to one-hot vectors
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
        // Combine all features into one feature vector
        .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
            "occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
            "capital-gain", "capital-loss", "hours-per-week"))
        // Min-max normalize all the features
        .Append(mlContext.Transforms.NormalizeMinMax("Features"))
        .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron());

    // Fit the pipeline, and get a transformer that knows how to score new data.
    var transformer = wholePipeline.Fit(trainTestOriginalData.TrainSet);

    // Converting an ML.NET model to ONNX requires a transformer and input data.
    // By default, the conversion generates the ONNX file with the latest OpSet version.
    // The same path is used below to load the model back; adjust it if the file
    // should live somewhere other than the working directory.
    const string onnxModelPath = "sample_onnx_conversion_1.onnx";
    using (var stream = File.Create(onnxModelPath))
    {
        mlContext.Model.ConvertToOnnx(transformer, originalData, stream);
    }

    // A custom OpSet version can also be specified.
    // Currently, OpSet version 9 is supported for most transformers, but certain
    // transformers require a higher OpSet version.
    // Please refer to the following link for the most up-to-date information on supported OpSet versions:
    // https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.OnnxConverter/OnnxExportExtensions.cs
    int customOpSetVersion = 9;
    using (var stream = File.Create("sample_onnx_conversion_2.onnx"))
    {
        mlContext.Model.ConvertToOnnx(transformer, originalData, customOpSetVersion, stream);
    }

    // Create a pipeline from the ONNX file written above.
    var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(onnxModelPath);
    var onnxTransformer = onnxEstimator.Fit(trainTestOriginalData.TrainSet);

    // Run inference on the test set with both the ML.NET model and the ONNX model.
    var output = transformer.Transform(trainTestOriginalData.TestSet);
    var onnxOutput = onnxTransformer.Transform(trainTestOriginalData.TestSet);

    // Read the Score column from both outputs.
    var outScores = mlContext.Data.CreateEnumerable<ScoreValue>(output, reuseRowObject: false);
    var onnxOutScores = mlContext.Data.CreateEnumerable<OnnxScoreValue>(onnxOutput, reuseRowObject: false);

    // Print the first five scores from each model.
    PrintScore(outScores, 5);
    PrintScore(onnxOutScores, 5);

    // Both calls above are expected to print the same scores:
    //Score - 0.09044361
    //Score - 9.105377
    //Score - 11.049
    //Score - 3.061928
    //Score - 6.375817
}
105+
}
106+
}

docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
<ProjectReference Include="..\..\..\src\Microsoft.ML.TimeSeries\Microsoft.ML.TimeSeries.csproj" />
3030
<ProjectReference Include="..\..\..\src\Microsoft.ML.DnnImageFeaturizer.ResNet18\Microsoft.ML.DnnImageFeaturizer.ResNet18.csproj" />
3131
<ProjectReference Include="..\..\..\src\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
32+
<ProjectReference Include="..\..\..\src\Microsoft.ML.OnnxConverter\Microsoft.ML.OnnxConverter.csproj" />
3233

3334
<NativeAssemblyReference Include="CpuMathNative" />
3435
<NativeAssemblyReference Include="FastTreeNative" />

src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,38 @@ public static IDataView LoadHousingRegressionDataset(MLContext mlContext)
8282
/// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
8383
/// </remarks>
8484
public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
{
    var transforms = mlContext.Transforms;
    var categorical = transforms.Categorical;

    // Featurization pipeline: copy the label, one-hot encode the categorical
    // columns, concatenate everything into "Features", then min-max normalize.
    var featurizer = transforms.CopyColumns("Label", "IsOver50K")
        .Append(categorical.OneHotEncoding("workclass"))
        .Append(categorical.OneHotEncoding("education"))
        .Append(categorical.OneHotEncoding("marital-status"))
        .Append(categorical.OneHotEncoding("occupation"))
        .Append(categorical.OneHotEncoding("relationship"))
        .Append(categorical.OneHotEncoding("ethnicity"))
        .Append(categorical.OneHotEncoding("native-country"))
        .Append(transforms.Concatenate("Features", "workclass", "education", "marital-status",
            "occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
            "capital-gain", "capital-loss", "hours-per-week"))
        .Append(transforms.NormalizeMinMax("Features"));

    // Fit the featurizer on the raw data and apply it to that same data.
    var rawData = LoadRawAdultDataset(mlContext);
    return featurizer.Fit(rawData).Transform(rawData);
}
107+
108+
/// <summary>
109+
/// Loads the Adult UCI dataset as raw data, without applying any featurization.
110+
/// </summary>
111+
/// <param name="mlContext"><see cref="MLContext"/> used for data loading and processing.</param>
112+
/// <returns>Raw dataset.</returns>
113+
/// <remarks>
114+
/// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
115+
/// </remarks>
116+
public static IDataView LoadRawAdultDataset(MLContext mlContext)
85117
{
86118
// Obtains the path to the file
87119
string dataFile = GetAdultDataset();
@@ -103,33 +135,14 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
103135
new TextLoader.Column("capital-gain", DataKind.Single, 10),
104136
new TextLoader.Column("capital-loss", DataKind.Single, 11),
105137
new TextLoader.Column("hours-per-week", DataKind.Single, 12),
106-
new TextLoader.Column("native-country", DataKind.Single, 13),
138+
new TextLoader.Column("native-country", DataKind.String, 13),
107139
new TextLoader.Column("IsOver50K", DataKind.Boolean, 14),
108140
},
109141
separatorChar: ',',
110142
hasHeader: true
111143
);
112144

113-
// Create data featurizing pipeline
114-
var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
115-
// Convert categorical features to one-hot vectors
116-
.Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
117-
.Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
118-
.Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
119-
.Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
120-
.Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
121-
.Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
122-
.Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
123-
// Combine all features into one feature vector
124-
.Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
125-
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
126-
"capital-gain", "capital-loss", "hours-per-week"))
127-
// Min-max normalize all the features
128-
.Append(mlContext.Transforms.NormalizeMinMax("Features"));
129-
130-
var data = loader.Load(dataFile);
131-
var featurizedData = pipeline.Fit(data).Transform(data);
132-
return featurizedData;
145+
return loader.Load(dataFile);
133146
}
134147

135148
/// <summary>

0 commit comments

Comments
 (0)