dotnet · wangyems · Jun 8, 2020 · Jun 2, 2020 · Jun 2, 2020 · Jun 3, 2020
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/OnnxConversion.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/OnnxConversion.cs
@@ -0,0 +1,138 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+
+namespace Samples.Dynamic.ModelOperations
+{
+    public static class OnnxConversion
+    {
+        private class ScoreValue
+        {
+            public float Score { get; set; }
+        }
+
+        private class OnnxScoreValue
+        {
+            public VBuffer<float> Score { get; set; }
+        }
+
+        private static void PrintScore(IEnumerable<ScoreValue> values, int numRows)
+        {
+            foreach (var value in values.Take(numRows))
+                Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score);
+        }
+
+        private static void PrintScore(IEnumerable<OnnxScoreValue> values, int numRows)
+        {
+            foreach (var value in values.Take(numRows))
+                Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score.GetItemOrDefault(0));
+        }
+
+        public static void Example()
+        {
+            var mlContext = new MLContext(seed: 0);
+
+            //Get dataset
+            // Download the raw dataset.
+            var originalData = Microsoft.ML.SamplesUtils.DatasetUtils
+                .LoadRawAdultDataset(mlContext);
+            // Download the featurized dataset. 
+            // featurizedData = featurizationPipeline.Transform(originalData)
+            var featurizedData = Microsoft.ML.SamplesUtils.DatasetUtils
+                .LoadFeaturizedAdultDataset(mlContext);
+
+            //Dataset partition
+            // Partition the original dataset. Leave out 10% of data for testing.
+            var trainTestOriginalData = mlContext.Data
+                .TrainTestSplit(originalData, testFraction: 0.3);
+            // Partition the featurized dataset. Leave out 10% of data for testing.
+            var trainTestFeaturizedData = mlContext.Data
+                .TrainTestSplit(featurizedData, testFraction: 0.3);
+
+            // Define training pielines(wholePipeline = featurizationPipeline + binaryRegressionpipeline)
+            var featurizationPipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
+                                        // Convert categorical features to one-hot vectors
+                                        .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
+                                        .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
+                                        .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
+                                        .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
+                                        .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
+                                        .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
+                                        .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
+                                        // Combine all features into one feature vector
+                                        .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
+                                        "occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
+                                        "capital-gain", "capital-loss", "hours-per-week"))
+                                        // Min-max normalize all the features
+                                        .Append(mlContext.Transforms.NormalizeMinMax("Features"));
+            var binaryRegressionpipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
+            // Concatenate two pipelines into one
+            var wholePipeline = featurizationPipeline.Append(binaryRegressionpipeline);
+
+            // Fit the pipeline, and get a transformer that knows how to score new data 
+            // There are two ways to generate the transformer
+            // 1. Fit the whole pipeline with original data
+            var transformer1 = wholePipeline.Fit(trainTestOriginalData.TrainSet);
+            // 2. Fit the partial(second half) pipeline with featurizedData
+            var transformer2 = binaryRegressionpipeline.Fit(trainTestFeaturizedData.TrainSet);
+
+            //What you need to convert an ML.NET model to an onnx model is a transformer and input data
+            //By default, the onnx conversion will generate the onnx file with the latest OpSet version
+            //There are two ways to do the onnx conversion
+            //1. Apply the transformer(Generated by fitting the whole pipeline) and original dataset
+            using (var stream = File.Create("sample_onnx_conversion_1.onnx"))
+                mlContext.Model.ConvertToOnnx(transformer1, originalData, stream);
+            //2. Apply the transformer(Generated by fitting the second half pipeline) and featurized dataset
+            using (var stream = File.Create("sample_onnx_conversion_2.onnx"))
+                mlContext.Model.ConvertToOnnx(transformer2, featurizedData, stream);
+            //Please note that the above two methods generate the exact same onnx file.
+
+            //However, you can also specify a custom OpSet version by using the following code
+            //Currently, we support OpSet versions 9 for most transformers, but there are certain transformers that require a higher OpSet version
+            //Please refer to the following link for most update information of what OpSet version we support
+            //https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.OnnxConverter/OnnxExportExtensions.cs
+            int customOpSetVersion = 9;
+            using (var stream = File.Create("sample_onnx_conversion_3.onnx"))
+                mlContext.Model.ConvertToOnnx(transformer1, originalData, customOpSetVersion, stream);
+
+            //Create the pipeline using onnx file.
+            var onnxModelPath1 = "your_path_to_sample_onnx_conversion_1.onnx";
+            var onnxEstimator1 = mlContext.Transforms.ApplyOnnxModel(onnxModelPath1);
+            var onnxTransformer1 = onnxEstimator1.Fit(trainTestOriginalData.TrainSet);
+            //You may want to create the transformer by using onnxTransformer2 = onnxEstimator2.Fit(trainTestFeaturizedData.TrainSet)
+            //It's wrong because you can only apply original data to onnx model. This is a different concept from ML.NET model
+            //Please always use the orignal dataset to fit onnx estimator to get onnx transformer
+            var onnxModelPath2 = "your_path_to_sample_onnx_conversion_2.onnx";
+            var onnxEstimator2 = mlContext.Transforms.ApplyOnnxModel(onnxModelPath2);
+            var onnxTransformer2 = onnxEstimator2.Fit(trainTestOriginalData.TrainSet);
+
+            //Inference the testset
+            var output1 = transformer1.Transform(trainTestOriginalData.TestSet);
+            var output2 = transformer2.Transform(trainTestFeaturizedData.TestSet);
+            //Always use original dataset to inference onnx transformer
+            var onnxOutput1 = onnxTransformer1.Transform(trainTestOriginalData.TestSet);
+            var onnxOutput2 = onnxTransformer2.Transform(trainTestOriginalData.TestSet);
+
+            //Get the outScores
+            var outScores1 = mlContext.Data.CreateEnumerable<ScoreValue>(output1, reuseRowObject: false);
+            var outScores2 = mlContext.Data.CreateEnumerable<ScoreValue>(output2, reuseRowObject: false);
+            var onnxOutScores1 = mlContext.Data.CreateEnumerable<OnnxScoreValue>(onnxOutput1, reuseRowObject: false);
+            var onnxOutScores2 = mlContext.Data.CreateEnumerable<OnnxScoreValue>(onnxOutput2, reuseRowObject: false);
+
+            //Print
+            PrintScore(outScores1, 5);
+            PrintScore(outScores2, 5);
+            PrintScore(onnxOutScores1, 5);
+            PrintScore(onnxOutScores2, 5);
+            //Expected same results for the above 4 methods
+            //Score - 0.09044361
+            //Score - 9.105377
+            //Score - 11.049
+            //Score - 3.061928
+            //Score - 6.375817
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj
@@ -25,6 +25,7 @@
     <ProjectReference Include="..\..\..\src\Microsoft.ML.TimeSeries\Microsoft.ML.TimeSeries.csproj" />
     <ProjectReference Include="..\..\..\src\Microsoft.ML.DnnImageFeaturizer.ResNet18\Microsoft.ML.DnnImageFeaturizer.ResNet18.csproj" />
     <ProjectReference Include="..\..\..\src\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
+    <ProjectReference Include="..\..\..\src\Microsoft.ML.OnnxConverter\Microsoft.ML.OnnxConverter.csproj" />
 
     <NativeAssemblyReference Include="CpuMathNative" />
     <NativeAssemblyReference Include="FastTreeNative" />

diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
@@ -82,6 +82,38 @@ public static IDataView LoadHousingRegressionDataset(MLContext mlContext)
         /// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
         /// </remarks>
         public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
+        {
+            // Create data featurizing pipeline
+            var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
+                // Convert categorical features to one-hot vectors
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
+                // Combine all features into one feature vector
+                .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
+                    "occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
+                    "capital-gain", "capital-loss", "hours-per-week"))
+                // Min-max normalize all the features
+                .Append(mlContext.Transforms.NormalizeMinMax("Features"));
+
+            var data = LoadRawAdultDataset(mlContext);
+            var featurizedData = pipeline.Fit(data).Transform(data);
+            return featurizedData;
+        }
+
+        /// <summary>
+        /// Returns the path to the Adult UCI dataset and featurizes it to be suitable for classification tasks.
+        /// </summary>
+        /// <param name="mlContext"><see cref="MLContext"/> used for data loading and processing.</param>
+        /// <returns>Raw dataset.</returns>
+        /// <remarks>
+        /// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
+        /// </remarks>
+        public static IDataView LoadRawAdultDataset(MLContext mlContext)
         {
             // Obtains the path to the file
             string dataFile = GetAdultDataset();
@@ -103,33 +135,14 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
                         new TextLoader.Column("capital-gain", DataKind.Single, 10),
                         new TextLoader.Column("capital-loss", DataKind.Single, 11),
                         new TextLoader.Column("hours-per-week", DataKind.Single, 12),
-                        new TextLoader.Column("native-country", DataKind.Single, 13),
+                        new TextLoader.Column("native-country", DataKind.String, 13),
                         new TextLoader.Column("IsOver50K", DataKind.Boolean, 14),
                     },
                 separatorChar: ',',
                 hasHeader: true
             );
 
-            // Create data featurizing pipeline
-            var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
-                // Convert categorical features to one-hot vectors
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
-                // Combine all features into one feature vector
-                .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
-                    "occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
-                    "capital-gain", "capital-loss", "hours-per-week"))
-                // Min-max normalize all the features
-                .Append(mlContext.Transforms.NormalizeMinMax("Features"));
-
-            var data = loader.Load(dataFile);
-            var featurizedData = pipeline.Fit(data).Transform(data);
-            return featurizedData;
+            return loader.Load(dataFile);
         }
 
         /// <summary>