Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
using System.IO;
using Microsoft.ML;

namespace Samples.Dynamic.ModelOperations
{
public static class OnnxConversion
{
/// <summary>
/// Container exposing a single <see cref="float"/> property named <c>Score</c>.
/// NOTE(review): nothing in the visible sample references this type —
/// confirm whether it is still needed.
/// </summary>
private class ScoreValue
{
public float Score { get; set; }
}

public static void Example()
{
Copy link
Contributor

@harishsk harishsk Jun 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this sample is getting more complicated than necessary. Can we simplify it just to demonstrate basic Onnx export and exporting to a different opset? The scenarios of using the whole pipeline versus partial pipeline is more advanced and can be left out of this sample. #Resolved

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure


In reply to: 436857134 [](ancestors = 436857134)

var mlContext = new MLContext(seed: 0);

// Load the raw (unfeaturized) Adult dataset.
var rawData = Microsoft.ML.SamplesUtils.DatasetUtils
    .LoadRawAdultDataset(mlContext);

// Hold out 30% of the data for testing.
var trainTestData = mlContext.Data
    .TrainTestSplit(rawData, testFraction: 0.3);

// Build the training pipeline: featurize the raw columns and then train an
// averaged perceptron binary classifier on the resulting feature vector.
var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
    // Convert categorical features to one-hot vectors
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
    // Combine all features into one feature vector
    .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
        "occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
        "capital-gain", "capital-loss", "hours-per-week"))
    // Min-max normalize all the features
    .Append(mlContext.Transforms.NormalizeMinMax("Features"))
    .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron());

// Fit the pipeline, and get a transformer that knows how to score new data
var transformer = pipeline.Fit(trainTestData.TrainSet);

// Converting an ML.NET model to ONNX requires the fitted transformer and the
// input data. By default, the conversion generates the ONNX file with the
// latest OpSet version.
using (var stream = File.Create("sample_onnx_conversion_1.onnx"))
{
    mlContext.Model.ConvertToOnnx(transformer, rawData, stream);
}

// A custom OpSet version can also be requested, as shown below.
// Currently, OpSet version 9 is supported for most transformers, but certain
// transformers require a higher OpSet version.
// Please refer to the following link for the most up-to-date information on
// which OpSet versions are supported:
// https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.OnnxConverter/OnnxExportExtensions.cs
int customOpSetVersion = 9;
using (var stream = File.Create("sample_onnx_conversion_2.onnx"))
{
    mlContext.Model.ConvertToOnnx(transformer, rawData, customOpSetVersion, stream);
}

// Inference using the ONNX model. The path must point to an existing file,
// so reuse the first model exported above instead of a placeholder path.
var onnxModelPath = "sample_onnx_conversion_1.onnx";
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(onnxModelPath);
var onnxTransformer = onnxEstimator.Fit(trainTestData.TrainSet);

var onnxResult = onnxTransformer.Transform(trainTestData.TestSet);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
<ProjectReference Include="..\..\..\src\Microsoft.ML.TimeSeries\Microsoft.ML.TimeSeries.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.DnnImageFeaturizer.ResNet18\Microsoft.ML.DnnImageFeaturizer.ResNet18.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.OnnxConverter\Microsoft.ML.OnnxConverter.csproj" />

<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FastTreeNative" />
Expand Down
55 changes: 34 additions & 21 deletions src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,38 @@ public static IDataView LoadHousingRegressionDataset(MLContext mlContext)
/// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
/// </remarks>
public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
{
// Create data featurizing pipeline
var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
// Convert categorical features to one-hot vectors
Copy link
Contributor

@harishsk harishsk Jun 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you still need these changes? #Resolved

Copy link
Contributor Author

@wangyems wangyems Jun 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, because the input data for the pipeline is raw data instead of featurizedData, and the onnx model can only be fed with raw data. So I refactor the SamplesDatasetUtils.cs to have a specific function to get raw data #Resolved

.Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
// Combine all features into one feature vector
.Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
"capital-gain", "capital-loss", "hours-per-week"))
// Min-max normalize all the features
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

var data = LoadRawAdultDataset(mlContext);
var featurizedData = pipeline.Fit(data).Transform(data);
return featurizedData;
}

/// <summary>
/// Loads the Adult UCI dataset as raw data, without applying any featurization.
/// </summary>
/// <param name="mlContext"><see cref="MLContext"/> used for data loading and processing.</param>
/// <returns>Raw dataset.</returns>
/// <remarks>
/// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
/// </remarks>
public static IDataView LoadRawAdultDataset(MLContext mlContext)
{
// Obtains the path to the file
string dataFile = GetAdultDataset();
Expand All @@ -103,33 +135,14 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
new TextLoader.Column("capital-gain", DataKind.Single, 10),
new TextLoader.Column("capital-loss", DataKind.Single, 11),
new TextLoader.Column("hours-per-week", DataKind.Single, 12),
new TextLoader.Column("native-country", DataKind.Single, 13),
new TextLoader.Column("native-country", DataKind.String, 13),
new TextLoader.Column("IsOver50K", DataKind.Boolean, 14),
},
separatorChar: ',',
hasHeader: true
);

// Create data featurizing pipeline
var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
// Convert categorical features to one-hot vectors
.Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
// Combine all features into one feature vector
.Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
"capital-gain", "capital-loss", "hours-per-week"))
// Min-max normalize all the features
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

var data = loader.Load(dataFile);
var featurizedData = pipeline.Fit(data).Transform(data);
return featurizedData;
return loader.Load(dataFile);
}

/// <summary>
Expand Down