Skip to content

Problem with ML.NET RobustScaler #5237

@CBrauer

Description

@CBrauer

System information

  • Windows 10 Enterprise 10.0 18363 Built 18363
  • Visual Studio 2019, build 16.6.2

Source code

Program output. Notice that RobustScaler produced an extra column for "vwapGain"

image

Source code

My test program looks like:

namespace Test_RobustScaller {
  internal class Program {
    #region MyHead
    public static void MyHead(IDataView train, int numRows) {
      var trainPreview = train.Preview(maxRows: numRows);
      var nColumns = trainPreview.ColumnView.Length;
      var maxCharInHeaderName = 0;
      for (var k = 0; k < nColumns; k++) {
        var columnName = trainPreview.Schema[k].Name;
        maxCharInHeaderName = Math.Max(maxCharInHeaderName, columnName.Length);
      }
      var nSpaces = new int[nColumns];
      for (var k = 0; k < nColumns; k++) {
        var columnName = trainPreview.Schema[k].Name;
        for (var j = 0; j < maxCharInHeaderName - columnName.Length + 1; j++) {
          Console.Write(" ");
        }
        Console.Write("{0}", columnName);
        nSpaces[k] = maxCharInHeaderName - columnName.Length + 1;
      }
      Console.Write("\n");

      foreach (var row in trainPreview.RowView) {
        for (var k = 0; k < row.Values.Length; k++) {
          var field = string.Format("{0}", row.Values[k].Value);
          var nSpace = maxCharInHeaderName - field.Length + 1;
          for (var j = 0; j < nSpace; j++) {
            Console.Write(" ");
          }
          Console.Write(row.Values[k].Value);
        }
        Console.Write("\n");
      }

      Console.Write("\n");
    }
    #endregion
    public static void Run() {
      var mlContext = new MLContext(seed: 1);

      var df_full = DataFrame.LoadCsv("../../../data/model.csv");

      var header_names = new List<string> {
        "BoxRatio", "Thrust", "Acceleration", "Velocity",
        "OnBalRun", "vwapGain", "Altitude"
      };
      var nColumns = header_names.Count;
      var df_columns = new DataFrameColumn[nColumns];
      for (var k = 0; k < nColumns; k++) {
        var name = header_names[k];
        df_columns[k] = df_full.Columns[name];
      }

      var df = new DataFrame(df_columns);
      Console.WriteLine("Before transform:");
      Console.WriteLine(df.Head(5));

      var pipeline = mlContext.Transforms.RobustScaler("vwapGain");
      var model = pipeline.Fit(df);
      var transformed = model.Transform(df);
      Console.WriteLine("After Transform:");
      MyHead(transformed, 5);
    }

    static void Main() {
      Run();
      Console.WriteLine("Hit return to exit.");
      Console.ReadKey();
    }
  }
}

Charles

Metadata

Metadata

Assignees

Labels

wontfixThis will not be worked on

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions