Updated libmf and corresponding MatrixFactorizationSimpleTrainAndPredict() baselines per build (#5121)

mstfbl · web-flow · commit 8e5f7b42cd65 · 2020-05-13T19:38:22.000-07:00
* Updated libmf and corresponding MatrixFactorizationSimpleTrainAndPredict() baselines per build

* Updated for slight variance in CentOS 7

* Remove tolerance

* Updated baselines for Ubuntu and Linux, added back tolerance

* Removed CentOS 7 specific check

* Removed extra braces and space

* Updated name of expectedLinuxMeanSquaredError

* Added explanation of varying but consistent metrics
diff --git a/src/Native/MatrixFactorizationNative/libmf b/src/Native/MatrixFactorizationNative/libmf
@@ -1 +1 @@
-Subproject commit 298715a4e458bc09c6a27c8643a58095afbdadf1
+Subproject commit 403153ca204817e2901b2872d977088316360641
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
@@ -54,7 +54,6 @@ public void MatrixFactorization_Estimator()
         }
 
         [MatrixFactorizationFact]
-        //Skipping test temporarily. This test will be re-enabled once the cause of failures has been determined
         public void MatrixFactorizationSimpleTrainAndPredict()
         {
             var mlContext = new MLContext(seed: 1);
@@ -96,10 +95,10 @@ public void MatrixFactorizationSimpleTrainAndPredict()
             // MF produce different matrices on different platforms, so check their content on Windows.
             if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
             {
-                Assert.Equal(0.290507137775421, leftMatrix[0], 5);
-                Assert.Equal(0.558072924613953, leftMatrix[leftMatrix.Count - 1], 5);
-                Assert.Equal(0.270811557769775, rightMatrix[0], 5);
-                Assert.Equal(0.376706808805466, rightMatrix[rightMatrix.Count - 1], 5);
+                Assert.Equal(0.309137582778931, leftMatrix[0], 5);
+                Assert.Equal(0.468956589698792, leftMatrix[leftMatrix.Count - 1], 5);
+                Assert.Equal(0.303486406803131, rightMatrix[0], 5);
+                Assert.Equal(0.503888845443726, rightMatrix[rightMatrix.Count - 1], 5);
             }
             // Read the test data set as an IDataView
             var testData = reader.Load(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));
@@ -122,28 +121,30 @@ public void MatrixFactorizationSimpleTrainAndPredict()
             // Compute prediction errors
             var metrices = mlContext.Recommendation().Evaluate(prediction, labelColumnName: labelColumnName, scoreColumnName: scoreColumnName);
 
-            // Determine if the selected metric is reasonable for different platforms
-            // Windows tolerance is set at 1e-7, and Linux tolerance is set at 1e-5
-            double windowsTolerance = Math.Pow(10, -7);
+            // Determine if the selected mean-squared error metric is reasonable on different platforms within the variation tolerance.
+            // Windows and Mac tolerances are set at 1e-7, and Linux tolerance is set at 1e-5.
+            // Here, each build OS has its own MSE baseline metric. While these baselines differ between OSes, each OS build is expected to
+            // produce the same metric consistently. The differences come from minor build differences and varying implementations of sub-functions,
+            // such as random variables that are first obtained with the default random number generator in the libMF C++ libraries.
+            double windowsAndMacTolerance = Math.Pow(10, -7);
             double linuxTolerance = Math.Pow(10, -5);
             if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
             {
                 // Linux case
-                var expectedUnixL2Error = 0.610332110253861; // Linux baseline
-                Assert.InRange(metrices.MeanSquaredError, expectedUnixL2Error - linuxTolerance, expectedUnixL2Error + linuxTolerance);
+                double expectedLinuxMeanSquaredError = 0.6127260028273948; // Linux baseline
+                Assert.InRange(metrices.MeanSquaredError, expectedLinuxMeanSquaredError - linuxTolerance, expectedLinuxMeanSquaredError + linuxTolerance);
             }
             else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
             {
-                // The Mac case is just broken. Should be fixed later. Re-enable when done.
                 // Mac case
-                //var expectedMacL2Error = 0.61192207960271; // Mac baseline
-                //Assert.InRange(metrices.L2, expectedMacL2Error - 5e-3, expectedMacL2Error + 5e-3); // 1e-7 is too small for Mac so we try 1e-5
+                double expectedMacMeanSquaredError = 0.616389336408704; // Mac baseline
+                Assert.InRange(metrices.MeanSquaredError, expectedMacMeanSquaredError - windowsAndMacTolerance, expectedMacMeanSquaredError + windowsAndMacTolerance);
             }
             else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
             {
                 // Windows case
-                var expectedWindowsL2Error = 0.60226203382884; // Windows baseline
-                Assert.InRange(metrices.MeanSquaredError, expectedWindowsL2Error - windowsTolerance, expectedWindowsL2Error + windowsTolerance);
+                double expectedWindowsMeanSquaredError = 0.600329985097577; // Windows baseline
+                Assert.InRange(metrices.MeanSquaredError, expectedWindowsMeanSquaredError - windowsAndMacTolerance, expectedWindowsMeanSquaredError + windowsAndMacTolerance);
             }
 
             var modelWithValidation = pipeline.Fit(data, testData);