Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions python/pyspark/ml/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,7 @@ def test_linear_regression_summary(self):
self.assertAlmostEqual(s.r2, 1.0, 2)
self.assertTrue(isinstance(s.residuals, DataFrame))
self.assertEqual(s.numInstances, 2)
self.assertEqual(s.degreesOfFreedom, 1)
devResiduals = s.devianceResiduals
self.assertTrue(isinstance(devResiduals, list) and isinstance(devResiduals[0], float))
coefStdErr = s.coefficientStandardErrors
Expand All @@ -1075,7 +1076,8 @@ def test_linear_regression_summary(self):
pValues = s.pValues
self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float))
# test evaluation (with training dataset) produces a summary with same values
# one check is enough to verify a summary is returned, Scala version runs full test
# one check is enough to verify a summary is returned
# The child class LinearRegressionTrainingSummary runs full test
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what this comment means?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment change is not related to this PR; I made it only because the previous comment was misleading.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MLnick This is because the type of LinearRegressionModel.summary is LinearRegressionTrainingSummary, but the return type of LinearRegressionModel.evaluate() is LinearRegressionSummary. Theoretically we should test both, but we can simplify the test to check only one function, since we have already checked all functions in the child class. Thanks to @mpjlu for updating this comment.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is because the Scala version runs the full test. Even if the Scala version runs the full test, we still need the function call test.
If a child class has already done the function call test, we don't need to test the parent class again.

sameSummary = model.evaluate(df)
self.assertAlmostEqual(sameSummary.explainedVariance, s.explainedVariance)

Expand All @@ -1093,6 +1095,7 @@ def test_glr_summary(self):
self.assertEqual(s.numIterations, 1) # this should default to a single iteration of WLS
self.assertTrue(isinstance(s.predictions, DataFrame))
self.assertEqual(s.predictionCol, "prediction")
self.assertEqual(s.numInstances, 2)
self.assertTrue(isinstance(s.residuals(), DataFrame))
self.assertTrue(isinstance(s.residuals("pearson"), DataFrame))
coefStdErr = s.coefficientStandardErrors
Expand All @@ -1111,7 +1114,8 @@ def test_glr_summary(self):
self.assertTrue(isinstance(s.nullDeviance, float))
self.assertTrue(isinstance(s.dispersion, float))
# test evaluation (with training dataset) produces a summary with same values
# one check is enough to verify a summary is returned, Scala version runs full test
# one check is enough to verify a summary is returned
# The child class GeneralizedLinearRegressionTrainingSummary runs full test
sameSummary = model.evaluate(df)
self.assertAlmostEqual(sameSummary.deviance, s.deviance)

Expand Down