From 4f2b3e0948e6beb2ca7591d1b5719046e68e857f Mon Sep 17 00:00:00 2001 From: HuJiayin Date: Tue, 19 Apr 2016 12:11:09 +0800 Subject: [PATCH 1/4] [SPARK-14712][ML]spark.ml.LogisticRegressionModel.toString should summarize model --- .../spark/ml/classification/LogisticRegression.scala | 11 ++++++++++- python/pyspark/ml/classification.py | 8 +++++++- python/pyspark/mllib/classification.py | 2 ++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index c2b440059b1f..32e44cfd2ad4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -159,7 +159,9 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas @Since("1.2.0") @Experimental class LogisticRegression @Since("1.2.0") ( - @Since("1.4.0") override val uid: String) + @Since("1.4.0") override val uid: String, + @Since("2.0.0") val numFeatures: Int = 0, + @Since("2.0.0") val numClasses: Int = 0) extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel] with LogisticRegressionParams with DefaultParamsWritable with Logging { @@ -458,6 +460,13 @@ class LogisticRegression @Since("1.2.0") ( @Since("1.4.0") override def copy(extra: ParamMap): LogisticRegression = defaultCopy(extra) + + @Since("2.0.0") + override def toString: String = { + val td = getDefault(threshold) + s"${super.toString}, numClasses = ${numClasses}, " + + s"numFeatures = ${numFeatures} threshold = ${td.getOrElse("None")}" + } } @Since("1.6.0") diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index de1321b13975..794268b236d9 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -123,7 +123,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, threshold=0.5, thresholds=None, probabilityCol="probability", - rawPredictionCol="rawPrediction", standardization=True, weightCol=None): + rawPredictionCol="rawPrediction", standardization=True, weightCol=None, + numFeatures=0, numClasses=0): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ @@ -135,6 +136,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre kwargs = self.setParams._input_kwargs self._set(**kwargs) self._checkThresholdConsistency() + self._numFeatures = int(numFeatures) + self._numClasses = int(numClasses) return self def _create_model(self, java_model): @@ -204,6 +207,9 @@ def _checkThresholdConsistency(self): raise ValueError("Logistic Regression getThreshold found inconsistent values for" + " threshold (%g) and thresholds (equivalent to %g)" % (t2, t)) + def __repr__(self): + return self._call_java("toString") + class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 57106f8690a7..abbe2fa3ea9a 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -262,6 +262,8 @@ def load(cls, sc, path): model.setThreshold(threshold) return model + def __repr__(self): + return self._call_java("toString") class LogisticRegressionWithSGD(object): """ From 34307860c60217959546ee0f277d42ff715f51b7 Mon Sep 17 00:00:00 2001 From: HuJiayin Date: Tue, 19 Apr 2016 15:10:27 +0800 Subject: [PATCH 2/4] move tostring to ml lrmodel and add tostring test case --- .../classification/LogisticRegression.scala | 20 +++++++++---------- .../LogisticRegressionSuite.scala | 10 +++++++++- python/pyspark/mllib/classification.py | 1 + 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 32e44cfd2ad4..e73fcb41d932 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -23,7 +23,7 @@ import breeze.linalg.{DenseVector => BDV} import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} import org.apache.hadoop.fs.Path -import org.apache.spark.SparkException +import org.apache.spark.{SparkException} import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml.feature.Instance @@ -159,9 +159,7 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas @Since("1.2.0") @Experimental class LogisticRegression @Since("1.2.0") ( - @Since("1.4.0") override val uid: String, - @Since("2.0.0") val numFeatures: Int = 0, - @Since("2.0.0") val numClasses: Int = 0) + @Since("1.4.0") override val uid: String) extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel] with LogisticRegressionParams with DefaultParamsWritable with Logging { @@ -460,13 +458,6 @@ class LogisticRegression @Since("1.2.0") ( @Since("1.4.0") override def copy(extra: ParamMap): LogisticRegression = defaultCopy(extra) - - @Since("2.0.0") - override def toString: String = { - val td = getDefault(threshold) - s"${super.toString}, numClasses = ${numClasses}, " + - s"numFeatures = ${numFeatures} threshold = ${td.getOrElse("None")}" - } } @Since("1.6.0") @@ -634,6 +625,13 @@ class LogisticRegressionModel private[spark] ( */ @Since("1.6.0") override def write: MLWriter = new LogisticRegressionModel.LogisticRegressionModelWriter(this) + + @Since("2.0.0") + override def toString: String = { + val td = getDefault(threshold) + s"${super.toString}, numClasses = $numClasses, " + + s"numFeatures = $numFeatures threshold = ${td.getOrElse("None")}" + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 48db4281309b..e85e34066f86 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -941,8 +941,16 @@ class LogisticRegressionSuite lr, isClassification = true, sqlContext) { (expected, actual) => assert(expected.intercept === actual.intercept) assert(expected.coefficients.toArray === actual.coefficients.toArray) - } + } } + + test("toString") { + val lrModel = new LogisticRegressionModel(uid = "lrModeltest", + coefficients = Vectors.dense(0.1, 0.2, 0.3), intercept = 0.5) + val expected: String = "lrModeltest, numClasses = 2, numFeatures = 3 threshold = 0.5" + assert(lrModel.toString === expected) + } + } object LogisticRegressionSuite { diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index abbe2fa3ea9a..8f4a33d1364e 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -265,6 +265,7 @@ def load(cls, sc, path): def __repr__(self): return self._call_java("toString") + class LogisticRegressionWithSGD(object): """ .. versionadded:: 0.9.0 From ac228ea07a36953d16a538eb9132b9f1df9a47e1 Mon Sep 17 00:00:00 2001 From: HuJiayin Date: Tue, 19 Apr 2016 15:20:25 +0800 Subject: [PATCH 3/4] remove unneeded {} --- .../org/apache/spark/ml/classification/LogisticRegression.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index e73fcb41d932..4466d68210e5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -23,7 +23,7 @@ import breeze.linalg.{DenseVector => BDV} import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkException} +import org.apache.spark.SparkException import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml.feature.Instance From 2dbe552ea86386ece1d4f0292cb8f0c42f6df503 Mon Sep 17 00:00:00 2001 From: HuJiayin Date: Wed, 20 Apr 2016 09:34:55 +0800 Subject: [PATCH 4/4] remove newly added input parameters --- .../ml/classification/LogisticRegressionSuite.scala | 3 +-- python/pyspark/ml/classification.py | 12 +++++------- python/pyspark/mllib/classification.py | 1 + 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index e85e34066f86..d8eb16bec624 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -941,7 +941,7 @@ class LogisticRegressionSuite lr, isClassification = true, sqlContext) { (expected, actual) => assert(expected.intercept === actual.intercept) assert(expected.coefficients.toArray === actual.coefficients.toArray) - } + } } test("toString") { @@ -950,7 +950,6 @@ class LogisticRegressionSuite val expected: String = "lrModeltest, numClasses = 2, numFeatures = 3 threshold = 0.5" assert(lrModel.toString === expected) } - } object LogisticRegressionSuite { diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 794268b236d9..c21a6c9000f4 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -123,8 +123,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, threshold=0.5, thresholds=None, probabilityCol="probability", - rawPredictionCol="rawPrediction", standardization=True, weightCol=None, - numFeatures=0, numClasses=0): + rawPredictionCol="rawPrediction", standardization=True, weightCol=None): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ @@ -136,8 +135,6 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre kwargs = self.setParams._input_kwargs self._set(**kwargs) self._checkThresholdConsistency() - self._numFeatures = int(numFeatures) - self._numClasses = int(numClasses) return self def _create_model(self, java_model): @@ -207,9 +204,6 @@ def _checkThresholdConsistency(self): raise ValueError("Logistic Regression getThreshold found inconsistent values for" + " threshold (%g) and thresholds (equivalent to %g)" % (t2, t)) - def __repr__(self): - return self._call_java("toString") - class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ @@ -279,6 +273,10 @@ def evaluate(self, dataset): java_blr_summary = self._call_java("evaluate", dataset) return BinaryLogisticRegressionSummary(java_blr_summary) + @since("2.0.0") + def __repr__(self): + return self._call_java("toString") + class LogisticRegressionSummary(JavaWrapper): """ diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 8f4a33d1364e..c0cda2e5c4e4 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -262,6 +262,7 @@ def load(cls, sc, path): model.setThreshold(threshold) return model + @since("2.0.0") def __repr__(self): return self._call_java("toString")