diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
index d6ed6a4570a4..8d556deef2be 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
@@ -42,7 +42,7 @@ import org.apache.spark.sql.functions.{col, lit}
 /** Params for linear SVM Classifier. */
 private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam
   with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol
-  with HasAggregationDepth {
+  with HasAggregationDepth with HasThreshold {
 
   /**
    * Param for threshold in binary classification prediction.
@@ -53,11 +53,8 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR
    *
    * @group param
    */
-  final val threshold: DoubleParam = new DoubleParam(this, "threshold",
+  final override val threshold: DoubleParam = new DoubleParam(this, "threshold",
     "threshold in binary classification prediction applied to rawPrediction")
-
-  /** @group getParam */
-  def getThreshold: Double = $(threshold)
 }
 
 /**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index b234bc4c2df4..0fe108d8bbf1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -366,6 +366,7 @@ class LogisticRegression @Since("1.2.0") (
 
   @Since("1.5.0")
   override def setThreshold(value: Double): this.type = super.setThreshold(value)
+  setDefault(threshold -> 0.5)
 
   @Since("1.5.0")
   override def getThreshold: Double = super.getThreshold
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index b6909b3386b7..d4c8e4b36195 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -19,7 +19,6 @@ package org.apache.spark.ml.feature
 
 import org.apache.hadoop.fs.Path
 
-import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Since
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors, VectorUDT}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
index 23e0d45d943a..0b852b8be897 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
@@ -47,8 +47,8 @@ private[shared] object SharedParamsCodeGen {
         Some("\"probability\"")),
       ParamDesc[String]("varianceCol", "Column name for the biased sample variance of prediction"),
       ParamDesc[Double]("threshold",
-        "threshold in binary classification prediction, in range [0, 1]", Some("0.5"),
-        isValid = "ParamValidators.inRange(0, 1)", finalMethods = false),
+        "threshold in binary classification prediction, in range [0, 1]",
+        isValid = "ParamValidators.inRange(0, 1)", finalMethods = false, finalFields = false),
       ParamDesc[Array[Double]]("thresholds", "Thresholds in multi-class classification" +
         " to adjust the probability of predicting each class." +
         " Array must have length equal to the number of classes, with values > 0" +
@@ -77,7 +77,7 @@ private[shared] object SharedParamsCodeGen {
       ParamDesc[Double]("tol", "the convergence tolerance for iterative algorithms (>= 0)",
         isValid = "ParamValidators.gtEq(0)"),
       ParamDesc[Double]("stepSize", "Step size to be used for each iteration of optimization (>" +
-        " 0)", isValid = "ParamValidators.gt(0)"),
+        " 0)", isValid = "ParamValidators.gt(0)", finalFields = false),
       ParamDesc[String]("weightCol", "weight column name. If this is not set or empty, we treat " +
         "all instance weights as 1.0"),
       ParamDesc[String]("solver", "the solver algorithm for optimization", finalFields = false),
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
index 1a8f499798b8..a7f7db6f2839 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
@@ -162,9 +162,7 @@ private[ml] trait HasThreshold extends Params {
    * Param for threshold in binary classification prediction, in range [0, 1].
    * @group param
    */
-  final val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in binary classification prediction, in range [0, 1]", ParamValidators.inRange(0, 1))
-
-  setDefault(threshold, 0.5)
+  val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in binary classification prediction, in range [0, 1]", ParamValidators.inRange(0, 1))
 
   /** @group getParam */
   def getThreshold: Double = $(threshold)
@@ -352,7 +350,7 @@ private[ml] trait HasStepSize extends Params {
    * Param for Step size to be used for each iteration of optimization (> 0).
    * @group param
    */
-  final val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size to be used for each iteration of optimization (> 0)", ParamValidators.gt(0))
+  val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size to be used for each iteration of optimization (> 0)", ParamValidators.gt(0))
 
   /** @group getParam */
   final def getStepSize: Double = $(stepSize)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 3fc3ac58b779..47079d9c6bb1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -458,7 +458,7 @@ private[ml] trait RandomForestRegressorParams
  *
  * Note: Marked as private and DeveloperApi since this may be made public in the future.
  */
-private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter {
+private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter with HasStepSize {
 
   /* TODO: Add this doc when we add this param. SPARK-7132
    * Threshold for stopping early when runWithValidation is used.
@@ -484,13 +484,10 @@ private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter {
    * (default = 0.1)
    * @group param
    */
-  final val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size " +
+  final override val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size " +
     "(a.k.a. learning rate) in interval (0, 1] for shrinking the contribution of each estimator.",
     ParamValidators.inRange(0, 1, lowerInclusive = false, upperInclusive = true))
 
-  /** @group getParam */
-  final def getStepSize: Double = $(stepSize)
-
   /**
    * @deprecated This method is deprecated and will be removed in 3.0.0.
    * @group setParam
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 948806a5c936..5ee7d9636540 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -63,7 +63,7 @@ def numClasses(self):
 @inherit_doc
 class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
                 HasRegParam, HasTol, HasRawPredictionCol, HasFitIntercept, HasStandardization,
-                HasWeightCol, HasAggregationDepth, JavaMLWritable, JavaMLReadable):
+                HasWeightCol, HasAggregationDepth, HasThreshold, JavaMLWritable, JavaMLReadable):
     """
     .. note:: Experimental
 
@@ -153,18 +153,6 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
     def _create_model(self, java_model):
         return LinearSVCModel(java_model)
 
-    def setThreshold(self, value):
-        """
-        Sets the value of :py:attr:`threshold`.
-        """
-        return self._set(threshold=value)
-
-    def getThreshold(self):
-        """
-        Gets the value of threshold or its default value.
-        """
-        return self.getOrDefault(self.threshold)
-
 
 class LinearSVCModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable):
     """
@@ -1030,6 +1018,11 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
                      "Supported options: " + ", ".join(GBTParams.supportedLossTypes),
                      typeConverter=TypeConverters.toString)
 
+    stepSize = Param(Params._dummy(), "stepSize",
+                     "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " +
+                     "the contribution of each estimator.",
+                     typeConverter=TypeConverters.toFloat)
+
     @keyword_only
     def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index f0ff7a5f59ab..2cc623427edc 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -1014,6 +1014,11 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                     "Supported options: " + ", ".join(GBTParams.supportedLossTypes),
                     typeConverter=TypeConverters.toString)
 
+    stepSize = Param(Params._dummy(), "stepSize",
+                     "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " +
+                     "the contribution of each estimator.",
+                     typeConverter=TypeConverters.toFloat)
+
     @keyword_only
     def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
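
The user-facing API is unchanged by this refactor: `threshold` and `stepSize` move into the shared `HasThreshold` and `HasStepSize` traits, and defaults move out of the traits into the concrete classes. A minimal sketch of the call sites, for reference only (illustrative Scala, not part of the patch; assumes a Spark build containing these changes):

    import org.apache.spark.ml.classification.{LinearSVC, LogisticRegression}

    // Both estimators still expose threshold, now inherited from HasThreshold.
    val svc = new LinearSVC().setThreshold(0.0) // LinearSVC applies it to rawPrediction
    val lr = new LogisticRegression()
    println(lr.getThreshold) // 0.5, from the setDefault added in LogisticRegression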