org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -17,7 +17,7 @@

package org.apache.spark.ml.regression

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeRegressorParams}
@@ -36,30 +36,31 @@ import org.apache.spark.sql.DataFrame
* for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
@Experimental
final class DecisionTreeRegressor(override val uid: String)
final class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]
with DecisionTreeParams with TreeRegressorParams {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("dtr"))

// Override parameter setters from parent trait for Java API compatibility.

@Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

@Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

@Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)

@Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

@Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

@Since("1.4.0")
Contributor comment: Remove the whitespace at end of line.

override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

@Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

@Since("1.4.0")
override def setImpurity(value: String): this.type = super.setImpurity(value)

override protected def train(dataset: DataFrame): DecisionTreeRegressionModel = {
@@ -77,10 +78,11 @@ final class DecisionTreeRegressor(override val uid: String)
super.getOldStrategy(categoricalFeatures, numClasses = 0, OldAlgo.Regression, getOldImpurity,
subsamplingRate = 1.0)
}

@Since("1.4.0")
override def copy(extra: ParamMap): DecisionTreeRegressor = defaultCopy(extra)
}

@Since("1.4.0")
@Experimental
object DecisionTreeRegressor {
/** Accessor for supported impurities: variance */
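The same @Since placement repeats in every file this patch touches: the class keeps a class-level @Since, and the primary constructor and its uid parameter get their own annotations so the version metadata is attached to the constructor signature as well. Below is a minimal sketch of the placement with a hypothetical MyRegressor class; it assumes Spark's public org.apache.spark.annotation.Since and org.apache.spark.ml.util.Identifiable are on the classpath.

import org.apache.spark.annotation.Since
import org.apache.spark.ml.util.Identifiable

// Hypothetical class illustrating the @Since placement used in this patch:
// on the class, on the primary constructor, on its parameter, and on each
// public member.
@Since("1.4.0")
class MyRegressor @Since("1.4.0") (@Since("1.4.0") val uid: String) {

  @Since("1.4.0")
  def this() = this(Identifiable.randomUID("myReg"))

  @Since("1.4.0")
  override def toString: String = s"MyRegressor: uid=$uid"
}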
org/apache/spark/ml/regression/GBTRegressor.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.regression
import com.github.fommil.netlib.BLAS.{getInstance => blas}

import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeEnsembleModel, TreeRegressorParams}
@@ -42,54 +42,56 @@ import org.apache.spark.sql.types.DoubleType
* learning algorithm for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
@Experimental
final class GBTRegressor(override val uid: String)
final class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, GBTRegressor, GBTRegressionModel]
with GBTParams with TreeRegressorParams with Logging {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("gbtr"))

// Override parameter setters from parent trait for Java API compatibility.

// Parameters from TreeRegressorParams:

@Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

@Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

@Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)

@Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

@Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

@Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

@Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

/**
* The impurity setting is ignored for GBT models.
* Individual trees are built using impurity "Variance."
*/
@Since("1.4.0")
override def setImpurity(value: String): this.type = {
logWarning("GBTRegressor.setImpurity should NOT be used")
this
}

// Parameters from TreeEnsembleParams:

@Since("1.4.0")
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)

@Since("1.4.0")
override def setSeed(value: Long): this.type = {
logWarning("The 'seed' parameter is currently ignored by Gradient Boosting.")
super.setSeed(value)
}

// Parameters from GBTParams:

@Since("1.4.0")
override def setMaxIter(value: Int): this.type = super.setMaxIter(value)

@Since("1.4.0")
override def setStepSize(value: Double): this.type = super.setStepSize(value)

// Parameters for GBTRegressor:
@@ -108,9 +110,11 @@ final class GBTRegressor(override val uid: String)
setDefault(lossType -> "squared")

/** @group setParam */
@Since("1.4.0")
def setLossType(value: String): this.type = set(lossType, value)

/** @group getParam */
@Since("1.4.0")
def getLossType: String = $(lossType).toLowerCase

/** (private[ml]) Convert new loss to old loss. */
@@ -133,10 +137,11 @@ final class GBTRegressor(override val uid: String)
val oldModel = oldGBT.run(oldDataset)
GBTRegressionModel.fromOld(oldModel, this, categoricalFeatures)
}

@Since("1.4.0")
override def copy(extra: ParamMap): GBTRegressor = defaultCopy(extra)
}

@Since("1.4.0")
@Experimental
object GBTRegressor {
// The losses below should be lowercase.
@@ -153,20 +158,21 @@ object GBTRegressor {
* @param _trees Decision trees in the ensemble.
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
@Since("1.4.0")
@Experimental
-final class GBTRegressionModel(
-override val uid: String,
-private val _trees: Array[DecisionTreeRegressionModel],
-private val _treeWeights: Array[Double])
+final class GBTRegressionModel @Since("1.4.0") (
+@Since("1.4.0") override val uid: String,
+private val _trees: Array[DecisionTreeRegressionModel],
+private val _treeWeights: Array[Double])
Contributor comment: Fix the indentations for the parameters. There is no problem about the @Since tag.

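For reference, here is the declaration with the layout the reviewer is most likely asking for, following the usual Spark convention of indenting constructor parameters four spaces and continuation clauses two. This is an assumption about the requested style, not text from the patch, and it shows the declaration only:

final class GBTRegressionModel @Since("1.4.0") (
    @Since("1.4.0") override val uid: String,
    private val _trees: Array[DecisionTreeRegressionModel],
    private val _treeWeights: Array[Double])
  extends PredictionModel[Vector, GBTRegressionModel]
  with TreeEnsembleModel with Serializable {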
extends PredictionModel[Vector, GBTRegressionModel]
with TreeEnsembleModel with Serializable {

require(numTrees > 0, "GBTRegressionModel requires at least 1 tree.")
require(_trees.length == _treeWeights.length, "GBTRegressionModel given trees, treeWeights of" +
s" non-matching lengths (${_trees.length}, ${_treeWeights.length}, respectively).")

@Since("1.4.0")
override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]

@Since("1.4.0")
override def treeWeights: Array[Double] = _treeWeights

override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -183,11 +189,11 @@ final class GBTRegressionModel(
val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction)
blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
}

@Since("1.4.0")
override def copy(extra: ParamMap): GBTRegressionModel = {
copyValues(new GBTRegressionModel(uid, _trees, _treeWeights), extra).setParent(parent)
}

@Since("1.4.0")
override def toString: String = {
s"GBTRegressionModel with $numTrees trees"
}
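Both tree files above re-declare setters from their parent traits "for Java API compatibility": the override pins the fluent return type to the concrete class, so call chains keep their type from Java as well as Scala. A self-contained sketch of the idea with hypothetical names (these are not Spark's actual traits):

// Hypothetical param trait and estimator showing why the setters are
// re-declared: the setter returns this.type, and overriding it on the
// concrete class surfaces it there with the concrete type.
trait HasMaxDepth {
  protected var maxDepth: Int = 5
  def setMaxDepth(value: Int): this.type = { maxDepth = value; this }
  def getMaxDepth: Int = maxDepth
}

class MyTreeRegressor extends HasMaxDepth {
  override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
}

object SetterDemo {
  def main(args: Array[String]): Unit = {
    // Chaining keeps the static type MyTreeRegressor at every step.
    val r = new MyTreeRegressor().setMaxDepth(8)
    println(r.getMaxDepth) // prints 8
  }
}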
org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -18,7 +18,7 @@
package org.apache.spark.ml.regression

import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol}
@@ -124,32 +124,39 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
*
* Uses [[org.apache.spark.mllib.regression.IsotonicRegression]].
*/
@Since("1.5.0")
@Experimental
-class IsotonicRegression(override val uid: String) extends Estimator[IsotonicRegressionModel]
-with IsotonicRegressionBase {

+class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String)
+extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase {
+@Since("1.5.0")
def this() = this(Identifiable.randomUID("isoReg"))

/** @group setParam */
@Since("1.5.0")
def setLabelCol(value: String): this.type = set(labelCol, value)

/** @group setParam */
@Since("1.5.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)

/** @group setParam */
@Since("1.5.0")
def setPredictionCol(value: String): this.type = set(predictionCol, value)

/** @group setParam */
@Since("1.5.0")
def setIsotonic(value: Boolean): this.type = set(isotonic, value)

/** @group setParam */
@Since("1.5.0")
def setWeightCol(value: String): this.type = set(weightCol, value)

/** @group setParam */
@Since("1.5.0")
def setFeatureIndex(value: Int): this.type = set(featureIndex, value)

@Since("1.5.0")
override def copy(extra: ParamMap): IsotonicRegression = defaultCopy(extra)

@Since("1.5.0")
override def fit(dataset: DataFrame): IsotonicRegressionModel = {
validateAndTransformSchema(dataset.schema, fitting = true)
// Extract columns from data. If dataset is persisted, do not persist oldDataset.
@@ -162,7 +169,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg

copyValues(new IsotonicRegressionModel(uid, oldModel).setParent(this))
}

@Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema, fitting = true)
}
org/apache/spark/ml/regression/LinearRegression.scala
@@ -23,7 +23,7 @@ import breeze.linalg.{DenseVector => BDV, norm => brzNorm}
import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN}

import org.apache.spark.{Logging, SparkException}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.PredictorParams
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.param.shared._
@@ -61,18 +61,20 @@ private[regression] trait LinearRegressionParams extends PredictorParams
* - L1 (Lasso)
* - L2 + L1 (elastic net)
*/
@Since("1.3.0")
@Experimental
class LinearRegression(override val uid: String)
class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String)
extends Regressor[Vector, LinearRegression, LinearRegressionModel]
with LinearRegressionParams with Logging {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("linReg"))

/**
* Set the regularization parameter.
* Default is 0.0.
* @group setParam
*/
@Since("1.3.0")
def setRegParam(value: Double): this.type = set(regParam, value)
setDefault(regParam -> 0.0)

@@ -81,6 +83,7 @@ class LinearRegression(override val uid: String)
* Default is true.
* @group setParam
*/
@Since("1.5.0")
def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)
setDefault(fitIntercept -> true)

@@ -93,6 +96,7 @@
* Default is true.
* @group setParam
*/
@Since("1.5.0")
def setStandardization(value: Boolean): this.type = set(standardization, value)
setDefault(standardization -> true)

@@ -103,6 +107,7 @@
* Default is 0.0 which is an L2 penalty.
* @group setParam
*/
@Since("1.4.0")
def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value)
setDefault(elasticNetParam -> 0.0)

@@ -111,6 +116,7 @@
* Default is 100.
* @group setParam
*/
@Since("1.3.0")
def setMaxIter(value: Int): this.type = set(maxIter, value)
setDefault(maxIter -> 100)

@@ -120,6 +126,7 @@
* Default is 1E-6.
* @group setParam
*/
@Since("1.4.0")
def setTol(value: Double): this.type = set(tol, value)
setDefault(tol -> 1E-6)

@@ -254,7 +261,7 @@ class LinearRegression(override val uid: String)
objectiveHistory)
model.setSummary(trainingSummary)
}

@Since("1.3.0")
override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra)
}

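Taken together, the annotated API is used exactly as before; @Since only adds version metadata to the generated docs. A minimal usage sketch against the Spark 1.5-era API, assuming an existing DataFrame named training with "label" and "features" columns:

import org.apache.spark.ml.regression.LinearRegression

// Each setter below carries a @Since tag after this patch:
// setMaxIter and setRegParam since 1.3.0, setElasticNetParam since 1.4.0,
// setFitIntercept since 1.5.0.
val lr = new LinearRegression()
  .setMaxIter(100)
  .setRegParam(0.1)
  .setElasticNetParam(0.5)
  .setFitIntercept(true)

val model = lr.fit(training) // training is the assumed input DataFrame
println(s"Model ${model.uid} trained with ${lr.getMaxIter} max iterations")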