-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-18481][ML] ML 2.1 QA: Remove deprecated methods for ML #15913
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
ecddf15
9a8c926
13988b4
7beb568
36c2e24
8d3c47a
0c85fd4
1d8cef5
3c8ee2d
c874d0e
157acf9
864be6e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -183,6 +183,12 @@ class GBTRegressionModel private[ml]( | |
| @Since("1.4.0") | ||
| override def trees: Array[DecisionTreeRegressionModel] = _trees | ||
|
|
||
| /** | ||
| * Number of trees in ensemble | ||
| */ | ||
| @Since("2.0.0") | ||
| val getNumTrees: Int = trees.length | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this have an
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added. |
||
|
|
||
| @Since("1.4.0") | ||
| override def treeWeights: Array[Double] = _treeWeights | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -317,8 +317,32 @@ private[ml] trait TreeEnsembleParams extends DecisionTreeParams { | |
| } | ||
| } | ||
|
|
||
| /** Used for [[RandomForestParams]] */ | ||
| private[ml] trait HasFeatureSubsetStrategy extends Params { | ||
| /** | ||
| * Parameters for Random Forest algorithms. | ||
| */ | ||
| private[ml] trait RandomForestParams extends TreeEnsembleParams { | ||
|
|
||
| /** | ||
| * Number of trees to train (>= 1). | ||
| * If 1, then no bootstrapping is used. If > 1, then bootstrapping is done. | ||
| * TODO: Change to always do bootstrapping (simpler). SPARK-7130 | ||
| * (default = 20) | ||
| * | ||
| * Note: The reason that we cannot add this to both GBT and RF (i.e. in TreeEnsembleParams) | ||
| * is the param `maxIter` controls how many trees a GBT has. The semantics in the algorithms | ||
| * are a bit different. | ||
| * @group param | ||
| */ | ||
| final val numTrees: IntParam = new IntParam(this, "numTrees", "Number of trees to train (>= 1)", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: the reason that we cannot add this to both GBT and RF (i.e. in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good suggestion, added. |
||
| ParamValidators.gtEq(1)) | ||
|
|
||
| setDefault(numTrees -> 20) | ||
|
|
||
| /** @group setParam */ | ||
| def setNumTrees(value: Int): this.type = set(numTrees, value) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are these setter methods in traits Java-compatible?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, we already have |
||
|
|
||
| /** @group getParam */ | ||
| final def getNumTrees: Int = $(numTrees) | ||
|
|
||
| /** | ||
| * The number of features to consider for splits at each tree node. | ||
|
|
@@ -364,38 +388,6 @@ private[ml] trait HasFeatureSubsetStrategy extends Params { | |
| final def getFeatureSubsetStrategy: String = $(featureSubsetStrategy).toLowerCase | ||
| } | ||
|
|
||
| /** | ||
| * Used for [[RandomForestParams]]. | ||
| * This is separated out from [[RandomForestParams]] because of an issue with the | ||
| * `numTrees` method conflicting with this Param in the Estimator. | ||
| */ | ||
| private[ml] trait HasNumTrees extends Params { | ||
|
|
||
| /** | ||
| * Number of trees to train (>= 1). | ||
| * If 1, then no bootstrapping is used. If > 1, then bootstrapping is done. | ||
| * TODO: Change to always do bootstrapping (simpler). SPARK-7130 | ||
| * (default = 20) | ||
| * @group param | ||
| */ | ||
| final val numTrees: IntParam = new IntParam(this, "numTrees", "Number of trees to train (>= 1)", | ||
| ParamValidators.gtEq(1)) | ||
|
|
||
| setDefault(numTrees -> 20) | ||
|
|
||
| /** @group setParam */ | ||
| def setNumTrees(value: Int): this.type = set(numTrees, value) | ||
|
|
||
| /** @group getParam */ | ||
| final def getNumTrees: Int = $(numTrees) | ||
| } | ||
|
|
||
| /** | ||
| * Parameters for Random Forest algorithms. | ||
| */ | ||
| private[ml] trait RandomForestParams extends TreeEnsembleParams | ||
| with HasFeatureSubsetStrategy with HasNumTrees | ||
|
|
||
| private[spark] object RandomForestParams { | ||
| // These options should be lowercase. | ||
| final val supportedFeatureSubsetStrategies: Array[String] = | ||
|
|
@@ -405,15 +397,9 @@ private[spark] object RandomForestParams { | |
| private[ml] trait RandomForestClassifierParams | ||
| extends RandomForestParams with TreeClassifierParams | ||
|
|
||
| private[ml] trait RandomForestClassificationModelParams extends TreeEnsembleParams | ||
| with HasFeatureSubsetStrategy with TreeClassifierParams | ||
|
|
||
| private[ml] trait RandomForestRegressorParams | ||
| extends RandomForestParams with TreeRegressorParams | ||
|
|
||
| private[ml] trait RandomForestRegressionModelParams extends TreeEnsembleParams | ||
| with HasFeatureSubsetStrategy with TreeRegressorParams | ||
|
|
||
| /** | ||
| * Parameters for Gradient-Boosted Tree algorithms. | ||
| * | ||
|
|
@@ -443,12 +429,11 @@ private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter with HasS | |
| * (default = 0.1) | ||
| * @group setParam | ||
| */ | ||
| def setStepSize(value: Double): this.type = set(stepSize, value) | ||
|
|
||
| override def validateParams(): Unit = { | ||
| def setStepSize(value: Double): this.type = { | ||
| require(ParamValidators.inRange(0, 1, lowerInclusive = false, upperInclusive = true)( | ||
|
||
| getStepSize), "GBT parameter stepSize should be in interval (0, 1], " + | ||
| s"but it given invalid value $getStepSize.") | ||
| value), "GBT parameter stepSize should be in interval (0, 1], " + | ||
| s"but it given invalid value $value.") | ||
|
||
| set(stepSize, value) | ||
| } | ||
|
|
||
| /** (private[ml]) Create a BoostingStrategy instance to use with the old API. */ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this actually necessary? Before, `validateParams()` was never used. Seems like we could just remove it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's a bug that `validateParams` was never used. It should validate params interaction before fitting (if necessary); this is why we deprecate `validateParams` and move what it does to `transformSchema`. We did not have corresponding test cases before, so no test was broken when we deprecated `validateParams`. I added test cases in this PR.