-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-15162][SPARK-15164][PySpark][DOCS][ML] update some pydocs #12938
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
8389280
1fa57e5
b1ce817
8125c8c
c72fa46
3fd1dce
c7caa43
4776221
64942b7
2397004
130d05f
a73913b
50b41ae
9e38ddf
f4df8f0
5df5a93
2eec947
e11dbf8
4111b2d
53ab790
c2c7900
a7aadec
e4061f4
7b634b6
873f6c8
9fb2e41
74636b1
d925f38
3981612
3d13c6c
2be8cdf
4431daa
d842309
de63f9f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,6 +49,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti | |
| HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds, | ||
| HasWeightCol, JavaMLWritable, JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Logistic regression. | ||
| Currently, this class only supports binary classification. | ||
|
|
||
|
|
@@ -96,7 +98,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti | |
|
|
||
| threshold = Param(Params._dummy(), "threshold", | ||
| "Threshold in binary classification prediction, in range [0, 1]." + | ||
| " If threshold and thresholds are both set, they must match.", | ||
| " If threshold and thresholds are both set, they must match." + | ||
| "e.g. threshold must be equal to [1-p, p].", | ||
|
||
| typeConverter=TypeConverters.toFloat) | ||
|
|
||
| @keyword_only | ||
|
|
@@ -154,7 +157,10 @@ def setThreshold(self, value): | |
| @since("1.4.0") | ||
| def getThreshold(self): | ||
| """ | ||
| Gets the value of threshold or its default value. | ||
| Gets the value of threshold or attempt to convert thresholds to threshold if set, or default | ||
|
||
| value if neither are set. | ||
| This conversion is equivalent to: | ||
| :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`. | ||
| """ | ||
| self._checkThresholdConsistency() | ||
| if self.isSet(self.thresholds): | ||
|
|
@@ -183,7 +189,7 @@ def getThresholds(self): | |
| If :py:attr:`thresholds` is set, return its value. | ||
| Otherwise, if :py:attr:`threshold` is set, return the equivalent thresholds for binary | ||
| classification: (1-threshold, threshold). | ||
| If neither are set, throw an error. | ||
| If neither are set, return the default value. | ||
| """ | ||
| self._checkThresholdConsistency() | ||
| if not self.isSet(self.thresholds) and self.isSet(self.threshold): | ||
|
|
@@ -208,6 +214,8 @@ def _checkThresholdConsistency(self): | |
|
|
||
| class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Model fitted by LogisticRegression. | ||
|
|
||
| .. versionadded:: 1.3.0 | ||
|
|
@@ -491,6 +499,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred | |
| TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, | ||
| JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| `Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_ | ||
| learning algorithm for classification. | ||
| It supports both binary and multiclass labels, as well as both continuous and categorical | ||
|
|
@@ -585,6 +595,8 @@ def _create_model(self, java_model): | |
| @inherit_doc | ||
| class DecisionTreeClassificationModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Model fitted by DecisionTreeClassifier. | ||
|
|
||
| .. versionadded:: 1.4.0 | ||
|
|
@@ -618,6 +630,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred | |
| RandomForestParams, TreeClassifierParams, HasCheckpointInterval, | ||
| JavaMLWritable, JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| `Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_ | ||
| learning algorithm for classification. | ||
| It supports both binary and multiclass labels, as well as both continuous and categorical | ||
|
|
@@ -710,6 +724,8 @@ def _create_model(self, java_model): | |
|
|
||
| class RandomForestClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Model fitted by RandomForestClassifier. | ||
|
|
||
| .. versionadded:: 1.4.0 | ||
|
|
@@ -736,6 +752,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol | |
| GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, | ||
| JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| `Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_ | ||
| learning algorithm for classification. | ||
| It supports binary labels, as well as both continuous and categorical features. | ||
|
|
@@ -849,6 +867,8 @@ def getLossType(self): | |
|
|
||
| class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Model fitted by GBTClassifier. | ||
|
|
||
| .. versionadded:: 1.4.0 | ||
|
|
@@ -874,6 +894,8 @@ def featureImportances(self): | |
| class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol, | ||
| HasRawPredictionCol, JavaMLWritable, JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Naive Bayes Classifiers. | ||
| It supports both Multinomial and Bernoulli NB. `Multinomial NB | ||
| <http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html>`_ | ||
|
|
@@ -992,6 +1014,8 @@ def getModelType(self): | |
|
|
||
| class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Model fitted by NaiveBayes. | ||
|
|
||
| .. versionadded:: 1.5.0 | ||
|
|
@@ -1016,8 +1040,11 @@ def theta(self): | |
|
|
||
| @inherit_doc | ||
| class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, | ||
| HasMaxIter, HasTol, HasSeed, JavaMLWritable, JavaMLReadable): | ||
| HasMaxIter, HasTol, HasSeed, HasStepSize, JavaMLWritable, | ||
| JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Classifier trainer based on the Multilayer Perceptron. | ||
| Each layer has sigmoid activation function, output layer has softmax. | ||
| Number of inputs has to be equal to the size of feature vectors. | ||
|
|
@@ -1058,6 +1085,14 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, | |
| True | ||
| >>> model.weights == model2.weights | ||
| True | ||
| >>> mlp2 = mlp2.setWeights([ | ||
| ... 2, 5, 1, -7, -5, -10, 0, 0.6, -1, 2, -2, 1, 2, -7, -1, -2, 2, 1, -1, 9, -9, 3, -3, -3, | ||
| ... 3.0, 0, -1]) | ||
| >>> model3 = mlp2.fit(df) | ||
| >>> model3.weights != model2.weights | ||
| True | ||
| >>> model3.layers == model.layers | ||
| True | ||
|
|
||
| .. versionadded:: 1.6.0 | ||
| """ | ||
|
|
@@ -1071,28 +1106,37 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, | |
| "remaining data in a partition then it is adjusted to the size of this " + | ||
| "data. Recommended size is between 10 and 1000, default is 128.", | ||
| typeConverter=TypeConverters.toInt) | ||
| solver = Param(Params._dummy(), "solver", "Allows setting the solver: minibatch gradient " + | ||
| "descent (gd) or l-bfgs. (Default l-bfgs)", | ||
| typeConverter=TypeConverters.toString) | ||
| weights = Param(Params._dummy(), "weights", "Weights (either initial if before training or " + | ||
| "actual on model)", typeConverter=TypeConverters.toVector) | ||
|
|
||
| @keyword_only | ||
| def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", | ||
| maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128): | ||
| maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, | ||
| solver="l-bfgs", weights=None): | ||
| """ | ||
| __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ | ||
| maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128) | ||
| maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, \ | ||
| solver="l-bfgs", weights=None) | ||
| """ | ||
| super(MultilayerPerceptronClassifier, self).__init__() | ||
| self._java_obj = self._new_java_obj( | ||
| "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid) | ||
| self._setDefault(maxIter=100, tol=1E-4, blockSize=128) | ||
| self._setDefault(maxIter=100, tol=1E-4, blockSize=128, stepSize=0.03, solver="l-bfgs") | ||
| kwargs = self.__init__._input_kwargs | ||
| self.setParams(**kwargs) | ||
|
|
||
| @keyword_only | ||
| @since("1.6.0") | ||
| def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", | ||
| maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128): | ||
| maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, | ||
| solver="l-bfgs", weights=None): | ||
| """ | ||
| setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ | ||
| maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128) | ||
| maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, \ | ||
| solver="l-bfgs", weights=None) | ||
| Sets params for MultilayerPerceptronClassifier. | ||
| """ | ||
| kwargs = self.setParams._input_kwargs | ||
|
|
@@ -1129,9 +1173,61 @@ def getBlockSize(self): | |
| """ | ||
| return self.getOrDefault(self.blockSize) | ||
|
|
||
| @since("2.0.0") | ||
| def setStepSize(self, value): | ||
| """ | ||
| Sets the value of :py:attr:`stepSize`. | ||
| """ | ||
| return self._set(stepSize=value) | ||
|
|
||
| @since("2.0.0") | ||
| def getStepSize(self): | ||
| """ | ||
| Gets the value of stepSize or its default value. | ||
| """ | ||
| return self.getOrDefault(self.stepSize) | ||
|
|
||
| @since("2.0.0") | ||
| def setSolver(self, value): | ||
| """ | ||
| Sets the value of :py:attr:`solver`. | ||
| """ | ||
| return self._set(solver=value) | ||
|
|
||
| @since("2.0.0") | ||
| def getSolver(self): | ||
| """ | ||
| Gets the value of solver or its default value. | ||
| """ | ||
| return self.getOrDefault(self.solver) | ||
|
|
||
| @property | ||
| @since("2.0.0") | ||
| def getOptimizer(self): | ||
| """ | ||
| Gets the optimizer used. | ||
| """ | ||
| return self.getSolver() | ||
|
|
||
| @since("2.0.0") | ||
| def setWeights(self, value): | ||
| """ | ||
| Sets the value of :py:attr:`weights`. | ||
| """ | ||
| return self._set(weights=value) | ||
|
|
||
| @since("2.0.0") | ||
| def getWeights(self): | ||
| """ | ||
| Gets the value of weights or its default value. | ||
| """ | ||
| return self.getOrDefault(self.weights) | ||
|
|
||
|
|
||
| class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLReadable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Model fitted by MultilayerPerceptronClassifier. | ||
|
|
||
| .. versionadded:: 1.6.0 | ||
|
|
@@ -1181,6 +1277,8 @@ def getClassifier(self): | |
| @inherit_doc | ||
| class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Reduction of Multiclass Classification to Binary Classification. | ||
| Performs reduction using one against all strategy. | ||
| For a multiclass classification with k classes, train k models (one per class). | ||
|
|
@@ -1335,6 +1433,8 @@ def _to_java(self): | |
|
|
||
| class OneVsRestModel(Model, OneVsRestParams, MLReadable, MLWritable): | ||
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Model fitted by OneVsRest. | ||
| This stores the models resulting from training k binary classifiers: one for each class. | ||
| Each example is scored against all k models, and the model with the highest score | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
indentation is off here. Also prefer
(Default: l-bfgs)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure, kept original indentation but will update.