-
Notifications
You must be signed in to change notification settings - Fork 29k
[DOC][MINOR] ml.feature Scala and Python API sync #13159
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,7 +53,8 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC | |
|
|
||
| /** | ||
| * :: Experimental :: | ||
| * PCA trains a model to project vectors to a low-dimensional space using PCA. | ||
| * PCA trains a model to project vectors to a lower dimensional space of the top [[PCA!.k]] | ||
| * principal components. | ||
| */ | ||
| @Experimental | ||
| class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams | ||
|
|
@@ -106,7 +107,7 @@ object PCA extends DefaultParamsReadable[PCA] { | |
|
|
||
| /** | ||
| * :: Experimental :: | ||
| * Model fitted by [[PCA]]. | ||
| * Model fitted by [[PCA]]. Transforms vectors to a lower dimensional space. | ||
|
||
| * | ||
| * @param pc A principal components Matrix. Each column is one principal component. | ||
| * @param explainedVariance A vector of proportions of variance explained by | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -240,7 +240,8 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] { | |
|
|
||
| /** | ||
| * :: Experimental :: | ||
| * Transform categorical features to use 0-based indices instead of their original values. | ||
| * Model fitted by [[VectorIndexer]]. Transform categorical features to use 0-based indices | ||
|
||
| * instead of their original values. | ||
| * - Categorical features are mapped to indices. | ||
| * - Continuous features (columns) are left unchanged. | ||
| * This also appends metadata to the output column, marking features as Numeric (continuous), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -609,7 +609,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab | |
| """ | ||
|
|
||
| minDocFreq = Param(Params._dummy(), "minDocFreq", | ||
| "minimum of documents in which a term should appear for filtering", | ||
| "minimum number of documents in which a term should appear for filtering", | ||
| typeConverter=TypeConverters.toInt) | ||
|
|
||
| @keyword_only | ||
|
|
@@ -1302,7 +1302,8 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, | |
|
|
||
| minTokenLength = Param(Params._dummy(), "minTokenLength", "minimum token length (>= 0)", | ||
| typeConverter=TypeConverters.toInt) | ||
| gaps = Param(Params._dummy(), "gaps", "whether regex splits on gaps (True) or matches tokens") | ||
| gaps = Param(Params._dummy(), "gaps", "whether regex splits on gaps (True) or matches tokens " + | ||
| "(False)") | ||
| pattern = Param(Params._dummy(), "pattern", "regex pattern (Java dialect) used for tokenizing", | ||
| typeConverter=TypeConverters.toString) | ||
| toLowercase = Param(Params._dummy(), "toLowercase", "whether to convert all characters to " + | ||
|
|
@@ -1907,7 +1908,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja | |
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Class for indexing categorical feature columns in a dataset of [[Vector]]. | ||
| Class for indexing categorical feature columns in a dataset of `Vector`. | ||
|
|
||
| This has 2 usage modes: | ||
| - Automatically identify categorical features (default behavior) | ||
|
|
@@ -2025,6 +2026,16 @@ class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): | |
|
|
||
| Model fitted by VectorIndexer. | ||
|
|
||
| Transform categorical features to use 0-based indices instead of their original values. | ||
| - Categorical features are mapped to indices. | ||
| - Continuous features (columns) are left unchanged. | ||
|
|
||
| This also appends metadata to the output column, marking features as Numeric (continuous), | ||
| Nominal (categorical), or Binary (either continuous or categorical). | ||
| Non-ML metadata is not carried over from the input to the output column. | ||
|
|
||
| This maintains vector sparsity. | ||
|
|
||
| .. versionadded:: 1.4.0 | ||
| """ | ||
|
|
||
|
|
@@ -2327,7 +2338,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab | |
| """ | ||
| .. note:: Experimental | ||
|
|
||
| PCA trains a model to project vectors to a low-dimensional space using PCA. | ||
| PCA trains a model to project vectors to a lower dimensional space of the | ||
| top :py:attr:`k` principal components. | ||
|
|
||
| >>> from pyspark.ml.linalg import Vectors | ||
| >>> data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),), | ||
|
|
@@ -2401,7 +2413,7 @@ class PCAModel(JavaModel, JavaMLReadable, JavaMLWritable): | |
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Model fitted by PCA. | ||
| Model fitted by PCA. Transforms vectors to a lower dimensional space. | ||
|
||
|
|
||
| .. versionadded:: 1.5.0 | ||
| """ | ||
|
|
@@ -2532,7 +2544,8 @@ class RFormulaModel(JavaModel, JavaMLReadable, JavaMLWritable): | |
| """ | ||
| .. note:: Experimental | ||
|
|
||
| Model fitted by :py:class:`RFormula`. | ||
| Model fitted by :py:class:`RFormula`. Fitting is required to determine the | ||
| factor levels of formula terms. | ||
|
|
||
| .. versionadded:: 1.5.0 | ||
| """ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While you're looking, @holdenk, is this right with the "!"? I saw this somewhere else, and without it the link doesn't seem to be able to find `k`, but I couldn't find anything about it in the scaladoc.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I took a look - it seems to generate the correct link and we use it elsewhere, but it doesn't seem to really do much.