-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-20736][Python] PySpark StringIndexer supports StringOrderType #17978
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
ddf34a5
c1966bb
e5c8dcf
bd80b37
1f336ab
44f0a36
f66a445
36006bf
6acabc2
2fe9432
5bfa4dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2115,22 +2115,32 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, | |
| .. versionadded:: 1.4.0 | ||
| """ | ||
|
|
||
| stringOrderType = Param(Params._dummy(), "stringOrderType", | ||
| "How to order labels of string column. The first label after " + | ||
| "ordering is assigned an index of 0. Supported options: " + | ||
| "frequencyDesc, frequencyAsc, alphabetDsec, alphabetAsc.", | ||
| typeConverter=TypeConverters.toString) | ||
|
|
||
| @keyword_only | ||
| def __init__(self, inputCol=None, outputCol=None, handleInvalid="error"): | ||
| def __init__(self, inputCol=None, outputCol=None, handleInvalid="error", | ||
| stringOrderType="frequencyDesc"): | ||
| """ | ||
| __init__(self, inputCol=None, outputCol=None, handleInvalid="error") | ||
| __init__(self, inputCol=None, outputCol=None, handleInvalid="error", \ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess we need at least a doctest.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @HyukjinKwon Thank you. Added tests. |
||
| stringOrderType="frequencyDesc") | ||
| """ | ||
| super(StringIndexer, self).__init__() | ||
| self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StringIndexer", self.uid) | ||
| self._setDefault(handleInvalid="error") | ||
| self._setDefault(handleInvalid="error", stringOrderType="frequencyDesc") | ||
| kwargs = self._input_kwargs | ||
| self.setParams(**kwargs) | ||
|
|
||
| @keyword_only | ||
| @since("1.4.0") | ||
| def setParams(self, inputCol=None, outputCol=None, handleInvalid="error"): | ||
| def setParams(self, inputCol=None, outputCol=None, handleInvalid="error", | ||
| stringOrderType="frequencyDesc"): | ||
| """ | ||
| setParams(self, inputCol=None, outputCol=None, handleInvalid="error") | ||
| setParams(self, inputCol=None, outputCol=None, handleInvalid="error", \ | ||
| stringOrderType="frequencyDesc") | ||
| Sets params for this StringIndexer. | ||
| """ | ||
| kwargs = self._input_kwargs | ||
|
|
@@ -2139,6 +2149,20 @@ def setParams(self, inputCol=None, outputCol=None, handleInvalid="error"): | |
| def _create_model(self, java_model): | ||
| return StringIndexerModel(java_model) | ||
|
|
||
| @since("2.3.0") | ||
| def setStringOrderType(self, value): | ||
| """ | ||
| Sets the value of :py:attr:`stringOrderType`. | ||
| """ | ||
| return self._set(stringOrderType=value) | ||
|
|
||
| @since("2.3.0") | ||
| def getStringOrderType(self): | ||
| """ | ||
| Gets the value of :py:attr:`stringOrderType` or its default value. | ||
|
||
| """ | ||
| return self.getOrDefault(self.stringOrderType) | ||
|
|
||
|
|
||
| class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): | ||
| """ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo: alphabetDsec -> alphabetDesc