Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
528 changes: 8 additions & 520 deletions python/cuml/cuml/_thirdparty/_sklearn_compat.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions python/cuml/cuml/accel/_wrappers/sklearn/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import functools

import numpy as np
import sklearn.svm
from sklearn.svm import SVC as _SVC
from sklearn.utils.metaestimators import available_if

import cuml.svm
Expand Down Expand Up @@ -76,12 +76,12 @@ def _gpu_decision_function(self, X):
# ProxyBase lacks a builtin mechanism to do that, since this is the only
# use case so far we manually define them for now.
@available_if(_has_probability)
@functools.wraps(sklearn.svm.SVC.predict_proba)
@functools.wraps(_SVC.predict_proba)
def predict_proba(self, X):
return self._call_method("predict_proba", X)

@available_if(_has_probability)
@functools.wraps(sklearn.svm.SVC.predict_log_proba)
@functools.wraps(_SVC.predict_log_proba)
def predict_log_proba(self, X):
return self._call_method("predict_log_proba", X)

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/explainer/kernel_shap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ from random import randint

import cupy as cp
import numpy as np
from sklearn.linear_model import LassoLarsIC, lars_path

from cuml.explainer.base import SHAPBase
from cuml.explainer.common import get_cai_ptr, model_func_call
Expand Down Expand Up @@ -565,6 +564,7 @@ def _l1_regularization(X,
"""
Function calls LASSO or LARS if l1 regularization is needed.
"""
from sklearn.linear_model import LassoLarsIC, lars_path

# create augmented dataset for feature selection
s = cp.sum(X, axis=1)
Expand Down
29 changes: 15 additions & 14 deletions python/cuml/cuml/explainer/tree_shap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import re

import numpy as np
import treelite

import cuml
from cuml.common import input_to_cuml_array
from cuml.ensemble import RandomForestClassifier as curfc
from cuml.ensemble import RandomForestRegressor as curfr
from cuml.internals.array import CumlArray
from cuml.internals.input_utils import determine_array_type

from cuml.internals.treelite cimport *
from cuml.internals.treelite import safe_treelite_call

from libc.stdint cimport uintptr_t

import re

import numpy as np
import treelite
from sklearn.ensemble import RandomForestClassifier as sklrfc
from sklearn.ensemble import RandomForestRegressor as sklrfr
from cuml.internals.treelite cimport *


cdef extern from "cuml/explainer/tree_shap.hpp" namespace "ML::Explainer" nogil:
Expand Down Expand Up @@ -178,15 +174,20 @@ cdef class TreeExplainer:
model = model.booster_
tl_model = treelite.frontend.from_lightgbm(model)
# cuML RF model object
elif isinstance(model, (curfr, curfc)):
elif isinstance(model, (cuml.RandomForestClassifier, cuml.RandomForestRegressor)):
tl_model = model.convert_to_treelite_model()
# scikit-learn RF model object
elif isinstance(model, (sklrfr, sklrfc)):
tl_model = treelite.sklearn.import_model(model)
elif isinstance(model, treelite.Model):
tl_model = model
else:
raise ValueError(f"Unrecognized model object type: {type(model)}")
from sklearn.ensemble import (
RandomForestClassifier,
RandomForestRegressor,
)
if isinstance(model, (RandomForestClassifier, RandomForestRegressor)):
tl_model = treelite.sklearn.import_model(model)
else:
raise ValueError(f"Unrecognized model object type: {type(model)}")

# Get num_class
self.num_class = tl_model.get_header_accessor().get_field("num_class").copy()
Expand Down
10 changes: 5 additions & 5 deletions python/cuml/cuml/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import os

import pylibraft.common.handle
from sklearn.utils import estimator_html_repr

import cuml
import cuml.common
Expand Down Expand Up @@ -458,10 +457,11 @@ def _more_tags(self):

def _repr_mimebundle_(self, **kwargs):
"""Prepare representations used by jupyter kernels to display estimator"""
if estimator_html_repr is not None:
output = {"text/plain": repr(self)}
output["text/html"] = estimator_html_repr(self)
return output
from sklearn.utils import estimator_html_repr

output = {"text/plain": repr(self)}
output["text/html"] = estimator_html_repr(self)
return output

def set_nvtx_annotations(self):
for func_name in [
Expand Down
23 changes: 12 additions & 11 deletions python/cuml/cuml/model_selection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

from sklearn.model_selection import GridSearchCV

from cuml.model_selection._split import StratifiedKFold, train_test_split

GridSearchCV.__doc__ = (
"""
This code is developed and maintained by scikit-learn and imported
"""This code is developed and maintained by scikit-learn and imported
Comment thread
jcrist marked this conversation as resolved.
by cuML to maintain the familiar sklearn namespace structure.
cuML includes tests to ensure full compatibility of these wrappers
with CUDA-based data and cuML estimators, but all of the underlying code
is due to the scikit-learn developers.\n\n"""
+ GridSearchCV.__doc__
)
is due to the scikit-learn developers."""

from cuml.model_selection._split import StratifiedKFold, train_test_split

__all__ = ["train_test_split", "GridSearchCV", "StratifiedKFold"]


def __getattr__(name):
    """Resolve ``GridSearchCV`` lazily from scikit-learn.

    Python calls this module-level hook (PEP 562) when ``name`` is not found
    through normal attribute lookup on the module. Importing scikit-learn
    inside the hook, rather than at module import time, defers that import
    until ``GridSearchCV`` is actually requested.

    Parameters
    ----------
    name : str
        Attribute name being looked up on this module.

    Returns
    -------
    The ``sklearn.model_selection.GridSearchCV`` class when ``name`` is
    ``"GridSearchCV"``.

    Raises
    ------
    AttributeError
        For any other attribute name.
    """
    # Reject unknown names first so the scikit-learn import below is only
    # reached for the one attribute this module forwards.
    if name != "GridSearchCV":
        raise AttributeError(f"module {__name__} has no attribute {name}")

    from sklearn.model_selection import GridSearchCV

    return GridSearchCV
23 changes: 14 additions & 9 deletions python/cuml/cuml/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

from sklearn.pipeline import Pipeline, make_pipeline

disclaimer = """
This code is developed and maintained by scikit-learn and imported
"""This code is developed and maintained by scikit-learn and imported
by cuML to maintain the familiar sklearn namespace structure.
cuML includes tests to ensure full compatibility of these wrappers
with CUDA-based data and cuML estimators, but all of the underlying code
is due to the scikit-learn developers.\n\n"""

Pipeline.__doc__ = disclaimer + Pipeline.__doc__
make_pipeline.__doc__ = disclaimer + make_pipeline.__doc__
is due to the scikit-learn developers."""

__all__ = ["Pipeline", "make_pipeline"]


def __getattr__(name):
    """Resolve ``Pipeline`` and ``make_pipeline`` lazily from scikit-learn.

    Python calls this module-level hook (PEP 562) when ``name`` is not found
    through normal attribute lookup on the module. Importing scikit-learn
    inside the hook, rather than at module import time, defers that import
    until one of the forwarded names is actually requested.

    Parameters
    ----------
    name : str
        Attribute name being looked up on this module.

    Returns
    -------
    ``sklearn.pipeline.Pipeline`` or ``sklearn.pipeline.make_pipeline``,
    matching ``name``.

    Raises
    ------
    AttributeError
        If ``name`` is neither ``"Pipeline"`` nor ``"make_pipeline"``.
    """
    # Reject unknown names before touching scikit-learn at all.
    if name not in ("Pipeline", "make_pipeline"):
        raise AttributeError(f"module {__name__} has no attribute {name}")

    if name == "Pipeline":
        from sklearn.pipeline import Pipeline as resolved
    else:
        from sklearn.pipeline import make_pipeline as resolved

    return resolved
3 changes: 2 additions & 1 deletion python/cuml/cuml/random_projection/random_projection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import cupyx.scipy.sparse as cp_sp
import numpy as np
import scipy.sparse as sp
import sklearn.random_projection

import cuml
from cuml.common.array_descriptor import CumlArrayDescriptor
Expand Down Expand Up @@ -51,6 +50,8 @@ def johnson_lindenstrauss_min_dim(n_samples, eps=0.1):
The minimal number of components to guarantee with good probability
an eps-embedding with n_samples.
"""
import sklearn.random_projection

return sklearn.random_projection.johnson_lindenstrauss_min_dim(
n_samples, eps=eps
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,6 @@
- "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',LogisticRegression())])-check_estimators_overwrite_params]"
- "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',Ridge())])-check_dont_overwrite_parameters]"
- "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',Ridge())])-check_estimators_overwrite_params]"
- "sklearn.tests.test_docstrings::test_docstring[GridSearchCV-None]"
- "sklearn.tests.test_multioutput::test_multi_output_classes_[estimator0]"
- "sklearn.tests.test_multioutput::test_multi_output_classification_sample_weights"
- "sklearn.tests.test_multioutput::test_multi_target_sparse_regression[bsr_array]"
Expand Down