Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/pickling_cuml_models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@
"\n",
"Here are the steps to export the model:\n",
"\n",
"1. Call `convert_to_treelite_model().serialize()` to obtain the checkpoint file from the cuML Random Forest model."
"1. Call `as_treelite().serialize()` to obtain the checkpoint file from the cuML Random Forest model."
]
},
{
Expand All @@ -219,7 +219,7 @@
"\n",
"checkpoint_path = './checkpoint.tl'\n",
"# Export cuML RF model as Treelite checkpoint\n",
"clf.convert_to_treelite_model().serialize(checkpoint_path)"
"clf.as_treelite().serialize(checkpoint_path)"
]
},
{
Expand Down
53 changes: 36 additions & 17 deletions python/cuml/cuml/ensemble/randomforest_common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -343,20 +343,39 @@ class BaseRandomForestModel(Base, InteropMixin):
return self.n_estimators

def convert_to_treelite_model(self):
    """Deprecated, use `as_treelite`.

    Backward-compatibility shim: emits a ``FutureWarning`` and then returns
    whatever ``self.as_treelite()`` returns.

    .. deprecated:: 25.10
        Scheduled for removal in 25.12 (per the warning text below).
    """
    # Warn on every call so users of the old name notice before removal.
    warnings.warn(
        "`convert_to_treelite_model` was deprecated in 25.10 and will be "
        "removed in 25.12. Please use `as_treelite` instead.",
        FutureWarning,
    )
    return self.as_treelite()

def convert_to_fil_model(
    self, layout="depth_first", default_chunk_size=None, align_bytes=None
):
    """Deprecated, use `as_fil`.

    Backward-compatibility shim: emits a ``FutureWarning`` and then returns
    whatever ``self.as_fil(...)`` returns.

    Parameters
    ----------
    layout, default_chunk_size, align_bytes
        Forwarded unchanged, by keyword, to `as_fil`; see that method for
        their meaning.

    .. deprecated:: 25.10
        Scheduled for removal in 25.12 (per the warning text below).
    """
    # Warn on every call so users of the old name notice before removal.
    warnings.warn(
        "`convert_to_fil_model` was deprecated in 25.10 and will be "
        "removed in 25.12. Please use `as_fil` instead.",
        FutureWarning,
    )
    return self.as_fil(
        layout=layout,
        default_chunk_size=default_chunk_size,
        align_bytes=align_bytes,
    )

def as_treelite(self):
    """
    Converts this estimator to a Treelite model.

    The model is deserialized from the Treelite bytes stored on the
    estimator (``self._treelite_model_bytes``) each time this is called.

    Returns
    -------
    treelite.Model
    """
    return treelite.Model.deserialize_bytes(self._treelite_model_bytes)

def convert_to_fil_model(
self,
layout="depth_first",
default_chunk_size=None,
align_bytes=None,
def as_fil(
self, layout="depth_first", default_chunk_size=None, align_bytes=None,
):
"""
Create a Forest Inference (FIL) model from the trained cuML
Expand Down Expand Up @@ -501,13 +520,13 @@ class BaseRandomForestModel(Base, InteropMixin):
verbose
)

# XXX: Theoretically we could wrap `tl_handle` with `treelite.Model` to manage
# ownership, and keep the loaded model around. However, this only works if the
# `libtreelite` used by `treelite` matches the one that `cuml` is linked against.
# This is currently true for conda environments, but not for wheels where
# `treelite` contains its own separate version. So for now we need to do this
# serialize-and-reload dance. If/when this is fixed we instead store the loaded
# model and use that everywhere.
# XXX: Theoretically we could wrap `tl_handle` with `treelite.Model` to
# manage ownership, and keep the loaded model around. However, this
# only works if the `libtreelite` used by `treelite` is ABI compatible
# with the one used by `cuml`. This is currently true for conda
# environments, but not for wheels where `cuml` and `treelite` use
# different manylinux ABIs. So for now we need to do this
# serialize-and-reload dance. If/when this is fixed we could store the
# loaded model and use that instead.
Comment on lines +523 to +529
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for updating the comment. The language is clearer now.

cdef const char* tl_bytes = NULL
cdef size_t tl_bytes_len
safe_treelite_call(
Expand Down Expand Up @@ -540,9 +559,9 @@ class BaseRandomForestModel(Base, InteropMixin):
):
# default parameters, get (or create) the cached fil model
if (fil_model := getattr(self, "_fil_model", None)) is None:
fil_model = self._fil_model = self.convert_to_fil_model()
fil_model = self._fil_model = self.as_fil()
else:
fil_model = self.convert_to_fil_model(
fil_model = self.as_fil(
layout=layout,
default_chunk_size=default_chunk_size,
align_bytes=align_bytes,
Expand All @@ -554,7 +573,7 @@ class BaseRandomForestModel(Base, InteropMixin):
warnings.warn(
(
"`predict_model` is deprecated (and ignored) and will be removed "
"in 25.12. To infer on CPU use `model.convert_to_fil_model` to get "
"in 25.12. To infer on CPU use `model.as_fil` to get "
"a `FIL` instance which may then be used to perform inference on "
"both CPU and GPU."
),
Expand Down
10 changes: 4 additions & 6 deletions python/cuml/cuml/ensemble/randomforestclassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,8 @@ def predict(

.. deprecated:: 25.10
`predict_model` is deprecated (and ignored) and will be removed
in 25.12. To infer on CPU use `model.convert_to_fil_model` to get
a `FIL` instance which may then be used to perform inference on
both CPU and GPU.
in 25.12. To infer on CPU use `model.as_fil` to get a `FIL` instance
which may then be used to perform inference on both CPU and GPU.

Returns
-------
Expand Down Expand Up @@ -400,9 +399,8 @@ def score(

.. deprecated:: 25.10
`predict_model` is deprecated (and ignored) and will be removed
in 25.12. To infer on CPU use `model.convert_to_fil_model` to get
a `FIL` instance which may then be used to perform inference on
both CPU and GPU.
in 25.12. To infer on CPU use `model.as_fil` to get a `FIL` instance
which may then be used to perform inference on both CPU and GPU.

Returns
-------
Expand Down
10 changes: 4 additions & 6 deletions python/cuml/cuml/ensemble/randomforestregressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,8 @@ def predict(

.. deprecated:: 25.10
`predict_model` is deprecated (and ignored) and will be removed
in 25.12. To infer on CPU use `model.convert_to_fil_model` to get
a `FIL` instance which may then be used to perform inference on
both CPU and GPU.
in 25.12. To infer on CPU use `model.as_fil` to get a `FIL` instance
which may then be used to perform inference on both CPU and GPU.

Returns
-------
Expand Down Expand Up @@ -337,9 +336,8 @@ def score(

.. deprecated:: 25.10
`predict_model` is deprecated (and ignored) and will be removed
in 25.12. To infer on CPU use `model.convert_to_fil_model` to get
a `FIL` instance which may then be used to perform inference on
both CPU and GPU.
in 25.12. To infer on CPU use `model.as_fil` to get a `FIL` instance
which may then be used to perform inference on both CPU and GPU.

Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/explainer/tree_shap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ cdef class TreeExplainer:
tl_model = treelite.frontend.from_lightgbm(model)
# cuML RF model object
elif isinstance(model, (cuml.RandomForestClassifier, cuml.RandomForestRegressor)):
tl_model = model.convert_to_treelite_model()
tl_model = model.as_treelite()
# scikit-learn RF model object
elif isinstance(model, treelite.Model):
tl_model = model
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/tests/dask/test_dask_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def test_rf_data_count(client, max_depth, n_estimators):
X_dask, y_dask = _prep_training_data(client, X, y, partitions_per_worker=2)
dask_model.fit(X_dask, y_dask)
model = dask_model.get_combined_model()
json_obj = json.loads(model.convert_to_treelite_model().dump_as_json())
json_obj = json.loads(model.as_treelite().dump_as_json())

def check_count(node, nodes):
if "left_child" in node:
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/tests/test_fil.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,7 @@ def test_device_selection(device_id, model_kind, tmp_path):
n_streams=1,
)
cuml_model.fit(cp.array(X), cp.array(y))
fm = cuml_model.convert_to_fil_model()
fm = cuml_model.as_fil()
else:
raise NotImplementedError()

Expand Down
33 changes: 23 additions & 10 deletions python/cuml/tests/test_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,14 +707,14 @@ def test_rf_classification_sparse(small_clf, datatype, fil_layout):
acc = accuracy_score(y_test, preds)
np.testing.assert_almost_equal(acc, cuml_model.score(X_test, y_test))

fil_model = cuml_model.convert_to_fil_model()
fil_model = cuml_model.as_fil()

with cuml.using_output_type("numpy"):
fil_model_preds = fil_model.predict(X_test)
fil_model_acc = accuracy_score(y_test, fil_model_preds)
assert acc == fil_model_acc

tl_model = cuml_model.convert_to_treelite_model()
tl_model = cuml_model.as_treelite()
assert num_trees == tl_model.num_tree
assert X.shape[1] == tl_model.num_feature

Expand Down Expand Up @@ -771,15 +771,15 @@ def test_rf_regression_sparse(special_reg, datatype, fil_layout):
preds = cuml_model.predict(X_test, layout=fil_layout)
r2 = r2_score(y_test, preds)

fil_model = cuml_model.convert_to_fil_model()
fil_model = cuml_model.as_fil()

with cuml.using_output_type("numpy"):
fil_model_preds = fil_model.predict(X_test)
fil_model_preds = np.reshape(fil_model_preds, np.shape(y_test))
fil_model_r2 = r2_score(y_test, fil_model_preds)
assert r2 == fil_model_r2

tl_model = cuml_model.convert_to_treelite_model()
tl_model = cuml_model.as_treelite()
assert num_trees == tl_model.num_tree
assert X.shape[1] == tl_model.num_feature

Expand Down Expand Up @@ -1092,9 +1092,7 @@ def test_rf_regression_with_identical_labels():
max_depth=1,
)
model.fit(X, y)
trees = json.loads(model.convert_to_treelite_model().dump_as_json())[
"trees"
]
trees = json.loads(model.as_treelite().dump_as_json())["trees"]
assert len(trees) == 1
assert len(trees[0]["nodes"]) == 1
assert trees[0]["nodes"][0] == {
Expand All @@ -1112,7 +1110,7 @@ def test_rf_regressor_gtil_integration(tmpdir):
expected_pred = clf.predict(X).reshape((-1, 1, 1))

checkpoint_path = os.path.join(tmpdir, "checkpoint.tl")
clf.convert_to_treelite_model().serialize(checkpoint_path)
clf.as_treelite().serialize(checkpoint_path)

tl_model = treelite.Model.deserialize(checkpoint_path)
out_pred = treelite.gtil.predict(tl_model, X)
Expand All @@ -1127,7 +1125,7 @@ def test_rf_binary_classifier_gtil_integration(tmpdir):
expected_pred = clf.predict_proba(X).reshape((-1, 1, 2))

checkpoint_path = os.path.join(tmpdir, "checkpoint.tl")
clf.convert_to_treelite_model().serialize(checkpoint_path)
clf.as_treelite().serialize(checkpoint_path)

tl_model = treelite.Model.deserialize(checkpoint_path)
out_pred = treelite.gtil.predict(tl_model, X)
Expand All @@ -1142,7 +1140,7 @@ def test_rf_multiclass_classifier_gtil_integration(tmpdir):
expected_prob = clf.predict_proba(X).reshape((X.shape[0], 1, -1))

checkpoint_path = os.path.join(tmpdir, "checkpoint.tl")
clf.convert_to_treelite_model().serialize(checkpoint_path)
clf.as_treelite().serialize(checkpoint_path)

tl_model = treelite.Model.deserialize(checkpoint_path)
out_prob = treelite.gtil.predict(tl_model, X, pred_margin=True)
Expand Down Expand Up @@ -1256,3 +1254,18 @@ def test_accuracy_metric_deprecated():
model = cuml.RandomForestRegressor(accuracy_metric="mse")
score = model.fit(X, y).score(X, y)
np.testing.assert_allclose(score, mean_squared_error(y, model.predict(X)))


def test_convert_methods_deprecated():
    """Check the deprecated ``convert_to_*`` methods warn and still work.

    Each legacy method must raise a ``FutureWarning`` naming itself and
    return an object of the expected model type.
    """
    X, y = make_regression(n_samples=500)
    model = cuml.RandomForestRegressor().fit(X, y)

    # Legacy Treelite export: warns, then yields a treelite.Model.
    with pytest.warns(FutureWarning, match="convert_to_treelite_model"):
        tl = model.convert_to_treelite_model()

    assert isinstance(tl, treelite.Model)

    # Legacy FIL export: warns, then yields a ForestInference instance.
    with pytest.warns(FutureWarning, match="convert_to_fil_model"):
        fil = model.convert_to_fil_model()

    assert isinstance(fil, cuml.fil.ForestInference)