Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ Forest Inferencing

.. autoclass:: cuml.ForestInference
:members:
:inherited-members:

Coordinate Descent
------------------
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ add_subdirectory(cuml/decomposition)
add_subdirectory(cuml/ensemble)
add_subdirectory(cuml/explainer)
add_subdirectory(cuml/experimental/fil)
add_subdirectory(cuml/fil)
add_subdirectory(cuml/legacy/fil)
add_subdirectory(cuml/kernel_ridge)
add_subdirectory(cuml/linear_model)
add_subdirectory(cuml/manifold)
Expand Down
11 changes: 7 additions & 4 deletions python/cuml/cuml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
del libcuml

from cuml.internals.base import Base, UniversalBase
from cuml.internals.available_devices import is_cuda_available
from cuml.internals.available_devices import is_cuda_available, GPU_ENABLED

# GPU only packages

Expand All @@ -44,8 +44,6 @@

from cuml.decomposition.incremental_pca import IncrementalPCA

from cuml.fil.fil import ForestInference

from cuml.ensemble.randomforestclassifier import RandomForestClassifier
from cuml.ensemble.randomforestregressor import RandomForestRegressor

Expand All @@ -54,7 +52,6 @@
from cuml.explainer.tree_shap import TreeExplainer

import cuml.feature_extraction
from cuml.fil import fil

from cuml.kernel_ridge.kernel_ridge import KernelRidge

Expand Down Expand Up @@ -108,6 +105,11 @@

from cuml.cluster.hdbscan import HDBSCAN

# FIL is currently not built in cuml-cpu distributions, even though it can be
# used in a CPU-only environment. Only import if the build supports it.
if GPU_ENABLED:
from cuml.fil import ForestInference
from cuml.fil import fil
from cuml.decomposition.pca import PCA
from cuml.decomposition.tsvd import TruncatedSVD

Expand Down Expand Up @@ -141,6 +143,7 @@ def __getattr__(name):
# Modules
"common",
"feature_extraction",
"fil",
"metrics",
"multiclass",
"naive_bayes",
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/dask/ensemble/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -13,7 +13,7 @@
# limitations under the License.
#

from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import TreeliteModel
from cuml.dask.common.utils import get_client, wait_and_raise_from_futures
from cuml.dask.common.input_utils import DistributedDataHandler, concatenate
from dask.distributed import Future
Expand Down
16 changes: 10 additions & 6 deletions python/cuml/cuml/ensemble/randomforest_common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ import typing

from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')
from cuml import ForestInference
from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import ForestInference
from cuml.legacy.fil.fil import TreeliteModel
from pylibraft.common.handle import Handle
from cuml.internals.base import UniversalBase
from cuml.internals.array import CumlArray
Expand Down Expand Up @@ -448,8 +448,10 @@ class BaseRandomForestModel(UniversalBase):
_check_fil_parameter_validity(depth=self.max_depth,
fil_sparse_format=fil_sparse_format,
algo=algo)
fil_model = ForestInference(handle=self.handle, verbose=self.verbose,
output_type=self.output_type)
with warnings.catch_warnings():
warnings.simplefilter('ignore', FutureWarning)
fil_model = ForestInference(handle=self.handle, verbose=self.verbose,
output_type=self.output_type)
tl_to_fil_model = \
fil_model.load_using_treelite_handle(treelite_handle,
output_class=output_class,
Expand Down Expand Up @@ -550,8 +552,10 @@ def _obtain_fil_model(treelite_handle, depth,
fil_sparse_format=fil_sparse_format,
algo=algo)

# Use output_type="input" to prevent an error
fil_model = ForestInference(output_type="input")
with warnings.catch_warnings():
warnings.simplefilter('ignore', FutureWarning)
# Use output_type="input" to prevent an error
fil_model = ForestInference(output_type="input")

tl_to_fil_model = \
fil_model.load_using_treelite_handle(treelite_handle,
Expand Down
3 changes: 1 addition & 2 deletions python/cuml/cuml/ensemble/randomforest_shared.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -26,7 +26,6 @@ from libcpp.vector cimport vector
from libcpp.string cimport string

from pylibraft.common.handle import Handle
from cuml import ForestInference
from cuml.internals.base import Base
from pylibraft.common.handle cimport handle_t
cimport cuml.common.cuda
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/ensemble/randomforestclassifier.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ from cuml.ensemble.randomforest_common import BaseRandomForestModel
from cuml.ensemble.randomforest_common import _obtain_fil_model
from cuml.ensemble.randomforest_shared cimport *

from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import TreeliteModel

from libcpp cimport bool
from libc.stdint cimport uintptr_t, uint64_t
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/ensemble/randomforestregressor.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ from cuml.ensemble.randomforest_common import BaseRandomForestModel
from cuml.ensemble.randomforest_common import _obtain_fil_model
from cuml.ensemble.randomforest_shared cimport *

from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import TreeliteModel

from libcpp cimport bool
from libc.stdint cimport uintptr_t, uint64_t
Expand Down
121 changes: 1 addition & 120 deletions python/cuml/cuml/experimental/fil/README.md
Original file line number Diff line number Diff line change
@@ -1,122 +1,3 @@
# Experimental FIL - RAPIDS Forest Inference Library

This experimental feature offers a new implementation of cuML's existing
Forest Inference Library. The primary advantages of this new
implementation are:

1. Models can now be evaluated on CPU in addition to GPU.
2. Faster GPU execution on some models and hardware.
3. Support for a wider range of Treelite's available model parameters.

In addition, there are a few limitations of this implementation,
including:

1. Models with shallow trees (depth 2-4) typically execute slower than with
existing FIL.
2. This implementation has not been as exhaustively tested as the existing
FIL.

If you need to absolutely maximize runtime performance, it is
recommended that you test both the new and existing FIL implementations with
realistic batch sizes on your target hardware to determine which is optimal
for your specific model. Generally, however, performance should be quite
comparable for both implementations.

**NOTE:** Because this implementation is relatively recent, it is recommended
that for use cases where stability is paramount, the existing FIL
implementation be used.

## Usage
With one exception, experimental FIL should be fully compatible with the
existing FIL API. Experimental FIL no longer allows a `threshold` to be
specified at the time a model is loaded for binary classifiers. Instead, the
threshold must be passed as a keyword argument to the `predict` method.

Besides this, all existing FIL calls should be compatible with experimental
FIL. There are, however, several performance parameters which have been
deprecated (will now emit a warning) and a few new ones which have been added.

The most basic usage remains the same:
```python
from cuml.experimental import ForestInference

fm = ForestInference.load(filename=model_path,
output_class=True,
model_type='xgboost')

X = ... load test samples as a numpy or cupy array ...

y_out = fm.predict(X)
```

In order to optimize performance, however, we introduce a new optional
parameter to the `predict` method called `chunk_size`:

```python
y_out = fm.predict(X, chunk_size=4)
```

The API docs cover `chunk_size` in more detail, but this parameter controls
how many rows within a batch are simultaneously evaluated during a single
iteration of FIL's inference algorithm. The optimal value for this
parameter depends on both the model and available hardware, and it is
difficult to predict _a priori_. In general, however, larger batches benefit
from larger `chunk_size` values, and smaller batches benefit from smaller
`chunk_size` values.

For GPU execution, `chunk_size` can be any power of 2 from 1 to 32. For CPU
execution, `chunk_size` can be any power of 2, but there is generally no
benefit in testing values over 512. On both CPU and GPU, there is never
any benefit from a chunk size that exceeds the batch size. Tuning the
chunk size can substantially improve performance, so it is often worthwhile
to perform a search over chunk sizes with sample data when deploying a model
with FIL.

### Loading Parameters
In addition to the `chunk_size` parameter for the `predict` and
`predict_proba` methods, FIL offers some parameters for optimizing
performance when the model is loaded. This implementation also
deprecates some existing parameters.

#### Deprecated `load` Parameters

- `threshold` (will raise a `DeprecationWarning` if used)
- `algo` (ignored, but a warning will be logged)
- `storage_type` (ignored, but a warning will be logged)
- `blocks_per_sm` (ignored, but a warning will be logged)
- `threads_per_tree` (ignored, but a warning will be logged)
- `n_items` (ignored, but a warning will be logged)
- `compute_shape_str` (ignored, but a warning will be logged)

#### New `load` Parameters
- `layout`: Replaces the functionality of `algo` and specifies the in-memory
layout of nodes in FIL forests. One of `'depth_first'` (default) or
`'breadth_first'`. Except in cases where absolutely optimal
performance is critical, the default should be acceptable.
- `align_bytes`: If specified, trees will be padded such that their in-memory
size is a multiple of this value. Theoretically, this can improve
performance by guaranteeing that memory reads from trees begin on a cache
line boundary. Empirically, little benefit has been observed for this
parameter, and it may be deprecated before this version of FIL moves out of
experimental status.

#### Optimizing `load` parameters
While these two new parameters have been provided for cases in which it is
necessary to eke out every possible performance gain for a model, in general
the performance benefit will be tiny relative to the benefit of
optimizing `chunk_size` for predict calls.

## Future Development
Once experimental FIL has been thoroughly tested and evaluated in real-world
deployments, it will be moved out of experimental status and replace the
existing FIL implementation. Before this happens, RAPIDS developers will
also address the current underperformance of experimental FIL on shallow
trees to ensure performance parity.

While this version of FIL remains in experimental status, feedback is very
much welcome. Please consider [submitting an
issue](https://github.com/rapidsai/cuml/issues/new/choose) if you notice
any performance regression when transitioning from the current FIL, have
thoughts on how to make the API more useful, or have features you
would like to see in the new version of FIL before it transitions out of
experimental.
As of RAPIDS 25.04, experimental FIL has been promoted to stable. It is retained here temporarily to support users who have already migrated to experimental FIL, but it will be moved exclusively to `cuml.fil` in RAPIDS 25.06.
4 changes: 2 additions & 2 deletions python/cuml/cuml/explainer/tree_shap.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -18,7 +18,7 @@ from cuml.common import input_to_cuml_array
from cuml.internals.array import CumlArray
from cuml.internals.import_utils import has_sklearn
from cuml.internals.input_utils import determine_array_type
from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import TreeliteModel
from cuml.ensemble import RandomForestRegressor as curfr
from cuml.ensemble import RandomForestClassifier as curfc

Expand Down
Loading