Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ Forest Inferencing

.. autoclass:: cuml.ForestInference
:members:
:inherited-members:

Coordinate Descent
------------------
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ add_subdirectory(cuml/decomposition)
add_subdirectory(cuml/ensemble)
add_subdirectory(cuml/explainer)
add_subdirectory(cuml/experimental/fil)
add_subdirectory(cuml/fil)
add_subdirectory(cuml/legacy/fil)
add_subdirectory(cuml/kernel_ridge)
add_subdirectory(cuml/linear_model)
add_subdirectory(cuml/manifold)
Expand Down
11 changes: 7 additions & 4 deletions python/cuml/cuml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
del libcuml

from cuml.internals.base import Base, UniversalBase
from cuml.internals.available_devices import is_cuda_available
from cuml.internals.available_devices import is_cuda_available, GPU_ENABLED

# GPU only packages

Expand All @@ -44,8 +44,6 @@

from cuml.decomposition.incremental_pca import IncrementalPCA

from cuml.fil.fil import ForestInference

from cuml.ensemble.randomforestclassifier import RandomForestClassifier
from cuml.ensemble.randomforestregressor import RandomForestRegressor

Expand All @@ -54,7 +52,6 @@
from cuml.explainer.tree_shap import TreeExplainer

import cuml.feature_extraction
from cuml.fil import fil

from cuml.kernel_ridge.kernel_ridge import KernelRidge

Expand Down Expand Up @@ -108,6 +105,11 @@

from cuml.cluster.hdbscan import HDBSCAN

# FIL is currently not built in cuml-cpu distributions, even though it can be
# used in a CPU-only environment. Only import if the build supports it.
if GPU_ENABLED:
from cuml.fil import ForestInference
from cuml.fil import fil
from cuml.decomposition.pca import PCA
from cuml.decomposition.tsvd import TruncatedSVD

Expand Down Expand Up @@ -141,6 +143,7 @@ def __getattr__(name):
# Modules
"common",
"feature_extraction",
"fil",
"metrics",
"multiclass",
"naive_bayes",
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/dask/ensemble/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -13,7 +13,7 @@
# limitations under the License.
#

from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import TreeliteModel
from cuml.dask.common.utils import get_client, wait_and_raise_from_futures
from cuml.dask.common.input_utils import DistributedDataHandler, concatenate
from dask.distributed import Future
Expand Down
16 changes: 10 additions & 6 deletions python/cuml/cuml/ensemble/randomforest_common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ import typing

from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')
from cuml import ForestInference
from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import ForestInference
from cuml.legacy.fil.fil import TreeliteModel
from pylibraft.common.handle import Handle
from cuml.internals.base import UniversalBase
from cuml.internals.array import CumlArray
Expand Down Expand Up @@ -448,8 +448,10 @@ class BaseRandomForestModel(UniversalBase):
_check_fil_parameter_validity(depth=self.max_depth,
fil_sparse_format=fil_sparse_format,
algo=algo)
fil_model = ForestInference(handle=self.handle, verbose=self.verbose,
output_type=self.output_type)
with warnings.catch_warnings():
warnings.simplefilter('ignore', FutureWarning)
fil_model = ForestInference(handle=self.handle, verbose=self.verbose,
output_type=self.output_type)
tl_to_fil_model = \
fil_model.load_using_treelite_handle(treelite_handle,
output_class=output_class,
Expand Down Expand Up @@ -550,8 +552,10 @@ def _obtain_fil_model(treelite_handle, depth,
fil_sparse_format=fil_sparse_format,
algo=algo)

# Use output_type="input" to prevent an error
fil_model = ForestInference(output_type="input")
with warnings.catch_warnings():
warnings.simplefilter('ignore', FutureWarning)
# Use output_type="input" to prevent an error
fil_model = ForestInference(output_type="input")

tl_to_fil_model = \
fil_model.load_using_treelite_handle(treelite_handle,
Expand Down
3 changes: 1 addition & 2 deletions python/cuml/cuml/ensemble/randomforest_shared.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -26,7 +26,6 @@ from libcpp.vector cimport vector
from libcpp.string cimport string

from pylibraft.common.handle import Handle
from cuml import ForestInference
from cuml.internals.base import Base
from pylibraft.common.handle cimport handle_t
cimport cuml.common.cuda
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/ensemble/randomforestclassifier.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ from cuml.ensemble.randomforest_common import BaseRandomForestModel
from cuml.ensemble.randomforest_common import _obtain_fil_model
from cuml.ensemble.randomforest_shared cimport *

from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import TreeliteModel

from libcpp cimport bool
from libc.stdint cimport uintptr_t, uint64_t
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/ensemble/randomforestregressor.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ from cuml.ensemble.randomforest_common import BaseRandomForestModel
from cuml.ensemble.randomforest_common import _obtain_fil_model
from cuml.ensemble.randomforest_shared cimport *

from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import TreeliteModel

from libcpp cimport bool
from libc.stdint cimport uintptr_t, uint64_t
Expand Down
121 changes: 1 addition & 120 deletions python/cuml/cuml/experimental/fil/README.md
Original file line number Diff line number Diff line change
@@ -1,122 +1,3 @@
# Experimental FIL - RAPIDS Forest Inference Library

This experimental feature offers a new implementation of cuML's existing
Forest Inference Library. The primary advantages of this new
implementation are:

1. Models can now be evaluated on CPU in addition to GPU.
2. Faster GPU execution on some models and hardware.
3. Support for a wider range of Treelite's available model parameters.

In addition, there are a few limitations of this implementation,
including:

1. Models with shallow trees (depth 2-4) typically execute slower than with
existing FIL.
2. This implementation has not been as exhaustively tested as the existing
FIL.

If you need to absolutely maximize runtime performance, it is
recommended that you test both the new and existing FIL implementations with
realistic batch sizes on your target hardware to determine which is optimal
for your specific model. Generally, however, performance should be quite
comparable for both implementations.

**NOTE:** Because this implementation is relatively recent, it is recommended
that for use cases where stability is paramount, the existing FIL
implementation be used.

## Usage
With one exception, experimental FIL should be fully compatible with the
existing FIL API. Experimental FIL no longer allows a `threshold` to be
specified at the time a model is loaded for binary classifiers. Instead, the
threshold must be passed as a keyword argument to the `predict` method.

Besides this, all existing FIL calls should be compatible with experimental
FIL. There are, however, several performance parameters which have been
deprecated (will now emit a warning) and a few new ones which have been added.

The most basic usage remains the same:
```python
from cuml.experimental import ForestInference

fm = ForestInference.load(filename=model_path,
output_class=True,
model_type='xgboost')

X = ... load test samples as a numpy or cupy array ...

y_out = fm.predict(X)
```

In order to optimize performance, however, we introduce a new optional
parameter to the `predict` method called `chunk_size`:

```python
y_out = fm.predict(X, chunk_size=4)
```

The API docs cover `chunk_size` in more detail, but this parameter controls
how many rows within a batch are simultaneously evaluated during a single
iteration of FIL's inference algorithm. The optimal value for this
parameter depends on both the model and available hardware, and it is
difficult to predict _a priori_. In general, however, larger batches benefit
from larger `chunk_size` values, and smaller batches benefit from smaller
`chunk_size` values.

For GPU execution, `chunk_size` can be any power of 2 from 1 to 32. For CPU
execution, `chunk_size` can be any power of 2, but there is generally no
benefit in testing values over 512. On both CPU and GPU, there is never
any benefit from a chunk size that exceeds the batch size. Tuning the
chunk size can substantially improve performance, so it is often worthwhile
to perform a search over chunk sizes with sample data when deploying a model
with FIL.

### Loading Parameters
In addition to the `chunk_size` parameter for the `predict` and
`predict_proba` methods, FIL offers some parameters for optimizing
performance when the model is loaded. This implementation also
deprecates some existing parameters.

#### Deprecated `load` Parameters

- `threshold` (will raise a `DeprecationWarning` if used)
- `algo` (ignored, but a warning will be logged)
- `storage_type` (ignored, but a warning will be logged)
- `blocks_per_sm` (ignored, but a warning will be logged)
- `threads_per_tree` (ignored, but a warning will be logged)
- `n_items` (ignored, but a warning will be logged)
- `compute_shape_str` (ignored, but a warning will be logged)

#### New `load` Parameters
- `layout`: Replaces the functionality of `algo` and specifies the in-memory
layout of nodes in FIL forests. One of `'depth_first'` (default) or
`'breadth_first'`. Except in cases where absolutely optimal
performance is critical, the default should be acceptable.
- `align_bytes`: If specified, trees will be padded such that their in-memory
size is a multiple of this value. Theoretically, this can improve
performance by guaranteeing that memory reads from trees begin on a cache
line boundary. Empirically, little benefit has been observed for this
parameter, and it may be deprecated before this version of FIL moves out of
experimental status.

#### Optimizing `load` parameters
While these two new parameters have been provided for cases in which it is
necessary to eke out every possible performance gain for a model, in general
the performance benefit will be tiny relative to the benefit of
optimizing `chunk_size` for predict calls.

## Future Development
Once experimental FIL has been thoroughly tested and evaluated in real-world
deployments, it will be moved out of experimental status and replace the
existing FIL implementation. Before this happens, RAPIDS developers will
also address the current underperformance of experimental FIL on shallow
trees to ensure performance parity.

While this version of FIL remains in experimental status, feedback is very
much welcome. Please consider [submitting an
issue](https://github.com/rapidsai/cuml/issues/new/choose) if you notice
any performance regression when transitioning from the current FIL, have
thoughts on how to make the API more useful, or have features you
would like to see in the new version of FIL before it transitions out of
experimental.
As of RAPIDS 25.04, experimental FIL has been promoted to stable. It is retained here temporarily to support users who have already migrated to experimental FIL, but it will be moved exclusively to `cuml.fil` in RAPIDS 25.06.
4 changes: 2 additions & 2 deletions python/cuml/cuml/explainer/tree_shap.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -18,7 +18,7 @@ from cuml.common import input_to_cuml_array
from cuml.internals.array import CumlArray
from cuml.internals.import_utils import has_sklearn
from cuml.internals.input_utils import determine_array_type
from cuml.fil.fil import TreeliteModel
from cuml.legacy.fil.fil import TreeliteModel
from cuml.ensemble import RandomForestRegressor as curfr
from cuml.ensemble import RandomForestClassifier as curfc

Expand Down
Loading