Skip to content
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
228bf9b
Begin move to separate traversal logic
wphicks Oct 7, 2024
4983192
Restructure import to accommodate more generic layouts
wphicks Oct 14, 2024
ebbb379
Add debugging for decision forest construction
wphicks Oct 15, 2024
fe16b55
Begin adding traversal tests
wphicks Oct 16, 2024
832279a
Add traversal order tests
wphicks Oct 17, 2024
46529c1
Test depth and parent tracking in traversal
wphicks Oct 17, 2024
f298257
Remove copied src file
wphicks Oct 17, 2024
2cc7612
Begin adding Treelite traversal tests
wphicks Oct 21, 2024
9c2a71e
Update node_transform test
wphicks Oct 23, 2024
9b857a4
Merge branch 'branch-24.12' into fea/fil_shallow
wphicks Oct 23, 2024
b3de75e
Begin adding node_accumulate test
wphicks Oct 25, 2024
f39e601
Merge branch 'branch-25.02' into fea/fil_shallow
wphicks Jan 16, 2025
a129b88
Fix typo in forest docs
wphicks Jan 16, 2025
5c507de
Add tests for all Treelite traversal orders
wphicks Jan 17, 2025
4a92a3e
Add test for full treelite import to FIL
wphicks Jan 21, 2025
1d8a493
Merge branch 'branch-25.04' into fea/fil_shallow
wphicks Feb 28, 2025
5d1a294
Hook up layered_children_together
wphicks Feb 28, 2025
12c2988
Allow benchmarking of layered layout
wphicks Feb 28, 2025
334e336
Remove debug logging
wphicks Feb 28, 2025
372dfbf
Update benchmarks for XGBoost ubjson default
wphicks Mar 1, 2025
24cf628
Merge branch 'branch-25.04' into fea/fil_shallow
wphicks Mar 3, 2025
5b08adf
Provide correct align_bytes default
wphicks Mar 4, 2025
764e519
Ensure optimize never explores chunks larger than batch
wphicks Mar 4, 2025
c836f19
Merge branch 'branch-25.04' into fea/fil_shallow
wphicks Mar 5, 2025
b8e3dc1
Improve thread count selection for CPU FIL
wphicks Mar 6, 2025
6dc409d
Update defaults and docs
wphicks Mar 7, 2025
0aa729d
Ensure pad nodes are added in correct order
wphicks Mar 8, 2025
216ba79
Merge branch 'branch-25.04' into fea/fil_shallow
wphicks Mar 10, 2025
e4ea72d
Use optimized default hyperparameters
wphicks Mar 11, 2025
966f65e
Update layout description
wphicks Mar 11, 2025
1a2a805
Merge branch 'branch-25.04' into fea/fil_shallow
wphicks Mar 11, 2025
32bf670
Update style
wphicks Mar 11, 2025
046a4a0
Remove unused variable in test
wphicks Mar 12, 2025
4d1ffa9
Add support for non-openmp builds
wphicks Mar 12, 2025
4a6f9a0
Ensure clang-tidy has access to omp.h when necessary
wphicks Mar 13, 2025
895b5e0
Merge branch 'branch-25.04' into fea/fil_shallow
wphicks Mar 13, 2025
e17fbaa
Remove unused variable
wphicks Mar 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,11 @@ if(BUILD_CUML_CPP_LIBRARY)
src/experimental/fil/infer4.cu
src/experimental/fil/infer5.cu
src/experimental/fil/infer6.cu
src/experimental/fil/infer7.cu)
src/experimental/fil/infer7.cu
src/experimental/fil/infer8.cu
src/experimental/fil/infer9.cu
src/experimental/fil/infer10.cu
src/experimental/fil/infer11.cu)
endif()
target_sources(${CUML_CPP_TARGET}
PRIVATE
Expand All @@ -395,7 +399,11 @@ if(BUILD_CUML_CPP_LIBRARY)
src/experimental/fil/infer4.cpp
src/experimental/fil/infer5.cpp
src/experimental/fil/infer6.cpp
src/experimental/fil/infer7.cpp)
src/experimental/fil/infer7.cpp
src/experimental/fil/infer8.cpp
src/experimental/fil/infer9.cpp
src/experimental/fil/infer10.cpp
src/experimental/fil/infer11.cpp)
endif()

# todo: organize linear models better
Expand Down
6 changes: 3 additions & 3 deletions cpp/bench/sg/filex.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -101,9 +101,9 @@ class FILEX : public RegressionFixture<float> {
allowed_storage_types.push_back(ML::fil::storage_type_t::SPARSE8);
}
auto allowed_layouts = std::vector<ML::experimental::fil::tree_layout>{
ML::experimental::fil::tree_layout::breadth_first,
ML::experimental::fil::tree_layout::depth_first,
};
ML::experimental::fil::tree_layout::breadth_first,
ML::experimental::fil::tree_layout::layered_children_together};
auto min_time = std::numeric_limits<std::int64_t>::max();

// Iterate through storage type, algorithm type, and chunk sizes and find optimum
Expand Down
84 changes: 50 additions & 34 deletions cpp/include/cuml/experimental/fil/decision_forest.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -381,39 +381,55 @@ using preset_decision_forest = decision_forest<
} // namespace detail

/** A variant containing all standard decision_forest instantiations */
using decision_forest_variant =
std::variant<detail::preset_decision_forest<
std::variant_alternative_t<0, detail::specialization_variant>::layout,
std::variant_alternative_t<0, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<0, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<1, detail::specialization_variant>::layout,
std::variant_alternative_t<1, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<1, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<2, detail::specialization_variant>::layout,
std::variant_alternative_t<2, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<2, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<3, detail::specialization_variant>::layout,
std::variant_alternative_t<3, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<3, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<4, detail::specialization_variant>::layout,
std::variant_alternative_t<4, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<4, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<5, detail::specialization_variant>::layout,
std::variant_alternative_t<5, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<5, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<6, detail::specialization_variant>::layout,
std::variant_alternative_t<6, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<6, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<7, detail::specialization_variant>::layout,
std::variant_alternative_t<7, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<7, detail::specialization_variant>::has_large_trees>>;
using decision_forest_variant = std::variant<
detail::preset_decision_forest<
std::variant_alternative_t<0, detail::specialization_variant>::layout,
std::variant_alternative_t<0, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<0, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<1, detail::specialization_variant>::layout,
std::variant_alternative_t<1, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<1, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<2, detail::specialization_variant>::layout,
std::variant_alternative_t<2, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<2, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<3, detail::specialization_variant>::layout,
std::variant_alternative_t<3, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<3, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<4, detail::specialization_variant>::layout,
std::variant_alternative_t<4, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<4, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<5, detail::specialization_variant>::layout,
std::variant_alternative_t<5, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<5, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<6, detail::specialization_variant>::layout,
std::variant_alternative_t<6, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<6, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<7, detail::specialization_variant>::layout,
std::variant_alternative_t<7, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<7, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<8, detail::specialization_variant>::layout,
std::variant_alternative_t<8, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<8, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<9, detail::specialization_variant>::layout,
std::variant_alternative_t<9, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<9, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<10, detail::specialization_variant>::layout,
std::variant_alternative_t<10, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<10, detail::specialization_variant>::has_large_trees>,
detail::preset_decision_forest<
std::variant_alternative_t<11, detail::specialization_variant>::layout,
std::variant_alternative_t<11, detail::specialization_variant>::is_double_precision,
std::variant_alternative_t<11, detail::specialization_variant>::has_large_trees>>;

/**
* Determine the variant index of the decision_forest type to use based on
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -58,32 +58,13 @@ struct decision_forest_builder {
/* The type for nodes in the given decision_forest type */
using node_type = typename decision_forest_t::node_type;

/* Add a root node, indicating the beginning of a new tree */
void start_new_tree()
{
if (root_node_indexes_.empty()) {
root_node_indexes_.emplace_back();
} else {
max_tree_size_ = std::max(cur_tree_size_, max_tree_size_);
if (alignment_ != index_type{}) {
if (cur_tree_size_ % alignment_ != index_type{}) {
auto padding = (alignment_ - cur_tree_size_ % alignment_);
for (auto i = index_type{}; i < padding; ++i) {
add_node(typename node_type::threshold_type{}, std::nullopt);
}
}
}
root_node_indexes_.push_back(root_node_indexes_.back() + cur_tree_size_);
cur_tree_size_ = index_type{};
}
}

/* Add a node with a categorical split */
template <typename iter_t>
void add_categorical_node(
iter_t vec_begin,
iter_t vec_end,
std::optional<int> tl_node_id = std::nullopt,
std::size_t depth = std::size_t{1},
bool default_to_distant_child = false,
typename node_type::metadata_storage_type feature = typename node_type::metadata_storage_type{},
typename node_type::offset_type offset = typename node_type::offset_type{})
Expand All @@ -103,46 +84,62 @@ struct decision_forest_builder {
auto set = bitset{set_storage, max_node_categories};
std::for_each(vec_begin, vec_end, [&set](auto&& cat_index) { set.set(cat_index); });

add_node(node_value, tl_node_id, false, default_to_distant_child, true, feature, offset, false);
add_node(
node_value, tl_node_id, depth, false, default_to_distant_child, true, feature, offset, false);
}

/*
 * Add a leaf node with vector output.
 *
 * The values in [vec_begin, vec_end) are appended to vector_output_, and the
 * node itself stores only the index of that output vector rather than the
 * values.
 *
 * vec_begin, vec_end: iterator range over the output values for this leaf
 * tl_node_id: optional node ID recorded in the node ID mapping; 0 is recorded
 *   when no ID is provided
 * depth: forwarded unchanged to add_node
 *
 * NOTE(review): this text comes from a diff view, so both the direct
 * nodes_.emplace_back(...) path and the add_node(...) path appear below;
 * confirm which one is current before relying on this listing.
 */
template <typename iter_t>
void add_leaf_vector_node(iter_t vec_begin,
iter_t vec_end,
std::optional<int> tl_node_id = std::nullopt)
std::optional<int> tl_node_id = std::nullopt,
std::size_t depth = std::size_t{1})
{
// Index of this leaf's output vector: number of values stored so far divided
// by the per-leaf output size
auto leaf_index = typename node_type::index_type(vector_output_.size() / output_size_);
// Append this leaf's values to the shared output storage
std::copy(vec_begin, vec_end, std::back_inserter(vector_output_));
nodes_.emplace_back(leaf_index,
true,
false,
false,
typename node_type::metadata_storage_type{},
typename node_type::offset_type{});
// 0 indicates the lack of ID mapping for a particular node
node_id_mapping_.push_back(static_cast<index_type>(tl_node_id.value_or(0)));
++cur_tree_size_;

// Store leaf_index as the node value; the node is flagged as a leaf
// (is_leaf_node = true), non-categorical, with default feature and offset
add_node(leaf_index,
tl_node_id,
depth,
true,
false,
false,
typename node_type::metadata_storage_type{},
typename node_type::offset_type{},
false);
}

/*
 * Add a node to the model.
 *
 * val: value stored in the node (passed as the first argument when the node
 *   is constructed in nodes_)
 * tl_node_id: optional node ID recorded in node_id_mapping_; 0 is recorded
 *   when no ID is provided
 * depth: a value of 0 marks this node as the root of a new tree, which
 *   triggers alignment padding and records the root index; the default of 1
 *   does neither
 * is_leaf_node, default_to_distant_child, is_categorical_node, feature,
 *   offset: forwarded unchanged to the node constructor
 * is_inclusive: if true, val is replaced by the next representable value
 *   toward +infinity before being stored
 */
template <typename value_t>
void add_node(
value_t val,
std::optional<int> tl_node_id = std::nullopt,
std::size_t depth = std::size_t{1},
bool is_leaf_node = true,
bool default_to_distant_child = false,
bool is_categorical_node = false,
typename node_type::metadata_storage_type feature = typename node_type::metadata_storage_type{},
typename node_type::offset_type offset = typename node_type::offset_type{},
bool is_inclusive = false)
{
// A depth of zero means this node starts a new tree
if (depth == std::size_t{}) {
if (alignment_ != index_type{}) {
// NOTE(review): alignment_ is initialized from align_bytes and
// sizeof(node_type) in the constructor, but is compared against a node
// index here; confirm the intended units
if (cur_node_index_ % alignment_ != index_type{}) {
auto padding = (alignment_ - cur_node_index_ % alignment_);
// Padding nodes use the default depth of 1, so these nested calls do not
// re-enter this root-handling branch
for (auto i = index_type{}; i < padding; ++i) {
add_node(typename node_type::threshold_type{}, std::nullopt);
}
}
}
// Recorded after any padding, so the root index points at the node added
// below rather than at a padding node
root_node_indexes_.push_back(cur_node_index_);
}

if (is_inclusive) { val = std::nextafter(val, std::numeric_limits<value_t>::infinity()); }
nodes_.emplace_back(
val, is_leaf_node, default_to_distant_child, is_categorical_node, feature, offset);
// 0 indicates the lack of ID mapping for a particular node
node_id_mapping_.push_back(static_cast<index_type>(tl_node_id.value_or(0)));
// NOTE(review): both counter increments appear because this text comes from
// a diff view; confirm which one is current
++cur_tree_size_;
++cur_node_index_;
}

/* Set the element-wise postprocessing operation for this model */
Expand All @@ -167,16 +164,15 @@ struct decision_forest_builder {

decision_forest_builder(index_type max_num_categories = index_type{},
index_type align_bytes = index_type{})
: cur_tree_size_{},
: cur_node_index_{},
max_num_categories_{max_num_categories},
alignment_{std::lcm(align_bytes, index_type(sizeof(node_type)))},
output_size_{1},
row_postproc_{},
element_postproc_{},
average_factor_{},
row_postproc_{},
bias_{},
postproc_constant_{},
max_tree_size_{},
nodes_{},
root_node_indexes_{},
vector_output_{}
Expand Down Expand Up @@ -233,7 +229,7 @@ struct decision_forest_builder {
}

private:
index_type cur_tree_size_;
index_type cur_node_index_;
index_type max_num_categories_;
index_type alignment_;
index_type output_size_;
Expand All @@ -242,7 +238,6 @@ struct decision_forest_builder {
double average_factor_;
double bias_;
double postproc_constant_;
index_type max_tree_size_;

std::vector<node_type> nodes_;
std::vector<index_type> root_node_indexes_;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -242,6 +242,10 @@ CUML_FIL_INITIALIZE_DEVICE(extern template, 4)
CUML_FIL_INITIALIZE_DEVICE(extern template, 5)
CUML_FIL_INITIALIZE_DEVICE(extern template, 6)
CUML_FIL_INITIALIZE_DEVICE(extern template, 7)
CUML_FIL_INITIALIZE_DEVICE(extern template, 8)
CUML_FIL_INITIALIZE_DEVICE(extern template, 9)
CUML_FIL_INITIALIZE_DEVICE(extern template, 10)
CUML_FIL_INITIALIZE_DEVICE(extern template, 11)

} // namespace device_initialization
} // namespace detail
Expand Down
6 changes: 5 additions & 1 deletion cpp/include/cuml/experimental/fil/detail/infer/cpu.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -147,6 +147,10 @@ CUML_FIL_INFER_ALL(extern template, raft_proto::device_type::cpu, 4)
CUML_FIL_INFER_ALL(extern template, raft_proto::device_type::cpu, 5)
CUML_FIL_INFER_ALL(extern template, raft_proto::device_type::cpu, 6)
CUML_FIL_INFER_ALL(extern template, raft_proto::device_type::cpu, 7)
CUML_FIL_INFER_ALL(extern template, raft_proto::device_type::cpu, 8)
CUML_FIL_INFER_ALL(extern template, raft_proto::device_type::cpu, 9)
CUML_FIL_INFER_ALL(extern template, raft_proto::device_type::cpu, 10)
CUML_FIL_INFER_ALL(extern template, raft_proto::device_type::cpu, 11)

} // namespace inference
} // namespace detail
Expand Down
Loading