rapidsai
diff --git a/cpp/examples/symreg/symreg_example.cpp b/cpp/examples/symreg/symreg_example.cpp
Lines changed: 4 additions & 2 deletions
diff --git a/cpp/src/genetic/genetic.cu b/cpp/src/genetic/genetic.cu
Lines changed: 4 additions & 4 deletions
diff --git a/cpp/tests/sg/genetic/evolution_test.cu b/cpp/tests/sg/genetic/evolution_test.cu
Lines changed: 3 additions & 3 deletions
diff --git a/cpp/tests/sg/genetic/program_test.cu b/cpp/tests/sg/genetic/program_test.cu
Lines changed: 12 additions & 7 deletions
diff --git a/python/cuml/cuml/common/classification.py b/python/cuml/cuml/common/classification.py
Lines changed: 2 additions & 131 deletions
diff --git a/python/cuml/cuml/ensemble/randomforestclassifier.py b/python/cuml/cuml/ensemble/randomforestclassifier.py
Lines changed: 4 additions & 5 deletions
@@ -235,7 +235,8 @@ int main(int argc, char* argv[])
 
   // Initialize AST
   auto curr_mr = rmm::mr::get_current_device_resource_ref();
-  d_finalprogs = static_cast<cg::program_t>(curr_mr.allocate(stream, params.population_size));
+  d_finalprogs = static_cast<cg::program_t>(
+    curr_mr.allocate(stream, params.population_size * sizeof(cg::program), alignof(cg::program)));
 
   std::vector<std::vector<cg::program>> history;
   history.reserve(params.generations);
@@ -327,7 +328,8 @@ int main(int argc, char* argv[])
 
   /* ======================= Reset data ======================= */
 
-  curr_mr.deallocate(stream, d_finalprogs, params.population_size);
+  curr_mr.deallocate(
+    stream, d_finalprogs, params.population_size * sizeof(cg::program), alignof(cg::program));
   CUDA_RT_CALL(cudaEventDestroy(start));
   CUDA_RT_CALL(cudaEventDestroy(stop));
   return 0;
 
@@ -221,15 +221,15 @@ void parallel_evolve(const raft::handle_t& h,
 
     // Set current generation device nodes
     tmp.nodes = (node*)rmm::mr::get_current_device_resource_ref().allocate(
-      stream, h_nextprogs[i].len * sizeof(node));
+      stream, h_nextprogs[i].len * sizeof(node), alignof(node));
     raft::copy(tmp.nodes, h_nextprogs[i].nodes, h_nextprogs[i].len, stream);
     raft::copy(d_nextprogs + i, &tmp, 1, stream);
 
     if (generation > 1) {
       // Free device memory allocated to program nodes in previous generation
       raft::copy(&tmp, d_oldprogs + i, 1, stream);
       rmm::mr::get_current_device_resource_ref().deallocate(
-        stream, tmp.nodes, h_nextprogs[i].len * sizeof(node));
+        stream, tmp.nodes, tmp.len * sizeof(node), alignof(node));
     }
 
     tmp.nodes = nullptr;
@@ -399,7 +399,7 @@ void symFit(const raft::handle_t& handle,
 
   program_t d_currprogs;  // pointer to current programs
   d_currprogs = (program_t)rmm::mr::get_current_device_resource_ref().allocate(
-    stream, params.population_size * sizeof(program));
+    stream, params.population_size * sizeof(program), alignof(program));
   program_t d_nextprogs = final_progs;  // Reuse memory already allocated for final_progs
   final_progs           = nullptr;
 
@@ -481,7 +481,7 @@ void symFit(const raft::handle_t& handle,
 
   // Deallocate the previous generation device memory
   rmm::mr::get_current_device_resource_ref().deallocate(
-    stream, d_nextprogs, params.population_size * sizeof(program));
+    stream, d_nextprogs, params.population_size * sizeof(program), alignof(program));
   d_currprogs = nullptr;
   d_nextprogs = nullptr;
 }
 
@@ -259,7 +259,7 @@ TEST_F(GeneticEvolutionTest, SymReg)
   MLCommon::CompareApprox<float> compApprox(tolerance);
   program_t final_progs;
   final_progs = (program_t)rmm::mr::get_current_device_resource_ref().allocate(
-    stream, hyper_params.population_size * sizeof(program));
+    stream, hyper_params.population_size * sizeof(program), alignof(program));
   std::vector<std::vector<program>> history;
   history.reserve(hyper_params.generations);
 
@@ -327,12 +327,12 @@ TEST_F(GeneticEvolutionTest, SymReg)
     program tmp = program();
     raft::copy(&tmp, final_progs + i, 1, stream);
     rmm::mr::get_current_device_resource_ref().deallocate(
-      stream, tmp.nodes, tmp.len * sizeof(node));
+      stream, tmp.nodes, tmp.len * sizeof(node), alignof(node));
     tmp.nodes = nullptr;
   }
   // deallocate the final programs from device memory
   rmm::mr::get_current_device_resource_ref().deallocate(
-    stream, final_progs, hyper_params.population_size * sizeof(program));
+    stream, final_progs, hyper_params.population_size * sizeof(program), alignof(program));
 
   ASSERT_TRUE(compApprox(history[n_gen - 1][best_idx].raw_fitness_, 0.0036f));
   std::cout << "Some Predicted test values:" << std::endl;
 
@@ -90,10 +90,12 @@ class GeneticProgramTest : public ::testing::Test {
     d_lY.resize(250, stream);
     d_lunitW.resize(250, stream);
     d_lW.resize(250, stream);
-    d_nodes1 = (node*)rmm::mr::get_current_device_resource_ref().allocate(stream, 7 * sizeof(node));
-    d_nodes2 = (node*)rmm::mr::get_current_device_resource_ref().allocate(stream, 7 * sizeof(node));
-    d_progs =
-      (program_t)rmm::mr::get_current_device_resource_ref().allocate(stream, 2 * sizeof(program));
+    d_nodes1 = (node*)rmm::mr::get_current_device_resource_ref().allocate(
+      stream, 7 * sizeof(node), alignof(node));
+    d_nodes2 = (node*)rmm::mr::get_current_device_resource_ref().allocate(
+      stream, 7 * sizeof(node), alignof(node));
+    d_progs = (program_t)rmm::mr::get_current_device_resource_ref().allocate(
+      stream, 2 * sizeof(program), alignof(program));
 
     RAFT_CUDA_TRY(cudaMemcpyAsync(
       d_lYpred.data(), h_lYpred.data(), 500 * sizeof(float), cudaMemcpyHostToDevice, stream));
@@ -146,9 +148,12 @@ class GeneticProgramTest : public ::testing::Test {
 
   void TearDown() override
   {
-    rmm::mr::get_current_device_resource_ref().deallocate(stream, d_nodes1, 7 * sizeof(node));
-    rmm::mr::get_current_device_resource_ref().deallocate(stream, d_nodes2, 7 * sizeof(node));
-    rmm::mr::get_current_device_resource_ref().deallocate(stream, d_progs, 2 * sizeof(program));
+    rmm::mr::get_current_device_resource_ref().deallocate(
+      stream, d_nodes1, 7 * sizeof(node), alignof(node));
+    rmm::mr::get_current_device_resource_ref().deallocate(
+      stream, d_nodes2, 7 * sizeof(node), alignof(node));
+    rmm::mr::get_current_device_resource_ref().deallocate(
+      stream, d_progs, 2 * sizeof(program), alignof(program));
   }
 
   raft::handle_t handle;
 
@@ -1,142 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
-import warnings
-
 import cudf
 import cupy as cp
 import numpy as np
-import pandas as pd
 
 from cuml.internals.array import CumlArray, cuda_ptr
-from cuml.internals.input_utils import input_to_cuml_array, input_to_cupy_array
+from cuml.internals.input_utils import input_to_cupy_array
 from cuml.internals.output_utils import cudf_to_pandas
 
-is_integral = cp.ReductionKernel(
-    "T x",
-    "bool out",
-    "ceilf(x) == x",
-    "a && b",
-    "out = a",
-    "true",
-    "is_integral",
-)
-
-
-def check_classification_targets(y):
-    """Check if `y` is composed of valid class labels"""
-    if y.dtype.kind == "f" and not is_integral(y):
-        raise ValueError(
-            "Unknown label type: continuous. Maybe you are trying to fit a "
-            "classifier, which expects discrete classes on a regression target "
-            "with continuous values."
-        )
-
-
-def preprocess_labels(
-    y, dtype=None, order="C", n_samples=None, allow_multitarget=False
-):
-    """Preprocess the `y` input to a classifier.
-
-    Parameters
-    ----------
-    y : array-like
-        The labels for fitting, may be any type cuml supports as input.
-    dtype : dtype, optional
-        The output dtype to use for the encoded labels. If not provided,
-        a data-dependent integral type will be used.
-    order : {"C", "F"}, optional
-        The array order to use for the encoded labels.
-    n_samples : int, optional
-        If provided, will raise an error if the number of samples in `y`
-        doesn't match.
-    allow_multitarget : bool, optional
-        Whether to allow multi-target labels.
-
-    Returns
-    -------
-    y_encoded : cp.ndarray
-        The labels, encoded as integers in [0, n_classes - 1].
-    classes : np.ndarray or list[np.ndarray]
-        The classes as a numpy array, or a list of numpy arrays if
-        y is multi-target.
-    """
-    # cudf may coerce the dtype, store the original so we can cast back later
-    y_dtype = y.dtype if isinstance(y, np.ndarray) else None
-
-    # No cuda container supports all dtypes. Here we coerce to cupy when
-    # possible, falling back to cudf Series/DataFrame otherwise.
-    if isinstance(y, np.ndarray) and y.dtype.kind in "iufb":
-        y = cp.asarray(y)
-    elif isinstance(y, pd.DataFrame):
-        y = cudf.DataFrame(y)
-    elif isinstance(y, pd.Series):
-        y = cudf.Series(y)
-    elif not isinstance(y, (cp.ndarray, cudf.DataFrame, cudf.Series)):
-        # Non-numeric dtype, always go through cudf
-        y = input_to_cuml_array(y, convert_to_mem_type=False).array
-        if y.dtype.kind in "iufb":
-            y = y.to_output("cupy")
-        else:
-            y = (cudf.DataFrame if y.ndim == 2 else cudf.Series)(
-                y, dtype=(np.dtype("O") if y.dtype.kind in "U" else None)
-            )
-
-    # Validate dimensionality, ensuring 1D/2D y is as expected
-    if y.ndim == 2 and y.shape[1] == 1:
-        warnings.warn(
-            "A column-vector y was passed when a 1d array was expected. Please "
-            "change the shape of y to (n_samples,), for example using ravel()."
-        )
-        y = y.iloc[:, 0] if isinstance(y, cudf.DataFrame) else y.ravel()
-    elif allow_multitarget and y.ndim not in (1, 2):
-        raise ValueError(
-            f"y should be a 1d or 2d array, got an array of shape {y.shape} instead."
-        )
-    elif not allow_multitarget and y.ndim != 1:
-        raise ValueError(
-            f"y should be a 1d array, got an array of shape {y.shape} instead."
-        )
-
-    # Validate correct number of samples
-    if n_samples is not None and y.shape[0] != n_samples:
-        raise ValueError(
-            f"Expected `y` with {n_samples} samples, got {y.shape[0]}"
-        )
-
-    def _encode(y):
-        """Encode `y` to codes and classes"""
-        check_classification_targets(y)
-        if isinstance(y, cudf.Series):
-            y = y.astype("category")
-            codes = cp.asarray(y.cat.codes)
-            classes = y.cat.categories.to_numpy()
-            # cudf will sometimes translate non-numeric dtypes. Coerce back to
-            # the input dtype if the input was originally a numpy array.
-            if y_dtype is not None:
-                classes = classes.astype(y_dtype, copy=False)
-        else:
-            classes, codes = cp.unique(y, return_inverse=True)
-            classes = classes.get()
-        return codes, classes
-
-    if y.ndim == 1:
-        y_encoded, classes = _encode(y)
-        if dtype is not None:
-            y_encoded = y_encoded.astype(dtype, copy=False)
-    else:
-        getter = y.iloc if isinstance(y, cudf.DataFrame) else y
-        encoded_cols, classes = zip(
-            *(_encode(getter[:, i]) for i in range(y.shape[1]))
-        )
-        classes = list(classes)
-        if dtype is None:
-            dtype = cp.result_type(*(c.dtype for c in encoded_cols))
-        y_encoded = cp.empty(shape=y.shape, dtype=dtype, order=order)
-        for i, col in enumerate(encoded_cols):
-            y_encoded[:, i] = col
-
-    return y_encoded, classes
-
 
 def decode_labels(y_encoded, classes, output_type="cupy"):
     """Convert encoded labels back into their original classes.
 
@@ -7,14 +7,14 @@
 import cuml.internals
 import cuml.internals.nvtx as nvtx
 from cuml.common.array_descriptor import CumlArrayDescriptor
-from cuml.common.classification import decode_labels, preprocess_labels
+from cuml.common.classification import decode_labels
 from cuml.common.doc_utils import generate_docstring, insert_into_docstring
 from cuml.ensemble.randomforest_common import BaseRandomForestModel
 from cuml.internals.array import CumlArray
 from cuml.internals.input_utils import input_to_cuml_array
 from cuml.internals.interop import UnsupportedOnGPU
 from cuml.internals.mixins import ClassifierMixin
-from cuml.internals.validation import check_features
+from cuml.internals.validation import check_features, check_y
 from cuml.metrics import accuracy_score
 
 
@@ -222,15 +222,14 @@ def fit(self, X, y, *, convert_dtype=True) -> "RandomForestClassifier":
             y to be of dtype int32. This will increase memory used for
             the method.
         """
+        y, classes = check_y(y, dtype=cp.int32, return_classes=True)
         X_m = input_to_cuml_array(
             X,
             convert_to_dtype=(np.float32 if convert_dtype else None),
             check_dtype=[np.float32, np.float64],
             order="F",
+            check_rows=y.shape[0],
         ).array
-        y, classes = preprocess_labels(
-            y, n_samples=X_m.shape[0], dtype=cp.int32
-        )
         self.classes_ = classes
         self.n_classes_ = len(classes)
         y_m = CumlArray(data=y)