From c8698203bce480b7eec09eeb2c2ea4ade16bda6b Mon Sep 17 00:00:00 2001 From: y Date: Wed, 5 Nov 2025 23:14:36 -0800 Subject: [PATCH 1/6] Initial dpctl tensor removal --- examples/sklearnex/basic_statistics_spmd.py | 8 +-- examples/sklearnex/covariance_spmd.py | 6 +- examples/sklearnex/dbscan_spmd.py | 6 +- ...y => incremental_basic_statistics_dpnp.py} | 12 ++-- .../sklearnex/incremental_covariance_spmd.py | 7 +-- ... => incremental_linear_regression_dpnp.py} | 14 ++--- ...l_pca_dpctl.py => incremental_pca_dpnp.py} | 16 ++--- examples/sklearnex/kmeans_spmd.py | 12 ++-- .../sklearnex/knn_bf_classification_dpnp.py | 58 ------------------ .../sklearnex/knn_bf_classification_spmd.py | 17 +++--- examples/sklearnex/knn_bf_regression_spmd.py | 17 +++--- examples/sklearnex/linear_regression_spmd.py | 14 ++--- .../sklearnex/logistic_regression_spmd.py | 18 +++--- examples/sklearnex/pca_spmd.py | 6 +- .../random_forest_classifier_dpctl.py | 53 ---------------- .../random_forest_classifier_spmd.py | 14 ++--- .../sklearnex/random_forest_regressor_dpnp.py | 60 ------------------- .../sklearnex/random_forest_regressor_spmd.py | 13 ++-- onedal/datatypes/_data_conversion.py | 12 +--- onedal/datatypes/tests/test_data.py | 23 ++++--- onedal/dummy/dummy.py | 2 +- onedal/tests/utils/_dataframes_support.py | 17 +----- onedal/utils/_array_api.py | 7 +-- onedal/utils/_third_party.py | 19 ------ onedal/utils/tests/test_validation.py | 6 +- sklearnex/decomposition/tests/test_pca.py | 2 +- sklearnex/dummy/tests/test_dummy.py | 2 +- .../tests/test_incremental_pca.py | 2 +- .../tests/test_basic_statistics_spmd.py | 4 +- .../test_incremental_basic_statistics_spmd.py | 16 ++--- .../spmd/cluster/tests/test_dbscan_spmd.py | 4 +- .../spmd/cluster/tests/test_kmeans_spmd.py | 4 +- .../covariance/tests/test_covariance_spmd.py | 2 +- .../tests/test_incremental_covariance_spmd.py | 12 ++-- .../tests/test_incremental_pca_spmd.py | 16 ++--- .../spmd/decomposition/tests/test_pca_spmd.py | 4 +- 
.../spmd/ensemble/tests/test_forest_spmd.py | 8 +-- .../tests/test_incremental_linear_spmd.py | 16 ++--- .../tests/test_linear_regression_spmd.py | 4 +- .../tests/test_logistic_regression_spmd.py | 4 +- .../neighbors/tests/test_neighbors_spmd.py | 14 ++--- sklearnex/tests/test_memory_usage.py | 2 +- sklearnex/tests/utils/spmd.py | 14 ++--- tests/run_examples.py | 43 ++++++------- tests/test_examples_sklearnex.py | 2 +- 45 files changed, 190 insertions(+), 422 deletions(-) rename examples/sklearnex/{incremental_basic_statistics_dpctl.py => incremental_basic_statistics_dpnp.py} (82%) rename examples/sklearnex/{incremental_linear_regression_dpctl.py => incremental_linear_regression_dpnp.py} (79%) rename examples/sklearnex/{incremental_pca_dpctl.py => incremental_pca_dpnp.py} (83%) delete mode 100644 examples/sklearnex/knn_bf_classification_dpnp.py delete mode 100644 examples/sklearnex/random_forest_classifier_dpctl.py delete mode 100644 examples/sklearnex/random_forest_regressor_dpnp.py diff --git a/examples/sklearnex/basic_statistics_spmd.py b/examples/sklearnex/basic_statistics_spmd.py index 097d58e1a5..ac1f065cd2 100644 --- a/examples/sklearnex/basic_statistics_spmd.py +++ b/examples/sklearnex/basic_statistics_spmd.py @@ -14,7 +14,7 @@ # limitations under the License. 
# ============================================================================== -import dpctl.tensor as dpt +import dpnp import numpy as np from dpctl import SyclQueue from mpi4py import MPI @@ -51,14 +51,14 @@ def generate_data(par, size, seed=777): data, weights = generate_data(params_spmd, size, seed=rank) weighted_data = np.diag(weights) @ data -dpt_data = dpt.asarray(data, usm_type="device", sycl_queue=q) -dpt_weights = dpt.asarray(weights, usm_type="device", sycl_queue=q) +dpnp_data = dpnp.asarray(data, usm_type="device", sycl_queue=q) +dpnp_weights = dpnp.asarray(weights, usm_type="device", sycl_queue=q) gtr_mean = np.mean(weighted_data, axis=0) gtr_std = np.std(weighted_data, axis=0) bss = BasicStatisticsSpmd(["mean", "standard_deviation"]) -bss.fit(dpt_data, dpt_weights) +bss.fit(dpnp_data, dpnp_weights) print(f"Computed mean on rank {rank}:\n", bss.mean_) print(f"Computed std on rank {rank}:\n", bss.standard_deviation_) diff --git a/examples/sklearnex/covariance_spmd.py b/examples/sklearnex/covariance_spmd.py index 4d7a051ab3..8c2467d1a7 100644 --- a/examples/sklearnex/covariance_spmd.py +++ b/examples/sklearnex/covariance_spmd.py @@ -15,7 +15,7 @@ # ============================================================================== import dpctl -import dpctl.tensor as dpt +import dpnp import numpy as np from mpi4py import MPI @@ -35,8 +35,8 @@ def get_data(data_seed): size = comm.Get_size() X = get_data(rank) -dpt_X = dpt.asarray(X, usm_type="device", sycl_queue=q) +dpnp_X = dpnp.asarray(X, usm_type="device", sycl_queue=q) -cov = EmpiricalCovariance().fit(dpt_X) +cov = EmpiricalCovariance().fit(dpnp_X) print(f"Computed covariance values on rank {rank}:\n", cov.covariance_) diff --git a/examples/sklearnex/dbscan_spmd.py b/examples/sklearnex/dbscan_spmd.py index e67e7a7cbb..96d39c2ae9 100644 --- a/examples/sklearnex/dbscan_spmd.py +++ b/examples/sklearnex/dbscan_spmd.py @@ -20,7 +20,7 @@ from warnings import warn -import dpctl.tensor as dpt +import dpnp import 
numpy as np from dpctl import SyclQueue from mpi4py import MPI @@ -55,8 +55,8 @@ def get_test_data(size): queue = SyclQueue("gpu") -dpt_X = dpt.asarray(X, usm_type="device", sycl_queue=queue) +dpnp_X = dpnp.asarray(X, usm_type="device", sycl_queue=queue) -model = DBSCAN(eps=3, min_samples=2).fit(dpt_X) +model = DBSCAN(eps=3, min_samples=2).fit(dpnp_X) print(f"Labels on rank {rank} (slice of 2):\n", model.labels_[:2]) diff --git a/examples/sklearnex/incremental_basic_statistics_dpctl.py b/examples/sklearnex/incremental_basic_statistics_dpnp.py similarity index 82% rename from examples/sklearnex/incremental_basic_statistics_dpctl.py rename to examples/sklearnex/incremental_basic_statistics_dpnp.py index 7b6a905dec..a2a9b9c3aa 100644 --- a/examples/sklearnex/incremental_basic_statistics_dpctl.py +++ b/examples/sklearnex/incremental_basic_statistics_dpnp.py @@ -15,11 +15,11 @@ # ============================================================================== import dpctl -import dpctl.tensor as dpt +import dpnp from sklearnex.basic_statistics import IncrementalBasicStatistics -# We create GPU SyclQueue and then put data to dpctl tensor using +# We create GPU SyclQueue and then put data to dpnp arrays using # the queue. It allows us to do computation on GPU. queue = dpctl.SyclQueue("gpu") @@ -27,13 +27,13 @@ incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"]) # We do partial_fit for each batch and then print final result. 
-X_1 = dpt.asarray([[0, 1], [0, 1]], sycl_queue=queue) +X_1 = dpnp.asarray([[0, 1], [0, 1]], sycl_queue=queue) result = incbs.partial_fit(X_1) -X_2 = dpt.asarray([[1, 2]], sycl_queue=queue) +X_2 = dpnp.asarray([[1, 2]], sycl_queue=queue) result = incbs.partial_fit(X_2) -X_3 = dpt.asarray([[1, 1], [1, 2], [2, 3]], sycl_queue=queue) +X_3 = dpnp.asarray([[1, 1], [1, 2], [2, 3]], sycl_queue=queue) result = incbs.partial_fit(X_3) print(f"Mean:\n{result.mean_}") @@ -43,7 +43,7 @@ # We put the whole data to fit method, it is split automatically and then # partial_fit is called for each batch. incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"], batch_size=3) -X = dpt.asarray([[0, 1], [0, 1], [1, 2], [1, 1], [1, 2], [2, 3]], sycl_queue=queue) +X = dpnp.asarray([[0, 1], [0, 1], [1, 2], [1, 1], [1, 2], [2, 3]], sycl_queue=queue) result = incbs.fit(X) print(f"Mean:\n{result.mean_}") diff --git a/examples/sklearnex/incremental_covariance_spmd.py b/examples/sklearnex/incremental_covariance_spmd.py index ccc1a7d9cd..c134524a05 100644 --- a/examples/sklearnex/incremental_covariance_spmd.py +++ b/examples/sklearnex/incremental_covariance_spmd.py @@ -15,7 +15,7 @@ # =============================================================================== import dpctl -import dpctl.tensor as dpt +import dpnp import numpy as np from mpi4py import MPI @@ -28,7 +28,6 @@ def get_local_data(data, comm): local_size = (data.shape[0] + num_ranks - 1) // num_ranks return data[rank * local_size : (rank + 1) * local_size] - # We create SYCL queue and MPI communicator to perform computation on multiple GPUs q = dpctl.SyclQueue("gpu") @@ -50,8 +49,8 @@ def get_local_data(data, comm): # Partial fit is called for each batch on each GPU for i in range(num_batches): - dpt_X = dpt.asarray(X_split[i], usm_type="device", sycl_queue=q) - cov.partial_fit(dpt_X) + dpnp_X = dpnp.asarray(X_split[i], usm_type="device", sycl_queue=q) + cov.partial_fit(dpnp_X) # Finalization of results is performed 
in a lazy way after requesting results like in non-SPMD incremental estimators. diff --git a/examples/sklearnex/incremental_linear_regression_dpctl.py b/examples/sklearnex/incremental_linear_regression_dpnp.py similarity index 79% rename from examples/sklearnex/incremental_linear_regression_dpctl.py rename to examples/sklearnex/incremental_linear_regression_dpnp.py index 9af37b4203..7433e91de6 100644 --- a/examples/sklearnex/incremental_linear_regression_dpctl.py +++ b/examples/sklearnex/incremental_linear_regression_dpnp.py @@ -15,11 +15,11 @@ # ============================================================================== import dpctl -import dpctl.tensor as dpt +import dpnp from sklearnex.linear_model import IncrementalLinearRegression -# We create GPU SyclQueue and then put data to dpctl tensors using +# We create GPU SyclQueue and then put data to dpnp arrays using # the queue. It allows us to do computation on GPU. queue = dpctl.SyclQueue("gpu") @@ -27,15 +27,15 @@ inclin = IncrementalLinearRegression() # We do partial_fit for each batch and then print final result. -X_1, y_1 = dpt.asarray([[0, 1], [1, 2]], sycl_queue=queue), dpt.asarray( +X_1, y_1 = dpnp.asarray([[0, 1], [1, 2]], sycl_queue=queue), dpnp.asarray( [2, 4], sycl_queue=queue ) result = inclin.partial_fit(X_1, y_1) -X_2, y_2 = dpt.asarray([[2, 3]], sycl_queue=queue), dpt.asarray([6], sycl_queue=queue) +X_2, y_2 = dpnp.asarray([[2, 3]], sycl_queue=queue), dpnp.asarray([6], sycl_queue=queue) result = inclin.partial_fit(X_2, y_2) -X_3, y_3 = dpt.asarray([[0, 2], [1, 3], [2, 4]], sycl_queue=queue), dpt.asarray( +X_3, y_3 = dpnp.asarray([[0, 2], [1, 3], [2, 4]], sycl_queue=queue), dpnp.asarray( [3, 5, 7], sycl_queue=queue ) result = inclin.partial_fit(X_3, y_3) @@ -46,9 +46,9 @@ # We put the whole data to fit method, it is split automatically and then # partial_fit is called for each batch. 
inclin = IncrementalLinearRegression(batch_size=3) -X, y = dpt.asarray( +X, y = dpnp.asarray( [[0, 1], [1, 2], [2, 3], [0, 2], [1, 3], [2, 4]], sycl_queue=queue -), dpt.asarray([2, 4, 6, 3, 5, 7], sycl_queue=queue) +), dpnp.asarray([2, 4, 6, 3, 5, 7], sycl_queue=queue) result = inclin.fit(X, y) print(f"Coefs:\n{result.coef_}") diff --git a/examples/sklearnex/incremental_pca_dpctl.py b/examples/sklearnex/incremental_pca_dpnp.py similarity index 83% rename from examples/sklearnex/incremental_pca_dpctl.py rename to examples/sklearnex/incremental_pca_dpnp.py index 935c34770e..802993bc1d 100644 --- a/examples/sklearnex/incremental_pca_dpctl.py +++ b/examples/sklearnex/incremental_pca_dpnp.py @@ -14,11 +14,11 @@ # limitations under the License. # ============================================================================== -# sklearnex IncrementalPCA example for GPU offloading with DPCtl usm ndarray: +# sklearnex IncrementalPCA example for GPU offloading with DPNP ndarray: # SKLEARNEX_PREVIEW=YES python ./incremental_pca_dpctl.py import dpctl -import dpctl.tensor as dpt +import dpnp # Import estimator via sklearnex's patch mechanism from sklearn from sklearnex import patch_sklearn, sklearn_is_patched @@ -35,23 +35,23 @@ # Or just directly import estimator from sklearnex namespace. # from sklearnex.preview.decomposition import IncrementalPCA -# We create GPU SyclQueue and then put data to dpctl tensor using +# We create GPU SyclQueue and then put data to dpnp arrays using # the queue. It allows us to do computation on GPU. queue = dpctl.SyclQueue("gpu") incpca = IncrementalPCA() # We do partial_fit for each batch and then print final result. 
-X_1 = dpt.asarray([[-1, -1], [-2, -1]], sycl_queue=queue) +X_1 = dpnp.asarray([[-1, -1], [-2, -1]], sycl_queue=queue) result = incpca.partial_fit(X_1) -X_2 = dpt.asarray([[-3, -2], [1, 1]], sycl_queue=queue) +X_2 = dpnp.asarray([[-3, -2], [1, 1]], sycl_queue=queue) result = incpca.partial_fit(X_2) -X_3 = dpt.asarray([[2, 1], [3, 2]], sycl_queue=queue) +X_3 = dpnp.asarray([[2, 1], [3, 2]], sycl_queue=queue) result = incpca.partial_fit(X_3) -X = dpt.concat((X_1, X_2, X_3)) +X = dpnp.concat((X_1, X_2, X_3)) transformed_X = incpca.transform(X) print(f"Principal components:\n{result.components_}") @@ -61,7 +61,7 @@ # We put the whole data to fit method, it is split automatically and then # partial_fit is called for each batch. incpca = IncrementalPCA(batch_size=3) -X = dpt.asarray([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) +X = dpnp.asarray([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) result = incpca.fit(X) transformed_X = incpca.transform(X) diff --git a/examples/sklearnex/kmeans_spmd.py b/examples/sklearnex/kmeans_spmd.py index 38d2364b01..a8b66ccc6f 100644 --- a/examples/sklearnex/kmeans_spmd.py +++ b/examples/sklearnex/kmeans_spmd.py @@ -16,7 +16,7 @@ from warnings import warn -import dpctl.tensor as dpt +import dpnp import numpy as np from dpctl import SyclQueue from mpi4py import MPI @@ -51,17 +51,17 @@ def get_test_data(size): queue = SyclQueue("gpu") -dpt_X = dpt.asarray(X, usm_type="device", sycl_queue=queue) +dpnp_X = dpnp.asarray(X, usm_type="device", sycl_queue=queue) -model = KMeans(n_clusters=10).fit(dpt_X) +model = KMeans(n_clusters=10).fit(dpnp_X) print(f"Number of iterations on {rank}:\n", model.n_iter_) print(f"Labels on rank {rank} (slice of 2):\n", model.labels_[:2]) print(f"Centers on rank {rank} (slice of 2):\n", model.cluster_centers_[:2, :]) X_test, _ = get_test_data(size) -dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=queue) +dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=queue) -result 
= model.predict(dpt_X_test) +result = model.predict(dpnp_X_test) -print(f"Result labels on rank {rank} (slice of 5):\n", dpt.to_numpy(result)[:5]) +print(f"Result labels on rank {rank} (slice of 5):\n", dpnp.asnumpy(result)[:5]) diff --git a/examples/sklearnex/knn_bf_classification_dpnp.py b/examples/sklearnex/knn_bf_classification_dpnp.py deleted file mode 100644 index 7a777abf3f..0000000000 --- a/examples/sklearnex/knn_bf_classification_dpnp.py +++ /dev/null @@ -1,58 +0,0 @@ -# ============================================================================== -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# sklearnex kNN example for GPU offloading with DPNP ndarray: -# python ./knn_bf_classification_dpnp_batch.py - -import dpctl -import dpnp -import numpy as np -from sklearn.datasets import make_classification -from sklearn.metrics import accuracy_score -from sklearn.model_selection import train_test_split - -from sklearnex.neighbors import KNeighborsClassifier - -X, y = make_classification( - n_samples=1000, - n_features=4, - n_informative=2, - n_redundant=0, - random_state=0, - shuffle=False, -) - -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) - -# Make sure that all DPNP ndarrays using the same device. 
-q = dpctl.SyclQueue("gpu") # GPU - -dpnp_X_train = dpnp.asarray(X_train, usm_type="device", sycl_queue=q) -dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=q) -dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=q) - -knn_mdl = KNeighborsClassifier( - algorithm="brute", n_neighbors=20, weights="uniform", p=2, metric="minkowski" -) -knn_mdl.fit(dpnp_X_train, dpnp_y_train) - -y_predict = knn_mdl.predict(dpnp_X_test) - -print("Brute Force Distributed kNN classification results:") -print("Ground truth (first 5 observations):\n{}".format(y_test[:5])) -print("Classification results (first 5 observations):\n{}".format(y_predict[:5])) -print("Accuracy (2 classes): {}\n".format(accuracy_score(y_test, y_predict.asnumpy()))) -print("Are predicted results on GPU: {}".format(y_predict.sycl_device.is_gpu)) diff --git a/examples/sklearnex/knn_bf_classification_spmd.py b/examples/sklearnex/knn_bf_classification_spmd.py index 915747eb21..2a1ef99c74 100644 --- a/examples/sklearnex/knn_bf_classification_spmd.py +++ b/examples/sklearnex/knn_bf_classification_spmd.py @@ -17,7 +17,7 @@ from warnings import warn import dpctl -import dpctl.tensor as dpt +import dpnp import numpy as np from mpi4py import MPI from sklearn.metrics import accuracy_score @@ -53,27 +53,26 @@ def generate_X_y(par, seed): X_train, y_train = generate_X_y(params_train, rank) X_test, y_test = generate_X_y(params_test, rank + 99) -dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q) -dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q) -dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q) -dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q) +dpnp_X_train = dpnp.asarray(X_train, usm_type="device", sycl_queue=q) +dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=q) +dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=q) model_spmd = KNeighborsClassifier( algorithm="brute", n_neighbors=20, 
weights="uniform", p=2, metric="minkowski" ) -model_spmd.fit(dpt_X_train, dpt_y_train) +model_spmd.fit(dpnp_X_train, dpnp_y_train) -y_predict = model_spmd.predict(dpt_X_test) +y_predict = model_spmd.predict(dpnp_X_test) print("Brute Force Distributed kNN classification results:") print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5])) print( "Classification results (first 5 observations on rank {}):\n{}".format( - rank, dpt.to_numpy(y_predict)[:5] + rank, dpnp.asnumpy(y_predict)[:5] ) ) print( "Accuracy for entire rank {} (256 classes): {}\n".format( - rank, accuracy_score(y_test, dpt.to_numpy(y_predict)) + rank, accuracy_score(y_test, dpnp.asnumpy(y_predict)) ) ) diff --git a/examples/sklearnex/knn_bf_regression_spmd.py b/examples/sklearnex/knn_bf_regression_spmd.py index 06e70ca013..ffc15d3167 100644 --- a/examples/sklearnex/knn_bf_regression_spmd.py +++ b/examples/sklearnex/knn_bf_regression_spmd.py @@ -17,7 +17,7 @@ from warnings import warn import dpctl -import dpctl.tensor as dpt +import dpnp import numpy as np from mpi4py import MPI from numpy.testing import assert_allclose @@ -57,30 +57,29 @@ def generate_X_y(par, coef_seed, data_seed): X_train, y_train, coef_train = generate_X_y(params_train, 10, rank) X_test, y_test, coef_test = generate_X_y(params_test, 10, rank + 99) -dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q) -dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q) -dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q) -# dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q) +dpnp_X_train = dpnp.asarray(X_train, usm_type="device", sycl_queue=q) +dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=q) +dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=q) assert_allclose(coef_train, coef_test) model_spmd = KNeighborsRegressor( algorithm="brute", n_neighbors=5, weights="uniform", p=2, metric="minkowski" ) -model_spmd.fit(dpt_X_train, 
dpt_y_train) +model_spmd.fit(dpnp_X_train, dpnp_y_train) -y_predict = model_spmd.predict(dpt_X_test) +y_predict = model_spmd.predict(dpnp_X_test) print("Brute Force Distributed kNN regression results:") print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5])) print( "Regression results (first 5 observations on rank {}):\n{}".format( - rank, dpt.to_numpy(y_predict)[:5] + rank, dpnp.asnumpy(y_predict)[:5] ) ) print( "MSE for entire rank {}: {}\n".format( rank, - mean_squared_error(y_test, dpt.to_numpy(y_predict)), + mean_squared_error(y_test, dpnp.asnumpy(y_predict)), ) ) diff --git a/examples/sklearnex/linear_regression_spmd.py b/examples/sklearnex/linear_regression_spmd.py index 00ca3b15ea..0b41fd596f 100755 --- a/examples/sklearnex/linear_regression_spmd.py +++ b/examples/sklearnex/linear_regression_spmd.py @@ -16,7 +16,7 @@ from warnings import warn -import dpctl.tensor as dpt +import dpnp import numpy as np from dpctl import SyclQueue from mpi4py import MPI @@ -60,17 +60,17 @@ def get_test_data(rank): queue = SyclQueue("gpu") -dpt_X = dpt.asarray(X, usm_type="device", sycl_queue=queue) -dpt_y = dpt.asarray(y, usm_type="device", sycl_queue=queue) +dpnp_X = dpnp.asarray(X, usm_type="device", sycl_queue=queue) +dpnp_y = dpnp.asarray(y, usm_type="device", sycl_queue=queue) -model = LinearRegression().fit(dpt_X, dpt_y) +model = LinearRegression().fit(dpnp_X, dpnp_y) print(f"Coefficients on rank {rank}:\n", model.coef_) print(f"Intercept on rank {rank}:\n", model.intercept_) X_test, _ = get_test_data(rank) -dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=queue) +dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=queue) -result = model.predict(dpt_X_test) +result = model.predict(dpnp_X_test) -print(f"Result on rank {rank}:\n", dpt.to_numpy(result)) +print(f"Result on rank {rank}:\n", dpnp.asnumpy(result)) diff --git a/examples/sklearnex/logistic_regression_spmd.py b/examples/sklearnex/logistic_regression_spmd.py 
index b69bfc36de..406ff2aaab 100644 --- a/examples/sklearnex/logistic_regression_spmd.py +++ b/examples/sklearnex/logistic_regression_spmd.py @@ -17,7 +17,7 @@ from warnings import warn import dpctl -import dpctl.tensor as dpt +import dpnp import numpy as np from mpi4py import MPI from scipy.special import expit @@ -65,15 +65,15 @@ def generate_X_y(par, seed): X, y, test_size=0.2, random_state=rank ) -dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q) -dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q) -dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q) -dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q) +dpnp_X_train = dpnp.asarray(X_train, usm_type="device", sycl_queue=q) +dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=q) +dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=q) +dpnp_y_test = dpnp.asarray(y_test, usm_type="device", sycl_queue=q) model_spmd = LogisticRegression() -model_spmd.fit(dpt_X_train, dpt_y_train) +model_spmd.fit(dpnp_X_train, dpnp_y_train) -y_predict = model_spmd.predict(dpt_X_test) +y_predict = model_spmd.predict(dpnp_X_test) print("Distributed LogisticRegression results:") print("Coefficients on rank {}:\n{}:".format(rank, model_spmd.coef_)) @@ -81,11 +81,11 @@ def generate_X_y(par, seed): print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5])) print( "Classification results (first 5 observations on rank {}):\n{}".format( - rank, dpt.to_numpy(y_predict)[:5] + rank, dpnp.asnumpy(y_predict)[:5] ) ) print( "Accuracy for entire rank {} (2 classes): {}\n".format( - rank, accuracy_score(y_test, dpt.to_numpy(y_predict)) + rank, accuracy_score(y_test, dpnp.asnumpy(y_predict)) ) ) diff --git a/examples/sklearnex/pca_spmd.py b/examples/sklearnex/pca_spmd.py index d6780d8653..26703a8ea7 100644 --- a/examples/sklearnex/pca_spmd.py +++ b/examples/sklearnex/pca_spmd.py @@ -15,7 +15,7 @@ # 
============================================================================== import dpctl -import dpctl.tensor as dpt +import dpnp import numpy as np from mpi4py import MPI @@ -35,9 +35,9 @@ def get_data(data_seed): size = comm.Get_size() X = get_data(rank) -dpt_X = dpt.asarray(X, usm_type="device", sycl_queue=q) +dpnp_X = dpnp.asarray(X, usm_type="device", sycl_queue=q) -pca = PCA(n_components=2).fit(dpt_X) +pca = PCA(n_components=2).fit(dpnp_X) print(f"Singular values on rank {rank}:\n", pca.singular_values_) print(f"Explained variance Ratio on rank {rank}:\n", pca.explained_variance_ratio_) diff --git a/examples/sklearnex/random_forest_classifier_dpctl.py b/examples/sklearnex/random_forest_classifier_dpctl.py deleted file mode 100644 index 061b9e0473..0000000000 --- a/examples/sklearnex/random_forest_classifier_dpctl.py +++ /dev/null @@ -1,53 +0,0 @@ -# ============================================================================== -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -# sklearnex RF example for GPU offloading with DPCtl tensor: -# python ./random_forest_classifier_dpctl_batch.py - -import dpctl -import dpctl.tensor as dpt -import numpy as np -from sklearn.datasets import make_classification -from sklearn.model_selection import train_test_split - -from sklearnex.ensemble import RandomForestClassifier - -# Make sure that all DPCtl tensors using the same device. -q = dpctl.SyclQueue("gpu") # GPU - -X, y = make_classification( - n_samples=1000, - n_features=4, - n_informative=2, - n_redundant=0, - random_state=0, - shuffle=False, -) - -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) - -dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q) -dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q) -dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q) - -rf = RandomForestClassifier(max_depth=2, random_state=0).fit(dpt_X_train, dpt_y_train) - -pred = rf.predict(dpt_X_test) - -print("Random Forest classification results:") -print("Ground truth (first 5 observations):\n{}".format(y_test[:5])) -print("Classification results (first 5 observations):\n{}".format(pred[:5])) -print("Are predicted results on GPU: {}".format(pred.sycl_device.is_gpu)) diff --git a/examples/sklearnex/random_forest_classifier_spmd.py b/examples/sklearnex/random_forest_classifier_spmd.py index da02a8ac81..4704243afc 100644 --- a/examples/sklearnex/random_forest_classifier_spmd.py +++ b/examples/sklearnex/random_forest_classifier_spmd.py @@ -19,7 +19,7 @@ # mpirun -n 4 python ./random_forest_classifier_spmd.py import dpctl -import dpctl.tensor as dpt +import dpnp import numpy as np from mpi4py import MPI @@ -48,18 +48,18 @@ def generate_X_y(par, seed): q = dpctl.SyclQueue("gpu") # GPU -dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q) -dpt_y_train = dpt.asarray(y_train, 
usm_type="device", sycl_queue=q) -dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q) +dpnp_X_train = dpnp.asarray(X_train, usm_type="device", sycl_queue=q) +dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=q) +dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=q) -rf = RandomForestClassifier(max_depth=2, random_state=0).fit(dpt_X_train, dpt_y_train) +rf = RandomForestClassifier(max_depth=2, random_state=0).fit(dpnp_X_train, dpnp_y_train) -pred = rf.predict(dpt_X_test) +pred = rf.predict(dpnp_X_test) print("Random Forest classification results:") print("Ground truth (first 5 observations on rank {}):\n{}".format(mpi_rank, y_test[:5])) print( "Classification results (first 5 observations on rank {}):\n{}".format( - mpi_rank, dpt.to_numpy(pred)[:5] + mpi_rank, dpnp.asnumpy(pred)[:5] ) ) diff --git a/examples/sklearnex/random_forest_regressor_dpnp.py b/examples/sklearnex/random_forest_regressor_dpnp.py deleted file mode 100644 index da87a10b4b..0000000000 --- a/examples/sklearnex/random_forest_regressor_dpnp.py +++ /dev/null @@ -1,60 +0,0 @@ -# ============================================================================== -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -# sklearnex RF example for GPU offloading with DPNP ndarray: -# python ./random_forest_regressor_dpnp.py - -import dpctl -import dpnp -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split - -# Import estimator via sklearnex's patch mechanism from sklearn -from sklearnex import patch_sklearn, sklearn_is_patched - -patch_sklearn() - -# Function that can validate current state of patching -sklearn_is_patched() - -# Import estimator from the patched sklearn namespace. -from sklearn.ensemble import RandomForestRegressor - -# Or just directly import estimator from sklearnex namespace. -from sklearnex.ensemble import RandomForestRegressor - -# We create GPU SyclQueue and then put data to dpctl tensor using -# the queue. It allows us to do computation on GPU. -queue = dpctl.SyclQueue("gpu") - -X, y = make_regression( - n_samples=1000, n_features=4, n_informative=2, random_state=0, shuffle=False -) - -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) - -dpnp_X_train = dpnp.asarray(X_train, usm_type="device", sycl_queue=queue) -dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=queue) -dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=queue) - -rf = RandomForestRegressor(max_depth=2, random_state=0).fit(dpnp_X_train, dpnp_y_train) - -pred = rf.predict(dpnp_X_test) - -print("Random Forest regression results:") -print("Ground truth (first 5 observations):\n{}".format(y_test[:5])) -print("Regression results (first 5 observations):\n{}".format(pred[:5])) -print("Are predicted results on GPU: {}".format(pred.sycl_device.is_gpu)) diff --git a/examples/sklearnex/random_forest_regressor_spmd.py b/examples/sklearnex/random_forest_regressor_spmd.py index 6f2ce9cdff..7c7198b94d 100644 --- a/examples/sklearnex/random_forest_regressor_spmd.py +++ 
b/examples/sklearnex/random_forest_regressor_spmd.py @@ -19,7 +19,6 @@ # mpirun -n 4 python ./random_forest_regressor_spmd.py import dpctl -import dpctl.tensor as dpt import dpnp import numpy as np from mpi4py import MPI @@ -53,18 +52,14 @@ def generate_X_y(par, coef_seed, data_seed): assert_allclose(coef_train, coef_test) -# Both `dpnp.ndarrays` and `dpctl.tensors` can be used in the same flow -# for invoking GPU offloading. Just make sure that, they are using -# the same sycl context. +q = dpctl.SyclQueue("gpu") -q = dpctl.SyclQueue("gpu") # GPU - -dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q) -dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q) +dpnp_X_train = dpnp.asarray(X_train, usm_type="device", sycl_queue=q) +dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=q) dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=q) -rf = RandomForestRegressor(max_depth=2, random_state=0).fit(dpt_X_train, dpt_y_train) +rf = RandomForestRegressor(max_depth=2, random_state=0).fit(dpnp_X_train, dpnp_y_train) y_predict = rf.predict(dpnp_X_test) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 9410ed2124..15a58efc99 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -98,13 +98,7 @@ def return_type_constructor(array): xp = array.__array_namespace__() # array api support added in dpnp starting in 0.19, will fail for # older versions - if is_dpctl_tensor(array): - func = lambda x: ( - xp.asarray(x) - if hasattr(x, "__sycl_usm_array_interface__") - else xp.asarray(backend.from_table(x), device=device) - ) - elif is_dpnp_ndarray(array): + if is_dpnp_ndarray(array): func = lambda x: ( xp.asarray(xp.as_usm_ndarray(x)) if hasattr(x, "__sycl_usm_array_interface__") @@ -128,8 +122,8 @@ def return_type_constructor(array): def from_table(*args, like=None): """Create 2 dimensional arrays from oneDAL tables. 
- oneDAL tables are converted to numpy ndarrays, dpctl tensors, dpnp - ndarrays, or array API standard arrays of designated type. + oneDAL tables are converted to numpy ndarrays, dpnp ndarrays, + or array API standard arrays of designated type. Parameters ---------- diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index 5664f10206..1471063d9d 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -247,7 +247,7 @@ def test_conversion_to_table(dtype): reason="__sycl_usm_array_interface__ support requires DPC backend.", ) @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpctl,dpnp", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("dpnp", "cpu,gpu") ) @pytest.mark.parametrize("order", ["C", "F"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64]) @@ -281,7 +281,7 @@ def test_input_zero_copy_sycl_usm(dataframe, queue, order, dtype): reason="__sycl_usm_array_interface__ support requires DPC backend.", ) @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpctl,dpnp", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("dpnp", "cpu,gpu") ) @pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("data_shape", data_shapes) @@ -340,7 +340,7 @@ def test_table_conversions_sycl_usm(dataframe, queue, order, data_shape, dtype): @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("numpy,dpctl,dpnp,array_api", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("numpy,dpnp,array_api", "cpu,gpu") ) @pytest.mark.parametrize("data_shape", unsupported_data_shapes) def test_interop_invalid_shape(dataframe, queue, data_shape): @@ -357,7 +357,7 @@ def test_interop_invalid_shape(dataframe, queue, data_shape): @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpctl,dpnp,array_api", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("dpnp,array_api", 
"cpu,gpu") ) @pytest.mark.parametrize( "dtype", @@ -370,8 +370,7 @@ def test_interop_invalid_shape(dataframe, queue, data_shape): def test_interop_unsupported_dtypes(dataframe, queue, dtype): # sua iface interobility supported only for oneDAL supported dtypes # for input data: int32, int64, float32, float64. - # Checking some common dtypes supported by dpctl, dpnp for exception - # raise. + # Checking some common dtypes supported by dpnp for exception raise. X = np.zeros((10, 20), dtype=dtype) X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) expected_err_msg = r"Found unsupported (array|tensor) type" @@ -381,10 +380,10 @@ def test_interop_unsupported_dtypes(dataframe, queue, dtype): @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("numpy,dpctl,dpnp", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("numpy,dpnp", "cpu,gpu") ) def test_to_table_non_contiguous_input(dataframe, queue): - if dataframe in "dpnp,dpctl" and not backend.is_dpc: + if dataframe == "dpnp" and not backend.is_dpc: pytest.skip("__sycl_usm_array_interface__ support requires DPC backend.") X, _ = np.mgrid[:10, :10] X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) @@ -399,7 +398,7 @@ def test_to_table_non_contiguous_input(dataframe, queue): reason="Required check should be done if no DPC backend.", ) @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpctl,dpnp", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("dpnp", "cpu,gpu") ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_interop_if_no_dpc_backend_sycl_usm(dataframe, queue, dtype): @@ -525,7 +524,7 @@ def test_basic_ndarray_types_numpy(X): @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpctl,numpy", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("numpy", "cpu,gpu") ) @pytest.mark.parametrize("can_copy", [True, False]) def test_to_table_non_contiguous_input_dlpack(dataframe, queue, 
can_copy): @@ -552,7 +551,7 @@ def test_to_table_non_contiguous_input_dlpack(dataframe, queue, can_copy): @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpctl,numpy", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("dpnp,numpy", "cpu,gpu") ) @pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("data_shape", data_shapes) @@ -578,7 +577,7 @@ def test_table_conversions_dlpack(dataframe, queue, order, data_shape, dtype): @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("numpy,dpctl,array_api", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("numpy,dpnp,array_api", "cpu,gpu") ) @pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("data_shape", data_shapes) diff --git a/onedal/dummy/dummy.py b/onedal/dummy/dummy.py index 99621e0634..bba79bee02 100644 --- a/onedal/dummy/dummy.py +++ b/onedal/dummy/dummy.py @@ -93,7 +93,7 @@ def fit(self, X, y, queue=None): # Oftentimes oneDAL table objects are attributes of the oneDAL C++ # object. These can be converted into various common data frameworks - # like ``numpy`` or ``dpctl.tensor`` using ``from_table``. In this + # like ``numpy`` or ``dpnp.ndarray`` using ``from_table``. In this # case the output is a basic python type (bool) which can be handled # easily just with pybind11 without any special code. 
Attributes of # the result object are copied to attributes of the onedal estimator diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py index 989dfffc8f..6d56f3aa59 100644 --- a/onedal/tests/utils/_dataframes_support.py +++ b/onedal/tests/utils/_dataframes_support.py @@ -20,11 +20,6 @@ from sklearnex import get_config -from ...utils._third_party import dpctl_available - -if dpctl_available: - import dpctl.tensor as dpt - try: import dpnp @@ -55,7 +50,7 @@ from onedal.tests.utils._device_selection import get_queues test_frameworks = os.environ.get( - "ONEDAL_PYTEST_FRAMEWORKS", "numpy,pandas,dpnp,dpctl,array_api" + "ONEDAL_PYTEST_FRAMEWORKS", "numpy,pandas,dpnp,array_api" ) @@ -113,8 +108,6 @@ def get_df_and_q(dataframe: str): df_and_q.append(pytest.param(dataframe, queue.values[0], id=id)) return df_and_q - if dpctl_available and "dpctl" in dataframe_filter_: - dataframes_and_queues.extend(get_df_and_q("dpctl")) if dpnp_available and "dpnp" in dataframe_filter_: dataframes_and_queues.extend(get_df_and_q("dpnp")) if ( @@ -131,8 +124,6 @@ def _as_numpy(obj, *args, **kwargs): """Converted input object to numpy.ndarray format.""" if dpnp_available and isinstance(obj, dpnp.ndarray): return obj.asnumpy(*args, **kwargs) - if dpctl_available and isinstance(obj, dpt.usm_ndarray): - return dpt.to_numpy(obj, *args, **kwargs) if isinstance(obj, pd.DataFrame) or isinstance(obj, pd.Series): return obj.to_numpy(*args, **kwargs) if sp.issparse(obj): @@ -166,12 +157,8 @@ def _convert_to_dataframe(obj, sycl_queue=None, target_df=None, *args, **kwargs) return dpnp.asarray( obj, usm_type="device", sycl_queue=sycl_queue, *args, **kwargs ) - elif target_df == "dpctl": - # DPCtl tensor. - return dpt.asarray(obj, usm_type="device", sycl_queue=sycl_queue, *args, **kwargs) elif target_df in array_api_modules: - # Array API input other than DPNP ndarray, DPCtl tensor or - # Numpy ndarray. 
+ # Array API input other than DPNP ndarray or Numpy ndarray. xp = array_api_modules[target_df] return xp.asarray(obj) diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index f5a44f7a38..e64d9bc1de 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -65,12 +65,7 @@ def _is_numpy_namespace(xp): @lru_cache(100) def _cls_to_sycl_namespace(cls): - # use caching to minimize imports, derived from array_api_compat - if _is_subclass_fast(cls, "dpctl.tensor", "usm_ndarray"): - import dpctl.tensor as dpt - - return dpt - elif _is_subclass_fast(cls, "dpnp", "ndarray"): + if _is_subclass_fast(cls, "dpnp", "ndarray"): import dpnp return dpnp diff --git a/onedal/utils/_third_party.py b/onedal/utils/_third_party.py index ba3a625cd1..7317887b25 100644 --- a/onedal/utils/_third_party.py +++ b/onedal/utils/_third_party.py @@ -182,25 +182,6 @@ def is_dpnp_ndarray(x: object) -> bool: return _is_subclass_fast(type(x), "dpnp", "ndarray") -def is_dpctl_tensor(x: object) -> bool: - """Return True if 'x' is a dpctl usm_ndarray. - - This function does not import dpctl.tensor if it has not already been - imported and is therefore cheap to use. - - Parameters - ---------- - x : object - Any python object. - - Returns - ------- - is_dpctl : bool - Flag if subclass of dpctl.tensor.usm_ndarray. - """ - return _is_subclass_fast(type(x), "dpctl.tensor", "usm_ndarray") - - def is_torch_tensor(x: object) -> bool: """Return True if 'x' is a PyTorch Tensor. 
diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index a624717bca..010cb6f928 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -40,7 +40,7 @@ ) @pytest.mark.parametrize("allow_nan", [False, True]) @pytest.mark.parametrize( - "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl,array_api") + "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,array_api") ) def test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue): X = np.empty(shape, dtype=dtype) @@ -65,7 +65,7 @@ def test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue) @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, 123456]) @pytest.mark.parametrize( - "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl,array_api") + "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,array_api") ) def test_assert_finite_random_location( dtype, shape, allow_nan, check, seed, dataframe, queue @@ -92,7 +92,7 @@ def test_assert_finite_random_location( @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, 123456]) @pytest.mark.parametrize( - "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl,array_api") + "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,array_api") ) def test_assert_finite_random_shape_and_location( dtype, allow_nan, check, seed, dataframe, queue diff --git a/sklearnex/decomposition/tests/test_pca.py b/sklearnex/decomposition/tests/test_pca.py index bd5dca1fcb..5975243fd2 100755 --- a/sklearnex/decomposition/tests/test_pca.py +++ b/sklearnex/decomposition/tests/test_pca.py @@ -113,7 +113,7 @@ def test_non_batched_covariance(hyperparameters, dataframe, queue): assert_allclose(res_non_batched, res_batched) -@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("numpy,dpctl")) +@pytest.mark.parametrize("dataframe,queue", 
get_dataframes_and_queues("numpy,dpnp")) def test_changed_estimated_attributes(with_array_api, dataframe, queue): # check that attributes necessary for the PCA onedal estimator match # changes occurring in the sklearnex estimator diff --git a/sklearnex/dummy/tests/test_dummy.py b/sklearnex/dummy/tests/test_dummy.py index 7b88d4eaed..d3333b55b8 100644 --- a/sklearnex/dummy/tests/test_dummy.py +++ b/sklearnex/dummy/tests/test_dummy.py @@ -44,7 +44,7 @@ def test_sklearnex_import_DummyRegressor(dataframe, queue): @pytest.mark.skipif( not sklearn_check_version("1.3"), reason="lacks sklearn array API support" ) -@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("dpctl,dpnp")) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("dpnp")) def test_fitted_attribute_conversion_DummyRegressor(dataframe, queue): rng = np.random.default_rng(seed=42) diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py b/sklearnex/preview/decomposition/tests/test_incremental_pca.py index f839366557..d1d2e74f50 100644 --- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py +++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py @@ -339,7 +339,7 @@ def test_sklearnex_incremental_estimatior_pickle(dataframe, queue, dtype): ) -@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("numpy,dpctl")) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("numpy,dpnp")) def test_changed_estimated_attributes(with_array_api, dataframe, queue): # check that attributes necessary for the PCA onedal estimator match # changes occurring in the sklearnex estimator diff --git a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py index e399fff082..e36311ec6c 100644 --- a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +++ b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py @@ 
-38,7 +38,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_basic_stats_spmd_gold(dataframe, queue): @@ -82,7 +82,7 @@ def test_basic_stats_spmd_gold(dataframe, queue): @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi diff --git a/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py b/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py index c9cf98e1e6..9626f70733 100644 --- a/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +++ b/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py @@ -38,7 +38,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("weighted", [True, False]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -50,7 +50,7 @@ def test_incremental_basic_statistics_fit_spmd_gold(dataframe, queue, weighted, IncrementalBasicStatistics as IncrementalBasicStatistics_SPMD, ) - # Create gold data and process into dpt + # Create gold data and process into dpnp data = np.array( [ [0.0, 0.0, 0.0], @@ -104,7 +104,7 @@ def test_incremental_basic_statistics_fit_spmd_gold(dataframe, queue, weighted, ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", 
device_filter_="gpu"), ) @pytest.mark.parametrize("num_blocks", [1, 2]) @pytest.mark.parametrize("weighted", [True, False]) @@ -119,7 +119,7 @@ def test_incremental_basic_statistics_partial_fit_spmd_gold( IncrementalBasicStatistics as IncrementalBasicStatistics_SPMD, ) - # Create gold data and process into dpt + # Create gold data and process into dpnp data = np.array( [ [0.0, 0.0, 0.0], @@ -178,7 +178,7 @@ def test_incremental_basic_statistics_partial_fit_spmd_gold( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("num_blocks", [1, 2]) @pytest.mark.parametrize("weighted", [True, False]) @@ -194,7 +194,7 @@ def test_incremental_basic_statistics_single_option_partial_fit_spmd_gold( IncrementalBasicStatistics as IncrementalBasicStatistics_SPMD, ) - # Create gold data and process into dpt + # Create gold data and process into dpnp data = np.array( [ [0.0, 0.0, 0.0], @@ -247,7 +247,7 @@ def test_incremental_basic_statistics_single_option_partial_fit_spmd_gold( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("num_blocks", [1, 2]) @pytest.mark.parametrize("weighted", [True, False]) @@ -267,7 +267,7 @@ def test_incremental_basic_statistics_partial_fit_spmd_synthetic( tol = 2e-3 if dtype == np.float32 else 1e-7 - # Create gold data and process into dpt + # Create gold data and process into dpnp data = _generate_statistic_data(n_samples, n_features, dtype=dtype) local_data = _get_local_tensor(data) split_local_data = np.array_split(local_data, num_blocks) diff --git a/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py b/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py index d6671b9748..c5bbb034c0 100644 --- 
a/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +++ b/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py @@ -36,7 +36,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_dbscan_spmd_gold(dataframe, queue): @@ -67,7 +67,7 @@ def test_dbscan_spmd_gold(dataframe, queue): @pytest.mark.parametrize("min_samples", [2, 5, 15]) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) diff --git a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py index 69cc0a8580..1ed14a02fe 100644 --- a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +++ b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py @@ -38,7 +38,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_kmeans_spmd_gold(dataframe, queue): @@ -106,7 +106,7 @@ def test_kmeans_spmd_gold(dataframe, queue): @pytest.mark.parametrize("n_clusters", [2, 5, 15]) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) diff --git a/sklearnex/spmd/covariance/tests/test_covariance_spmd.py b/sklearnex/spmd/covariance/tests/test_covariance_spmd.py index 86fa630f3a..f30534c616 100644 --- 
a/sklearnex/spmd/covariance/tests/test_covariance_spmd.py +++ b/sklearnex/spmd/covariance/tests/test_covariance_spmd.py @@ -37,7 +37,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_covariance_spmd_gold(dataframe, queue): diff --git a/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py b/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py index 8e8fc33649..034f637fa5 100644 --- a/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +++ b/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py @@ -37,7 +37,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("assume_centered", [True, False]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -49,7 +49,7 @@ def test_incremental_covariance_fit_spmd_gold(dataframe, queue, assume_centered, IncrementalEmpiricalCovariance as IncrementalEmpiricalCovariance_SPMD, ) - # Create gold data and process into dpt + # Create gold data and process into dpnp data = np.array( [ [0.0, 0.0, 0.0], @@ -88,7 +88,7 @@ def test_incremental_covariance_fit_spmd_gold(dataframe, queue, assume_centered, ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("num_blocks", [1, 2]) @pytest.mark.parametrize("assume_centered", [True, False]) @@ -103,7 +103,7 @@ def test_incremental_covariance_partial_fit_spmd_gold( IncrementalEmpiricalCovariance as IncrementalEmpiricalCovariance_SPMD, ) - # Create gold data and process into dpt + # 
Create gold data and process into dpnp data = np.array( [ [0.0, 0.0, 0.0], @@ -149,7 +149,7 @@ def test_incremental_covariance_partial_fit_spmd_gold( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi @@ -169,7 +169,7 @@ def test_incremental_covariance_partial_fit_spmd_synthetic( IncrementalEmpiricalCovariance as IncrementalEmpiricalCovariance_SPMD, ) - # Generate data and process into dpt + # Generate data and process into dpnp data = _generate_statistic_data(n_samples, n_features, dtype=dtype) dpt_data = _convert_to_dataframe(data, sycl_queue=queue, target_df=dataframe) diff --git a/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py b/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py index 75d9c68e12..09b570b5fe 100644 --- a/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +++ b/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py @@ -47,7 +47,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("whiten", [True, False]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -57,7 +57,7 @@ def test_incremental_pca_fit_spmd_gold(dataframe, queue, whiten, dtype): from sklearnex.preview.decomposition import IncrementalPCA from sklearnex.spmd.decomposition import IncrementalPCA as IncrementalPCA_SPMD - # Create gold data and process into dpt + # Create gold data and process into dpnp X = np.array( [ [0.0, 0.0], @@ -95,7 +95,7 @@ def test_incremental_pca_fit_spmd_gold(dataframe, queue, whiten, dtype): ) @pytest.mark.parametrize( 
"dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("whiten", [True, False]) @pytest.mark.parametrize("num_blocks", [1, 2]) @@ -108,7 +108,7 @@ def test_incremental_pca_partial_fit_spmd_gold( from sklearnex.preview.decomposition import IncrementalPCA from sklearnex.spmd.decomposition import IncrementalPCA as IncrementalPCA_SPMD - # Create gold data and process into dpt + # Create gold data and process into dpnp X = np.array( [ [0.0, 0.0], @@ -159,7 +159,7 @@ def test_incremental_pca_partial_fit_spmd_gold( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("whiten", [True, False]) @pytest.mark.parametrize("n_components", [None, 2, 5]) @@ -177,7 +177,7 @@ def test_incremental_pca_fit_spmd_random( # Increased test dataset size requires a higher tol setting in comparison to other tests tol = 7e-5 if dtype == np.float32 else 1e-7 - # Create data and process into dpt + # Create data and process into dpnp X = _generate_statistic_data(num_samples, num_features, dtype) dpt_X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) X_test = _generate_statistic_data(num_samples // 5, num_features, dtype) @@ -211,7 +211,7 @@ def test_incremental_pca_fit_spmd_random( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("whiten", [True, False]) @pytest.mark.parametrize("n_components", [None, 2, 5]) @@ -238,7 +238,7 @@ def test_incremental_pca_partial_fit_spmd_random( tol = 3e-4 if dtype == np.float32 else 1e-7 - # Create data and process into dpt + # Create data 
and process into dpnp X = _generate_statistic_data(num_samples, num_features, dtype) dpt_X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) X_test = _generate_statistic_data(num_samples // 5, num_features, dtype) diff --git a/sklearnex/spmd/decomposition/tests/test_pca_spmd.py b/sklearnex/spmd/decomposition/tests/test_pca_spmd.py index b17f53ed1c..2144620881 100644 --- a/sklearnex/spmd/decomposition/tests/test_pca_spmd.py +++ b/sklearnex/spmd/decomposition/tests/test_pca_spmd.py @@ -37,7 +37,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_pca_spmd_gold(dataframe, queue): @@ -90,7 +90,7 @@ def test_pca_spmd_gold(dataframe, queue): @pytest.mark.parametrize("whiten", [True, False]) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) diff --git a/sklearnex/spmd/ensemble/tests/test_forest_spmd.py b/sklearnex/spmd/ensemble/tests/test_forest_spmd.py index 07ca5e1d0f..dbbd2f67ef 100644 --- a/sklearnex/spmd/ensemble/tests/test_forest_spmd.py +++ b/sklearnex/spmd/ensemble/tests/test_forest_spmd.py @@ -38,7 +38,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_rfcls_spmd_gold(dataframe, queue): @@ -107,7 +107,7 @@ def test_rfcls_spmd_gold(dataframe, queue): @pytest.mark.parametrize("local_trees_mode", [False, True]) @pytest.mark.parametrize( "dataframe,queue", - 
get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) @@ -173,7 +173,7 @@ def test_rfcls_spmd_synthetic( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_rfreg_spmd_gold(dataframe, queue): @@ -242,7 +242,7 @@ def test_rfreg_spmd_gold(dataframe, queue): @pytest.mark.parametrize("local_trees_mode", [False, True]) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) diff --git a/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py b/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py index e7143de246..dabf83b8a7 100644 --- a/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py @@ -37,7 +37,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize("macro_block", [None, 1024]) @@ -52,7 +52,7 @@ def test_incremental_linear_regression_fit_spmd_gold( IncrementalLinearRegression as IncrementalLinearRegression_SPMD, ) - # Create gold data and process into dpt + # Create gold data and process into dpnp X = np.array( [ [0.0, 0.0], @@ -112,7 +112,7 @@ def 
test_incremental_linear_regression_fit_spmd_gold( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize("num_blocks", [1, 2]) @@ -129,7 +129,7 @@ def test_incremental_linear_regression_partial_fit_spmd_gold( IncrementalLinearRegression as IncrementalLinearRegression_SPMD, ) - # Create gold data and process into dpt + # Create gold data and process into dpnp X = np.array( [ [0.0, 0.0], @@ -199,7 +199,7 @@ def test_incremental_linear_regression_partial_fit_spmd_gold( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize("num_samples", [100, 1000]) @@ -218,7 +218,7 @@ def test_incremental_linear_regression_fit_spmd_random( tol = 5e-3 if dtype == np.float32 else 1e-7 - # Generate random data and process into dpt + # Generate random data and process into dpnp X_train, X_test, y_train, _ = _generate_regression_data( num_samples, num_features, dtype ) @@ -264,7 +264,7 @@ def test_incremental_linear_regression_fit_spmd_random( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize("num_blocks", [1, 2]) @@ -291,7 +291,7 @@ def test_incremental_linear_regression_partial_fit_spmd_random( tol = 5e-3 if dtype == np.float32 else 1e-7 - # Generate random data and process into dpt + # Generate random data and process into dpnp X_train, X_test, y_train, _ = _generate_regression_data( 
num_samples, num_features, dtype, 573 ) diff --git a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py index 3f18164b4e..e5edd2df84 100644 --- a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py @@ -38,7 +38,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_linear_spmd_gold(dataframe, queue): @@ -102,7 +102,7 @@ def test_linear_spmd_gold(dataframe, queue): @pytest.mark.parametrize("n_features", [10, 100]) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) diff --git a/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py b/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py index abb49f57ab..62d51a18e4 100644 --- a/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py @@ -38,7 +38,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_logistic_spmd_gold(dataframe, queue): @@ -112,7 +112,7 @@ def test_logistic_spmd_gold(dataframe, queue): @pytest.mark.parametrize("tol", [1e-2, 1e-4]) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + 
get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) diff --git a/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py b/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py index ca41194701..1757041c2e 100644 --- a/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +++ b/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py @@ -39,7 +39,7 @@ ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_knncls_spmd_gold(dataframe, queue): @@ -109,7 +109,7 @@ def test_knncls_spmd_gold(dataframe, queue): @pytest.mark.parametrize("weights", ["uniform", "distance"]) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) @@ -175,7 +175,7 @@ def test_knncls_spmd_synthetic( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_knnreg_spmd_gold(dataframe, queue): @@ -247,7 +247,7 @@ def test_knnreg_spmd_gold(dataframe, queue): ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) @@ -312,7 +312,7 @@ def test_knnreg_spmd_synthetic( ) 
@pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_knnsearch_spmd_gold(dataframe, queue): @@ -349,7 +349,7 @@ def test_knnsearch_spmd_gold(dataframe, queue): ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.mpi @@ -394,7 +394,7 @@ def test_knnsearch_spmd_synthetic( ) @pytest.mark.parametrize( "dataframe,queue", - get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), + get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.mpi def test_knn_spmd_empty_kneighbors(dataframe, queue): diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 17d53e67c8..bc5e585e46 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -307,7 +307,7 @@ def test_gpu_memory_leaks(estimator, queue, order, data_shape): @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpctl,dpnp,array_api", "cpu,gpu") + "dataframe,queue", get_dataframes_and_queues("dpnp,array_api", "cpu,gpu") ) @pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("data_shape", data_shapes) diff --git a/sklearnex/tests/utils/spmd.py b/sklearnex/tests/utils/spmd.py index d5feae9500..7c9a38fa82 100644 --- a/sklearnex/tests/utils/spmd.py +++ b/sklearnex/tests/utils/spmd.py @@ -40,10 +40,10 @@ def _get_local_tensor(full_data): Called on each rank to extract the subset of data assigned to that rank. 
Args: - full_data (numpy or dpctl array): The entire set of data + full_data (numpy or dpnp array): The entire set of data Returns: - local_data (numpy or dpctl array): The subset of data used by the rank + local_data (numpy or dpnp array): The subset of data used by the rank """ # create sycl queue and gather communicator details @@ -52,7 +52,7 @@ def _get_local_tensor(full_data): rank = comm.Get_rank() size = comm.Get_size() - # divide data across ranks and move to dpt tensor + # divide data across ranks and move to dpnp array data_rows = full_data.shape[0] local_start = rank * data_rows // size local_end = (1 + rank) * data_rows // size @@ -124,7 +124,7 @@ def _spmd_assert_allclose(spmd_result, batch_result, **kwargs): subset of batch result that corresponds to that rank. Args: - spmd_result (numpy or dpctl array): The result for the subset of data on the rank the function is called from, computed by the spmd estimator + spmd_result (numpy or dpnp array): The result for the subset of data on the rank the function is called from, computed by the spmd estimator batch_result (numpy array): The result for all data, computed by the batch estimator Raises: @@ -145,7 +145,7 @@ def _assert_unordered_allclose(spmd_result, batch_result, localize=False, **kwar capable of handling localization. Args: - spmd_result (numpy or dpctl array): Result computed by the spmd estimator + spmd_result (numpy or dpnp array): Result computed by the spmd estimator batch_result (numpy array): Result computed by batch estimator localize (bool): Whether of not spmd result is specific to the rank, in which case batch result needs to be localized @@ -179,9 +179,9 @@ def _assert_kmeans_labels_allclose( may not match) to identify cluster center and ensure results match. 
Args: - spmd_labels (numpy or dpctl array): The labels for the subset of data on the rank the function is called from, computed by the spmd estimator + spmd_labels (numpy or dpnp array): The labels for the subset of data on the rank the function is called from, computed by the spmd estimator batch_labels (numpy array): The labels for all data, computed by the batch estimator - spmd_centers (numpy or dpctl array): Centers computed by the spmd estimator + spmd_centers (numpy or dpnp array): Centers computed by the spmd estimator batch_centers (numpy array): Centers computed by batch estimator Raises: diff --git a/tests/run_examples.py b/tests/run_examples.py index e3222c18ad..223b7944a3 100755 --- a/tests/run_examples.py +++ b/tests/run_examples.py @@ -145,41 +145,35 @@ def check_library(rule): req_device["basic_statistics_spmd.py"] = ["gpu"] req_device["covariance_spmd.py"] = ["gpu"] req_device["dbscan_spmd.py"] = ["gpu"] -req_device["incremental_basic_statistics_dpctl.py"] = ["gpu"] +req_device["incremental_basic_statistics_dpnp.py"] = ["gpu"] req_device["incremental_covariance_spmd.py"] = ["gpu"] -req_device["incremental_linear_regression_dpctl.py"] = ["gpu"] -req_device["incremental_pca_dpctl.py"] = ["gpu"] +req_device["incremental_linear_regression_dpnp.py"] = ["gpu"] +req_device["incremental_pca_dpnp.py"] = ["gpu"] req_device["kmeans_spmd.py"] = ["gpu"] -req_device["knn_bf_classification_dpnp.py"] = ["gpu"] req_device["knn_bf_classification_spmd.py"] = ["gpu"] req_device["knn_bf_regression_spmd.py"] = ["gpu"] req_device["linear_regression_spmd.py"] = ["gpu"] req_device["logistic_regression_spmd.py"] = ["gpu"] req_device["pca_spmd.py"] = ["gpu"] -req_device["random_forest_classifier_dpctl.py"] = ["gpu"] req_device["random_forest_classifier_spmd.py"] = ["gpu"] -req_device["random_forest_regressor_dpnp.py"] = ["gpu"] req_device["random_forest_regressor_spmd.py"] = ["gpu"] req_library = defaultdict(lambda: []) -req_library["basic_statistics_spmd.py"] = ["dpctl", 
"mpi4py"] -req_library["covariance_spmd.py"] = ["dpctl", "mpi4py"] -req_library["dbscan_spmd.py"] = ["dpctl", "mpi4py"] +req_library["basic_statistics_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] +req_library["covariance_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] +req_library["dbscan_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] req_library["dbscan_array_api.py"] = ["array_api_strict"] -req_library["incremental_basic_statistics_dpctl.py"] = ["dpctl"] -req_library["incremental_covariance_spmd.py"] = ["dpctl", "mpi4py"] -req_library["incremental_linear_regression_dpctl.py"] = ["dpctl"] -req_library["incremental_pca_dpctl.py"] = ["dpctl"] -req_library["kmeans_spmd.py"] = ["dpctl", "mpi4py"] -req_library["knn_bf_classification_dpnp.py"] = ["dpctl", "dpnp"] -req_library["knn_bf_classification_spmd.py"] = ["dpctl", "mpi4py"] -req_library["knn_bf_regression_spmd.py"] = ["dpctl", "mpi4py"] -req_library["linear_regression_spmd.py"] = ["dpctl", "mpi4py"] -req_library["logistic_regression_spmd.py"] = ["dpctl", "mpi4py"] -req_library["pca_spmd.py"] = ["dpctl", "mpi4py"] -req_library["random_forest_classifier_dpctl.py"] = ["dpctl"] -req_library["random_forest_classifier_spmd.py"] = ["dpctl", "mpi4py"] -req_library["random_forest_regressor_dpnp.py"] = ["dpnp"] +req_library["incremental_basic_statistics_dpnp.py"] = ["dpctl" "dpnp",] +req_library["incremental_covariance_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] +req_library["incremental_linear_regression_dpnp.py"] = ["dpctl" "dpnp",] +req_library["incremental_pca_dpnp.py"] = ["dpctl" "dpnp",] +req_library["kmeans_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] +req_library["knn_bf_classification_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] +req_library["knn_bf_regression_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] +req_library["linear_regression_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] +req_library["logistic_regression_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] +req_library["pca_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] +req_library["random_forest_classifier_spmd.py"] 
= ["dpctl", "dpnp", "mpi4py"] req_library["random_forest_regressor_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] req_os = defaultdict(lambda: []) @@ -187,15 +181,12 @@ def check_library(rule): req_os["covariance_spmd.py"] = ["lnx"] req_os["dbscan_spmd.py"] = ["lnx"] req_os["kmeans_spmd.py"] = ["lnx"] -req_os["knn_bf_classification_dpnp.py"] = ["lnx"] req_os["knn_bf_classification_spmd.py"] = ["lnx"] req_os["knn_bf_regression_spmd.py"] = ["lnx"] req_os["linear_regression_spmd.py"] = ["lnx"] req_os["logistic_regression_spmd.py"] = ["lnx"] req_os["pca_spmd.py"] = ["lnx"] -req_os["random_forest_classifier_dpctl.py"] = ["lnx"] req_os["random_forest_classifier_spmd.py"] = ["lnx"] -req_os["random_forest_regressor_dpnp.py"] = ["lnx"] req_os["random_forest_regressor_spmd.py"] = ["lnx"] skiped_files = [] diff --git a/tests/test_examples_sklearnex.py b/tests/test_examples_sklearnex.py index e49c2958c3..1d70c77662 100644 --- a/tests/test_examples_sklearnex.py +++ b/tests/test_examples_sklearnex.py @@ -34,7 +34,7 @@ [ f for f in os.listdir(examples_path) - if f.endswith(".py") and "spmd" not in f and "dpnp" not in f and "dpctl" not in f + if f.endswith(".py") and "spmd" not in f and "dpnp" not in f ], ) def test_sklearn_example(file): From a78832f735b30812553bc993199d4b1d5b60a32a Mon Sep 17 00:00:00 2001 From: y Date: Thu, 6 Nov 2025 08:52:20 -0800 Subject: [PATCH 2/6] fixes and formatting --- .../sklearnex/incremental_covariance_spmd.py | 1 + onedal/datatypes/_data_conversion.py | 4 ++-- onedal/datatypes/tests/test_data.py | 16 ++++------------ tests/run_examples.py | 12 +++++++++--- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/examples/sklearnex/incremental_covariance_spmd.py b/examples/sklearnex/incremental_covariance_spmd.py index c134524a05..213964ca1b 100644 --- a/examples/sklearnex/incremental_covariance_spmd.py +++ b/examples/sklearnex/incremental_covariance_spmd.py @@ -28,6 +28,7 @@ def get_local_data(data, comm): local_size = (data.shape[0] + num_ranks - 
1) // num_ranks return data[rank * local_size : (rank + 1) * local_size] + # We create SYCL queue and MPI communicator to perform computation on multiple GPUs q = dpctl.SyclQueue("gpu") diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 15a58efc99..7e2415ff09 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -19,7 +19,7 @@ from onedal import _default_backend as backend -from ..utils._third_party import is_dpctl_tensor, is_dpnp_ndarray, lazy_import +from ..utils._third_party import is_dpnp_ndarray, lazy_import def _apply_and_pass(func, *args, **kwargs): @@ -122,7 +122,7 @@ def return_type_constructor(array): def from_table(*args, like=None): """Create 2 dimensional arrays from oneDAL tables. - oneDAL tables are converted to numpy ndarrays, dpnp ndarrays, + oneDAL tables are converted to numpy ndarrays, dpnp ndarrays, or array API standard arrays of designated type. Parameters diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index 1471063d9d..44688cbee7 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -246,9 +246,7 @@ def test_conversion_to_table(dtype): not backend.is_dpc, reason="__sycl_usm_array_interface__ support requires DPC backend.", ) -@pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpnp", "cpu,gpu") -) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("dpnp", "cpu,gpu")) @pytest.mark.parametrize("order", ["C", "F"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64]) def test_input_zero_copy_sycl_usm(dataframe, queue, order, dtype): @@ -280,9 +278,7 @@ def test_input_zero_copy_sycl_usm(dataframe, queue, order, dtype): not backend.is_dpc, reason="__sycl_usm_array_interface__ support requires DPC backend.", ) -@pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpnp", "cpu,gpu") -) 
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("dpnp", "cpu,gpu")) @pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("data_shape", data_shapes) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -397,9 +393,7 @@ def test_to_table_non_contiguous_input(dataframe, queue): backend.is_dpc, reason="Required check should be done if no DPC backend.", ) -@pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("dpnp", "cpu,gpu") -) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("dpnp", "cpu,gpu")) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_interop_if_no_dpc_backend_sycl_usm(dataframe, queue, dtype): X = np.zeros((10, 20), dtype=dtype) @@ -523,9 +517,7 @@ def test_basic_ndarray_types_numpy(X): test_non_array(np.asarray(X), None) -@pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("numpy", "cpu,gpu") -) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("numpy", "cpu,gpu")) @pytest.mark.parametrize("can_copy", [True, False]) def test_to_table_non_contiguous_input_dlpack(dataframe, queue, can_copy): X, _ = np.mgrid[:10, :10] diff --git a/tests/run_examples.py b/tests/run_examples.py index 223b7944a3..e16720d70f 100755 --- a/tests/run_examples.py +++ b/tests/run_examples.py @@ -163,10 +163,16 @@ def check_library(rule): req_library["covariance_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] req_library["dbscan_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] req_library["dbscan_array_api.py"] = ["array_api_strict"] -req_library["incremental_basic_statistics_dpnp.py"] = ["dpctl" "dpnp",] +req_library["incremental_basic_statistics_dpnp.py"] = [ + "dpctl", "dpnp", +] req_library["incremental_covariance_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] -req_library["incremental_linear_regression_dpnp.py"] = ["dpctl" "dpnp",] -req_library["incremental_pca_dpnp.py"] = ["dpctl" "dpnp",] +req_library["incremental_linear_regression_dpnp.py"] = [ + 
"dpctl", "dpnp", +] +req_library["incremental_pca_dpnp.py"] = [ + "dpctl", "dpnp", +] req_library["kmeans_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] req_library["knn_bf_classification_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] req_library["knn_bf_regression_spmd.py"] = ["dpctl", "dpnp", "mpi4py"] From 9f59916b7a8cb2a1e918189a340f2c7b09404d70 Mon Sep 17 00:00:00 2001 From: y Date: Mon, 17 Nov 2025 17:10:00 -0800 Subject: [PATCH 3/6] re-add internal dpctl handling --- onedal/datatypes/_data_conversion.py | 14 ++++++++++---- onedal/utils/_array_api.py | 7 ++++++- onedal/utils/_third_party.py | 16 ++++++++++++++++ 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 7e2415ff09..9410ed2124 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -19,7 +19,7 @@ from onedal import _default_backend as backend -from ..utils._third_party import is_dpnp_ndarray, lazy_import +from ..utils._third_party import is_dpctl_tensor, is_dpnp_ndarray, lazy_import def _apply_and_pass(func, *args, **kwargs): @@ -98,7 +98,13 @@ def return_type_constructor(array): xp = array.__array_namespace__() # array api support added in dpnp starting in 0.19, will fail for # older versions - if is_dpnp_ndarray(array): + if is_dpctl_tensor(array): + func = lambda x: ( + xp.asarray(x) + if hasattr(x, "__sycl_usm_array_interface__") + else xp.asarray(backend.from_table(x), device=device) + ) + elif is_dpnp_ndarray(array): func = lambda x: ( xp.asarray(xp.as_usm_ndarray(x)) if hasattr(x, "__sycl_usm_array_interface__") @@ -122,8 +128,8 @@ def return_type_constructor(array): def from_table(*args, like=None): """Create 2 dimensional arrays from oneDAL tables. - oneDAL tables are converted to numpy ndarrays, dpnp ndarrays, - or array API standard arrays of designated type. 
+ oneDAL tables are converted to numpy ndarrays, dpctl tensors, dpnp + ndarrays, or array API standard arrays of designated type. Parameters ---------- diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index e64d9bc1de..f5a44f7a38 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -65,7 +65,12 @@ def _is_numpy_namespace(xp): @lru_cache(100) def _cls_to_sycl_namespace(cls): - if _is_subclass_fast(cls, "dpnp", "ndarray"): + # use caching to minimize imports, derived from array_api_compat + if _is_subclass_fast(cls, "dpctl.tensor", "usm_ndarray"): + import dpctl.tensor as dpt + + return dpt + elif _is_subclass_fast(cls, "dpnp", "ndarray"): import dpnp return dpnp diff --git a/onedal/utils/_third_party.py b/onedal/utils/_third_party.py index 7317887b25..71b24dd254 100644 --- a/onedal/utils/_third_party.py +++ b/onedal/utils/_third_party.py @@ -182,6 +182,22 @@ def is_dpnp_ndarray(x: object) -> bool: return _is_subclass_fast(type(x), "dpnp", "ndarray") +def is_dpctl_tensor(x: object) -> bool: + """Return True if 'x' is a dpctl usm_ndarray. + This function does not import dpctl.tensor if it has not already been + imported and is therefore cheap to use. + Parameters + ---------- + x : object + Any python object. + Returns + ------- + is_dpctl : bool + Flag if subclass of dpctl.tensor.usm_ndarray. + """ + return _is_subclass_fast(type(x), "dpctl.tensor", "usm_ndarray") + + def is_torch_tensor(x: object) -> bool: """Return True if 'x' is a PyTorch Tensor. 
From 78786695350c1370a7423af18178e85d22ffb478 Mon Sep 17 00:00:00 2001 From: y Date: Thu, 20 Nov 2025 12:37:31 -0800 Subject: [PATCH 4/6] minor --- onedal/utils/_third_party.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/onedal/utils/_third_party.py b/onedal/utils/_third_party.py index 71b24dd254..ba3a625cd1 100644 --- a/onedal/utils/_third_party.py +++ b/onedal/utils/_third_party.py @@ -184,12 +184,15 @@ def is_dpnp_ndarray(x: object) -> bool: def is_dpctl_tensor(x: object) -> bool: """Return True if 'x' is a dpctl usm_ndarray. + This function does not import dpctl.tensor if it has not already been imported and is therefore cheap to use. + Parameters ---------- x : object Any python object. + Returns ------- is_dpctl : bool From 8dde02092002d8303dd14371ff0736f4e8667e61 Mon Sep 17 00:00:00 2001 From: y Date: Mon, 24 Nov 2025 15:35:43 -0800 Subject: [PATCH 5/6] remove unnecessary dpnp.asnumpy calls --- examples/sklearnex/kmeans_spmd.py | 2 +- examples/sklearnex/knn_bf_classification_spmd.py | 4 ++-- examples/sklearnex/knn_bf_regression_spmd.py | 4 ++-- examples/sklearnex/linear_regression_spmd.py | 2 +- examples/sklearnex/logistic_regression_spmd.py | 5 ++--- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/examples/sklearnex/kmeans_spmd.py b/examples/sklearnex/kmeans_spmd.py index a8b66ccc6f..e1f055214e 100644 --- a/examples/sklearnex/kmeans_spmd.py +++ b/examples/sklearnex/kmeans_spmd.py @@ -64,4 +64,4 @@ def get_test_data(size): result = model.predict(dpnp_X_test) -print(f"Result labels on rank {rank} (slice of 5):\n", dpnp.asnumpy(result)[:5]) +print(f"Result labels on rank {rank} (slice of 5):\n", result[:5]) diff --git a/examples/sklearnex/knn_bf_classification_spmd.py b/examples/sklearnex/knn_bf_classification_spmd.py index b623679428..55e39361ea 100644 --- a/examples/sklearnex/knn_bf_classification_spmd.py +++ b/examples/sklearnex/knn_bf_classification_spmd.py @@ -68,11 +68,11 @@ def generate_X_y(par, seed): print("Ground truth (first 5 
observations on rank {}):\n{}".format(rank, y_test[:5])) print( "Classification results (first 5 observations on rank {}):\n{}".format( - rank, dpnp.asnumpy(y_predict)[:5] + rank, y_predict[:5] ) ) print( "Accuracy for entire rank {} (256 classes): {}\n".format( - rank, accuracy_score(y_test, dpnp.asnumpy(y_predict)) + rank, accuracy_score(y_test, y_predict) ) ) diff --git a/examples/sklearnex/knn_bf_regression_spmd.py b/examples/sklearnex/knn_bf_regression_spmd.py index ffc15d3167..32460db26d 100644 --- a/examples/sklearnex/knn_bf_regression_spmd.py +++ b/examples/sklearnex/knn_bf_regression_spmd.py @@ -74,12 +74,12 @@ def generate_X_y(par, coef_seed, data_seed): print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5])) print( "Regression results (first 5 observations on rank {}):\n{}".format( - rank, dpnp.asnumpy(y_predict)[:5] + rank, y_predict[:5] ) ) print( "MSE for entire rank {}: {}\n".format( rank, - mean_squared_error(y_test, dpnp.asnumpy(y_predict)), + mean_squared_error(y_test, y_predict), ) ) diff --git a/examples/sklearnex/linear_regression_spmd.py b/examples/sklearnex/linear_regression_spmd.py index 0b41fd596f..7e253a9a92 100755 --- a/examples/sklearnex/linear_regression_spmd.py +++ b/examples/sklearnex/linear_regression_spmd.py @@ -73,4 +73,4 @@ def get_test_data(rank): result = model.predict(dpnp_X_test) -print(f"Result on rank {rank}:\n", dpnp.asnumpy(result)) +print(f"Result on rank {rank}:\n", result) diff --git a/examples/sklearnex/logistic_regression_spmd.py b/examples/sklearnex/logistic_regression_spmd.py index 406ff2aaab..7d40c1348d 100644 --- a/examples/sklearnex/logistic_regression_spmd.py +++ b/examples/sklearnex/logistic_regression_spmd.py @@ -68,7 +68,6 @@ def generate_X_y(par, seed): dpnp_X_train = dpnp.asarray(X_train, usm_type="device", sycl_queue=q) dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=q) dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=q) -dpnp_y_test = 
dpnp.asarray(y_test, usm_type="device", sycl_queue=q) model_spmd = LogisticRegression() model_spmd.fit(dpnp_X_train, dpnp_y_train) @@ -81,11 +80,11 @@ def generate_X_y(par, seed): print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5])) print( "Classification results (first 5 observations on rank {}):\n{}".format( - rank, dpnp.asnumpy(y_predict)[:5] + rank, y_predict[:5] ) ) print( "Accuracy for entire rank {} (2 classes): {}\n".format( - rank, accuracy_score(y_test, dpnp.asnumpy(y_predict)) + rank, accuracy_score(y_test, y_predict) ) ) From a933b3c4f61aae74357d1d45cc2bfa84a5cb26bb Mon Sep 17 00:00:00 2001 From: y Date: Mon, 24 Nov 2025 21:25:36 -0800 Subject: [PATCH 6/6] fixes --- examples/sklearnex/knn_bf_classification_spmd.py | 2 +- examples/sklearnex/knn_bf_regression_spmd.py | 2 +- examples/sklearnex/logistic_regression_spmd.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/sklearnex/knn_bf_classification_spmd.py b/examples/sklearnex/knn_bf_classification_spmd.py index 55e39361ea..b8dc7833b9 100644 --- a/examples/sklearnex/knn_bf_classification_spmd.py +++ b/examples/sklearnex/knn_bf_classification_spmd.py @@ -73,6 +73,6 @@ def generate_X_y(par, seed): ) print( "Accuracy for entire rank {} (256 classes): {}\n".format( - rank, accuracy_score(y_test, y_predict) + rank, accuracy_score(y_test, dpnp.asnumpy(y_predict)) ) ) diff --git a/examples/sklearnex/knn_bf_regression_spmd.py b/examples/sklearnex/knn_bf_regression_spmd.py index 32460db26d..887f11dc8e 100644 --- a/examples/sklearnex/knn_bf_regression_spmd.py +++ b/examples/sklearnex/knn_bf_regression_spmd.py @@ -80,6 +80,6 @@ def generate_X_y(par, coef_seed, data_seed): print( "MSE for entire rank {}: {}\n".format( rank, - mean_squared_error(y_test, y_predict), + mean_squared_error(y_test, dpnp.asnumpy(y_predict)), ) ) diff --git a/examples/sklearnex/logistic_regression_spmd.py b/examples/sklearnex/logistic_regression_spmd.py index 
7d40c1348d..8c6a68349d 100644 --- a/examples/sklearnex/logistic_regression_spmd.py +++ b/examples/sklearnex/logistic_regression_spmd.py @@ -85,6 +85,6 @@ def generate_X_y(par, seed): ) print( "Accuracy for entire rank {} (2 classes): {}\n".format( - rank, accuracy_score(y_test, y_predict) + rank, accuracy_score(y_test, dpnp.asnumpy(y_predict)) ) )