1- # Copyright (c) 2020-2023 , NVIDIA CORPORATION.
1+ # Copyright (c) 2020-2025 , NVIDIA CORPORATION.
22#
33# Licensed under the Apache License, Version 2.0 (the "License");
44# you may not use this file except in compliance with the License.
@@ -77,21 +77,7 @@ def _conv_array_to_sparse(arr):
7777def to_sparse_dask_array (cudf_or_array , client = None ):
7878 """
7979 Converts an array or cuDF to a sparse Dask array backed by sparse CuPy
80- CSR matrices. Unfortunately, due to current limitations in Dask, there is
81- no direct path to convert a cupyx.scipy.sparse.spmatrix into a CuPy backed
82- dask.Array without copying to host.
83-
84-
85- NOTE: Until https://github.com/cupy/cupy/issues/2655 and
86- https://github.com/dask/dask/issues/5604 are implemented, compute()
87- will not be able to be called on a Dask.array that is backed with
88- sparse CuPy arrays because they lack the necessary functionality
89- to be stacked into a single array. The array returned from this
90- utility will, however, still be able to be passed into functions
91- that can make use of sparse CuPy-backed Dask.Array (eg. Distributed
92- Naive Bayes).
93-
94- Relevant cuML issue: https://github.com/rapidsai/cuml/issues/1387
80+ CSR matrices.
9581
9682 Parameters
9783 ----------
@@ -105,51 +91,15 @@ def to_sparse_dask_array(cudf_or_array, client=None):
10591 -------
10692 dask_array : dask.Array backed by cupyx.scipy.sparse.csr_matrix
10793 """
108- client = default_client () if client is None else client
109-
110- # Makes sure the MatDescriptor workaround for CuPy sparse arrays
111- # is loaded (since Dask lazy-loaded serialization in cuML is only
112- # executed when object from the cuML package needs serialization.
113- # This can go away once the MatDescriptor pickling bug is fixed
114- # in CuPy.
115- # Ref: https://github.com/cupy/cupy/issues/3061
116- from cuml .comm import serialize # NOQA
117-
94+ ret = cudf_or_array
11895 shape = cudf_or_array .shape
119-
12096 meta = cupyx .scipy .sparse .csr_matrix (rmm_cupy_ary (cp .zeros , 1 ))
12197
122- ret = cudf_or_array
123-
124- # If we have a Dask array, convert it to a Dask DataFrame
125- if isinstance (ret , dask .array .Array ):
126- # At the time of developing this, using map_blocks will not work
127- # to convert a Dask.Array to CuPy sparse arrays underneath.
128-
129- def _conv_np_to_df (x ):
130- cupy_ary = rmm_cupy_ary (cp .asarray , x , dtype = x .dtype )
131- return cudf .DataFrame (cupy_ary )
132-
133- parts = client .sync (_extract_partitions , ret )
134- futures = [
135- client .submit (_conv_np_to_df , part , workers = [w ], pure = False )
136- for w , part in parts
137- ]
138-
139- ret = df_to_dask_cudf (futures )
140-
141- # If we have a Dask Dataframe, use `map_partitions` to convert it
142- # to a Sparse Cupy-backed Dask Array. This will also convert the dense
143- # Dask array above to a Sparse Cupy-backed Dask Array, since we cannot
144- # use map_blocks on the array, but we can use `map_partitions` on the
145- # Dataframe.
14698 if isinstance (ret , dask .dataframe .DataFrame ):
147- ret = ret .map_partitions (
148- _conv_df_to_sparse , meta = dask .array .from_array (meta )
149- )
99+ ret = ret .to_dask_array ()
150100
151- # This will also handle the input of dask.array.Array
152- return ret
101+ if isinstance ( cudf_or_array , dask .array .Array ):
102+ return cudf_or_array . map_blocks ( _conv_array_to_sparse , meta = meta )
153103
154104 else :
155105
0 commit comments