diff --git a/python/cudf/cudf/core/accessors/lists.py b/python/cudf/cudf/core/accessors/lists.py index 24207fd30ba..37d523863cb 100644 --- a/python/cudf/cudf/core/accessors/lists.py +++ b/python/cudf/cudf/core/accessors/lists.py @@ -11,8 +11,8 @@ from cudf.api.types import is_scalar from cudf.core.accessors.base_accessor import BaseAccessor from cudf.core.column.column import as_column +from cudf.core.dtype.validators import is_dtype_obj_list, is_dtype_obj_numeric from cudf.core.dtypes import ListDtype, dtype as cudf_dtype -from cudf.utils.dtypes import is_dtype_obj_list, is_dtype_obj_numeric from cudf.utils.scalar import pa_scalar_to_plc_scalar if TYPE_CHECKING: diff --git a/python/cudf/cudf/core/accessors/struct.py b/python/cudf/cudf/core/accessors/struct.py index d5a2da272a4..3a6f6b6468c 100644 --- a/python/cudf/cudf/core/accessors/struct.py +++ b/python/cudf/cudf/core/accessors/struct.py @@ -7,8 +7,9 @@ from cudf.core.accessors.base_accessor import BaseAccessor from cudf.core.column.struct import StructColumn +from cudf.core.dtype.validators import is_dtype_obj_struct from cudf.core.dtypes import StructDtype -from cudf.utils.dtypes import get_dtype_of_same_kind, is_dtype_obj_struct +from cudf.utils.dtypes import get_dtype_of_same_kind if TYPE_CHECKING: from cudf.core.dataframe import DataFrame diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 016720aa08f..1e1a372dd91 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -79,6 +79,15 @@ class CategoricalColumn(column.ColumnBase): plc.TypeId.UINT64, } + @classmethod + def _validate_args( # type: ignore[override] + cls, plc_column: plc.Column, dtype: CategoricalDtype + ) -> tuple[plc.Column, CategoricalDtype]: + plc_column, dtype = super()._validate_args(plc_column, dtype) # type: ignore[assignment] + if not isinstance(dtype, CategoricalDtype): + raise ValueError(f"{dtype=} must be a CategoricalDtype instance") + return plc_column, dtype + def __contains__(self, item: ScalarLike) -> bool: try: encoded = self._encode(item) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 85a5ab1e8d8..cfa409afe60 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -50,6 +50,13 @@ ) from cudf.core.column.utils import access_columns from cudf.core.copy_types import GatherMap +from cudf.core.dtype.validators import ( + is_dtype_obj_decimal, + is_dtype_obj_interval, + is_dtype_obj_list, + is_dtype_obj_numeric, + is_dtype_obj_struct, +) from cudf.core.dtypes import ( CategoricalDtype, DecimalDtype, @@ -72,11 +79,6 @@ find_common_type, get_dtype_of_same_kind, is_column_like, - is_dtype_obj_decimal, - is_dtype_obj_interval, - is_dtype_obj_list, - is_dtype_obj_numeric, - is_dtype_obj_struct, is_mixed_with_object_dtype, is_pandas_nullable_extension_dtype, min_signed_type, @@ -863,7 +865,7 @@ def _prep_pandas_compat_repr(self) -> StringColumn | Self: * null (other types)= str(pd.NA) """ if self.has_nulls(): - return self.astype(np.dtype("str")).fillna( + return self.astype(CUDF_STRING_DTYPE).fillna( str(self._PANDAS_NA_VALUE) ) return self diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 1b216064de8..8b0b1404c37 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -5,7 +5,7 @@ import warnings from decimal import Decimal -from typing import TYPE_CHECKING, Any, Self, cast +from typing import TYPE_CHECKING, Any, ClassVar, Self, cast import numpy as np import pandas as pd @@ -19,14 +19,16 @@ from cudf.core._internals import binaryop from cudf.core.column.column import ColumnBase, as_column from cudf.core.column.numerical_base import NumericalBaseColumn +from cudf.core.dtype.validators import ( + is_dtype_obj_decimal32, + is_dtype_obj_decimal64, + is_dtype_obj_decimal128, +) from cudf.core.dtypes import ( Decimal32Dtype, Decimal64Dtype, Decimal128Dtype, DecimalDtype, - is_decimal32_dtype, - is_decimal64_dtype, - is_decimal128_dtype, ) from cudf.core.mixins import BinaryOperand from cudf.utils.dtypes import ( @@ -38,7 +40,7 @@ from cudf.utils.utils import is_na_like if TYPE_CHECKING: - from collections.abc import Mapping + from collections.abc import Callable, Mapping from cudf._typing import ( ColumnBinaryOperand, @@ -70,14 +72,17 @@ class DecimalBaseColumn(NumericalBaseColumn): """Base column for decimal32, decimal64 or decimal128 columns""" _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS + _decimal_type_check: ClassVar[Callable[[DtypeObj], bool]] @classmethod def _validate_args( # type: ignore[override] cls, plc_column: plc.Column, dtype: DecimalDtype ) -> tuple[plc.Column, DecimalDtype]: plc_column, dtype = super()._validate_args(plc_column, dtype) # type: ignore[assignment] - if not cls._decimal_check(dtype): # type: ignore[attr-defined] - raise ValueError(f"{dtype=} must be a Decimal128Dtype instance") + if not cls._decimal_type_check(dtype): + raise ValueError( + f"{dtype=} must be a valid decimal dtype instance" + ) return plc_column, dtype def _with_type_metadata(self: Self, dtype: DtypeObj) -> Self: @@ -379,19 +384,19 @@ def to_pandas( class Decimal32Column(DecimalBaseColumn): _VALID_PLC_TYPES = {plc.TypeId.DECIMAL32} _decimal_cls = Decimal32Dtype - _decimal_check = is_decimal32_dtype + _decimal_type_check = is_dtype_obj_decimal32 class Decimal64Column(DecimalBaseColumn): _VALID_PLC_TYPES = {plc.TypeId.DECIMAL64} _decimal_cls = Decimal64Dtype - _decimal_check = is_decimal64_dtype + _decimal_type_check = is_dtype_obj_decimal64 class Decimal128Column(DecimalBaseColumn): _VALID_PLC_TYPES = {plc.TypeId.DECIMAL128} _decimal_cls = Decimal128Dtype - _decimal_check = is_decimal128_dtype + _decimal_type_check = is_dtype_obj_decimal128 def to_arrow(self) -> pa.Array: arrow_array = super().to_arrow() diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 056cf91d5b1..c352e6bc2f3 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -13,8 +13,8 @@ import cudf from cudf.core.column.column import ColumnBase, _handle_nulls, as_column +from cudf.core.dtype.validators import is_dtype_obj_interval from cudf.core.dtypes import IntervalDtype, _dtype_to_metadata -from cudf.utils.dtypes import is_dtype_obj_interval from cudf.utils.scalar import maybe_nested_pa_scalar_to_py if TYPE_CHECKING: diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 6e9663f7435..d38f20ec9cb 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -15,12 +15,10 @@ import cudf from cudf.core.column.column import ColumnBase, as_column, column_empty +from cudf.core.dtype.validators import is_dtype_obj_list from cudf.core.dtypes import ListDtype from cudf.core.missing import NA -from cudf.utils.dtypes import ( - get_dtype_of_same_kind, - is_dtype_obj_list, -) +from cudf.utils.dtypes import get_dtype_of_same_kind from cudf.utils.scalar import ( maybe_nested_pa_scalar_to_py, pa_scalar_to_plc_scalar, diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 29757c38a12..c9ea4775276 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -19,14 +19,13 @@ from cudf.api.types import is_scalar from cudf.core._internals import binaryop from cudf.core.column.column import ColumnBase, as_column, column_empty +from cudf.core.dtype.validators import is_dtype_obj_string from cudf.core.mixins import Scannable from cudf.errors import MixedTypeError from cudf.utils.dtypes import ( - CUDF_STRING_DTYPE, cudf_dtype_to_pa_type, dtype_to_pylibcudf_type, get_dtype_of_same_kind, - is_dtype_obj_string, is_pandas_nullable_extension_dtype, ) from cudf.utils.scalar import pa_scalar_to_plc_scalar @@ -117,21 +116,8 @@ def _validate_args( cls, plc_column: plc.Column, dtype: np.dtype ) -> tuple[plc.Column, np.dtype]: plc_column, dtype = super()._validate_args(plc_column, dtype) - if ( - not cudf.get_option("mode.pandas_compatible") - and dtype != CUDF_STRING_DTYPE - and dtype.kind != "U" - ) or ( - cudf.get_option("mode.pandas_compatible") - and not is_dtype_obj_string(dtype) - ): - raise ValueError(f"dtype must be {CUDF_STRING_DTYPE}") - if ( - cudf.get_option("mode.pandas_compatible") - and isinstance(dtype, np.dtype) - and dtype.kind == "U" - ): - dtype = CUDF_STRING_DTYPE + if not is_dtype_obj_string(dtype): + raise ValueError("dtype must be a valid cuDF string dtype") return plc_column, dtype @property diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index 2aff7c1051e..ca0c0df69f6 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -11,11 +11,9 @@ import cudf from cudf.core.column.column import ColumnBase +from cudf.core.dtype.validators import is_dtype_obj_struct from cudf.core.dtypes import StructDtype -from cudf.utils.dtypes import ( - dtype_from_pylibcudf_column, - is_dtype_obj_struct, -) +from cudf.utils.dtypes import dtype_from_pylibcudf_column from cudf.utils.scalar import ( maybe_nested_pa_scalar_to_py, pa_scalar_to_plc_scalar, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 6f74878a8c5..aaae6eaf051 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -62,6 +62,7 @@ ) from cudf.core.column_accessor import ColumnAccessor from cudf.core.copy_types import BooleanMask +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.dtypes import ( CategoricalDtype, Decimal32Dtype, @@ -107,7 +108,6 @@ find_common_type, get_dtype_of_same_kind, is_column_like, - is_dtype_obj_numeric, is_mixed_with_object_dtype, is_pandas_nullable_extension_dtype, min_signed_type, diff --git a/python/cudf/cudf/core/dtype/__init__.py b/python/cudf/cudf/core/dtype/__init__.py new file mode 100644 index 00000000000..8eca3cc68ad --- /dev/null +++ b/python/cudf/cudf/core/dtype/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 diff --git a/python/cudf/cudf/core/dtype/validators.py b/python/cudf/cudf/core/dtype/validators.py new file mode 100644 index 00000000000..1e5abcb6598 --- /dev/null +++ b/python/cudf/cudf/core/dtype/validators.py @@ -0,0 +1,198 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pandas as pd +import pyarrow as pa + +import cudf +from cudf.utils.dtypes import CUDF_STRING_DTYPE + +if TYPE_CHECKING: + from cudf._typing import DtypeObj + + +def is_dtype_obj_string(obj: DtypeObj) -> bool: + """Check whether the provided dtype object is a cuDF string type. + + Parameters + ---------- + obj : DtypeObj + The dtype object to check. + + Returns + ------- + bool + Whether or not the dtype object is a cuDF string type. + """ + return ( + obj == CUDF_STRING_DTYPE + or isinstance(obj, pd.StringDtype) + or ( + isinstance(obj, pd.ArrowDtype) + and ( + pa.types.is_string(obj.pyarrow_dtype) + or pa.types.is_large_string(obj.pyarrow_dtype) + ) + ) + ) + + +def is_dtype_obj_list(obj: DtypeObj) -> bool: + """Check whether the provided dtype object is a cuDF list type. + + Parameters + ---------- + obj : DtypeObj + The dtype object to check. + + Returns + ------- + bool + Whether or not the dtype object is a cuDF list type. + """ + return isinstance(obj, cudf.ListDtype) or ( + isinstance(obj, pd.ArrowDtype) and pa.types.is_list(obj.pyarrow_dtype) + ) + + +def is_dtype_obj_struct(obj: DtypeObj) -> bool: + """Check whether the provided dtype object is a cuDF struct type. + + Parameters + ---------- + obj : DtypeObj + The dtype object to check. + + Returns + ------- + bool + Whether or not the dtype object is a cuDF struct type. + """ + return isinstance(obj, cudf.StructDtype) or ( + isinstance(obj, pd.ArrowDtype) + and pa.types.is_struct(obj.pyarrow_dtype) + ) + + +def is_dtype_obj_interval(obj: DtypeObj) -> bool: + """Check whether the provided dtype object is a cuDF interval type. + + Parameters + ---------- + obj : DtypeObj + The dtype object to check. + + Returns + ------- + bool + Whether or not the dtype object is a cuDF interval type. + """ + return isinstance(obj, cudf.IntervalDtype) or ( + isinstance(obj, pd.ArrowDtype) + and pa.types.is_interval(obj.pyarrow_dtype) + ) + + +def is_dtype_obj_decimal(obj: DtypeObj) -> bool: + """Check whether the provided dtype object is a cuDF decimal type. + + Parameters + ---------- + obj : DtypeObj + The dtype object to check. + + Returns + ------- + bool + Whether or not the dtype object is a cuDF decimal type. + """ + return ( + is_dtype_obj_decimal32(obj) + or is_dtype_obj_decimal64(obj) + or is_dtype_obj_decimal128(obj) + ) + + +def is_dtype_obj_decimal32(obj: DtypeObj) -> bool: + """Check whether the provided dtype object is a cuDF decimal32 type. + + Parameters + ---------- + obj : DtypeObj + The dtype object to check. + + Returns + ------- + bool + Whether or not the dtype object is a cuDF decimal32 type. + """ + return isinstance(obj, cudf.Decimal32Dtype) or ( + isinstance(obj, pd.ArrowDtype) + and pa.types.is_decimal32(obj.pyarrow_dtype) + ) + + +def is_dtype_obj_decimal64(obj: DtypeObj) -> bool: + """Check whether the provided dtype object is a cuDF decimal64 type. + + Parameters + ---------- + obj : DtypeObj + The dtype object to check. + + Returns + ------- + bool + Whether or not the dtype object is a cuDF decimal64 type. + """ + return isinstance(obj, cudf.Decimal64Dtype) or ( + isinstance(obj, pd.ArrowDtype) + and pa.types.is_decimal64(obj.pyarrow_dtype) + ) + + +def is_dtype_obj_decimal128(obj: DtypeObj) -> bool: + """Check whether the provided dtype object is a cuDF decimal128 type. + + Parameters + ---------- + obj : DtypeObj + The dtype object to check. + + Returns + ------- + bool + Whether or not the dtype object is a cuDF decimal128 type. + """ + return isinstance(obj, cudf.Decimal128Dtype) or ( + isinstance(obj, pd.ArrowDtype) + and pa.types.is_decimal128(obj.pyarrow_dtype) + ) + + +def is_dtype_obj_numeric( + dtype: DtypeObj, include_decimal: bool = True +) -> bool: + """ + Check whether the provided dtype object is a numeric type. + + Parameters + ---------- + dtype: DtypeObj + The dtype object to check. + include_decimal: bool, default True + Whether to include decimal types in the check. + + Returns + ------- + bool + Whether or not the dtype object is a numeric type. + """ + is_non_decimal = dtype.kind in set("iufb") + if include_decimal: + return is_non_decimal or is_dtype_obj_decimal(dtype) + else: + return is_non_decimal diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index e3295707601..2a47ad20261 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -21,6 +21,7 @@ import cudf from cudf.core._compat import PANDAS_GE_210, PANDAS_LT_300 from cudf.core.abc import Serializable +from cudf.core.dtype.validators import is_dtype_obj_string from cudf.utils.docutils import doc_apply from cudf.utils.dtypes import ( CUDF_STRING_DTYPE, @@ -995,9 +996,13 @@ def __init__( self._fields = {} else: self._subtype = cudf.dtype(subtype) - if isinstance( - self._subtype, cudf.CategoricalDtype - ) or cudf.utils.dtypes.is_dtype_obj_string(self._subtype): + # TODO: Remove self._subtype.kind == "U" once cudf.dtype no longer accepts + # numpy string types + if ( + isinstance(self._subtype, CategoricalDtype) + or is_dtype_obj_string(self._subtype) + or self._subtype.kind == "U" + ): raise TypeError( "category, object, and string subtypes are not supported " "for IntervalDtype" diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index a16ea1d57d7..0367150e260 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -33,10 +33,10 @@ serialize_columns, ) from cudf.core.column_accessor import ColumnAccessor +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.mixins import BinaryOperand, Scannable from cudf.utils.dtypes import ( find_common_type, - is_dtype_obj_numeric, is_pandas_nullable_extension_dtype, ) from cudf.utils.performance_tracking import _performance_tracking diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 459563912e6..f852c426c63 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -34,6 +34,7 @@ from cudf.core.column_accessor import ColumnAccessor from cudf.core.common import pipe from cudf.core.copy_types import GatherMap +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.dtypes import ( CategoricalDtype, DecimalDtype, @@ -53,7 +54,6 @@ SIZE_TYPE_DTYPE, cudf_dtype_to_pa_type, get_dtype_of_same_kind, - is_dtype_obj_numeric, ) from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.scalar import pa_scalar_to_plc_scalar diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index e58e384dec7..a7c3115e083 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -43,6 +43,7 @@ from cudf.core.column.column import as_column, column_empty, concat_columns from cudf.core.column_accessor import ColumnAccessor from cudf.core.copy_types import GatherMap +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.dtypes import IntervalDtype, dtype as cudf_dtype from cudf.core.join._join_helpers import _match_join_keys from cudf.core.single_column_frame import SingleColumnFrame @@ -56,7 +57,6 @@ cudf_dtype_to_pa_type, dtype_to_pylibcudf_type, find_common_type, - is_dtype_obj_numeric, is_mixed_with_object_dtype, ) from cudf.utils.performance_tracking import _performance_tracking diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 0846f18547e..4ceb04586e2 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -51,6 +51,7 @@ from cudf.core.column_accessor import ColumnAccessor from cudf.core.common import pipe from cudf.core.copy_types import BooleanMask, GatherMap +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.dtypes import ListDtype from cudf.core.frame import Frame from cudf.core.groupby.groupby import GroupBy @@ -75,7 +76,6 @@ find_common_type, get_dtype_of_same_kind, is_column_like, - is_dtype_obj_numeric, is_mixed_with_object_dtype, is_pandas_nullable_extension_dtype, ) diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py index e7e0147ff5f..aaedb410616 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -10,6 +10,7 @@ import numpy as np from cudf.api.types import is_dtype_equal +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.dtypes import ( CategoricalDtype, Decimal32Dtype, @@ -20,7 +21,6 @@ from cudf.utils.dtypes import ( find_common_type, get_dtype_of_same_kind, - is_dtype_obj_numeric, ) if TYPE_CHECKING: diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 207c92773ac..6515382b9c9 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -25,6 +25,7 @@ from cudf.core.column import access_columns from cudf.core.column.column import ColumnBase from cudf.core.column_accessor import ColumnAccessor +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.frame import Frame from cudf.core.index import ( Index, @@ -39,7 +40,6 @@ CUDF_STRING_DTYPE, SIZE_TYPE_DTYPE, is_column_like, - is_dtype_obj_numeric, is_pandas_nullable_extension_dtype, ) from cudf.utils.performance_tracking import _performance_tracking diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 36ac5f3b5a3..3e80f476d8c 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -41,6 +41,7 @@ ) from cudf.core.column.column import concat_columns from cudf.core.column_accessor import ColumnAccessor +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.dtypes import CategoricalDtype, IntervalDtype from cudf.core.groupby.groupby import SeriesGroupBy, groupby_doc_template from cudf.core.index import ( @@ -65,7 +66,6 @@ _get_nan_for_dtype, find_common_type, get_dtype_of_same_kind, - is_dtype_obj_numeric, is_mixed_with_object_dtype, is_pandas_nullable_extension_dtype, ) diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 65d8eb0c2da..242a7853044 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -16,9 +16,10 @@ ) from cudf.core.column import ColumnBase, as_column, column_empty from cudf.core.column_accessor import ColumnAccessor +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.frame import Frame from cudf.core.mixins import NotIterable -from cudf.utils.dtypes import SIZE_TYPE_DTYPE, is_dtype_obj_numeric +from cudf.utils.dtypes import SIZE_TYPE_DTYPE from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import _is_same_name diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 2ddc651e1d6..93977e12e95 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -9,13 +9,13 @@ import pandas as pd from cudf.core.column import as_column +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.dtypes import CategoricalDtype, ListDtype, StructDtype from cudf.core.index import ensure_index from cudf.core.series import Series from cudf.utils.dtypes import ( CUDF_STRING_DTYPE, can_convert_to_column, - is_dtype_obj_numeric, ) if TYPE_CHECKING: diff --git a/python/cudf/cudf/core/window/ewm.py b/python/cudf/cudf/core/window/ewm.py index 330216c2b9b..4d64a317f27 100644 --- a/python/cudf/cudf/core/window/ewm.py +++ b/python/cudf/cudf/core/window/ewm.py @@ -7,8 +7,8 @@ import numpy as np +from cudf.core.dtype.validators import is_dtype_obj_numeric from cudf.core.window.rolling import _RollingBase -from cudf.utils.dtypes import is_dtype_obj_numeric if TYPE_CHECKING: from cudf.core.column.column import ColumnBase diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 6d1f904121f..1ac6f6700f6 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -544,7 +544,6 @@ def set_copy_on_write_option(): "tests/arrays/floating/test_construction.py::test_floating_array_constructor_copy", "tests/arrays/floating/test_function.py::test_ufuncs_single[sign]", "tests/arrays/floating/test_function.py::test_value_counts_empty", - "tests/arrays/floating/test_repr.py::test_frame_repr[Float32Dtype]", "tests/arrays/floating/test_to_numpy.py::test_to_numpy_copy", "tests/arrays/integer/test_arithmetic.py::test_values_multiplying_large_series_by_NA", "tests/arrays/integer/test_comparison.py::TestComparisonOps::test_ufunc_with_out[Int16Dtype]", @@ -1432,7 +1431,6 @@ def set_copy_on_write_option(): "tests/dtypes/test_dtypes.py::TestIntervalDtype::test_construction_from_string", "tests/dtypes/test_dtypes.py::TestIntervalDtype::test_construction_generic[Interval]", "tests/dtypes/test_dtypes.py::TestIntervalDtype::test_construction_generic[interval]", - "tests/dtypes/test_dtypes.py::TestIntervalDtype::test_construction_not_supported[ DtypeObj | None: if any( isinstance(dtype, cudf.core.dtypes.DecimalDtype) for dtype in dtypes ): + from cudf.core.dtype.validators import is_dtype_obj_numeric + if all( is_dtype_obj_numeric(dtype, include_decimal=True) for dtype in dtypes @@ -390,20 +390,6 @@ def _get_base_dtype(dtype: pd.DatetimeTZDtype) -> np.dtype: return dtype.base -def is_dtype_obj_numeric( - dtype: DtypeObj, include_decimal: bool = True -) -> bool: - """Like is_numeric_dtype but does not introspect argument.""" - is_non_decimal = dtype.kind in set("iufb") - if include_decimal: - return is_non_decimal or isinstance( - dtype, - (cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype), - ) - else: - return is_non_decimal - - def pyarrow_dtype_to_cudf_dtype(dtype: pd.ArrowDtype) -> DtypeObj: """Given a pandas ArrowDtype, converts it into the equivalent cudf pandas dtype. @@ -602,168 +588,6 @@ def dtype_from_pylibcudf_column(col: plc.Column) -> DtypeObj: return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid] -def is_dtype_obj_categorical(obj): - if obj is None: - return False - - if isinstance( - obj, - ( - pd.CategoricalDtype, - cudf.CategoricalDtype, - ), - ): - return True - - if any( - obj is t - for t in ( - cudf.CategoricalDtype, - pd.CategoricalDtype, - pd.CategoricalDtype.type, - ) - ): - return True - if isinstance(obj, str) and obj == "category": - return True - - # TODO: A lot of the above checks are probably redundant and should be - # farmed out to this function here instead. - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - return pd_types.is_categorical_dtype(obj) - - -def is_dtype_obj_string(obj): - """Check whether the provided array or dtype is of the string dtype. - - Parameters - ---------- - obj : array-like or dtype - The array or dtype to check. - - Returns - ------- - bool - Whether or not the array or dtype is of the string dtype. - """ - return ( - obj is CUDF_STRING_DTYPE - or obj is np.dtype("str") - or (isinstance(obj, pd.StringDtype)) - or ( - isinstance(obj, pd.ArrowDtype) - and ( - pa.types.is_string(obj.pyarrow_dtype) - or pa.types.is_large_string(obj.pyarrow_dtype) - ) - ) - ) - - -def is_dtype_obj_list(obj): - """Check whether an array-like or dtype is of the list dtype. - - Parameters - ---------- - obj : array-like or dtype - The array-like or dtype to check. - - Returns - ------- - bool - Whether or not the array-like or dtype is of the list dtype. - """ - return type(obj) is cudf.ListDtype or ( - isinstance(obj, pd.ArrowDtype) and pa.types.is_list(obj.pyarrow_dtype) - ) - - -def is_dtype_obj_struct(obj): - """Check whether an array-like or dtype is of the struct dtype. - - Parameters - ---------- - obj : array-like or dtype - The array-like or dtype to check. - - Returns - ------- - bool - Whether or not the array-like or dtype is of the struct dtype. - """ - # TODO: This behavior is currently inconsistent for interval types. the - # actual class IntervalDtype will return False, but instances (e.g. - # IntervalDtype(int)) will return True. For now this is not being changed - # since the interval dtype is being modified as part of the array refactor, - # but this behavior should be made consistent afterwards. - return isinstance(obj, cudf.StructDtype) or ( - isinstance(obj, pd.ArrowDtype) - and pa.types.is_struct(obj.pyarrow_dtype) - ) - - -def is_dtype_obj_interval(obj): - return isinstance( - obj, - ( - cudf.IntervalDtype, - pd.IntervalDtype, - ), - ) or ( - isinstance(obj, pd.ArrowDtype) - and pa.types.is_interval(obj.pyarrow_dtype) - ) - - -def is_dtype_obj_decimal(obj): - """Check whether an array-like or dtype is of the decimal dtype. - - Parameters - ---------- - obj : array-like or dtype - The array-like or dtype to check. - - Returns - ------- - bool - Whether or not the array-like or dtype is of the decimal dtype. - """ - return ( - is_dtype_obj_decimal32(obj) - or is_dtype_obj_decimal64(obj) - or is_dtype_obj_decimal128(obj) - ) - - -def is_dtype_obj_decimal32(obj): - return ( - type(obj) is cudf.Decimal32Dtype - or obj is cudf.Decimal32Dtype - or (isinstance(obj, str) and obj == cudf.Decimal32Dtype.name) - ) - - -def is_dtype_obj_decimal64(obj): - return ( - type(obj) is cudf.Decimal64Dtype - or obj is cudf.Decimal64Dtype - or (isinstance(obj, str) and obj == cudf.Decimal64Dtype.name) - ) - - -def is_dtype_obj_decimal128(obj): - return ( - type(obj) is cudf.Decimal128Dtype - or obj is cudf.Decimal128Dtype - or (isinstance(obj, str) and obj == cudf.Decimal128Dtype.name) - or ( - isinstance(obj, pd.ArrowDtype) - and pa.types.is_decimal128(obj.pyarrow_dtype) - ) - ) - - SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES: dict[np.dtype[Any], plc.types.TypeId] = { np.dtype("int8"): plc.types.TypeId.INT8, np.dtype("int16"): plc.types.TypeId.INT16,