Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 73 additions & 28 deletions ibis/expr/datatypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,52 +41,58 @@


@overload
def dtype(value: type[int] | Literal["int"], nullable: bool = True) -> Int64: ...
def dtype(value: type[int] | Literal["int"], nullable: bool | None = None) -> Int64: ...
@overload
def dtype(
value: type[str] | Literal["str", "string"], nullable: bool = True
value: type[str] | Literal["str", "string"], nullable: bool | None = None
) -> String: ...
@overload
def dtype(
value: type[bool] | Literal["bool", "boolean"], nullable: bool = True
value: type[bool] | Literal["bool", "boolean"], nullable: bool | None = None
) -> Boolean: ...
@overload
def dtype(value: type[bytes] | Literal["bytes"], nullable: bool = True) -> Binary: ...
def dtype(
value: type[bytes] | Literal["bytes"], nullable: bool | None = None
) -> Binary: ...
@overload
def dtype(value: type[Real] | Literal["float"], nullable: bool = True) -> Float64: ...
def dtype(
value: type[Real] | Literal["float"], nullable: bool | None = None
) -> Float64: ...
@overload
def dtype(
value: type[pydecimal.Decimal] | Literal["decimal"], nullable: bool = True
value: type[pydecimal.Decimal] | Literal["decimal"], nullable: bool | None = None
) -> Decimal: ...
@overload
def dtype(
value: type[pydatetime.datetime] | Literal["timestamp"], nullable: bool = True
value: type[pydatetime.datetime] | Literal["timestamp"],
nullable: bool | None = None,
) -> Timestamp: ...
@overload
def dtype(
value: type[pydatetime.date] | Literal["date"], nullable: bool = True
value: type[pydatetime.date] | Literal["date"], nullable: bool | None = None
) -> Date: ...
@overload
def dtype(
value: type[pydatetime.time] | Literal["time"], nullable: bool = True
value: type[pydatetime.time] | Literal["time"], nullable: bool | None = None
) -> Time: ...
@overload
def dtype(
value: type[pydatetime.timedelta] | Literal["interval"], nullable: bool = True
value: type[pydatetime.timedelta] | Literal["interval"],
nullable: bool | None = None,
) -> Interval: ...
@overload
def dtype(
value: type[pyuuid.UUID] | Literal["uuid"], nullable: bool = True
value: type[pyuuid.UUID] | Literal["uuid"], nullable: bool | None = None
) -> UUID: ...
@overload
def dtype(
value: DataType | str | np.dtype | ExtensionDtype | pl.DataType | pa.DataType,
nullable: bool = True,
nullable: bool | None = None,
) -> DataType: ...


@lazy_singledispatch
def dtype(value, nullable=True) -> DataType:
def dtype(value, nullable: bool | None = None) -> DataType:
"""Create a DataType object.

Parameters
Expand All @@ -96,21 +102,42 @@ def dtype(value, nullable=True) -> DataType:
strings, python type annotations, numpy dtypes, pandas dtypes, and
pyarrow types.
nullable
Whether the type should be nullable. Defaults to True.
If `value` is a string prefixed by "!", the type is always non-nullable.
Whether the resulting type should be nullable.
If `None`, we try to infer nullability from the input value.
For example, if `value` is a string starting with '!', the resulting type
will be non-nullable.
For inputs without an explicit nullability (like the python type `int` or
numpy dtype of `np.int32`), we default to `nullable=True`.

Examples
--------
>>> import ibis
>>> ibis.dtype("int32")
Int32(nullable=True)

Prefixing the type with "!" makes it non-nullable:

>>> ibis.dtype("!int32")
Int32(nullable=False)
>>> ibis.dtype("array<float>")
Array(value_type=Float64(nullable=True), length=None, nullable=True)

We support a rich string syntax for nested and parametric types:

>>> ibis.dtype("array<!float>")
Array(value_type=Float64(nullable=False), length=None, nullable=True)
>>> ibis.dtype("!struct<a: interval('s'), b: !bool>")
Struct([('a', Interval(unit=<IntervalUnit.SECOND: 's'>, nullable=True)), ('b', Boolean(nullable=False))], nullable=False)
>>> ibis.dtype("map<timestamp('America/Anchorage', 6), boolean>")
Map(key_type=Timestamp(timezone='America/Anchorage', scale=6, nullable=True), value_type=Boolean(nullable=True), nullable=True)

The function is idempotent (a DataType passed with `nullable=None` is returned as-is):

>>> t = ibis.dtype("int32")
>>> ibis.dtype(t) is t
True

DataType objects may also be created from Python types:

>>> ibis.dtype(int)
Int64(nullable=True)
>>> ibis.dtype(int, nullable=False)
Int64(nullable=False)
>>> ibis.dtype(list[float])
Expand All @@ -121,36 +148,52 @@ def dtype(value, nullable=True) -> DataType:
>>> import pyarrow as pa
>>> ibis.dtype(pa.int32())
Int32(nullable=True)
>>> ibis.dtype(pa.int32(), nullable=False)
Int32(nullable=False)

The `nullable` parameter may be used to override the nullability:

>>> ibis.dtype("!int32", nullable=True)
Int32(nullable=True)
>>> i = ibis.dtype("int32")
>>> i
Int32(nullable=True)
>>> ibis.dtype(i, nullable=False)
Int32(nullable=False)

"""
if isinstance(value, DataType):
return value
if nullable is None:
return value
return value.copy(nullable=nullable)
else:
if nullable is None:
nullable = True
return DataType.from_typehint(value, nullable)


@dtype.register(str)
def from_string(value, nullable: bool = True):
def from_string(value, nullable=None):
return DataType.from_string(value, nullable)


@dtype.register("numpy.dtype")
def from_numpy_dtype(value, nullable=True):
def from_numpy_dtype(value, nullable=None):
return DataType.from_numpy(value, nullable)


@dtype.register("pandas.core.dtypes.base.ExtensionDtype")
def from_pandas_extension_dtype(value, nullable=True):
def from_pandas_extension_dtype(value, nullable=None):
return DataType.from_pandas(value, nullable)


@dtype.register("pyarrow.lib.DataType")
def from_pyarrow(value, nullable=True):
def from_pyarrow(value, nullable=None):
return DataType.from_pyarrow(value, nullable)


@dtype.register("polars.datatypes.classes.DataTypeClass")
def from_polars(value, nullable=True):
def from_polars(value, nullable=None):
return DataType.from_polars(value, nullable)


Expand Down Expand Up @@ -228,15 +271,15 @@ def castable(self, to: DataType, **kwargs) -> bool:
return castable(self, to, **kwargs)

@classmethod
def from_string(cls, value: str, nullable: bool = True) -> Self:
def from_string(cls, value: str, nullable: bool | None = None) -> Self:
from ibis.expr.datatypes.parse import parse

try:
typ = parse(value)
except SyntaxError:
raise TypeError(f"{value!r} cannot be parsed as a datatype")

if not nullable:
if nullable is not None:
return typ.copy(nullable=nullable)
return typ

Expand Down Expand Up @@ -309,23 +352,25 @@ def from_typehint(cls, typ, nullable=True) -> Self:
raise TypeError(f"Value {typ!r} is not a valid datatype")

@classmethod
def from_numpy(cls, numpy_type: np.dtype, nullable: bool = True) -> Self:
def from_numpy(cls, numpy_type: np.dtype, nullable: bool | None = None) -> Self:
"""Return the equivalent ibis datatype."""
from ibis.formats.numpy import NumpyType

return NumpyType.to_ibis(numpy_type, nullable=nullable)

@classmethod
def from_pandas(
cls, pandas_type: np.dtype | ExtensionDtype, nullable: bool = True
cls, pandas_type: np.dtype | ExtensionDtype, nullable: bool | None = None
) -> Self:
"""Return the equivalent ibis datatype."""
from ibis.formats.pandas import PandasType

return PandasType.to_ibis(pandas_type, nullable=nullable)

@classmethod
def from_pyarrow(cls, arrow_type: pa.DataType, nullable: bool = True) -> Self:
def from_pyarrow(
cls, arrow_type: pa.DataType, nullable: bool | None = None
) -> Self:
"""Return the equivalent ibis datatype."""
from ibis.formats.pyarrow import PyArrowType

Expand Down
73 changes: 70 additions & 3 deletions ibis/expr/datatypes/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ def test_dtype(spec, expected):
marks=pytest.mark.xfail(sys.version_info < (3, 10), reason="python 3.9"),
),
(lambda: ("!int",), dt.Int64(nullable=False)),
(lambda: ("!int", True), dt.Int64(nullable=False)), # "!" overrides `nullable`
(lambda: ("!int", None), dt.Int64(nullable=False)),
(lambda: ("!int", False), dt.Int64(nullable=False)),
(lambda: ("!int", True), dt.Int64(nullable=True)),
],
)
def test_nullable_dtype(args, expected):
Expand Down Expand Up @@ -105,8 +107,73 @@ def test_bogus_union():
(dt.Time, dt.time),
],
)
def test_dtype_from_classes(klass, expected):
assert dt.dtype(klass) == expected
@pytest.mark.parametrize(
("nullable", "expected_nullable"),
[
(True, True),
(False, False),
(None, True),
],
)
def test_dtype_from_classes(klass, expected, nullable, expected_nullable):
assert dt.dtype(klass, nullable=nullable) == expected.copy(
nullable=expected_nullable
)


@pytest.mark.parametrize(
("inp", "nullable", "expected"),
[
(dt.Null(nullable=True), True, dt.Null(nullable=True)),
(dt.Null(nullable=True), False, dt.Null(nullable=False)),
(dt.Null(nullable=True), None, dt.Null(nullable=True)),
(dt.Null(nullable=False), True, dt.Null(nullable=True)),
(dt.Null(nullable=False), False, dt.Null(nullable=False)),
(dt.Null(nullable=False), None, dt.Null(nullable=False)),
(dt.Int16(nullable=True), True, dt.Int16(nullable=True)),
(dt.Int16(nullable=True), False, dt.Int16(nullable=False)),
(dt.Int16(nullable=True), None, dt.Int16(nullable=True)),
(dt.Int16(nullable=False), True, dt.Int16(nullable=True)),
(dt.Int16(nullable=False), False, dt.Int16(nullable=False)),
(dt.Int16(nullable=False), None, dt.Int16(nullable=False)),
# The nullability of the element type is NEVER changed,
# only the outer nullability can be changed.
(
dt.Array(dt.Int16(nullable=True), nullable=True),
True,
dt.Array(dt.Int16(nullable=True), nullable=True),
),
(
dt.Array(dt.Int16(nullable=True), nullable=True),
False,
dt.Array(dt.Int16(nullable=True), nullable=False),
),
(
dt.Array(dt.Int16(nullable=True), nullable=True),
None,
dt.Array(dt.Int16(nullable=True), nullable=True),
),
(
dt.Array(dt.Int16(nullable=False), nullable=True),
True,
dt.Array(dt.Int16(nullable=False), nullable=True),
),
(
dt.Array(dt.Int16(nullable=False), nullable=True),
False,
dt.Array(dt.Int16(nullable=False), nullable=False),
),
(
dt.Array(dt.Int16(nullable=False), nullable=True),
None,
dt.Array(dt.Int16(nullable=False), nullable=True),
),
],
)
def test_dtype_from_datatype_instance(
inp: dt.DataType, nullable: bool | None, expected: dt.DataType
):
assert dt.dtype(inp, nullable=nullable) == expected


@pytest.mark.parametrize(
Expand Down
5 changes: 5 additions & 0 deletions ibis/expr/datatypes/tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@
)
def test_primitive_from_string(nullable, spec, expected):
assert dt.dtype(spec, nullable=nullable) == expected(nullable=nullable)
assert dt.dtype(spec, nullable=None) == expected(nullable=True)
assert dt.dtype(spec) == expected(nullable=True)
assert dt.dtype("!" + spec, nullable=nullable) == expected(nullable=nullable)
assert dt.dtype("!" + spec, nullable=None) == expected(nullable=False)
assert dt.dtype("!" + spec) == expected(nullable=False)


@pytest.mark.parametrize(
Expand Down
7 changes: 5 additions & 2 deletions ibis/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def from_ibis(cls, dtype: DataType) -> T:
raise NotImplementedError

@classmethod
def to_ibis(cls, typ: T, nullable: bool = True) -> DataType:
def to_ibis(cls, typ: T, nullable: bool | None = None) -> DataType:
"""Convert a format-specific type object to an Ibis DataType.

Parameters
Expand All @@ -47,6 +47,8 @@ def to_ibis(cls, typ: T, nullable: bool = True) -> DataType:
The format-specific type object to convert.
nullable
Whether the Ibis DataType should be nullable.
If `None`, the nullability will be inferred from `typ` if possible.
If inference is not possible, we assume `nullable=True`.

Returns
-------
Expand All @@ -56,7 +58,7 @@ def to_ibis(cls, typ: T, nullable: bool = True) -> DataType:
raise NotImplementedError

@classmethod
def from_string(cls, text: str, nullable: bool = True) -> DataType:
def from_string(cls, text: str, nullable: bool | None = None) -> DataType:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The subclasses SqlglotType and PostgresType already use this signature, so this is more of a bugfix that brings the ABC into line with how it is actually used.

"""Convert a backend-specific string representation into an Ibis DataType.

Parameters
Expand All @@ -65,6 +67,7 @@ def from_string(cls, text: str, nullable: bool = True) -> DataType:
The backend-specific string representation to convert.
nullable
Whether the Ibis DataType should be nullable.
If `None`, the specific type mapper will choose a default.

Returns
-------
Expand Down
8 changes: 7 additions & 1 deletion ibis/formats/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@

class NumpyType(TypeMapper[np.dtype]):
@classmethod
def to_ibis(cls, typ: np.dtype, nullable: bool = True) -> dt.DataType:
def to_ibis(cls, typ: np.dtype, nullable: bool | None = True) -> dt.DataType:
# numpy's type system doesn't keep track of nullability.
# We accept nullable=None to be compatible with the rest of TypeMapper.to_ibis()
# implementations, but we treat None as True, since we can't infer nullability
# from a numpy dtype.
if nullable is None:
nullable = True
if np.issubdtype(typ, np.datetime64):
# TODO(kszucs): the following code provides proper timestamp roundtrips
# between ibis and numpy/pandas but breaks the test suite at several
Expand Down
8 changes: 7 additions & 1 deletion ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,13 @@

class PandasType(NumpyType):
@classmethod
def to_ibis(cls, typ, nullable=True):
def to_ibis(cls, typ, nullable: bool | None = None):
# pandas's type system doesn't keep track of nullability.
# We accept nullable=None to be compatible with the rest of TypeMapper.to_ibis()
# implementations, but we treat None as True, since we can't infer nullability
# from a pandas dtype.
if nullable is None:
nullable = True
if isinstance(typ, pdt.DatetimeTZDtype):
return dt.Timestamp(timezone=str(typ.tz), nullable=nullable)
elif pdt.is_datetime64_dtype(typ):
Expand Down
Loading