-
Notifications
You must be signed in to change notification settings - Fork 71
Improve scalar logic for timestamps #1025
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
788c8e5
7fe3eb7
df9e8a1
30abf05
ca9702a
b67371d
8419dcd
fdaad32
41edc0b
493795f
bf713e8
4f4be76
4627db6
f8e65c6
961223c
758cb3f
920e75a
41df31d
0f125f6
f04695c
ad26b7e
4f69eea
1d89cc7
b7bf2fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,6 +51,10 @@ def as_timelike(op): | |
| raise ValueError(f"Don't know how to make {type(op)} timelike") | ||
|
|
||
|
|
||
| def is_timestamp_nano(obj): | ||
| return "int" in str(type(obj)) or "int" in str(getattr(obj, "dtype", "")) | ||
|
|
||
|
|
||
| class Operation: | ||
| """Helper wrapper around a function, which is used as operator""" | ||
|
|
||
|
|
@@ -137,6 +141,9 @@ def reduce(self, *operands, **kwargs): | |
| ) | ||
| ): | ||
| operands = tuple(map(as_timelike, operands)) | ||
| if is_cudf_type(operands[0]) and isinstance(operands[1], np.timedelta64): | ||
| operands = (dd.to_datetime(operands[0], unit="s"), operands[1]) | ||
|
||
|
|
||
| return reduce(partial(self.operation, **kwargs), operands) | ||
| else: | ||
| return self.unary_operation(*operands, **kwargs) | ||
|
|
@@ -250,6 +257,9 @@ def cast(self, operand, rex=None) -> SeriesOrScalar: | |
| if output_type == "DECIMAL": | ||
| sql_type_args = rex.getPrecisionScale() | ||
|
|
||
| if output_type == "TIMESTAMP" and is_timestamp_nano(operand): | ||
| operand = operand * 10**9 | ||
|
|
||
| if not is_frame(operand): # pragma: no cover | ||
| return sql_to_python_value(sql_type, operand) | ||
|
|
||
|
|
@@ -612,17 +622,8 @@ def to_timestamp(self, df, format): | |
| format = format.replace('"', "") | ||
| format = format.replace("'", "") | ||
|
|
||
| # TODO: format timestamps for GPU tests | ||
| if is_cudf_type(df): | ||
| if format != default_format: | ||
| raise RuntimeError("Non-default timestamp formats not supported on GPU") | ||
| if df.dtype == "object": | ||
| return df | ||
| else: | ||
| nanoseconds_to_seconds = 10**9 | ||
| return df * nanoseconds_to_seconds | ||
| # String cases | ||
| elif type(df) == str: | ||
| if type(df) == str: | ||
| return np.datetime64(datetime.strptime(df, format)) | ||
| elif df.dtype == "object": | ||
| return dd.to_datetime(df, format=format) | ||
|
|
@@ -634,7 +635,10 @@ def to_timestamp(self, df, format): | |
| else: | ||
| if format != default_format: | ||
| raise RuntimeError("Integer input does not accept a format argument") | ||
| return dd.to_datetime(df, unit="s") | ||
| if is_cudf_type(df): | ||
| return df | ||
|
||
| else: | ||
| return dd.to_datetime(df, unit="s") | ||
|
|
||
|
|
||
| class YearOperation(Operation): | ||
|
|
@@ -655,7 +659,10 @@ def timestampadd(self, unit, interval, df: SeriesOrScalar): | |
| interval = int(interval) | ||
| if interval < 0: | ||
| raise RuntimeError(f"Negative time interval {interval} is not supported.") | ||
| df = df.astype("datetime64[ns]") | ||
| if is_timestamp_nano(df): | ||
| df = df.astype("datetime64[s]") | ||
| else: | ||
| df = df.astype("datetime64[ns]") | ||
sarahyurick marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| if is_cudf_type(df): | ||
| from cudf import DateOffset | ||
|
|
@@ -699,11 +706,23 @@ def __init__(self): | |
| super().__init__(self.datetime_sub) | ||
|
|
||
| def datetime_sub(self, unit, df1, df2): | ||
| if is_timestamp_nano(df1): | ||
| df1 = df1 * 10**9 | ||
| if is_timestamp_nano(df2): | ||
| df2 = df2 * 10**9 | ||
| if "datetime64[s]" == str(getattr(df1, "dtype", "")): | ||
| df1 = df1.astype("datetime64[ns]") | ||
| if "datetime64[s]" == str(getattr(df2, "dtype", "")): | ||
| df2 = df2.astype("datetime64[ns]") | ||
charlesbluca marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| subtraction_op = ReduceOperation( | ||
| operation=operator.sub, unary_operation=lambda x: -x | ||
| ) | ||
| result = subtraction_op(df2, df1) | ||
|
|
||
| if is_cudf_type(df1): | ||
| result = result.astype("int") | ||
|
|
||
| if unit in {"NANOSECOND", "NANOSECONDS"}: | ||
| return result | ||
| elif unit in {"MICROSECOND", "MICROSECONDS"}: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to use something like
`pd.api.types.is_integer_dtype` for this check, or is there a specific case I'm missing?