Skip to content

Commit 7cff0f4

Browse files
API: Period.to_timestamp default to microsecond unit (#63760)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 5521808 commit 7cff0f4

File tree

15 files changed

+169
-147
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,7 @@ Other API changes
800800
^^^^^^^^^^^^^^^^^
801801
- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`)
802802
- :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`)
803+
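- :meth:`Period.to_timestamp` and :meth:`PeriodIndex.to_timestamp` now return microsecond-unit objects, except when the period frequency or the requested ``freq`` is nanosecond, in which case they return nanosecond-unit objects. As a result, :meth:`Period.end_time` and :meth:`PeriodIndex.end_time` for non-nanosecond frequencies now have microsecond rather than nanosecond resolution (e.g. ``23:59:59.999999`` instead of ``23:59:59.999999999``) (:issue:`56164`)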
803804
- All classes inheriting from builtin ``tuple`` (including types created with :func:`collections.namedtuple`) are now hashed and compared as builtin ``tuple`` during indexing operations (:issue:`57922`)
804805
- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
805806
- Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`, previously output had a new :class:`RangeIndex` (:issue:`51452`)

pandas/_libs/tslibs/period.pyx

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ from libc.time cimport (
3838
tm,
3939
)
4040

41-
from pandas._libs.tslibs.dtypes cimport c_OFFSET_TO_PERIOD_FREQSTR
41+
from pandas._libs.tslibs.dtypes cimport (
42+
PeriodDtypeCode,
43+
c_OFFSET_TO_PERIOD_FREQSTR,
44+
)
4245

4346
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
4447

@@ -69,13 +72,6 @@ from pandas._libs.tslibs.ccalendar cimport (
6972
get_week_of_year,
7073
is_leapyear,
7174
)
72-
from pandas._libs.tslibs.timedeltas cimport (
73-
delta_to_nanoseconds,
74-
is_any_td_scalar,
75-
)
76-
77-
from pandas._libs.tslibs.conversion import DT64NS_DTYPE
78-
7975
from pandas._libs.tslibs.dtypes cimport (
8076
FR_ANN,
8177
FR_BUS,
@@ -95,6 +91,10 @@ from pandas._libs.tslibs.dtypes cimport (
9591
freq_group_code_to_npy_unit,
9692
)
9793
from pandas._libs.tslibs.parsing cimport quarter_to_myear
94+
from pandas._libs.tslibs.timedeltas cimport (
95+
delta_to_nanoseconds,
96+
is_any_td_scalar,
97+
)
9898

9999
from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso
100100

@@ -973,13 +973,13 @@ def periodarr_to_dt64arr(const int64_t[:] periodarr, int freq):
973973
for i in range(N):
974974
out[i] = period_ordinal_to_dt64(periodarr[i], freq)
975975

976-
return out.base # .base to access underlying np.ndarray
976+
return out.base.view("M8[us]") # .base to access underlying np.ndarray
977977

978978
else:
979979
# Short-circuit for performance
980980
if freq == FR_NS:
981981
# TODO: copy?
982-
return periodarr.base
982+
return periodarr.base.view("M8[ns]")
983983

984984
if freq == FR_US:
985985
dta = periodarr.base.view("M8[us]")
@@ -993,7 +993,8 @@ def periodarr_to_dt64arr(const int64_t[:] periodarr, int freq):
993993
dta = periodarr.base.view("M8[h]")
994994
elif freq == FR_DAY:
995995
dta = periodarr.base.view("M8[D]")
996-
return astype_overflowsafe(dta, dtype=DT64NS_DTYPE)
996+
# GH#63760 give microseconds for everything other than freq="ns"
997+
return astype_overflowsafe(dta, dtype=np.dtype("M8[us]"))
997998

998999

9991000
cdef void get_asfreq_info(int from_freq, int to_freq,
@@ -1161,13 +1162,19 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1:
11611162
if ordinal == NPY_NAT:
11621163
return NPY_NAT
11631164

1165+
if freq == PeriodDtypeCode.N:
1166+
# We have to return nanosecond unit, but this is a no-op
1167+
return ordinal
1168+
11641169
get_date_info(ordinal, freq, &dts)
11651170

11661171
try:
1167-
result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts)
1172+
result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_us, &dts)
11681173
except OverflowError as err:
11691174
fmt = dts_to_iso_string(&dts)
1170-
raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {fmt}") from err
1175+
raise OutOfBoundsDatetime(
1176+
f"Out of bounds microsecond timestamp: {fmt}"
1177+
) from err
11711178

11721179
return result
11731180

@@ -1681,7 +1688,7 @@ cdef class PeriodMixin:
16811688
Timestamp('2012-01-01 00:00:00')
16821689

16831690
>>> period.end_time
1684-
Timestamp('2012-01-01 23:59:59.999999999')
1691+
Timestamp('2012-01-01 23:59:59.999999')
16851692
"""
16861693
return self.to_timestamp(how="start")
16871694

@@ -1706,7 +1713,7 @@ cdef class PeriodMixin:
17061713
For Period:
17071714

17081715
>>> pd.Period('2020-01', 'D').end_time
1709-
Timestamp('2020-01-01 23:59:59.999999999')
1716+
Timestamp('2020-01-01 23:59:59.999999')
17101717

17111718
For Series:
17121719

@@ -1718,19 +1725,19 @@ cdef class PeriodMixin:
17181725
2 2020-03
17191726
dtype: period[M]
17201727
>>> s.dt.end_time
1721-
0 2020-01-31 23:59:59.999999999
1722-
1 2020-02-29 23:59:59.999999999
1723-
2 2020-03-31 23:59:59.999999999
1724-
dtype: datetime64[ns]
1728+
0 2020-01-31 23:59:59.999999
1729+
1 2020-02-29 23:59:59.999999
1730+
2 2020-03-31 23:59:59.999999
1731+
dtype: datetime64[us]
17251732

17261733
For PeriodIndex:
17271734

17281735
>>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
17291736
>>> idx.end_time
1730-
DatetimeIndex(['2023-01-31 23:59:59.999999999',
1731-
'2023-02-28 23:59:59.999999999',
1732-
'2023-03-31 23:59:59.999999999'],
1733-
dtype='datetime64[ns]', freq=None)
1737+
DatetimeIndex(['2023-01-31 23:59:59.999999',
1738+
'2023-02-28 23:59:59.999999',
1739+
'2023-03-31 23:59:59.999999'],
1740+
dtype='datetime64[us]', freq=None)
17341741
"""
17351742
return self.to_timestamp(how="end")
17361743

@@ -2061,6 +2068,9 @@ cdef class _Period(PeriodMixin):
20612068
Uses the target frequency specified at the part of the period specified
20622069
by `how`, which is either `Start` or `Finish`.
20632070

2071+
Returns a microsecond-unit Timestamp, unless the Period has nanosecond
2072+
frequency or `freq` is "ns", in which case the unit is nanosecond.
2073+
20642074
Parameters
20652075
----------
20662076
freq : str or DateOffset
@@ -2089,14 +2099,19 @@ cdef class _Period(PeriodMixin):
20892099
"""
20902100
how = validate_end_alias(how)
20912101

2102+
if self._dtype._dtype_code == PeriodDtypeCode.N or freq == "ns":
2103+
unit = "ns"
2104+
else:
2105+
unit = "us"
2106+
20922107
end = how == "E"
20932108
if end:
20942109
if freq == "B" or self._freq == "B":
20952110
# roll forward to ensure we land on B date
2096-
adjust = np.timedelta64(1, "D") - np.timedelta64(1, "ns")
2111+
adjust = np.timedelta64(1, "D") - np.timedelta64(1, unit)
20972112
return self.to_timestamp(how="start") + adjust
20982113
endpoint = (self + self._freq).to_timestamp(how="start")
2099-
return endpoint - np.timedelta64(1, "ns")
2114+
return endpoint - np.timedelta64(1, unit)
21002115

21012116
if freq is None:
21022117
freq_code = self._dtype._get_to_timestamp_base()
@@ -2110,7 +2125,7 @@ cdef class _Period(PeriodMixin):
21102125
val = self.asfreq(freq, how)
21112126

21122127
dt64 = period_ordinal_to_dt64(val.ordinal, base)
2113-
return Timestamp(dt64)
2128+
return Timestamp(dt64, unit=unit)
21142129

21152130
@property
21162131
def year(self) -> int:

pandas/core/arrays/period.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,9 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
759759
"""
760760
Cast to DatetimeArray/Index.
761761
762+
Gives a microsecond-unit DatetimeArray/Index, unless the period frequency
763+
or `freq` is nanosecond, in which case the result has nanosecond unit.
764+
762765
Parameters
763766
----------
764767
freq : str or DateOffset, optional
@@ -789,34 +792,39 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
789792
>>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
790793
>>> idx.to_timestamp()
791794
DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'],
792-
dtype='datetime64[ns]', freq='MS')
795+
dtype='datetime64[us]', freq='MS')
793796
794797
The frequency will not be inferred if the index contains less than
795798
three elements, or if the values of index are not strictly monotonic:
796799
797800
>>> idx = pd.PeriodIndex(["2023-01", "2023-02"], freq="M")
798801
>>> idx.to_timestamp()
799-
DatetimeIndex(['2023-01-01', '2023-02-01'], dtype='datetime64[ns]', freq=None)
802+
DatetimeIndex(['2023-01-01', '2023-02-01'], dtype='datetime64[us]', freq=None)
800803
801804
>>> idx = pd.PeriodIndex(
802805
... ["2023-01", "2023-02", "2023-02", "2023-03"], freq="2M"
803806
... )
804807
>>> idx.to_timestamp()
805808
DatetimeIndex(['2023-01-01', '2023-02-01', '2023-02-01', '2023-03-01'],
806-
dtype='datetime64[ns]', freq=None)
809+
dtype='datetime64[us]', freq=None)
807810
"""
808811
from pandas.core.arrays import DatetimeArray
809812

810813
how = libperiod.validate_end_alias(how)
811814

815+
if self.freq.base == "ns" or freq == "ns":
816+
unit = "ns"
817+
else:
818+
unit = "us"
819+
812820
end = how == "E"
813821
if end:
814822
if freq == "B" or self.freq == "B":
815823
# roll forward to ensure we land on B date
816-
adjust = Timedelta(1, "D") - Timedelta(1, "ns")
824+
adjust = Timedelta(1, unit="D") - Timedelta(1, unit=unit)
817825
return self.to_timestamp(how="start") + adjust
818826
else:
819-
adjust = Timedelta(1, "ns")
827+
adjust = Timedelta(1, unit=unit)
820828
return (self + self.freq).to_timestamp(how="start") - adjust
821829

822830
if freq is None:
@@ -831,7 +839,8 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
831839
new_parr = self.asfreq(freq, how=how)
832840

833841
new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
834-
dta = DatetimeArray._from_sequence(new_data, dtype=np.dtype("M8[ns]"))
842+
dta = DatetimeArray._from_sequence(new_data, dtype=new_data.dtype)
843+
assert dta.unit == unit
835844

836845
if self.freq.name == "B":
837846
# See if we can retain BDay instead of Day in cases where

pandas/core/frame.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16256,7 +16256,7 @@ def to_timestamp(
1625616256
2023-01-01 1 3
1625716257
2024-01-01 2 4
1625816258
>>> df1.index
16259-
DatetimeIndex(['2023-01-01', '2024-01-01'], dtype='datetime64[ns]', freq=None)
16259+
DatetimeIndex(['2023-01-01', '2024-01-01'], dtype='datetime64[us]', freq=None)
1626016260

1626116261
Using `freq` which is the offset that the Timestamps will have
1626216262

@@ -16267,7 +16267,7 @@ def to_timestamp(
1626716267
2023-01-31 1 3
1626816268
2024-01-31 2 4
1626916269
>>> df2.index
16270-
DatetimeIndex(['2023-01-31', '2024-01-31'], dtype='datetime64[ns]', freq=None)
16270+
DatetimeIndex(['2023-01-31', '2024-01-31'], dtype='datetime64[us]', freq=None)
1627116271
"""
1627216272
self._check_copy_deprecation(copy)
1627316273
new_obj = self.copy(deep=False)

pandas/core/indexes/datetimes.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -896,7 +896,10 @@ def _parsed_string_to_bounds(
896896
"""
897897
freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
898898
per = Period(parsed, freq=freq)
899-
start, end = per.start_time, per.end_time
899+
start = per.start_time
900+
# Can't use end_time here because that would subtract a microsecond
901+
# instead of a nanosecond
902+
end = (per + 1).start_time - np.timedelta64(1, "ns")
900903
start = start.as_unit(self.unit)
901904
end = end.as_unit(self.unit)
902905

pandas/tests/arrays/test_datetimelike.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from pandas._libs import (
1010
NaT,
11-
OutOfBoundsDatetime,
1211
Timestamp,
1312
)
1413
from pandas._libs.tslibs import to_offset
@@ -1110,28 +1109,25 @@ def test_to_timestamp_roundtrip_bday(self):
11101109
parr = dta.to_period()
11111110
result = parr.to_timestamp()
11121111
assert result.freq == "B"
1113-
tm.assert_extension_array_equal(result, dta)
1112+
tm.assert_extension_array_equal(result, dta.as_unit("us"))
11141113

11151114
dta2 = dta[::2]
11161115
parr2 = dta2.to_period()
11171116
result2 = parr2.to_timestamp()
11181117
assert result2.freq == "2B"
1119-
tm.assert_extension_array_equal(result2, dta2)
1118+
tm.assert_extension_array_equal(result2, dta2.as_unit("us"))
11201119

11211120
parr3 = dta.to_period("2B")
11221121
result3 = parr3.to_timestamp()
11231122
assert result3.freq == "B"
1124-
tm.assert_extension_array_equal(result3, dta)
1123+
tm.assert_extension_array_equal(result3, dta.as_unit("us"))
11251124

11261125
def test_to_timestamp_out_of_bounds(self):
11271126
# GH#19643 previously overflowed silently
11281127
pi = pd.period_range("1500", freq="Y", periods=3)
1129-
msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00"
1130-
with pytest.raises(OutOfBoundsDatetime, match=msg):
1131-
pi.to_timestamp()
1132-
1133-
with pytest.raises(OutOfBoundsDatetime, match=msg):
1134-
pi._data.to_timestamp()
1128+
pi.to_timestamp()
1129+
dta = pi._data.to_timestamp()
1130+
assert dta[0] == Timestamp(1500, 1, 1)
11351131

11361132
@pytest.mark.parametrize("propname", PeriodArray._bool_ops)
11371133
def test_bool_properties(self, arr1d, propname):

pandas/tests/frame/methods/test_to_csv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,11 +393,11 @@ def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols, temp_file)
393393
if r_idx_type == "dt":
394394
expected.index = expected.index.astype("M8[us]")
395395
elif r_idx_type == "p":
396-
expected.index = expected.index.astype("M8[ns]")
396+
expected.index = expected.index.astype("M8[us]")
397397
if c_idx_type == "dt":
398398
expected.columns = expected.columns.astype("M8[us]")
399399
elif c_idx_type == "p":
400-
expected.columns = expected.columns.astype("M8[ns]")
400+
expected.columns = expected.columns.astype("M8[us]")
401401
tm.assert_frame_equal(result, expected, check_names=False)
402402

403403
@pytest.mark.slow

0 commit comments

Comments
 (0)