Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,7 @@ Other API changes
^^^^^^^^^^^^^^^^^
- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`)
- :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`)
- :meth:`Period.to_timestamp` and :meth:`PeriodIndex.to_timestamp` now give microsecond-unit objects when possible, and nanosecond-unit objects in other cases. This affects the actual value of :attr:`Period.end_time` and :attr:`PeriodIndex.end_time` (:issue:`56164`)
- All classes inheriting from builtin ``tuple`` (including types created with :func:`collections.namedtuple`) are now hashed and compared as builtin ``tuple`` during indexing operations (:issue:`57922`)
- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
- Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`, previously output had a new :class:`RangeIndex` (:issue:`51452`)
Expand Down
67 changes: 41 additions & 26 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ from libc.time cimport (
tm,
)

from pandas._libs.tslibs.dtypes cimport c_OFFSET_TO_PERIOD_FREQSTR
from pandas._libs.tslibs.dtypes cimport (
PeriodDtypeCode,
c_OFFSET_TO_PERIOD_FREQSTR,
)

from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime

Expand Down Expand Up @@ -69,13 +72,6 @@ from pandas._libs.tslibs.ccalendar cimport (
get_week_of_year,
is_leapyear,
)
from pandas._libs.tslibs.timedeltas cimport (
delta_to_nanoseconds,
is_any_td_scalar,
)

from pandas._libs.tslibs.conversion import DT64NS_DTYPE

from pandas._libs.tslibs.dtypes cimport (
FR_ANN,
FR_BUS,
Expand All @@ -95,6 +91,10 @@ from pandas._libs.tslibs.dtypes cimport (
freq_group_code_to_npy_unit,
)
from pandas._libs.tslibs.parsing cimport quarter_to_myear
from pandas._libs.tslibs.timedeltas cimport (
delta_to_nanoseconds,
is_any_td_scalar,
)

from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso

Expand Down Expand Up @@ -973,13 +973,13 @@ def periodarr_to_dt64arr(const int64_t[:] periodarr, int freq):
for i in range(N):
out[i] = period_ordinal_to_dt64(periodarr[i], freq)

return out.base # .base to access underlying np.ndarray
return out.base.view("M8[us]") # .base to access underlying np.ndarray

else:
# Short-circuit for performance
if freq == FR_NS:
# TODO: copy?
return periodarr.base
return periodarr.base.view("M8[ns]")

if freq == FR_US:
dta = periodarr.base.view("M8[us]")
Expand All @@ -993,7 +993,8 @@ def periodarr_to_dt64arr(const int64_t[:] periodarr, int freq):
dta = periodarr.base.view("M8[h]")
elif freq == FR_DAY:
dta = periodarr.base.view("M8[D]")
return astype_overflowsafe(dta, dtype=DT64NS_DTYPE)
# GH#63760 give microseconds for everything other than freq="ns"
return astype_overflowsafe(dta, dtype=np.dtype("M8[us]"))


cdef void get_asfreq_info(int from_freq, int to_freq,
Expand Down Expand Up @@ -1161,13 +1162,19 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1:
if ordinal == NPY_NAT:
return NPY_NAT

if freq == PeriodDtypeCode.N:
# We have to return nanosecond unit, but this is a no-op
return ordinal

get_date_info(ordinal, freq, &dts)

try:
result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts)
result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_us, &dts)
except OverflowError as err:
fmt = dts_to_iso_string(&dts)
raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {fmt}") from err
raise OutOfBoundsDatetime(
f"Out of bounds microsecond timestamp: {fmt}"
) from err

return result

Expand Down Expand Up @@ -1681,7 +1688,7 @@ cdef class PeriodMixin:
Timestamp('2012-01-01 00:00:00')

>>> period.end_time
Timestamp('2012-01-01 23:59:59.999999999')
Timestamp('2012-01-01 23:59:59.999999')
"""
return self.to_timestamp(how="start")

Expand All @@ -1706,7 +1713,7 @@ cdef class PeriodMixin:
For Period:

>>> pd.Period('2020-01', 'D').end_time
Timestamp('2020-01-01 23:59:59.999999999')
Timestamp('2020-01-01 23:59:59.999999')

For Series:

Expand All @@ -1718,19 +1725,19 @@ cdef class PeriodMixin:
2 2020-03
dtype: period[M]
>>> s.dt.end_time
0 2020-01-31 23:59:59.999999999
1 2020-02-29 23:59:59.999999999
2 2020-03-31 23:59:59.999999999
dtype: datetime64[ns]
0 2020-01-31 23:59:59.999999
1 2020-02-29 23:59:59.999999
2 2020-03-31 23:59:59.999999
dtype: datetime64[us]

For PeriodIndex:

>>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
>>> idx.end_time
DatetimeIndex(['2023-01-31 23:59:59.999999999',
'2023-02-28 23:59:59.999999999',
'2023-03-31 23:59:59.999999999'],
dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2023-01-31 23:59:59.999999',
'2023-02-28 23:59:59.999999',
'2023-03-31 23:59:59.999999'],
dtype='datetime64[us]', freq=None)
"""
return self.to_timestamp(how="end")

Expand Down Expand Up @@ -2061,6 +2068,9 @@ cdef class _Period(PeriodMixin):
Uses the target frequency at the part of the period specified
by `how`, which is either `Start` or `Finish`.

Gives a microsecond-unit Timestamp, unless the Period itself or the
requested `freq` is nanosecond, in which case the result has
nanosecond unit.

Parameters
----------
freq : str or DateOffset
Expand Down Expand Up @@ -2089,14 +2099,19 @@ cdef class _Period(PeriodMixin):
"""
how = validate_end_alias(how)

if self._dtype._dtype_code == PeriodDtypeCode.N or freq == "ns":
unit = "ns"
else:
unit = "us"

end = how == "E"
if end:
if freq == "B" or self._freq == "B":
# roll forward to ensure we land on B date
adjust = np.timedelta64(1, "D") - np.timedelta64(1, "ns")
adjust = np.timedelta64(1, "D") - np.timedelta64(1, unit)
return self.to_timestamp(how="start") + adjust
endpoint = (self + self._freq).to_timestamp(how="start")
return endpoint - np.timedelta64(1, "ns")
return endpoint - np.timedelta64(1, unit)

if freq is None:
freq_code = self._dtype._get_to_timestamp_base()
Expand All @@ -2110,7 +2125,7 @@ cdef class _Period(PeriodMixin):
val = self.asfreq(freq, how)

dt64 = period_ordinal_to_dt64(val.ordinal, base)
return Timestamp(dt64)
return Timestamp(dt64, unit=unit)

@property
def year(self) -> int:
Expand Down
21 changes: 15 additions & 6 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,9 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
"""
Cast to DatetimeArray/Index.

Gives a microsecond-unit DatetimeArray/Index, unless the array's own
frequency or the requested `freq` is nanosecond, in which case the
result has nanosecond unit.

Parameters
----------
freq : str or DateOffset, optional
Expand Down Expand Up @@ -789,34 +792,39 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
>>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
>>> idx.to_timestamp()
DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'],
dtype='datetime64[ns]', freq='MS')
dtype='datetime64[us]', freq='MS')

The frequency will not be inferred if the index contains fewer than
three elements, or if the values of the index are not strictly monotonic:

>>> idx = pd.PeriodIndex(["2023-01", "2023-02"], freq="M")
>>> idx.to_timestamp()
DatetimeIndex(['2023-01-01', '2023-02-01'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2023-01-01', '2023-02-01'], dtype='datetime64[us]', freq=None)

>>> idx = pd.PeriodIndex(
... ["2023-01", "2023-02", "2023-02", "2023-03"], freq="2M"
... )
>>> idx.to_timestamp()
DatetimeIndex(['2023-01-01', '2023-02-01', '2023-02-01', '2023-03-01'],
dtype='datetime64[ns]', freq=None)
dtype='datetime64[us]', freq=None)
"""
from pandas.core.arrays import DatetimeArray

how = libperiod.validate_end_alias(how)

if self.freq.base == "ns" or freq == "ns":
unit = "ns"
else:
unit = "us"

end = how == "E"
if end:
if freq == "B" or self.freq == "B":
# roll forward to ensure we land on B date
adjust = Timedelta(1, "D") - Timedelta(1, "ns")
adjust = Timedelta(1, unit="D") - Timedelta(1, unit=unit)
return self.to_timestamp(how="start") + adjust
else:
adjust = Timedelta(1, "ns")
adjust = Timedelta(1, unit=unit)
return (self + self.freq).to_timestamp(how="start") - adjust

if freq is None:
Expand All @@ -831,7 +839,8 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
new_parr = self.asfreq(freq, how=how)

new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
dta = DatetimeArray._from_sequence(new_data, dtype=np.dtype("M8[ns]"))
dta = DatetimeArray._from_sequence(new_data, dtype=new_data.dtype)
assert dta.unit == unit

if self.freq.name == "B":
# See if we can retain BDay instead of Day in cases where
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -16256,7 +16256,7 @@ def to_timestamp(
2023-01-01 1 3
2024-01-01 2 4
>>> df1.index
DatetimeIndex(['2023-01-01', '2024-01-01'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2023-01-01', '2024-01-01'], dtype='datetime64[us]', freq=None)

Using `freq` which is the offset that the Timestamps will have

Expand All @@ -16267,7 +16267,7 @@ def to_timestamp(
2023-01-31 1 3
2024-01-31 2 4
>>> df2.index
DatetimeIndex(['2023-01-31', '2024-01-31'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2023-01-31', '2024-01-31'], dtype='datetime64[us]', freq=None)
"""
self._check_copy_deprecation(copy)
new_obj = self.copy(deep=False)
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -896,7 +896,10 @@ def _parsed_string_to_bounds(
"""
freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
per = Period(parsed, freq=freq)
start, end = per.start_time, per.end_time
start = per.start_time
# Can't use end_time here because that will subtract a microsecond
# instead of a nanosecond
end = (per + 1).start_time - np.timedelta64(1, "ns")
start = start.as_unit(self.unit)
end = end.as_unit(self.unit)

Expand Down
16 changes: 6 additions & 10 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from pandas._libs import (
NaT,
OutOfBoundsDatetime,
Timestamp,
)
from pandas._libs.tslibs import to_offset
Expand Down Expand Up @@ -1110,28 +1109,25 @@ def test_to_timestamp_roundtrip_bday(self):
parr = dta.to_period()
result = parr.to_timestamp()
assert result.freq == "B"
tm.assert_extension_array_equal(result, dta)
tm.assert_extension_array_equal(result, dta.as_unit("us"))

dta2 = dta[::2]
parr2 = dta2.to_period()
result2 = parr2.to_timestamp()
assert result2.freq == "2B"
tm.assert_extension_array_equal(result2, dta2)
tm.assert_extension_array_equal(result2, dta2.as_unit("us"))

parr3 = dta.to_period("2B")
result3 = parr3.to_timestamp()
assert result3.freq == "B"
tm.assert_extension_array_equal(result3, dta)
tm.assert_extension_array_equal(result3, dta.as_unit("us"))

def test_to_timestamp_out_of_bounds(self):
# GH#19643 previously overflowed silently
pi = pd.period_range("1500", freq="Y", periods=3)
msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00"
with pytest.raises(OutOfBoundsDatetime, match=msg):
pi.to_timestamp()

with pytest.raises(OutOfBoundsDatetime, match=msg):
pi._data.to_timestamp()
pi.to_timestamp()
dta = pi._data.to_timestamp()
assert dta[0] == Timestamp(1500, 1, 1)

@pytest.mark.parametrize("propname", PeriodArray._bool_ops)
def test_bool_properties(self, arr1d, propname):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/methods/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,11 +393,11 @@ def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols, temp_file)
if r_idx_type == "dt":
expected.index = expected.index.astype("M8[us]")
elif r_idx_type == "p":
expected.index = expected.index.astype("M8[ns]")
expected.index = expected.index.astype("M8[us]")
if c_idx_type == "dt":
expected.columns = expected.columns.astype("M8[us]")
elif c_idx_type == "p":
expected.columns = expected.columns.astype("M8[ns]")
expected.columns = expected.columns.astype("M8[us]")
tm.assert_frame_equal(result, expected, check_names=False)

@pytest.mark.slow
Expand Down
Loading
Loading