Skip to content

Commit 7d2e7f6

Browse files
fix(python): Add {top, bottom}_k_by to Series (#22902)
Co-authored-by: TareqKomboz <[email protected]>
1 parent f9ee27c commit 7d2e7f6

2 files changed

Lines changed: 122 additions & 0 deletions

File tree

py-polars/polars/series/series.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3381,7 +3381,9 @@ def top_k(self, k: int = 5) -> Series:
33813381
33823382
See Also
33833383
--------
3384+
top_k_by
33843385
bottom_k
3386+
bottom_k_by
33853387
33863388
Examples
33873389
--------
@@ -3396,6 +3398,56 @@ def top_k(self, k: int = 5) -> Series:
33963398
]
33973399
"""
33983400

3401+
def top_k_by(
3402+
self,
3403+
by: IntoExpr | Iterable[IntoExpr],
3404+
k: int = 5,
3405+
*,
3406+
reverse: bool | Sequence[bool] = False,
3407+
) -> Series:
3408+
r"""
3409+
Return the elements at the positions of the `k` largest values in `by`.
3410+
3411+
Non-null elements are always preferred over null elements, regardless of
3412+
the value of `reverse`. The output is not guaranteed to be in any
3413+
particular order; call :func:`sort` after this function if you wish the
3414+
output to be sorted.
3415+
3416+
This has time complexity:
3417+
3418+
.. math:: O(n \log{n})
3419+
3420+
Parameters
3421+
----------
3422+
by
3423+
Column used to determine the largest elements.
3424+
Accepts expression input. Strings are parsed as column names.
3425+
k
3426+
Number of elements to return.
3427+
reverse
3428+
Consider the `k` smallest elements of the `by` column (instead of the `k`
3429+
largest). This can be specified per column by passing a sequence of
3430+
booleans.
3431+
3432+
See Also
3433+
--------
3434+
top_k
3435+
bottom_k
3436+
bottom_k_by
3437+
3438+
Examples
3439+
--------
3440+
>>> s = pl.Series("a", [2, 5, 1, 4, 3])
3441+
>>> s.top_k_by("a", 3)
3442+
shape: (3,)
3443+
Series: 'a' [i64]
3444+
[
3445+
5
3446+
4
3447+
3
3448+
]
3449+
"""
3450+
33993451
def bottom_k(self, k: int = 5) -> Series:
34003452
r"""
34013453
Return the `k` smallest elements.
@@ -3416,6 +3468,8 @@ def bottom_k(self, k: int = 5) -> Series:
34163468
See Also
34173469
--------
34183470
top_k
3471+
top_k_by
3472+
bottom_k_by
34193473
34203474
Examples
34213475
--------
@@ -3430,6 +3484,56 @@ def bottom_k(self, k: int = 5) -> Series:
34303484
]
34313485
"""
34323486

3487+
def bottom_k_by(
3488+
self,
3489+
by: IntoExpr | Iterable[IntoExpr],
3490+
k: int = 5,
3491+
*,
3492+
reverse: bool | Sequence[bool] = False,
3493+
) -> Series:
3494+
r"""
3495+
Return the elements at the positions of the `k` smallest values in `by`.
3496+
3497+
Non-null elements are always preferred over null elements, regardless of
3498+
the value of `reverse`. The output is not guaranteed to be in any
3499+
particular order; call :func:`sort` after this function if you wish the
3500+
output to be sorted.
3501+
3502+
This has time complexity:
3503+
3504+
.. math:: O(n \log{n})
3505+
3506+
Parameters
3507+
----------
3508+
by
3509+
Column used to determine the smallest elements.
3510+
Accepts expression input. Strings are parsed as column names.
3511+
k
3512+
Number of elements to return.
3513+
reverse
3514+
Consider the `k` largest elements of the `by` column (instead of the `k`
3515+
smallest). This can be specified per column by passing a sequence of
3516+
booleans.
3517+
3518+
See Also
3519+
--------
3520+
top_k
3521+
top_k_by
3522+
bottom_k
3523+
3524+
Examples
3525+
--------
3526+
>>> s = pl.Series("a", [2, 5, 1, 4, 3])
3527+
>>> s.bottom_k_by("a", 3)
3528+
shape: (3,)
3529+
Series: 'a' [i64]
3530+
[
3531+
1
3532+
2
3533+
3
3534+
]
3535+
"""
3536+
34333537
def arg_sort(self, *, descending: bool = False, nulls_last: bool = False) -> Series:
34343538
"""
34353539
Get the index values that would sort this Series.

py-polars/tests/unit/operations/test_top_k.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,3 +544,21 @@ def test_top_k_sorted_21260() -> None:
544544
assert s.bottom_k(3).sort().to_list() == [1, 2, 3]
545545
assert s.sort(descending=False).bottom_k(3).sort().to_list() == [1, 2, 3]
546546
assert s.sort(descending=True).bottom_k(3).sort().to_list() == [1, 2, 3]
547+
548+
549+
def test_top_k_by() -> None:
550+
# expression
551+
s = pl.Series("a", [3, 8, 1, 5, 2])
552+
553+
assert_series_equal(
554+
s.top_k_by("a", 3), pl.Series("a", [8, 5, 3]), check_order=False
555+
)
556+
557+
558+
def test_bottom_k_by() -> None:
559+
# expression
560+
s = pl.Series("a", [3, 8, 1, 5, 2])
561+
562+
assert_series_equal(
563+
s.bottom_k_by("a", 4), pl.Series("a", [3, 2, 1, 5]), check_order=False
564+
)

0 commit comments

Comments
 (0)