Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9a706bf
Work in progress. Change functions to use a new function
tim-one May 5, 2024
ba68ccc
New test that `n == int(str(n))` for large random n.
tim-one May 5, 2024
949507b
Merge branch 'main' into intconv
tim-one May 5, 2024
748046f
Merge remote-tracking branch 'upstream/main' into intconv
tim-one May 5, 2024
4c7c30c
Merge branch 'main' into intconv
tim-one May 6, 2024
4ff9a26
Fixed the logic so that `**` is used only for the smallest power
tim-one May 6, 2024
b0db76a
Merge branch 'intconv' of https://github.com/tim-one/cpython into int…
tim-one May 6, 2024
350a2ae
I typed "1009" by mistake instead of "1000". This could cause the
tim-one May 6, 2024
3d533e4
Merge remote-tracking branch 'upstream/main' into intconv
tim-one May 6, 2024
7da69e7
Remove the recursion in compute_powers().
tim-one May 6, 2024
bd0ba0a
Added a comment, and adopted Serhiy's suggestion to speed "lo" extrac…
tim-one May 6, 2024
39928c4
Merge remote-tracking branch 'upstream/main' into intconv
tim-one May 6, 2024
120e867
Merge branch 'main' into intconv
tim-one May 6, 2024
7a870e0
Remove the old functions, and the ability to switch implementations.
tim-one May 6, 2024
5f9cd7a
Merge remote-tracking branch 'upstream/main' into intconv
tim-one May 6, 2024
f5b410f
In the new test, allow random changes to the bit length
tim-one May 7, 2024
0280663
Merge remote-tracking branch 'upstream/main' into intconv
tim-one May 7, 2024
1122332
Update Lib/_pylong.py
tim-one May 7, 2024
de424d4
Update Lib/test/test_int.py
tim-one May 7, 2024
cca53d5
Update Lib/test/test_int.py
tim-one May 7, 2024
c46f295
Add named constant for int_to_decimal_string's digit limit.
tim-one May 7, 2024
271928f
Merge remote-tracking branch 'upstream/main' into intconv
tim-one May 7, 2024
afc681c
Merge branch 'main' into intconv
tim-one May 7, 2024
52bddcd
Merge branch 'main' into intconv
tim-one May 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 202 additions & 1 deletion Lib/_pylong.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,82 @@
except ImportError:
_decimal = None

# A number of functions have this form, where `w` is a desired number of
# digits in base `base`:
#
#     def inner(...w...):
#         if w <= LIMIT:
#             return something
#         lo = w >> 1
#         hi = w - lo
#         something involving base**lo, inner(...lo...), j, and inner(...hi...)
#     figure out largest w needed
#     result = inner(w)
#
# They all had some on-the-fly scheme to cache `base**lo` results for reuse.
# Power is costly.
#
# This routine aims to compute all and only the needed powers in advance, as
# efficiently as reasonably possible. This isn't trivial, and all the
# on-the-fly methods did needless work in many cases. The driving code above
# changes to:
#
#     figure out largest w needed
#     mycache = compute_powers(w, base, LIMIT)
#     result = inner(w)
#
# and `mycache[lo]` replaces `base**lo` in the inner function.
#
# While this does give minor speedups (a few percent at best), the primary
# intent is to simplify the functions using this, by eliminating the need for
# them to craft their own ad-hoc caching schemes.
def compute_powers(w, base, more_than, show=False):
    """Return a dict mapping each needed exponent `e` to ``base ** e``.

    The needed exponents are the ``lo = width >> 1`` values reached when a
    divide-and-conquer routine starts at width `w`, splits every width
    into ``lo`` and ``hi = width - lo``, and stops splitting as soon as a
    width is ``<= more_than``.

    w         -- the largest width that will be split
    base      -- the value to raise to powers; it must support ``**`` with
                 an int exponent and ``*`` with itself
    more_than -- widths no greater than this are never split
    show      -- if true, print a trace of how each power is obtained

    An empty dict is returned when `w` itself is never split.
    """
    seen = set()
    need = set()
    # Explicit worklist instead of recursion; the order in which widths
    # are visited does not affect the resulting `need` set.
    pending = [w]
    while pending:
        width = pending.pop()
        if width in seen or width <= more_than:
            continue
        seen.add(width)
        lo = width >> 1
        hi = width - lo
        # Only _need_ lo here; some other path may, or may not, need hi.
        need.add(lo)
        pending.append(lo)
        if lo != hi:
            pending.append(hi)

    d = {}
    # Going in increasing order lets each power be derived from a smaller
    # one that has already been computed, whenever such a one exists.
    for this in sorted(need):
        lo = this >> 1
        hi = this - lo
        if this - 1 in d:
            if show:
                print("* base at", this)
            d[this] = d[this - 1] * base  # cheap
        elif lo in d:
            if show:
                print("square at", this)
            # Multiplying a bigint by itself (same object!) is
            # about twice as fast in CPython.
            sq = d[lo] * d[lo]
            if hi != lo:
                assert hi == lo + 1
                if show:
                    print(" and * base")
                sq *= base
            d[this] = sq
        else:
            # Always taken for the smallest exponent.  It can also be
            # taken later on: e.g. w=11, more_than=5 needs exponents
            # {3, 5}, and 5 is reachable neither from 4 nor from 2.
            if show:
                print("pow at", this)
            d[this] = base ** this
    return d

def int_to_decimal(n):
"""Asymptotically fast conversion of an 'int' to Decimal."""
Expand Down Expand Up @@ -83,6 +159,50 @@ def inner(n, w):
result = -result
return result

# Keep a reference to the original implementation so that setold() can
# rebind the public name to it.
old_int_to_decimal = int_to_decimal

# A private copy of the current decimal context, with precision and
# exponent range set to the extremes exported by the decimal module.
# The Inexact trap is enabled, so an operation under this context that
# would have to round raises instead of silently losing digits.
_unbounded_dec_context = decimal.getcontext().copy()
_unbounded_dec_context.prec = decimal.MAX_PREC
_unbounded_dec_context.Emax = decimal.MAX_EMAX
_unbounded_dec_context.Emin = decimal.MIN_EMIN
_unbounded_dec_context.traps[decimal.Inexact] = 1 # sanity check

def new_int_to_decimal(n):
    """Asymptotically fast conversion of an 'int' to Decimal."""

    # Function due to Tim Peters.  See GH issue #90716 for details.
    # https://github.com/python/cpython/issues/90716
    #
    # Base conversion between power-of-2 and non-power-of-2 bases is
    # quadratic time in longobject.c.  Here the int is instead cut in
    # two at a bit position, each piece is converted recursively, and
    # the pieces are recombined using Decimal arithmetic.  To get a
    # string, apply str() to the Decimal returned.

    from decimal import Decimal as D
    BITLIM = 200

    def inner(value, width):
        # Small enough: let the Decimal constructor convert directly.
        if width <= BITLIM:
            return D(value)
        half = width >> 1
        top = value >> half
        bottom = value - (top << half)
        # value == bottom + top * 2**half
        return inner(bottom, half) + inner(top, width - half) * w2pow[half]

    with decimal.localcontext(_unbounded_dec_context):
        nbits = n.bit_length()
        # Every power of two that inner() will look up, as a Decimal.
        w2pow = compute_powers(nbits, D(2), BITLIM)
        negate = n < 0
        if negate:
            n = -n
        result = inner(n, nbits)
        if negate:
            result = -result
    return result

def int_to_decimal_string(n):
"""Asymptotically fast conversion of an 'int' to a decimal string."""
Expand Down Expand Up @@ -128,6 +248,49 @@ def inner(n, w):
s = s.lstrip('0')
return sign + s

# Keep a reference to the original implementation so that setold() can
# rebind the public name to it.
old_int_to_decimal_string = int_to_decimal_string
def new_int_to_decimal_string(n):
    """Asymptotically fast conversion of an 'int' to a decimal string."""
    nbits = n.bit_length()
    if _decimal is not None and nbits > 450_000:
        # Only usable with the C decimal implementation:  _pydecimal.py
        # calls str() on very large integers, which in turn calls
        # int_to_decimal_string(), causing very deep recursion.
        return str(int_to_decimal(n))

    # Fallback for when the C decimal module isn't available.  It is
    # asymptotically worse than going through decimal, but still better
    # than the quadratic time implementation in longobject.c.

    # Pieces estimated at no more than this many digits go to str().
    DIGLIM = 1000

    def inner(value, ndigits):
        if ndigits > DIGLIM:
            half = ndigits >> 1
            high, low = divmod(value, pow10[half])
            # The low piece must fill exactly `half` digit positions.
            return inner(high, ndigits - half) + inner(low, half).zfill(half)
        return str(value)

    # Estimate the number of decimal digits.  A small error is harmless.
    # Guessing too large may leave leading 0's that need to be stripped.
    # Guessing too small may mean str() gets called on the remaining
    # highest digits, which could still be a large integer; that only
    # shows up for numbers with way more than 10**15 digits, beyond the
    # 52-bit physical address limit of both Intel64 and AMD64.
    w = int(nbits * 0.3010299956639812 + 1)  # log10(2)
    # 10**k == 5**k << k:  take the powers of 5, then shift each one.
    pow10 = {k: v << k for k, v in compute_powers(w, 5, DIGLIM).items()}
    if n < 0:
        sign, n = '-', -n
    else:
        sign = ''
    digits = inner(n, w)
    if n and digits[0] == '0':
        # The digit estimate was too large; drop the padding zeros.
        digits = digits.lstrip('0')
    return sign + digits

def _str_to_int_inner(s):
"""Asymptotically fast conversion of a 'str' to an 'int'."""
Expand Down Expand Up @@ -175,6 +338,45 @@ def inner(a, b):

return inner(0, len(s))

# Keep a reference to the original implementation so that setold() can
# rebind the name to it.
old_str_to_int_inner = _str_to_int_inner

def new_str_to_int_inner(s):
    """Asymptotically fast conversion of a 'str' to an 'int'."""

    # Function due to Bjorn Martinsson.  See GH issue #90716 for details.
    # https://github.com/python/cpython/issues/90716
    #
    # Base conversion between power-of-2 and non-power-of-2 bases is
    # quadratic time in longobject.c.  This is a divide-and-conquer
    # scheme built on Python's own big int multiplication.  Since that
    # uses the Karatsuba algorithm, the time complexity of this function
    # is O(len(s)**1.58).

    DIGLIM = 2048
    ndigits = len(s)
    w5pow = compute_powers(ndigits, 5, DIGLIM)

    def inner(a, b):
        # Convert the slice s[a:b].
        if b - a <= DIGLIM:
            return int(s[a:b])
        mid = (a + b + 1) >> 1
        shift = b - mid
        tail = inner(mid, b)
        # Scale the leading digits by 10**shift == 5**shift << shift.
        head = (inner(a, mid) * w5pow[shift]) << shift
        return tail + head

    return inner(0, ndigits)

def setold():
    """Rebind the three conversion functions to the ``old_*`` versions."""
    global int_to_decimal, int_to_decimal_string, _str_to_int_inner
    int_to_decimal, int_to_decimal_string, _str_to_int_inner = (
        old_int_to_decimal,
        old_int_to_decimal_string,
        old_str_to_int_inner,
    )

def setnew():
    """Rebind the three conversion functions to the ``new_*`` versions."""
    global int_to_decimal, int_to_decimal_string, _str_to_int_inner
    int_to_decimal, int_to_decimal_string, _str_to_int_inner = (
        new_int_to_decimal,
        new_int_to_decimal_string,
        new_str_to_int_inner,
    )

def int_from_string(s):
"""Asymptotically fast version of PyLong_FromString(), conversion
Expand All @@ -186,7 +388,6 @@ def int_from_string(s):
s = s.rstrip().replace('_', '')
return _str_to_int_inner(s)


def str_to_int(s):
"""Asymptotically fast version of decimal string to 'int' conversion."""
# FIXME: this doesn't support the full syntax that int() supports.
Expand Down
10 changes: 10 additions & 0 deletions Lib/test/test_int.py
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,16 @@ def test_pylong_misbehavior_error_path_from_str(
with self.assertRaises(RuntimeError):
int(big_value)

def test_pylong_roundtrip(self):
    # Check that n == int(str(n)) for random ints at bit lengths that
    # start near 5000 and roughly double on every pass.
    from random import randrange
    nbits = 5000
    while nbits <= 1_000_000:
        nbits += randrange(100) # break bitlength patterns
        # Force the top bit on so the value has exactly `nbits` bits.
        top = 1 << (nbits - 1)
        value = top + randrange(top)
        assert value.bit_length() == nbits
        self.assertEqual(value, int(str(value)))
        nbits <<= 1

if __name__ == "__main__":
unittest.main()