Skip to content

Commit b4ff5f9

Browse files
authored
Merge pull request #4398 from tybug/next
Skip files over 512kb and files with over 1024 constants during constants collection
2 parents 0328676 + 989ae59 commit b4ff5f9

5 files changed

Lines changed: 88 additions & 21 deletions

File tree

hypothesis-python/RELEASE.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
RELEASE_TYPE: patch
2+
3+
Further improve the performance of the constants-collection feature introduced in :ref:`version 6.131.1 <v6.131.1>`, by ignoring large files and files with many constants.

hypothesis-python/src/hypothesis/internal/conjecture/providers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B02
568568
.. important::
569569
570570
For |PrimitiveProvider.on_observation| to be called by Hypothesis,
571-
|PrimitiveProvider.add_observability_callback| must be set to ``True``,
571+
|PrimitiveProvider.add_observability_callback| must be set to ``True``.
572572
573573
|PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
574574
observability might increase runtime or memory usage.

hypothesis-python/src/hypothesis/internal/constants_ast.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,24 @@ def __eq__(self, other: object) -> bool:
9898
)
9999

100100

101+
class TooManyConstants(Exception):
102+
# a control flow exception which we raise in ConstantVisitor when the
103+
# number of constants in a module gets too large.
104+
pass
105+
106+
101107
class ConstantVisitor(NodeVisitor):
102-
def __init__(self):
108+
CONSTANTS_LIMIT: int = 1024
109+
110+
def __init__(self, *, limit: bool):
103111
super().__init__()
104112
self.constants = Constants()
113+
self.limit = limit
105114

106115
def _add_constant(self, value: object) -> None:
116+
if self.limit and len(self.constants) >= self.CONSTANTS_LIMIT:
117+
raise TooManyConstants
118+
107119
if isinstance(value, str) and (
108120
value.isspace()
109121
or value == ""
@@ -166,33 +178,49 @@ def visit_Constant(self, node):
166178
self.generic_visit(node)
167179

168180

169-
def _constants_from_source(source: Union[str, bytes]) -> Constants:
181+
def _constants_from_source(source: Union[str, bytes], *, limit: bool) -> Constants:
170182
tree = ast.parse(source)
171-
visitor = ConstantVisitor()
172-
visitor.visit(tree)
183+
visitor = ConstantVisitor(limit=limit)
184+
185+
try:
186+
visitor.visit(tree)
187+
except TooManyConstants:
188+
# in the case of an incomplete collection, return nothing, to avoid
189+
# muddying caches etc.
190+
return Constants()
191+
173192
return visitor.constants
174193

175194

176195
@lru_cache(4096)
177-
def constants_from_module(module: ModuleType) -> Constants:
196+
def constants_from_module(module: ModuleType, *, limit: bool = True) -> Constants:
178197
try:
179198
module_file = inspect.getsourcefile(module)
180199
# use type: ignore because we know this might error
181200
source_bytes = Path(module_file).read_bytes() # type: ignore
182201
except Exception:
183202
return Constants()
184203

204+
if limit and len(source_bytes) > 512 * 1024:
205+
# Skip files over 512kb. For reference, the largest source file
206+
# in Hypothesis is strategies/_internal/core.py at 107kb at time
207+
# of writing.
208+
return Constants()
209+
185210
source_hash = hashlib.sha1(source_bytes).hexdigest()[:16]
186-
cache_p = storage_directory("constants") / source_hash
211+
# separate cache files for each limit param. see discussion in pull/4398
212+
cache_p = storage_directory("constants") / (
213+
source_hash + ("" if limit else "_nolimit")
214+
)
187215
try:
188-
return _constants_from_source(cache_p.read_bytes())
216+
return _constants_from_source(cache_p.read_bytes(), limit=limit)
189217
except Exception:
190218
# if the cached location doesn't exist, or it does exist but there was
191219
# a problem reading it, fall back to standard computation of the constants
192220
pass
193221

194222
try:
195-
constants = _constants_from_source(source_bytes)
223+
constants = _constants_from_source(source_bytes, limit=limit)
196224
except Exception:
197225
# A bunch of things can go wrong here.
198226
# * ast.parse may fail on the source code

hypothesis-python/tests/cover/test_constants_ast.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
from hypothesis.internal.constants_ast import (
2323
Constants,
2424
ConstantVisitor,
25+
TooManyConstants,
26+
_constants_from_source,
2527
constants_from_module,
2628
is_local_module_file,
2729
)
@@ -44,7 +46,7 @@
4446

4547

4648
def constants_from_ast(tree):
47-
visitor = ConstantVisitor()
49+
visitor = ConstantVisitor(limit=True)
4850
visitor.visit(tree)
4951
return visitor.constants
5052

@@ -234,6 +236,40 @@ def test_ignores_ast_parse_error(tmp_path):
234236
@given(constants_classes)
235237
def test_constant_visitor_roundtrips_string(constants):
236238
# our files in storage_directory("constants") rely on this roundtrip
237-
visitor = ConstantVisitor()
239+
visitor = ConstantVisitor(limit=True)
238240
visitor.visit(ast.parse(str(set(constants))))
239241
assert visitor.constants == constants
242+
243+
244+
def test_too_many_constants():
245+
visitor = ConstantVisitor(limit=True)
246+
# start at n=1000 to avoid ConstantVisitor ignoring small integers
247+
s = "; ".join(
248+
f"n = {i}" for i in range(1000, 1000 + ConstantVisitor.CONSTANTS_LIMIT + 1)
249+
)
250+
# visitor should raise on too many constants
251+
with pytest.raises(TooManyConstants):
252+
visitor.visit(ast.parse(s))
253+
254+
# and also _constants_from_source should return empty on too many constants
255+
assert _constants_from_source(s, limit=True) == Constants()
256+
257+
# but it parses fine with limit=False
258+
visitor = ConstantVisitor(limit=False)
259+
visitor.visit(ast.parse(s))
260+
assert _constants_from_source(s, limit=False) == Constants(
261+
integers=set(range(1000, 1000 + ConstantVisitor.CONSTANTS_LIMIT + 1))
262+
)
263+
264+
265+
def test_module_too_large(tmp_path):
266+
constant = 11231783
267+
268+
p = tmp_path / "large_file.py"
269+
content = f"a = {constant}\n\n" + "#" * (512 * 1024 + 1)
270+
p.write_text(content, encoding="utf-8")
271+
272+
module = ModuleType("large_module")
273+
module.__file__ = str(p)
274+
assert constants_from_module(module) == Constants()
275+
assert constants_from_module(module, limit=False) == Constants(integers={constant})

hypothesis-python/tests/cover/test_replay_logic.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,9 @@ def test(ls):
5555

5656
def test_does_not_shrink_on_replay_with_multiple_bugs():
5757
database = InMemoryExampleDatabase()
58-
5958
call_count = 0
60-
61-
tombstone = 1000093
59+
raised = False
60+
marker = 1000093
6261

6362
@settings(
6463
database=database,
@@ -67,12 +66,13 @@ def test_does_not_shrink_on_replay_with_multiple_bugs():
6766
max_examples=1000,
6867
)
6968
@given(st.integers())
70-
def test(i):
71-
nonlocal call_count
69+
def test(n):
70+
nonlocal call_count, raised
7271
call_count += 1
73-
if i > tombstone:
72+
if n >= marker:
73+
raised = True
7474
raise AssertionError
75-
elif i == tombstone:
75+
elif n < marker and raised:
7676
raise AssertionError
7777

7878
with pytest.raises(ExceptionGroup):
@@ -99,10 +99,10 @@ def test_will_always_shrink_if_previous_example_does_not_replay():
9999
max_examples=1000,
100100
)
101101
@given(st.integers(min_value=0))
102-
def test(i):
102+
def test(n):
103103
nonlocal last
104-
if i not in good:
105-
last = i
104+
if n not in good:
105+
last = n
106106
raise AssertionError
107107

108108
for i in range(20):

0 commit comments

Comments
 (0)