Skip to content

Commit f5a202d

Browse files
committed
Speed up new backtracking parser
1 parent 092959f commit f5a202d

File tree

3 files changed

+169
-5
lines changed

3 files changed

+169
-5
lines changed

src/blib2to3/pgen2/parse.py

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,17 @@ def lam_sub(grammar: Grammar, node: RawNode) -> NL:
4646
return Node(type=node[0], children=node[3], context=node[2])
4747

4848

49+
# Placeholder standing in for a real parse node while the parser is
# backtracking; shaped like a RawNode but never attached to the tree.
FAKE_NODE = (-1, None, None, None)


def stack_copy(
    stack: List[Tuple[DFAS, int, RawNode]]
) -> List[Tuple[DFAS, int, RawNode]]:
    """Nodeless copy of *stack*.

    Each entry keeps a deep copy of its DFA and its state label, but the
    node slot is replaced with FAKE_NODE so no parse-tree objects are
    duplicated — much cheaper than deep-copying the whole stack.
    """
    duplicate: List[Tuple[DFAS, int, RawNode]] = []
    for dfa, label, _node in stack:
        duplicate.append((copy.deepcopy(dfa), label, FAKE_NODE))
    return duplicate
4960
class Recorder:
5061
def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
5162
self.parser = parser
@@ -54,21 +65,45 @@ def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> No
5465

5566
self._dead_ilabels: Set[int] = set()
5667
self._start_point = self.parser.stack
57-
self._points = {ilabel: copy.deepcopy(self._start_point) for ilabel in ilabels}
68+
self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels}
5869

5970
@property
6071
def ilabels(self) -> Set[int]:
6172
return self._dead_ilabels.symmetric_difference(self._ilabels)
6273

6374
@contextmanager
def switch_to(self, ilabel: int) -> Iterator[None]:
    """Run the enclosed parsing attempt on *ilabel*'s private stack copy.

    Swaps the parser's stack for the nodeless backup made for *ilabel*
    (see stack_copy) and, via patch(), replaces the state operations
    with their node-preserving *_safe variants for the duration.  If the
    attempt raises ParseError, *ilabel* is recorded as dead; in every
    case the parser's stack is restored to the recorder's start point.
    """
    with self.patch():
        self.parser.stack = self._points[ilabel]
        try:
            yield
        except ParseError:
            # This alternative failed; ilabels() will stop reporting it.
            self._dead_ilabels.add(ilabel)
        finally:
            # Always put the real stack back, success or failure.
            self.parser.stack = self._start_point
84+
85+
@contextmanager
def patch(self) -> Iterator[None]:
    """
    Patch basic state operations (push/pop/shift) with node-level
    immutable variants. These still will operate on the stack; but
    they won't create any new nodes, or modify the contents of any
    other existing nodes.

    This saves us a ton of time when we are backtracking, since we
    want to restore to the initial state as quickly as possible, which
    can only be done by having as few mutations as possible.
    """
    original_functions = {}
    for name in self.parser.STATE_OPERATIONS:
        # Remember the real implementation, then swap in the
        # node-preserving "<name>_safe" variant.
        original_functions[name] = getattr(self.parser, name)
        safe_variant = getattr(self.parser, name + "_safe")
        setattr(self.parser, name, safe_variant)
    try:
        yield
    finally:
        # Unconditionally restore the real operations, even if the
        # patched parse attempt raised.
        for name, func in original_functions.items():
            setattr(self.parser, name, func)
72107

73108
def add_token(self, tok_type: int, tok_val: Text, raw: bool = False) -> None:
74109
func: Callable[..., Any]
@@ -317,6 +352,8 @@ def classify(self, type: int, value: Text, context: Context) -> List[int]:
317352
raise ParseError("bad token", type, value, context)
318353
return [ilabel]
319354

# Names of the mutating state operations that Recorder.patch() swaps
# for their "<name>_safe" (node-preserving) counterparts while the
# parser is backtracking.
STATE_OPERATIONS = ["shift", "push", "pop"]
320357
def shift(self, type: int, value: Text, newstate: int, context: Context) -> None:
321358
"""Shift a token. (Internal)"""
322359
dfa, state, node = self.stack[-1]
@@ -344,3 +381,22 @@ def pop(self) -> None:
344381
else:
345382
self.rootnode = newnode
346383
self.rootnode.used_names = self.used_names
384+
385+
def shift_safe(
    self, type: int, value: Text, newstate: int, context: Context
) -> None:
    """Immutable (node-level) version of shift().

    Advances the top stack entry to *newstate* without creating or
    mutating any parse node; the node slot is left as FAKE_NODE.
    """
    current_dfa = self.stack[-1][0]
    self.stack[-1] = (current_dfa, newstate, FAKE_NODE)
391+
392+
def push_safe(
    self, type: int, newdfa: DFAS, newstate: int, context: Context
) -> None:
    """Immutable (node-level) version of push().

    Moves the current top entry to *newstate*, then pushes *newdfa* in
    its start state.  Both entries carry FAKE_NODE instead of real
    parse nodes, so nothing in the tree is created or modified.
    """
    top_dfa = self.stack[-1][0]
    self.stack[-1] = (top_dfa, newstate, FAKE_NODE)
    self.stack.append((newdfa, 0, FAKE_NODE))
399+
400+
def pop_safe(self) -> None:
    """Immutable (node-level) version of pop().

    Discards the top stack entry; unlike pop(), no finished node is
    built or attached anywhere.
    """
    del self.stack[-1]
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
re.match()
2+
match = a
3+
with match() as match:
4+
match = f"{match}"
5+
6+
re.match()
7+
match = a
8+
with match() as match:
9+
match = f"{match}"
10+
11+
12+
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
13+
if not target_versions:
14+
# No target_version specified, so try all grammars.
15+
return [
16+
# Python 3.7+
17+
pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
18+
# Python 3.0-3.6
19+
pygram.python_grammar_no_print_statement_no_exec_statement,
20+
# Python 2.7 with future print_function import
21+
pygram.python_grammar_no_print_statement,
22+
# Python 2.7
23+
pygram.python_grammar,
24+
]
25+
26+
match match:
27+
case case:
28+
match match:
29+
case case:
30+
pass
31+
32+
if all(version.is_python2() for version in target_versions):
33+
# Python 2-only code, so try Python 2 grammars.
34+
return [
35+
# Python 2.7 with future print_function import
36+
pygram.python_grammar_no_print_statement,
37+
# Python 2.7
38+
pygram.python_grammar,
39+
]
40+
41+
re.match()
42+
match = a
43+
with match() as match:
44+
match = f"{match}"
45+
46+
def test_patma_139(self):
47+
x = False
48+
match x:
49+
case bool(z):
50+
y = 0
51+
self.assertIs(x, False)
52+
self.assertEqual(y, 0)
53+
self.assertIs(z, x)
54+
55+
# Python 3-compatible code, so only try Python 3 grammar.
56+
grammars = []
57+
if supports_feature(target_versions, Feature.PATTERN_MATCHING):
58+
# Python 3.10+
59+
grammars.append(pygram.python_grammar_soft_keywords)
60+
# If we have to parse both, try to parse async as a keyword first
61+
if not supports_feature(
62+
target_versions, Feature.ASYNC_IDENTIFIERS
63+
) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
64+
# Python 3.7-3.9
65+
grammars.append(
66+
pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
67+
)
68+
if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
69+
# Python 3.0-3.6
70+
grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
71+
72+
def test_patma_155(self):
73+
x = 0
74+
y = None
75+
match x:
76+
case 1e1000:
77+
y = 0
78+
self.assertEqual(x, 0)
79+
self.assertIs(y, None)
80+
81+
x = range(3)
82+
match x:
83+
case [y, case as x, z]:
84+
w = 0
85+
86+
# At least one of the above branches must have been taken, because every Python
87+
# version has exactly one of the two 'ASYNC_*' flags
88+
return grammars
89+
90+
91+
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
92+
"""Given a string with source, return the lib2to3 Node."""
93+
if not src_txt.endswith("\n"):
94+
src_txt += "\n"
95+
96+
grammars = get_grammars(set(target_versions))
97+
98+
99+
re.match()
100+
match = a
101+
with match() as match:
102+
match = f"{match}"
103+
104+
re.match()
105+
match = a
106+
with match() as match:
107+
match = f"{match}"

tests/test_format.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
"pattern_matching_complex",
7676
"pattern_matching_extras",
7777
"pattern_matching_style",
78+
"pattern_matching_generic",
7879
"parenthesized_context_managers",
7980
]
8081

0 commit comments

Comments
 (0)