
Commit 3c2ea8a

isidentical authored and ichard26 committed
black/parser: partial support for pattern matching (#2586)
Partial implementation of #2242. Only works when `-t py310` is explicitly specified. Co-authored-by: Richard Si <[email protected]>
1 parent 2e1b951 commit 3c2ea8a

File tree: 14 files changed (+553, -22 lines)


CHANGES.md

Lines changed: 3 additions & 0 deletions
@@ -6,6 +6,9 @@
 
 - Warn about Python 2 deprecation in more cases by improving Python 2 only syntax
   detection (#2592)
+- Add partial support for the match statement. As it's experimental, it's only enabled
+  when `--target-version py310` is explicitly specified (#2586)
+- Add support for parenthesized with (#2586)
 
 ## 21.10b0
 
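For reference, here is a minimal example of the syntax this change lets Black handle, assuming the file is formatted with `black --target-version py310` (illustrative code, not part of the commit):

    # A simple PEP 634 match statement that Black can now parse and
    # reformat when py310 is explicitly targeted.
    def handle(command: str) -> str:
        match command.split():
            case ["go", direction]:
                return f"moving {direction}"
            case ["quit"]:
                return "goodbye"
            case _:
                return "unknown command"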

src/black/linegen.py

Lines changed: 5 additions & 1 deletion
@@ -126,7 +126,7 @@ def visit_stmt(
     """Visit a statement.
 
     This implementation is shared for `if`, `while`, `for`, `try`, `except`,
-    `def`, `with`, `class`, `assert` and assignments.
+    `def`, `with`, `class`, `assert`, `match`, `case` and assignments.
 
     The relevant Python language `keywords` for a given statement will be
     NAME leaves within it. This methods puts those on a separate line.
@@ -292,6 +292,10 @@ def __post_init__(self) -> None:
         self.visit_async_funcdef = self.visit_async_stmt
         self.visit_decorated = self.visit_decorators
 
+        # PEP 634
+        self.visit_match_stmt = partial(v, keywords={"match"}, parens=Ø)
+        self.visit_case_block = partial(v, keywords={"case"}, parens=Ø)
+
 
 def transform_line(
     line: Line, mode: Mode, features: Collection[Feature] = ()
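Black's LineGenerator dispatches nodes by name, so a node of type match_stmt is routed to visit_match_stmt; binding the two partials above is all the registration the new statements need. A minimal sketch of that dispatch pattern follows (simplified, illustrative names; in the real code `v` is the shared `visit_stmt` and `Ø` is black's empty-set constant):

    from functools import partial

    class Visitor:
        # Route a node to visit_<type>, falling back to a default handler.
        def visit(self, node_type: str, node: str) -> str:
            visitor = getattr(self, f"visit_{node_type}", self.visit_default)
            return visitor(node)

        def visit_default(self, node: str) -> str:
            return f"default handling of {node!r}"

    class LineGenerator(Visitor):
        def __init__(self) -> None:
            # As in the diff: bind the shared statement visitor with the
            # keywords that should start their own logical line.
            v = self.visit_stmt
            self.visit_match_stmt = partial(v, keywords={"match"})
            self.visit_case_block = partial(v, keywords={"case"})

        def visit_stmt(self, node: str, keywords: set) -> str:
            return f"{node!r} split on keywords {sorted(keywords)}"

    gen = LineGenerator()
    print(gen.visit("match_stmt", "match x: ..."))  # handled by the bound partial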

src/black/mode.py

Lines changed: 5 additions & 0 deletions
@@ -20,6 +20,7 @@ class TargetVersion(Enum):
     PY37 = 7
     PY38 = 8
     PY39 = 9
+    PY310 = 10
 
     def is_python2(self) -> bool:
         return self is TargetVersion.PY27
@@ -39,6 +40,7 @@ class Feature(Enum):
     ASSIGNMENT_EXPRESSIONS = 8
     POS_ONLY_ARGUMENTS = 9
     RELAXED_DECORATORS = 10
+    PATTERN_MATCHING = 11
     FORCE_OPTIONAL_PARENTHESES = 50
 
     # temporary for Python 2 deprecation
@@ -108,6 +110,9 @@ class Feature(Enum):
         Feature.RELAXED_DECORATORS,
         Feature.POS_ONLY_ARGUMENTS,
     },
+    TargetVersion.PY310: {
+        Feature.PATTERN_MATCHING,
+    },
 }
 
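The dict extended in the last hunk maps each target version to the features it supports. Black's `supports_feature` helper, whose behavior can be inferred from its usage in parsing.py below, requires every requested target to provide a feature; a sketch under that assumption (the exact body and the dict name are assumed, not shown in this diff):

    from typing import Set

    # VERSION_TO_FEATURES is the name assumed here for the
    # version-to-features dict being extended in the hunk above.
    def supports_feature(target_versions: Set["TargetVersion"], feature: "Feature") -> bool:
        # A feature is usable only when *all* requested targets provide it,
        # so targeting py39 and py310 together still disables PATTERN_MATCHING.
        return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)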

src/black/parsing.py

Lines changed: 3 additions & 0 deletions
@@ -59,6 +59,9 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
 
     # Python 3-compatible code, so only try Python 3 grammar.
     grammars = []
+    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
+        # Python 3.10+
+        grammars.append(pygram.python_grammar_soft_keywords)
     # If we have to parse both, try to parse async as a keyword first
     if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
         # Python 3.7+
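The list returned by get_grammars is ordered, and the driver tries each grammar in turn until one parses, which is why the soft-keyword grammar is appended first for py310 targets. A rough, self-contained sketch of that fallback strategy (placeholder names, not black's exact internals):

    class ParseError(Exception):
        pass

    def parse_single(src: str, grammar: str) -> object:
        # Stand-in for one blib2to3 driver run with a single grammar.
        raise ParseError(f"could not parse with {grammar}")

    def parse_with_fallback(src: str, grammars: list) -> object:
        # Try each grammar in order; return the first successful parse,
        # surfacing the last failure if none accept the source.
        errors = []
        for grammar in grammars:
            try:
                return parse_single(src, grammar)
            except ParseError as exc:
                errors.append(exc)
        raise errors[-1]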

src/blib2to3/Grammar.txt

Lines changed: 36 additions & 5 deletions
@@ -105,7 +105,7 @@ global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
 exec_stmt: 'exec' expr ['in' test [',' test]]
 assert_stmt: 'assert' test [',' test]
 
-compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt
 async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
 if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
 while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
@@ -115,9 +115,8 @@ try_stmt: ('try' ':' suite
            ['else' ':' suite]
            ['finally' ':' suite] |
           'finally' ':' suite))
-with_stmt: 'with' with_item (',' with_item)* ':' suite
-with_item: test ['as' expr]
-with_var: 'as' expr
+with_stmt: 'with' asexpr_test (',' asexpr_test)* ':' suite
+
 # NB compile.c makes sure that the default except clause is last
 except_clause: 'except' [test [(',' | 'as') test]]
 suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
@@ -131,7 +130,15 @@ testlist_safe: old_test [(',' old_test)+ [',']]
 old_test: or_test | old_lambdef
 old_lambdef: 'lambda' [varargslist] ':' old_test
 
-namedexpr_test: test [':=' test]
+namedexpr_test: asexpr_test [':=' asexpr_test]
+
+# This is actually not a real rule, though since the parser is very
+# limited in terms of the strategy about match/case rules, we are inserting
+# a virtual case (<expr> as <expr>) as a valid expression. Unless a better
+# approach is thought, the only side effect of this seem to be just allowing
+# more stuff to be parser (which would fail on the ast).
+asexpr_test: test ['as' test]
+
 test: or_test ['if' or_test 'else' test] | lambdef
 or_test: and_test ('or' and_test)*
 and_test: not_test ('and' not_test)*
@@ -213,3 +220,27 @@ encoding_decl: NAME
 
 yield_expr: 'yield' [yield_arg]
 yield_arg: 'from' test | testlist_star_expr
+
+
+# 3.10 match statement definition
+
+# PS: normally the grammar is much much more restricted, but
+# at this moment for not trying to bother much with encoding the
+# exact same DSL in a LL(1) parser, we will just accept an expression
+# and let the ast.parse() step of the safe mode to reject invalid
+# grammar.
+
+# The reason why it is more restricted is that, patterns are some
+# sort of a DSL (more advanced than our LHS on assignments, but
+# still in a very limited python subset). They are not really
+# expressions, but who cares. If we can parse them, that is enough
+# to reformat them.
+
+match_stmt: "match" subject_expr ':' NEWLINE INDENT case_block+ DEDENT
+subject_expr: namedexpr_test
+
+# cases
+case_block: "case" patterns [guard] ':' suite
+guard: 'if' namedexpr_test
+patterns: pattern ['as' pattern]
+pattern: (expr|star_expr) (',' (expr|star_expr))* [',']
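As the comments explain, this grammar deliberately over-accepts: anything expression-shaped can follow `case`, and black's AST safety check (via the standard ast module) is what rejects invalid patterns. A quick illustration, runnable on Python 3.10+:

    import ast

    # Valid pattern syntax parses fine...
    ast.parse("match p:\n    case (0, 0):\n        pass\n")

    # ...but an arbitrary expression in pattern position, which the loose
    # blib2to3 grammar above would accept, fails the real pattern grammar.
    try:
        ast.parse("match p:\n    case 1 + 1:\n        pass\n")
    except SyntaxError as exc:
        print("rejected by ast.parse:", exc.msg)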

src/blib2to3/pgen2/driver.py

Lines changed: 79 additions & 2 deletions
@@ -28,19 +28,92 @@
     List,
     Optional,
     Text,
+    Iterator,
     Tuple,
+    TypeVar,
+    Generic,
     Union,
 )
+from dataclasses import dataclass, field
 
 # Pgen imports
 from . import grammar, parse, token, tokenize, pgen
 from logging import Logger
 from blib2to3.pytree import _Convert, NL
 from blib2to3.pgen2.grammar import Grammar
+from contextlib import contextmanager
 
 Path = Union[str, "os.PathLike[str]"]
 
 
+@dataclass
+class ReleaseRange:
+    start: int
+    end: Optional[int] = None
+    tokens: List[Any] = field(default_factory=list)
+
+    def lock(self) -> None:
+        total_eaten = len(self.tokens)
+        self.end = self.start + total_eaten
+
+
+class TokenProxy:
+    def __init__(self, generator: Any) -> None:
+        self._tokens = generator
+        self._counter = 0
+        self._release_ranges: List[ReleaseRange] = []
+
+    @contextmanager
+    def release(self) -> Iterator["TokenProxy"]:
+        release_range = ReleaseRange(self._counter)
+        self._release_ranges.append(release_range)
+        try:
+            yield self
+        finally:
+            # Lock the last release range to the final position that
+            # has been eaten.
+            release_range.lock()
+
+    def eat(self, point: int) -> Any:
+        eaten_tokens = self._release_ranges[-1].tokens
+        if point < len(eaten_tokens):
+            return eaten_tokens[point]
+        else:
+            while point >= len(eaten_tokens):
+                token = next(self._tokens)
+                eaten_tokens.append(token)
+            return token
+
+    def __iter__(self) -> "TokenProxy":
+        return self
+
+    def __next__(self) -> Any:
+        # If the current position is already compromised (looked up)
+        # return the eaten token, if not just go further on the given
+        # token producer.
+        for release_range in self._release_ranges:
+            assert release_range.end is not None
+
+            start, end = release_range.start, release_range.end
+            if start <= self._counter < end:
+                token = release_range.tokens[self._counter - start]
+                break
+        else:
+            token = next(self._tokens)
+        self._counter += 1
+        return token
+
+    def can_advance(self, to: int) -> bool:
+        # Try to eat, fail if it can't. The eat operation is cached
+        # so there wont be any additional cost of eating here
+        try:
+            self.eat(to)
+        except StopIteration:
+            return False
+        else:
+            return True
+
+
 class Driver(object):
     def __init__(
         self,
@@ -57,14 +130,18 @@ def __init__(
     def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
         """Parse a series of tokens and return the syntax tree."""
         # XXX Move the prefix computation into a wrapper around tokenize.
+        proxy = TokenProxy(tokens)
+
         p = parse.Parser(self.grammar, self.convert)
-        p.setup()
+        p.setup(proxy=proxy)
+
         lineno = 1
         column = 0
         indent_columns = []
        type = value = start = end = line_text = None
         prefix = ""
-        for quintuple in tokens:
+
+        for quintuple in proxy:
             type, value, start, end, line_text = quintuple
             if start != (lineno, column):
                 assert (lineno, column) <= start, ((lineno, column), start)
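TokenProxy is the piece that makes soft keywords tractable: the parser can read ahead speculatively inside a release() block, and whatever was consumed is replayed on normal iteration afterwards. A minimal usage sketch, with token values simplified to bare strings (the class itself is the one added to blib2to3/pgen2/driver.py above):

    from blib2to3.pgen2.driver import TokenProxy

    proxy = TokenProxy(iter(["match", "x", ":", "NEWLINE"]))

    with proxy.release():
        # Peek ahead without committing: eat() pulls tokens from the
        # underlying stream and caches them in the current release range.
        first, second = proxy.eat(0), proxy.eat(1)
        looks_like_match = first == "match" and second != "="  # toy check

    # Normal iteration now replays the cached tokens before reading new ones.
    assert next(proxy) == "match"
    assert next(proxy) == "x"
    assert next(proxy) == ":"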

src/blib2to3/pgen2/grammar.py

Lines changed: 2 additions & 0 deletions
@@ -89,6 +89,7 @@ def __init__(self) -> None:
         self.dfas: Dict[int, DFAS] = {}
         self.labels: List[Label] = [(0, "EMPTY")]
         self.keywords: Dict[str, int] = {}
+        self.soft_keywords: Dict[str, int] = {}
         self.tokens: Dict[int, int] = {}
         self.symbol2label: Dict[str, int] = {}
         self.start = 256
@@ -136,6 +137,7 @@ def copy(self: _P) -> _P:
             "number2symbol",
             "dfas",
             "keywords",
+            "soft_keywords",
             "tokens",
             "symbol2label",
         ):
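The separate soft_keywords table mirrors PEP 634's design: `match` and `case` act as keywords only in statement position and stay valid as ordinary identifiers elsewhere, so existing code keeps parsing. Both uses are legal in the same module on Python 3.10+:

    # "match" and "case" remain usable as plain names...
    match = {"case": 1}
    case = match["case"]

    # ...while in statement position they introduce pattern matching.
    def describe(value: int) -> str:
        match value:
            case 0:
                return "zero"
            case _:
                return "nonzero"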
