From 7c9bab1abfe51d8df654c8b5cffb3ce0f9073b23 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Wed, 24 Apr 2024 19:49:49 -0700 Subject: [PATCH 1/5] fix some regexes --- src/blib2to3/pgen2/tokenize.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index d6b684ab1aa..48fc7d98e8c 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -123,9 +123,9 @@ def _combinations(*l: str) -> Set[str]: # Tail end of " string. Double = r'[^"\\]*(?:\\.[^"\\]*)*"' # Tail end of ''' string. -Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" +Single3 = r"[^'\\]*(?:\\.|'(?!'')|[^'\\])*'''" # Tail end of """ string. -Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' +Double3 = r'[^"\\]*(?:\\.|"(?!"")|[^"\\])*"""' _litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?" _fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)" Triple = group( @@ -136,12 +136,12 @@ def _combinations(*l: str) -> Set[str]: ) # beginning of a single quoted f-string. must not end with `{{` or `\N{` -SingleLbrace = r"[^'\\{]*(?:(?:\\N{|\\.|{{)[^'\\{]*)*(? Set[str]: _string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*' # FSTRING_MIDDLE and LBRACE, must not end with a `{{` or `\N{` -_fstring_middle_single = r"[^\n'{]*(?:(?:\\N{|\\[^{]|{{)[^\n'{]*)*(? 0 and not inside_fstring_braces: endprog = endprog_stack[-1] + print("REGEX", endprog.pattern) + print(":LINE", line) endmatch = endprog.match(line, pos) if endmatch: # all on one line start, end = endmatch.span(0) From 4508f57339111af2c3e4a224328f1eb6f493158a Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Wed, 24 Apr 2024 19:52:23 -0700 Subject: [PATCH 2/5] simplify more regexes --- src/blib2to3/pgen2/tokenize.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 48fc7d98e8c..09542b99557 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -119,13 +119,13 @@ def _combinations(*l: str) -> Set[str]: Number = group(Imagnumber, Floatnumber, Intnumber) # Tail end of ' string. -Single = r"[^'\\]*(?:\\.[^'\\]*)*'" +Single = r"(?:\\.|[^'\\])*'" # Tail end of " string. -Double = r'[^"\\]*(?:\\.[^"\\]*)*"' +Double = r'(?:\\.|[^"\\])*"' # Tail end of ''' string. -Single3 = r"[^'\\]*(?:\\.|'(?!'')|[^'\\])*'''" +Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''" # Tail end of """ string. -Double3 = r'[^"\\]*(?:\\.|"(?!"")|[^"\\])*"""' +Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""' _litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?" _fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)" Triple = group( @@ -136,12 +136,12 @@ def _combinations(*l: str) -> Set[str]: ) # beginning of a single quoted f-string. must not end with `{{` or `\N{` -SingleLbrace = r"[^'\\{]*(?:\\N{|\\.|{{|[^'\\{])*(? Date: Wed, 24 Apr 2024 20:13:47 -0700 Subject: [PATCH 3/5] Add test, fix more regexes --- src/blib2to3/pgen2/tokenize.py | 14 ++++++-------- tests/data/cases/pep_701.py | 4 ++++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 09542b99557..fd0b5564f43 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -140,8 +140,8 @@ def _combinations(*l: str) -> Set[str]: DoubleLbrace = r'(?:\\N{|\\.|{{|[^"\\{])*(? Set[str]: Special = group(r"\r?\n", r"[:;.,`@]") Funny = group(Operator, Bracket, Special) -_string_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*" -_string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*' +_string_middle_single = r"(?:[^\n'\\]|\\.)*" +_string_middle_double = r'(?:[^\n"\\]|\\.)*' # FSTRING_MIDDLE and LBRACE, must not end with a `{{` or `\N{` -_fstring_middle_single = r"[^\n'{]*(?:\\N{|\\[^{]|{{|[^\n'{])*(? 0 and not inside_fstring_braces: endprog = endprog_stack[-1] - print("REGEX", endprog.pattern) - print(":LINE", line) endmatch = endprog.match(line, pos) if endmatch: # all on one line start, end = endmatch.span(0) diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index f4a69e47413..a0b67413ee9 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -119,6 +119,8 @@ level=0, ) +f'{{\\"kind\\":\\"ConfigMap\\",\\"metadata\\":{{\\"annotations\\":{{}},\\"name\\":\\"cluster-info\\",\\"namespace\\":\\"amazon-cloudwatch\\"}}}}' + # output x = f"foo" @@ -240,3 +242,5 @@ f"{self.writer._transport.get_extra_info('peername')}", # type: ignore[attr-defined] level=0, ) + +f'{{\\"kind\\":\\"ConfigMap\\",\\"metadata\\":{{\\"annotations\\":{{}},\\"name\\":\\"cluster-info\\",\\"namespace\\":\\"amazon-cloudwatch\\"}}}}' From fdd1813a0cdd7d5f3f4bc1f4bd49bca779c5ed84 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Wed, 24 Apr 2024 20:24:02 -0700 Subject: [PATCH 4/5] changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index b39f9ab4f54..8c219346d85 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -29,6 +29,7 @@ ### Performance +- Fix bad performance on certain complex string literals (#4331) ### Output From 315ea67669aab217659e23c4561bd72b235dd573 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Wed, 24 Apr 2024 20:26:19 -0700 Subject: [PATCH 5/5] format changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 8c219346d85..79e7b0b1444 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -29,6 +29,7 @@ ### Performance + - Fix bad performance on certain complex string literals (#4331) ### Output