llvm · AngryLoki · Mar 27, 2024 · Mar 27, 2024 · May 22, 2024 · ellishg
diff --git a/.github/workflows/version-check.py b/.github/workflows/version-check.py
@@ -6,7 +6,7 @@
 
 
 def get_version_from_tag(tag):
-    m = re.match("llvmorg-([0-9]+)\.([0-9]+)\.([0-9]+)(-rc[0-9]+)?$", tag)
+    m = re.match(r"llvmorg-([0-9]+)\.([0-9]+)\.([0-9]+)(-rc[0-9]+)?$", tag)
     if m:
         if m.lastindex == 4:
             # We have an rc tag.

diff --git a/clang-tools-extra/clang-tidy/add_new_check.py b/clang-tools-extra/clang-tidy/add_new_check.py
@@ -211,7 +211,7 @@ def adapt_module(module_path, module, check_name, check_name_camel):
                         f.write(check_decl)
                     else:
                         match = re.search(
-                            'registerCheck<(.*)> *\( *(?:"([^"]*)")?', line
+                            r'registerCheck<(.*)> *\( *(?:"([^"]*)")?', line
                         )
                         prev_line = None
                         if match:
@@ -383,7 +383,7 @@ def filename_from_module(module_name, check_name):
             if stmt_start_pos == -1:
                 return ""
             stmt = code[stmt_start_pos + 1 : stmt_end_pos]
-            matches = re.search('registerCheck<([^>:]*)>\(\s*"([^"]*)"\s*\)', stmt)
+            matches = re.search(r'registerCheck<([^>:]*)>\(\s*"([^"]*)"\s*\)', stmt)
             if matches and matches[2] == full_check_name:
                 class_name = matches[1]
                 if "::" in class_name:
@@ -401,8 +401,8 @@ def filename_from_module(module_name, check_name):
     # Examine code looking for a c'tor definition to get the base class name.
     def get_base_class(code, check_file):
         check_class_name = os.path.splitext(os.path.basename(check_file))[0]
-        ctor_pattern = check_class_name + "\([^:]*\)\s*:\s*([A-Z][A-Za-z0-9]*Check)\("
-        matches = re.search("\s+" + check_class_name + "::" + ctor_pattern, code)
+        ctor_pattern = check_class_name + r"\([^:]*\)\s*:\s*([A-Z][A-Za-z0-9]*Check)\("
+        matches = re.search(r"\s+" + check_class_name + "::" + ctor_pattern, code)
 
         # The constructor might be inline in the header.
         if not matches:
@@ -476,7 +476,7 @@ def process_doc(doc_file):
                 # Orphan page, don't list it.
                 return "", ""
 
-            match = re.search(".*:http-equiv=refresh: \d+;URL=(.*).html(.*)", content)
+            match = re.search(r".*:http-equiv=refresh: \d+;URL=(.*).html(.*)", content)
             # Is it a redirect?
             return check_name, match
 
@@ -505,7 +505,7 @@ def format_link_alias(doc_file):
                 ref_begin = ""
                 ref_end = "_"
             else:
-                redirect_parts = re.search("^\.\./([^/]*)/([^/]*)$", match.group(1))
+                redirect_parts = re.search(r"^\.\./([^/]*)/([^/]*)$", match.group(1))
                 title = redirect_parts[1] + "-" + redirect_parts[2]
                 target = redirect_parts[1] + "/" + redirect_parts[2]
                 autofix = has_auto_fix(title)

diff --git a/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py b/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py
@@ -242,7 +242,7 @@ def main():
     filename = None
     lines_by_file = {}
     for line in sys.stdin:
-        match = re.search('^\+\+\+\ "?(.*?/){%s}([^ \t\n"]*)' % args.p, line)
+        match = re.search(r'^\+\+\+\ "?(.*?/){%s}([^ \t\n"]*)' % args.p, line)
         if match:
             filename = match.group(2)
         if filename is None:
@@ -255,7 +255,7 @@ def main():
             if not re.match("^%s$" % args.iregex, filename, re.IGNORECASE):
                 continue
 
-        match = re.search("^@@.*\+(\d+)(,(\d+))?", line)
+        match = re.search(r"^@@.*\+(\d+)(,(\d+))?", line)
         if match:
             start_line = int(match.group(1))
             line_count = 1

diff --git a/clang-tools-extra/docs/clang-tidy/checks/gen-static-analyzer-docs.py b/clang-tools-extra/docs/clang-tidy/checks/gen-static-analyzer-docs.py
@@ -59,7 +59,7 @@ def get_checkers(checkers_td, checkers_rst):
             "clang-analyzer-" + checker_package_prefix + "." + checker_name
         )
         anchor_url = re.sub(
-            "\.", "-", checker_package_prefix + "." + checker_name
+            r"\.", "-", checker_package_prefix + "." + checker_name
         ).lower()
 
         if not hidden and "alpha" not in full_package_name.lower():

diff --git a/clang/docs/tools/dump_ast_matchers.py b/clang/docs/tools/dump_ast_matchers.py
@@ -101,7 +101,7 @@ def extract_result_types(comment):
 
 
 def strip_doxygen(comment):
-    """Returns the given comment without \-escaped words."""
+    """Returns the given comment without \\-escaped words."""
     # If there is only a doxygen keyword in the line, delete the whole line.
     comment = re.sub(r"^\\[^\s]+\n", r"", comment, flags=re.M)
 
@@ -236,7 +236,7 @@ def act_on_decl(declaration, comment, allowed_types):
 
         # Parse the various matcher definition macros.
         m = re.match(
-            """.*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
+            r""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
                        \s*([^\s,]+\s*),
                        \s*(?:[^\s,]+\s*),
                        \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)

diff --git a/clang/test/Analysis/check-analyzer-fixit.py b/clang/test/Analysis/check-analyzer-fixit.py
@@ -55,7 +55,7 @@ def run_test_once(args, extra_args):
     # themselves.  We need to keep the comments to preserve line numbers while
     # avoiding empty lines which could potentially trigger formatting-related
     # checks.
-    cleaned_test = re.sub("// *CHECK-[A-Z0-9\-]*:[^\r\n]*", "//", input_text)
+    cleaned_test = re.sub(r"// *CHECK-[A-Z0-9\-]*:[^\r\n]*", "//", input_text)
     write_file(temp_file_name, cleaned_test)
 
     original_file_name = temp_file_name + ".orig"

diff --git a/clang/utils/ABITest/TypeGen.py b/clang/utils/ABITest/TypeGen.py
@@ -43,7 +43,7 @@ def isBitField(self):
         return self.bitFieldSize is not None
 
     def isPaddingBitField(self):
-        return self.bitFieldSize is 0
+        return self.bitFieldSize == 0
 
     def getBitFieldSize(self):
         assert self.isBitField()

diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py
@@ -316,7 +316,7 @@ def symbolize(self, addr, binary, offset):
         #   * For C functions atos omits parentheses and argument types.
         #   * For C++ functions the function name (i.e., `foo` above) may contain
         #     templates which may contain parentheses.
-        match = re.match("^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line)
+        match = re.match(r"^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line)
         logging.debug("atos_line: %s", atos_line)
         if match:
             function_name = match.group(1)
@@ -541,7 +541,7 @@ def process_line_posix(self, line):
         # names in the regex because it could be an
         # Objective-C or C++ demangled name.
         stack_trace_line_format = (
-            "^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
+            r"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
         )
         match = re.match(stack_trace_line_format, line)
         if not match:

diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py
@@ -128,7 +128,7 @@ def get_address_object(address_name: str, offset: int = 0):
 
 
 def _search_line_for_cmd_start(line: str, start: int, valid_commands: dict) -> int:
-    """Scan `line` for a string matching any key in `valid_commands`.
+    r"""Scan `line` for a string matching any key in `valid_commands`.
 
     Start searching from `start`.
     Commands escaped with `\` (E.g. `\DexLabel('a')`) are ignored.
@@ -543,7 +543,7 @@ def test_parse_share_line(self):
     def test_parse_escaped(self):
         """Escaped commands are ignored."""
 
-        lines = ['words \MockCmd("IGNORED") words words words\n']
+        lines = ['words \\MockCmd("IGNORED") words words words\n']
 
         values = self._find_all_mock_values_in_lines(lines)
 

diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
@@ -226,7 +226,7 @@ def can_target_host():
     xcode_lldb_vers = subprocess.check_output(["xcrun", "lldb", "--version"]).decode(
         "utf-8"
     )
-    match = re.search("lldb-(\d+)", xcode_lldb_vers)
+    match = re.search(r"lldb-(\d+)", xcode_lldb_vers)
     if match:
         apple_lldb_vers = int(match.group(1))
         if apple_lldb_vers < 1000:
@@ -250,7 +250,7 @@ def get_gdb_version_string():
     if len(gdb_vers_lines) < 1:
         print("Unkown GDB version format (too few lines)", file=sys.stderr)
         return None
-    match = re.search("GNU gdb \(.*?\) ((\d|\.)+)", gdb_vers_lines[0].strip())
+    match = re.search(r"GNU gdb \(.*?\) ((\d|\.)+)", gdb_vers_lines[0].strip())
     if match is None:
         print(f"Unkown GDB version format: {gdb_vers_lines[0]}", file=sys.stderr)
         return None
@@ -264,7 +264,7 @@ def get_clang_default_dwarf_version_string(triple):
     # Get the flags passed by the driver and look for -dwarf-version.
     cmd = f'{llvm_config.use_llvm_tool("clang")} -g -xc  -c - -v -### --target={triple}'
     stderr = subprocess.run(cmd.split(), stderr=subprocess.PIPE).stderr.decode()
-    match = re.search("-dwarf-version=(\d+)", stderr)
+    match = re.search(r"-dwarf-version=(\d+)", stderr)
     if match is None:
         print("Cannot determine default dwarf version", file=sys.stderr)
         return None

@@ -31,35 +31,42 @@
 
 BLOCKLIT = '' # block Lit from interpreting a RUN/XFAIL/etc inside the generation script
 if regenerate_expected_results:
-  print(f"""\
+    print(
+        f"""\
 //--- generate-transitive-includes.sh.cpp
 // RUN{BLOCKLIT}: mkdir %t
-""")
+"""
+    )
 
-  all_traces = []
-  for header in sorted(public_headers):
-    if header.endswith('.h'): # Skip C compatibility or detail headers
-      continue
+    all_traces = []
+    for header in sorted(public_headers):
+        if header.endswith(".h"):  # Skip C compatibility or detail headers
+            continue
 
-    normalized_header = re.sub('/', '_', header)
-    print(f"""\
+        normalized_header = re.sub("/", "_", header)
+        print(
+            f"""\
 // RUN{BLOCKLIT}: echo "#include <{header}>" | %{{cxx}} -xc++ - %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.{normalized_header}.txt
-""")
-    all_traces.append(f'%t/trace-includes.{normalized_header}.txt')
+"""
+        )
+        all_traces.append(f"%t/trace-includes.{normalized_header}.txt")
 
-  print(f"""\
+    print(
+        f"""\
 // RUN{BLOCKLIT}: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes_to_csv.py {' '.join(all_traces)} > %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv
-""")
+"""
+    )
 
 else:
-  for header in public_headers:
-    if header.endswith('.h'): # Skip C compatibility or detail headers
-      continue
+    for header in public_headers:
+        if header.endswith(".h"):  # Skip C compatibility or detail headers
+            continue
 
-    # Escape slashes for the awk command below
-    escaped_header = header.replace('/', '\/')
+        # Escape slashes for the awk command below
+        escaped_header = header.replace("/", "\\/")
 
-    print(f"""\
+        print(
+            f"""\
 //--- {header}.sh.cpp
 {lit_header_restrictions.get(header, '')}
 
@@ -87,4 +94,5 @@
 // RUN{BLOCKLIT}: cat %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv | awk '/^{escaped_header} / {{ print }}' > %t/expected_transitive_includes.csv
 // RUN{BLOCKLIT}: diff -w %t/expected_transitive_includes.csv %t/actual_transitive_includes.csv
 #include <{header}>
-""")
+"""
+        )
@@ -136,7 +136,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]:
 
 /// Returns whether the code unit needs to be escaped.
 ///
-/// \pre The code point is a valid Unicode code point.
+/// \\pre The code point is a valid Unicode code point.
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool __needs_escape(const char32_t __code_point) noexcept {{
   // Since __unallocated_region_lower_bound contains the unshifted range do the
   // comparison without shifting.

@@ -155,7 +155,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]:
 
 /// Returns the estimated width of a Unicode code point.
 ///
-/// \pre The code point is a valid Unicode code point.
+/// \\pre The code point is a valid Unicode code point.
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int __estimated_width(const char32_t __code_point) noexcept {{
   // Since __table_upper_bound contains the unshifted range do the
   // comparison without shifting.

diff --git a/lld/test/MachO/tools/validate-unwind-info.py b/lld/test/MachO/tools/validate-unwind-info.py
@@ -11,7 +11,7 @@
 
 
 def main():
-    hex = "[a-f\d]"
+    hex = r"[a-f\d]"
     hex8 = hex + "{8}"
 
     parser = argparse.ArgumentParser(description=__doc__)

diff --git a/lld/utils/benchmark.py b/lld/utils/benchmark.py
@@ -51,7 +51,7 @@ def __str__(self):
 def getBenchmarks():
     ret = []
     for i in glob.glob("*/response*.txt"):
-        m = re.match("response-(.*)\.txt", os.path.basename(i))
+        m = re.match(r"response-(.*)\.txt", os.path.basename(i))
         variant = m.groups()[0] if m else None
         ret.append(Bench(os.path.dirname(i), variant))
     return ret

@@ -294,7 +294,7 @@ class DarwinImage(symbolication.Image):
             except:
                 dsymForUUIDBinary = ""
 
-        dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*")
+        dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*")
 
         def __init__(
             self, text_addr_lo, text_addr_hi, identifier, version, uuid, path, verbose
@@ -488,7 +488,7 @@ def find_image_with_identifier(self, identifier):
         for image in self.images:
             if image.identifier == identifier:
                 return image
-        regex_text = "^.*\.%s$" % (re.escape(identifier))
+        regex_text = r"^.*\.%s$" % (re.escape(identifier))
         regex = re.compile(regex_text)
         for image in self.images:
             if regex.match(image.identifier):
@@ -865,7 +865,7 @@ def get(cls):
             version = r"(?:" + super().version + r"\s+)?"
             address = r"(0x[0-9a-fA-F]{4,})"  # 4 digits or more
 
-            symbol = """
+            symbol = r"""
                         (?:
                             [ ]+
                             (?P<symbol>.+)
@@ -1035,7 +1035,7 @@ def parse_normal(self, line):
             self.crashlog.process_identifier = line[11:].strip()
         elif line.startswith("Version:"):
             version_string = line[8:].strip()
-            matched_pair = re.search("(.+)\((.+)\)", version_string)
+            matched_pair = re.search(r"(.+)\((.+)\)", version_string)
             if matched_pair:
                 self.crashlog.process_version = matched_pair.group(1)
                 self.crashlog.process_compatability_version = matched_pair.group(2)

@@ -99,7 +99,7 @@ def parse_log_file(file, options):
     print("# Log file: '%s'" % file)
     print("#----------------------------------------------------------------------")
 
-    timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
+    timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
 
     base_time = 0.0
     last_time = 0.0

@@ -1537,13 +1537,13 @@ def parse_gdb_log(file, options):
     a long time during a preset set of debugger commands."""
 
     tricky_commands = ["qRegisterInfo"]
-    timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
-    packet_name_regex = re.compile("([A-Za-z_]+)[^a-z]")
+    timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$")
+    packet_name_regex = re.compile(r"([A-Za-z_]+)[^a-z]")
     packet_transmit_name_regex = re.compile(
-        "(?P<direction>send|read) packet: (?P<packet>.*)"
+        r"(?P<direction>send|read) packet: (?P<packet>.*)"
     )
-    packet_contents_name_regex = re.compile("\$([^#]*)#[0-9a-fA-F]{2}")
-    packet_checksum_regex = re.compile(".*#[0-9a-fA-F]{2}$")
+    packet_contents_name_regex = re.compile(r"\$([^#]*)#[0-9a-fA-F]{2}")
+    packet_checksum_regex = re.compile(r".*#[0-9a-fA-F]{2}$")
     packet_names_regex_str = "(" + "|".join(gdb_remote_commands.keys()) + ")(.*)"
     packet_names_regex = re.compile(packet_names_regex_str)
 

@@ -38,7 +38,7 @@ def parse_linespec(linespec, frame, result):
             )
 
     if not matched:
-        mo = re.match("^\+([0-9]+)$", linespec)
+        mo = re.match(r"^\+([0-9]+)$", linespec)
         if mo is not None:
             matched = True
             # print "Matched +<count>"
@@ -54,7 +54,7 @@ def parse_linespec(linespec, frame, result):
             )
 
     if not matched:
-        mo = re.match("^\-([0-9]+)$", linespec)
+        mo = re.match(r"^\-([0-9]+)$", linespec)
         if mo is not None:
             matched = True
             # print "Matched -<count>"
@@ -79,7 +79,7 @@ def parse_linespec(linespec, frame, result):
             breakpoint = target.BreakpointCreateByLocation(file_name, line_number)
 
     if not matched:
-        mo = re.match("\*((0x)?([0-9a-f]+))$", linespec)
+        mo = re.match(r"\*((0x)?([0-9a-f]+))$", linespec)
         if mo is not None:
             matched = True
             # print "Matched <address-expression>"

@@ -346,7 +346,7 @@ def __init__(self, pid):
 
     def Measure(self):
         output = subprocess.getoutput(self.command).split("\n")[-1]
-        values = re.split("[-+\s]+", output)
+        values = re.split(r"[-+\s]+", output)
         for idx, stat in enumerate(values):
             multiplier = 1
             if stat: