reorganize llc checks script to allow more flexibility; NFCI

The goal is to enhance this script to be used with opt and clang: Group all of the regexes together, so it's easier to see what's going on. This will make it easier to break main() up into pieces too.

Also, note that some of the regexes are for x86-specific asm.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@264197 91177308-0d34-0410-b5e6-96231b3b80d8
author: Sanjay Patel <spatel@rotateright.com> 2016-03-23 21:40:53 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2016-03-23 21:40:53 +0000
commit: c062937b5ea1101f5a408359763903d9f2df90dd (patch)
tree: 638f547c9754595e1c6b6fc2431505103613475c
parent: 825efc59cb586f9698d9b0d9e763f80aa2f1cdd5 (diff)
1 files changed, 31 insertions, 28 deletions
diff --git a/utils/update_llc_test_checks.py b/utils/update_llc_test_checks.py
index 4ce034df308..781fab54f69 100755
--- a/utils/update_llc_test_checks.py
+++ b/utils/update_llc_test_checks.py
@@ -15,7 +15,7 @@ import sys
 import tempfile
 import re
 
-
+# Invoke the tool that is being tested.
 def llc(args, cmd_args, ir):
   with open(ir) as ir_file:
     stdout = subprocess.check_output(args.llc_binary + ' ' + cmd_args,
@@ -25,33 +25,45 @@ def llc(args, cmd_args, ir):
   return stdout
 
 
-ASM_SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
-ASM_SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
-ASM_SCRUB_SHUFFLES_RE = (
+# RegEx: this is where the magic happens.
+
+SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
+SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
+SCRUB_X86_SHUFFLES_RE = (
     re.compile(
         r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$',
         flags=re.M))
-ASM_SCRUB_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
-ASM_SCRUB_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
-ASM_SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
+SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
+SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
+SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
+
+RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
+IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
+ASM_FUNCTION_RE = re.compile(
+    r'^_?(?P<f>[^:]+):[ \t]*#+[ \t]*@(?P=f)\n[^:]*?'
+    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
+    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
+    flags=(re.M | re.S))
+CHECK_PREFIX_RE = re.compile('--check-prefix=(\S+)')
+CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
 
 
 def scrub_asm(asm):
   # Scrub runs of whitespace out of the assembly, but leave the leading
   # whitespace in place.
-  asm = ASM_SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
   # Expand the tabs used for indentation.
   asm = string.expandtabs(asm, 2)
   # Detect shuffle asm comments and hide the operands in favor of the comments.
-  asm = ASM_SCRUB_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
+  asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
   # Generically match the stack offset of a memory operand.
-  asm = ASM_SCRUB_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
+  asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
   # Generically match a RIP-relative memory operand.
-  asm = ASM_SCRUB_RIP_RE.sub(r'{{.*}}(%rip)', asm)
+  asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
   # Strip kill operands inserted into the asm.
-  asm = ASM_SCRUB_KILL_COMMENT_RE.sub('', asm)
+  asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
   # Strip trailing whitespace.
-  asm = ASM_SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
+  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
   return asm
 
 
@@ -66,15 +78,6 @@ def main():
   parser.add_argument('tests', nargs='+')
   args = parser.parse_args()
 
-  run_line_re = re.compile('^\s*;\s*RUN:\s*(.*)$')
-  ir_function_re = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
-  asm_function_re = re.compile(
-      r'^_?(?P<f>[^:]+):[ \t]*#+[ \t]*@(?P=f)\n[^:]*?'
-      r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
-      r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
-      flags=(re.M | re.S))
-  check_prefix_re = re.compile('--check-prefix=(\S+)')
-  check_re = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
   autogenerated_note = ('; NOTE: Assertions have been autogenerated by '
                         'utils/update_llc_test_checks.py')
 
@@ -85,7 +88,7 @@ def main():
       test_lines = [l.rstrip() for l in f]
 
     run_lines = [m.group(1)
-                 for m in [run_line_re.match(l) for l in test_lines] if m]
+                 for m in [RUN_LINE_RE.match(l) for l in test_lines] if m]
     if args.verbose:
       print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
       for l in run_lines:
@@ -106,7 +109,7 @@ def main():
       llc_cmd_args = llc_cmd_args.replace('< %s', '').replace('%s', '').strip()
 
       check_prefixes = [m.group(1)
-                        for m in check_prefix_re.finditer(filecheck_cmd)]
+                        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
       if not check_prefixes:
         check_prefixes = ['CHECK']
 
@@ -124,7 +127,7 @@ def main():
         print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
       raw_asm = llc(args, llc_args, test)
       # Build up a dictionary of all the function bodies.
-      for m in asm_function_re.finditer(raw_asm):
+      for m in ASM_FUNCTION_RE.finditer(raw_asm):
         if not m:
           continue
         f = m.group('f')
@@ -158,7 +161,7 @@ def main():
     for l in test_lines:
       if is_in_function_start:
         if l.lstrip().startswith(';'):
-          m = check_re.match(l)
+          m = CHECK_RE.match(l)
           if not m or m.group(1) not in prefix_set:
             fixed_lines.append(l)
             continue
@@ -187,7 +190,7 @@ def main():
         if l.strip() == ';':
           continue
         # And skip any CHECK lines. We'll build our own.
-        m = check_re.match(l)
+        m = CHECK_RE.match(l)
         if m and m.group(1) in prefix_set:
           continue
         # Collect the remaining lines in the function body and look for the end
@@ -201,7 +204,7 @@ def main():
         continue
       fixed_lines.append(l)
 
-      m = ir_function_re.match(l)
+      m = IR_FUNCTION_RE.match(l)
       if not m:
         continue
       name = m.group(1)
author	Sanjay Patel <spatel@rotateright.com>	2016-03-23 21:40:53 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2016-03-23 21:40:53 +0000
commit	c062937b5ea1101f5a408359763903d9f2df90dd (patch)
tree	638f547c9754595e1c6b6fc2431505103613475c
parent	825efc59cb586f9698d9b0d9e763f80aa2f1cdd5 (diff)