author     JF Bastien <jfb@google.com>   2015-10-13 00:28:47 +0000
committer  JF Bastien <jfb@google.com>   2015-10-13 00:28:47 +0000
commit     a4b41272897aebd7360d431179703403c832c8d3
tree       daab9f62e1da32dd7cc111261471a890d154ffa6
parent     41438be88af1a2ea003378a866447b4842990c69
x86: preserve flags when folding atomic operations
Summary:
D4796 taught LLVM to fold some atomic integer operations into a single instruction. The pattern was unaware that the instructions clobbered flags. This patch adds the missing EFLAGS definition. Floating-point operations don't set flags, so the subsequent fadd optimization is still correct; the same applies to the surrounding load/store optimizations.

Reviewers: rsmith, rtrieu

Subscribers: llvm-commits, reames, morisset

Differential Revision: http://reviews.llvm.org/D13680

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250135 91177308-0d34-0410-b5e6-96231b3b80d8
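To make the failure mode concrete, here is a minimal LLVM IR sketch of the shape the new test exercises, with a rough pre-patch x86 lowering in the comments. The register and label names are illustrative, not taken from this patch.

; Hedged sketch: a compare result stays live across a folded atomic RMW.
define i32 @sketch(i8* %p, i32 %a, i32 %b) {
  %cmp = icmp eq i32 %a, %b                      ; cmpl %edx, %esi (writes EFLAGS)
  br i1 %cmp, label %L1, label %L2               ; jne
L1:
  %old = load atomic i8, i8* %p seq_cst, align 1
  %new = add i8 %old, 2
  store atomic i8 %new, i8* %p release, align 1  ; load+add+store fold into:
                                                 ;   addb $2, (%rdi)
                                                 ; which also writes EFLAGS
  br i1 %cmp, label %L3, label %L4               ; pre-patch, this jne tested
L2:                                              ; addb's flags, not cmpl's
  ret i32 2
L3:
  ret i32 3
L4:
  ret i32 4
}

With EFLAGS listed in the pseudos' Defs, the register allocator knows the folded instruction clobbers flags and rematerializes the compare (re-emits cmpl) before the second branch, which is exactly what the new test below checks for.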
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td  14
-rw-r--r--  test/CodeGen/X86/atomic-flags.ll    38
2 files changed, 46 insertions(+), 6 deletions(-)
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 5d07c702499..d16a05321b7 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -774,12 +774,14 @@ multiclass RELEASE_BINOP_MI<SDNode op> {
[(atomic_store_64 addr:$dst, (op
(atomic_load_64 addr:$dst), GR64:$src))]>;
}
-defm RELEASE_ADD : RELEASE_BINOP_MI<add>;
-defm RELEASE_AND : RELEASE_BINOP_MI<and>;
-defm RELEASE_OR : RELEASE_BINOP_MI<or>;
-defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;
-// Note: we don't deal with sub, because substractions of constants are
-// optimized into additions before this code can run
+let Defs = [EFLAGS] in {
+ defm RELEASE_ADD : RELEASE_BINOP_MI<add>;
+ defm RELEASE_AND : RELEASE_BINOP_MI<and>;
+ defm RELEASE_OR : RELEASE_BINOP_MI<or>;
+ defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;
+ // Note: we don't deal with sub, because subtractions of constants are
+ // optimized into additions before this code can run.
+}
// Same as above, but for floating-point.
// FIXME: imm version.
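For contrast with the integer case, the floating-point variants defined just below this context keep working without a Defs change: the scalar SSE adds they select (addss/addsd) do not write EFLAGS. A hedged sketch of the IR shape the fadd fold matches, assuming the bitcast-through-integer form x86 atomic FP loads and stores took at the time (the function name is hypothetical):

define void @fadd_sketch(i32* %p, float %x) {
  %old = load atomic i32, i32* %p seq_cst, align 4
  %f = bitcast i32 %old to float
  %sum = fadd float %f, %x                        ; selects addss: no flags written
  %new = bitcast float %sum to i32
  store atomic i32 %new, i32* %p release, align 4
  ret void
}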
diff --git a/test/CodeGen/X86/atomic-flags.ll b/test/CodeGen/X86/atomic-flags.ll
new file mode 100644
index 00000000000..141a7690dba
--- /dev/null
+++ b/test/CodeGen/X86/atomic-flags.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s
+
+; Make sure that flags are properly preserved despite atomic optimizations.
+
+define i32 @atomic_and_flags(i8* %p, i32 %a, i32 %b) {
+; CHECK-LABEL: atomic_and_flags:
+
+ ; Generate flags value, and use it.
+ ; CHECK: cmpl
+ ; CHECK-NEXT: jne
+ %cmp = icmp eq i32 %a, %b
+ br i1 %cmp, label %L1, label %L2
+
+L1:
+ ; The following pattern will get folded.
+ ; CHECK: addb
+ %1 = load atomic i8, i8* %p seq_cst, align 1
+ %2 = add i8 %1, 2
+ store atomic i8 %2, i8* %p release, align 1
+
+ ; Use the comparison result again. We need to rematerialize the comparison
+ ; somehow. This test checks that cmpl gets emitted again, but any
+ ; rematerialization would work (the optimizer used to clobber the flags with
+ ; the add).
+ ; CHECK-NEXT: cmpl
+ ; CHECK-NEXT: jne
+ br i1 %cmp, label %L3, label %L4
+
+L2:
+ ret i32 2
+
+L3:
+ ret i32 3
+
+L4:
+ ret i32 4
+}