File: atomic-idempotent.ll

package info (click to toggle)
llvm-toolchain-9 1%3A9.0.1-16
links: PTS, VCS
area: main
in suites: bullseye
size: 882,436 kB
sloc: cpp: 4,167,636; ansic: 714,256; asm: 457,610; python: 155,927; objc: 65,094; sh: 42,856; lisp: 26,908; perl: 7,786; pascal: 7,722; makefile: 6,881; ml: 5,581; awk: 3,648; cs: 2,027; xml: 888; javascript: 381; ruby: 156
file content (320 lines) | stat: -rw-r--r-- 8,709 bytes
parent folder | download | duplicates (2)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X86

; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
; This is explained (with the motivation for such an optimization) in
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf

; i8 case: `atomicrmw add` of 0 with monotonic ordering, result returned.
; Both targets are expected to emit an mfence followed by a plain byte load
; instead of a locked read-modify-write; i686 first loads %p from the stack.
define i8 @add8(i8* %p) {
; X64-LABEL: add8:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movb (%rdi), %al
; X64-NEXT:    retq
;
; X86-LABEL: add8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mfence
; X86-NEXT:    movb (%eax), %al
; X86-NEXT:    retl
  %1 = atomicrmw add i8* %p, i8 0 monotonic
  ret i8 %1
}

; i16 case: `atomicrmw or` of 0 with acquire ordering, result returned.
; Expected lowering on both targets is mfence followed by a zero-extending
; 16-bit load (movzwl).
define i16 @or16(i16* %p) {
; X64-LABEL: or16:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-LABEL: or16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mfence
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    retl
  %1 = atomicrmw or i16* %p, i16 0 acquire
  ret i16 %1
}

; i32 case: `atomicrmw xor` of 0 with release ordering, result returned.
; Expected lowering on both targets is mfence followed by a plain 32-bit load.
define i32 @xor32(i32* %p) {
; X64-LABEL: xor32:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-LABEL: xor32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mfence
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    retl
  %1 = atomicrmw xor i32* %p, i32 0 release
  ret i32 %1
}

; i64 case: `atomicrmw sub` of 0 with seq_cst ordering, result returned.
; On x86-64 the expected lowering is mfence followed by a 64-bit load.
; On i686 the checks instead pin a `lock cmpxchg8b` retry loop: the value
; loaded into edx:eax is copied to ecx:ebx as the replacement, so the loop
; stores back what it read. %ebx and %esi are saved and restored around it.
define i64 @sub64(i64* %p) {
; X64-LABEL: sub64:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    retq
;
; X86-LABEL: sub64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %ebx, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %eax
; X86-NEXT:    movl 4(%esi), %edx
; X86-NEXT:    .p2align 4, 0x90
; X86-NEXT:  .LBB3_1: # %atomicrmw.start
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    lock cmpxchg8b (%esi)
; X86-NEXT:    jne .LBB3_1
; X86-NEXT:  # %bb.2: # %atomicrmw.end
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %1 = atomicrmw sub i64* %p, i64 0 seq_cst
  ret i64 %1
}

; i128 case: `atomicrmw or` of 0 with monotonic ordering, result returned.
; No fence-plus-load form is expected here: both targets call the
; __sync_fetch_and_or_16 libcall with a zero operand. On i686 the checks
; also cover copying the four 32-bit result words from the stack temporary
; to the pointer loaded from 8(%ebp), which is then returned in %eax
; (`retl $4`).
define i128 @or128(i128* %p) {
; X64-LABEL: or128:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    callq __sync_fetch_and_or_16
; X64-NEXT:    popq %rcx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-LABEL: or128:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    .cfi_offset %esi, -16
; X86-NEXT:    .cfi_offset %edi, -12
; X86-NEXT:    movl 8(%ebp), %esi
; X86-NEXT:    movl %esp, %eax
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl 12(%ebp)
; X86-NEXT:    pushl %eax
; X86-NEXT:    calll __sync_fetch_and_or_16
; X86-NEXT:    addl $20, %esp
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %edi, 8(%esi)
; X86-NEXT:    movl %edx, 12(%esi)
; X86-NEXT:    movl %eax, (%esi)
; X86-NEXT:    movl %ecx, 4(%esi)
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    leal -8(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl $4
  %1 = atomicrmw or i128* %p, i128 0 monotonic
  ret i128 %1
}

; For 'and', the idempotent operand is all-ones (-1) rather than 0.
; i32 case with acq_rel ordering, result returned: both targets are expected
; to emit mfence followed by a plain 32-bit load, as for the other
; idempotent operations.
define i32 @and32 (i32* %p) {
; X64-LABEL: and32:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-LABEL: and32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mfence
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    retl
  %1 = atomicrmw and i32* %p, i32 -1 acq_rel
  ret i32 %1
}

; Result-unused variant: `atomicrmw or` of 0 on an i32, monotonic ordering.
; The expected output is only the `#MEMBARRIER` line directly followed by the
; return, i.e. no fence, load, or locked instruction is checked for. The shared
; check prefix is used because both triples produce the same body; the return
; pattern accepts either the `l` or the `q` suffix.
define void @or32_nouse_monotonic(i32* %p) {
; CHECK-LABEL: or32_nouse_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    #MEMBARRIER
; CHECK-NEXT:    ret{{[l|q]}}
  atomicrmw or i32* %p, i32 0 monotonic
  ret void
}


; Result-unused variant: `atomicrmw or` of 0 on an i32, acquire ordering.
; As in the monotonic case, the expected output on both triples is only the
; `#MEMBARRIER` line directly followed by the return.
define void @or32_nouse_acquire(i32* %p) {
; CHECK-LABEL: or32_nouse_acquire:
; CHECK:       # %bb.0:
; CHECK-NEXT:    #MEMBARRIER
; CHECK-NEXT:    ret{{[l|q]}}
  atomicrmw or i32* %p, i32 0 acquire
  ret void
}

; Result-unused variant: `atomicrmw or` of 0 on an i32, release ordering.
; The expected output on both triples is only the `#MEMBARRIER` line directly
; followed by the return.
define void @or32_nouse_release(i32* %p) {
; CHECK-LABEL: or32_nouse_release:
; CHECK:       # %bb.0:
; CHECK-NEXT:    #MEMBARRIER
; CHECK-NEXT:    ret{{[l|q]}}
  atomicrmw or i32* %p, i32 0 release
  ret void
}

; Result-unused variant: `atomicrmw or` of 0 on an i32, acq_rel ordering.
; The expected output on both triples is only the `#MEMBARRIER` line directly
; followed by the return; contrast with the seq_cst variant, which expects a
; locked instruction.
define void @or32_nouse_acq_rel(i32* %p) {
; CHECK-LABEL: or32_nouse_acq_rel:
; CHECK:       # %bb.0:
; CHECK-NEXT:    #MEMBARRIER
; CHECK-NEXT:    ret{{[l|q]}}
  atomicrmw or i32* %p, i32 0 acq_rel
  ret void
}

; Result-unused variant: `atomicrmw or` of 0 on an i32, seq_cst ordering.
; The expected lowering does not touch %p at all: it is a `lock orl $0` on a
; stack slot, at a negative offset from %rsp on x86-64 and at (%esp) on i686.
define void @or32_nouse_seq_cst(i32* %p) {
; X64-LABEL: or32_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-LABEL: or32_nouse_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    lock orl $0, (%esp)
; X86-NEXT:    retl
  atomicrmw or i32* %p, i32 0 seq_cst
  ret void
}

; Result-unused variant: `atomicrmw or` of 0 on an i64, seq_cst ordering.
; On x86-64 the expected lowering is a `lock orl $0` on a stack slot.
; TODO: The result isn't used on the 32-bit target either, so the
; `lock cmpxchg8b` loop currently pinned by the i686 checks is unneeded.
define void @or64_nouse_seq_cst(i64* %p) {
; X64-LABEL: or64_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-LABEL: or64_nouse_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %ebx, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %eax
; X86-NEXT:    movl 4(%esi), %edx
; X86-NEXT:    .p2align 4, 0x90
; X86-NEXT:  .LBB11_1: # %atomicrmw.start
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    lock cmpxchg8b (%esi)
; X86-NEXT:    jne .LBB11_1
; X86-NEXT:  # %bb.2: # %atomicrmw.end
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  atomicrmw or i64* %p, i64 0 seq_cst
  ret void
}

; Result-unused variant: `atomicrmw or` of 0 on an i128, seq_cst ordering.
; TODO: This doesn't need to be lowered to a __sync_fetch_and_or_16 libcall,
; which is what the checks for both targets currently pin. The result of the
; call is discarded on both targets.
define void @or128_nouse_seq_cst(i128* %p) {
; X64-LABEL: or128_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    callq __sync_fetch_and_or_16
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-LABEL: or128_nouse_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl %esp, %eax
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl 8(%ebp)
; X86-NEXT:    pushl %eax
; X86-NEXT:    calll __sync_fetch_and_or_16
; X86-NEXT:    addl $20, %esp
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
  atomicrmw or i128* %p, i128 0 seq_cst
  ret void
}


; Result-unused variant: `atomicrmw or` of 0 on an i16, seq_cst ordering.
; Expected lowering matches the i32 case: a 32-bit `lock orl $0` on a stack
; slot (negative offset from %rsp on x86-64, (%esp) on i686), not on %p.
define void @or16_nouse_seq_cst(i16* %p) {
; X64-LABEL: or16_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-LABEL: or16_nouse_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    lock orl $0, (%esp)
; X86-NEXT:    retl
  atomicrmw or i16* %p, i16 0 seq_cst
  ret void
}

; Result-unused variant: `atomicrmw or` of 0 on an i8, seq_cst ordering.
; Expected lowering matches the i16 and i32 cases: a 32-bit `lock orl $0` on
; a stack slot (negative offset from %rsp on x86-64, (%esp) on i686).
define void @or8_nouse_seq_cst(i8* %p) {
; X64-LABEL: or8_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-LABEL: or8_nouse_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    lock orl $0, (%esp)
; X86-NEXT:    retl
  atomicrmw or i8* %p, i8 0 seq_cst
  ret void
}