# RUN: llc -run-pass si-insert-waits -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
--- |
  ; IR backing the first MIR function: a branch on `%cond == 0.0` picks one of
  ; two volatile stores, then the phi of the taken path is stored to %out.
  define amdgpu_kernel void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 {
  entry:
    ; Ordered-equal compare against zero; the branch carries the
    ; structurizecfg.uniform / amdgpu.uniform metadata.
    %cmp0 = fcmp oeq float %cond, 0.000000e+00
    br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
  else:                                             ; preds = %entry
    ; False path: volatile store of 100 to an undef pointer.
    store volatile i32 100, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0
  if:                                               ; preds = %entry
    ; True path: volatile store of 9 to an undef pointer.
    store volatile i32 9, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0
  done:                                             ; preds = %if, %else
    ; 0 when coming from %if, 1 when coming from %else.
    %value = phi i32 [ 0, %if ], [ 1, %else ]
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }
  ; IR backing the second MIR function: same control-flow shape as the kernel
  ; above, but the branch condition is undef and %cond is never read.
  define amdgpu_kernel void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 {
  entry:
    ; Branch on an undef condition, still tagged with the uniform metadata.
    br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
  else:                                             ; preds = %entry
    ; False path: volatile store of 100 to an undef pointer.
    store volatile i32 100, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0
  if:                                               ; preds = %entry
    ; True path: volatile store of 9 to an undef pointer.
    store volatile i32 9, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0
  done:                                             ; preds = %if, %else
    ; 0 when coming from %if, 1 when coming from %else.
    %value = phi i32 [ 0, %if ], [ 1, %else ]
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }
  attributes #0 = { nounwind }
  attributes #1 = { readnone }
  !0 = !{}
...
---
# Expected output for this function: the V_CMP_EQ_F32 that defines %vcc is
# followed directly by a `%vcc = S_MOV_B64 %vcc` copy, which in turn is
# followed directly by the S_CBRANCH_VCCZ that kills %vcc.
# NOTE(review): the S_MOV_B64 is presumably the workaround the test name refers
# to, inserted by the pass under test -- confirm against the pass source.
# NOTE(review): the input branch below targets %bb.1 while the expectation names
# %bb.2; presumably blocks are renumbered in layout order when the MIR is
# printed (else is the third block) -- confirm.
# CHECK-LABEL: name: vccz_corrupt_workaround
# CHECK: %vcc = V_CMP_EQ_F32
# CHECK-NEXT: %vcc = S_MOV_B64 %vcc
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed %vcc
name:            vccz_corrupt_workaround
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
# The only function live-in is the %sgpr0_sgpr1 pair, used as the base of both
# scalar loads in the entry block.
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1
    ; Two scalar loads based on %sgpr0_sgpr1; the second one kills and redefines
    ; the pair itself.
    %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 9, 0 :: (non-temporal dereferenceable invariant load 4 from `float addrspace(2)* undef`)
    %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    ; Constants for %sgpr6/%sgpr7, both listed as live-in to the if/else blocks.
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    ; The compare on the loaded %sgpr2 defines %vcc and the branch consumes it
    ; right away; this is the sequence the expectations above match.
    %vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, implicit %exec
    S_CBRANCH_VCCZ %bb.1, implicit killed %vcc
  bb.2.if:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
    ; Volatile store of 9, then %vgpr0 = 0 as this path's value for bb.3.
    %vgpr0 = V_MOV_B32_e32 9, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 0, implicit %exec
    S_BRANCH %bb.3
  bb.1.else:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
    ; Volatile store of 100, then %vgpr0 = 1; no terminator, so control falls
    ; through into bb.3.
    %vgpr0 = V_MOV_B32_e32 100, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 1, implicit %exec
  bb.3.done:
    liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
    ; Fill %sgpr2/%sgpr3 with the same two constants used earlier, then store
    ; the merged %vgpr0 through %sgpr0_sgpr1_sgpr2_sgpr3 to %ir.out.
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out)
    S_ENDPGM
...
---
# Expected output for this function: an S_WAITCNT directly followed by the
# S_CBRANCH_VCCZ whose %vcc operand is still marked undef, i.e. no other
# instruction is expected between the two.
# NOTE(review): as in the other function in this file, the input branch targets
# %bb.1 while the expectation names %bb.2; presumably a printing-time
# renumbering by layout order -- confirm.
# CHECK-LABEL: name: vccz_corrupt_undef_vcc
# CHECK: S_WAITCNT
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef %vcc
name:            vccz_corrupt_undef_vcc
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
# The only function live-in is the %sgpr0_sgpr1 pair, used as the base of the
# scalar load in the entry block.
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1
    ; Scalar load that kills and redefines %sgpr0_sgpr1.
    %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    ; Constants for %sgpr6/%sgpr7, both listed as live-in to the if/else blocks.
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    ; No instruction in this block defines %vcc; the branch reads it as an
    ; undef implicit operand.
    S_CBRANCH_VCCZ %bb.1, implicit undef %vcc
  bb.2.if:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
    ; Volatile store of 9, then %vgpr0 = 0 as this path's value for bb.3.
    %vgpr0 = V_MOV_B32_e32 9, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 0, implicit %exec
    S_BRANCH %bb.3
  bb.1.else:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
    ; Volatile store of 100, then %vgpr0 = 1; no terminator, so control falls
    ; through into bb.3.
    %vgpr0 = V_MOV_B32_e32 100, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 1, implicit %exec
  bb.3.done:
    liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
    ; Fill %sgpr2/%sgpr3 with the same two constants used earlier, then store
    ; the merged %vgpr0 through %sgpr0_sgpr1_sgpr2_sgpr3 to %ir.out.
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out)
    S_ENDPGM
...
 |