File: vect-regpairs.ll

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 1,998,492 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (134 lines) | stat: -rw-r--r-- 9,173 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
;RUN: llc -mtriple=hexagon -mcpu=hexagonv66 -mhvx -filetype=obj < %s -o - | llvm-objdump --mcpu=hexagonv66 --mattr=+hvx -d - | FileCheck --check-prefix=CHECK-V66 %s
;RUN: llc -mtriple=hexagon -mcpu=hexagonv67 -mhvx -filetype=obj < %s -o - | llvm-objdump --mcpu=hexagonv67 --mattr=+hvx -d - | FileCheck --check-prefix=CHECK-V67 %s

; Should not attempt to use v<even>:<odd> 'reverse' vector regpairs
; on old or new arches (should not crash).

; CHECK-V66: vcombine
; CHECK-V67: vcombine
; Declarations of the Hexagon V6 (HVX) intrinsics for this test. Every
; vector operand is either <16 x i32> or <32 x i32>.
; Several of these (vmpybus, vmpyihb.acc, vasrhubrndsat, vmpyiwb.acc,
; vshuffob) are declared but not called by @Gaussian7x7u8PerRow below.
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vd0()
declare <32 x i32> @llvm.hexagon.V6.vmpybus(<16 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32 )
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>)
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32 )
declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vmpyihb.acc(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vasrhubrndsat(<16 x i32>, <16 x i32>, i32)

; Second group: widening add, accumulating multiplies and a byte shuffle.
declare <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32>, <16 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>)


; Gaussian7x7u8PerRow(src, stride, width, dst)
;
; Reads seven rows around %src (at byte offsets -3*stride .. +3*stride),
; one <16 x i32> (64-byte) vector per row per step, combines them with the
; HVX multiply-accumulate intrinsics, and stores one <16 x i32> result
; vector to %dst per loop iteration.
;
;   %src    - pointer to the centre row
;   %stride - byte distance between consecutive rows
;   %width  - element count; the loop runs while the remaining count is > 64
;   %dst    - output pointer, advanced by one vector per iteration
;
; The entry block consumes the first vector of every row, so the loop's row
; pointers start 64 bytes past the row bases and advance by one vector per
; iteration.
;
; Unnamed values are numbered %0..%75 in definition order. The phi nodes
; below refer to %7, %16, %22, %23..%29 and %49 by number, so instructions
; must not be added or removed without renumbering.
;
; The splat immediates are 0x06060606, 0x0F0F0F0F, 0x14141414 (row pass) and
; 0x0F140F14, 0x01060106 (column pass); presumably the 1-6-15-20-15-6-1
; filter weights - confirm against the original kernel source.
define void @Gaussian7x7u8PerRow(ptr %src, i32 %stride, i32 %width, ptr %dst) #0 {
entry:
  ; Row base pointers: src - 3*stride ... src + 3*stride.
  %mul = mul i32 %stride, 3
  %idx.neg = sub i32 0, %mul
  %add.ptr = getelementptr i8, ptr %src, i32 %idx.neg
  %0 = bitcast ptr %add.ptr to ptr
  %mul1 = shl i32 %stride, 1
  %idx.neg2 = sub i32 0, %mul1
  %add.ptr3 = getelementptr i8, ptr %src, i32 %idx.neg2
  %1 = bitcast ptr %add.ptr3 to ptr
  %idx.neg5 = sub i32 0, %stride
  %add.ptr6 = getelementptr i8, ptr %src, i32 %idx.neg5
  %2 = bitcast ptr %add.ptr6 to ptr
  %3 = bitcast ptr %src to ptr
  %add.ptr10 = getelementptr i8, ptr %src, i32 %stride
  %4 = bitcast ptr %add.ptr10 to ptr
  %add.ptr12 = getelementptr i8, ptr %src, i32 %mul1
  %5 = bitcast ptr %add.ptr12 to ptr
  %add.ptr14 = getelementptr i8, ptr %src, i32 %mul
  %6 = bitcast ptr %add.ptr14 to ptr
  %7 = bitcast ptr %dst to ptr
  ; First vector of each of the seven rows (align 64 is the natural
  ; alignment of the 64-byte <16 x i32> type).
  %8 = load <16 x i32>, ptr %0, align 64, !tbaa !8
  %9 = load <16 x i32>, ptr %1, align 64, !tbaa !8
  %10 = load <16 x i32>, ptr %2, align 64, !tbaa !8
  %11 = load <16 x i32>, ptr %3, align 64, !tbaa !8
  %12 = load <16 x i32>, ptr %4, align 64, !tbaa !8
  %13 = load <16 x i32>, ptr %5, align 64, !tbaa !8
  %14 = load <16 x i32>, ptr %6, align 64, !tbaa !8
  ; %16 is an all-zero vector pair; it seeds the "previous" accumulator phi.
  %15 = call <16 x i32> @llvm.hexagon.V6.vd0()
  %16 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %15, <16 x i32> %15)
  ; Row pass for the first vector: pair rows symmetrically about the centre
  ; row (%14/%8, %13/%9, %12/%10) and accumulate, then add the centre row %11.
  %17 = call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %14, <16 x i32> %8)
  %18 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %13, <16 x i32> %9)
  %19 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %17, <32 x i32> %18, i32 101058054)
  %20 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %12, <16 x i32> %10)
  %21 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %19, <32 x i32> %20, i32 252645135)
  %22 = call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %21, <16 x i32> %11, i32 336860180)
  %cmp155 = icmp sgt i32 %width, 64
  br i1 %cmp155, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  ; Step every row pointer past the 64-byte vector already loaded in entry.
  %incdec.ptr20 = getelementptr i8, ptr %add.ptr14, i32 64
  %23 = bitcast ptr %incdec.ptr20 to ptr
  %incdec.ptr19 = getelementptr i8, ptr %add.ptr12, i32 64
  %24 = bitcast ptr %incdec.ptr19 to ptr
  %incdec.ptr18 = getelementptr i8, ptr %add.ptr10, i32 64
  %25 = bitcast ptr %incdec.ptr18 to ptr
  %incdec.ptr17 = getelementptr i8, ptr %src, i32 64
  %26 = bitcast ptr %incdec.ptr17 to ptr
  %incdec.ptr16 = getelementptr i8, ptr %add.ptr6, i32 64
  %27 = bitcast ptr %incdec.ptr16 to ptr
  %incdec.ptr15 = getelementptr i8, ptr %add.ptr3, i32 64
  %28 = bitcast ptr %incdec.ptr15 to ptr
  %incdec.ptr = getelementptr i8, ptr %add.ptr, i32 64
  %29 = bitcast ptr %incdec.ptr to ptr
  br label %for.body

for.body:                                         ; preds = %for.body, %for.body.preheader
  %optr.0166 = phi ptr [ %incdec.ptr28, %for.body ], [ %7, %for.body.preheader ]
  %iptr6.0165 = phi ptr [ %incdec.ptr27, %for.body ], [ %23, %for.body.preheader ]
  %iptr5.0164 = phi ptr [ %incdec.ptr26, %for.body ], [ %24, %for.body.preheader ]
  %iptr4.0163 = phi ptr [ %incdec.ptr25, %for.body ], [ %25, %for.body.preheader ]
  %iptr3.0162 = phi ptr [ %incdec.ptr24, %for.body ], [ %26, %for.body.preheader ]
  %iptr2.0161 = phi ptr [ %incdec.ptr23, %for.body ], [ %27, %for.body.preheader ]
  %iptr1.0160 = phi ptr [ %incdec.ptr22, %for.body ], [ %28, %for.body.preheader ]
  %iptr0.0159 = phi ptr [ %incdec.ptr21, %for.body ], [ %29, %for.body.preheader ]
  ; %dXV1 is the row-pass result carried from the previous step (current
  ; window), %dXV0 the one before it, and %49 below becomes the next one.
  %dXV1.0158 = phi <32 x i32> [ %49, %for.body ], [ %22, %for.body.preheader ]
  %dXV0.0157 = phi <32 x i32> [ %dXV1.0158, %for.body ], [ %16, %for.body.preheader ]
  %i.0156 = phi i32 [ %sub, %for.body ], [ %width, %for.body.preheader ]
  ; Load the next vector of every row and post-increment each row pointer.
  %incdec.ptr21 = getelementptr <16 x i32>, ptr %iptr0.0159, i32 1
  %30 = load <16 x i32>, ptr %iptr0.0159, align 64, !tbaa !8
  %incdec.ptr22 = getelementptr <16 x i32>, ptr %iptr1.0160, i32 1
  %31 = load <16 x i32>, ptr %iptr1.0160, align 64, !tbaa !8
  %incdec.ptr23 = getelementptr <16 x i32>, ptr %iptr2.0161, i32 1
  %32 = load <16 x i32>, ptr %iptr2.0161, align 64, !tbaa !8
  %incdec.ptr24 = getelementptr <16 x i32>, ptr %iptr3.0162, i32 1
  %33 = load <16 x i32>, ptr %iptr3.0162, align 64, !tbaa !8
  %incdec.ptr25 = getelementptr <16 x i32>, ptr %iptr4.0163, i32 1
  %34 = load <16 x i32>, ptr %iptr4.0163, align 64, !tbaa !8
  %incdec.ptr26 = getelementptr <16 x i32>, ptr %iptr5.0164, i32 1
  %35 = load <16 x i32>, ptr %iptr5.0164, align 64, !tbaa !8
  %incdec.ptr27 = getelementptr <16 x i32>, ptr %iptr6.0165, i32 1
  %36 = load <16 x i32>, ptr %iptr6.0165, align 64, !tbaa !8
  ; Byte-aligned neighbours drawn from the previous and current accumulators.
  %37 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV1.0158)
  %38 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV0.0157)
  %39 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 2)
  %40 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV1.0158)
  %41 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV0.0157)
  %42 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %40, <16 x i32> %41, i32 2)
  %43 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 4)
  ; Row pass for the newly loaded vectors (same shape as in entry) -> %49.
  %44 = call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %36, <16 x i32> %30)
  %45 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %35, <16 x i32> %31)
  %46 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %44, <32 x i32> %45, i32 101058054)
  %47 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %34, <16 x i32> %32)
  %48 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %46, <32 x i32> %47, i32 252645135)
  %49 = call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %48, <16 x i32> %33, i32 336860180)
  ; Byte-aligned neighbours drawn from the current and next accumulators.
  %50 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %49)
  %51 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 2)
  %52 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %49)
  %53 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %52, <16 x i32> %37, i32 2)
  %54 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 4)
  ; Column pass: two independent vcombine -> vmpahb -> vmpahb.acc chains
  ; (%64 and %68). These keep several <32 x i32> vector pairs live at once.
  %55 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %37, <16 x i32> %39)
  %56 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %55, <16 x i32> %40)
  %57 = call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %56, i32 252972820)
  %58 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %40)
  %59 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %58, <16 x i32> %37)
  %60 = call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %59, i32 252972820)
  %61 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %43)
  %62 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %42)
  %63 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %61, <16 x i32> %62)
  %64 = call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %57, <32 x i32> %63, i32 17170694)
  %65 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %54, <16 x i32> %42)
  %66 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %39)
  %67 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %65, <16 x i32> %66)
  %68 = call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %60, <32 x i32> %67, i32 17170694)
  ; Narrow both chains with a shift by 12 and shuffle into one output vector.
  %69 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %64)
  %70 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %64)
  %71 = call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %69, <16 x i32> %70, i32 12)
  %72 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %68)
  %73 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %68)
  %74 = call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %72, <16 x i32> %73, i32 12)
  %75 = call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %74, <16 x i32> %71)
  ; Store through the current output pointer; the advanced pointer feeds the
  ; %optr.0166 phi on the back edge.
  %incdec.ptr28 = getelementptr <16 x i32>, ptr %optr.0166, i32 1
  store <16 x i32> %75, ptr %optr.0166, align 64, !tbaa !8
  %sub = add i32 %i.0156, -64
  %cmp = icmp sgt i32 %sub, 64
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Intrinsics called only in the loop body of @Gaussian7x7u8PerRow:
; vmpahb / vmpahb.acc take and return <32 x i32> vector pairs; vasrwh and
; vshuffeb take two <16 x i32> vectors and return one.
declare <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32>, <16 x i32>)

; Attribute group #0 is attached to @Gaussian7x7u8PerRow. It names
; hexagonv65 as the per-function target CPU with 64-byte HVX vectors
; (+hvx-length64b), while the RUN lines pass -mcpu=hexagonv66 / hexagonv67.
; NOTE(review): the first and last string attributes have no ="value" part
; and there is no space before "target-cpu"; this looks like text lost in
; extraction - confirm against the upstream test file.
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math""target-cpu"="hexagonv65" "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls" "unsafe-fp-math"}
; !8 is the TBAA access tag referenced by `!tbaa !8`: base type !9, access
; type !9, offset 0. !9 is the "omnipotent char" type node whose parent is
; the empty root node !10.
!8 = !{!9, !9, i64 0}
!9 = !{!"omnipotent char", !10}
!10 = !{}
; Empty nodes that nothing in the visible part of this file refers to.
!14 = !{}
!19 = !{}
!24 = !{}