File: builtins-ppc-build-pair-mma.c

package info (click to toggle)
llvm-toolchain-snapshot 1%3A22~%2B%2B20251023025710%2B3f47a7be1ae6-1~exp5
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,336,076 kB
  • sloc: cpp: 7,822,956; ansic: 1,531,523; asm: 1,088,291; python: 260,779; f90: 98,765; objc: 70,846; lisp: 47,149; pascal: 17,852; sh: 8,636; ml: 5,111; perl: 4,720; makefile: 3,680; awk: 3,523; javascript: 2,270; xml: 892; fortran: 793
file content (136 lines) | stat: -rw-r--r-- 10,042 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE
// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-BE
// RUN: %clang_cc1 -O0 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE-NOOPT

// CHECK-LE-LABEL: define dso_local void @test1(
// CHECK-LE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-LE-NEXT:  [[ENTRY:.*:]]
// CHECK-LE-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC4]], <16 x i8> [[VC3]], <16 x i8> [[VC2]], <16 x i8> [[VC1]])
// CHECK-LE-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6:![0-9]+]]
// CHECK-LE-NEXT:    ret void
//
// CHECK-BE-LABEL: define dso_local void @test1(
// CHECK-BE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef writeonly captures(none) initializes((0, 64)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-BE-NEXT:  [[ENTRY:.*:]]
// CHECK-BE-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC1]], <16 x i8> [[VC2]], <16 x i8> [[VC3]], <16 x i8> [[VC4]])
// CHECK-BE-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP]], align 64, !tbaa [[__VECTOR_QUAD_TBAA6:![0-9]+]]
// CHECK-BE-NEXT:    ret void
//
// CHECK-LE-NOOPT-LABEL: define dso_local void @test1(
// CHECK-LE-NOOPT-SAME: ptr noundef [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]], ptr noundef [[RESP:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-LE-NOOPT-NEXT:  [[ENTRY:.*:]]
// CHECK-LE-NOOPT-NEXT:    [[VQP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT:    [[VPP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT:    [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT:    [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT:    [[VC3_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT:    [[VC4_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT:    [[RESP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT:    [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT:    [[VP:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT:    [[RES:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT:    store ptr [[VQP]], ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    store ptr [[VPP]], ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    store <16 x i8> [[VC1]], ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    store <16 x i8> [[VC2]], ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    store <16 x i8> [[VC3]], ptr [[VC3_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    store <16 x i8> [[VC4]], ptr [[VC4_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    store ptr [[RESP]], ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
// CHECK-LE-NOOPT-NEXT:    store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
// CHECK-LE-NOOPT-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
// CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP3]], ptr [[VP]], align 32
// CHECK-LE-NOOPT-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[VC3_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr [[VC4_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    [[TMP8:%.*]] = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]], <16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
// CHECK-LE-NOOPT-NEXT:    store <512 x i1> [[TMP8]], ptr [[RES]], align 64
// CHECK-LE-NOOPT-NEXT:    [[TMP9:%.*]] = load <512 x i1>, ptr [[RES]], align 64
// CHECK-LE-NOOPT-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    store <512 x i1> [[TMP9]], ptr [[TMP10]], align 64
// CHECK-LE-NOOPT-NEXT:    ret void
//
void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vector unsigned char vc2,
            vector unsigned char vc3, vector unsigned char vc4, unsigned char *resp) {
  // Codegen test for __builtin_mma_build_acc: combines vc1..vc4 into a
  // __vector_quad and stores the result through resp.
  //
  // vq and vp are loaded from vqp/vpp but never used afterwards. Do not remove
  // them: the unoptimized-run expectations above match their allocas, loads
  // and stores, while the -O3 expectations show those accesses eliminated.
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __vector_quad res;
  // Per the expected IR above, the little-endian runs pass the operands to
  // llvm.ppc.mma.assemble.acc in reverse order (vc4, vc3, vc2, vc1), whereas
  // the big-endian run keeps the source order.
  __builtin_mma_build_acc(&res, vc1, vc2, vc3, vc4);
  *((__vector_quad *)resp) = res;
}

// CHECK-LE-LABEL: define dso_local void @test2(
// CHECK-LE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-LE-NEXT:  [[ENTRY:.*:]]
// CHECK-LE-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC2]], <16 x i8> [[VC1]])
// CHECK-LE-NEXT:    store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8:![0-9]+]]
// CHECK-LE-NEXT:    ret void
//
// CHECK-BE-LABEL: define dso_local void @test2(
// CHECK-BE-SAME: ptr noundef readnone captures(none) [[VQP:%.*]], ptr noundef readnone captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-BE-NEXT:  [[ENTRY:.*:]]
// CHECK-BE-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC1]], <16 x i8> [[VC2]])
// CHECK-BE-NEXT:    store <256 x i1> [[TMP0]], ptr [[RESP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA8:![0-9]+]]
// CHECK-BE-NEXT:    ret void
//
// CHECK-LE-NOOPT-LABEL: define dso_local void @test2(
// CHECK-LE-NOOPT-SAME: ptr noundef [[VQP:%.*]], ptr noundef [[VPP:%.*]], <16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], ptr noundef [[RESP:%.*]]) #[[ATTR0]] {
// CHECK-LE-NOOPT-NEXT:  [[ENTRY:.*:]]
// CHECK-LE-NOOPT-NEXT:    [[VQP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT:    [[VPP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT:    [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT:    [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT:    [[RESP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT:    [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT:    [[VP:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT:    [[RES:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT:    store ptr [[VQP]], ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    store ptr [[VPP]], ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    store <16 x i8> [[VC1]], ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    store <16 x i8> [[VC2]], ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    store ptr [[RESP]], ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
// CHECK-LE-NOOPT-NEXT:    store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
// CHECK-LE-NOOPT-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
// CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP3]], ptr [[VP]], align 32
// CHECK-LE-NOOPT-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT:    [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
// CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP6]], ptr [[RES]], align 32
// CHECK-LE-NOOPT-NEXT:    [[TMP7:%.*]] = load <256 x i1>, ptr [[RES]], align 32
// CHECK-LE-NOOPT-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT:    store <256 x i1> [[TMP7]], ptr [[TMP8]], align 32
// CHECK-LE-NOOPT-NEXT:    ret void
//
void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1,
            vector unsigned char vc2, unsigned char *resp) {
  // Codegen test for __builtin_vsx_build_pair: combines vc1 and vc2 into a
  // __vector_pair and stores the result through resp.
  //
  // vq and vp are loaded from vqp/vpp but never used afterwards. Do not remove
  // them: the unoptimized-run expectations above match their allocas, loads
  // and stores, while the -O3 expectations show those accesses eliminated.
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __vector_pair res;
  // Per the expected IR above, the little-endian runs pass the operands to
  // llvm.ppc.vsx.assemble.pair in reverse order (vc2, vc1), whereas the
  // big-endian run keeps the source order.
  __builtin_vsx_build_pair(&res, vc1, vc2);
  *((__vector_pair *)resp) = res;
}
//.
// CHECK-LE: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK-LE: [[META5]] = !{!"Simple C/C++ TBAA"}
// CHECK-LE: [[__VECTOR_QUAD_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
// CHECK-LE: [[META7]] = !{!"__vector_quad", [[META4]], i64 0}
// CHECK-LE: [[__VECTOR_PAIR_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
// CHECK-LE: [[META9]] = !{!"__vector_pair", [[META4]], i64 0}
//.
// CHECK-BE: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK-BE: [[META5]] = !{!"Simple C/C++ TBAA"}
// CHECK-BE: [[__VECTOR_QUAD_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
// CHECK-BE: [[META7]] = !{!"__vector_quad", [[META4]], i64 0}
// CHECK-BE: [[__VECTOR_PAIR_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
// CHECK-BE: [[META9]] = !{!"__vector_pair", [[META4]], i64 0}
//.