File: bf16_imm.s

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (113 lines) | stat: -rw-r--r-- 5,309 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s

v_dot2_bf16_bf16 v5, v1, v2, 100.0
// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]

v_dot2_bf16_bf16 v2, v0, 1.0, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, 1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04]

v_dot2_bf16_bf16 v2, 1.0, v0, v2
// CHECK: v_dot2_bf16_bf16 v2, 1.0, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04]

v_dot2_bf16_bf16 v5, v1, v2, 1.0
// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]

v_dot2_bf16_bf16 v2, v0, -1.0, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, -1.0, v2       ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04]

v_dot2_bf16_bf16 v2, v0, 0.5, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, 0.5, v2        ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04]

v_dot2_bf16_bf16 v2, v0, -0.5, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, -0.5, v2       ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04]

v_dot2_bf16_bf16 v2, v0, 2.0, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, 2.0, v2        ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04]

v_dot2_bf16_bf16 v2, v0, -2.0, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, -2.0, v2       ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04]

v_dot2_bf16_bf16 v2, v0, 4.0, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, 4.0, v2        ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04]

v_dot2_bf16_bf16 v2, v0, -4.0, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, -4.0, v2       ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04]

// Check 1/(2*pi) rounded value and ideomatic fp32 0.15915494 value
// which cannot be accurately represented in bf16.

v_dot2_bf16_bf16 v2, v0, 0.158203125, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]

v_dot2_bf16_bf16 v2, v0, 0.15915494, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]

v_dot2_bf16_bf16 v2, v0, 0x3e22, v2
// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]

v_dot2_bf16_bf16 v2, v0, v2, 0.15915494
// CHECK: v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03]

v_dot2_f32_bf16 v2, v1, 0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 0, v2           ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0x01,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, 0.5, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 0.5, v2         ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe1,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, -0.5, v2
// CHECK: v_dot2_f32_bf16 v2, v1, -0.5, v2        ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe3,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, 1.0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 1.0, v2         ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe5,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, -1.0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, -1.0, v2        ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe7,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, 2.0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 2.0, v2         ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe9,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, -2.0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, -2.0, v2        ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xeb,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, 4.0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 4.0, v2         ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xed,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, -4.0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, -4.0, v2        ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xef,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, 0.15915494, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 0.15915494, v2  ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xf1,0x09,0x1c]

v_dot2_f32_bf16 v2, v1, 0x3e22, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 0.15915494, v2  ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xf1,0x09,0x1c]

v_dot2_f32_bf16 v2, 0.5, v1, v2
// CHECK: v_dot2_f32_bf16 v2, 0.5, v1, v2         ; encoding: [0x02,0x40,0x1a,0xcc,0xf0,0x02,0x0a,0x1c]

v_dot2_f32_bf16 v2, -0.5, v1, v2
// CHECK: v_dot2_f32_bf16 v2, -0.5, v1, v2        ; encoding: [0x02,0x40,0x1a,0xcc,0xf1,0x02,0x0a,0x1c]

v_dot2_f32_bf16 v2, 1.0, v1, v2
// CHECK: v_dot2_f32_bf16 v2, 1.0, v1, v2         ; encoding: [0x02,0x40,0x1a,0xcc,0xf2,0x02,0x0a,0x1c]

v_dot2_f32_bf16 v2, -1.0, v1, v2
// CHECK: v_dot2_f32_bf16 v2, -1.0, v1, v2        ; encoding: [0x02,0x40,0x1a,0xcc,0xf3,0x02,0x0a,0x1c]

v_dot2_f32_bf16 v2, 2.0, v1, v2
// CHECK: v_dot2_f32_bf16 v2, 2.0, v1, v2         ; encoding: [0x02,0x40,0x1a,0xcc,0xf4,0x02,0x0a,0x1c]

v_dot2_f32_bf16 v2, -2.0, v1, v2
// CHECK: v_dot2_f32_bf16 v2, -2.0, v1, v2        ; encoding: [0x02,0x40,0x1a,0xcc,0xf5,0x02,0x0a,0x1c]

v_dot2_f32_bf16 v2, 4.0, v1, v2
// CHECK: v_dot2_f32_bf16 v2, 4.0, v1, v2         ; encoding: [0x02,0x40,0x1a,0xcc,0xf6,0x02,0x0a,0x1c]

v_dot2_f32_bf16 v2, -4.0, v1, v2
// CHECK: v_dot2_f32_bf16 v2, -4.0, v1, v2        ; encoding: [0x02,0x40,0x1a,0xcc,0xf7,0x02,0x0a,0x1c]

v_dot2_f32_bf16 v2, 100.0, v1, v2
// CHECK: v_dot2_f32_bf16 v2, 0x42c8, v1, v2      ; encoding: [0x02,0x40,0x1a,0xcc,0xff,0x02,0x0a,0x1c,0xc8,0x42,0x00,0x00]

v_dot2_f32_bf16 v2, v1, 100.0, v2
// CHECK: v_dot2_f32_bf16 v2, v1, 0x42c8, v2      ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xff,0x09,0x1c,0xc8,0x42,0x00,0x00]