Package: aom / 1.0.0-3

update-the-vsx-support.patch
From: Luca Barbato <lu_zero@gentoo.org>
Date: Sat, 23 Jun 2018 02:47:16 -0500
X-Dgit-Generated: 1.0.0-2 cfc5a36445e7fe7eab377bc374f08323150ba6d0
Subject: Update the VSX support

The CFL function signature now takes both a source and a destination pointer.
Also make the typedefs compatible with clang.

Change-Id: Ia633d10141ac0fd10af0a40b9878d18293463fb8
(cherry picked from commit 2f8587a8b30cb2e4b45df36c6c8461a440a77336)

---

--- aom-1.0.0.orig/av1/common/ppc/cfl_ppc.c
+++ aom-1.0.0/av1/common/ppc/cfl_ppc.c
@@ -24,19 +24,21 @@
 #define CFL_LINE_2 128
 #define CFL_LINE_3 192
 
-typedef vector int8_t int8x16_t;
-typedef vector uint8_t uint8x16_t;
-typedef vector int16_t int16x8_t;
-typedef vector uint16_t uint16x8_t;
-typedef vector int32_t int32x4_t;
-typedef vector uint32_t uint32x4_t;
-typedef vector uint64_t uint64x2_t;
+typedef vector signed char int8x16_t;          // NOLINT(runtime/int)
+typedef vector unsigned char uint8x16_t;       // NOLINT(runtime/int)
+typedef vector signed short int16x8_t;         // NOLINT(runtime/int)
+typedef vector unsigned short uint16x8_t;      // NOLINT(runtime/int)
+typedef vector signed int int32x4_t;           // NOLINT(runtime/int)
+typedef vector unsigned int uint32x4_t;        // NOLINT(runtime/int)
+typedef vector unsigned long long uint64x2_t;  // NOLINT(runtime/int)
 
-static INLINE void subtract_average_vsx(int16_t *pred_buf, int width,
-                                        int height, int round_offset,
+static INLINE void subtract_average_vsx(const uint16_t *src_ptr, int16_t *dst,
+                                        int width, int height, int round_offset,
                                         int num_pel_log2) {
-  const int16_t *end = pred_buf + height * CFL_BUF_LINE;
-  const int16_t *sum_buf = pred_buf;
+  //  int16_t *dst = dst_ptr;
+  const int16_t *dst_end = dst + height * CFL_BUF_LINE;
+  const int16_t *sum_buf = (const int16_t *)src_ptr;
+  const int16_t *end = sum_buf + height * CFL_BUF_LINE;
   const uint32x4_t div_shift = vec_splats((uint32_t)num_pel_log2);
   const uint8x16_t mask_64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
                                0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
@@ -71,43 +73,40 @@ static INLINE void subtract_average_vsx(
   const int32x4_t avg = vec_sr(sum_32x4, div_shift);
   const int16x8_t vec_avg = vec_pack(avg, avg);
   do {
-    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0, pred_buf), vec_avg), OFF_0, pred_buf);
-    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_1, pred_buf), vec_avg),
-               OFF_0 + CFL_BUF_LINE_BYTES, pred_buf);
-    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_2, pred_buf), vec_avg),
-               OFF_0 + CFL_LINE_2, pred_buf);
-    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_3, pred_buf), vec_avg),
-               OFF_0 + CFL_LINE_3, pred_buf);
+    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0, dst), vec_avg), OFF_0, dst);
+    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_1, dst), vec_avg),
+               OFF_0 + CFL_BUF_LINE_BYTES, dst);
+    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_2, dst), vec_avg),
+               OFF_0 + CFL_LINE_2, dst);
+    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_3, dst), vec_avg),
+               OFF_0 + CFL_LINE_3, dst);
     if (width >= 16) {
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1, pred_buf), vec_avg), OFF_1,
-                 pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_1, pred_buf), vec_avg),
-                 OFF_1 + CFL_LINE_1, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_2, pred_buf), vec_avg),
-                 OFF_1 + CFL_LINE_2, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_3, pred_buf), vec_avg),
-                 OFF_1 + CFL_LINE_3, pred_buf);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1, dst), vec_avg), OFF_1, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_1, dst), vec_avg),
+                 OFF_1 + CFL_LINE_1, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_2, dst), vec_avg),
+                 OFF_1 + CFL_LINE_2, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_3, dst), vec_avg),
+                 OFF_1 + CFL_LINE_3, dst);
     }
     if (width == 32) {
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2, pred_buf), vec_avg), OFF_2,
-                 pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_1, pred_buf), vec_avg),
-                 OFF_2 + CFL_LINE_1, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_2, pred_buf), vec_avg),
-                 OFF_2 + CFL_LINE_2, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_3, pred_buf), vec_avg),
-                 OFF_2 + CFL_LINE_3, pred_buf);
-
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3, pred_buf), vec_avg), OFF_3,
-                 pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_1, pred_buf), vec_avg),
-                 OFF_3 + CFL_LINE_1, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_2, pred_buf), vec_avg),
-                 OFF_3 + CFL_LINE_2, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_3, pred_buf), vec_avg),
-                 OFF_3 + CFL_LINE_3, pred_buf);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2, dst), vec_avg), OFF_2, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_1, dst), vec_avg),
+                 OFF_2 + CFL_LINE_1, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_2, dst), vec_avg),
+                 OFF_2 + CFL_LINE_2, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_3, dst), vec_avg),
+                 OFF_2 + CFL_LINE_3, dst);
+
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3, dst), vec_avg), OFF_3, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_1, dst), vec_avg),
+                 OFF_3 + CFL_LINE_1, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_2, dst), vec_avg),
+                 OFF_3 + CFL_LINE_2, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_3, dst), vec_avg),
+                 OFF_3 + CFL_LINE_3, dst);
     }
-  } while ((pred_buf += CFL_BUF_LINE * 4) < end);
+  } while ((dst += CFL_BUF_LINE * 4) < dst_end);
 }
 
 // Declare wrappers for VSX sizes
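
Notes on the patch (not part of the upstream change):

The typedef hunk replaces spellings such as `vector int8_t` with the plain keyword forms (`vector signed char`, `vector unsigned short`, ...). A minimal standalone sketch of why this matters, assuming the usual AltiVec/VSX behavior where clang is stricter than GCC about typedef names appearing after the `vector` context keyword; file name and helper are hypothetical, built for POWER with flags along the lines of -mcpu=power8 -maltivec -mvsx:

    /* typedef_sketch.c -- illustration only, not part of the patch */
    #include <altivec.h>

    typedef vector signed short int16x8_t;  /* accepted by both GCC and clang */
    /* typedef vector int16_t bad16x8_t; */ /* GCC-only spelling the patch removes */

    int16x8_t add_rows(int16x8_t a, int16x8_t b) {
      return vec_add(a, b);  /* element-wise 16-bit add, just to exercise the type */
    }

The other half of the patch threads separate source and destination pointers through subtract_average_vsx. The following hypothetical scalar reference shows what that routine computes, inferred only from the signature and the byte offsets visible in the hunk above (CFL_LINE_2 = 128 bytes covering two rows implies a stride of 32 int16_t per row); the function name, rounding detail, and stride value here are assumptions, not the upstream C code:

    /* scalar_sketch.c -- illustration only, not part of the patch */
    #include <stdint.h>

    #define CFL_BUF_LINE 32  /* assumed row stride, in elements */

    void subtract_average_scalar(const uint16_t *src, int16_t *dst, int width,
                                 int height, int round_offset, int num_pel_log2) {
      /* Sum every prediction sample, then divide by the pel count
       * (a power of two) with pre-added rounding. */
      int32_t sum = round_offset;
      for (int j = 0; j < height; j++)
        for (int i = 0; i < width; i++) sum += src[j * CFL_BUF_LINE + i];
      const int32_t avg = sum >> num_pel_log2;

      /* Write the mean-removed samples to the separate destination buffer,
       * which is the point of the new src/dst split in the signature. */
      for (int j = 0; j < height; j++)
        for (int i = 0; i < width; i++)
          dst[j * CFL_BUF_LINE + i] = (int16_t)(src[j * CFL_BUF_LINE + i] - avg);
    }

The VSX version above does the same work four rows at a time with vec_vsx_ld/vec_vsx_st, which is why the store offsets advance by CFL_LINE_1..CFL_LINE_3 inside each loop iteration.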