1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
|
/* A representation of vector permutation indices.
Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "vec-perm-indices.h"
#include "tree.h"
#include "fold-const.h"
#include "tree-vector-builder.h"
#include "backend.h"
#include "rtl.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "selftest.h"
#include "rtx-vector-builder.h"
/* Switch to a new permutation vector that selects between NINPUTS vector
inputs that have NELTS_PER_INPUT elements each. Take the elements of the
new permutation vector from ELEMENTS, clamping each one to be in range. */
void
vec_perm_indices::new_vector (const vec_perm_builder &elements,
unsigned int ninputs,
poly_uint64 nelts_per_input)
{
m_ninputs = ninputs;
m_nelts_per_input = nelts_per_input;
/* If the vector has a constant number of elements, expand the
encoding and clamp each element. E.g. { 0, 2, 4, ... } might
wrap halfway if there is only one vector input, and we want
the wrapped form to be the canonical one.
If the vector has a variable number of elements, just copy
the encoding. In that case the unwrapped form is canonical
and there is no way of representing the wrapped form. */
poly_uint64 full_nelts = elements.full_nelts ();
unsigned HOST_WIDE_INT copy_nelts;
if (full_nelts.is_constant (©_nelts))
m_encoding.new_vector (full_nelts, copy_nelts, 1);
else
{
copy_nelts = elements.encoded_nelts ();
m_encoding.new_vector (full_nelts, elements.npatterns (),
elements.nelts_per_pattern ());
}
unsigned int npatterns = m_encoding.npatterns ();
for (unsigned int i = 0; i < npatterns; ++i)
m_encoding.quick_push (clamp (elements.elt (i)));
/* Use the fact that:
(a + b) % c == ((a % c) + (b % c)) % c
to simplify the clamping of variable-length vectors. */
for (unsigned int i = npatterns; i < copy_nelts; ++i)
{
element_type step = clamp (elements.elt (i)
- elements.elt (i - npatterns));
m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
}
m_encoding.finalize ();
}
/* Switch to a new permutation vector that selects the same input elements
as ORIG, but with each element split into FACTOR pieces. For example,
if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
{ 2, 3, 4, 5, 0, 1, 6, 7 }. */
void
vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
unsigned int factor)
{
m_ninputs = orig.m_ninputs;
m_nelts_per_input = orig.m_nelts_per_input * factor;
m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
orig.m_encoding.npatterns () * factor,
orig.m_encoding.nelts_per_pattern ());
unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
for (unsigned int i = 0; i < encoded_nelts; ++i)
{
element_type base = orig.m_encoding[i] * factor;
for (unsigned int j = 0; j < factor; ++j)
m_encoding.quick_push (base + j);
}
m_encoding.finalize ();
}
/* Rotate the inputs of the permutation right by DELTA inputs. This changes
the values of the permutation vector but it doesn't change the way that
the elements are encoded. */
void
vec_perm_indices::rotate_inputs (int delta)
{
element_type element_delta = delta * m_nelts_per_input;
for (unsigned int i = 0; i < m_encoding.length (); ++i)
m_encoding[i] = clamp (m_encoding[i] + element_delta);
}
/* Return true if index OUT_BASE + I * OUT_STEP selects input
element IN_BASE + I * IN_STEP. For example, the call to test
whether a permute reverses a vector of N elements would be:
series_p (0, 1, N - 1, -1)
which would return true for { N - 1, N - 2, N - 3, ... }.
The calls to test for an interleaving of elements starting
at N1 and N2 would be:
series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
bool
vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
element_type in_base, element_type in_step) const
{
/* Check the base value. */
if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
return false;
element_type full_nelts = m_encoding.full_nelts ();
unsigned int npatterns = m_encoding.npatterns ();
/* Calculate which multiple of OUT_STEP elements we need to get
back to the same pattern. */
unsigned int cycle_length = least_common_multiple (out_step, npatterns);
/* Check the steps. */
in_step = clamp (in_step);
out_base += out_step;
unsigned int limit = 0;
for (;;)
{
/* Succeed if we've checked all the elements in the vector. */
if (known_ge (out_base, full_nelts))
return true;
if (out_base >= npatterns)
{
/* We've got to the end of the "foreground" values. Check
2 elements from each pattern in the "background" values. */
if (limit == 0)
limit = out_base + cycle_length * 2;
else if (out_base >= limit)
return true;
}
element_type v0 = m_encoding.elt (out_base - out_step);
element_type v1 = m_encoding.elt (out_base);
if (maybe_ne (clamp (v1 - v0), in_step))
return false;
out_base += out_step;
}
return true;
}
/* Return true if all elements of the permutation vector are in the range
[START, START + SIZE). */
bool
vec_perm_indices::all_in_range_p (element_type start, element_type size) const
{
/* Check the first two elements of each pattern. */
unsigned int npatterns = m_encoding.npatterns ();
unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
for (unsigned int i = 0; i < base_nelts; ++i)
if (!known_in_range_p (m_encoding[i], start, size))
return false;
/* For stepped encodings, check the full range of the series. */
if (nelts_per_pattern == 3)
{
element_type limit = input_nelts ();
/* The number of elements in each pattern beyond the first two
that we checked above. */
poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
npatterns) - 2;
for (unsigned int i = 0; i < npatterns; ++i)
{
/* BASE1 has been checked but BASE2 hasn't. */
element_type base1 = m_encoding[i + npatterns];
element_type base2 = m_encoding[i + base_nelts];
/* The step to add to get from BASE1 to each subsequent value. */
element_type step = clamp (base2 - base1);
/* STEP has no inherent sign, so a value near LIMIT can
act as a negative step. The series is in range if it
is in range according to one of the two interpretations.
Since we're dealing with clamped values, ELEMENT_TYPE is
wide enough for overflow not to be a problem. */
element_type headroom_down = base1 - start;
element_type headroom_up = size - headroom_down - 1;
HOST_WIDE_INT diff;
if ((!step.is_constant (&diff)
|| maybe_lt (headroom_up, diff * step_nelts))
&& (!(limit - step).is_constant (&diff)
|| maybe_lt (headroom_down, diff * step_nelts)))
return false;
}
}
return true;
}
/* Try to read the contents of VECTOR_CST CST as a constant permutation
vector. Return true and add the elements to BUILDER on success,
otherwise return false without modifying BUILDER. */
bool
tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
{
unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
for (unsigned int i = 0; i < encoded_nelts; ++i)
if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
return false;
builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
VECTOR_CST_NPATTERNS (cst),
VECTOR_CST_NELTS_PER_PATTERN (cst));
for (unsigned int i = 0; i < encoded_nelts; ++i)
builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
return true;
}
/* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
tree
vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
{
gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
tree_vector_builder sel (type, indices.encoding ().npatterns (),
indices.encoding ().nelts_per_pattern ());
unsigned int encoded_nelts = sel.encoded_nelts ();
for (unsigned int i = 0; i < encoded_nelts; i++)
sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
return sel.build ();
}
/* Return a CONST_VECTOR of mode MODE that contains the elements of
INDICES. */
rtx
vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
{
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
&& known_eq (GET_MODE_NUNITS (mode), indices.length ()));
rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
indices.encoding ().nelts_per_pattern ());
unsigned int encoded_nelts = sel.encoded_nelts ();
for (unsigned int i = 0; i < encoded_nelts; i++)
sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
return sel.build ();
}
#if CHECKING_P
namespace selftest {
/* Test a 12-element vector. */
static void
test_vec_perm_12 (void)
{
vec_perm_builder builder (12, 12, 1);
for (unsigned int i = 0; i < 4; ++i)
{
builder.quick_push (i * 5);
builder.quick_push (3 + i);
builder.quick_push (2 + 3 * i);
}
vec_perm_indices indices (builder, 1, 12);
ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
}
/* Run selftests for this file. */
void
vec_perm_indices_c_tests ()
{
test_vec_perm_12 ();
}
} // namespace selftest
#endif
|