| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 
 | //===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the classes providing information
// about existing X86 FMA3 opcodes, classifying and grouping them.
//
//===----------------------------------------------------------------------===//
#include "X86InstrFMA3Info.h"
#include "X86InstrInfo.h"
#include "llvm/Support/Threading.h"
#include <atomic>
#include <cassert>
#include <cstdint>
using namespace llvm;
#define FMA3GROUP(Name, Suf, Attrs) \
  { { X86::Name##132##Suf, X86::Name##213##Suf, X86::Name##231##Suf }, Attrs },
#define FMA3GROUP_MASKED(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
  FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
#define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zr, Attrs) \
#define FMA3GROUP_PACKED_WIDTHS_ALL(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##Ym, Attrs) \
  FMA3GROUP(Name, Suf##Yr, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##m, Attrs) \
  FMA3GROUP(Name, Suf##r, Attrs)
#define FMA3GROUP_PACKED(Name, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)
#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##Zm, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Name, Suf##Zr, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
#define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##m, Attrs) \
  FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Name, Suf##r, Attrs) \
  FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
#define FMA3GROUP_SCALAR(Name, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SD, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, SH, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)
#define FMA3GROUP_FULL(Name, Attrs) \
  FMA3GROUP_PACKED(Name, Attrs) \
  FMA3GROUP_SCALAR(Name, Attrs)
static const X86InstrFMA3Group Groups[] = {
  FMA3GROUP_FULL(VFMADD, 0)
  FMA3GROUP_PACKED(VFMADDSUB, 0)
  FMA3GROUP_FULL(VFMSUB, 0)
  FMA3GROUP_PACKED(VFMSUBADD, 0)
  FMA3GROUP_FULL(VFNMADD, 0)
  FMA3GROUP_FULL(VFNMSUB, 0)
};
#define FMA3GROUP_PACKED_AVX512_WIDTHS(Name, Type, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z128##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)
#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \
  FMA3GROUP(Name, SDZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs) \
  FMA3GROUP(Name, SHZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs) \
  FMA3GROUP(Name, SSZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)
static const X86InstrFMA3Group BroadcastGroups[] = {
  FMA3GROUP_PACKED_AVX512(VFMADD, mb, 0)
  FMA3GROUP_PACKED_AVX512(VFMADDSUB, mb, 0)
  FMA3GROUP_PACKED_AVX512(VFMSUB, mb, 0)
  FMA3GROUP_PACKED_AVX512(VFMSUBADD, mb, 0)
  FMA3GROUP_PACKED_AVX512(VFNMADD, mb, 0)
  FMA3GROUP_PACKED_AVX512(VFNMSUB, mb, 0)
};
static const X86InstrFMA3Group RoundGroups[] = {
  FMA3GROUP_PACKED_AVX512_ROUND(VFMADD, rb, 0)
  FMA3GROUP_SCALAR_AVX512_ROUND(VFMADD, rb, X86InstrFMA3Group::Intrinsic)
  FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB, rb, 0)
  FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB, rb, 0)
  FMA3GROUP_SCALAR_AVX512_ROUND(VFMSUB, rb, X86InstrFMA3Group::Intrinsic)
  FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD, rb, 0)
  FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD, rb, 0)
  FMA3GROUP_SCALAR_AVX512_ROUND(VFNMADD, rb, X86InstrFMA3Group::Intrinsic)
  FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB, rb, 0)
  FMA3GROUP_SCALAR_AVX512_ROUND(VFNMSUB, rb, X86InstrFMA3Group::Intrinsic)
};
static void verifyTables() {
#ifndef NDEBUG
  static std::atomic<bool> TableChecked(false);
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(llvm::is_sorted(Groups) && llvm::is_sorted(RoundGroups) &&
           llvm::is_sorted(BroadcastGroups) && "FMA3 tables not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
#endif
}
/// Returns a reference to a group of FMA3 opcodes to where the given
/// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
/// and not included into any FMA3 group, then nullptr is returned.
const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
  // FMA3 instructions have a well defined encoding pattern we can exploit.
  uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
  bool IsFMA3Opcode = ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
                       (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
                       (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
  bool IsFMA3Encoding = ((TSFlags & X86II::EncodingMask) == X86II::VEX &&
                         (TSFlags & X86II::OpMapMask) == X86II::T8) ||
                        ((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
                         ((TSFlags & X86II::OpMapMask) == X86II::T8 ||
                          (TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
  bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD;
  if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
    return nullptr;
  verifyTables();
  ArrayRef<X86InstrFMA3Group> Table;
  if (TSFlags & X86II::EVEX_RC)
    Table = ArrayRef(RoundGroups);
  else if (TSFlags & X86II::EVEX_B)
    Table = ArrayRef(BroadcastGroups);
  else
    Table = ArrayRef(Groups);
  // FMA 132 instructions have an opcode of 0x96-0x9F
  // FMA 213 instructions have an opcode of 0xA6-0xAF
  // FMA 231 instructions have an opcode of 0xB6-0xBF
  unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
  auto I = partition_point(Table, [=](const X86InstrFMA3Group &Group) {
    return Group.Opcodes[FormIndex] < Opcode;
  });
  assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
         "Couldn't find FMA3 opcode!");
  return I;
}
 |