File: LoongArchMatInt.cpp

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.8-1~exp1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,388 kB
  • sloc: cpp: 7,438,767; ansic: 1,393,871; asm: 1,012,926; python: 241,728; f90: 86,635; objc: 75,411; lisp: 42,144; pascal: 17,286; sh: 10,027; ml: 5,082; perl: 4,730; awk: 3,523; makefile: 3,349; javascript: 2,251; xml: 892; fortran: 672
file content (125 lines) | stat: -rw-r--r-- 4,459 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
//===- LoongArchMatInt.cpp - Immediate materialisation ---------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "LoongArchMatInt.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

// Builds the list of (opcode, immediate) pairs used to materialise the 64-bit
// constant `Val`. A straightforward sequence of up to four instructions is
// produced first; when it has three or more entries, a shorter variant ending
// in BSTRINS_D is searched for and returned instead if one reproduces `Val`.
LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
  // Field layout of Val:
  //
  //  63        52 51              32 31              12 11         0
  // +-----------+------------------+------------------+-----------+
  // | Highest12 |    Higher20      |       Hi20       |    Lo12   |
  // +-----------+------------------+------------------+-----------+
  // |            hi32              |              lo32            |
  //
  const int64_t Lo12 = Val & 0xFFF;
  const int64_t Hi20 = (Val >> 12) & 0xFFFFF;
  const int64_t Higher20 = (Val >> 32) & 0xFFFFF;
  const int64_t Highest12 = (Val >> 52) & 0xFFF;

  InstSeq Seq;

  // Bits[51:0] are all zero and Bits[63:52] are not: a lone LU52I_D suffices.
  const bool Low52Clear = SignExtend64<52>(Val) == 0;
  if (Low52Clear && Highest12 != 0) {
    Seq.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
    return Seq;
  }

  // lo32: choose between ORI, ADDI_W and LU12I_W (+ ORI).
  const bool Hi20IsFillOfBit11 =
      SignExtend32<1>(Lo12 >> 11) == SignExtend32<20>(Hi20);
  if (Hi20 == 0) {
    Seq.push_back(Inst(LoongArch::ORI, Lo12));
  } else if (!Hi20IsFillOfBit11) {
    Seq.push_back(Inst(LoongArch::LU12I_W, SignExtend64<20>(Hi20)));
    // The trailing ORI is skipped when it would OR in nothing.
    if (Lo12 != 0)
      Seq.push_back(Inst(LoongArch::ORI, Lo12));
  } else {
    // Hi20 is just bit 11 of Lo12 replicated: a sign-extended 12-bit
    // immediate covers all of lo32.
    Seq.push_back(Inst(LoongArch::ADDI_W, SignExtend64<12>(Lo12)));
  }

  // hi32, Higher20: needed unless it is bit 19 of Hi20 replicated.
  const bool Higher20IsFillOfBit31 =
      SignExtend32<1>(Hi20 >> 19) == SignExtend32<20>(Higher20);
  if (!Higher20IsFillOfBit31)
    Seq.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20)));

  // hi32, Highest12: needed unless it is bit 19 of Higher20 replicated.
  const bool Highest12IsFillOfBit51 =
      SignExtend32<1>(Higher20 >> 19) == SignExtend32<12>(Highest12);
  if (!Highest12IsFillOfBit51)
    Seq.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));

  const size_t N = Seq.size();
  if (N < 3)
    return Seq;

  // With three or more instructions, the tail of the sequence may be
  // replaceable by a single BSTRINS_D applied to a partially built value:
  //
  // Three instructions:
  //   ORI     + LU32I_D + LU52I_D  ->  ORI     + BSTRINS_D   (seed: ORI)
  //   ADDI_W  + LU32I_D + LU52I_D  ->  ADDI_W  + BSTRINS_D   (seed: ADDI_W)
  //   LU12I_W + ORI     + LU32I_D  ->  ORI     + BSTRINS_D   (seed: ORI)
  //   LU12I_W + LU32I_D + LU52I_D  ->  LU12I_W + BSTRINS_D   (seed: LU12I_W)
  //
  // Four instructions:
  //   LU12I_W + ORI + LU32I_D + LU52I_D  ->  LU12I_W + ORI + BSTRINS_D
  //                                      or  ORI + LU52I_D + BSTRINS_D
  //   (seeds: LU12I_W | ORI, and ORI | LU52I_D)
  //
  // SeedA is always tried; SeedB is only populated in the four-instruction
  // case and is ignored while it is zero.
  uint64_t SeedA = 0;
  uint64_t SeedB = 0;
  const auto FirstOpc = Seq[0].Opc;
  if (FirstOpc == LoongArch::ORI || FirstOpc == LoongArch::ADDI_W) {
    SeedA = Seq[0].Imm;
  } else if (FirstOpc == LoongArch::LU12I_W) {
    const uint64_t Lu12iBits = static_cast<uint64_t>(Seq[0].Imm) << 12;
    if (Seq[1].Opc != LoongArch::ORI) {
      SeedA = Lu12iBits;
    } else {
      SeedA = Seq[1].Imm;
      if (N != 3) {
        // SeedB must be formed from the bare ORI immediate, before the
        // LU12I_W bits are merged into SeedA.
        SeedB = static_cast<uint64_t>(Seq[3].Imm) << 52 | SeedA;
        SeedA |= Lu12iBits;
      }
    }
  } else {
    llvm_unreachable("unexpected opcode");
  }

  const uint64_t Target = static_cast<uint64_t>(Val);

  // Returns Seed with its bits [Msb:Lsb] replaced by its own low
  // (Msb - Lsb + 1) bits; every bit outside that range is kept as is.
  // Callers guarantee 32 <= Msb <= 63 and 1 <= Lsb <= Msb, so no shift amount
  // reaches 64.
  auto InsertLowBits = [](uint64_t Seed, uint64_t Msb,
                          uint64_t Lsb) -> uint64_t {
    const uint64_t Width = Msb - Lsb + 1;
    const uint64_t Field = (Seed & ((1ULL << Width) - 1)) << Lsb;
    const uint64_t AboveMsb = (~0ULL << Msb) << 1;
    const uint64_t BelowLsb = (1ULL << Lsb) - 1;
    return Field | (Seed & (AboveMsb | BelowLsb));
  };

  // Scan Msb upwards from 32 and, for each, Lsb downwards from Msb to 1; the
  // first combination that reproduces Val wins, SeedA being tested before
  // SeedB at every position.
  for (uint64_t Msb = 32; Msb < 64; ++Msb) {
    for (uint64_t Lsb = Msb; Lsb > 0; --Lsb) {
      // Msb travels in the upper 32 bits of the immediate, Lsb in the lower.
      const uint64_t PackedRange = Msb << 32 | Lsb;

      if (InsertLowBits(SeedA, Msb, Lsb) == Target) {
        // For LU12I_W + ORI + X the seed was the ORI alone, so the ORI
        // becomes the first instruction.
        if (N == 3 && Seq[1].Opc == LoongArch::ORI)
          Seq[0] = Seq[1];
        Seq.pop_back_n(2);
        Seq.push_back(Inst(LoongArch::BSTRINS_D, PackedRange));
        return Seq;
      }

      if (SeedB == 0)
        continue;

      if (InsertLowBits(SeedB, Msb, Lsb) == Target) {
        // Keep ORI + LU52I_D (entries 1 and 3) and drop the rest.
        Seq[0] = Seq[1];
        Seq[1] = Seq[3];
        Seq.pop_back_n(2);
        Seq.push_back(Inst(LoongArch::BSTRINS_D, PackedRange));
        return Seq;
      }
    }
  }

  // No BSTRINS_D shortcut exists; fall back to the plain sequence.
  return Seq;
}