1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385
|
/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
*
* The LLVM Compiler Infrastructure
*
* This file is distributed under the University of Illinois Open Source
* License. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===*
*
* This file is part of the X86 Disassembler.
* It contains common definitions used by both the disassembler and the table
* generator.
* Documentation for the disassembler can be found in X86Disassembler.h.
*
*===----------------------------------------------------------------------===*/
/*
* This header file provides those definitions that need to be shared between
* the decoder and the table generator in a C-friendly manner.
*/
#ifndef X86DISASSEMBLERDECODERCOMMON_H
#define X86DISASSEMBLERDECODERCOMMON_H
#include "llvm/Support/DataTypes.h"
#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM x86DisassemblerContexts
#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes
#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes
#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR "x86DisassemblerContexts"
#define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"
#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"
#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"
/*
* Attributes of an instruction that must be known before the opcode can be
* processed correctly. Most of these indicate the presence of particular
* prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
*/
#define ATTRIBUTE_BITS \
ENUM_ENTRY(ATTR_NONE, 0x00) \
ENUM_ENTRY(ATTR_64BIT, 0x01) \
ENUM_ENTRY(ATTR_XS, 0x02) \
ENUM_ENTRY(ATTR_XD, 0x04) \
ENUM_ENTRY(ATTR_REXW, 0x08) \
ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
ENUM_ENTRY(ATTR_VEX, 0x20) \
ENUM_ENTRY(ATTR_VEXL, 0x40)
#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
ATTRIBUTE_BITS
ATTR_max
};
#undef ENUM_ENTRY
/*
* Combinations of the above attributes that are relevant to instruction
* decode. Although other combinations are possible, they can be reduced to
* these without affecting the ultimately decoded instruction.
*/
/* Class name Rank Rationale for rank assignment */
#define INSTRUCTION_CONTEXTS \
ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
"64-bit mode but no more") \
ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
"operands change width") \
ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
"but not the operands") \
ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
"but not the operands") \
ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \
"operands change width") \
ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \
"operands change width") \
ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
"change width; overrides IC_OPSIZE") \
ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
"secondary") \
ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \
ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \
ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
"opcode") \
ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
"IC_64BIT_REXW_XS") \
ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
"else because this changes most " \
"operands' meaning") \
ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
#define ENUM_ENTRY(n, r, d) n,
typedef enum {
INSTRUCTION_CONTEXTS
IC_max
} InstructionContext;
#undef ENUM_ENTRY
/*
* Opcode types, which determine which decode table to use, both in the Intel
* manual and also for the decoder.
*/
typedef enum {
ONEBYTE = 0,
TWOBYTE = 1,
THREEBYTE_38 = 2,
THREEBYTE_3A = 3,
THREEBYTE_A6 = 4,
THREEBYTE_A7 = 5
} OpcodeType;
/*
* The following structs are used for the hierarchical decode table. After
* determining the instruction's class (i.e., which IC_* constant applies to
* it), the decoder reads the opcode. Some instructions require specific
* values of the ModR/M byte, so the ModR/M byte indexes into the final table.
*
* If a ModR/M byte is not required, "required" is left unset, and the values
* for each instructionID are identical.
*/
typedef uint16_t InstrUID;
/*
* ModRMDecisionType - describes the type of ModR/M decision, allowing the
* consumer to determine the number of entries in it.
*
* MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
* instruction is the same.
* MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
* corresponds to one instruction; otherwise, it corresponds to
* a different instruction.
* MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
* to a different instruction.
*/
#define MODRMTYPES \
ENUM_ENTRY(MODRM_ONEENTRY) \
ENUM_ENTRY(MODRM_SPLITRM) \
ENUM_ENTRY(MODRM_FULL)
#define ENUM_ENTRY(n) n,
typedef enum {
MODRMTYPES
MODRM_max
} ModRMDecisionType;
#undef ENUM_ENTRY
/*
* ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
* instruction each possible value of the ModR/M byte corresponds to. Once
* this information is known, we have narrowed down to a single instruction.
*/
struct ModRMDecision {
uint8_t modrm_type;
/* The macro below must be defined wherever this file is included. */
INSTRUCTION_IDS
};
/*
* OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
* given a particular opcode.
*/
struct OpcodeDecision {
struct ModRMDecision modRMDecisions[256];
};
/*
* ContextDecision - Specifies which opcode->instruction tables to look at given
* a particular context (set of attributes). Since there are many possible
* contexts, the decoder first uses CONTEXTS_SYM to determine which context
* applies given a specific set of attributes. Hence there are only IC_max
* entries in this table, rather than 2^(ATTR_max).
*/
struct ContextDecision {
struct OpcodeDecision opcodeDecisions[IC_max];
};
/*
* Physical encodings of instruction operands.
*/
#define ENCODINGS \
ENUM_ENTRY(ENCODING_NONE, "") \
ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
ENUM_ENTRY(ENCODING_CW, "2-byte") \
ENUM_ENTRY(ENCODING_CD, "4-byte") \
ENUM_ENTRY(ENCODING_CP, "6-byte") \
ENUM_ENTRY(ENCODING_CO, "8-byte") \
ENUM_ENTRY(ENCODING_CT, "10-byte") \
ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
ENUM_ENTRY(ENCODING_IW, "2-byte") \
ENUM_ENTRY(ENCODING_ID, "4-byte") \
ENUM_ENTRY(ENCODING_IO, "8-byte") \
ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \
"the opcode byte") \
ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \
ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \
ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \
ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \
"opcode byte") \
\
ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \
ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \
ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
"opcode byte") \
ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
"in type")
#define ENUM_ENTRY(n, d) n,
typedef enum {
ENCODINGS
ENCODING_max
} OperandEncoding;
#undef ENUM_ENTRY
/*
* Semantic interpretations of instruction operands.
*/
#define TYPES \
ENUM_ENTRY(TYPE_NONE, "") \
ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
ENUM_ENTRY(TYPE_REL16, "2-byte") \
ENUM_ENTRY(TYPE_REL32, "4-byte") \
ENUM_ENTRY(TYPE_REL64, "8-byte") \
ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
ENUM_ENTRY(TYPE_R16, "2-byte") \
ENUM_ENTRY(TYPE_R32, "4-byte") \
ENUM_ENTRY(TYPE_R64, "8-byte") \
ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
ENUM_ENTRY(TYPE_IMM16, "2-byte") \
ENUM_ENTRY(TYPE_IMM32, "4-byte") \
ENUM_ENTRY(TYPE_IMM64, "8-byte") \
ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
ENUM_ENTRY(TYPE_RM16, "2-byte") \
ENUM_ENTRY(TYPE_RM32, "4-byte") \
ENUM_ENTRY(TYPE_RM64, "8-byte") \
ENUM_ENTRY(TYPE_M, "Memory operand") \
ENUM_ENTRY(TYPE_M8, "1-byte") \
ENUM_ENTRY(TYPE_M16, "2-byte") \
ENUM_ENTRY(TYPE_M32, "4-byte") \
ENUM_ENTRY(TYPE_M64, "8-byte") \
ENUM_ENTRY(TYPE_LEA, "Effective address") \
ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \
ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \
ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \
ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \
ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \
ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
"base)") \
ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \
"2 = SS, 3 = DS, 4 = FS, 5 = GS") \
ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \
ENUM_ENTRY(TYPE_M64FP, "64-bit") \
ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \
"floating-point instructions") \
ENUM_ENTRY(TYPE_M32INT, "4-byte") \
ENUM_ENTRY(TYPE_M64INT, "8-byte") \
ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
ENUM_ENTRY(TYPE_MM, "MMX register operand") \
ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \
ENUM_ENTRY(TYPE_MM64, "8-byte") \
ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
ENUM_ENTRY(TYPE_XMM64, "8-byte") \
ENUM_ENTRY(TYPE_XMM128, "16-byte") \
ENUM_ENTRY(TYPE_XMM256, "32-byte") \
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
\
ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
ENUM_ENTRY(TYPE_DUP1, "operand 1") \
ENUM_ENTRY(TYPE_DUP2, "operand 2") \
ENUM_ENTRY(TYPE_DUP3, "operand 3") \
ENUM_ENTRY(TYPE_DUP4, "operand 4") \
ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
#define ENUM_ENTRY(n, d) n,
typedef enum {
TYPES
TYPE_max
} OperandType;
#undef ENUM_ENTRY
/*
* OperandSpecifier - The specification for how to extract and interpret one
* operand.
*/
struct OperandSpecifier {
OperandEncoding encoding;
OperandType type;
};
/*
* Indicates where the opcode modifier (if any) is to be found. Extended
* opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
*/
#define MODIFIER_TYPES \
ENUM_ENTRY(MODIFIER_NONE) \
ENUM_ENTRY(MODIFIER_OPCODE) \
ENUM_ENTRY(MODIFIER_MODRM)
#define ENUM_ENTRY(n) n,
typedef enum {
MODIFIER_TYPES
MODIFIER_max
} ModifierType;
#undef ENUM_ENTRY
#define X86_MAX_OPERANDS 5
/*
* The specification for how to extract and interpret a full instruction and
* its operands.
*/
struct InstructionSpecifier {
ModifierType modifierType;
uint8_t modifierBase;
struct OperandSpecifier operands[X86_MAX_OPERANDS];
/* The macro below must be defined wherever this file is included. */
INSTRUCTION_SPECIFIER_FIELDS
};
/*
* Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
* are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
* respectively.
*/
typedef enum {
MODE_16BIT,
MODE_32BIT,
MODE_64BIT
} DisassemblerMode;
#endif
|