File: x86.mdl

package info (click to toggle)
mlton 20210117%2Bdfsg-3
links: PTS, VCS
area: main
in suites: sid
size: 58,464 kB
sloc: ansic: 27,682; sh: 4,455; asm: 3,569; lisp: 2,879; makefile: 2,347; perl: 1,169; python: 191; pascal: 68; javascript: 7
file content (1019 lines) | stat: -rw-r--r-- 34,740 bytes
(*
 * 32bit, x86 instruction set.
 *
 * Note:
 *  1. Segmentation registers and other weird stuff are not modelled.
 *  2. The instruction set that I model is 32-bit oriented.
 *     I don't try to fit that 16-bit mode stuff in.
 *  3. BCD arithmetic is missing
 *  4. Multi-precision stuff is incomplete
 *  5. No MMX (maybe we'll add this in later)
 *  6. Slegdehammer extensions from AMD (more later)
 *
 * Allen Leung (leunga@cs.nyu.edu)
 *
 *)
architecture X86 =
struct

   superscalar          (* superscalar machine *)

   little endian        (* little endian architecture *)

   lowercase assembly   (* print assembly in lower case *)

  (*------------------------------------------------------------------------
   * Note: While the x86 has only 8 integer and 8 floating point registers,
   * the SMLNJ compiler fakes it by assuming that it has 32 integer
   * and 32 floating point registers.  That's why we have 32 integer
   * and 32 floating point registers in this description.
   * Probably pseudo memory registers should understood directly by
   * the md tool.
   *
   *------------------------------------------------------------------------*)

   storage
      GP   = $r[32] of 32 bits
                asm: (fn (0,8)  => "%al" | (0,16) => "%ax" | (0,32) => "%eax"
                       | (1,8)  => "%cl" | (1,16) => "%cx" | (1,32) => "%ecx"
                       | (2,8)  => "%dl" | (2,16) => "%dx" | (2,32) => "%edx"
                       | (3,8)  => "%bl" | (3,16) => "%bx" | (3,32) => "%ebx"
                       | (4,16) => "%sp" | (4,32) => "%esp"
                       | (5,16) => "%bp" | (5,32) => "%ebp"
                       | (6,16) => "%si" | (6,32) => "%esi"
                       | (7,16) => "%di" | (7,32) => "%edi"
                       | (r,_) => "%"^Int.toString r
                      )
   |  FP   = $f[32] of 64 bits
                 asm: (fn (f,_) =>
                          if f < 8 then "%st("^Int.toString f^")"
                          else "%f"^Int.toString f (* pseudo register *)
                      )
   |  CC     = $cc[] of 32 bits aliasing GP asm: "cc"
   |  EFLAGS = $eflags[1] of 32 bits asm: "$eflags"
   |  FFLAGS = $fflags[1] of 32 bits asm: "$fflags"
   |  MEM    = $m[]    of 8 aggregable bits asm: "mem"
   |  CTRL   = $ctrl[] asm: "ctrl"

   locations
       eax       = $r[0]
   and ecx       = $r[1]
   and edx       = $r[2]
   and ebx       = $r[3]
   and esp       = $r[4]
   and ebp       = $r[5]
   and esi       = $r[6]
   and edi       = $r[7]
   and stackptrR = $r[4]
   and ST(x)     = $f[x]
   and ST0       = $f[0]
   and asmTmpR   = $r[0] (* not used *)
   and fasmTmp   = $f[0] (* not used *)
   and eflags    = $eflags[0]

  (*------------------------------------------------------------------------
   *
   * Representation for various opcodes.
   *
   *------------------------------------------------------------------------*)
   structure Instruction =
   struct
      (* An effective address can be any combination of
       *  base + index*scale + disp
       *   or
       *   B + I*SCALE + DISP
       *
       * where any component is optional. The operand datatype captures
       * all these combinations.
       *
       *  DISP 	         == Immed | ImmedLabel | Const
       *  B		 == Displace{base=B, disp=0}
       *  B+DISP	 == Displace{base=B, disp=DISP}
       *  I*SCALE+DISP   == Indexed{base=NONE,index=I,scale=SCALE,disp=D}
       *  B+I*SCALE+DISP == Indexed{base=SOME B,index=I,scale=SCALE,disp=DISP}
       * Note1: The index register cannot be EBP.
       *        The disp field must be one of Immed, ImmedLabel,  or Const.
       *)

      (* Note: Relative is only generated after sdi resolution *)
      datatype operand =
         Immed      of Int32.int	   rtl: int
       | ImmedLabel of T.labexp            rtl: labexp
       | Relative   of int		   (* no semantics given *)
       | LabelEA    of T.labexp            rtl: labexp (* XXX *)
       | Direct     of $GP		   rtl: $r[GP]
          (* pseudo memory register for floating point *)
       | FDirect    of $FP                 rtl: $f[FP]
          (* virtual floating point register *)
       | FPR        of $FP                 rtl: $f[FP]
       | ST         of $FP		   rtl: $f[FP]
          (* pseudo memory register *)
       | MemReg     of $GP                 rtl: $r[GP]
       | Displace   of {base: $GP, disp:operand, mem:Region.region}
              rtl: $m[$r[base] + disp : mem]
       | Indexed    of {base: $GP option, index: $GP, scale:int,
                        disp:operand, mem:Region.region}
              rtl: $m[$r[base] + $r[index] << scale + disp : mem]

      type addressing_mode = operand

      type ea = operand

      datatype cond! =
          EQ "e" 0w4 | NE 0w5 | LT "l" 0w12 | LE 0w14 | GT "g" 0w15 | GE 0w13
        | B  0w2 | BE (* below *) 0w6 | A 0w7 | AE (* above *) 0w3
        | C  0w2 | NC (* if carry *) 0w3 | P 0wxa | NP (* if parity *) 0wxb
        | O  0w0 | NO (* overflow *) 0w1

          (* LOCK can only be used in front of
           *                      (Intel ordering, not gasm ordering)
           * ADC, ADD, AND, BT   mem, reg/imm
           * BTS, BTR, BTC, OR   mem, reg/imm
           * SBB, SUB, XOR       mem, reg/imm
           * XCHG                reg, mem
           * XCHG                mem, reg
           * DEC, INC, NEG, NOT  mem
           *)

      datatype binaryOp! =
         ADDL | SUBL | ANDL | ORL | XORL | SHLL | SARL | SHRL | IMULL
       | ADCL | SBBL
       | ADDW | SUBW | ANDW | ORW | XORW | SHLW | SARW | SHRW | IMULW
       | ADDB | SUBB | ANDB | ORB | XORB | SHLB | SARB | SHRB | IMULB
       | BTSW | BTCW | BTRW | BTSL | BTCL | BTRL
       | ROLW | RORW | ROLL | RORL
       | XCHGB | XCHGW | XCHGL

         (* Moby need these but I'm not going to handle them in the optimzer
          * until Moby starts generating these things
          *)
       | LOCK_ADCW "lock\n\tadcw"
       | LOCK_ADCL "lock\n\tadcl"
       | LOCK_ADDW "lock\n\taddw"
       | LOCK_ADDL "lock\n\taddl"
       | LOCK_ANDW "lock\n\tandw"
       | LOCK_ANDL "lock\n\tandl"
       | LOCK_BTSW "lock\n\tbtsw"
       | LOCK_BTSL "lock\n\tbtsl"
       | LOCK_BTRW "lock\n\tbtrw"
       | LOCK_BTRL "lock\n\tbtrl"
       | LOCK_BTCW "lock\n\tbtcw"
       | LOCK_BTCL "lock\n\tbtcl"
       | LOCK_ORW  "lock\n\torw"
       | LOCK_ORL  "lock\n\torl"
       | LOCK_SBBW "lock\n\tsbbw"
       | LOCK_SBBL "lock\n\tsbbl"
       | LOCK_SUBW "lock\n\tsubw"
       | LOCK_SUBL "lock\n\tsubl"
       | LOCK_XORW "lock\n\txorw"
       | LOCK_XORL "lock\n\txorl"
       | LOCK_XADDB "lock\n\txaddb"
       | LOCK_XADDW "lock\n\txaddw"
       | LOCK_XADDL "lock\n\txaddl"

      (* One operand opcodes *)
      datatype multDivOp! =
               IMULL1 "imull" | MULL1 "mull" | IDIVL1 "idivl" | DIVL1 "divl"

      datatype unaryOp! = DECL | INCL | NEGL | NOTL
                        | DECW | INCW | NEGW | NOTW
                        | DECB | INCB | NEGB | NOTB
                        | LOCK_DECL "lock\n\tdecl"
                        | LOCK_INCL "lock\n\tincl"
                        | LOCK_NEGL "lock\n\tnegl"
                        | LOCK_NOTL "lock\n\tnotl"

      datatype shiftOp! = SHLDL | SHRDL

      datatype bitOp! = BTW
                      | BTL
                      | LOCK_BTW "lock\n\tbtw"
                      | LOCK_BTL "lock\n\tbtl"

      datatype move! = MOVL
                     | MOVB
                     | MOVW
                     | MOVSWL  (* sx(word) -> long *)
                     | MOVZWL  (* zx(word) -> long *)
                     | MOVSBL  (* sx(byte) -> long *)
                     | MOVZBL  (* zx(byte) -> long *)

     (* The Intel manual is incorrect on the description of FDIV and FDIVR *)
      datatype fbinOp! =
          FADDP   | FADDS
        | FMULP   | FMULS
                  | FCOMS
                  | FCOMPS
        | FSUBP   | FSUBS  	(* ST(1) := ST-ST(1); [pop] *)
        | FSUBRP  | FSUBRS 	(* ST(1) := ST(1)-ST; [pop] *)
        | FDIVP   | FDIVS   (* ST(1) := ST/ST(1); [pop] *)
        | FDIVRP  | FDIVRS 	(* ST(1) := ST(1)/ST; [pop] *)
                  | FADDL
                  | FMULL
                  | FCOML
                  | FCOMPL
                  | FSUBL 	(* ST(1) := ST-ST(1); [pop] *)
                  | FSUBRL 	(* ST(1) := ST(1)-ST; [pop] *)
                  | FDIVL   (* ST(1) := ST/ST(1); [pop] *)
                  | FDIVRL 	(* ST(1) := ST(1)/ST; [pop] *)

      datatype fibinOp! =
          FIADDS (0wxde,0) | FIMULS  (0wxde,1)
        | FICOMS (0wxde,2) | FICOMPS (0wxde,3)
        | FISUBS (0wxde,4) | FISUBRS (0wxde,5)
        | FIDIVS (0wxde,6) | FIDIVRS (0wxde,7)
        | FIADDL (0wxda,0) | FIMULL  (0wxda,1)
        | FICOML (0wxda,2) | FICOMPL (0wxda,3)
        | FISUBL (0wxda,4) | FISUBRL (0wxda,5)
        | FIDIVL (0wxda,6) | FIDIVRL (0wxda,7)

      datatype funOp! =
         (* the first byte is always d9; the second byte is listed *)
         FCHS     0wxe0
       | FABS     0wxe1
       | FTST     0wxe4
       | FXAM     0wxe5
       | FPTAN    0wxf2
       | FPATAN   0wxf3
       | FXTRACT  0wxf4
       | FPREM1   0wxf5
       | FDECSTP  0wxf6
       | FINCSTP  0wxf7
       | FPREM    0wxf8
       | FYL2XP1  0wxf9
       | FSQRT    0wxfa
       | FSINCOS  0wxfb
       | FRNDINT  0wxfc
       | FSCALE   0wxfd
       | FSIN     0wxfe
       | FCOS     0wxff

      datatype fenvOp! = FLDENV | FNLDENV | FSTENV | FNSTENV

      (* Intel floating point precision *)
      datatype fsize = FP32 "s" | FP64 "l" | FP80 "t"

      (* Intel integer precision *)
      datatype isize = I8 "8" | I16 "16" | I32 "32" | I64 "64"

  end (* Instruction *)

  (*------------------------------------------------------------------------
   *
   * Here, I'm going to define the semantics of the instructions
   *
   *------------------------------------------------------------------------*)
  structure RTL =
  struct

     (* Get the basis *)
     include "Tools/basis.mdl"
     open Basis
     infix 1 ||  (* parallel effects *)
     infix 2 :=  (* assignment *)

     (* Some type abbreviations *)
     fun byte x = (x : #8 bits)
     fun word x = (x : #16 bits)
     fun long x = (x : #32 bits)
     fun float x = (x : #32 bits)
     fun double x = (x : #64 bits)
     fun real80 x = (x : #80 bits)

     (* Intel register abbreviations *)
     val eax = $r[0] and ecx = $r[1] and edx = $r[2] and ebx = $r[3]
     and esp = $r[4] and ebp = $r[5] and esi = $r[6] and edi = $r[7]

     (* Condition codes bits in eflag.
      * Let's give symbolic name for each bit as per the Intel doc.
      *)
     rtl setFlag : #n bits -> #n bits
     fun flag b = andb($eflags[0] >> b, 1)
     val CF = flag 0 and PF = flag 2
     and ZF = flag 6 and SF = flag 7 and OF = flag 11

     (* Now gets use the bits to express the conditions.  Again from Intel. *)
     (* conditions *)                   (* aliases *)
     val B   = CF == 1                  val C = B and NAE = B
     val BE  = CF == 1 orelse ZF == 1   val NA = BE
     val E   = ZF == 1                  val Z = E
     val L   = SF <> OF                 val NGE = L
     val LE  = SF <> OF orelse ZF == 1  val NG = LE
     val NB  = CF == 0                  val AE = NB and NC = NB
     val NBE = CF == 0 andalso ZF == 0  val A  = NBE
     val NE  = ZF == 0                  val NZ = NE
     val NL  = SF == OF                 val GE = NL
     val NLE = ZF == 0 andalso SF == OF val G = NLE
     val NO  = OF == 0
     val NP  = PF == 0                  val PO = NP
     val NS  = SF == 0
     val O   = OF == 1
     val P   = PF == 1                  val PE = P
     val S   = SF == 1

     rtl NOP{} = () (* duh! *)
     rtl LEA{addr, r32} = $r[r32] := addr (* this is completely wrong! XXX *)

         (* moves with type conversion *)
     rtl MOVL{src,dst} = dst := long src
     rtl MOVW{src,dst} = dst := word src
     rtl MOVB{src,dst} = dst := byte src
     rtl MOVSWL{src,dst} = dst := long(sx(word src))
     rtl MOVZWL{src,dst} = dst := long(zx(word src))
     rtl MOVSBL{src,dst} = dst := long(sx(byte src))
     rtl MOVZBL{src,dst} = dst := long(zx(byte src))

     (* semantics of integer arithmetic;
      * all instructions sets the condition code
      *)
     fun binop typ oper {dst,src} = dst := typ(oper(dst,src))
     fun arith typ oper {dst,src} = dst := typ(oper(dst,src))
                                 || $eflags[0] := ??? (* XXX *)
     fun unary typ oper {opnd} = opnd := typ(oper opnd)

     fun inc x = x + 1
     fun dec x = x - 1

     (* I'm too lazy to specify the semantics of these for now *)
     rtl adc sbb bts btc btr rol ror xchg xadd cmpxchg
          : #n bits * #n bits -> #n bits

     rtl [ADD,SUB,AND,OR,XOR]^^B = map (arith byte) [(+),(-),andb,orb,xorb]
     rtl [ADD,SUB,AND,OR,XOR]^^W = map (arith word) [(+),(-),andb,orb,xorb]
     rtl [ADD,SUB,AND,OR,XOR]^^L = map (arith long) [(+),(-),andb,orb,xorb]
     rtl [SHR,SHL,SAR]^^B = map (binop byte) [(>>),(<<),(~>>)]
     rtl [SHR,SHL,SAR]^^W = map (binop word) [(>>),(<<),(~>>)]
     rtl [SHR,SHL,SAR]^^L = map (binop long) [(>>),(<<),(~>>)]
     rtl [NEG,NOT,INC,DEC]^^B = map (unary byte) [(~),notb,inc,dec]
     rtl [NEG,NOT,INC,DEC]^^W = map (unary word) [(~),notb,inc,dec]
     rtl [NEG,NOT,INC,DEC]^^L = map (unary long) [(~),notb,inc,dec]


     rtl [ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^B =
          map (arith byte) [adc,sbb,bts,btc,btr,rol,ror,xchg]
     rtl [ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^W =
          map (arith word) [adc,sbb,bts,btc,btr,rol,ror,xchg]
     rtl [ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^L =
          map (arith long) [adc,sbb,bts,btc,btr,rol,ror,xchg]

     fun lockarith typ oper {src,dst}=
             dst := typ(oper(dst,src))
          || Kill $eflags[0] (* XXX *)
     fun lockunary typ oper {opnd} =
             opnd := typ(oper(opnd))
          || Kill $eflags[0] (* XXX *)

     rtl LOCK_^^[ADD,SUB,AND,OR,XOR,XADD]^^B =
          map (lockarith byte) [(+),(-),andb,orb,xorb,xadd]
     rtl LOCK_^^[ADD,SUB,AND,OR,XOR,XADD]^^W =
          map (lockarith word) [(+),(-),andb,orb,xorb,xadd]
     rtl LOCK_^^[ADD,SUB,AND,OR,XOR,XADD]^^L =
          map (lockarith long) [(+),(-),andb,orb,xorb,xadd]
     rtl LOCK_^^[ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^B =
          map (lockarith byte) [adc,sbb,bts,btc,btr,rol,ror,xchg]
     rtl LOCK_^^[ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^W =
          map (lockarith word) [adc,sbb,bts,btc,btr,rol,ror,xchg]
     rtl LOCK_^^[ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^L =
          map (lockarith long) [adc,sbb,bts,btc,btr,rol,ror,xchg]
     rtl LOCK_^^[DEC,INC,NEG,NOT]^^L =
          map (lockunary long) [dec,inc,(~),notb]
     rtl LOCK_^^[CMPXCHG]^^B = map (lockarith byte) [cmpxchg]
     rtl LOCK_^^[CMPXCHG]^^W = map (lockarith word) [cmpxchg]
     rtl LOCK_^^[CMPXCHG]^^L = map (lockarith long) [cmpxchg]

     (* Multiplication/division *)
     rtl upperMultiply : #n bits * #n bits -> #n bits
     rtl MULL1{src}  = eax        := muls(eax, src) ||
                       edx        := upperMultiply(eax, src) ||
                       $eflags[0] := ???
     rtl IDIVL1{src} = eax := divs(eax, src) ||
                       edx := rems(eax, src) ||
                       $eflags[0] := ???
     rtl DIVL1{src}  = edx := divu(eax, src) ||
                       edx := remu(eax, src) ||
                       $eflags[0] := ???

     (* test[b,w,l] *)
     rtl TESTB {lsrc,rsrc} = $eflags[0] := setFlag(andb(byte lsrc, rsrc))
     rtl TESTW {lsrc,rsrc} = $eflags[0] := setFlag(andb(word lsrc, rsrc))
     rtl TESTL {lsrc,rsrc} = $eflags[0] := setFlag(andb(long lsrc, rsrc))

     (* setcc *)
     fun set cc {opnd} = opnd := byte(cond(cc, 0xff, 0x0))
     rtl SET^^   [EQ,NE,LT,LE,GT,GE,B,BE,A,AE,C,NC,P,NP,O,NO] =
         map set [E ,NE,L, LE,G ,GE,B,BE,A,AE,C,NC,P,NP,O,NO]

     (* conditional move *)
     fun cmov cc {src,dst} = if cc then $r[dst] := long src else ()
     rtl CMOV^^   [EQ,NE,LT,LE,GT,GE,B,BE,A,AE,C,NC,P,NP,O,NO] =
         map cmov [E ,NE,L, LE,G ,GE,B,BE,A,AE,C,NC,P,NP,O,NO]

     (* push and pops *)
     rtl PUSHL {operand} = $m[esp - 4] := long(operand) || esp := esp - 4
     rtl PUSHW {operand} = $m[esp - 2] := word(operand) || esp := esp - 2
     rtl PUSHB {operand} = $m[esp - 1] := byte(operand) || esp := esp - 1
     rtl POP  {operand} = operand := long($m[esp]) || esp := esp + 4

     (* semantics of branches and jumps *)
     rtl JMP{operand} = Jmp(long operand)
     fun jcc cc {opnd} = if cc then Jmp(long opnd) else ()
     rtl J^^     [EQ,NE,LT,LE,GT,GE,B,BE,A,AE,C,NC,P,NP,O,NO] =
         map jcc [E ,NE,L, LE,G ,GE,B,BE,A,AE,C,NC,P,NP,O,NO]
     rtl CALL{opnd,defs,uses} =
         Call(long opnd) ||
         Kill $cellset[defs] ||
         Use $cellset[uses]

     (* semantics of floating point operators
      * The 3-address fake operators first.
      *)
     fun fbinop typ oper {lsrc, rsrc, dst} = dst := typ(oper(lsrc, rsrc))
     fun funary typ oper {src, dst} = dst := typ(oper src)
     rtl F^^[ADD,SUB,MUL,DIV]^^L = map (fbinop double) f^^[add,sub,mul,div]
     rtl F^^[ADD,SUB,MUL,DIV]^^S = map (fbinop float) f^^[add,sub,mul,div]
     rtl F^^[ADD,SUB,MUL,DIV]^^T = map (fbinop real80) f^^[add,sub,mul,div]

      (* semantics of trig/transendental functions are abstract *)
     rtl fsqrt fsin fcos ftan fasin facos fatan fln fexp : #n bits -> #n bits
     rtl F^^[CHS,ABS,SQRT,SIN,COS,TAN,ASIN,ACOS,ATAN,LN,EXP] =
         map (funary real80)
         f^^[neg,abs,sqrt,sin,cos,tan,asin,acos,atan,ln,exp]
  end (* RTL *)

  (*------------------------------------------------------------------------
   * Machine Instruction encoding on the x86
   * Because of variable width instructions.
   * We decompose each byte field into a seperate format first, then combine
   * then to form the real instructions
   *------------------------------------------------------------------------*)
  instruction formats 8 bits
    modrm{mod:2, reg:3, rm:3}
  | reg{opc:5, reg:3}
  | sib{ss:2, index:3, base:3}
  | immed8{imm:8}

  instruction formats 32 bits
    immed32{imm:32}

  (*
   * Variable format instructions
   *)
  instruction formats
    immedOpnd{opnd} =
      (case opnd of
         I.Immed i32 => i32
       | I.ImmedLabel le => lexp le
       | I.LabelEA le => lexp le
       | _ => error "immedOpnd"
      )
  | extension{opc, opnd} = (* generate an extension *)
      (case opnd of
        I.Direct r => modrm{mod=3, reg=opc, rm=r}
      | I.MemReg _ => extension{opc,opnd=memReg opnd}
      | I.FDirect _ => extension{opc,opnd=memReg opnd}
      | I.Displace{base, disp, ...} =>
        let val immed = immedOpnd{opnd=disp}
        in  () (* XXX *)
        end
      | I.Indexed{base=NONE, index, scale, disp, ...} => ()
      | I.Indexed{base=SOME b, index, scale, disp, ...} => ()
      | _ => error "immedExt"
      )

   instruction formats 16 bits
     encodeST{prefix:8, opc:5, st: $FP 3}

   instruction formats
     encodeReg{prefix:8, reg: $GP 3, opnd} =
      (emit prefix; immedExt{opc=reg, opnd=opnd})
   | arith{opc1,opc2,src,dst} =
      (case (src, dst) of
        (I.ImmedLabel le, dst) => arith{opc1,opc2,src=I.Immed(lexp le),dst}
      | (I.LabelEA le, dst) => arith{opc1,opc2,src=I.Immed(lexp le),dst}
      | (I.Immed i,dst) => ()
      | (src, I.Direct r) => encodeReg{prefix=opc1+op3,reg,opnd=src}
      | (I.Direct r,dst) => encodeReg{prefix=opc1+0w1,reg,opnd=dst}
      | _ => error "arith"
      )

  (*------------------------------------------------------------------------
   * A bunch of routines for emitting assembly on the x86.
   * This is a headache because the syntax is quite non-orthorgonal.
   * So we have to write some code to help out the md tool
   * Assembly note:
   * Note: we are using the AT&T syntax (for Linux) and not the intel syntax
   * memory operands have the form:
   *       section:disp(base, index, scale)
   * Most of the complication is actually in emiting the correct
   * operand syntax.
   *------------------------------------------------------------------------*)

  functor Assembly
     (structure MemRegs : MEMORY_REGISTERS where I = Instr
      val memRegBase : CellsBasis.cell option) =
  struct
     fun memReg r = MemRegs.memReg {reg=r, base=Option.valOf memRegBase}
     fun emitInt32 i =
     let val s = Int32.toString i
         val s = if i >= 0 then s else "-"^String.substring(s,1,size s-1)
     in  emit s end

     val {low=SToffset, ...} = C.cellRange CellsBasis.FP

     fun emitScale 0 = emit "1"
       | emitScale 1 = emit "2"
       | emitScale 2 = emit "4"
       | emitScale 3 = emit "8"
       | emitScale _ = error "emitScale"

     and eImmed(I.Immed (i)) = emitInt32 i
       | eImmed(I.ImmedLabel lexp) = emit_labexp lexp
       | eImmed _ = error "eImmed"


     and emit_operand opn =
         case opn of
         I.Immed i => (emit "$"; emitInt32 i)
       | I.ImmedLabel lexp => (emit "$"; emit_labexp lexp)
       | I.LabelEA le => emit_labexp le
       | I.Relative _ => error "emit_operand"
       | I.Direct r => emitCell r
       | I.MemReg r => emit_operand(memReg opn)
       | I.ST f => emitCell f
       | I.FPR f => (emit "%f"; emit(Int.toString(CellsBasis.registerNum f)))
       | I.FDirect f => emit_operand(memReg opn)
       | I.Displace{base,disp,mem,...} =>
           (emit_disp disp; emit "("; emitCell base; emit ")";
            emit_region mem)
       | I.Indexed{base,index,scale,disp,mem,...} =>
          (emit_disp disp; emit "(";
           case base of
             NONE => ()
           | SOME base => emitCell base;
           comma();
           emitCell index; comma();
           emitScale scale; emit ")"; emit_region mem)

      and emit_operand8(I.Direct r) = emit(CellsBasis.toStringWithSize(r,8))
        | emit_operand8 opn = emit_operand opn

      and emit_disp(I.Immed 0) = ()
        | emit_disp(I.Immed i) = emitInt32 i
        | emit_disp(I.ImmedLabel lexp) = emit_labexp lexp
        | emit_disp _ = error "emit_disp"

     (* The gas assembler does not like the "$" prefix for immediate
      * labels in certain instructions.
      *)
      fun stupidGas(I.ImmedLabel lexp) = emit_labexp lexp
        | stupidGas opnd = (emit "*"; emit_operand opnd)

     (* Display the floating point binary opcode *)
      fun isMemOpnd(I.MemReg _) = true
        | isMemOpnd(I.FDirect f) = true
        | isMemOpnd(I.LabelEA _) = true
        | isMemOpnd(I.Displace _) = true
        | isMemOpnd(I.Indexed _) = true
        | isMemOpnd _ = false
      fun chop fbinOp =
          let val n = size fbinOp
          in  case Char.toLower(String.sub(fbinOp,n-1)) of
                (#"s" | #"l") => String.substring(fbinOp,0,n-1)
              | _ => fbinOp
          end

      fun isST0 (I.ST r) = CellsBasis.registerNum r = 0
        | isST0 _ = false

      (* Special syntax for binary operators *)
      fun emit_fbinaryOp(binOp,src,dst) =
          if isMemOpnd src then
              (emit_fbinOp binOp; emit "\t"; emit_operand src)
          else (emit(chop(asm_fbinOp binOp)); emit "\t";
                case (isST0 src, isST0 dst) of
                  (_, true) => (emit_operand src; emit ", %st")
                | (true, _) => (emit "%st, "; emit_operand dst)
                | _ => error "emit_fbinaryOp"
               )

      val emit_dst = emit_operand
      val emit_src = emit_operand
      val emit_opnd = emit_operand
      val emit_opnd8 = emit_operand8
      val emit_rsrc = emit_operand
      val emit_lsrc = emit_operand
      val emit_addr = emit_operand
      val emit_src1 = emit_operand
      val emit_ea = emit_operand
      val emit_count = emit_operand
  end (* Assembly *)


  (*------------------------------------------------------------------------
   *
   * Reservation tables and pipeline definitions for scheduling.
   * Faked for now as I don't have to time to look up the definitions
   * from the Intel doc.
   *
   *------------------------------------------------------------------------*)

   (* Function units *)
   resource issue and mem and alu and falu and fmul and fdiv and branch

   (* Different implementations of cpus *)
   cpu default 2 [2 issue, 2 mem, 1 alu, 1 falu, 1 fmul] (* 2 issue machine *)

   (* Definitions of various reservation tables *)
   pipeline NOP _    = [issue]
    and     ARITH _  = [issue^^alu]
    and     LOAD _   = [issue^^mem]
    and     STORE _  = [issue^^mem,mem,mem]
    and     FARITH _ = [issue^^falu]
    and     FMUL _   = [issue^^fmul,fmul]
    and     FDIV _   = [issue^^fdiv,fdiv*50]
    and     BRANCH _ = [issue^^branch]

  (*------------------------------------------------------------------------
   *
   * Compiler representation of the instruction set.
   *
   *------------------------------------------------------------------------*)
  instruction
      NOP
	asm: ``nop''
	rtl: ``NOP''

    | JMP of operand * Label.label list
	asm: ``jmp\t<stupidGas operand>''
	rtl: ``JMP''

    | JCC of {cond:cond, opnd:operand}
	asm: ``j<cond>\t<stupidGas opnd>''
	rtl: ``J<cond>''

    | CALL of {opnd: operand, defs: $cellset, uses: $cellset,
               return: $cellset, cutsTo: Label.label list, mem: Region.region,
 	       pops:Int32.int}
	asm: ``call\t<stupidGas opnd><mem><
          	emit_defs(defs)><
          	emit_uses(uses)><
          	emit_cellset("return",return)><
                emit_cutsTo cutsTo>''
	rtl: ``CALL''

    | ENTER of {src1:operand, src2:operand}
	asm: ``enter\t<emit_operand src1>, <emit_operand src2>''

    | LEAVE
	asm: ``leave''

    | RET of operand option
	asm: ``ret<case option of NONE => ()
                                | SOME e => (emit "\t"; emit_operand e)>''

   (* integer *)
    | MOVE of {mvOp:move, src:operand, dst:operand}
	asm: ``<mvOp>\t<src>, <dst>''
	rtl: ``<mvOp>''

    | LEA of {r32: $GP, addr: operand}
	asm: ``leal\t<addr>, <r32>''
	rtl: ``LEA''

    | CMPL of {lsrc: operand, rsrc: operand}
	asm: ``cmpl\t<rsrc>, <lsrc>''

    | CMPW of {lsrc: operand, rsrc: operand}
	``cmpb\t<rsrc>, <lsrc>''

    | CMPB of {lsrc: operand, rsrc: operand}
	``cmpb\t<rsrc>, <lsrc>''

    | TESTL of {lsrc: operand, rsrc: operand}
	asm: ``testl\t<rsrc>, <lsrc>''
	rtl: ``TESTL''

    | TESTW of {lsrc: operand, rsrc: operand}
	asm: ``testw\t<rsrc>, <lsrc>''
	rtl: ``TESTW''

    | TESTB of {lsrc: operand, rsrc: operand}
	asm: ``testb\t<rsrc>, <lsrc>''
	rtl: ``TESTB''

    | BITOP of {bitOp:bitOp, lsrc: operand, rsrc: operand}
	``<bitOp>\t<rsrc>, <lsrc>''

    | BINARY of {binOp:binaryOp, src:operand, dst:operand}
	asm: (case (src,binOp) of
               (I.Direct _,  (* tricky business here for shifts *)
               (I.SARL | I.SHRL | I.SHLL |
                I.SARW | I.SHRW | I.SHLW |
                I.SARB | I.SHRB | I.SHLB)) => ``<binOp>\t%cl, <dst>''
             | _ => ``<binOp>\t<src>, <dst>''
             )
	(*rtl: ``<binOp>''*)
    | SHIFT of {shiftOp:shiftOp, src:operand, dst:operand, count:operand}
        asm: (case count of (* must be %ecx if it is a register *)
                I.Direct ecx => ``<shiftOp>\t<src>, <dst>''
              | _            => ``<shiftOp>\t<src>, <count>, <dst>''
             )

    | CMPXCHG of {lock:bool, sz:isize, src: operand, dst:operand}
	asm: (if lock then ``lock\n\t'' else ();
              ``cmpxchg'';
              case sz of
                I.I8 => ``b''
              | I.I16 => ``w''
              | I.I32 => ``l''
	      | I.I64 => error "CMPXCHG: I64";
              ``\t<src>, <dst>''
             )

    | MULTDIV of {multDivOp:multDivOp, src:operand}
	asm: ``<multDivOp>\t<src>''

    | MUL3 of {dst: $GP, src2: Int32.int, src1:operand}
        (* Fermin: constant operand must go first *)
        asm: ``imull\t$<emitInt32 src2>, <src1>, <dst>''

    | UNARY of {unOp:unaryOp, opnd:operand}
	asm: ``<unOp>\t<opnd>''
	rtl: ``<unOp>''

      (* set byte on condition code; note that
       * this only sets the low order byte, so it also
       * uses its operand.
       *)
    | SET of {cond:cond, opnd:operand}
	asm: ``set<cond>\t<emit_opnd8 opnd>''
	rtl: ``SET<cond>''

        (* conditional move; Pentium Pro or higher only
         * Destination must be a register.
         *)
    | CMOV of {cond:cond, src:operand, dst: $GP}
	asm: ``cmov<cond>\t<src>, <dst>''
	rtl: ``CMOV<cond>''

    | PUSHL of operand
	asm: ``pushl\t<operand>''
	rtl: ``PUSHL''

    | PUSHW of operand
	asm: ``pushw\t<operand>''
	rtl: ``PUSHW''

    | PUSHB of operand
	asm: ``pushb\t<operand>''
	rtl: ``PUSHB''

    | PUSHFD     (* push $eflags onto stack *)
	``pushfd''

    | POPFD	(* pop $eflags onto stack *)
	``popfd''

    | POP of operand
	asm: ``popl\t<operand>''
	rtl: ``POP''

    | CDQ
	``cdq''

    | INTO
	``into''

    (* floating *)
    | FBINARY of {binOp:fbinOp, src:operand, dst:operand}
	asm: (emit_fbinaryOp(binOp,src,dst))

    | FIBINARY of {binOp:fibinOp, src:operand}
	asm: ``<binOp>\t<src>'' (* the implied destination is %ST(0) *)

    | FUNARY of funOp
	``<funOp>''

    | FUCOM of operand
	``fucom\t<operand>''

    | FUCOMP of operand
	``fucomp\t<operand>''

    | FUCOMPP
	``fucompp''

    | FCOMPP
	``fcompp''

    | FCOMI of operand
	``fcomi\t<operand>, %st''

    | FCOMIP of operand
	``fcomip\t<operand>, %st''

    | FUCOMI of operand
	``fucomi\t<operand>, %st''

    | FUCOMIP of operand
	``fucomip\t<operand>, %st''

    | FXCH of {opnd: $FP}
	``fxch\t<opnd>''

    | FSTPL of operand
	asm: (case operand of
               I.ST _ => ``fstp\t<operand>''
             | _ => ``fstpl\t<operand>''
             )

    | FSTPS of operand
	``fstps\t<operand>''

    | FSTPT of operand
	``fstps\t<operand>''

    | FSTL of operand
	asm: (case operand of
                I.ST _ => ``fst\t<operand>''
              | _      => ``fstl\t<operand>''
             )

    | FSTS of operand
	``fsts\t<operand>''

    | FLD1
	``fld1''

    | FLDL2E
	``fldl2e''

    | FLDL2T
	``fldl2t''

    | FLDLG2
	``fldlg2''

    | FLDLN2
	``fldln2''

    | FLDPI
	``fldpi''

    | FLDZ
	``fldz''

    | FLDL of operand
	asm: (case operand of
               I.ST _ => ``fld\t<operand>''
             | _ => ``fldl\t<operand>''
             )

    | FLDS of operand
	``flds\t<operand>''

    | FLDT of operand
	``fldt\t<operand>''

    | FILD of operand
	``fild\t<operand>''

    | FILDL of operand
	``fildl\t<operand>''

    | FILDLL of operand
	``fildll\t<operand>''

    | FNSTSW
	``fnstsw''

    | FENV of {fenvOp:fenvOp, opnd:operand} (* load/store environment *)
	``<fenvOp>\t<opnd>''

      (* pseudo floating ops *)
    | FMOVE of {fsize:fsize, src:operand, dst:operand}
	``fmove<fsize>\t<src>, <dst>''

    | FILOAD of {isize:isize, ea:operand, dst:operand}
	``fiload<isize>\t<ea>, <dst>''

    | FBINOP of {fsize:fsize,
                 binOp:fbinOp, lsrc:operand, rsrc:operand, dst:operand}
	``<binOp><fsize>\t<lsrc>, <rsrc>, <dst>''
        (* rtl: ``<binOp><fsize>'' *)

    | FIBINOP of {isize:isize,
                  binOp:fibinOp, lsrc:operand, rsrc:operand, dst:operand}
	``<binOp><isize>\t<lsrc>, <rsrc>, <dst>''
        (* rtl: ``<binOp><isize>'' *)

    | FUNOP of {fsize:fsize, unOp:funOp, src:operand, dst:operand}
	``<unOp><fsize>\t<src>, <dst>''
        (* rtl: [[unOp fsize]] *)

    | FCMP of {i:bool,fsize:fsize, lsrc:operand, rsrc:operand}
	asm: (if i then ``fcmpi'' else ``fcmp''; ``<fsize>\t<lsrc>, <rsrc>'')
        (* rtl: [["FCMP" fsize]] *)

   (* misc *)
    | SAHF        (* %flags -> %ah *)
	``sahf''

  (*** concurrency operations ****)

      (* performs a serializing operation on all load-to-memory operations issued prior to
       * the lfence instruction.
       *)
    | LFENCE
        asm: ``lfence''
        rtl:  ``LFENCE''

      (* performs a serializing operation on all load-from-memory and store-to-memory
       * operations issued prior to the mfence instruction.
       *)
    | MFENCE
        asm: ``mfence''
        rtl: ``MFENCE''

      (* performs a serializing operation on all store-to-memory operations issued prior to
       * the sfence instruction.
       *)
    | SFENCE
        asm: ``sfence''
        rtl:  ``SFENCE''

      (* improves performance of spin-wait loops *)
    | PAUSE
        asm: ``pause''
        rtl: ``PAUSE''

    | LAHF	  (* %ah -> %flags *)
	``lahf''

    | SOURCE of {}
        asm: ``source''
        mc:  ()

    | SINK of {}
        asm: ``sink''
        mc:  ()

    | PHI of {}
        asm: ``phi''
        mc:  ()

  (*------------------------------------------------------------------------
   * Some helper routines for the SSA optimizer.
   * These should go away soon.
   *------------------------------------------------------------------------*)
    structure SSA =
    struct
       fun operand(ty, I.Immed i) = T.LI(T.I.fromInt32(32,i))
         (*| operand(ty, I.ImmedLabel le) = T.LABEL le*)
         | operand(ty, I.Direct r) = T.REG(ty, r)
         | operand _ = error "operand"
    end
  (*------------------------------------------------------------------------
   * Some helper routines for the rewriting module.
   * These should go away soon.
   *------------------------------------------------------------------------*)
    structure Rewrite =
    struct
        fun rewriteOperandUse (rs,rt,opnd) =
        (case opnd
         of I.Direct r => if C.sameColor(r,rs) then I.Direct rt else opnd
          | I.Displace{base, disp, mem} =>
              if C.sameColor(base,rs)
              then I.Displace{base=rt, disp=disp, mem=mem}
              else opnd
          | I.Indexed{base as SOME b, index, scale, disp, mem} => let
              val base'= if C.sameColor(b,rs) then SOME rt else base
              val index'=if C.sameColor(index,rs) then rt else index
            in I.Indexed{base=base', index=index', scale=scale,
                         disp=disp, mem=mem}
            end
          | I.Indexed{base, index, scale, disp, mem=mem}  =>
            if C.sameColor(index,rs) then
              I.Indexed{base=base, index=rt, scale=scale, disp=disp, mem=mem}
            else opnd
          | _ => opnd
        (*esac*))

        fun rewriteOperandDef (rs,rt,opnd as I.Direct r) =
             if C.sameColor(r,rs) then I.Direct rt else opnd

        fun frewriteOperandDef(fs,ft,opnd as I.FDirect f) =
               if C.sameColor(f,fs) then I.FDirect ft else opnd
          | frewriteOperandDef(fs,ft,opnd as I.FPR f) =
               if C.sameColor(f,fs) then I.FPR ft else opnd
          | frewriteOperandDef opnd = opnd

        fun frewriteOperandUse(fs,ft,opnd as I.FDirect r) =
             if C.sameColor(r,fs) then I.FDirect ft else opnd
          | frewriteOperandUse(fs,ft,opnd as I.FPR r) =
             if C.sameColor(r,fs) then I.FPR ft else opnd
          | frewriteOperandUse(fs,ft, opnd) = opnd
    end

end