File: nasm_manual.rb

package info (click to toggle)
ruby-rsec 0.4.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 272 kB
  • sloc: ruby: 2,130; lisp: 13; makefile: 3
file content (136 lines) | stat: -rw-r--r-- 4,008 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Parse the NASM manual [nasm_manual.txt] and generate a list of opcodes.
# Lines that parse are saved to [nasm_codes.txt]; candidate lines that cannot
# be parsed (including UNDOC instructions) are printed to stdout.
# Future work: extend the parser to generate an x86 assembler.
require "rsec"

module NASMManualParser
  include Rsec::Helper
  extend self

  # Module-level Hash; nothing in this file reads or writes it.
  # NOTE(review): presumably reserved for a registry of parsed instructions
  # (see the "extend the parser" note in the file header) — confirm or remove.
  Instructions = {}

  # Raised for manual entries this parser deliberately does not handle
  # (archs_parser raises it for the UNDOC arch tag); parse_line rescues it.
  class UnSupportedError < RuntimeError; end

  # One opcode entry from the manual, as assembled by instruction_parser.
  #
  # Members (all positional, in this order):
  #   nemonic  - instruction name; the member keeps its historical spelling
  #              because callers access it by this name
  #   operands - result of the optional operand-list parser
  #              (presumably nil when the line has no operands — confirm)
  #   code     - list of opcode tokens produced by code_parser
  #   archs    - Hash of arch name => true produced by archs_parser
  #
  # Assigned directly instead of subclassing Struct.new, which would leave a
  # useless anonymous class in the ancestry chain.
  Instruction = Struct.new(:nemonic, :operands, :code, :archs)

  # Optional self-test hook used by the parser builders.
  #
  # When the global $debug flag is truthy, each sample string in +strs+ is fed
  # to parser.eof.parse!; whatever that call raises propagates to the caller.
  # When $debug is falsy nothing is parsed.
  #
  # Always returns +parser+ itself so the call can end a builder method.
  def debug parser, *strs
    strs.each { |sample| parser.eof.parse!(sample) } if $debug
    parser
  end

  # Build a parser that recognises one register name.
  #
  # The alternatives are combined with `|` in the order listed below:
  # 16/32-bit general purpose, 8-bit halves, segment, FPU stack (ST0-ST7),
  # MMX (MM0-MM7) and the CRn/DRn/TRn families.
  #
  # Returns the parser (after the optional $debug self-test on 'AX').
  def reg_parser
    general  = /E?[ABCD]X|E?(SP|BP|SI|DI)/
    byte     = /[ABCD][HL]/
    segment  = /ES|CS|SS|DS|FS|GS/
    fpu      = /ST[0-7]/
    mmx      = /MM[0-7]/
    special  = /CR[0234]|DR[012367]|TR[34567]/
    # Only the first pattern needs an explicit .r; `|` accepts plain regexps
    # on its right-hand side.
    others = [byte, segment, fpu, mmx, special]
    reg = others.inject(general.r) { |choice, pattern| choice | pattern }
    debug reg, 'AX'
  end

  # Build a parser for an instruction's operand list.
  #
  # Grammar, as expressed by the combinators below:
  #   operands := group (',' group)*
  #   group    := operand ('/' operand)*
  #   operand  := operand class | register | single digit
  # The `.even` calls drop the separators that `join` interleaves with the
  # items (rsec keeps both in the joined result).
  #
  # Returns the parser (after the optional $debug self-test).
  def operands_parser
    imm     = /imm:imm(32|16)|imm(32|16|8)?/
    mem     = /mem(80|64|32|16|8)?/
    regs    = /reg(32|16|8)|(fpu|mmx|seg)reg/
    memoffs = /memoffs(32|16|8)/
    tr      = 'TR3/4/5/6/7'

    # Order matters: `memoffs` must be tried before `mem`, because `mem` alone
    # would match the prefix of a "memoffsNN" operand.
    class_choice = imm.r | memoffs | mem | regs | tr
    classes      = class_choice.fail 'operand class'
    register     = reg_parser.fail 'register'
    digit        = /\d/.r(&:to_i).fail 'num'

    operand  = classes | register | digit
    group    = operand.join('/').even
    operands = group.join(',').even
    debug operands, 'reg32', 'AX,memoffs16'
  end

  # Build a parser for the opcode column of a manual line, e.g.
  # "o32 0F C8+r" or "o16 A1 ow/od".
  #
  # The column is a whitespace-separated list of tokens. Plain two-digit hex
  # bytes are converted to Integers; every other token is matched by a bare
  # regexp with no conversion block.
  #
  # Returns the parser (after the optional $debug self-test).
  def code_parser
    byte_plus_cc = /[0-9A-F][0-9A-F]\+cc/
    byte_plus_r  = /[0-9A-F][0-9A-F]\+r/
    byte         = /[0-9A-F][0-9A-F]/.r { |pair| pair.to_i(16) }
    modrm        = /\/[\dr]/
    immediate    = /i[bwd]/
    relative     = /rw\/rd|r[bwd]/
    offset       = /ow\/od|o[wd]/
    size_prefix  = /[oa](32|16)/

    # The "+cc" / "+r" forms come before the plain byte so that the shorter
    # pattern does not consume their first two characters.
    rest  = [byte_plus_r, byte, modrm, immediate, relative, offset, size_prefix]
    token = rest.inject(byte_plus_cc.r) { |choice, alt| choice | alt }
    code  = token.join(/\s+/).even
    debug code, 'o32 0F C8+r', 'o32 6B /r ib', 'o16 A1 ow/od'
  end

  # Build a parser for the bracketed arch list that ends a manual line,
  # e.g. "[386,FPU]".
  #
  # The parse result is a Hash used as a set: each arch name maps to true.
  # An UNDOC tag makes the mapping block raise UnSupportedError while parsing.
  #
  # Returns the parser (after the optional $debug self-test).
  def archs_parser
    arch = symbol(/8086|186|286|386|486|PENT|P6|CYRIX|FPU|MMX|PRIV|UNDOC/)
    list = '['.r >> arch.join(',').even << ']'
    archs = list.map do |names|
      names.each_with_object({}) do |name, set|
        raise UnSupportedError, 'not implemented' if name == 'UNDOC'
        set[name] = true
      end
    end
    debug archs, '[386,FPU]'
  end

  # Build the parser for one complete (already desugared) manual line:
  #
  #   NAME [operands] ; code [archs]
  #
  # The name pattern uses \w, so names joined by desugar (e.g. "CALL_FAR")
  # are accepted. A successful parse yields an Instruction.
  #
  # Returns the parser (after the optional $debug self-test).
  def instruction_parser
    name_part     = /[A-Z]\w+|xxSAR/
    operands_part = operands_parser._?
    code_part     = ';'.r >> code_parser
    archs_part    = archs_parser
    # The optional operand parser yields an array, hence the (opers)
    # destructuring to pull out its first element.
    instruction = seq_(name_part, operands_part, code_part, archs_part) do |name, (opers), bytes, arch_set|
      Instruction.new name, opers, bytes, arch_set
    end
    debug instruction, 'FISUBR mem32 ; DA /5 [8086,FPU]', 'BSWAP reg32 ; o32 0F C8+r [486]'
  end

  # Rewrite the manual's shorthand so the grammar only sees plain operand
  # classes and single-word instruction names.
  #
  # - "r/m8", "r/m16", "r/m32" become "regN/memN" (every occurrence)
  # - "r/m64" becomes "mmxreg/mem64" (every occurrence)
  # - the first whitespace character followed by TO, NEAR, FAR or SHORT is
  #   replaced by "_", gluing that keyword onto what precedes it
  #
  # Returns a new String; +line+ itself is not modified.
  def desugar line
    line
      .gsub(/r\/m(32|16|8)/, 'reg\1/mem\1')
      .gsub('r/m64', 'mmxreg/mem64')
      .sub(/\s(TO|NEAR|FAR|SHORT)/, '_\1')
  end

  def parse_line parser, line
    parser.parse! desugar line
  rescue Rsec::SyntaxError
  rescue UnSupportedError
  end

  # Scan the manual at +filename+ and collect every line that parses as an
  # instruction definition.
  #
  # Candidate lines are picked by shape ("NAME operands ; code [archs]") and
  # handed, stripped, to parse_line. Lines that parse are kept verbatim
  # (original whitespace and newline included). The others are reported on
  # stdout as "unparsed:<index>\t<line>", where <index> is the 0-based line
  # index within the file.
  #
  # Returns a String: the concatenation of all accepted raw lines.
  # Errors from File.read (e.g. a missing file) propagate to the caller.
  def parse filename
    parser   = instruction_parser.eof
    accepted = []
    # each_line without a block returns an Enumerator on every Ruby version.
    # String#lines returns an Array since Ruby 2.0, and Array has no
    # #with_index, so the former `lines.with_index` raised NoMethodError.
    File.read(filename).each_line.with_index do |raw_line, idx|
      line = raw_line.strip
      # This shape indicates that the line defines a mnemonic.
      next unless line =~ /^\w+\s+[^;\[]+;\ [^;\[]+\[.+\]$/

      if parse_line parser, line
        accepted << raw_line
      else
        puts "unparsed:#{idx}\t#{line}"
      end
    end
    accepted.join
  end

end

# Script entry point: parse nasm_manual.txt next to this file and write the
# accepted lines to nasm_codes.txt in the same directory.
if __FILE__ == $PROGRAM_NAME
  # Enable the sample-input self-tests run by NASMManualParser.debug while
  # the parsers are being built.
  $debug = true
  here   = File.dirname __FILE__
  manual = "#{here}/nasm_manual.txt"
  codes  = "#{here}/nasm_codes.txt"
  # Parse before opening the output: mode 'w' truncates the file, so opening
  # it first would wipe an existing nasm_codes.txt whenever parsing raises.
  parsed = NASMManualParser.parse manual
  File.open(codes, 'w') { |file| file << parsed }
  puts '-' * 80
  puts "X86 asm codes are saved to #{codes}"
end