1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
|
#!/usr/bin/env ruby
# frozen_string_literal: true
ENV['GEM_HOME'] = File.expand_path('./.bundle', __dir__)
require 'rubygems/source'
require 'bundler/inline'
gemfile(true) do
source 'https://rubygems.org'
gem 'ffi-clang', '0.7.0', require: false
end
# Help ffi-clang find libclang
# Hint: apt install libclang1
#
# Return the libclang shared object that belongs to the newest LLVM matched by
# +pattern+, or nil when nothing matches. Versions are compared numerically
# component by component, so "llvm-14" wins over "llvm-9" (a plain string sort
# would pick "llvm-9"). Paths containing "-cpp" are ignored.
#
# @param pattern [String] glob pattern of candidate libclang paths
# @return [String, nil]
def newest_libclang(pattern = '/usr/lib/llvm-*/lib/libclang.so.1')
  Dir.glob(pattern).grep_v(/-cpp/).max_by do |path|
    # "/usr/lib/llvm-3.8/lib/..." => [3, 8]; a path without a version sorts lowest ([]).
    path[%r{/llvm-([\d.]+)/}, 1].to_s.split('.').map(&:to_i)
  end
end

# An explicitly exported LIBCLANG always takes precedence over auto-detection.
ENV['LIBCLANG'] ||= newest_libclang
require 'ffi/clang'
require 'etc'
require 'fiddle/import'
require 'set'
# The build directory is the single required command-line argument.
build_dir = ARGV.first
abort "Usage: #{$0} BUILD_DIR" unless build_dir
# Plain-data snapshot of one parsed C declaration, built by HeaderParser and
# consumed by BindingGenerator. Instances are created with keyword arguments only.
#
# Assigning the Struct directly (instead of `class Node < Struct.new(...)`)
# avoids creating a redundant anonymous superclass.
Node = Struct.new(
  :kind,         # [Symbol] cursor kind with the "cursor_" prefix removed, e.g. :struct, :field_decl
  :spelling,     # [String] name of the declaration
  :type,         # [String] spelling of the declaration's type
  :typedef_type, # [String] spelling of the underlying type of a typedef
  :bitwidth,     # [Integer] bit-field width; BindingGenerator treats -1 as "not a bit field"
  :sizeof_type,  # [Integer, nil] size of a struct/union type, nil for other kinds
  :offsetof,     # [Hash{ String => Integer }] offsets of struct fields as reported by the parser, {} for other kinds
  :enum_value,   # [Integer, nil] value of an enum constant, nil for other kinds
  :children,     # [Array<Node>] nested declarations
  keyword_init: true,
)
# Parse a C header with ffi-clang and return Node objects.
# To ease the maintenance, ffi-clang should be used only inside this class.
class HeaderParser
  # @param header [String] path of the C header to parse
  # @param cflags [Array<String>] command-line arguments handed to clang
  def initialize(header, cflags:)
    @translation_unit = FFI::Clang::Index.new.parse_translation_unit(header, cflags, [], {})
  end

  # @return [Array<Node>] one Node per child of the translation unit's root cursor
  def parse
    parse_children(@translation_unit.cursor)
  end

  private

  # Convert every direct child of +cursor+ into a Node.
  # @return [Array<Node>]
  def parse_children(cursor)
    [].tap do |nodes|
      cursor.visit_children do |child_cursor, _parent|
        nodes << parse_cursor(child_cursor)
        :continue # visit siblings only; descendants are handled by parse_cursor
      end
    end
  end

  # Convert a single cursor (and, recursively, its children) into a Node.
  # @raise [RuntimeError] when the cursor kind lacks the "cursor_" prefix
  # @return [Node]
  def parse_cursor(cursor)
    raw_kind = cursor.kind
    raise "unexpected cursor kind: #{raw_kind}" unless raw_kind.start_with?('cursor_')

    kind = raw_kind.to_s.delete_prefix('cursor_').to_sym
    children = parse_children(cursor)

    # Field offsets are collected for structs only.
    field_offsets =
      if kind == :struct
        children.each_with_object({}) do |child, offsets|
          next unless child.kind == :field_decl

          offsets[child.spelling] = cursor.type.offsetof(child.spelling)
        end
      else
        {}
      end

    sizeof_type = %i[struct union].include?(kind) ? cursor.type.sizeof : nil
    enum_value = kind == :enum_constant_decl ? cursor.enum_value : nil

    Node.new(
      kind: kind,
      spelling: cursor.spelling,
      type: cursor.type.spelling,
      typedef_type: cursor.typedef_type.spelling,
      bitwidth: cursor.bitwidth,
      sizeof_type: sizeof_type,
      offsetof: field_offsets,
      enum_value: enum_value,
      children: children,
    )
  end
end
# Convert Node objects to a Ruby binding source.
#
# The generated text replaces everything between the BINDGEN_BEG and BINDGEN_END
# marker lines of an existing source file; the text before/after the markers is
# carried over unchanged. The result is accumulated in #src.
class BindingGenerator
  BINDGEN_BEG = '### RJIT bindgen begin ###'
  BINDGEN_END = '### RJIT bindgen end ###'
  # Definitions emitted for types that are referenced but not targeted.
  # Unknown names fall back to a CType::Stub carrying the type name.
  DEFAULTS = { '_Bool' => 'CType::Bool.new' }
  DEFAULTS.default_proc = proc { |_h, k| "CType::Stub.new(:#{k})" }

  # @return [String] the generated source (empty until #generate is called)
  attr_reader :src

  # @param src_path [String]
  # @param consts [Hash{ Symbol => Array<String> }]
  # @param values [Hash{ Symbol => Array<String> }]
  # @param funcs [Array<String>]
  # @param types [Array<String>]
  # @param dynamic_types [Array<String>] #ifdef-dependent immediate types, which need Primitive.cexpr! for type detection
  # @param skip_fields [Hash{ Symbol => Array<String> }] Struct fields that are skipped from bindgen
  # @param ruby_fields [Hash{ Symbol => Array<String> }] Struct VALUE fields that are considered Ruby objects
  # @raise [RuntimeError] when the bindgen markers are missing or out of order in src_path
  def initialize(src_path:, consts:, values:, funcs:, types:, dynamic_types:, skip_fields:, ruby_fields:)
    @preamble, @postamble = split_ambles(src_path)
    @src = String.new
    # Everything is sorted so that the output does not depend on the order of the input lists.
    @consts = consts.transform_values(&:sort)
    @values = values.transform_values(&:sort)
    @funcs = funcs.sort
    @types = types.sort
    @dynamic_types = dynamic_types.sort
    @skip_fields = skip_fields.transform_keys(&:to_s)
    @ruby_fields = ruby_fields.transform_keys(&:to_s)
    # Names of types met while generating; used to emit stubs for untargeted ones.
    @references = Set.new
  end

  # Generate the whole binding source into #src.
  # @param nodes [Array<Node>] top-level nodes returned by HeaderParser#parse
  # @raise [RuntimeError] on an invalid constant name, a type that cannot be
  #   found/generated, or a non-immediate type listed in dynamic_types
  def generate(nodes)
    println @preamble

    # Define macros/enums
    @consts.each do |type, values|
      values.each do |value|
        raise "#{value} isn't a valid constant name" unless ('A'..'Z').include?(value[0])
        println " C::#{value} = Primitive.cexpr! %q{ #{type}2NUM(#{value}) }"
      end
    end
    println

    # Define variables
    @values.each do |type, values|
      values.each do |value|
        println " def C.#{value} = Primitive.cexpr!(%q{ #{type}2NUM(#{value}) })"
      end
    end
    println

    # Define function pointers
    @funcs.each do |func|
      println " def C.#{func}"
      println " Primitive.cexpr! %q{ SIZET2NUM((size_t)#{func}) }"
      println " end"
      println
    end

    # Build a hash table for type lookup by name
    nodes_index = flatten_nodes(nodes).group_by(&:spelling).transform_values do |values|
      # Try to search a declaration with definitions
      node_with_children = values.find { |v| !v.children.empty? }
      next node_with_children if node_with_children

      # Otherwise, assume the last one is the main declaration
      values.last
    end

    # Define types
    @types.each do |type|
      unless definition = generate_node(nodes_index[type])
        raise "Failed to find or generate type: #{type}"
      end
      println " def C.#{type}"
      println "@#{type} ||= #{definition}".gsub(/^/, " ").chomp
      println " end"
      println
    end

    # Define dynamic types
    @dynamic_types.each do |type|
      unless generate_node(nodes_index[type])&.start_with?('CType::Immediate')
        raise "Non-immediate type is given to dynamic_types: #{type}"
      end
      # Only one Primitive.cexpr! is allowed for each line: https://github.com/ruby/ruby/pull/9612
      println " def C.#{type}"
      println " @#{type} ||= CType::Immediate.find("
      println " Primitive.cexpr!(\"SIZEOF(#{type})\"),"
      println " Primitive.cexpr!(\"SIGNED_TYPE_P(#{type})\"),"
      println " )"
      println " end"
      println
    end

    # Leave a stub for types that are referenced but not targeted
    (@references - @types - @dynamic_types).each do |type|
      println " def C.#{type}"
      println " #{DEFAULTS[type]}"
      println " end"
      println
    end

    print @postamble
  end

  private

  # Make an array that includes all top-level and nested nodes
  # @param nodes [Array<Node>]
  # @return [Array<Node>] nested nodes first, then the given nodes
  def flatten_nodes(nodes)
    result = []
    nodes.each do |node|
      unless node.children.empty?
        result.concat(flatten_nodes(node.children))
      end
    end
    result.concat(nodes) # prioritize top-level nodes
    result
  end

  # Return code before BINDGEN_BEG and code after BINDGEN_END
  # (both marker lines are included in the returned parts).
  # @param src_path [String]
  # @return [Array(String, String)] preamble and postamble
  # @raise [RuntimeError] when a marker is missing or BINDGEN_BEG is not before BINDGEN_END
  def split_ambles(src_path)
    lines = File.read(src_path).lines

    preamble_end = lines.index { |l| l.include?(BINDGEN_BEG) }
    raise "`#{BINDGEN_BEG}` was not found in '#{src_path}'" if preamble_end.nil?

    postamble_beg = lines.index { |l| l.include?(BINDGEN_END) }
    raise "`#{BINDGEN_END}` was not found in '#{src_path}'" if postamble_beg.nil?
    raise "`#{BINDGEN_BEG}` was found after `#{BINDGEN_END}`" if preamble_end >= postamble_beg

    return lines[0..preamble_end].join, lines[postamble_beg..-1].join
  end

  # Generate code from a node. Used for constructing a complex nested node.
  # @param node [Node, nil]
  # @param sizeof_type [String, nil] C expression used for SIZEOF/OFFSETOF instead of
  #   node.type; passed when recursing into an unnamed nested struct/union
  # @return [String, nil] generated code, or nil when the node is nil or of an unhandled kind
  def generate_node(node, sizeof_type: nil)
    case node&.kind
    when :struct, :union
      # node.spelling is often empty for union, but we'd like to give it a name when it has one.
      buf = +"CType::#{node.kind.to_s.sub(/\A[a-z]/, &:upcase)}.new(\n"
      buf << " \"#{node.spelling}\", Primitive.cexpr!(\"SIZEOF(#{sizeof_type || node.type})\"),\n"
      bit_fields_end = node.children.index { |c| c.bitwidth == -1 } || node.children.size # first non-bit field index
      # Key into @skip_fields: "outer.member" for a nested unnamed type, otherwise the type's own name.
      # It does not depend on the child, so it is computed once per node.
      skip_type = sizeof_type&.gsub(/\(\(struct ([^\)]+) \*\)NULL\)->/, '\1.') || node.spelling
      skipped_fields = @skip_fields.fetch(skip_type, [])
      node.children.each_with_index do |child, i|
        next if skipped_fields.include?(child.spelling)

        # Formats one "name: type" entry; struct entries also carry the field offset.
        field_builder = proc do |field, type|
          if node.kind == :struct
            to_ruby = @ruby_fields.fetch(node.spelling, []).include?(field)
            if child.bitwidth > 0
              if bit_fields_end <= i # give up offsetof calculation for non-leading bit fields
                raise "non-leading bit fields are not supported. consider including '#{field}' in skip_fields."
              end
              offsetof = node.offsetof.fetch(field)
            else
              off_type = sizeof_type || "(*((#{node.type} *)NULL))"
              offsetof = "Primitive.cexpr!(\"OFFSETOF(#{off_type}, #{field})\")"
            end
            " #{field}: [#{type}, #{offsetof}#{', true' if to_ruby}],\n"
          else
            " #{field}: #{type},\n"
          end
        end

        case child
        # BitField is struct-specific. So it must be handled here.
        in Node[kind: :field_decl, spelling:, bitwidth:, children: [_grandchild, *]] if bitwidth > 0
          buf << field_builder.call(spelling, "CType::BitField.new(#{bitwidth}, #{node.offsetof.fetch(spelling) % 8})")
        # "(unnamed ...)" struct and union are handled here, which are also struct-specific.
        in Node[kind: :field_decl, spelling:, type:, children: [grandchild]] if type.match?(/\((unnamed|anonymous) [^)]+\)\z/)
          if sizeof_type
            child_type = "#{sizeof_type}.#{child.spelling}"
          else
            child_type = "((#{node.type} *)NULL)->#{child.spelling}"
          end
          buf << field_builder.call(spelling, generate_node(grandchild, sizeof_type: child_type).gsub(/^/, ' ').sub(/\A +/, ''))
        # In most cases, we'd like to let generate_type handle the type unless it's "(unnamed ...)".
        in Node[kind: :field_decl, spelling:, type:] if !type.empty?
          buf << field_builder.call(spelling, generate_type(type))
        else # forward declarations are ignored
        end
      end
      buf << ")"
    when :typedef_decl
      case node.children
      in [child]
        generate_node(child)
      in [child, Node[kind: :integer_literal]]
        generate_node(child)
      in _ unless node.typedef_type.empty?
        generate_type(node.typedef_type)
      end
    when :enum_decl
      generate_type('int')
    when :type_ref
      generate_type(node.spelling)
    end
  end

  # Generate code from a type name. Used for resolving the name of a simple leaf node.
  # The given string is never modified, so Node#type values stay intact and frozen strings are accepted.
  # @param type [String]
  # @return [String] generated code
  def generate_type(type)
    if type.match?(/\[\d+\]\z/)
      # Strip one array dimension on a copy; mutating the argument would corrupt the caller's Node.
      return "CType::Array.new { #{generate_type(type.sub(/\[\d+\]\z/, ''))} }"
    end
    type = type.delete_suffix('const')
    if type.end_with?('*')
      if type == 'const void *'
        # `CType::Pointer.new { CType::Immediate.parse("void") }` is never useful,
        # so specially handle that case here.
        return 'CType::Immediate.parse("void *")'
      end
      return "CType::Pointer.new { #{generate_type(type.delete_suffix('*').rstrip)} }"
    end

    type = type.gsub(/((const|volatile) )+/, '').rstrip
    if type.start_with?(/(struct|union|enum) /)
      target = type.split(' ', 2).last
      push_target(target)
      "self.#{target}"
    else
      begin
        ctype = Fiddle::Importer.parse_ctype(type)
      rescue Fiddle::DLError
        # Not a type Fiddle knows: treat it as a reference to another generated type.
        push_target(type)
        "self.#{type}"
      else
        # Convert any function pointers to void* to workaround FILE* vs int*
        if ctype == Fiddle::TYPE_VOIDP
          "CType::Immediate.parse(\"void *\")"
        else
          "CType::Immediate.parse(#{type.dump})"
        end
      end
    end
  end

  # Append +str+ to the generated source without a newline.
  def print(str)
    @src << str
  end

  # Append +str+ followed by a newline to the generated source.
  def println(str = "")
    @src << str << "\n"
  end

  # Remove one trailing newline from the generated source.
  def chomp
    @src.delete_suffix!("\n")
  end

  # Remove trailing whitespace from the generated source.
  def rstrip!
    @src.rstrip!
  end

  # Record +target+ as a referenced type name.
  # @raise [RuntimeError] when the name is not a plain identifier
  def push_target(target)
    unless target.match?(/\A\w+\z/)
      raise "invalid target: #{target}"
    end
    @references << target
  end
end
# Locate the source tree (two directories above this script), the binding file
# to regenerate, and the absolute build directory given on the command line.
src_dir = File.expand_path('../..', __dir__)
src_path = File.join(src_dir, 'rjit_c.rb')
build_dir = File.expand_path(build_dir)

# Include paths handed to clang, each turned into an -I flag.
cflags = [
  src_dir,
  build_dir,
  File.join(src_dir, 'include'),
  File.join(build_dir, ".ext/include/#{RUBY_PLATFORM}"),
].map { |dir| "-I#{dir}" }

# Clear .cache/clangd created by the language server, which could break this bindgen
# FileUtils is used instead of spawning `rm -rf` so no external command is needed;
# like the previous `exception: true`, a failed removal still raises.
require 'fileutils'
clangd_cache = File.join(src_dir, '.cache/clangd')
FileUtils.rm_r(clangd_cache) if Dir.exist?(clangd_cache)
# Parse rjit_c.h and generate rjit_c.rb
# The lists below select what is exported; BindingGenerator sorts each of them,
# so the order written here does not affect the output.
nodes = HeaderParser.new(File.join(src_dir, 'rjit_c.h'), cflags: cflags).parse
generator = BindingGenerator.new(
src_path: src_path,
# C macros/enum values exposed as C:: constants. The hash key is the prefix of
# the conversion macro used by the generator (LONG => LONG2NUM, SIZET => SIZET2NUM).
# Names must start with an uppercase letter or generation raises.
consts: {
LONG: %w[
UNLIMITED_ARGUMENTS
VM_ENV_DATA_INDEX_ME_CREF
VM_ENV_DATA_INDEX_SPECVAL
],
SIZET: %w[
ARRAY_REDEFINED_OP_FLAG
BOP_AND
BOP_AREF
BOP_EQ
BOP_EQQ
BOP_FREEZE
BOP_GE
BOP_GT
BOP_LE
BOP_LT
BOP_MINUS
BOP_MOD
BOP_OR
BOP_PLUS
BUILTIN_ATTR_LEAF
HASH_REDEFINED_OP_FLAG
INTEGER_REDEFINED_OP_FLAG
INVALID_SHAPE_ID
METHOD_VISI_PRIVATE
METHOD_VISI_PROTECTED
METHOD_VISI_PUBLIC
METHOD_VISI_UNDEF
OBJ_TOO_COMPLEX_SHAPE_ID
OPTIMIZED_METHOD_TYPE_BLOCK_CALL
OPTIMIZED_METHOD_TYPE_CALL
OPTIMIZED_METHOD_TYPE_SEND
OPTIMIZED_METHOD_TYPE_STRUCT_AREF
OPTIMIZED_METHOD_TYPE_STRUCT_ASET
RARRAY_EMBED_FLAG
RARRAY_EMBED_LEN_MASK
RARRAY_EMBED_LEN_SHIFT
RMODULE_IS_REFINEMENT
ROBJECT_EMBED
RSTRUCT_EMBED_LEN_MASK
RUBY_EVENT_CLASS
RUBY_EVENT_C_CALL
RUBY_EVENT_C_RETURN
RUBY_FIXNUM_FLAG
RUBY_FLONUM_FLAG
RUBY_FLONUM_MASK
RUBY_IMMEDIATE_MASK
RUBY_SPECIAL_SHIFT
RUBY_SYMBOL_FLAG
RUBY_T_ARRAY
RUBY_T_CLASS
RUBY_T_ICLASS
RUBY_T_HASH
RUBY_T_MASK
RUBY_T_MODULE
RUBY_T_STRING
RUBY_T_SYMBOL
RUBY_T_OBJECT
SHAPE_FLAG_SHIFT
SHAPE_FROZEN
SHAPE_ID_NUM_BITS
SHAPE_IVAR
SHAPE_MASK
SHAPE_ROOT
STRING_REDEFINED_OP_FLAG
T_OBJECT
VM_BLOCK_HANDLER_NONE
VM_CALL_ARGS_BLOCKARG
VM_CALL_ARGS_SPLAT
VM_CALL_FCALL
VM_CALL_FORWARDING
VM_CALL_KWARG
VM_CALL_KW_SPLAT
VM_CALL_KW_SPLAT_MUT
VM_CALL_KW_SPLAT_bit
VM_CALL_OPT_SEND
VM_CALL_TAILCALL
VM_CALL_TAILCALL_bit
VM_CALL_ZSUPER
VM_ENV_DATA_INDEX_FLAGS
VM_ENV_DATA_SIZE
VM_ENV_FLAG_LOCAL
VM_ENV_FLAG_WB_REQUIRED
VM_FRAME_FLAG_BMETHOD
VM_FRAME_FLAG_CFRAME
VM_FRAME_FLAG_CFRAME_KW
VM_FRAME_FLAG_LAMBDA
VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM
VM_FRAME_MAGIC_BLOCK
VM_FRAME_MAGIC_CFUNC
VM_FRAME_MAGIC_METHOD
VM_METHOD_TYPE_ALIAS
VM_METHOD_TYPE_ATTRSET
VM_METHOD_TYPE_BMETHOD
VM_METHOD_TYPE_CFUNC
VM_METHOD_TYPE_ISEQ
VM_METHOD_TYPE_IVAR
VM_METHOD_TYPE_MISSING
VM_METHOD_TYPE_NOTIMPLEMENTED
VM_METHOD_TYPE_OPTIMIZED
VM_METHOD_TYPE_REFINED
VM_METHOD_TYPE_UNDEF
VM_METHOD_TYPE_ZSUPER
VM_SPECIAL_OBJECT_VMCORE
RUBY_ENCODING_MASK
RUBY_FL_FREEZE
RHASH_PASS_AS_KEYWORDS
],
},
# C values exposed as `C.<name>` methods that evaluate the expression on each call,
# converted with the macro named by the hash key (SIZET => SIZET2NUM).
values: {
SIZET: %w[
block_type_iseq
imemo_iseq
imemo_callinfo
rb_block_param_proxy
rb_cArray
rb_cFalseClass
rb_cFloat
rb_cInteger
rb_cNilClass
rb_cString
rb_cSymbol
rb_cTrueClass
rb_rjit_global_events
rb_mRubyVMFrozenCore
rb_vm_insns_count
idRespond_to_missing
],
},
# C functions whose addresses are exposed as `C.<name>` methods
# (generated as SIZET2NUM((size_t)func)).
funcs: %w[
rb_ary_entry_internal
rb_ary_push
rb_ary_resurrect
rb_ary_store
rb_ec_ary_new_from_values
rb_ec_str_resurrect
rb_ensure_iv_list_size
rb_fix_aref
rb_fix_div_fix
rb_fix_mod_fix
rb_fix_mul_fix
rb_gc_writebarrier
rb_get_symbol_id
rb_hash_aref
rb_hash_aset
rb_hash_bulk_insert
rb_hash_new
rb_hash_new_with_size
rb_hash_resurrect
rb_ivar_get
rb_obj_as_string_result
rb_obj_is_kind_of
rb_str_concat_literals
rb_str_eql_internal
rb_str_getbyte
rb_vm_bh_to_procval
rb_vm_concat_array
rb_vm_defined
rb_vm_get_ev_const
rb_vm_getclassvariable
rb_vm_ic_hit_p
rb_vm_opt_newarray_min
rb_vm_opt_newarray_max
rb_vm_opt_newarray_hash
rb_vm_opt_newarray_pack
rb_vm_setinstancevariable
rb_vm_splat_array
rjit_full_cfunc_return
rjit_optimized_call
rjit_str_neq_internal
rjit_record_exit_stack
rb_ivar_defined
rb_vm_throw
rb_backref_get
rb_reg_last_match
rb_reg_match_pre
rb_reg_match_post
rb_reg_match_last
rb_reg_nth_match
rb_gvar_get
rb_range_new
rb_ary_tmp_new_from_values
rb_reg_new_ary
rb_ary_clear
rb_str_intern
rb_vm_setclassvariable
rb_str_bytesize
rjit_str_simple_append
rb_str_buf_append
rb_str_dup
rb_vm_yield_with_cfunc
rb_vm_set_ivar_id
rb_ary_dup
rjit_rb_ary_subseq_length
rb_ary_unshift_m
rjit_build_kwhash
rb_rjit_entry_stub_hit
rb_rjit_branch_stub_hit
rb_sym_to_proc
],
# C types for which a full `C.<name>` definition is generated. Each name must
# be found in the parsed header, otherwise generation raises. Types that are
# only referenced by these get a stub instead.
types: %w[
CALL_DATA
IC
ID
IVC
RArray
RB_BUILTIN
RBasic
RObject
RStruct
RString
attr_index_t
iseq_inline_constant_cache
iseq_inline_constant_cache_entry
iseq_inline_iv_cache_entry
iseq_inline_storage_entry
method_optimized_type
rb_block
rb_block_type
rb_builtin_function
rb_call_data
rb_callable_method_entry_struct
rb_callable_method_entry_t
rb_callcache
rb_callinfo
rb_captured_block
rb_cfunc_t
rb_control_frame_t
rb_cref_t
rb_execution_context_struct
rb_execution_context_t
rb_iseq_constant_body
rb_iseq_location_t
rb_iseq_struct
rb_iseq_t
rb_method_attr_t
rb_method_bmethod_t
rb_method_cfunc_t
rb_method_definition_struct
rb_method_entry_t
rb_method_iseq_t
rb_method_optimized_t
rb_method_type_t
rb_proc_t
rb_rjit_runtime_counters
rb_serial_t
rb_shape
rb_shape_t
rb_thread_struct
rb_jit_func_t
rb_iseq_param_keyword
rb_rjit_options
rb_callinfo_kwarg
],
# #ifdef-dependent immediate types, which need Primitive.cexpr! for type detection
dynamic_types: %w[
VALUE
shape_id_t
],
# Struct fields left out of the generated bindings, keyed by struct name.
# A nested unnamed member is addressed as "outer_struct.member"; the empty
# key matches structs whose spelling is empty.
skip_fields: {
'rb_execution_context_struct.machine': %w[regs], # differs between macOS and Linux
rb_execution_context_struct: %w[method_missing_reason], # non-leading bit fields not supported
rb_iseq_constant_body: %w[jit_exception jit_exception_calls yjit_payload yjit_calls_at_interv], # conditionally defined
rb_thread_struct: %w[status has_dedicated_nt to_kill abort_on_exception report_on_exception pending_interrupt_queue_checked],
:'' => %w[is_from_method is_lambda is_isolated], # rb_proc_t
},
# Struct fields flagged as Ruby objects: the generator appends `true` to the
# generated field entry for these.
ruby_fields: {
rb_iseq_constant_body: %w[
rjit_blocks
],
rb_iseq_location_struct: %w[
base_label
label
pathobj
],
rb_callable_method_entry_t: %w[
defined_class
],
rb_callable_method_entry_struct: %w[
defined_class
],
},
)
generator.generate(nodes)
# Write rjit_c.rb
# The whole file is overwritten; text outside the bindgen markers was carried
# over by the generator, so only the section between them changes.
File.write(src_path, generator.src)
|