1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
# coding: utf-8
class PDF::Reader
# Takes a collection of TextRun objects and renders them into a single
# string that best approximates the way they'd appear on a rendered PDF page.
#
# The media box should be a four-number array that describes the dimensions
# of the page to be rendered, as described by the page's MediaBox attribute.
class PageLayout
# Prepares the state needed to lay the given runs out as plain text.
#
# runs     - collection of text run objects; they are merged via merge_runs
#            and must respond to font_size, mean_character_width and x.
# mediabox - four element array; page width is [2] - [0] and page height
#            is [3] - [1].
#
# Raises ArgumentError when mediabox is nil.
def initialize(runs, mediabox)
  if mediabox.nil?
    raise ArgumentError, "a mediabox must be provided"
  end

  @runs = merge_runs(runs)
  @mean_font_size = mean(@runs.map { |run| run.font_size }) || 0
  @mean_glyph_width = mean(@runs.map { |run| run.mean_character_width }) || 0
  @page_width = mediabox[2] - mediabox[0]
  @page_height = mediabox[3] - mediabox[1]

  # Smallest x of any run (nil when there are no runs); to_s subtracts it
  # from every run's x before converting to a column.
  @x_offset = @runs.map { |run| run.x }.min

  # Remember whether the String#[]= workaround in local_string_insert is
  # needed on this interpreter.
  on_rubinius = RUBY_DESCRIPTION =~ /\Arubinius 2.0.0/
  @current_platform_is_rbx_19 = on_rubinius && RUBY_VERSION >= "1.9.0"
end
# Renders the runs onto a grid of space-filled rows and returns the result
# as a single newline-joined string.
#
# Returns "" when there are no runs. Runs whose computed row or column fall
# outside the grid are skipped. Blank rows at the top and bottom are dropped
# and every remaining row has trailing whitespace removed.
def to_s
  return "" if @runs.empty?

  grid = row_count.times.map { " " * col_count }

  @runs.each do |run|
    column = ((run.x - @x_offset) / col_multiplier).round
    # Row 0 is the top of the grid, so the row index is counted down from
    # row_count as y grows.
    row = row_count - (run.y / row_multiplier).round

    next unless (0...row_count).cover?(row) && (0...col_count).cover?(column)

    local_string_insert(grid[row], run.text, column)
  end

  trimmed = interesting_rows(grid).map { |line| line.rstrip }
  trimmed.join("\n")
end
private
# Given an array of strings, return a new Array with the blank rows at the
# beginning and end removed. A row counts as blank when it is empty or holds
# only whitespace. Blank rows between two non-blank rows are kept.
#
#   interesting_rows([ "", "one", "two", "" ])
#   => [ "one", "two" ]
#
# Returns [] when every row is blank (or when rows itself is empty).
def interesting_rows(rows)
  first_with_text = rows.index { |row| !row.strip.empty? }
  return [] if first_with_text.nil?

  # A non-blank row exists, so rindex is guaranteed to find one as well.
  last_with_text = rows.rindex { |row| !row.strip.empty? }

  # Slice directly so callers always receive an Array, never an Enumerator.
  rows[first_with_text..last_with_text]
end
# Number of rows in the text grid: the page height divided by the mean font
# size, rounded down. The value is computed once and then cached.
def row_count
  @row_count ||= begin
    rows = @page_height / @mean_font_size
    rows.floor
  end
end
# Number of columns in the text grid: the page width divided by the mean
# glyph width, scaled by 1.05 and rounded down. The value is computed once
# and then cached.
# NOTE(review): the 1.05 factor makes the grid 5% wider than the raw
# estimate; the reason is not stated in this file.
def col_count
  return @col_count if @col_count

  glyphs_per_row = @page_width / @mean_glyph_width
  @col_count = (glyphs_per_row * 1.05).floor
end
# Page units covered by one grid row (page height / row_count) as a Float.
# The value is computed once and then cached.
def row_multiplier
  return @row_multiplier if @row_multiplier

  height = @page_height.to_f
  @row_multiplier = height / row_count.to_f
end
# Page units covered by one grid column (page width / col_count) as a Float.
# The value is computed once and then cached.
def col_multiplier
  return @col_multiplier if @col_multiplier

  width = @page_width.to_f
  @col_multiplier = width / col_count.to_f
end
# Arithmetic mean of a collection of numbers.
#
# Returns the Integer 0 for an empty collection, otherwise the sum of the
# values divided by the element count as a Float.
def mean(collection)
  count = collection.size
  return 0 if count == 0

  total = collection.inject(0) { |sum, value| sum + value }
  total / count.to_f
end
# Sorts the runs, groups them by the integer part of their y position, and
# yields each (y, runs) pair to the block.
#
# Returns an Array holding the block's result for every group.
def each_line(&block)
  lines = @runs.sort.group_by { |run| run.y.to_i }

  lines.map do |y, collection|
    yield y, collection
  end
end
# Take a collection of TextRun objects and merge any that are in close
# proximity.
#
# Runs are bucketed by the integer part of their y position. Each bucket is
# sorted and passed to group_chars_into_runs, and the combined results are
# returned as one sorted Array.
def merge_runs(runs)
  lines = runs.group_by { |run| run.y.to_i }.values
  merged = lines.flat_map { |line| group_chars_into_runs(line.sort) }
  merged.sort
end
# Walks chars in order and folds each element into the previous run whenever
# the previous run reports mergable?(element); otherwise the element starts
# a new run.
#
# chars - ordered Array of objects responding to mergable?(other) and
#         +(other).
#
# Returns a new Array of runs. The chars array itself is left untouched.
def group_chars_into_runs(chars)
  chars.each_with_object([]) do |char, runs|
    previous = runs.last

    if previous && previous.mergable?(char)
      # Replace the last run with the combined run.
      runs[-1] = previous + char
    else
      runs << char
    end
  end
end
# Overwrites part of haystack, in place, with needle starting at index.
#
# This is a simple alternative to String#[]=. We can't use the string
# method everywhere as it's buggy on rubinius 2.0rc1 (in 1.9 mode).
#
# See the bug report at https://github.com/rubinius/rubinius/issues/1985
#
# haystack - String to modify in place
# needle   - String written over the characters starting at index
# index    - position in haystack where needle begins
def local_string_insert(haystack, needle, index)
  if @current_platform_is_rbx_19
    # Rebuild the row by hand: head + needle + whatever follows the
    # overwritten span. The tail is taken to the end of the string so that
    # long rows are not cut short.
    head = haystack[0, index] || ""
    tail = haystack[(index + needle.length)..-1] || ""
    haystack.replace(head + needle + tail)
  else
    haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
  end
end
end
end
|