File: page_layout.rb

package info (click to toggle)
ruby-pdf-reader 1.3.3-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 12,908 kB
  • ctags: 569
  • sloc: ruby: 8,330; makefile: 10
file content (127 lines) | stat: -rw-r--r-- 3,834 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# coding: utf-8

class PDF::Reader

  # Takes a collection of TextRun objects and renders them into a single
  # string that best approximates the way they'd appear on a render PDF page.
  #
  # media box should be a 4 number array that describes the dimensions of the
  # page to be rendered as described by the page's MediaBox attribute
  class PageLayout
    def initialize(runs, mediabox)
      raise ArgumentError, "a mediabox must be provided" if mediabox.nil?

      @runs    = merge_runs(runs)
      @mean_font_size   = mean(@runs.map(&:font_size)) || 0
      @mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
      @page_width  = mediabox[2] - mediabox[0]
      @page_height = mediabox[3] - mediabox[1]
      @x_offset = @runs.map(&:x).sort.first
      @current_platform_is_rbx_19 = RUBY_DESCRIPTION =~ /\Arubinius 2.0.0/ &&
                                      RUBY_VERSION >= "1.9.0"
    end

    def to_s
      return "" if @runs.empty?

      page = row_count.times.map { |i| " " * col_count }
      @runs.each do |run|
        x_pos = ((run.x - @x_offset) / col_multiplier).round
        y_pos = row_count - (run.y / row_multiplier).round
        if y_pos < row_count && y_pos >= 0 && x_pos < col_count && x_pos >= 0
          local_string_insert(page[y_pos], run.text, x_pos)
        end
      end
      interesting_rows(page).map(&:rstrip).join("\n")
    end

    private

    # given an array of strings, return a new array with empty rows from the
    # beginning and end removed.
    #
    #   interesting_rows([ "", "one", "two", "" ])
    #   => [ "one", "two" ]
    #
    def interesting_rows(rows)
      line_lengths = rows.map { |l| l.strip.length }
      first_line_with_text = line_lengths.index { |l| l > 0 }
      last_line_with_text  = line_lengths.size - line_lengths.reverse.index { |l| l > 0 }
      interesting_line_count = last_line_with_text - first_line_with_text
      rows[first_line_with_text, interesting_line_count].map
    end

    def row_count
      @row_count ||= (@page_height / @mean_font_size).floor
    end

    def col_count
      @col_count ||= ((@page_width  / @mean_glyph_width) * 1.05).floor
    end

    def row_multiplier
      @row_multiplier ||= @page_height.to_f / row_count.to_f
    end

    def col_multiplier
      @col_multiplier ||= @page_width.to_f / col_count.to_f
    end

    def mean(collection)
      if collection.size == 0
        0
      else
        collection.inject(0) { |accum, v| accum + v} / collection.size.to_f
      end
    end

    def each_line(&block)
      @runs.sort.group_by { |run|
        run.y.to_i
      }.map { |y, collection|
        yield y, collection
      }
    end

    # take a collection of TextRun objects and merge any that are in close
    # proximity
    def merge_runs(runs)
      runs.group_by { |char|
        char.y.to_i
      }.map { |y, chars|
        group_chars_into_runs(chars.sort)
      }.flatten.sort
    end

    def group_chars_into_runs(chars)
      runs = []
      while head = chars.shift
        if runs.empty?
          runs << head
        elsif runs.last.mergable?(head)
          runs[-1] = runs.last + head
        else
          runs << head
        end
      end
      runs
    end

    # This is a simple alternative to String#[]=. We can't use the string
    # method as it's buggy on rubinius 2.0rc1 (in 1.9 mode)
    #
    # See my bug report at https://github.com/rubinius/rubinius/issues/1985
    def local_string_insert(haystack, needle, index)
      if @current_platform_is_rbx_19
        char_count = needle.length
        haystack.replace(
          (haystack[0,index] || "") +
          needle +
          (haystack[index+char_count,500] || "")
        )
      else
        haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
      end
    end
  end
end