1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
|
# coding: utf-8
# typed: true
# frozen_string_literal: true
require 'digest/md5'
module PDF
  class Reader
    # High level representation of a single PDF form xobject. Form xobjects
    # are contained pieces of content that can be inserted onto multiple
    # pages. They're generally used as a space efficient way to store
    # repetitive content (like logos, headers, footers, etc).
    #
    # This behaves and looks much like a limited PDF::Reader::Page class.
    #
    class FormXObject
      extend Forwardable

      # The stream returned by dereferencing the xobject passed to the
      # constructor.
      #: untyped
      attr_reader :xobject

      # Each of these readers is forwarded to the (private) resources object
      # built from this form's :Resources entry.
      def_delegators :resources, :color_spaces
      def_delegators :resources, :fonts
      def_delegators :resources, :graphic_states
      def_delegators :resources, :patterns
      def_delegators :resources, :procedure_sets
      def_delegators :resources, :properties
      def_delegators :resources, :shadings
      def_delegators :resources, :xobjects

      # page    - the object this form belongs to; it must respond to #objects.
      # xobject - the form xobject (or a reference to it); it is dereferenced
      #           to a stream via page.objects.
      # options - an optional hash. The :cache entry, when given, is a
      #           hash-like store used to memoise the parsed tokens of the
      #           content stream. A fresh Hash is used when it is absent.
      #
      #: (untyped, untyped, ?Hash[untyped, untyped]) -> void
      def initialize(page, xobject, options = {})
        @page    = page
        @objects = page.objects
        @cache   = options[:cache] || {}
        @xobject = @objects.deref_stream(xobject)
      end

      # return a hash of fonts used on this form.
      #
      # The keys are the font labels used within the form content stream.
      #
      # The values are PDF::Reader::Font instances that provide access
      # to most available metrics for each font.
      #
      # An empty hash is returned when the font dictionary dereferences to nil.
      #
      #: () -> untyped
      def font_objects
        # Guard the outer lookup the same way the per-font lookup below is
        # guarded, so a nil font dictionary doesn't raise NoMethodError.
        raw_fonts = @objects.deref_hash(fonts) || {}
        ::Hash[raw_fonts.map { |label, font|
          [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
        }]
      end

      # processes the raw content stream for this form in sequential order and
      # passes callbacks to the receiver objects.
      #
      # Every receiver is wrapped in a ValidatingReceiver before any callback
      # is dispatched to it.
      #
      # See the comments on PDF::Reader::Page#walk for more detail.
      #
      #: (*untyped) -> untyped
      def walk(*receivers)
        receivers = receivers.map { |receiver|
          ValidatingReceiver.new(receiver)
        }
        content_stream(receivers, raw_content)
      end

      # returns the raw content stream for this form. This is plumbing, nothing to
      # see here unless you're a PDF nerd like me.
      #
      #: () -> untyped
      def raw_content
        @xobject.unfiltered_data
      end

      private

      # Returns the resources that accompany this form. The Resources object
      # is built once from the stream's :Resources entry (an empty hash when
      # that entry dereferences to nil) and then memoised.
      #
      #: () -> untyped
      def resources
        @resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
      end

      # Invokes +name+ with +params+ on every receiver that responds to it.
      # Receivers that don't respond to +name+ are silently skipped.
      #
      #: (untyped, untyped, ?Array[untyped]) -> untyped
      def callback(receivers, name, params=[])
        receivers.each do |receiver|
          receiver.send(name, *params) if receiver.respond_to?(name)
        end
      end

      # Hex MD5 digest of the raw content stream, memoised per instance.
      #
      #: () -> untyped
      def content_stream_md5
        @content_stream_md5 ||= Digest::MD5.hexdigest(raw_content)
      end

      # Key under which the parsed tokens are stored in the cache. It is derived
      # from the content digest, so identical streams share one cache entry.
      #
      #: () -> untyped
      def cached_tokens_key
        @cached_tokens_key ||= "tokens-#{content_stream_md5}"
      end

      # Returns the content stream parsed into an array of tokens. The result
      # is stored in the cache so the stream is only parsed when no entry
      # exists for it yet.
      #
      #: () -> untyped
      def tokens
        @cache[cached_tokens_key] ||= begin
          buffer = Buffer.new(StringIO.new(raw_content), :content_stream => true)
          parser = Parser.new(buffer, @objects)
          result = []
          # parse_token returning a falsy value ends the loop
          while (token = parser.parse_token(PagesStrategy::OPERATORS))
            result << token
          end
          result
        end
      end

      # Walks the parsed tokens in order. Tokens that aren't operators are
      # accumulated as parameters; when an operator is reached the matching
      # callback is sent to the receivers with the accumulated parameters,
      # which are then reset.
      #
      # NOTE: +instructions+ is not used by this method. The tokens always
      # come from #tokens, which parses #raw_content.
      #
      # Raises MalformedPDFError if an EOFError is raised while processing.
      #
      #: (untyped, untyped) -> untyped
      def content_stream(receivers, instructions)
        params = []
        tokens.each do |token|
          if token.kind_of?(Token) && PagesStrategy::OPERATORS.key?(token)
            callback(receivers, PagesStrategy::OPERATORS[token], params)
            params.clear
          else
            params << token
          end
        end
      rescue EOFError
        raise MalformedPDFError, "End Of File while processing a content stream"
      end
    end
  end
end
|