File: extract_fonts.rb

package info (click to toggle)
ruby-pdf-reader 1.3.3-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 12,908 kB
  • ctags: 569
  • sloc: ruby: 8,330; makefile: 10
file content (77 lines) | stat: -rwxr-xr-x 1,793 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env ruby
# coding: utf-8

# This demonstrates a way to extract TTF fonts from a PDF. It could be expanded
# to support extra font formats if required. Be aware that many PDFs subset
# fonts before they're embedded so glyphs may be missing or re-arranged.

require 'pdf/reader'

module ExtractFonts

  class Extractor

    def page(page)
      count = 0

      return count if page.fonts.nil? || page.fonts.empty?

      page.fonts.each do |label, font|
        next if complete_refs[font]
        complete_refs[font] = true

        process_font(page, font)

        count += 1
      end

      count
    end

    private

    def process_font(page, font)
      font = page.objects.deref(font)

      case font[:Subtype]
      when :Type0 then
        font[:DescendantFonts].each { |f| process_font(page, f) }
      when :TrueType, :CIDFontType2 then
        ExtractFonts::TTF.new(page.objects, font).save("#{font[:BaseFont]}.ttf")
      else
        $stderr.puts "unsupported font type #{font[:Subtype]}"
      end
    end

    def complete_refs
      @complete_refs ||= {}
    end

  end

  class TTF

    def initialize(objects, font)
      @objects, @font = objects, font
      @descriptor = @objects.deref(@font[:FontDescriptor])
    end

    def save(filename)
      puts "#{filename}"
      if @descriptor && @descriptor[:FontFile2]
        stream = @objects.deref(@descriptor[:FontFile2])
        File.open(filename, "wb") { |file| file.write stream.unfiltered_data }
      else
        $stderr.puts "- TTF font not embedded"
      end
    end
  end
end

filename = File.expand_path(File.dirname(__FILE__)) + "/../spec/data/cairo-unicode.pdf"
extractor = ExtractFonts::Extractor.new

PDF::Reader.open(filename) do |reader|
  page = reader.page(1)
  extractor.page(page)
end