File: extract_fonts.rb

package info (click to toggle)
ruby-pdf-reader 2.15.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 33,512 kB
  • sloc: ruby: 11,959; sh: 46; makefile: 11
file content (82 lines) | stat: -rwxr-xr-x 1,898 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env ruby
# coding: utf-8

# This demonstrates a way to extract TTF fonts from a PDF. It could be expanded
# to support extra font formats if required. Be aware that many PDFs subset
# fonts before they're embedded so glyphs may be missing or re-arranged.

require 'pdf/reader'

module ExtractFonts

  class Extractor

    def page(page)
      count = 0

      return count if page.fonts.nil? || page.fonts.empty?

      page.fonts.each do |label, font|
        next if complete_refs[label]
        complete_refs[label] = true

        process_font(page, font)

        count += 1
      end

      count
    end

    private

    def process_font(page, font)
      font = page.objects.deref(font)

      case font[:Subtype]
      when :Type0 then
        font[:DescendantFonts].each { |f| process_font(page, f) }
      when :TrueType, :CIDFontType2 then
        ExtractFonts::TTF.new(page.objects, font).save("#{font[:BaseFont]}.ttf")
      else
        $stderr.puts "unsupported font type #{font[:Subtype]} for #{font[:BaseFont]}"
      end
    end

    def complete_refs
      @complete_refs ||= {}
    end

  end

  class TTF

    def initialize(objects, font)
      @objects, @font = objects, font
      @descriptor = @objects.deref(@font[:FontDescriptor])
    end

    def save(filename)
      puts "#{filename}"
      if @descriptor && @descriptor[:FontFile2]
        stream = @objects.deref(@descriptor[:FontFile2])
        File.open(filename, "wb") { |file| file.write stream.unfiltered_data }
      else
        $stderr.puts "- TTF font not embedded"
      end
    end
  end
end

if ARGV.size == 0 # default file name
  ARGV << File.expand_path(File.join(File.dirname(__dir__), "spec", "data", "cairo-unicode.pdf")) 
end

extractor = ExtractFonts::Extractor.new

ARGV.each do |arg|
  PDF::Reader.open(arg) do |reader|
    page = reader.page(1)
    extractor.page(page)
  end
end