File: cmap_spec.rb

package info (click to toggle)
ruby-pdf-reader 2.15.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 33,512 kB
  • sloc: ruby: 11,959; sh: 46; makefile: 11
file content (133 lines) | stat: -rw-r--r-- 5,533 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# typed: false
# coding: utf-8

# Specs for PDF::Reader::CMap. Every example parses a CMap fixture from the
# data directory beside this file and checks the resulting mapping from
# character codes to arrays of codepoints.
describe PDF::Reader::CMap do

  # Builds a CMap from a fixture file.
  #
  # fixture_name - file name (no directory) of a fixture inside the "data"
  #                directory that sits next to this spec file.
  #
  # Returns the PDF::Reader::CMap built from the fixture's raw contents. The
  # file is read through the binread spec helper, which is defined outside
  # this file.
  def load_cmap(fixture_name)
    path = File.join(File.dirname(__FILE__), "data", fixture_name)
    PDF::Reader::CMap.new(binread(path))
  end

  describe "initialisation" do
    context "cmap with bfchar" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_bfchar.txt")

        # Inspect the raw mapping hash directly rather than going via #decode
        expect(cmap.map).to be_a_kind_of(Hash)
        expect(cmap.size).to eq(9)
        expect(cmap.map[0x1]).to eq([0x48])
        expect(cmap.map[0x2]).to eq([0x65])
        expect(cmap.map[0x9]).to eq([0x73])
      end
    end

    context "cmap with bfchar that converts a character to a unicode codepoint" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_bfchar.txt")

        # Same fixture as the previous context, exercised through #decode
        expect(cmap.decode(0x1)).to eq([0x48])
        expect(cmap.decode(0x2)).to eq([0x65])
        expect(cmap.decode(0x9)).to eq([0x73])
      end
    end

    context "cmap with bfchar and bfrange" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_bfrange.txt")

        expect(cmap.decode(0x16C9)).to eq([0x4F38]) # mapped with the bfchar operator
        expect(cmap.decode(0x0003)).to eq([0x0020]) # mapped with the bfrange operator
        expect(cmap.decode(0x0004)).to eq([0x0020+1]) # mapped with the bfrange operator
        expect(cmap.decode(0x0005)).to eq([0x0020+2]) # mapped with the bfrange operator
      end
    end

    context "cmap with bfrange and some indentation" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_bfrange_two.txt")

        expect(cmap.decode(0x0100)).to eq([0x0100]) # mapped with the bfrange operator
        expect(cmap.decode(0xD900)).to eq([0xD900]) # mapped with the bfrange operator
      end
    end

    context "cmap with array version of bfrange" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_bfrange_three.txt")

        expect(cmap.size).to eql(256)
        expect(cmap.decode(0x00)).to eq([0xfffd]) # mapped with the bfrange operator
        expect(cmap.decode(0x01)).to eq([0x0050]) # mapped with the bfrange operator
        expect(cmap.decode(0x03)).to eq([0x0067]) # mapped with the bfrange operator
        expect(cmap.decode(0x08)).to eq([0x0073]) # mapped with the bfrange operator
      end
    end

    context "when endbfrange is not on a dedicated line" do
      it "correctly loads a cmap" do
        cmap = load_cmap("cmap_with_bfrange_four.txt")

        expect(cmap.size).to eql(3)
        expect(cmap.decode(0x61)).to eq([0x61]) # mapped with the bfrange operator
        expect(cmap.decode(0x62)).to eq([0x62]) # mapped with the bfrange operator
        expect(cmap.decode(0x63)).to eq([0x63]) # mapped with the bfrange operator
      end
    end

    context "cmap with bfchar and ligatures" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_ligatures.txt")

        expect(cmap.decode(0x00B7)).to eql([0x2019])
        # A single character code may decode to more than one codepoint
        expect(cmap.decode(0x00C0)).to eql([0x66, 0x69])
        expect(cmap.decode(0x00C1)).to eql([0x66, 0x6C])
      end
    end

    context "cmap with bfchar and surrogate pairs" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_surrogate_pairs.txt")

        # Several expected values lie above 0xFFFF and are returned as one
        # codepoint each
        expect(cmap.decode(0x0502)).to eql([0x03D1])
        expect(cmap.decode(0x0C09)).to eql([0x1D6FD])
        expect(cmap.decode(0x0723)).to eql([0x1D434])
        expect(cmap.decode(0x0C23)).to eql([0x1D717])
        expect(cmap.decode(0x0526)).to eql([0x20D7])
        expect(cmap.decode(0x072B)).to eql([0x1D43C])
        expect(cmap.decode(0x122C)).to eql([0xFFFD])
      end
    end

    context "cmap with bfchar with surrogate pairs and ligatures" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_multiple_surrogate_pairs.txt")

        expect(cmap.decode(0x0BC9)).to eql([0x1D443, 0x1D443])
      end
    end

    context "cmap with bfrange containing > 255 characters" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_large_bfrange.txt")

        expect(cmap.decode(0x00B7)).to eql([0x00B7])
        expect(cmap.decode(0x00C0)).to eql([0x00C0])
        expect(cmap.decode(0x00C1)).to eql([0x00C1])
      end
    end

    context "cmap with bfchar and surrogate pairs, where the surrogate pair starts with D800" do
      it "correctly loads character mapping" do
        cmap = load_cmap("cmap_with_surrogate_pairs_on_boundary.txt")

        expect(cmap.map).to be_a_kind_of(Hash)
        expect(cmap.size).to eq(27)
        expect(cmap.map[0x0]).to eq([0x10102])
        expect(cmap.map[0xB]).to eq([0x28])
        expect(cmap.map[0x1E]).to eq([0x3B])
        expect(cmap.map[0x0194]).to eq([0x25CF])
      end
    end
  end
end