File: unicode.rb

package info (click to toggle)
jruby 1.7.26-1+deb9u1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 84,572 kB
  • sloc: ruby: 669,910; java: 253,056; xml: 35,152; ansic: 9,187; yacc: 7,267; cpp: 5,244; sh: 1,036; makefile: 345; jsp: 48; tcl: 40
file content (60 lines) | stat: -rw-r--r-- 1,885 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# -*- encoding: utf-8 -*-

# Shared examples for String#unpack with the "U" directive. Every example
# passes a "U"-based format string to String#unpack and compares the returned
# array of integers against the expected codepoints.
describe :string_unpack_unicode, :shared => true do
  it "decodes Unicode codepoints as ASCII values" do
    # Each row is [input, expected]; the "U" format is supplied to the matcher.
    # Covers single-byte inputs plus two-byte sequences up to codepoint 255.
    [ ["\x00",      [0]],
      ["\x01",      [1]],
      ["\x08",      [8]],
      ["\x0f",      [15]],
      ["\x18",      [24]],
      ["\x1f",      [31]],
      ["\x7f",      [127]],
      ["\xc2\x80",  [128]],
      ["\xc2\x81",  [129]],
      ["\xc3\xbf",  [255]]
    ].should be_computed_by(:unpack, "U")
  end

  it "decodes the number of characters specified by the count modifier" do
    # Each row is [input, format, expected]; the input always holds four
    # encoded characters, so the count alone limits how many are decoded.
    [ ["\xc2\x80\xc2\x81\xc2\x82\xc2\x83", "U1", [0x80]],
      ["\xc2\x80\xc2\x81\xc2\x82\xc2\x83", "U2", [0x80, 0x81]],
      ["\xc2\x80\xc2\x81\xc2\x82\xc2\x83", "U3", [0x80, 0x81, 0x82]]
    ].should be_computed_by(:unpack)
  end

  it "implicitly has a count of one when no count modifier is passed" do
    # Bare "U" with no count: only the first of the four encoded characters
    # should be decoded. Passing "U1" here would merely repeat the
    # explicit-count example and leave the implicit default untested.
    "\xc2\x80\xc2\x81\xc2\x82\xc2\x83".unpack("U").should == [0x80]
  end

  it "decodes all remaining characters when passed the '*' modifier" do
    "\xc2\x80\xc2\x81\xc2\x82\xc2\x83".unpack("U*").should == [0x80, 0x81, 0x82, 0x83]
  end

  it "decodes UTF-8 BMP codepoints" do
    # Boundary values: first and last two-byte sequences (0x80, 0x7ff) and
    # first and last three-byte sequences (0x800, 0xffff).
    [ ["\xc2\x80",      [0x80]],
      ["\xdf\xbf",      [0x7ff]],
      ["\xe0\xa0\x80",  [0x800]],
      ["\xef\xbf\xbf",  [0xffff]]
    ].should be_computed_by(:unpack, "U")
  end

  it "decodes UTF-8 max codepoints" do
    # Four-byte sequences, from 0x10000 up to 0x10ffff.
    [ ["\xf0\x90\x80\x80", [0x10000]],
      ["\xf3\xbf\xbf\xbf", [0xfffff]],
      ["\xf4\x80\x80\x80", [0x100000]],
      ["\xf4\x8f\xbf\xbf", [0x10ffff]]
    ].should be_computed_by(:unpack, "U")
  end

  it "does not decode any items for directives exceeding the input string size" do
    # Four directives but only one encoded character: the result has a single
    # element rather than being padded for the unmatched directives.
    "\xc2\x80".unpack("UUUU").should == [0x80]
  end

  it "ignores NULL bytes between directives" do
    "\x01\x02".unpack("U\x00U").should == [1, 2]
  end

  it "ignores spaces between directives" do
    "\x01\x02".unpack("U U").should == [1, 2]
  end
end