File: scrub_spec.rb

package info (click to toggle)
ruby2.5 2.5.5-3%2Bdeb10u4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 101,532 kB
  • sloc: ruby: 732,598; ansic: 669,262; xml: 25,363; yacc: 20,963; javascript: 6,680; sh: 3,610; lisp: 2,627; makefile: 596; python: 198; sed: 76; perl: 62; awk: 36; asm: 35
file content (101 lines) | stat: -rw-r--r-- 2,779 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# -*- encoding: utf-8 -*-
require File.expand_path("../../../spec_helper", __FILE__)

# Examples for String#scrub called with neither a replacement argument nor a
# block.
describe "String#scrub with a default replacement" do
  it "returns self for valid strings" do
    valid = "foo"

    # The comparison is by value (==); object identity is not asserted here.
    valid.scrub.should == valid
  end

  it "replaces invalid byte sequences" do
    # A single 0x81 byte labelled as UTF-8.
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    broken = "abc\u3042#{stray_byte}"

    broken.scrub.should == "abc\u3042\uFFFD"
  end

  it "returns a copy of self when the input encoding is BINARY" do
    binary = "foo".encode('BINARY')

    binary.scrub.should == "foo"
  end

  it "replaces invalid byte sequences when using ASCII as the input encoding" do
    truncated = [0xE3, 0x80].pack('CC').force_encoding('utf-8')
    # Same bytes, relabelled as ASCII before scrubbing.
    relabelled = "abc\u3042#{truncated}".force_encoding('ASCII')

    # The expectation has one "?" per non-ASCII byte: three for \u3042 and
    # two for the truncated sequence.
    relabelled.scrub.should == "abc?????"
  end
end

# Examples for String#scrub called with an explicit replacement argument.
describe "String#scrub with a custom replacement" do
  it "returns self for valid strings" do
    valid = "foo"

    # The comparison is by value (==); object identity is not asserted here.
    valid.scrub("*").should == valid
  end

  it "replaces invalid byte sequences" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    broken = "abc\u3042#{stray_byte}"

    broken.scrub("*").should == "abc\u3042*"
  end

  it "replaces an incomplete character at the end with a single replacement" do
    # Two bytes, yet the expectation below is a single "*".
    truncated = [0xE3, 0x80].pack('CC').force_encoding('utf-8')

    truncated.scrub("*").should == "*"
  end

  it "raises ArgumentError for replacements with an invalid encoding" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')
    # The replacement is itself a lone 0xE4 byte labelled as UTF-8.
    bad_replacement = [0xE4].pack('C').force_encoding('utf-8')

    -> { "foo#{stray_byte}".scrub(bad_replacement) }.should raise_error(ArgumentError)
  end

  it "raises TypeError when a non String replacement is given" do
    stray_byte = [0x81].pack('C').force_encoding('utf-8')

    -> { "foo#{stray_byte}".scrub(1) }.should raise_error(TypeError)
  end
end

# Examples for String#scrub called with a block that supplies the replacement.
describe "String#scrub with a block" do
  it "returns self for valid strings" do
    valid = "foo"

    # The comparison is by value (==); object identity is not asserted here.
    valid.scrub { |_bytes| "*" }.should == valid
  end

  it "replaces invalid byte sequences" do
    truncated = [0xE3, 0x80].pack('CC').force_encoding('utf-8')
    result = "abc\u3042#{truncated}".scrub do |bytes|
      # Render the bytes handed to the block as lowercase hex.
      hex = bytes.unpack("H*")[0]
      "<#{hex}>"
    end

    result.should == "abc\u3042<e380>"
  end

  it "replaces invalid byte sequences using a custom encoding" do
    two_bytes = [0x80, 0x80].pack('CC').force_encoding('utf-8')
    # Treat the bytes handed to the block as Windows-1252 and transcode them
    # to UTF-8.
    result = two_bytes.scrub { |bytes| bytes.encode(Encoding::UTF_8, Encoding::Windows_1252) }

    result.should == "€€"
  end
end

# Examples for String#scrub!. Both examples call scrub! on a string holding a
# lone 0x81 byte and then inspect that same object.
describe "String#scrub!" do
  # The input here is an invalid UTF-8 string, so the description says
  # "invalid" to match what the example actually exercises.
  it "modifies self for invalid strings" do
    x81 = [0x81].pack('C').force_encoding('utf-8')
    input = "a#{x81}"
    input.scrub!
    # The receiver itself is inspected, not the return value of scrub!.
    input.should == "a\uFFFD"
  end

  it "accepts blocks" do
    x81 = [0x81].pack('C').force_encoding('utf-8')
    input = "a#{x81}"
    # The block ignores its argument and always returns the same replacement.
    input.scrub! { |_b| "<?>" }
    input.should == "a<?>"
  end
end