File: text.rb

package info (click to toggle)
ruby-htree 0.8+dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster, stretch
  • size: 448 kB
  • ctags: 703
  • sloc: ruby: 5,931; makefile: 24
file content (123 lines) | stat: -rw-r--r-- 3,219 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
require 'htree/modules'
require 'htree/raw_string'
require 'htree/htmlinfo'
require 'htree/encoder'
require 'htree/fstr'

# Load iconv only when String lacks #encode; it is the fallback converter
# used for charset conversion in that case.
require 'iconv' unless "".respond_to?(:encode)

module HTree
  # HTree::Text holds character data in two forms:
  # - rcdata: the text with `&' written as `&amp;' when it was built from a
  #   String, or taken over unchanged from another HTree::Text.
  # - normalized_rcdata: rcdata with character references decoded as much as
  #   possible. to_s, empty? and strip work on this form.
  class Text
    # :stopdoc:
    class << self
      alias new_internal new
    end
    # :startdoc:

    # HTree::Text.new creates a text node.
    #
    # The argument should be one of follows.
    # - String: every `&' is escaped as `&amp;'.
    # - HTree::Text: rcdata and normalized rcdata are reused as is.
    # - HTree::Location which points HTree::Text
    #
    # TypeError is raised for any other argument.
    def Text.new(arg)
      arg = arg.to_node if HTree::Location === arg
      case arg
      when Text
        new_internal arg.rcdata, arg.normalized_rcdata
      when String
        escaped = arg.gsub(/&/, '&amp;')
        # Reuse the caller's string when there was nothing to escape.
        new_internal(escaped == arg ? arg : escaped.freeze)
      else
        raise TypeError, "cannot initialize Text with #{arg.inspect}"
      end
    end

    # Stores rcdata and its normalized form.  When normalized_rcdata is not
    # given it is computed from rcdata by internal_normalize.
    def initialize(rcdata, normalized_rcdata=internal_normalize(rcdata)) # :notnew:
      init_raw_string
      @rcdata = rcdata && HTree.frozen_string(rcdata)
      # Share one string object when both forms are equal.
      @normalized_rcdata = @rcdata == normalized_rcdata ? @rcdata : normalized_rcdata
    end
    attr_reader :rcdata, :normalized_rcdata

    # Returns a frozen, normalized copy of rcdata.
    # - character references are decoded as much as possible.
    # - `&' itself is always represented as `&#38;'.
    # - unknown named references and out-of-range code points become `?'.
    # - characters which cannot be represented in the internal charset are
    #   converted to decimal numeric character references.
    def internal_normalize(rcdata)
      normalized = rcdata.gsub(/&(?:#([0-9]+)|#x([0-9a-fA-F]+)|([A-Za-z][A-Za-z0-9]*));/) {
        code_point =
          if $1
            $1.to_i
          elsif $2
            $2.hex
          elsif $3
            NamedCharacters[$3]
          end
        internal_decode_code_point(code_point)
      }
      HTree.frozen_string(normalized)
    end
    private :internal_normalize

    # Returns the normalized replacement for a single character reference
    # whose code point is u (nil for an unknown named reference).
    def internal_decode_code_point(u)
      if !u || u < 0 || 0x7fffffff < u
        '?'
      elsif u == 38 # '&' character.
        '&#38;'
      elsif u <= 0x7f
        [u].pack("C")
      else
        internal_encode_code_point(u)
      end
    end
    private :internal_decode_code_point

    # Converts the non-ASCII code point u to the internal charset.
    # Falls back to a decimal numeric character reference when the code point
    # is not valid UTF-8 (e.g. a surrogate) or has no representation in the
    # internal charset, so that to_s can recognize it.
    def internal_encode_code_point(u)
      decimal_reference = "&##{u};"
      us = [u].pack("U")
      if us.respond_to? :encode
        return decimal_reference unless us.valid_encoding?
        begin
          # Plain encode is enough: us is a single non-ASCII character, so
          # there is nothing for XML escaping to do, and a failure must yield
          # a decimal (not hexadecimal) reference.
          us.encode(Encoder.internal_charset)
        rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
          decimal_reference
        end
      else
        begin
          Iconv.conv(Encoder.internal_charset, 'UTF-8', us)
        rescue Iconv::Failure
          decimal_reference
        end
      end
    end
    private :internal_encode_code_point

    # HTree::Text#to_s converts the text to a string.
    # - character references are decoded as much as possible.
    # - undecodable character reference are converted to `?' character.
    def to_s
      @normalized_rcdata.gsub(/&(?:#([0-9]+));/) {
        code_point = $1.to_i
        code_point <= 0x7f ? [code_point].pack("C") : '?'
      }
    end

    # HTree::Text#empty? returns true if the normalized text has no characters.
    def empty?
      @normalized_rcdata.empty?
    end

    # HTree::Text#strip returns a text without leading and trailing
    # whitespace (and literal `&nbsp;' sequences).
    # self is returned when there is nothing to remove.
    def strip
      stripped = @normalized_rcdata.sub(/\A(?:\s|&nbsp;)+/, '').sub(/(?:\s|&nbsp;)+\z/, '')
      return self if stripped == @normalized_rcdata
      stripped.freeze
      Text.new_internal(stripped, stripped)
    end

    # HTree::Text.concat returns a text which is concatenation of arguments.
    #
    # An argument should be one of follows.
    # - String
    # - HTree::Text
    # - HTree::Location which points HTree::Text
    def Text.concat(*args)
      # dup gives a mutable buffer even when string literals are frozen.
      rcdata = ''.dup
      args.each {|arg|
        arg = arg.to_node if HTree::Location === arg
        rcdata << (Text === arg ? arg.rcdata : arg.gsub(/&/, '&amp;'))
      }
      new_internal rcdata
    end
  end
end