File: util.rb

package info (click to toggle)
ruby-mechanize 2.7.6-1%2Bdeb10u1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,480 kB
  • sloc: ruby: 11,380; makefile: 5; sh: 4
file content (161 lines) | stat: -rw-r--r-- 4,585 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
require 'cgi'
require 'nkf'

class Mechanize::Util
  # Default MIME type table, used by Page::Image#mime_type.
  #
  # This constant refers to the very same object as
  # WEBrick::HTTPUtils::DefaultMimeTypes (it is an alias, not a copy), so
  # replacing its contents in place also changes the table that is visible
  # through the WEBrick constant.
  #
  # To use another Apache-compatible MIME table:
  #   mimetab = WEBrick::HTTPUtils.load_mime_types('/etc/mime.types')
  #   Mechanize::Util::DefaultMimeTypes.replace(mimetab)
  #
  # NOTE(review): this file does not require 'webrick' itself; presumably it
  # has been loaded before this file is evaluated -- confirm.
  DefaultMimeTypes = WEBrick::HTTPUtils::DefaultMimeTypes

  class << self
    # Serializes +parameters+ into a URL query string such as
    # "a=1&b=x+y".
    #
    # +parameters+ is flattened by Mechanize::Util.each_parameter (see
    # that method for the accepted shapes).  Each resulting key and
    # value is converted with +to_s+, escaped with CGI.escape and
    # joined with '='; the pairs are then joined with '&'.  An empty
    # string is returned when there is nothing to serialize.
    #
    # +enc+ is accepted but is not used by this method.
    def build_query_string(parameters, enc = nil)
      query = ''
      each_parameter(parameters) do |name, value|
        # Every pair contains '=', so the buffer is empty only before
        # the first pair has been appended.
        query << '&' unless query.empty?
        # CGI.escape is used on purpose: WEBrick::HTTP.escape* has some
        # problems about m17n on ruby-1.9.*.
        query << [CGI.escape(name.to_s), CGI.escape(value.to_s)].join('=')
      end
      query
    end

    # Walks +parameters+ and yields one flat [key, value] array per
    # parameter.  Without a block an Enumerator over the same pairs is
    # returned instead.
    #
    # +parameters+ may be a hash, or any object whose +each+ yields
    # [key, value] pairs (typically an array of two-element arrays).
    # Pairs whose key is nil are skipped.  Values are expanded as
    # follows:
    #
    # * string-like value: yielded as [key, string].
    # * array-like value: one pair per element, all with the same key.
    #   e.g. { a: [1, 2] } => [:a, 1], [:a, 2]
    # * hash-like value: one pair per entry, keyed "key[subkey]"; the
    #   entry's value is expanded again by the same rules.
    #   e.g. { a: { x: 1, y: 2 } } => ['a[x]', 1], ['a[y]', 2]
    #   e.g. { a: { q: [1, 2] } } => ['a[q]', 1], ['a[q]', 2]
    # * anything else: yielded unchanged as [key, value].
    def each_parameter(parameters, &block)
      return enum_for(:each_parameter, parameters) unless block_given?

      parameters.each do |name, value|
        each_parameter_1(name, value, &block)
      end
    end

    private

    # Expands a single key/value pair for each_parameter.  The
    # conversions are attempted in a fixed order (String, then Array,
    # then Hash) and only the first one that succeeds is used.
    def each_parameter_1(key, value, &block)
      return if key.nil?

      if (string = String.try_convert(value))
        yield [key, string]
      elsif (list = Array.try_convert(value))
        # Elements are yielded as they are; they are not expanded again.
        list.each do |element|
          yield [key, element]
        end
      elsif (mapping = Hash.try_convert(value))
        mapping.each do |subkey, subvalue|
          each_parameter_1(format('%s[%s]', key, subkey), subvalue, &block)
        end
      else
        yield [key, value]
      end
    end
  end

  # Re-encodes string +s+ into the character encoding named by +code+
  # (this is String#encode, so the conversion goes *to* +code+).
  #
  # +s+ is returned untouched when +s+ or +code+ is nil/false, or when
  # Mechanize.html_parser is not Nokogiri::HTML.
  #
  # When the conversion raises an EncodingError, a debug message is
  # written to +log+ (if one is given).  The error is then re-raised,
  # unless +ignore_encoding_error+ is true, in which case the original
  # +s+ is returned.
  def self.from_native_charset(s, code, ignore_encoding_error = false, log = nil)
    return s unless s && code && Mechanize.html_parser == Nokogiri::HTML

    begin
      s.encode(code)
    rescue EncodingError => error
      if log
        log.debug("from_native_charset: #{error.class}: form encoding: #{code.inspect} string: #{s}")
      end
      raise unless ignore_encoding_error

      s
    end
  end

  # Replaces HTML character references in +s+ with the characters they
  # stand for and returns the resulting string.  +s+ is returned as is
  # when it is nil/false.
  #
  # Three forms are recognized:
  #
  # * named references (&amp;), looked up in the NamedCharacters table
  #   of Mechanize.html_parser;
  # * decimal numeric references (&#65;);
  # * hexadecimal numeric references (&#x41; or &#X41;).
  #
  # A reference is left untouched when its name is not in the table or
  # when its number cannot be packed into a character.
  def self.html_unescape(s)
    return s unless s

    s.gsub(/&(\w+|#[0-9]+|#[xX]\h+);/) { |match|
      reference = $1
      number =
        if reference.start_with?('#x', '#X')
          reference[2..-1].hex
        elsif reference.start_with?('#')
          reference[1..-1].to_i
        else
          Mechanize.html_parser::NamedCharacters[reference]
        end

      next match unless number

      begin
        [number].pack('U')
      rescue StandardError
        # pack('U') raises (e.g. RangeError) for values it cannot
        # encode; keep the original text in that case.
        match
      end
    }
  end

  # Defines Mechanize::Util.guess_encoding(src), which returns the
  # Encoding that NKF guesses for the string +src+.  Which of the two
  # definitions below is used is decided once, when this file is loaded,
  # from the kind of value NKF::BINARY holds.  Both definitions fall back
  # to US-ASCII when NKF gives no usable answer.
  case NKF::BINARY
  when Encoding
    # NKF constants are Encoding objects, so the guess is used directly.
    def self.guess_encoding(src)
      # NKF.guess of JRuby may return nil
      NKF.guess(src) || Encoding::US_ASCII
    end
  else
    # Old NKF from 1.8, still bundled with Rubinius
    # Its constants are not Encoding objects, so they are mapped here.
    NKF_ENCODING_MAP = {
      NKF::UNKNOWN => Encoding::US_ASCII,
      NKF::BINARY  => Encoding::ASCII_8BIT,
      NKF::ASCII   => Encoding::US_ASCII,
      NKF::JIS     => Encoding::ISO_2022_JP,
      NKF::EUC     => Encoding::EUC_JP,
      NKF::SJIS    => Encoding::Shift_JIS,
      NKF::UTF8    => Encoding::UTF_8,
      NKF::UTF16   => Encoding::UTF_16BE,
      NKF::UTF32   => Encoding::UTF_32BE,
    }

    def self.guess_encoding(src)
      # A guess that is missing from the map (including nil) must not
      # turn into a nil result, because callers invoke methods on the
      # returned Encoding.  Use the same US-ASCII fallback as the
      # definition above.
      NKF_ENCODING_MAP.fetch(NKF.guess(src), Encoding::US_ASCII)
    end
  end

  # Returns the name of the character set detected for +src+.
  #
  # With a +src+, the name of the encoding returned by guess_encoding is
  # given back in upper case.  Without one (nil/false), the name of
  # ISO-8859-1 is returned.
  def self.detect_charset(src)
    return Encoding::ISO8859_1.name unless src

    guess_encoding(src).name.upcase
  end

  # Percent-encodes the characters of +str+ that match +unsafe+ and
  # returns the escaped string.
  #
  # +unsafe+ is handed to the parser's +escape+ method.  When it is
  # omitted, the parser's own UNSAFE pattern is used.
  def self.uri_escape(str, unsafe = nil)
    parser = uri_parser
    unsafe ||= (URI == parser ? URI::UNSAFE : parser.regexp[:UNSAFE])

    parser.escape str, unsafe
  end

  # Decodes the percent-encoded sequences in +str+ and returns the
  # unescaped string.
  def self.uri_unescape(str)
    uri_parser.unescape str
  end

  class << self
    private

    # Returns the object used for escaping and unescaping, created on
    # first use and kept in @parser for both public methods.
    #
    # URI::RFC2396_Parser is preferred because it provides +escape+,
    # +unescape+ and the :UNSAFE pattern by itself.  URI::Parser is
    # only the fallback, since that constant may name a different
    # parser class.  The URI module itself is the last resort for
    # libraries that have no parser class at all.
    def uri_parser
      @parser ||= if defined?(URI::RFC2396_Parser)
                    URI::RFC2396_Parser.new
                  else
                    begin
                      URI::Parser.new
                    rescue NameError
                      URI
                    end
                  end
    end
  end

end