File: xml.rb

package info (click to toggle)
ruby-crack 0.4.4-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,716 kB
  • sloc: ruby: 859; sh: 39; makefile: 3
file content (238 lines) | stat: -rw-r--r-- 6,831 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
require 'rexml/parsers/streamparser'
require 'rexml/parsers/baseparser'
require 'rexml/light/node'
require 'rexml/text'
require "rexml/document"
require 'date'
require 'time'
require 'stringio'
require 'yaml'
require 'bigdecimal'

# The reason for subclassing String for this specific plugin is to avoid
# dynamically inserting attributes on String instances (see the version previous
# to this commit). Doing that makes it impossible to dump the structure.
# A String that can additionally carry the XML attributes of the element
# whose text it holds. (The misspelled class name is kept as-is because other
# code refers to it.)
class REXMLUtiliyNodeString < String
  # Reader for the attributes attached to this string (nil until assigned).
  attr_reader :attributes

  # Writer for the attributes attached to this string.
  attr_writer :attributes
end

# This is a slightly modified version of the XMLUtilityNode from
# http://merb.devjavu.com/projects/merb/ticket/95 (has.sox@gmail.com)
# It's mainly just adding vowels, as I ht cd wth n vwls :)
# This represents the hard part of the work, all I did was change the
# underlying parser.
# Intermediate tree node built while parsing an XML document. Each node keeps
# its (undasherized) name, its attributes, its optional "type" attribute and
# its children (child nodes and/or text strings), and knows how to convert
# itself into a single-entry Hash of the form { name => value }.
class REXMLUtilityNode #:nodoc:
  attr_accessor :name, :attributes, :children, :type

  # Registry mapping a "type" attribute value to the lambda used to cast the
  # element's text. Stored in a class variable, so it is shared with subclasses.
  def self.typecasts
    @@typecasts
  end

  def self.typecasts=(obj)
    @@typecasts = obj
  end

  # Names of the types that are consumed as typecasts; a "type" attribute with
  # one of these values is removed from the node's attributes in #initialize.
  def self.available_typecasts
    @@available_typecasts
  end

  def self.available_typecasts=(obj)
    @@available_typecasts = obj
  end

  # Every cast except "string" maps nil to nil, so an empty typed element
  # yields nil; "string" maps nil to "" via #to_s.
  self.typecasts = {
    "integer"      => lambda { |v| v.nil? ? nil : v.to_i },
    # Anything other than the literal "false" (after stripping) is true.
    "boolean"      => lambda { |v| v.nil? ? nil : (v.strip != "false") },
    "datetime"     => lambda { |v| v.nil? ? nil : Time.parse(v).utc },
    "date"         => lambda { |v| v.nil? ? nil : Date.parse(v) },
    "dateTime"     => lambda { |v| v.nil? ? nil : Time.parse(v).utc },
    "decimal"      => lambda { |v| v.nil? ? nil : BigDecimal(v.to_s) },
    "double"       => lambda { |v| v.nil? ? nil : v.to_f },
    "float"        => lambda { |v| v.nil? ? nil : v.to_f },
    "string"       => lambda { |v| v.to_s },
    # Guard against nil: #to_hash calls the typecast with nil for an empty
    # element, which previously raised NoMethodError (nil.unpack).
    "base64Binary" => lambda { |v| v.nil? ? nil : v.unpack('m').first }
  }

  self.available_typecasts = self.typecasts.keys

  # @param name<String> The element name; dashes are converted to underscores.
  # @param normalized_attributes<Hash> Attribute name => value pairs whose
  #   values still contain XML entities (they are unnormalized here).
  def initialize(name, normalized_attributes = {})
    # unnormalize attribute values
    attributes = {}
    normalized_attributes.each do |key, value|
      attributes[key] = unnormalize_xml_entities(value)
    end

    @name         = name.tr("-", "_")
    # leave the type alone if we don't know what it is
    @type         = self.class.available_typecasts.include?(attributes["type"]) ? attributes.delete("type") : attributes["type"]

    @nil_element  = attributes.delete("nil") == "true"
    @attributes   = undasherize_keys(attributes)
    @children     = []
    @text         = false
  end

  # Appends a child (a node or a text String). Once any String has been
  # added the node is treated as a text node by #to_hash.
  def add_node(node)
    @text = true if node.is_a? String
    @children << node
  end

  # Converts this node into { name => value }, where value is:
  # * a StringIO (with original_filename/content_type accessors) for
  #   type == "file" nodes whose first child is absent or a String,
  # * the typecast text for text nodes (Strings are wrapped in
  #   REXMLUtiliyNodeString carrying the node's attributes),
  # * a flattened Array of child values for type == "array" nodes,
  # * otherwise a Hash of child values merged with the node's attributes,
  #   or nil (run through the typecast when a type is set) if that is empty.
  #
  # @return <Hash> A single-entry Hash keyed by the node name.
  def to_hash
    return file_to_hash if file_upload?
    return text_to_hash if @text

    # change repeating groups into an array
    groups = @children.group_by { |child| child.name }
    out = @type == "array" ? groups_to_array(groups) : groups_to_hash(groups)

    # An empty typed element still goes through its typecast (e.g. "string"
    # turns nil into "").
    out = typecast_value(out) if @type && out.nil?
    { name => out }
  end

  # Typecasts a value based upon its type. For instance, if
  # +node+ has #type == "integer",
  # {{[node.typecast_value("12") #=> 12]}}
  #
  # @param value<String> The value that is being typecast.
  #
  # @details [:type options]
  #   "integer"::
  #     converts +value+ to an integer with #to_i
  #   "boolean"::
  #     false when +value+, after stripping whitespace, is the literal
  #     "false"; true for anything else
  #   "datetime", "dateTime"::
  #     Parses +value+ using Time.parse, and returns a UTC Time
  #   "date"::
  #     Parses +value+ using Date.parse
  #   "decimal"::
  #     converts +value+ to a BigDecimal
  #   "double", "float"::
  #     converts +value+ to a Float with #to_f
  #   "string"::
  #     converts +value+ with #to_s
  #   "base64Binary"::
  #     decodes +value+ with String#unpack('m')
  #
  # @return <Integer, TrueClass, FalseClass, Time, Date, Object>
  #   The result of typecasting +value+.
  #
  # @note
  #   If +self+ does not have a "type" key, or if it's not one of the
  #   registered typecasts, the raw +value+ will be returned.
  def typecast_value(value)
    return value unless @type
    proc = self.class.typecasts[@type]
    proc.nil? ? value : proc.call(value)
  end

  # Take keys of the form foo-bar and convert them to foo_bar.
  # Mutates and returns +params+.
  def undasherize_keys(params)
    # Iterate over a snapshot of the keys because the hash is modified.
    params.keys.each do |key|
      params[key.tr("-", "_")] = params.delete(key)
    end
    params
  end

  # Get the inner_html of the REXML node: all children joined as strings
  # (child nodes are rendered through #to_s).
  def inner_html
    @children.join
  end

  # Converts the node into a readable HTML node.
  #
  # @return <String> The HTML node in text form.
  def to_html
    # Use a non-mutating merge so rendering does not leak a :type key into
    # @attributes (and from there into #to_hash output).
    html_attributes = @type ? attributes.merge(:type => @type) : attributes
    "<#{name}#{Crack::Util.to_xml_attributes(html_attributes)}>#{@nil_element ? '' : inner_html}</#{name}>"
  end

  # @alias #to_html #to_s
  def to_s
    to_html
  end

  private

  # True when this is a type == "file" node that has no nested element as its
  # first child (ACG: prevents an exception when a "file" tag has nodes
  # within it).
  def file_upload?
    @type == "file" && (@children.first.nil? || @children.first.is_a?(String))
  end

  # Builds { name => StringIO } from the base64-encoded first child.
  def file_to_hash
    encoded = @children.first || ''
    f = StringIO.new(encoded.unpack('m').first)
    class << f
      attr_accessor :original_filename, :content_type
    end
    f.original_filename = attributes['name'] || 'untitled'
    f.content_type = attributes['content_type'] || 'application/octet-stream'
    { name => f }
  end

  # Builds { name => typecast text }; String results carry the attributes.
  def text_to_hash
    t = typecast_value(unnormalize_xml_entities(inner_html))
    if t.is_a?(String)
      t = REXMLUtiliyNodeString.new(t)
      t.attributes = attributes
    end
    { name => t }
  end

  # Collects the values of all grouped children into one flat Array.
  def groups_to_array(groups)
    collected = groups.map do |key, nodes|
      if nodes.size == 1
        nodes.first.to_hash.values.first
      else
        nodes.map { |node| node.to_hash[key] }
      end
    end
    collected.flatten
  end

  # Merges grouped children (repeated names become Arrays) and the node's
  # attributes into one Hash; returns nil when the result is empty.
  def groups_to_hash(groups)
    out = {}
    groups.each do |key, nodes|
      if nodes.size == 1
        out.merge!(nodes.first.to_hash)
      else
        out.merge!(key => nodes.map { |node| node.to_hash[key] })
      end
    end
    out.merge!(attributes) unless attributes.empty?
    out.empty? ? nil : out
  end

  def unnormalize_xml_entities value
    REXML::Text.unnormalize(value)
  end
end

module Crack
  # XML parser backed by REXML's pull-style BaseParser. It builds a tree of
  # REXMLUtilityNode objects and converts the root node to a Hash.
  class REXMLParser
    # @param xml The XML source handed to REXML::Parsers::BaseParser.
    # @return <Hash> The root node's #to_hash result, or {} when no element
    #   was encountered.
    def self.parse(xml)
      open_nodes = []
      pull_parser = REXML::Parsers::BaseParser.new(xml)

      loop do
        kind, payload, attrs = pull_parser.pull
        break if kind == :end_document

        case kind
        when :start_element
          open_nodes.push REXMLUtilityNode.new(payload, attrs)
        when :end_element
          # The root element stays on the stack so it can be converted below.
          next unless open_nodes.size > 1
          finished = open_nodes.pop
          open_nodes.last.add_node(finished)
        when :text, :cdata
          # Skip whitespace-only text and text outside of any element.
          next if payload.strip.length == 0 || open_nodes.empty?
          open_nodes.last.add_node(payload)
        end
        # Every other event (doctype events included) is ignored.
      end

      open_nodes.empty? ? {} : open_nodes.pop.to_hash
    end
  end

  # Entry point for XML parsing with a swappable parser backend.
  class XML
    # The parser in use; defaults to REXMLParser until one is assigned.
    def self.parser
      @@parser = REXMLParser unless defined?(@@parser) && @@parser
      @@parser
    end

    # Replaces the parser backend; it must respond to .parse(xml).
    def self.parser=(parser)
      @@parser = parser
    end

    # Delegates to the configured parser.
    def self.parse(xml)
      self.parser.parse(xml)
    end
  end
end