# frozen_string_literal: true
module Loofah
#
# Error (a RuntimeError subclass) raised when Loofah could not find an
# appropriate scrubber.
#
class ScrubberNotFound < RuntimeError
end
#
# A Scrubber wraps up a block (or method) that is run on an HTML node (element):
#
# # change all <span> tags to <div> tags
# span2div = Loofah::Scrubber.new do |node|
# node.name = "div" if node.name == "span"
# end
#
# Alternatively, this scrubber could have been implemented as:
#
# class Span2Div < Loofah::Scrubber
# def scrub(node)
# node.name = "div" if node.name == "span"
# end
# end
# span2div = Span2Div.new
#
# This can then be run on a document:
#
# Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
# # => "<div>foo</div><p>bar</p>"
#
# Scrubbers can be run on a document in either a top-down traversal (the
# default) or bottom-up. Top-down scrubbers can optionally return
# Scrubber::STOP to terminate the traversal of a subtree.
#
class Scrubber
  # Sentinel a top-down scrubber may return to say "keep going into this node's subtree".
  CONTINUE = Object.new.freeze

  # Sentinel a top-down scrubber may return to say "do not descend into this node's subtree".
  STOP = Object.new.freeze

  # The traversal direction chosen at construction time: either
  # :top_down (the default) or :bottom_up.
  attr_reader :direction

  # The block given to +new+, or nil when none was given. Without a
  # block, the +scrub+ method is expected to have been implemented.
  attr_reader :block

  #
  #  Builds a scrubber from an options hash and an optional block.
  #
  #  The only option consulted is :direction, which may be
  #    :direction => :top_down (used when the option is absent)
  #  or
  #    :direction => :bottom_up
  #  Any other value raises ArgumentError.
  #
  #  During a top_down traversal, a block that returns
  #  Loofah::Scrubber::STOP ends the traversal of the current node's
  #  subtree.
  #
  #  Instead of passing a block, a class may inherit from
  #  Loofah::Scrubber and implement +scrub+.
  #
  def initialize(options = {}, &block)
    requested = options[:direction] || :top_down

    case requested
    when :top_down, :bottom_up
      @direction = requested
    else
      raise ArgumentError, "direction #{requested} must be one of :top_down or :bottom_up"
    end

    @block = block
  end

  #
  #  Walks +node+ and its descendants, handing each one to the block
  #  given to +new+ (or to +scrub+ when there is no block), in the
  #  direction that was selected at +new+ time.
  #
  def traverse(node)
    if direction == :bottom_up
      traverse_conditionally_bottom_up(node)
    else
      traverse_conditionally_top_down(node)
    end
  end

  #
  #  Per-node hook used when +new+ was not given a block. Subclasses
  #  are expected to override it; this base implementation always
  #  raises ScrubberNotFound.
  #
  def scrub(node)
    message = "No scrub method has been defined on #{self.class}"
    raise ScrubberNotFound, message
  end

  #
  #  Adds +value+ to the whitespace-separated list stored in the
  #  node's +attribute+.
  #
  #  A missing attribute is created holding just +value+; a value
  #  that is already in the list is not added a second time.
  #
  def append_attribute(node, attribute, value)
    tokens = (node.get_attribute(attribute) || "").split(/\s+/)
    # Array union keeps the existing order and skips +value+ when already present.
    merged = tokens | [value]
    node.set_attribute(attribute, merged.join(" "))
  end

  private

  # Applies the HTML5::Scrub rules to a single node and answers with
  # CONTINUE or STOP:
  #
  # * element nodes: allowed elements get their attributes scrubbed
  #   and yield CONTINUE, anything else yields STOP
  # * text / CDATA nodes: content needing escaping gets an escaped
  #   copy inserted before the node and yields STOP, otherwise CONTINUE
  # * every other node type yields STOP
  def html5lib_sanitize(node)
    case node.type
    when Nokogiri::XML::Node::ELEMENT_NODE
      return Scrubber::STOP unless HTML5::Scrub.allowed_element?(node.name)

      HTML5::Scrub.scrub_attributes(node)
      Scrubber::CONTINUE
    when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
      return Scrubber::CONTINUE unless HTML5::Scrub.cdata_needs_escaping?(node)

      node.before(HTML5::Scrub.cdata_escape(node))
      Scrubber::STOP
    else
      Scrubber::STOP
    end
  end

  # Pre-order walk: the node is handled first, and its children are
  # visited only when the handler's result is not STOP.
  def traverse_conditionally_top_down(node)
    verdict = block ? block.call(node) : scrub(node)
    return if verdict == STOP

    node.children.each { |child| traverse_conditionally_top_down(child) }
  end

  # Post-order walk: every child is visited before the node itself is
  # handled; the handler's result is returned but never inspected here.
  def traverse_conditionally_bottom_up(node)
    node.children.each { |child| traverse_conditionally_bottom_up(child) }
    block ? block.call(node) : scrub(node)
  end
end
end
|