File: filter.rb

package info (click to toggle)
ruby-html-pipeline 2.14.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 424 kB
  • sloc: ruby: 2,265; sh: 13; makefile: 6
file content (165 lines) | stat: -rw-r--r-- 5,642 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# frozen_string_literal: true

module HTML
  class Pipeline
    # Base class for user content HTML filters. Each filter takes an
    # HTML string or Nokogiri::HTML::DocumentFragment, performs
    # modifications and/or writes information to the result hash. Filters must
    # return a DocumentFragment (typically the same instance provided to the call
    # method) or a String with HTML markup.
    #
    # Example filter that replaces all images with trollface:
    #
    #   class FuuuFilter < HTML::Pipeline::Filter
    #     def call
    #       doc.search('img').each do |img|
    #         img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
    #       end
    #       doc # return the modified fragment, not the value of #each
    #     end
    #   end
    #
    # The context Hash passes options to filters and should not be changed in
    # place.  A Result Hash allows filters to make extracted information
    # available to the caller and is mutable.
    #
    # Common context options:
    #   :base_url   - The site's base URL
    #   :repository - A Repository providing context for the HTML being processed
    #
    # Each filter may define additional options and output values. See the class
    # docs for more info.
    class Filter
      # Raised by #html when the filter was built with neither a String nor a
      # document (i.e. the input was nil).
      class InvalidDocumentException < StandardError; end

      # Public: Build a filter around a piece of HTML.
      #
      # doc     - The input: either a String of HTML markup or an already
      #           parsed document object. Strings are kept as-is and parsed
      #           lazily by #doc.
      # context - Optional Hash of options for the filter (default: {}).
      # result  - Optional Hash the filter writes extracted data to
      #           (default: {}).
      #
      # Calls #validate last, so subclasses can check the context (e.g. via
      # #needs) once all state is set up.
      def initialize(doc, context = nil, result = nil)
        if doc.is_a?(String)
          @html = doc.to_str
          @doc = nil
        else
          @doc = doc
          @html = nil
        end
        @context = context || {}
        @result = result || {}
        validate
      end

      # Public: Returns the Hash of options passed into the filter. Treat it as
      # read-only; extracted information belongs in #result.
      attr_reader :context

      # Public: Returns a Hash used to allow filters to pass back information
      # to callers of the various Pipelines.  This can be used for
      # #mentioned_users, for example.
      attr_reader :result

      # The Nokogiri::HTML::DocumentFragment to be manipulated. If the filter was
      # provided a String, parse into a DocumentFragment the first time this
      # method is called. The parsed document is memoized.
      def doc
        @doc ||= parse_html(html)
      end

      # The String representation of the document. If a DocumentFragment was
      # provided to the Filter, it is serialized into a String when this method is
      # called.
      #
      # NOTE: when the filter was built from a String, that original String is
      # returned every time, even after #doc has been parsed and modified.
      #
      # Raises InvalidDocumentException when the filter holds neither a String
      # nor a document.
      def html
        raise InvalidDocumentException if @html.nil? && @doc.nil?
        @html || doc.to_html
      end

      # The main filter entry point. The doc attribute is guaranteed to be a
      # Nokogiri::HTML::DocumentFragment when invoked. Subclasses should modify
      # this document in place or extract information and add it to the result
      # hash.
      #
      # Raises NotImplementedError unless overridden by a subclass.
      def call
        raise NotImplementedError
      end

      # Make sure the context has everything we need. Noop: Subclasses can override.
      def validate; end

      # The Repository object provided in the context hash, or nil when no
      # :repository was specified.
      #
      # It's assumed that the repository context has already been checked
      # for permissions
      def repository
        context[:repository]
      end

      # The User object provided in the context hash, or nil when no user
      # was specified
      def current_user
        context[:current_user]
      end

      # The site's base URL provided in the context hash, or '/' when no
      # base URL was specified.
      def base_url
        context[:base_url] || '/'
      end

      # Ensure the passed argument is a DocumentFragment. When a string is
      # provided, it is parsed and returned; otherwise, the DocumentFragment is
      # returned unmodified. The work is delegated to HTML::Pipeline.parse.
      def parse_html(html)
        HTML::Pipeline.parse(html)
      end

      # Helper method for filter subclasses used to determine if any of a node's
      # ancestors have one of the tag names specified.
      #
      # node - The Node object to check.
      # tags - An array of tag name strings to check. These should be downcase.
      #
      # Returns true when the node has a matching ancestor, false otherwise.
      def has_ancestor?(node, tags)
        ancestor = node.parent
        while ancestor
          return true if tags.include?(ancestor.name.downcase)

          ancestor = ancestor.parent
        end
        # Ran out of parents without a match: answer with a real boolean
        # instead of the nil a finished `while` loop evaluates to.
        false
      end

      # Perform a filter on doc with the given context.
      #
      # Returns a Nokogiri::HTML::DocumentFragment or a String containing HTML
      # markup.
      def self.call(doc, context = nil, result = nil)
        new(doc, context, result).call
      end

      # Like call but guarantees that a DocumentFragment is returned, even when
      # the last filter returns a String.
      def self.to_document(input, context = nil)
        html = call(input, context)
        HTML::Pipeline.parse(html)
      end

      # Like call but guarantees that a string of HTML markup is returned.
      def self.to_html(input, context = nil)
        output = call(input, context)
        if output.respond_to?(:to_html)
          output.to_html
        else
          output.to_s
        end
      end

      # Validator for required context. This will check that every key passed
      # in exists in the context hash.
      #
      # keys - The context keys the filter requires.
      #
      # Returns nil when nothing is missing.
      # Raises ArgumentError with a message naming the filter class and
      # listing all the missing context keys.
      def needs(*keys)
        missing = keys.reject { |key| context.include?(key) }
        # `empty?` rather than `any?`: `any?` looks at the truthiness of the
        # elements, so a missing key that is itself nil/false would slip by.
        return if missing.empty?

        raise ArgumentError,
              "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}"
      end
    end
  end
end