File: document.rb

package info (click to toggle)
ruby-roadie 5.2.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 512 kB
  • sloc: ruby: 3,418; makefile: 5
file content (227 lines) | stat: -rw-r--r-- 8,136 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# frozen_string_literal: true

module Roadie
  # The main entry point for Roadie. A document represents a working unit and
  # is built with the input HTML and the configuration options you need.
  #
  # A Document must never be used from two threads at the same time. Reusing
  # Documents is discouraged.
  #
  # Stylesheets are added to the HTML from three different sources:
  # 1. Stylesheets inside the document ( +<style>+ elements)
  # 2. Stylesheets referenced by the DOM ( +<link>+ elements)
  # 3. The internal stylesheet (see {#add_css})
  #
  # The internal stylesheet is used last and gets the highest priority. The
  # rest is used in the same order as browsers are supposed to use them.
  #
  # The execution methods are {#transform} and {#transform_partial}.
  #
  # @attr [#call] before_transformation Callback to call just before {#transform}ation begins. Will be called with the parsed DOM tree and the {Document} instance.
  # @attr [#call] after_transformation Callback to call just before {#transform}ation is completed (before the DOM is serialized). Will be called with the current DOM tree and the {Document} instance.
  class Document
    # Modes accepted by {#mode=}; each one maps to a Nokogiri save format in
    # {#serialize_document}.
    VALID_MODES = %i[html xhtml xml].freeze
    private_constant :VALID_MODES

    # The input HTML and the provider lists used to look up stylesheets.
    # The normal provider list defaults to a single FilesystemProvider; the
    # external provider list defaults to an empty list.
    attr_reader :html, :asset_providers, :external_asset_providers

    # URL options. If none are given no URL rewriting will take place.
    # @see UrlGenerator#initialize
    attr_accessor :url_options

    # Callbacks; anything that does not respond to +#call+ (such as the
    # default +nil+) is silently skipped.
    attr_accessor :before_transformation, :after_transformation

    # Should CSS that cannot be inlined be kept in a new `<style>` element in `<head>`?
    # Defaults to +true+.
    attr_accessor :keep_uninlinable_css

    # Merge media queries to increase performance and reduce email size if enabled.
    # Defaults to +true+.
    # This will change specificity in some cases, like for example:
    #   @media(max-width: 600px) { .col-6 { display: block; } }
    #   @media(max-width: 400px) { .col-12 { display: inline-block; } }
    #   @media(max-width: 600px) { .col-12 { display: block; } }
    # will become
    #   @media(max-width: 600px) { .col-6 { display: block; } .col-12 { display: block; } }
    #   @media(max-width: 400px) { .col-12 { display: inline-block; } }
    # which would change the styling on the page
    attr_accessor :merge_media_queries

    # Integer representing a bitmap set of options used by Nokogiri during serialization.
    # For the complete set of available options look into +Nokogiri::XML::Node::SaveOptions+.
    attr_reader :serialization_options

    # The mode to generate markup in. Valid values are `:html` (default),
    # `:xhtml` and `:xml`.
    attr_reader :mode

    # @param [String] html the input HTML
    def initialize(html)
      @html = html
      @css = +"" # unfrozen buffer; #add_css appends to it in place
      @mode = :html
      @keep_uninlinable_css = true
      @merge_media_queries = true
      @serialization_options =
        Nokogiri::XML::Node::SaveOptions::NO_DECLARATION |
        Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
      @asset_providers = ProviderList.wrap(FilesystemProvider.new)
      @external_asset_providers = ProviderList.empty
    end

    # Append additional CSS to the document's internal stylesheet. Each
    # addition is separated from the previous content by a blank line.
    # @param [String] new_css
    def add_css(new_css)
      @css << "\n\n" << new_css
    end

    # Transform the input HTML as a full document and returns the processed
    # HTML.
    #
    # Before the transformation begins, the {#before_transformation} callback
    # will be called with the parsed HTML tree and the {Document} instance, and
    # after all work is complete the {#after_transformation} callback will be
    # invoked in the same way.
    #
    # Once the callbacks have run, the +data-roadie-ignore+ marker attributes
    # are stripped from the tree and the result is serialized.
    #
    # Most of the work is delegated to other classes. A list of them can be
    # seen below.
    #
    # @see MarkupImprover MarkupImprover (improves the markup of the DOM)
    # @see Inliner Inliner (inlines the stylesheets)
    # @see UrlRewriter UrlRewriter (rewrites URLs and makes them absolute)
    # @see #transform_partial Transforms partial documents (fragments)
    #
    # @return [String] the transformed HTML
    def transform
      dom = Nokogiri::HTML.parse html

      callback before_transformation, dom

      improve dom
      inline dom, keep_uninlinable_in: :head
      rewrite_urls dom

      callback after_transformation, dom

      remove_ignore_markers dom
      serialize_document dom
    end

    # Transform the input HTML as a HTML fragment/partial and returns the
    # processed HTML.
    #
    # Before the transformation begins, the {#before_transformation} callback
    # will be called with the parsed HTML tree and the {Document} instance, and
    # after all work is complete the {#after_transformation} callback will be
    # invoked in the same way.
    #
    # The main difference between this and {#transform} is that this does not
    # treat the HTML as a full document and does not try to fix it by adding
    # doctypes, +<head>+ elements, etc. Uninlinable CSS is kept at the root of
    # the fragment rather than in +<head>+.
    #
    # Note that, unlike {#transform}, this method does not strip
    # +data-roadie-ignore+ attributes before serializing.
    #
    # Most of the work is delegated to other classes. A list of them can be
    # seen below.
    #
    # @see Inliner Inliner (inlines the stylesheets)
    # @see UrlRewriter UrlRewriter (rewrites URLs and makes them absolute)
    # @see #transform Transforms full documents
    #
    # @return [String] the transformed HTML
    def transform_partial
      dom = Nokogiri::HTML.fragment html

      callback before_transformation, dom

      inline dom, keep_uninlinable_in: :root
      rewrite_urls dom

      callback after_transformation, dom

      serialize_document dom
    end

    # Assign new normal asset providers. The supplied list will be wrapped in a {ProviderList} using {ProviderList.wrap}.
    def asset_providers=(list)
      @asset_providers = ProviderList.wrap(list)
    end

    # Assign new external asset providers. The supplied list will be wrapped in a {ProviderList} using {ProviderList.wrap}.
    def external_asset_providers=(list)
      @external_asset_providers = ProviderList.wrap(list)
    end

    # Integer representing a bitmap set of options used by Nokogiri during serialization.
    # For the complete set of available options look into +Nokogiri::XML::Node::SaveOptions+.
    # (To change the mode in which the document is generated use {#mode=} however.)
    #
    # Passing +nil+ (or +false+) resets the options to +0+, i.e. no options.
    def serialization_options=(options)
      @serialization_options = options || 0
    end

    # Change the mode. The mode affects how the resulting markup is generated.
    #
    # Valid modes:
    #   `:html` (default)
    #   `:xhtml`
    #   `:xml`
    #
    # @raise [ArgumentError] if +mode+ is not one of the valid modes; the
    #   current mode is left untouched in that case.
    def mode=(mode)
      unless VALID_MODES.include?(mode)
        raise ArgumentError, "Invalid mode #{mode.inspect}. Valid modes are: #{VALID_MODES.inspect}"
      end

      @mode = mode
    end

    private

    # Wraps the CSS collected through {#add_css} in a Stylesheet. A new
    # instance is built on every call.
    def stylesheet
      Stylesheet.new "(Document styles)", @css
    end

    # Runs MarkupImprover over the DOM, handing it the original input HTML.
    def improve(dom)
      MarkupImprover.new(dom, html).improve
    end

    # Inlines the stylesheets found in the DOM followed by the internal
    # stylesheet, which is appended last.
    #
    # @param dom the parsed DOM to work on
    # @param keep_uninlinable_in where uninlinable CSS should be kept; the
    #   callers in this class pass +:head+ or +:root+
    def inline(dom, keep_uninlinable_in:)
      dom_stylesheets = AssetScanner.new(dom, asset_providers, external_asset_providers).extract_css
      Inliner.new(dom_stylesheets + [stylesheet], dom).inline(
        keep_uninlinable_css: keep_uninlinable_css,
        keep_uninlinable_in: keep_uninlinable_in,
        merge_media_queries: merge_media_queries
      )
    end

    # Rewrites URLs in the DOM using the rewriter chosen by {#make_url_rewriter}.
    def rewrite_urls(dom)
      make_url_rewriter.transform_dom(dom)
    end

    # Serializes the DOM using {#serialization_options} combined with the
    # save format that corresponds to the current {#mode}.
    def serialize_document(dom)
      save_options = Nokogiri::XML::Node::SaveOptions
      format = {
        html: save_options::AS_HTML,
        xhtml: save_options::AS_XHTML,
        xml: save_options::AS_XML
      }.fetch(mode)

      # #dup is called since it fixed a few segfaults in certain versions of Nokogiri
      dom.dup.to_html(save_with: (serialization_options | format))
    end

    # Returns a real UrlRewriter when URL options are set, otherwise a
    # NullUrlRewriter.
    def make_url_rewriter
      if url_options
        UrlRewriter.new(UrlGenerator.new(url_options))
      else
        NullUrlRewriter.new
      end
    end

    # Invokes +callable+ with the DOM and this document. Objects that do not
    # respond to +#call+ (e.g. +nil+ when no callback is configured) are
    # ignored.
    def callback(callable, dom)
      callable.call(dom, self) if callable.respond_to?(:call)
    end

    # Strips the +data-roadie-ignore+ attribute from every element carrying it.
    def remove_ignore_markers(dom)
      dom.css("[data-roadie-ignore]").each do |node|
        node.remove_attribute "data-roadie-ignore"
      end
    end
  end
end