1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
|
# frozen_string_literal: true
module Roadie
  # The main entry point for Roadie. A document represents a working unit and
  # is built with the input HTML and the configuration options you need.
  #
  # A Document must never be used from two threads at the same time. Reusing
  # Documents is discouraged.
  #
  # Stylesheets are added to the HTML from three different sources:
  # 1. Stylesheets inside the document ( +<style>+ elements)
  # 2. Stylesheets referenced by the DOM ( +<link>+ elements)
  # 3. The internal stylesheet (see {#add_css})
  #
  # The internal stylesheet is used last and gets the highest priority. The
  # rest is used in the same order as browsers are supposed to use them.
  #
  # The execution methods are {#transform} and {#transform_partial}.
  #
  # @attr [#call] before_transformation Callback to call just before {#transform}ation begins. Will be called with the parsed DOM tree and the {Document} instance.
  # @attr [#call] after_transformation Callback to call just before {#transform}ation is completed. Will be called with the current DOM tree and the {Document} instance.
  class Document
    # Modes accepted by {#mode=}. Kept private; callers only see the symbols.
    VALID_MODES = %i[html xhtml xml].freeze
    private_constant :VALID_MODES

    attr_reader :html, :asset_providers, :external_asset_providers

    # URL options. If none are given no URL rewriting will take place.
    # @see UrlGenerator#initialize
    attr_accessor :url_options

    attr_accessor :before_transformation, :after_transformation

    # Should CSS that cannot be inlined be kept in a new `<style>` element in `<head>`?
    attr_accessor :keep_uninlinable_css

    # Merge media queries to increase performance and reduce email size if enabled.
    # This will change specificity in some cases, like for example:
    #   @media(max-width: 600px) { .col-6 { display: block; } }
    #   @media(max-width: 400px) { .col-12 { display: inline-block; } }
    #   @media(max-width: 600px) { .col-12 { display: block; } }
    # will become
    #   @media(max-width: 600px) { .col-6 { display: block; } .col-12 { display: block; } }
    #   @media(max-width: 400px) { .col-12 { display: inline-block; } }
    # which would change the styling on the page
    attr_accessor :merge_media_queries

    # Integer representing a bitmap set of options used by Nokogiri during serialization.
    # For the complete set of available options look into +Nokogiri::XML::Node::SaveOptions+.
    attr_reader :serialization_options

    # The mode to generate markup in. Valid values are `:html` (default),
    # `:xhtml` and `:xml`.
    attr_reader :mode

    # Builds a document with the defaults: uninlinable CSS is kept, media
    # queries are merged, a single {FilesystemProvider} serves assets, there
    # are no external asset providers, and output is generated in `:html` mode.
    #
    # @param [String] html the input HTML
    def initialize(html)
      @html = html
      @css = +"" # unary plus: needs to be mutable, see #add_css
      @mode = :html
      @keep_uninlinable_css = true
      @merge_media_queries = true
      @asset_providers = ProviderList.wrap(FilesystemProvider.new)
      @external_asset_providers = ProviderList.empty
      @serialization_options =
        Nokogiri::XML::Node::SaveOptions::NO_DECLARATION |
        Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
    end

    # Append additional CSS to the document's internal stylesheet. Each chunk
    # is separated from the previous content by a blank line.
    #
    # @param [String] new_css
    def add_css(new_css)
      @css << "\n\n" << new_css
    end

    # Transform the input HTML as a full document and returns the processed
    # HTML.
    #
    # Before the transformation begins, the {#before_transformation} callback
    # will be called with the parsed HTML tree and the {Document} instance, and
    # after all work is complete the {#after_transformation} callback will be
    # invoked in the same way. Once the callbacks have run, all
    # `data-roadie-ignore` attributes are stripped from the tree before it is
    # serialized.
    #
    # Most of the work is delegated to other classes. A list of them can be
    # seen below.
    #
    # @see MarkupImprover MarkupImprover (improves the markup of the DOM)
    # @see Inliner Inliner (inlines the stylesheets)
    # @see UrlRewriter UrlRewriter (rewrites URLs and makes them absolute)
    # @see #transform_partial Transforms partial documents (fragments)
    #
    # @return [String] the transformed HTML
    def transform
      dom = Nokogiri::HTML.parse html

      callback before_transformation, dom

      improve dom
      inline dom, keep_uninlinable_in: :head
      rewrite_urls dom

      callback after_transformation, dom

      remove_ignore_markers dom
      serialize_document dom
    end

    # Transform the input HTML as a HTML fragment/partial and returns the
    # processed HTML.
    #
    # Before the transformation begins, the {#before_transformation} callback
    # will be called with the parsed HTML tree and the {Document} instance, and
    # after all work is complete the {#after_transformation} callback will be
    # invoked in the same way.
    #
    # The main difference between this and {#transform} is that this does not
    # treat the HTML as a full document and does not try to fix it by adding
    # doctypes, +<head>+ elements, etc. Unlike {#transform}, this method also
    # leaves any `data-roadie-ignore` attributes in the output.
    #
    # Most of the work is delegated to other classes. A list of them can be
    # seen below.
    #
    # @see Inliner Inliner (inlines the stylesheets)
    # @see UrlRewriter UrlRewriter (rewrites URLs and makes them absolute)
    # @see #transform Transforms full documents
    #
    # @return [String] the transformed HTML
    def transform_partial
      dom = Nokogiri::HTML.fragment html

      callback before_transformation, dom

      inline dom, keep_uninlinable_in: :root
      rewrite_urls dom

      callback after_transformation, dom

      serialize_document dom
    end

    # Assign new normal asset providers. The supplied list will be wrapped in a {ProviderList} using {ProviderList.wrap}.
    def asset_providers=(list)
      @asset_providers = ProviderList.wrap(list)
    end

    # Assign new external asset providers. The supplied list will be wrapped in a {ProviderList} using {ProviderList.wrap}.
    def external_asset_providers=(list)
      @external_asset_providers = ProviderList.wrap(list)
    end

    # Integer representing a bitmap set of options used by Nokogiri during serialization.
    # For the complete set of available options look into +Nokogiri::XML::Node::SaveOptions+.
    # (To change the mode in which the document is generated use {#mode=} however.)
    #
    # @param [Integer, nil] options the bitmap; +nil+ is stored as +0+ (no options)
    def serialization_options=(options)
      @serialization_options = options || 0
    end

    # Change the mode. The mode affects how the resulting markup is generated.
    #
    # Valid modes:
    #   `:html` (default)
    #   `:xhtml`
    #   `:xml`
    #
    # @param [Symbol] mode one of the valid modes listed above
    # @raise [ArgumentError] if the mode is not valid; the current mode is left untouched
    def mode=(mode)
      unless VALID_MODES.include?(mode)
        raise ArgumentError, "Invalid mode #{mode.inspect}. Valid modes are: #{VALID_MODES.inspect}"
      end

      @mode = mode
    end

    private

    # Wraps everything added through {#add_css} in a {Stylesheet}.
    def stylesheet
      Stylesheet.new "(Document styles)", @css
    end

    # Hands the DOM and the original HTML string to {MarkupImprover}.
    def improve(dom)
      MarkupImprover.new(dom, html).improve
    end

    # Inlines the stylesheets found in the DOM followed by the internal
    # stylesheet, which goes last so it gets the highest priority.
    #
    # @param dom the parsed DOM tree
    # @param [Symbol] keep_uninlinable_in where the {Inliner} should place CSS
    #   that cannot be inlined (+:head+ from {#transform}, +:root+ from
    #   {#transform_partial})
    def inline(dom, keep_uninlinable_in:)
      dom_stylesheets = AssetScanner.new(dom, asset_providers, external_asset_providers).extract_css
      Inliner.new(dom_stylesheets + [stylesheet], dom).inline(
        keep_uninlinable_css: keep_uninlinable_css,
        keep_uninlinable_in: keep_uninlinable_in,
        merge_media_queries: merge_media_queries
      )
    end

    # Runs the URL rewriter chosen by {#make_url_rewriter} over the DOM.
    def rewrite_urls(dom)
      make_url_rewriter.transform_dom(dom)
    end

    # Serializes the DOM using {#serialization_options} combined with the
    # Nokogiri save flag that corresponds to the current {#mode}.
    #
    # @return [String] the serialized markup
    def serialize_document(dom)
      save_options = Nokogiri::XML::Node::SaveOptions
      format = {
        html: save_options::AS_HTML,
        xhtml: save_options::AS_XHTML,
        xml: save_options::AS_XML
      }.fetch(mode)

      # #dup is called since it fixed a few segfaults in certain versions of Nokogiri
      dom.dup.to_html(save_with: (serialization_options | format))
    end

    # Picks a real rewriter when URL options are set, a no-op one otherwise.
    def make_url_rewriter
      if url_options
        UrlRewriter.new(UrlGenerator.new(url_options))
      else
        NullUrlRewriter.new
      end
    end

    # Invokes +callable+ with the DOM and this document. Anything that does
    # not respond to +call+ (such as an unset callback) is silently skipped.
    def callback(callable, dom)
      callable.call(dom, self) if callable.respond_to?(:call)
    end

    # Strips the `data-roadie-ignore` marker attribute from every element
    # that carries it.
    def remove_ignore_markers(dom)
      dom.css("[data-roadie-ignore]").each do |node|
        node.remove_attribute "data-roadie-ignore"
      end
    end
  end
end
|