File: markup_improver.rb

package info (click to toggle)
ruby-roadie 5.2.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 512 kB
  • sloc: ruby: 3,418; makefile: 5
file content (89 lines) | stat: -rw-r--r-- 2,469 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# frozen_string_literal: true

module Roadie
  # @api private
  # Class that improves the markup of a HTML DOM tree
  #
  # This class will improve the following aspects of the DOM:
  # * A HTML5 doctype will be added if missing, other doctypes will be left as-is.
  # * Basic HTML elements will be added if missing.
  #   * `<html>`
  #   * `<head>`
  #   * `<body>`
  #   * `<meta>` declaring charset and content-type (text/html)
  #
  # NOTE(review): no code in this class creates a `<body>` element; presumably
  # the HTML parser that built the DOM already supplies it - confirm.
  class MarkupImprover
    # Detects a doctype declaration in raw markup. The match is
    # case-insensitive and accepts any whitespace after the keyword, because
    # HTML allows `<!doctype html>` just as well as `<!DOCTYPE html>`.
    DOCTYPE_PATTERN = /<!DOCTYPE\s/i
    private_constant :DOCTYPE_PATTERN

    # The original HTML must also be passed in in order to handle the doctypes
    # since a +Nokogiri::HTML::Document+ will always have a doctype, no matter if
    # the original source had it or not. Reading the raw HTML is the only way to
    # determine if we want to add a HTML5 doctype or not.
    #
    # @param dom the parsed document that will be mutated by {#improve}
    # @param original_html [String] the raw markup +dom+ was parsed from
    def initialize(dom, original_html)
      @dom = dom
      @html = original_html
    end

    # Runs every improvement step, in order: doctype, `<html>`, `<head>` and
    # finally the content-type `<meta>` element.
    #
    # @return [nil] passed DOM will be mutated
    def improve
      ensure_doctype_present
      ensure_html_element_present
      head = ensure_head_element_present
      ensure_declared_charset head
      # Explicit nil so the documented return value does not depend on what the
      # last step happens to return.
      nil
    end

    protected

    attr_reader :dom

    private

    # Replaces the doctype of the DOM with a HTML5 one, unless the raw markup
    # declared a doctype of its own (in which case the DOM is left untouched).
    def ensure_doctype_present
      # Match against a binary copy: regexp matching can raise ArgumentError on
      # strings containing invalid byte sequences, which the raw markup is not
      # guaranteed to be free of.
      return if @html.b.match?(DOCTYPE_PATTERN)

      # Nokogiri adds a "default" doctype to the DOM, which we will remove
      dom.internal_subset&.remove
      dom.create_internal_subset "html", nil, nil
    end

    # Appends a new `<html>` element to the document when the XPath `html`
    # does not find one.
    def ensure_html_element_present
      return if dom.at_xpath("html")

      dom << Nokogiri::XML::Node.new("html", dom)
    end

    # @return the existing `html/head` element, or a newly created one
    def ensure_head_element_present
      dom.at_xpath("html/head") || create_head_element(dom.at_xpath("html"))
    end

    # Creates a `<head>` element and inserts it as the first child of +parent+.
    #
    # @param parent the element that should contain the new `<head>`
    # @return the newly created `<head>` element
    def create_head_element(parent)
      head = Nokogiri::XML::Node.new "head", dom
      # prepend_child handles a childless parent as well, so there is no need
      # to special-case it (inserting via `parent.children.before` crashes when
      # there are no children).
      parent.prepend_child head
      head
    end

    # Appends a content-type `<meta>` element to +parent+ unless the document
    # head already declares one.
    #
    # @param parent the `<head>` element that receives the `<meta>` element
    def ensure_declared_charset(parent)
      return unless content_type_meta_element_missing?

      parent.add_child make_content_type_element
    end

    # @return [Boolean] true when no `html/head/meta` element has a
    #   `http-equiv` attribute equal to "content-type" (compared
    #   case-insensitively)
    def content_type_meta_element_missing?
      dom.xpath("html/head/meta").none? do |meta|
        meta["http-equiv"].to_s.downcase == "content-type"
      end
    end

    # @return a detached `<meta http-equiv="Content-Type">` element declaring
    #   `text/html; charset=UTF-8`
    def make_content_type_element
      meta = Nokogiri::XML::Node.new("meta", dom)
      meta["http-equiv"] = "Content-Type"
      meta["content"] = "text/html; charset=UTF-8"
      meta
    end
  end
end