1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
|
# frozen_string_literal: true
require "addressable/uri"
module HTMLProofer
# Represents the element currently being processed
class Element
include HTMLProofer::Utils
attr_reader :node, :url, :base_url, :line, :content
def initialize(runner, node, base_url: nil)
@runner = runner
@node = node
swap_attributes!
@base_url = base_url
@url = Attribute::Url.new(runner, link_attribute, base_url: base_url, source: @runner.current_source, filename: @runner.current_filename)
@line = node.line
@content = node.content
end
def link_attribute
meta_content || src || srcset || href
end
def meta_content
return unless meta_tag?
@node["content"]
end
def meta_tag?
@node.name == "meta"
end
def src
return if !img_tag? && !script_tag? && !source_tag?
@node["src"]
end
def img_tag?
@node.name == "img"
end
def script_tag?
@node.name == "script"
end
def srcset
return if !img_tag? && !source_tag?
@node["srcset"]
end
def source_tag?
@node.name == "source"
end
def href
return if !a_tag? && !link_tag?
@node["href"]
end
def a_tag?
@node.name == "a"
end
def link_tag?
@node.name == "link"
end
def aria_hidden?
@node.attributes["aria-hidden"]&.value == "true"
end
def multiple_srcsets?
!blank?(srcset) && srcset.split(",").size > 1
end
# From https://github.com/sindresorhus/srcset/blob/f7c48acd7facf18e94dec47e6b96e84e0f0e69dc/index.js#LL1-L16C71
# This regex represents a loose rule of an “image candidate string”; see https://html.spec.whatwg.org/multipage/images.html#srcset-attribute
# An “image candidate string” roughly consists of the following:
# 1. Zero or more whitespace characters.
# 2. A non-empty URL that does not start or end with `,`.
# 3. Zero or more whitespace characters.
# 4. An optional “descriptor” that starts with a whitespace character.
# 5. Zero or more whitespace characters.
# 6. Each image candidate string is separated by a `,`.
# We intentionally implement a loose rule here so that we can perform more aggressive error handling and reporting in the below code.
IMAGE_CANDIDATE_REGEX = /\s*([^,]\S*[^,](?:\s+[^,]+)?)\s*(?:,|$)/
def srcsets
return if blank?(srcset)
srcset.split(IMAGE_CANDIDATE_REGEX).select.with_index do |_part, idx|
idx.odd?
end.map(&:strip)
end
def multiple_sizes?
return false if blank?(srcsets)
srcsets.any? do |srcset|
!blank?(srcset) && srcset.split(" ").size > 1
end
end
def srcsets_wo_sizes
return if blank?(srcsets)
srcsets.map do |srcset|
srcset.split(" ").first
end
end
def ignore?
return true if @node.attributes["data-proofer-ignore"]
return true if ancestors_ignorable?
return true if url&.ignore?
false
end
private def attribute_swapped?
return false if blank?(@runner.options[:swap_attributes])
attrs = @runner.options[:swap_attributes][@node.name]
true unless blank?(attrs)
end
private def swap_attributes!
return unless attribute_swapped?
attr_swaps = @runner.options[:swap_attributes][@node.name]
attr_swaps.flatten.each_slice(2) do |(old_attr, new_attr)|
next if blank?(node[old_attr])
node[new_attr] = node[old_attr]
node.delete(old_attr)
end
end
private def ancestors_ignorable?
ancestors_attributes = @node.ancestors.map { |a| a.respond_to?(:attributes) && a.attributes }
ancestors_attributes.pop # remove document at the end
ancestors_attributes.any? { |a| !a["data-proofer-ignore"].nil? }
end
end
end
|