1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
|
# frozen_string_literal: true
module Banzai
module Filter
# Sanitize HTML produced by Markdown.
#
# Extends Banzai::Filter::BaseSanitizationFilter with specific rules.
class SanitizationFilter < Banzai::Filter::BaseSanitizationFilter
# Styles used by Markdown for table alignment.
#
# Matches a `text-align` declaration whose value is `center`, `left` or
# `right`; the matched value is exposed through the `alignment` named capture.
# The pattern is not anchored, so it also matches inside a longer style string.
TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/
# Adds the Markdown-specific sanitization rules to the given allowlist.
#
# The allowlist is modified in place and the same object is returned.
#
# @param allowlist [Hash] sanitizer configuration; must already contain the
#   `:attributes` (with `:all` and `'a'` entries), `:transformers` and
#   `:elements` collections
# @return [Hash] the allowlist that was passed in
def customize_allowlist(allowlist)
  allowlist[:allow_comments] = context[:allow_comments]

  attributes = allowlist[:attributes]
  transformers = allowlist[:transformers]

  # Table cells may carry `style`, restricted to the `text-align` property.
  # The accepted values are narrowed further by a transformer registered below.
  %w[th td].each { |cell| attributes[cell] = %w[style] }
  allowlist[:css] = { properties: ['text-align'] }

  # `data-sourcepos` (from CommonMark) and `data-escaped-char` are accepted
  # on every element.
  attributes[:all].push('data-sourcepos', 'data-escaped-char')

  # Strips any `style` value that is not needed for table alignment.
  transformers << self.class.remove_unsafe_table_style

  # `id` is accepted on `a` and `li` (footnotes) and on `a` (header anchors);
  # ids that do not match the expected prefixes are stripped by the
  # transformer registered here.
  link_attributes = attributes['a']
  link_attributes << 'id'
  attributes['li'] = %w[id]
  transformers << self.class.remove_id_attributes

  # `class` is accepted on `a`, then stripped again by a transformer unless
  # it is one of the permitted values.
  link_attributes << 'class'
  transformers << self.class.remove_unsafe_link_class

  # Footnotes: `section` elements with `data-footnotes`, plus the footnote
  # reference/back-reference data attributes on links.
  allowlist[:elements] << 'section'
  attributes['section'] = %w[data-footnotes]
  link_attributes.push('data-footnote-ref', 'data-footnote-backref', 'data-footnote-backref-idx')

  allowlist
end
class << self
# Builds the transformer that restricts `style` on table cells.
#
# The returned lambda only acts on `th`/`td` nodes that have a `style`
# attribute. When the style contains an allowed `text-align` declaration the
# attribute is rewritten to just that declaration; otherwise it is removed.
#
# @return [Proc] lambda taking the transformer environment (reads `env[:node]`)
def remove_unsafe_table_style
  lambda do |env|
    cell = env[:node]
    next unless %w[th td].include?(cell.name)
    next unless cell.has_attribute?('style')

    alignment = TABLE_ALIGNMENT_PATTERN.match(cell['style'])
    if alignment
      # Keep only the alignment declaration, dropping everything else.
      cell['style'] = "text-align: #{alignment[:alignment]}"
    else
      cell.remove_attribute('style')
    end
  end
end
# Builds the transformer that strips disallowed `class` values from links.
#
# The returned lambda only acts on `a` nodes that have a `class` attribute
# and removes the attribute when `remove_link_class?` says so.
#
# @return [Proc] lambda taking the transformer environment (reads `env[:node]`)
def remove_unsafe_link_class
  lambda do |env|
    link = env[:node]
    next if link.name != 'a' || !link.has_attribute?('class')

    link.remove_attribute('class') if remove_link_class?(link)
  end
end
# Tells whether the `class` attribute of a link must be removed.
#
# @param node [#[]] node whose `class` attribute is inspected
# @return [true, nil] `nil` when the class is exactly `anchor`, `true` otherwise
def remove_link_class?(node)
  true unless node['class'] == 'anchor'
end
# Builds the transformer that strips unexpected `id` attributes.
#
# The returned lambda only acts on `a` and `li` nodes that have an `id`.
# The id is kept when it is:
# - on an `li` and starts with the footnote id prefix,
# - on an `a` and starts with the footnote link id prefix, or
# - on an `a` whose class is exactly `anchor` and starts with the
#   user-content id prefix (generated header anchors).
# In every other case the attribute is removed.
#
# @return [Proc] lambda taking the transformer environment (reads `env[:node]`)
def remove_id_attributes
  lambda do |env|
    element = env[:node]
    tag = element.name
    next unless %w[a li].include?(tag)
    next unless element.has_attribute?('id')

    id = element['id']
    keep =
      if tag == 'li'
        # Footnote list items.
        id.start_with?(Banzai::Filter::FootnoteFilter::FOOTNOTE_ID_PREFIX)
      else
        # Footnote links, or links acting as generated header anchors.
        id.start_with?(Banzai::Filter::FootnoteFilter::FOOTNOTE_LINK_ID_PREFIX) ||
          (element['class'] == 'anchor' && id.start_with?(Banzai::Renderer::USER_CONTENT_ID_PREFIX))
      end

    element.remove_attribute('id') unless keep
  end
end
end
end
end
end
# NOTE(review): `prepend_mod_with` is defined outside this file; presumably it
# prepends an override module registered under this name when one exists —
# confirm against its definition.
Banzai::Filter::SanitizationFilter.prepend_mod_with('Banzai::Filter::SanitizationFilter')
|