# -*- coding: utf-8; frozen_string_literal: true -*-
#
#--
# Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
#
# This file is part of kramdown which is licensed under the MIT.
#++
#
require 'rexml/parsers/baseparser'
module Kramdown
module Utils
# Provides convenience methods for HTML related tasks.
#
# *Note* that this module has to be mixed into a class that has a @root (containing an element
# of type :root) and an @options (containing an options hash) instance variable so that some of
# the methods can work correctly.
module Html
# Return the string that should be output for the entity +e+. The optional parameter
# +original+ may hold the representation of the entity as it appeared in the input.
#
# The option +entity_output+ selects the output form:
#
# * :as_char - the character itself, provided it can be encoded in the encoding of the root
#   element and is either the quotation mark or not a key of ESCAPE_MAP; otherwise +original+
#   is used if given.
# * :as_input - +original+ if given.
# * :symbolic - the named form <tt>&name;</tt> if the entity has a name.
#
# In all remaining cases the named form is used if the character is a key of ESCAPE_MAP and
# the entity has a name, and the numeric form <tt>&#code_point;</tt> otherwise.
def entity_to_str(e, original = nil)
  mode = @options[:entity_output]

  if mode == :as_char
    # The conversion result is only used as a probe: any failure means that the character
    # cannot be represented in the output encoding.
    encodable = begin
      e.char.encode(@root.options[:encoding])
    rescue StandardError
      nil
    end
    if encodable
      char = e.char
      return char if char == '"' || !ESCAPE_MAP.key?(char)
    end
  end

  return original if original && (mode == :as_input || mode == :as_char)

  if (mode == :symbolic || ESCAPE_MAP.key?(e.char)) && !e.name.nil?
    "&#{e.name};"
  else # default to :numeric
    "&##{e.code_point};"
  end
end
# Return the HTML representation of the attributes +attr+.
#
# Each attribute is rendered as <tt> name="value"</tt> (with a leading space) and the value is
# escaped with #escape_html using the :attribute type. Attributes with a +nil+ value and 'id'
# attributes whose value is blank after stripping are left out. An empty string is returned
# if +attr+ is empty.
def html_attributes(attr)
  return '' if attr.empty?

  rendered = attr.map do |name, value|
    if value.nil? || (name == 'id' && value.strip.empty?)
      ''
    else
      %( #{name}="#{escape_html(value.to_s, :attribute)}")
    end
  end
  rendered.join
end
# :stopdoc:
# Maps each special HTML character to the entity that replaces it in escaped output. The
# values must be the entities, not the raw characters - otherwise escaping is a no-op.
ESCAPE_MAP = {
  '<' => '&lt;',
  '>' => '&gt;',
  '&' => '&amp;',
  '"' => '&quot;',
}
# Matches every '<', '>' and '&', including an '&' that starts an existing entity.
ESCAPE_ALL_RE = /<|>|&/
# Matches complete entity references first (they are not keys of ESCAPE_MAP and are therefore
# left untouched by #escape_html), then '<', '>' and '&'.
ESCAPE_TEXT_RE = Regexp.union(REXML::Parsers::BaseParser::REFERENCE_RE, /<|>|&/)
# Same as ESCAPE_TEXT_RE but additionally matches the quotation mark.
ESCAPE_ATTRIBUTE_RE = Regexp.union(REXML::Parsers::BaseParser::REFERENCE_RE, /<|>|&|"/)
# Lookup table from the escape type accepted by #escape_html to the regexp to use.
ESCAPE_RE_FROM_TYPE = {all: ESCAPE_ALL_RE, text: ESCAPE_TEXT_RE, attribute: ESCAPE_ATTRIBUTE_RE}
# :startdoc:
# Return a copy of the string +str+ in which the special HTML characters are replaced by
# their ESCAPE_MAP entries. The parameter +type+ selects what is escaped:
#
# * :all - '<', '>' and '&' everywhere, including the '&' of existing entities; the quotation
#   mark is left alone.
# * :text - '<', '>' and '&' but existing entities are kept as they are; the quotation mark is
#   left alone.
# * :attribute - like :text but the quotation mark is escaped as well.
def escape_html(str, type = :all)
  pattern = ESCAPE_RE_FROM_TYPE[type]
  str.gsub(pattern) do |match|
    # Entity references matched by the pattern have no map entry and pass through unchanged.
    ESCAPE_MAP.fetch(match, match)
  end
end
# Matches a line break that sits directly between two runs of Han, Hiragana or Katakana
# characters; the runs before and after the break are captured as groups 1 and 2.
REDUNDANT_LINE_BREAK_REGEX = /([\p{Han}\p{Hiragana}\p{Katakana}]+)\n([\p{Han}\p{Hiragana}\p{Katakana}]+)/u

# Remove every line break in +str+ that directly separates two CJK characters.
#
# The string +str+ is modified in place and also returned.
def fix_cjk_line_break(str)
  # A single pass can miss a break whose preceding run was consumed as the trailing run of the
  # previous match, so substitute repeatedly until gsub! reports that nothing changed.
  loop do
    changed = str.gsub!(REDUNDANT_LINE_BREAK_REGEX, '\1\2')
    break unless changed
  end
  str
end
end
end
end