1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
|
# frozen_string_literal: true
module Capybara
  module Node
    ##
    #
    # {Capybara::Node::WhitespaceNormalizer} provides methods that
    # help to normalize the spacing of text content inside of
    # {Capybara::Node::Element}s by removing various unicode
    # spacing and directional markings.
    #
    module WhitespaceNormalizer
      # Unicode non-breaking space (NBSP), i.e. the HTML `&nbsp;` entity
      NON_BREAKING_SPACE = "\u00a0"

      # Unicode line separator.
      # NOTE: the "SEPERATOR" spelling is kept as-is because the constant
      # name is part of the module's public interface.
      LINE_SEPERATOR = "\u2028"

      # Unicode paragraph separator (same spelling caveat as above)
      PARAGRAPH_SEPERATOR = "\u2029"

      # Regexp character-class source matching all spaces except for NBSP
      BREAKING_SPACES = "[[:space:]&&[^#{NON_BREAKING_SPACE}]]".freeze

      # Whitespace we want to substitute with plain spaces
      # (used as a character set for String#tr)
      SQUEEZED_SPACES = " \n\f\t\v#{LINE_SEPERATOR}#{PARAGRAPH_SEPERATOR}".freeze

      # Any breaking whitespace at the front of text
      LEADING_SPACES = /\A#{BREAKING_SPACES}+/

      # Any breaking whitespace at the end of text
      TRAILING_SPACES = /#{BREAKING_SPACES}+\z/

      # "Invisible" space character
      ZERO_WIDTH_SPACE = "\u200b"

      # Signifies text is read left to right
      LEFT_TO_RIGHT_MARK = "\u200e"

      # Signifies text is read right to left
      RIGHT_TO_LEFT_MARK = "\u200f"

      # Characters we want to strip from text (used as a character set for
      # String#delete). Array#join returns a new, unfrozen String, so it is
      # frozen explicitly to keep the constant immutable.
      REMOVED_CHARACTERS = [ZERO_WIDTH_SPACE, LEFT_TO_RIGHT_MARK, RIGHT_TO_LEFT_MARK].join.freeze

      # Matches a newline together with any spaces/newlines surrounding it,
      # i.e. one or more (possibly blank) line breaks
      EMPTY_LINES = /[\ \n]*\n[\ \n]*/

      ##
      #
      # Normalizes the spacing of a node's text to be similar to
      # what matchers might expect.
      #
      # Zero-width spaces and directional marks are removed, each character
      # in {SQUEEZED_SPACES} becomes a plain space, runs of plain spaces are
      # collapsed to one, leading/trailing breaking whitespace is stripped,
      # and finally every non-breaking space is turned into a plain space.
      #
      # @param text [String]
      # @return [String] a new, normalized string; +text+ is not modified
      #
      def normalize_spacing(text)
        text
          .delete(REMOVED_CHARACTERS)
          .tr(SQUEEZED_SPACES, ' ')
          .squeeze(' ')
          .sub(LEADING_SPACES, '')
          .sub(TRAILING_SPACES, '')
          # NBSP is converted last so it survives the squeeze/strip steps above
          .tr(NON_BREAKING_SPACE, ' ')
      end

      ##
      #
      # Variant on {Capybara::Node::WhitespaceNormalizer#normalize_spacing} that
      # targets the whitespace of visible elements only.
      #
      # Runs of plain spaces are collapsed to one, each run of line breaks
      # (with any surrounding spaces) becomes a single newline,
      # leading/trailing breaking whitespace is stripped, and finally every
      # non-breaking space is turned into a plain space. Unlike
      # {#normalize_spacing}, zero-width/directional characters are kept and
      # newlines are preserved.
      #
      # @param text [String]
      # @return [String] a new, normalized string; +text+ is not modified
      #
      def normalize_visible_spacing(text)
        text
          .squeeze(' ')
          .gsub(EMPTY_LINES, "\n")
          .sub(LEADING_SPACES, '')
          .sub(TRAILING_SPACES, '')
          # NBSP is converted last so it survives the squeeze/strip steps above
          .tr(NON_BREAKING_SPACE, ' ')
      end
    end
  end
end
|