File: whitespace_normalizer.rb

package info (click to toggle)
ruby-capybara 3.40.0%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,368 kB
  • sloc: ruby: 23,988; javascript: 752; makefile: 11
file content (81 lines) | stat: -rw-r--r-- 2,290 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# frozen_string_literal: true

module Capybara
  module Node
    ##
    #
    # {Capybara::Node::WhitespaceNormalizer} provides methods that
    # help to normalize the spacing of text content inside of
    # {Capybara::Node::Element}s by removing various unicode
    # spacing and directional markings.
    #
    module WhitespaceNormalizer
      # Unicode for NBSP, or &nbsp;
      NON_BREAKING_SPACE = "\u00a0"

      # Unicode line / paragraph separators.
      # NOTE: the "SEPERATOR" spelling is kept as-is so that any existing
      # reference to these constant names keeps resolving.
      LINE_SEPERATOR = "\u2028"
      PARAGRAPH_SEPERATOR = "\u2029"

      # All spaces except for NBSP (regexp character-class source, meant to be
      # interpolated into a Regexp)
      BREAKING_SPACES = "[[:space:]&&[^#{NON_BREAKING_SPACE}]]".freeze

      # Whitespace we want to substitute with plain spaces
      # (used as a character set for String#tr)
      SQUEEZED_SPACES = " \n\f\t\v#{LINE_SEPERATOR}#{PARAGRAPH_SEPERATOR}".freeze

      # Any whitespace at the front of text
      LEADING_SPACES = /\A#{BREAKING_SPACES}+/

      # Any whitespace at the end of text
      TRAILING_SPACES = /#{BREAKING_SPACES}+\z/

      # "Invisible" space character
      ZERO_WIDTH_SPACE = "\u200b"

      # Signifies text is read left to right
      LEFT_TO_RIGHT_MARK = "\u200e"

      # Signifies text is read right to left
      RIGHT_TO_LEFT_MARK = "\u200f"

      # Characters we want to truncate from text
      # (used as a character set for String#delete).
      # Array#join returns a new, unfrozen String, so it is frozen explicitly
      # like the other computed constants above.
      REMOVED_CHARACTERS = [ZERO_WIDTH_SPACE, LEFT_TO_RIGHT_MARK, RIGHT_TO_LEFT_MARK].join.freeze

      # Matches any run of spaces and newlines that contains at least one newline
      EMPTY_LINES = /[\ \n]*\n[\ \n]*/

      ##
      #
      # Normalizes the spacing of a node's text to be similar to
      # what matchers might expect.
      #
      # Steps, in order: drop zero-width / directional marks, turn the
      # {SQUEEZED_SPACES} characters into plain spaces, collapse runs of
      # spaces, strip leading and trailing breaking whitespace, and finally
      # turn each NBSP into a plain space.
      #
      # @param text [String]
      # @return [String]
      #
      def normalize_spacing(text)
        text
          .delete(REMOVED_CHARACTERS)
          .tr(SQUEEZED_SPACES, ' ')
          .squeeze(' ')
          .sub(LEADING_SPACES, '')
          .sub(TRAILING_SPACES, '')
          .tr(NON_BREAKING_SPACE, ' ') # done last so NBSPs survive the squeeze/strip steps
      end

      ##
      #
      # Variant on {Capybara::Node::WhitespaceNormalizer#normalize_spacing} that
      # targets the whitespace of visible elements only.
      #
      # Steps, in order: collapse runs of spaces, replace every
      # {EMPTY_LINES} match with a single newline, strip leading and trailing
      # breaking whitespace, and finally turn each NBSP into a plain space.
      #
      # @param text [String]
      # @return [String]
      #
      def normalize_visible_spacing(text)
        text
          .squeeze(' ')
          .gsub(EMPTY_LINES, "\n")
          .sub(LEADING_SPACES, '')
          .sub(TRAILING_SPACES, '')
          .tr(NON_BREAKING_SPACE, ' ') # done last so NBSPs survive the squeeze/strip steps
      end
    end
  end
end