From 36a6dad07d572a0098c29d6d96a226638a7caa38 Mon Sep 17 00:00:00 2001
From: Alvaro Martin Fraguas <alvaro.martin@nccgroup.com>
Date: Tue, 12 Apr 2022 16:20:27 -0400
Subject: [PATCH] Fix and add protections for XSS in names.

Add the method ERB::Util.xml_name_escape to escape dangerous characters
in names of tags and names of attributes, following the specification of
XML.

Use that method in the tag helpers of ActionView::Helpers. Add a deprecation
warning to the option :escape_attributes mentioning the new behavior and the
transition to :escape, to simplify by applying the option to the whole tag.

[CVE-2022-27777]
---
 .../lib/action_view/helpers/tag_helper.rb     | 39 +++++++-
 actionview/test/template/tag_helper_test.rb   | 91 +++++++++++++++++--
 .../core_ext/string/output_safety.rb          | 28 ++++++
 .../test/core_ext/string_ext_test.rb          | 26 ++++++
 4 files changed, 171 insertions(+), 13 deletions(-)

diff --git a/actionview/lib/action_view/helpers/tag_helper.rb b/actionview/lib/action_view/helpers/tag_helper.rb
index fc2654a684..7b51d18730 100644
--- a/actionview/lib/action_view/helpers/tag_helper.rb
+++ b/actionview/lib/action_view/helpers/tag_helper.rb
@@ -41,18 +41,25 @@ def initialize(view_context)
           @view_context = view_context
         end
 
-        def tag_string(name, content = nil, escape_attributes: true, **options, &block)
+        def tag_string(name, content = nil, **options, &block)
+          escape = handle_deprecated_escape_options(options)
           content = @view_context.capture(self, &block) if block_given?
+
           if VOID_ELEMENTS.include?(name) && content.nil?
-            "<#{name.to_s.dasherize}#{tag_options(options, escape_attributes)}>".html_safe
+            "<#{name.to_s.dasherize}#{tag_options(options, escape)}>".html_safe
           else
-            content_tag_string(name.to_s.dasherize, content || "", options, escape_attributes)
+            content_tag_string(name.to_s.dasherize, content || "", options, escape)
           end
         end
 
         def content_tag_string(name, content, options, escape = true)
           tag_options = tag_options(options, escape) if options
-          content     = ERB::Util.unwrapped_html_escape(content) if escape
+
+          if escape
+            name = ERB::Util.xml_name_escape(name)
+            content = ERB::Util.unwrapped_html_escape(content)
+          end
+
           "<#{name}#{tag_options}>#{PRE_CONTENT_STRINGS[name]}#{content}</#{name}>".html_safe
         end
 
@@ -85,6 +92,8 @@ def boolean_tag_option(key)
         end
 
         def tag_option(key, value, escape)
+          key = ERB::Util.xml_name_escape(key) if escape
+
           if value.is_a?(Array)
             value = escape ? safe_join(value, " ") : value.join(" ")
           else
@@ -107,6 +116,27 @@ def respond_to_missing?(*args)
             true
           end
 
+          def handle_deprecated_escape_options(options)
+            # The option :escape_attributes has been merged into the options hash to be
+            # able to warn when it is used, so we need to handle default values here.
+            escape_option_provided = options.has_key?(:escape)
+            escape_attributes_option_provided = options.has_key?(:escape_attributes)
+
+            if escape_attributes_option_provided
+              ActiveSupport::Deprecation.warn(<<~MSG)
+                Use of the option :escape_attributes is deprecated. It currently \
+                escapes both names and values of tags and attributes and it is \
+                equivalent to :escape. If any of them are enabled, the escaping \
+                is fully enabled.
+              MSG
+            end
+
+            return true unless escape_option_provided || escape_attributes_option_provided
+            escape_option = options.delete(:escape)
+            escape_attributes_option = options.delete(:escape_attributes)
+            escape_option || escape_attributes_option
+          end
+
           def method_missing(called, *args, **options, &block)
             tag_string(called, *args, **options, &block)
           end
@@ -237,6 +267,7 @@ def tag(name = nil, options = nil, open = false, escape = true)
         if name.nil?
           tag_builder
         else
+          name = ERB::Util.xml_name_escape(name) if escape
           "<#{name}#{tag_builder.tag_options(options, escape) if options}#{open ? ">" : " />"}".html_safe
         end
       end
diff --git a/actionview/test/template/tag_helper_test.rb b/actionview/test/template/tag_helper_test.rb
index 6626f78678..2f53826c9c 100644
--- a/actionview/test/template/tag_helper_test.rb
+++ b/actionview/test/template/tag_helper_test.rb
@@ -7,6 +7,8 @@ class TagHelperTest < ActionView::TestCase
 
   tests ActionView::Helpers::TagHelper
 
+  COMMON_DANGEROUS_CHARS = "&<>\"' %*+,/;=^|"
+
   def test_tag
     assert_equal "<br />", tag("br")
     assert_equal "<br clear=\"left\" />", tag(:br, clear: "left")
@@ -86,6 +88,77 @@ def test_tag_builder_do_not_modify_html_safe_options
     assert html_safe_str.html_safe?
   end
 
+  def test_tag_with_dangerous_name
+    assert_equal "<#{"_" * COMMON_DANGEROUS_CHARS.size} />",
+                 tag(COMMON_DANGEROUS_CHARS)
+
+    assert_equal "<#{COMMON_DANGEROUS_CHARS} />",
+                 tag(COMMON_DANGEROUS_CHARS, nil, false, false)
+  end
+
+  def test_tag_builder_with_dangerous_name
+    escaped_dangerous_chars = "_" * COMMON_DANGEROUS_CHARS.size
+    assert_equal "<#{escaped_dangerous_chars}></#{escaped_dangerous_chars}>",
+                 tag.public_send(COMMON_DANGEROUS_CHARS.to_sym)
+
+    assert_equal "<#{COMMON_DANGEROUS_CHARS}></#{COMMON_DANGEROUS_CHARS}>",
+                 tag.public_send(COMMON_DANGEROUS_CHARS.to_sym, nil, escape: false)
+  end
+
+  def test_tag_with_dangerous_aria_attribute_name
+    escaped_dangerous_chars = "_" * COMMON_DANGEROUS_CHARS.size
+    assert_equal "<the-name aria-#{escaped_dangerous_chars}=\"the value\" />",
+                 tag("the-name", aria: { COMMON_DANGEROUS_CHARS => "the value" })
+
+    assert_equal "<the-name aria-#{COMMON_DANGEROUS_CHARS}=\"the value\" />",
+                 tag("the-name", { aria: { COMMON_DANGEROUS_CHARS => "the value" } }, false, false)
+  end
+
+  def test_tag_builder_with_dangerous_aria_attribute_name
+    escaped_dangerous_chars = "_" * COMMON_DANGEROUS_CHARS.size
+    assert_equal "<the-name aria-#{escaped_dangerous_chars}=\"the value\"></the-name>",
+                 tag.public_send(:"the-name", aria: { COMMON_DANGEROUS_CHARS => "the value" })
+
+    assert_equal "<the-name aria-#{COMMON_DANGEROUS_CHARS}=\"the value\"></the-name>",
+                 tag.public_send(:"the-name", aria: { COMMON_DANGEROUS_CHARS => "the value" }, escape: false)
+  end
+
+  def test_tag_with_dangerous_data_attribute_name
+    escaped_dangerous_chars = "_" * COMMON_DANGEROUS_CHARS.size
+    assert_equal "<the-name data-#{escaped_dangerous_chars}=\"the value\" />",
+                 tag("the-name", data: { COMMON_DANGEROUS_CHARS => "the value" })
+
+    assert_equal "<the-name data-#{COMMON_DANGEROUS_CHARS}=\"the value\" />",
+                 tag("the-name", { data: { COMMON_DANGEROUS_CHARS => "the value" } }, false, false)
+  end
+
+  def test_tag_builder_with_dangerous_data_attribute_name
+    escaped_dangerous_chars = "_" * COMMON_DANGEROUS_CHARS.size
+    assert_equal "<the-name data-#{escaped_dangerous_chars}=\"the value\"></the-name>",
+                 tag.public_send(:"the-name", data: { COMMON_DANGEROUS_CHARS => "the value" })
+
+    assert_equal "<the-name data-#{COMMON_DANGEROUS_CHARS}=\"the value\"></the-name>",
+                 tag.public_send(:"the-name", data: { COMMON_DANGEROUS_CHARS => "the value" }, escape: false)
+  end
+
+  def test_tag_with_dangerous_unknown_attribute_name
+    escaped_dangerous_chars = "_" * COMMON_DANGEROUS_CHARS.size
+    assert_equal "<the-name #{escaped_dangerous_chars}=\"the value\" />",
+                 tag("the-name", COMMON_DANGEROUS_CHARS => "the value")
+
+    assert_equal "<the-name #{COMMON_DANGEROUS_CHARS}=\"the value\" />",
+                 tag("the-name", { COMMON_DANGEROUS_CHARS => "the value" }, false, false)
+  end
+
+  def test_tag_builder_with_dangerous_unknown_attribute_name
+    escaped_dangerous_chars = "_" * COMMON_DANGEROUS_CHARS.size
+    assert_equal "<the-name #{escaped_dangerous_chars}=\"the value\"></the-name>",
+                 tag.public_send(:"the-name", COMMON_DANGEROUS_CHARS => "the value")
+
+    assert_equal "<the-name #{COMMON_DANGEROUS_CHARS}=\"the value\"></the-name>",
+                 tag.public_send(:"the-name", COMMON_DANGEROUS_CHARS => "the value", escape: false)
+  end
+
   def test_content_tag
     assert_equal "<a href=\"create\">Create</a>", content_tag("a", "Create", "href" => "create")
     assert_predicate content_tag("a", "Create", "href" => "create"), :html_safe?
@@ -105,7 +178,7 @@ def test_tag_builder_with_content
     assert_equal "<p>&lt;script&gt;evil_js&lt;/script&gt;</p>",
                  tag.p("<script>evil_js</script>")
     assert_equal "<p><script>evil_js</script></p>",
-                 tag.p("<script>evil_js</script>", escape_attributes: false)
+                 tag.p("<script>evil_js</script>", escape: false)
   end
 
   def test_tag_builder_nested
@@ -220,10 +293,10 @@ def test_content_tag_with_unescaped_array_class
   end
 
   def test_tag_builder_with_unescaped_array_class
-    str = tag.p "limelight", class: ["song", "play>"], escape_attributes: false
+    str = tag.p "limelight", class: ["song", "play>"], escape: false
     assert_equal "<p class=\"song play>\">limelight</p>", str
 
-    str = tag.p "limelight", class: ["song", ["play>"]], escape_attributes: false
+    str = tag.p "limelight", class: ["song", ["play>"]], escape: false
     assert_equal "<p class=\"song play>\">limelight</p>", str
   end
 
@@ -242,7 +315,7 @@ def test_content_tag_with_unescaped_empty_array_class
   end
 
   def test_tag_builder_with_unescaped_empty_array_class
-    str = tag.p "limelight", class: [], escape_attributes: false
+    str = tag.p "limelight", class: [], escape: false
     assert_equal '<p class="">limelight</p>', str
   end
 
@@ -313,11 +386,11 @@ def test_disable_escaping
   end
 
   def test_tag_builder_disable_escaping
-    assert_equal '<a href="&amp;"></a>', tag.a(href: "&amp;", escape_attributes: false)
-    assert_equal '<a href="&amp;">cnt</a>', tag.a(href: "&amp;", escape_attributes: false) { "cnt" }
-    assert_equal '<br data-hidden="&amp;">', tag.br("data-hidden": "&amp;", escape_attributes: false)
-    assert_equal '<a href="&amp;">content</a>', tag.a("content", href: "&amp;", escape_attributes: false)
-    assert_equal '<a href="&amp;">content</a>', tag.a(href: "&amp;", escape_attributes: false) { "content" }
+    assert_equal '<a href="&amp;"></a>', tag.a(href: "&amp;", escape: false)
+    assert_equal '<a href="&amp;">cnt</a>', tag.a(href: "&amp;", escape: false) { "cnt" }
+    assert_equal '<br data-hidden="&amp;">', tag.br("data-hidden": "&amp;", escape: false)
+    assert_equal '<a href="&amp;">content</a>', tag.a("content", href: "&amp;", escape: false)
+    assert_equal '<a href="&amp;">content</a>', tag.a(href: "&amp;", escape: false) { "content" }
   end
 
   def test_data_attributes
 
diff --git a/activesupport/lib/active_support/core_ext/string/output_safety.rb b/activesupport/lib/active_support/core_ext/string/output_safety.rb
index 5b2b3162f2..a7ae901585 100644
--- a/activesupport/lib/active_support/core_ext/string/output_safety.rb
+++ b/activesupport/lib/active_support/core_ext/string/output_safety.rb
@@ -12,6 +12,14 @@ module Util
     HTML_ESCAPE_ONCE_REGEXP = /["><']|&(?!([a-zA-Z]+|(#\d+)|(#[xX][\dA-Fa-f]+));)/
     JSON_ESCAPE_REGEXP = /[\u2028\u2029&><]/u
 
+    # Following XML requirements: https://www.w3.org/TR/REC-xml/#NT-Name
+    TAG_NAME_START_REGEXP_SET = ":A-Z_a-z\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{2FF}\u{370}-\u{37D}\u{37F}-\u{1FFF}" \
+                                "\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}" \
+                                "\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}"
+    TAG_NAME_START_REGEXP = /[^#{TAG_NAME_START_REGEXP_SET}]/
+    TAG_NAME_FOLLOWING_REGEXP = /[^#{TAG_NAME_START_REGEXP_SET}\-.0-9\u{B7}\u{0300}-\u{036F}\u{203F}-\u{2040}]/
+    TAG_NAME_REPLACEMENT_CHAR = "_"
+
     # A utility method for escaping HTML tag characters.
     # This method is also aliased as <tt>h</tt>.
     #
@@ -116,6 +124,26 @@ def json_escape(s)
     end
 
     module_function :json_escape
+
+    # A utility method for escaping XML names of tags and names of attributes.
+    #
+    #   xml_name_escape('1 < 2 & 3')
+    #   # => "1___2___3"
+    #
+    # It follows the requirements of the specification: https://www.w3.org/TR/REC-xml/#NT-Name
+    def xml_name_escape(name)
+      name = name.to_s
+      return "" if name.blank?
+
+      starting_char = name[0].gsub(TAG_NAME_START_REGEXP, TAG_NAME_REPLACEMENT_CHAR)
+
+      return starting_char if name.size == 1
+
+      following_chars = name[1..-1].gsub(TAG_NAME_FOLLOWING_REGEXP, TAG_NAME_REPLACEMENT_CHAR)
+
+      starting_char + following_chars
+    end
+    module_function :xml_name_escape
   end
 end
 
diff --git a/activesupport/test/core_ext/string_ext_test.rb b/activesupport/test/core_ext/string_ext_test.rb
index c5a000b67a..90e0aaaf0e 100644
--- a/activesupport/test/core_ext/string_ext_test.rb
+++ b/activesupport/test/core_ext/string_ext_test.rb
@@ -1009,6 +1009,32 @@ def to_s
     expected = "© &lt;"
     assert_equal expected, ERB::Util.html_escape_once(string)
   end
+
+  test "ERB::Util.xml_name_escape should escape unsafe characters for XML names" do
+    unsafe_char = ">"
+    safe_char = "Á"
+    safe_char_after_start = "3"
+
+    assert_equal "_", ERB::Util.xml_name_escape(unsafe_char)
+    assert_equal "_#{safe_char}", ERB::Util.xml_name_escape(unsafe_char + safe_char)
+    assert_equal "__", ERB::Util.xml_name_escape(unsafe_char * 2)
+
+    assert_equal "__#{safe_char}_",
+                 ERB::Util.xml_name_escape("#{unsafe_char * 2}#{safe_char}#{unsafe_char}")
+
+    assert_equal safe_char + safe_char_after_start,
+                 ERB::Util.xml_name_escape(safe_char + safe_char_after_start)
+
+    assert_equal "_#{safe_char}",
+                 ERB::Util.xml_name_escape(safe_char_after_start + safe_char)
+
+    assert_equal "img_src_nonexistent_onerror_alert_1_",
+                 ERB::Util.xml_name_escape("img src=nonexistent onerror=alert(1)")
+
+    common_dangerous_chars = "&<>\"' %*+,/;=^|"
+    assert_equal "_" * common_dangerous_chars.size,
+                 ERB::Util.xml_name_escape(common_dangerous_chars)
+  end
 end
 
 class StringExcludeTest < ActiveSupport::TestCase
-- 
2.30.2

