File: 0003-html-properly-handle-trailing-solidus-in-unquoted-at.patch

package info (click to toggle)
golang-golang-x-net 1%3A0.27.0-2
links: PTS, VCS
area: main
in suites: experimental, forky, sid, trixie, trixie-backports
size: 8,636 kB
sloc: asm: 18; makefile: 12; sh: 7
file content (91 lines) | stat: -rw-r--r-- 3,107 bytes
From: Roland Shoemaker <roland@golang.org>
Date: Mon, 24 Feb 2025 11:18:31 -0800
Subject: html: properly handle trailing solidus in unquoted attribute value
 in foreign content

The parser properly treats tags like <p a=/> as <p a="/">, but the
tokenizer emits the SelfClosingTagToken token incorrectly. When the
parser is used to parse foreign content, this results in an incorrect
DOM.

Thanks to Sean Ng (https://ensy.zip) for reporting this issue.

Fixes golang/go#73070
Fixes CVE-2025-22872

Change-Id: I65c18df6d6244bf943b61e6c7a87895929e78f4f
Reviewed-on: https://go-review.googlesource.com/c/net/+/661256
Reviewed-by: Neal Patel <nealpatel@google.com>
Reviewed-by: Roland Shoemaker <roland@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Gopher Robot <gobot@golang.org>
---
 html/token.go      | 18 ++++++++++++++++--
 html/token_test.go | 18 ++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/html/token.go b/html/token.go
index 3c57880..6598c1f 100644
--- a/html/token.go
+++ b/html/token.go
@@ -839,8 +839,22 @@ func (z *Tokenizer) readStartTag() TokenType {
 	if raw {
 		z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
 	}
-	// Look for a self-closing token like "<br/>".
-	if z.err == nil && z.buf[z.raw.end-2] == '/' {
+	// Look for a self-closing token (e.g. <br/>).
+	//
+	// Originally, we did this by just checking that the last character of the
+	// tag (ignoring the closing bracket) was a solidus (/) character, but this
+	// is not always accurate.
+	//
+	// We need to be careful that we don't misinterpret a non-self-closing tag
+	// as self-closing, as can happen if the tag contains unquoted attribute
+	// values (i.e. <p a=/>).
+	//
+	// To avoid this, we check that the last non-bracket character of the tag
+	// (z.raw.end-2) isn't the same character as the last non-quote character of
+	// the last attribute of the tag (z.attr[nAttrs-1][1].end-1), if the tag has
+	// attributes.
+	nAttrs := len(z.attr)
+	if z.err == nil && z.buf[z.raw.end-2] == '/' && (nAttrs == 0 || z.raw.end-2 != z.attr[nAttrs-1][1].end-1) {
 		return SelfClosingTagToken
 	}
 	return StartTagToken
diff --git a/html/token_test.go b/html/token_test.go
index a36d112..44773f1 100644
--- a/html/token_test.go
+++ b/html/token_test.go
@@ -616,6 +616,16 @@ var tokenTests = []tokenTest{
 		`<p a/ ="">`,
 		`<p a="" =""="">`,
 	},
+	{
+		"slash at end of unquoted attribute value",
+		`<p a="\">`,
+		`<p a="\">`,
+	},
+	{
+		"self-closing tag with attribute",
+		`<p a=/>`,
+		`<p a="/">`,
+	},
 }
 
 func TestTokenizer(t *testing.T) {
@@ -815,6 +825,14 @@ func TestReaderEdgeCases(t *testing.T) {
 	}
 }
 
+func TestSelfClosingTagValueConfusion(t *testing.T) {
+	z := NewTokenizer(strings.NewReader(`<p a=/>`))
+	tok := z.Next()
+	if tok != StartTagToken {
+		t.Fatalf("unexpected token type: got %s, want %s", tok, StartTagToken)
+	}
+}
+
 // zeroOneByteReader is like a strings.Reader that alternates between
 // returning 0 bytes and 1 byte at a time.
 type zeroOneByteReader struct {