Bump version to 5.11.0

2025-07-23 07:09:43 +02:00 · 2024-01-10 14:47:40 -05:00 · 2024-01-10 14:47:40 -05:00 · 510e1bd0bd
commit 510e1bd0bd
parent a32510b8e6
370 changed files with 18825 additions and 5454 deletions
--- a/vendor/golang.org/x/net/html/doc.go
+++ b/vendor/golang.org/x/net/html/doc.go
@ -92,6 +92,27 @@ example, to process each anchor node in depth-first order:
 The relevant specifications include:
 https://html.spec.whatwg.org/multipage/syntax.html and
 https://html.spec.whatwg.org/multipage/syntax.html#tokenization
+
+# Security Considerations
+
+Care should be taken when parsing and interpreting HTML, whether full documents
+or fragments, within the framework of the HTML specification, especially with
+regard to untrusted inputs.
+
+This package provides both a tokenizer and a parser, which implement the
+tokenization, and tokenization and tree construction stages of the WHATWG HTML
+parsing specification respectively. While the tokenizer parses and normalizes
+individual HTML tokens, only the parser constructs the DOM tree from the
+tokenized HTML, as described in the tree construction stage of the
+specification, dynamically modifying or extending the docuemnt's DOM tree.
+
+If your use case requires semantically well-formed HTML documents, as defined by
+the WHATWG specification, the parser should be used rather than the tokenizer.
+
+In security contexts, if trust decisions are being made using the tokenized or
+parsed content, the input must be re-serialized (for instance by using Render or
+Token.String) in order for those trust decisions to hold, as the process of
+tokenization or parsing may alter the content.
 */
 package html // import "golang.org/x/net/html"

--- a/vendor/golang.org/x/net/html/escape.go
+++ b/vendor/golang.org/x/net/html/escape.go
@ -193,6 +193,87 @@ func lower(b []byte) []byte {
 	return b
 }

+// escapeComment is like func escape but escapes its input bytes less often.
+// Per https://github.com/golang/go/issues/58246 some HTML comments are (1)
+// meaningful and (2) contain angle brackets that we'd like to avoid escaping
+// unless we have to.
+//
+// "We have to" includes the '&' byte, since that introduces other escapes.
+//
+// It also includes those bytes (not including EOF) that would otherwise end
+// the comment. Per the summary table at the bottom of comment_test.go, this is
+// the '>' byte that, per above, we'd like to avoid escaping unless we have to.
+//
+// Studying the summary table (and T actions in its '>' column) closely, we
+// only need to escape in states 43, 44, 49, 51 and 52. State 43 is at the
+// start of the comment data. State 52 is after a '!'. The other three states
+// are after a '-'.
+//
+// Our algorithm is thus to escape every '&' and to escape '>' if and only if:
+//   - The '>' is after a '!' or '-' (in the unescaped data) or
+//   - The '>' is at the start of the comment data (after the opening "<!--").
+func escapeComment(w writer, s string) error {
+	// When modifying this function, consider manually increasing the
+	// maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
+	// That increase should only be temporary, not committed, as it
+	// exponentially affects the test running time.
+
+	if len(s) == 0 {
+		return nil
+	}
+
+	// Loop:
+	//   - Grow j such that s[i:j] does not need escaping.
+	//   - If s[j] does need escaping, output s[i:j] and an escaped s[j],
+	//     resetting i and j to point past that s[j] byte.
+	i := 0
+	for j := 0; j < len(s); j++ {
+		escaped := ""
+		switch s[j] {
+		case '&':
+			escaped = "&amp;"
+
+		case '>':
+			if j > 0 {
+				if prev := s[j-1]; (prev != '!') && (prev != '-') {
+					continue
+				}
+			}
+			escaped = "&gt;"
+
+		default:
+			continue
+		}
+
+		if i < j {
+			if _, err := w.WriteString(s[i:j]); err != nil {
+				return err
+			}
+		}
+		if _, err := w.WriteString(escaped); err != nil {
+			return err
+		}
+		i = j + 1
+	}
+
+	if i < len(s) {
+		if _, err := w.WriteString(s[i:]); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// escapeCommentString is to EscapeString as escapeComment is to escape.
+func escapeCommentString(s string) string {
+	if strings.IndexAny(s, "&>") == -1 {
+		return s
+	}
+	var buf bytes.Buffer
+	escapeComment(&buf, s)
+	return buf.String()
+}
+
 const escapedChars = "&'<>\"\r"

 func escape(w writer, s string) error {
--- a/vendor/golang.org/x/net/html/parse.go
+++ b/vendor/golang.org/x/net/html/parse.go
@ -184,7 +184,7 @@ func (p *parser) clearStackToContext(s scope) {
 	}
 }

-// parseGenericRawTextElements implements the generic raw text element parsing
+// parseGenericRawTextElement implements the generic raw text element parsing
 // algorithm defined in 12.2.6.2.
 // https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
 // TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
@ -734,7 +734,7 @@ func inHeadIM(p *parser) bool {
 	return false
 }

-// 12.2.6.4.5.
+// Section 12.2.6.4.5.
 func inHeadNoscriptIM(p *parser) bool {
 	switch p.tok.Type {
 	case DoctypeToken:
--- a/vendor/golang.org/x/net/html/render.go
+++ b/vendor/golang.org/x/net/html/render.go
@ -85,7 +85,7 @@ func render1(w writer, n *Node) error {
 		if _, err := w.WriteString("<!--"); err != nil {
 			return err
 		}
-		if _, err := w.WriteString(n.Data); err != nil {
+		if err := escapeComment(w, n.Data); err != nil {
 			return err
 		}
 		if _, err := w.WriteString("-->"); err != nil {
@ -96,7 +96,7 @@ func render1(w writer, n *Node) error {
 		if _, err := w.WriteString("<!DOCTYPE "); err != nil {
 			return err
 		}
-		if _, err := w.WriteString(n.Data); err != nil {
+		if err := escape(w, n.Data); err != nil {
 			return err
 		}
 		if n.Attr != nil {
@ -194,9 +194,8 @@ func render1(w writer, n *Node) error {
 		}
 	}

-	// Render any child nodes.
-	switch n.Data {
-	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+	// Render any child nodes
+	if childTextNodesAreLiteral(n) {
 		for c := n.FirstChild; c != nil; c = c.NextSibling {
 			if c.Type == TextNode {
 				if _, err := w.WriteString(c.Data); err != nil {
@ -213,7 +212,7 @@ func render1(w writer, n *Node) error {
 			// last element in the file, with no closing tag.
 			return plaintextAbort
 		}
-	default:
+	} else {
 		for c := n.FirstChild; c != nil; c = c.NextSibling {
 			if err := render1(w, c); err != nil {
 				return err
@ -231,6 +230,27 @@ func render1(w writer, n *Node) error {
 	return w.WriteByte('>')
 }

+func childTextNodesAreLiteral(n *Node) bool {
+	// Per WHATWG HTML 13.3, if the parent of the current node is a style,
+	// script, xmp, iframe, noembed, noframes, or plaintext element, and the
+	// current node is a text node, append the value of the node's data
+	// literally. The specification is not explicit about it, but we only
+	// enforce this if we are in the HTML namespace (i.e. when the namespace is
+	// "").
+	// NOTE: we also always include noscript elements, although the
+	// specification states that they should only be rendered as such if
+	// scripting is enabled for the node (which is not something we track).
+	if n.Namespace != "" {
+		return false
+	}
+	switch n.Data {
+	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+		return true
+	default:
+		return false
+	}
+}
+
 // writeQuoted writes s to w surrounded by quotes. Normally it will use double
 // quotes, but if s contains a double quote, it will use single quotes.
 // It is used for writing the identifiers in a doctype declaration.
--- a/vendor/golang.org/x/net/html/token.go
+++ b/vendor/golang.org/x/net/html/token.go
@ -110,9 +110,9 @@ func (t Token) String() string {
 	case SelfClosingTagToken:
 		return "<" + t.tagString() + "/>"
 	case CommentToken:
-		return "<!--" + t.Data + "-->"
+		return "<!--" + escapeCommentString(t.Data) + "-->"
 	case DoctypeToken:
-		return "<!DOCTYPE " + t.Data + ">"
+		return "<!DOCTYPE " + EscapeString(t.Data) + ">"
 	}
 	return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
 }
@ -598,6 +598,11 @@ scriptDataDoubleEscapeEnd:
 // readComment reads the next comment token starting with "<!--". The opening
 // "<!--" has already been consumed.
 func (z *Tokenizer) readComment() {
+	// When modifying this function, consider manually increasing the
+	// maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
+	// That increase should only be temporary, not committed, as it
+	// exponentially affects the test running time.
+
 	z.data.start = z.raw.end
 	defer func() {
 		if z.data.end < z.data.start {
@ -605,14 +610,13 @@ func (z *Tokenizer) readComment() {
 			z.data.end = z.data.start
 		}
 	}()
-	for dashCount := 2; ; {
+
+	var dashCount int
+	beginning := true
+	for {
 		c := z.readByte()
 		if z.err != nil {
-			// Ignore up to two dashes at EOF.
-			if dashCount > 2 {
-				dashCount = 2
-			}
-			z.data.end = z.raw.end - dashCount
+			z.data.end = z.calculateAbruptCommentDataEnd()
 			return
 		}
 		switch c {
@ -620,7 +624,7 @@ func (z *Tokenizer) readComment() {
 			dashCount++
 			continue
 		case '>':
-			if dashCount >= 2 {
+			if dashCount >= 2 || beginning {
 				z.data.end = z.raw.end - len("-->")
 				return
 			}
@ -628,19 +632,52 @@ func (z *Tokenizer) readComment() {
 			if dashCount >= 2 {
 				c = z.readByte()
 				if z.err != nil {
-					z.data.end = z.raw.end
+					z.data.end = z.calculateAbruptCommentDataEnd()
 					return
-				}
-				if c == '>' {
+				} else if c == '>' {
 					z.data.end = z.raw.end - len("--!>")
 					return
+				} else if c == '-' {
+					dashCount = 1
+					beginning = false
+					continue
 				}
 			}
 		}
 		dashCount = 0
+		beginning = false
 	}
 }

+func (z *Tokenizer) calculateAbruptCommentDataEnd() int {
+	raw := z.Raw()
+	const prefixLen = len("<!--")
+	if len(raw) >= prefixLen {
+		raw = raw[prefixLen:]
+		if hasSuffix(raw, "--!") {
+			return z.raw.end - 3
+		} else if hasSuffix(raw, "--") {
+			return z.raw.end - 2
+		} else if hasSuffix(raw, "-") {
+			return z.raw.end - 1
+		}
+	}
+	return z.raw.end
+}
+
+func hasSuffix(b []byte, suffix string) bool {
+	if len(b) < len(suffix) {
+		return false
+	}
+	b = b[len(b)-len(suffix):]
+	for i := range b {
+		if b[i] != suffix[i] {
+			return false
+		}
+	}
+	return true
+}
+
 // readUntilCloseAngle reads until the next ">".
 func (z *Tokenizer) readUntilCloseAngle() {
 	z.data.start = z.raw.end
@ -876,7 +913,14 @@ func (z *Tokenizer) readTagAttrKey() {
 		case ' ', '\n', '\r', '\t', '\f', '/':
 			z.pendingAttr[0].end = z.raw.end - 1
 			return
-		case '=', '>':
+		case '=':
+			if z.pendingAttr[0].start+1 == z.raw.end {
+				// WHATWG 13.2.5.32, if we see an equals sign before the attribute name
+				// begins, we treat it as a character in the attribute name and continue.
+				continue
+			}
+			fallthrough
+		case '>':
 			z.raw.end--
 			z.pendingAttr[0].end = z.raw.end
 			return