1
0
Fork 0
mirror of https://github.com/documize/community.git synced 2025-07-23 07:09:43 +02:00

Bump version to 5.11.0

This commit is contained in:
Harvey Kandola 2024-01-10 14:47:40 -05:00
parent a32510b8e6
commit 510e1bd0bd
370 changed files with 18825 additions and 5454 deletions

21
vendor/golang.org/x/net/html/doc.go generated vendored
View file

@ -92,6 +92,27 @@ example, to process each anchor node in depth-first order:
The relevant specifications include:
https://html.spec.whatwg.org/multipage/syntax.html and
https://html.spec.whatwg.org/multipage/syntax.html#tokenization
# Security Considerations
Care should be taken when parsing and interpreting HTML, whether full documents
or fragments, within the framework of the HTML specification, especially with
regard to untrusted inputs.
This package provides both a tokenizer and a parser, which implement the
tokenization, and tokenization and tree construction stages of the WHATWG HTML
parsing specification respectively. While the tokenizer parses and normalizes
individual HTML tokens, only the parser constructs the DOM tree from the
tokenized HTML, as described in the tree construction stage of the
specification, dynamically modifying or extending the docuemnt's DOM tree.
If your use case requires semantically well-formed HTML documents, as defined by
the WHATWG specification, the parser should be used rather than the tokenizer.
In security contexts, if trust decisions are being made using the tokenized or
parsed content, the input must be re-serialized (for instance by using Render or
Token.String) in order for those trust decisions to hold, as the process of
tokenization or parsing may alter the content.
*/
package html // import "golang.org/x/net/html"

View file

@ -193,6 +193,87 @@ func lower(b []byte) []byte {
return b
}
// escapeComment is like func escape but escapes its input bytes less often.
// Per https://github.com/golang/go/issues/58246 some HTML comments are (1)
// meaningful and (2) contain angle brackets that we'd like to avoid escaping
// unless we have to.
//
// "We have to" includes the '&' byte, since that introduces other escapes.
//
// It also includes those bytes (not including EOF) that would otherwise end
// the comment. Per the summary table at the bottom of comment_test.go, this is
// the '>' byte that, per above, we'd like to avoid escaping unless we have to.
//
// Studying the summary table (and T actions in its '>' column) closely, we
// only need to escape in states 43, 44, 49, 51 and 52. State 43 is at the
// start of the comment data. State 52 is after a '!'. The other three states
// are after a '-'.
//
// Our algorithm is thus to escape every '&' and to escape '>' if and only if:
// - The '>' is after a '!' or '-' (in the unescaped data) or
// - The '>' is at the start of the comment data (after the opening "<!--").
func escapeComment(w writer, s string) error {
// When modifying this function, consider manually increasing the
// maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
// That increase should only be temporary, not committed, as it
// exponentially affects the test running time.
if len(s) == 0 {
return nil
}
// Loop:
// - Grow j such that s[i:j] does not need escaping.
// - If s[j] does need escaping, output s[i:j] and an escaped s[j],
// resetting i and j to point past that s[j] byte.
i := 0
for j := 0; j < len(s); j++ {
escaped := ""
switch s[j] {
case '&':
escaped = "&amp;"
case '>':
if j > 0 {
if prev := s[j-1]; (prev != '!') && (prev != '-') {
continue
}
}
escaped = "&gt;"
default:
continue
}
if i < j {
if _, err := w.WriteString(s[i:j]); err != nil {
return err
}
}
if _, err := w.WriteString(escaped); err != nil {
return err
}
i = j + 1
}
if i < len(s) {
if _, err := w.WriteString(s[i:]); err != nil {
return err
}
}
return nil
}
// escapeCommentString is to EscapeString as escapeComment is to escape.
func escapeCommentString(s string) string {
if strings.IndexAny(s, "&>") == -1 {
return s
}
var buf bytes.Buffer
escapeComment(&buf, s)
return buf.String()
}
const escapedChars = "&'<>\"\r"
func escape(w writer, s string) error {

View file

@ -184,7 +184,7 @@ func (p *parser) clearStackToContext(s scope) {
}
}
// parseGenericRawTextElements implements the generic raw text element parsing
// parseGenericRawTextElement implements the generic raw text element parsing
// algorithm defined in 12.2.6.2.
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
@ -734,7 +734,7 @@ func inHeadIM(p *parser) bool {
return false
}
// 12.2.6.4.5.
// Section 12.2.6.4.5.
func inHeadNoscriptIM(p *parser) bool {
switch p.tok.Type {
case DoctypeToken:

View file

@ -85,7 +85,7 @@ func render1(w writer, n *Node) error {
if _, err := w.WriteString("<!--"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
if err := escapeComment(w, n.Data); err != nil {
return err
}
if _, err := w.WriteString("-->"); err != nil {
@ -96,7 +96,7 @@ func render1(w writer, n *Node) error {
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
if err := escape(w, n.Data); err != nil {
return err
}
if n.Attr != nil {
@ -194,9 +194,8 @@ func render1(w writer, n *Node) error {
}
}
// Render any child nodes.
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
// Render any child nodes
if childTextNodesAreLiteral(n) {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == TextNode {
if _, err := w.WriteString(c.Data); err != nil {
@ -213,7 +212,7 @@ func render1(w writer, n *Node) error {
// last element in the file, with no closing tag.
return plaintextAbort
}
default:
} else {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
@ -231,6 +230,27 @@ func render1(w writer, n *Node) error {
return w.WriteByte('>')
}
func childTextNodesAreLiteral(n *Node) bool {
// Per WHATWG HTML 13.3, if the parent of the current node is a style,
// script, xmp, iframe, noembed, noframes, or plaintext element, and the
// current node is a text node, append the value of the node's data
// literally. The specification is not explicit about it, but we only
// enforce this if we are in the HTML namespace (i.e. when the namespace is
// "").
// NOTE: we also always include noscript elements, although the
// specification states that they should only be rendered as such if
// scripting is enabled for the node (which is not something we track).
if n.Namespace != "" {
return false
}
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
return true
default:
return false
}
}
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
// quotes, but if s contains a double quote, it will use single quotes.
// It is used for writing the identifiers in a doctype declaration.

View file

@ -110,9 +110,9 @@ func (t Token) String() string {
case SelfClosingTagToken:
return "<" + t.tagString() + "/>"
case CommentToken:
return "<!--" + t.Data + "-->"
return "<!--" + escapeCommentString(t.Data) + "-->"
case DoctypeToken:
return "<!DOCTYPE " + t.Data + ">"
return "<!DOCTYPE " + EscapeString(t.Data) + ">"
}
return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
}
@ -598,6 +598,11 @@ scriptDataDoubleEscapeEnd:
// readComment reads the next comment token starting with "<!--". The opening
// "<!--" has already been consumed.
func (z *Tokenizer) readComment() {
// When modifying this function, consider manually increasing the
// maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
// That increase should only be temporary, not committed, as it
// exponentially affects the test running time.
z.data.start = z.raw.end
defer func() {
if z.data.end < z.data.start {
@ -605,14 +610,13 @@ func (z *Tokenizer) readComment() {
z.data.end = z.data.start
}
}()
for dashCount := 2; ; {
var dashCount int
beginning := true
for {
c := z.readByte()
if z.err != nil {
// Ignore up to two dashes at EOF.
if dashCount > 2 {
dashCount = 2
}
z.data.end = z.raw.end - dashCount
z.data.end = z.calculateAbruptCommentDataEnd()
return
}
switch c {
@ -620,7 +624,7 @@ func (z *Tokenizer) readComment() {
dashCount++
continue
case '>':
if dashCount >= 2 {
if dashCount >= 2 || beginning {
z.data.end = z.raw.end - len("-->")
return
}
@ -628,19 +632,52 @@ func (z *Tokenizer) readComment() {
if dashCount >= 2 {
c = z.readByte()
if z.err != nil {
z.data.end = z.raw.end
z.data.end = z.calculateAbruptCommentDataEnd()
return
}
if c == '>' {
} else if c == '>' {
z.data.end = z.raw.end - len("--!>")
return
} else if c == '-' {
dashCount = 1
beginning = false
continue
}
}
}
dashCount = 0
beginning = false
}
}
func (z *Tokenizer) calculateAbruptCommentDataEnd() int {
raw := z.Raw()
const prefixLen = len("<!--")
if len(raw) >= prefixLen {
raw = raw[prefixLen:]
if hasSuffix(raw, "--!") {
return z.raw.end - 3
} else if hasSuffix(raw, "--") {
return z.raw.end - 2
} else if hasSuffix(raw, "-") {
return z.raw.end - 1
}
}
return z.raw.end
}
func hasSuffix(b []byte, suffix string) bool {
if len(b) < len(suffix) {
return false
}
b = b[len(b)-len(suffix):]
for i := range b {
if b[i] != suffix[i] {
return false
}
}
return true
}
// readUntilCloseAngle reads until the next ">".
func (z *Tokenizer) readUntilCloseAngle() {
z.data.start = z.raw.end
@ -876,7 +913,14 @@ func (z *Tokenizer) readTagAttrKey() {
case ' ', '\n', '\r', '\t', '\f', '/':
z.pendingAttr[0].end = z.raw.end - 1
return
case '=', '>':
case '=':
if z.pendingAttr[0].start+1 == z.raw.end {
// WHATWG 13.2.5.32, if we see an equals sign before the attribute name
// begins, we treat it as a character in the attribute name and continue.
continue
}
fallthrough
case '>':
z.raw.end--
z.pendingAttr[0].end = z.raw.end
return