mirror of
https://github.com/documize/community.git
synced 2025-07-23 07:09:43 +02:00
Bump version to 5.11.0
This commit is contained in:
parent
a32510b8e6
commit
510e1bd0bd
370 changed files with 18825 additions and 5454 deletions
21
vendor/golang.org/x/net/html/doc.go
generated
vendored
21
vendor/golang.org/x/net/html/doc.go
generated
vendored
|
@ -92,6 +92,27 @@ example, to process each anchor node in depth-first order:
|
|||
The relevant specifications include:
|
||||
https://html.spec.whatwg.org/multipage/syntax.html and
|
||||
https://html.spec.whatwg.org/multipage/syntax.html#tokenization
|
||||
|
||||
# Security Considerations
|
||||
|
||||
Care should be taken when parsing and interpreting HTML, whether full documents
|
||||
or fragments, within the framework of the HTML specification, especially with
|
||||
regard to untrusted inputs.
|
||||
|
||||
This package provides both a tokenizer and a parser, which implement the
|
||||
tokenization, and tokenization and tree construction stages of the WHATWG HTML
|
||||
parsing specification respectively. While the tokenizer parses and normalizes
|
||||
individual HTML tokens, only the parser constructs the DOM tree from the
|
||||
tokenized HTML, as described in the tree construction stage of the
|
||||
specification, dynamically modifying or extending the docuemnt's DOM tree.
|
||||
|
||||
If your use case requires semantically well-formed HTML documents, as defined by
|
||||
the WHATWG specification, the parser should be used rather than the tokenizer.
|
||||
|
||||
In security contexts, if trust decisions are being made using the tokenized or
|
||||
parsed content, the input must be re-serialized (for instance by using Render or
|
||||
Token.String) in order for those trust decisions to hold, as the process of
|
||||
tokenization or parsing may alter the content.
|
||||
*/
|
||||
package html // import "golang.org/x/net/html"
|
||||
|
||||
|
|
81
vendor/golang.org/x/net/html/escape.go
generated
vendored
81
vendor/golang.org/x/net/html/escape.go
generated
vendored
|
@ -193,6 +193,87 @@ func lower(b []byte) []byte {
|
|||
return b
|
||||
}
|
||||
|
||||
// escapeComment is like func escape but escapes its input bytes less often.
|
||||
// Per https://github.com/golang/go/issues/58246 some HTML comments are (1)
|
||||
// meaningful and (2) contain angle brackets that we'd like to avoid escaping
|
||||
// unless we have to.
|
||||
//
|
||||
// "We have to" includes the '&' byte, since that introduces other escapes.
|
||||
//
|
||||
// It also includes those bytes (not including EOF) that would otherwise end
|
||||
// the comment. Per the summary table at the bottom of comment_test.go, this is
|
||||
// the '>' byte that, per above, we'd like to avoid escaping unless we have to.
|
||||
//
|
||||
// Studying the summary table (and T actions in its '>' column) closely, we
|
||||
// only need to escape in states 43, 44, 49, 51 and 52. State 43 is at the
|
||||
// start of the comment data. State 52 is after a '!'. The other three states
|
||||
// are after a '-'.
|
||||
//
|
||||
// Our algorithm is thus to escape every '&' and to escape '>' if and only if:
|
||||
// - The '>' is after a '!' or '-' (in the unescaped data) or
|
||||
// - The '>' is at the start of the comment data (after the opening "<!--").
|
||||
func escapeComment(w writer, s string) error {
|
||||
// When modifying this function, consider manually increasing the
|
||||
// maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
|
||||
// That increase should only be temporary, not committed, as it
|
||||
// exponentially affects the test running time.
|
||||
|
||||
if len(s) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Loop:
|
||||
// - Grow j such that s[i:j] does not need escaping.
|
||||
// - If s[j] does need escaping, output s[i:j] and an escaped s[j],
|
||||
// resetting i and j to point past that s[j] byte.
|
||||
i := 0
|
||||
for j := 0; j < len(s); j++ {
|
||||
escaped := ""
|
||||
switch s[j] {
|
||||
case '&':
|
||||
escaped = "&"
|
||||
|
||||
case '>':
|
||||
if j > 0 {
|
||||
if prev := s[j-1]; (prev != '!') && (prev != '-') {
|
||||
continue
|
||||
}
|
||||
}
|
||||
escaped = ">"
|
||||
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
if i < j {
|
||||
if _, err := w.WriteString(s[i:j]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := w.WriteString(escaped); err != nil {
|
||||
return err
|
||||
}
|
||||
i = j + 1
|
||||
}
|
||||
|
||||
if i < len(s) {
|
||||
if _, err := w.WriteString(s[i:]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// escapeCommentString is to EscapeString as escapeComment is to escape.
|
||||
func escapeCommentString(s string) string {
|
||||
if strings.IndexAny(s, "&>") == -1 {
|
||||
return s
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
escapeComment(&buf, s)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
const escapedChars = "&'<>\"\r"
|
||||
|
||||
func escape(w writer, s string) error {
|
||||
|
|
4
vendor/golang.org/x/net/html/parse.go
generated
vendored
4
vendor/golang.org/x/net/html/parse.go
generated
vendored
|
@ -184,7 +184,7 @@ func (p *parser) clearStackToContext(s scope) {
|
|||
}
|
||||
}
|
||||
|
||||
// parseGenericRawTextElements implements the generic raw text element parsing
|
||||
// parseGenericRawTextElement implements the generic raw text element parsing
|
||||
// algorithm defined in 12.2.6.2.
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
|
||||
// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
|
||||
|
@ -734,7 +734,7 @@ func inHeadIM(p *parser) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// 12.2.6.4.5.
|
||||
// Section 12.2.6.4.5.
|
||||
func inHeadNoscriptIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case DoctypeToken:
|
||||
|
|
32
vendor/golang.org/x/net/html/render.go
generated
vendored
32
vendor/golang.org/x/net/html/render.go
generated
vendored
|
@ -85,7 +85,7 @@ func render1(w writer, n *Node) error {
|
|||
if _, err := w.WriteString("<!--"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
if err := escapeComment(w, n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString("-->"); err != nil {
|
||||
|
@ -96,7 +96,7 @@ func render1(w writer, n *Node) error {
|
|||
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
if err := escape(w, n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
if n.Attr != nil {
|
||||
|
@ -194,9 +194,8 @@ func render1(w writer, n *Node) error {
|
|||
}
|
||||
}
|
||||
|
||||
// Render any child nodes.
|
||||
switch n.Data {
|
||||
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
|
||||
// Render any child nodes
|
||||
if childTextNodesAreLiteral(n) {
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if c.Type == TextNode {
|
||||
if _, err := w.WriteString(c.Data); err != nil {
|
||||
|
@ -213,7 +212,7 @@ func render1(w writer, n *Node) error {
|
|||
// last element in the file, with no closing tag.
|
||||
return plaintextAbort
|
||||
}
|
||||
default:
|
||||
} else {
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
|
@ -231,6 +230,27 @@ func render1(w writer, n *Node) error {
|
|||
return w.WriteByte('>')
|
||||
}
|
||||
|
||||
func childTextNodesAreLiteral(n *Node) bool {
|
||||
// Per WHATWG HTML 13.3, if the parent of the current node is a style,
|
||||
// script, xmp, iframe, noembed, noframes, or plaintext element, and the
|
||||
// current node is a text node, append the value of the node's data
|
||||
// literally. The specification is not explicit about it, but we only
|
||||
// enforce this if we are in the HTML namespace (i.e. when the namespace is
|
||||
// "").
|
||||
// NOTE: we also always include noscript elements, although the
|
||||
// specification states that they should only be rendered as such if
|
||||
// scripting is enabled for the node (which is not something we track).
|
||||
if n.Namespace != "" {
|
||||
return false
|
||||
}
|
||||
switch n.Data {
|
||||
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
|
||||
// quotes, but if s contains a double quote, it will use single quotes.
|
||||
// It is used for writing the identifiers in a doctype declaration.
|
||||
|
|
70
vendor/golang.org/x/net/html/token.go
generated
vendored
70
vendor/golang.org/x/net/html/token.go
generated
vendored
|
@ -110,9 +110,9 @@ func (t Token) String() string {
|
|||
case SelfClosingTagToken:
|
||||
return "<" + t.tagString() + "/>"
|
||||
case CommentToken:
|
||||
return "<!--" + t.Data + "-->"
|
||||
return "<!--" + escapeCommentString(t.Data) + "-->"
|
||||
case DoctypeToken:
|
||||
return "<!DOCTYPE " + t.Data + ">"
|
||||
return "<!DOCTYPE " + EscapeString(t.Data) + ">"
|
||||
}
|
||||
return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
|
||||
}
|
||||
|
@ -598,6 +598,11 @@ scriptDataDoubleEscapeEnd:
|
|||
// readComment reads the next comment token starting with "<!--". The opening
|
||||
// "<!--" has already been consumed.
|
||||
func (z *Tokenizer) readComment() {
|
||||
// When modifying this function, consider manually increasing the
|
||||
// maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
|
||||
// That increase should only be temporary, not committed, as it
|
||||
// exponentially affects the test running time.
|
||||
|
||||
z.data.start = z.raw.end
|
||||
defer func() {
|
||||
if z.data.end < z.data.start {
|
||||
|
@ -605,14 +610,13 @@ func (z *Tokenizer) readComment() {
|
|||
z.data.end = z.data.start
|
||||
}
|
||||
}()
|
||||
for dashCount := 2; ; {
|
||||
|
||||
var dashCount int
|
||||
beginning := true
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
// Ignore up to two dashes at EOF.
|
||||
if dashCount > 2 {
|
||||
dashCount = 2
|
||||
}
|
||||
z.data.end = z.raw.end - dashCount
|
||||
z.data.end = z.calculateAbruptCommentDataEnd()
|
||||
return
|
||||
}
|
||||
switch c {
|
||||
|
@ -620,7 +624,7 @@ func (z *Tokenizer) readComment() {
|
|||
dashCount++
|
||||
continue
|
||||
case '>':
|
||||
if dashCount >= 2 {
|
||||
if dashCount >= 2 || beginning {
|
||||
z.data.end = z.raw.end - len("-->")
|
||||
return
|
||||
}
|
||||
|
@ -628,19 +632,52 @@ func (z *Tokenizer) readComment() {
|
|||
if dashCount >= 2 {
|
||||
c = z.readByte()
|
||||
if z.err != nil {
|
||||
z.data.end = z.raw.end
|
||||
z.data.end = z.calculateAbruptCommentDataEnd()
|
||||
return
|
||||
}
|
||||
if c == '>' {
|
||||
} else if c == '>' {
|
||||
z.data.end = z.raw.end - len("--!>")
|
||||
return
|
||||
} else if c == '-' {
|
||||
dashCount = 1
|
||||
beginning = false
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
dashCount = 0
|
||||
beginning = false
|
||||
}
|
||||
}
|
||||
|
||||
func (z *Tokenizer) calculateAbruptCommentDataEnd() int {
|
||||
raw := z.Raw()
|
||||
const prefixLen = len("<!--")
|
||||
if len(raw) >= prefixLen {
|
||||
raw = raw[prefixLen:]
|
||||
if hasSuffix(raw, "--!") {
|
||||
return z.raw.end - 3
|
||||
} else if hasSuffix(raw, "--") {
|
||||
return z.raw.end - 2
|
||||
} else if hasSuffix(raw, "-") {
|
||||
return z.raw.end - 1
|
||||
}
|
||||
}
|
||||
return z.raw.end
|
||||
}
|
||||
|
||||
func hasSuffix(b []byte, suffix string) bool {
|
||||
if len(b) < len(suffix) {
|
||||
return false
|
||||
}
|
||||
b = b[len(b)-len(suffix):]
|
||||
for i := range b {
|
||||
if b[i] != suffix[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// readUntilCloseAngle reads until the next ">".
|
||||
func (z *Tokenizer) readUntilCloseAngle() {
|
||||
z.data.start = z.raw.end
|
||||
|
@ -876,7 +913,14 @@ func (z *Tokenizer) readTagAttrKey() {
|
|||
case ' ', '\n', '\r', '\t', '\f', '/':
|
||||
z.pendingAttr[0].end = z.raw.end - 1
|
||||
return
|
||||
case '=', '>':
|
||||
case '=':
|
||||
if z.pendingAttr[0].start+1 == z.raw.end {
|
||||
// WHATWG 13.2.5.32, if we see an equals sign before the attribute name
|
||||
// begins, we treat it as a character in the attribute name and continue.
|
||||
continue
|
||||
}
|
||||
fallthrough
|
||||
case '>':
|
||||
z.raw.end--
|
||||
z.pendingAttr[0].end = z.raw.end
|
||||
return
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue