summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/net/html
diff options
context:
space:
mode:
authorValentin Rothberg <rothberg@redhat.com>2019-06-24 11:29:13 +0200
committerValentin Rothberg <rothberg@redhat.com>2019-06-24 13:20:59 +0200
commitd697456dc90adbaf68224ed7c115b38d5855e582 (patch)
tree5fd88c48b34e7bead0028fa97e39f43f03880642 /vendor/golang.org/x/net/html
parenta3211b73c62a9fcc13f09305bf629ef507b26d34 (diff)
downloadpodman-d697456dc90adbaf68224ed7c115b38d5855e582.tar.gz
podman-d697456dc90adbaf68224ed7c115b38d5855e582.tar.bz2
podman-d697456dc90adbaf68224ed7c115b38d5855e582.zip
migrate to go-modules
Signed-off-by: Valentin Rothberg <rothberg@redhat.com>
Diffstat (limited to 'vendor/golang.org/x/net/html')
-rw-r--r--vendor/golang.org/x/net/html/atom/gen.go712
-rw-r--r--vendor/golang.org/x/net/html/node.go2
-rw-r--r--vendor/golang.org/x/net/html/parse.go158
3 files changed, 845 insertions, 27 deletions
diff --git a/vendor/golang.org/x/net/html/atom/gen.go b/vendor/golang.org/x/net/html/atom/gen.go
new file mode 100644
index 000000000..5d052781b
--- /dev/null
+++ b/vendor/golang.org/x/net/html/atom/gen.go
@@ -0,0 +1,712 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+//go:generate go run gen.go
+//go:generate go run gen.go -test
+
+package main
+
+import (
+ "bytes"
+ "flag"
+ "fmt"
+ "go/format"
+ "io/ioutil"
+ "math/rand"
+ "os"
+ "sort"
+ "strings"
+)
+
+// identifier converts s to a Go exported identifier.
+// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
+func identifier(s string) string {
+ b := make([]byte, 0, len(s))
+ cap := true
+ for _, c := range s {
+ if c == '-' {
+ cap = true
+ continue
+ }
+ if cap && 'a' <= c && c <= 'z' {
+ c -= 'a' - 'A'
+ }
+ cap = false
+ b = append(b, byte(c))
+ }
+ return string(b)
+}
+
+var test = flag.Bool("test", false, "generate table_test.go")
+
+func genFile(name string, buf *bytes.Buffer) {
+ b, err := format.Source(buf.Bytes())
+ if err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(1)
+ }
+ if err := ioutil.WriteFile(name, b, 0644); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(1)
+ }
+}
+
+func main() {
+ flag.Parse()
+
+ var all []string
+ all = append(all, elements...)
+ all = append(all, attributes...)
+ all = append(all, eventHandlers...)
+ all = append(all, extra...)
+ sort.Strings(all)
+
+ // uniq - lists have dups
+ w := 0
+ for _, s := range all {
+ if w == 0 || all[w-1] != s {
+ all[w] = s
+ w++
+ }
+ }
+ all = all[:w]
+
+ if *test {
+ var buf bytes.Buffer
+ fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
+ fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
+ fmt.Fprintln(&buf, "package atom\n")
+ fmt.Fprintln(&buf, "var testAtomList = []string{")
+ for _, s := range all {
+ fmt.Fprintf(&buf, "\t%q,\n", s)
+ }
+ fmt.Fprintln(&buf, "}")
+
+ genFile("table_test.go", &buf)
+ return
+ }
+
+ // Find hash that minimizes table size.
+ var best *table
+ for i := 0; i < 1000000; i++ {
+ if best != nil && 1<<(best.k-1) < len(all) {
+ break
+ }
+ h := rand.Uint32()
+ for k := uint(0); k <= 16; k++ {
+ if best != nil && k >= best.k {
+ break
+ }
+ var t table
+ if t.init(h, k, all) {
+ best = &t
+ break
+ }
+ }
+ }
+ if best == nil {
+ fmt.Fprintf(os.Stderr, "failed to construct string table\n")
+ os.Exit(1)
+ }
+
+ // Lay out strings, using overlaps when possible.
+ layout := append([]string{}, all...)
+
+ // Remove strings that are substrings of other strings
+ for changed := true; changed; {
+ changed = false
+ for i, s := range layout {
+ if s == "" {
+ continue
+ }
+ for j, t := range layout {
+ if i != j && t != "" && strings.Contains(s, t) {
+ changed = true
+ layout[j] = ""
+ }
+ }
+ }
+ }
+
+ // Join strings where one suffix matches another prefix.
+ for {
+ // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
+ // maximizing overlap length k.
+ besti := -1
+ bestj := -1
+ bestk := 0
+ for i, s := range layout {
+ if s == "" {
+ continue
+ }
+ for j, t := range layout {
+ if i == j {
+ continue
+ }
+ for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
+ if s[len(s)-k:] == t[:k] {
+ besti = i
+ bestj = j
+ bestk = k
+ }
+ }
+ }
+ }
+ if bestk > 0 {
+ layout[besti] += layout[bestj][bestk:]
+ layout[bestj] = ""
+ continue
+ }
+ break
+ }
+
+ text := strings.Join(layout, "")
+
+ atom := map[string]uint32{}
+ for _, s := range all {
+ off := strings.Index(text, s)
+ if off < 0 {
+ panic("lost string " + s)
+ }
+ atom[s] = uint32(off<<8 | len(s))
+ }
+
+ var buf bytes.Buffer
+ // Generate the Go code.
+ fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
+ fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
+ fmt.Fprintln(&buf, "package atom\n\nconst (")
+
+ // compute max len
+ maxLen := 0
+ for _, s := range all {
+ if maxLen < len(s) {
+ maxLen = len(s)
+ }
+ fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
+ }
+ fmt.Fprintln(&buf, ")\n")
+
+ fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
+ fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
+
+ fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
+ for i, s := range best.tab {
+ if s == "" {
+ continue
+ }
+ fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
+ }
+ fmt.Fprintf(&buf, "}\n")
+ datasize := (1 << best.k) * 4
+
+ fmt.Fprintln(&buf, "const atomText =")
+ textsize := len(text)
+ for len(text) > 60 {
+ fmt.Fprintf(&buf, "\t%q +\n", text[:60])
+ text = text[60:]
+ }
+ fmt.Fprintf(&buf, "\t%q\n\n", text)
+
+ genFile("table.go", &buf)
+
+ fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
+}
+
+type byLen []string
+
+func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
+func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
+func (x byLen) Len() int { return len(x) }
+
+// fnv computes the FNV hash with an arbitrary starting value h.
+func fnv(h uint32, s string) uint32 {
+ for i := 0; i < len(s); i++ {
+ h ^= uint32(s[i])
+ h *= 16777619
+ }
+ return h
+}
+
+// A table represents an attempt at constructing the lookup table.
+// The lookup table uses cuckoo hashing, meaning that each string
+// can be found in one of two positions.
+type table struct {
+ h0 uint32
+ k uint
+ mask uint32
+ tab []string
+}
+
+// hash returns the two hashes for s.
+func (t *table) hash(s string) (h1, h2 uint32) {
+ h := fnv(t.h0, s)
+ h1 = h & t.mask
+ h2 = (h >> 16) & t.mask
+ return
+}
+
+// init initializes the table with the given parameters.
+// h0 is the initial hash value,
+// k is the number of bits of hash value to use, and
+// x is the list of strings to store in the table.
+// init returns false if the table cannot be constructed.
+func (t *table) init(h0 uint32, k uint, x []string) bool {
+ t.h0 = h0
+ t.k = k
+ t.tab = make([]string, 1<<k)
+ t.mask = 1<<k - 1
+ for _, s := range x {
+ if !t.insert(s) {
+ return false
+ }
+ }
+ return true
+}
+
+// insert inserts s in the table.
+func (t *table) insert(s string) bool {
+ h1, h2 := t.hash(s)
+ if t.tab[h1] == "" {
+ t.tab[h1] = s
+ return true
+ }
+ if t.tab[h2] == "" {
+ t.tab[h2] = s
+ return true
+ }
+ if t.push(h1, 0) {
+ t.tab[h1] = s
+ return true
+ }
+ if t.push(h2, 0) {
+ t.tab[h2] = s
+ return true
+ }
+ return false
+}
+
+// push attempts to push aside the entry in slot i.
+func (t *table) push(i uint32, depth int) bool {
+ if depth > len(t.tab) {
+ return false
+ }
+ s := t.tab[i]
+ h1, h2 := t.hash(s)
+ j := h1 + h2 - i
+ if t.tab[j] != "" && !t.push(j, depth+1) {
+ return false
+ }
+ t.tab[j] = s
+ return true
+}
+
+// The lists of element names and attribute keys were taken from
+// https://html.spec.whatwg.org/multipage/indices.html#index
+// as of the "HTML Living Standard - Last Updated 16 April 2018" version.
+
+// "command", "keygen" and "menuitem" have been removed from the spec,
+// but are kept here for backwards compatibility.
+var elements = []string{
+ "a",
+ "abbr",
+ "address",
+ "area",
+ "article",
+ "aside",
+ "audio",
+ "b",
+ "base",
+ "bdi",
+ "bdo",
+ "blockquote",
+ "body",
+ "br",
+ "button",
+ "canvas",
+ "caption",
+ "cite",
+ "code",
+ "col",
+ "colgroup",
+ "command",
+ "data",
+ "datalist",
+ "dd",
+ "del",
+ "details",
+ "dfn",
+ "dialog",
+ "div",
+ "dl",
+ "dt",
+ "em",
+ "embed",
+ "fieldset",
+ "figcaption",
+ "figure",
+ "footer",
+ "form",
+ "h1",
+ "h2",
+ "h3",
+ "h4",
+ "h5",
+ "h6",
+ "head",
+ "header",
+ "hgroup",
+ "hr",
+ "html",
+ "i",
+ "iframe",
+ "img",
+ "input",
+ "ins",
+ "kbd",
+ "keygen",
+ "label",
+ "legend",
+ "li",
+ "link",
+ "main",
+ "map",
+ "mark",
+ "menu",
+ "menuitem",
+ "meta",
+ "meter",
+ "nav",
+ "noscript",
+ "object",
+ "ol",
+ "optgroup",
+ "option",
+ "output",
+ "p",
+ "param",
+ "picture",
+ "pre",
+ "progress",
+ "q",
+ "rp",
+ "rt",
+ "ruby",
+ "s",
+ "samp",
+ "script",
+ "section",
+ "select",
+ "slot",
+ "small",
+ "source",
+ "span",
+ "strong",
+ "style",
+ "sub",
+ "summary",
+ "sup",
+ "table",
+ "tbody",
+ "td",
+ "template",
+ "textarea",
+ "tfoot",
+ "th",
+ "thead",
+ "time",
+ "title",
+ "tr",
+ "track",
+ "u",
+ "ul",
+ "var",
+ "video",
+ "wbr",
+}
+
+// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
+//
+// "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
+// "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
+// but are kept here for backwards compatibility.
+var attributes = []string{
+ "abbr",
+ "accept",
+ "accept-charset",
+ "accesskey",
+ "action",
+ "allowfullscreen",
+ "allowpaymentrequest",
+ "allowusermedia",
+ "alt",
+ "as",
+ "async",
+ "autocomplete",
+ "autofocus",
+ "autoplay",
+ "challenge",
+ "charset",
+ "checked",
+ "cite",
+ "class",
+ "color",
+ "cols",
+ "colspan",
+ "command",
+ "content",
+ "contenteditable",
+ "contextmenu",
+ "controls",
+ "coords",
+ "crossorigin",
+ "data",
+ "datetime",
+ "default",
+ "defer",
+ "dir",
+ "dirname",
+ "disabled",
+ "download",
+ "draggable",
+ "dropzone",
+ "enctype",
+ "for",
+ "form",
+ "formaction",
+ "formenctype",
+ "formmethod",
+ "formnovalidate",
+ "formtarget",
+ "headers",
+ "height",
+ "hidden",
+ "high",
+ "href",
+ "hreflang",
+ "http-equiv",
+ "icon",
+ "id",
+ "inputmode",
+ "integrity",
+ "is",
+ "ismap",
+ "itemid",
+ "itemprop",
+ "itemref",
+ "itemscope",
+ "itemtype",
+ "keytype",
+ "kind",
+ "label",
+ "lang",
+ "list",
+ "loop",
+ "low",
+ "manifest",
+ "max",
+ "maxlength",
+ "media",
+ "mediagroup",
+ "method",
+ "min",
+ "minlength",
+ "multiple",
+ "muted",
+ "name",
+ "nomodule",
+ "nonce",
+ "novalidate",
+ "open",
+ "optimum",
+ "pattern",
+ "ping",
+ "placeholder",
+ "playsinline",
+ "poster",
+ "preload",
+ "radiogroup",
+ "readonly",
+ "referrerpolicy",
+ "rel",
+ "required",
+ "reversed",
+ "rows",
+ "rowspan",
+ "sandbox",
+ "spellcheck",
+ "scope",
+ "scoped",
+ "seamless",
+ "selected",
+ "shape",
+ "size",
+ "sizes",
+ "sortable",
+ "sorted",
+ "slot",
+ "span",
+ "spellcheck",
+ "src",
+ "srcdoc",
+ "srclang",
+ "srcset",
+ "start",
+ "step",
+ "style",
+ "tabindex",
+ "target",
+ "title",
+ "translate",
+ "type",
+ "typemustmatch",
+ "updateviacache",
+ "usemap",
+ "value",
+ "width",
+ "workertype",
+ "wrap",
+}
+
+// "onautocomplete", "onautocompleteerror", "onmousewheel",
+// "onshow" and "onsort" have been removed from the spec,
+// but are kept here for backwards compatibility.
+var eventHandlers = []string{
+ "onabort",
+ "onautocomplete",
+ "onautocompleteerror",
+ "onauxclick",
+ "onafterprint",
+ "onbeforeprint",
+ "onbeforeunload",
+ "onblur",
+ "oncancel",
+ "oncanplay",
+ "oncanplaythrough",
+ "onchange",
+ "onclick",
+ "onclose",
+ "oncontextmenu",
+ "oncopy",
+ "oncuechange",
+ "oncut",
+ "ondblclick",
+ "ondrag",
+ "ondragend",
+ "ondragenter",
+ "ondragexit",
+ "ondragleave",
+ "ondragover",
+ "ondragstart",
+ "ondrop",
+ "ondurationchange",
+ "onemptied",
+ "onended",
+ "onerror",
+ "onfocus",
+ "onhashchange",
+ "oninput",
+ "oninvalid",
+ "onkeydown",
+ "onkeypress",
+ "onkeyup",
+ "onlanguagechange",
+ "onload",
+ "onloadeddata",
+ "onloadedmetadata",
+ "onloadend",
+ "onloadstart",
+ "onmessage",
+ "onmessageerror",
+ "onmousedown",
+ "onmouseenter",
+ "onmouseleave",
+ "onmousemove",
+ "onmouseout",
+ "onmouseover",
+ "onmouseup",
+ "onmousewheel",
+ "onwheel",
+ "onoffline",
+ "ononline",
+ "onpagehide",
+ "onpageshow",
+ "onpaste",
+ "onpause",
+ "onplay",
+ "onplaying",
+ "onpopstate",
+ "onprogress",
+ "onratechange",
+ "onreset",
+ "onresize",
+ "onrejectionhandled",
+ "onscroll",
+ "onsecuritypolicyviolation",
+ "onseeked",
+ "onseeking",
+ "onselect",
+ "onshow",
+ "onsort",
+ "onstalled",
+ "onstorage",
+ "onsubmit",
+ "onsuspend",
+ "ontimeupdate",
+ "ontoggle",
+ "onunhandledrejection",
+ "onunload",
+ "onvolumechange",
+ "onwaiting",
+}
+
+// extra are ad-hoc values not covered by any of the lists above.
+var extra = []string{
+ "acronym",
+ "align",
+ "annotation",
+ "annotation-xml",
+ "applet",
+ "basefont",
+ "bgsound",
+ "big",
+ "blink",
+ "center",
+ "color",
+ "desc",
+ "face",
+ "font",
+ "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
+ "foreignobject",
+ "frame",
+ "frameset",
+ "image",
+ "isindex",
+ "listing",
+ "malignmark",
+ "marquee",
+ "math",
+ "mglyph",
+ "mi",
+ "mn",
+ "mo",
+ "ms",
+ "mtext",
+ "nobr",
+ "noembed",
+ "noframes",
+ "plaintext",
+ "prompt",
+ "public",
+ "rb",
+ "rtc",
+ "spacer",
+ "strike",
+ "svg",
+ "system",
+ "tt",
+ "xmp",
+}
diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go
index 2c1cade60..633ee15dc 100644
--- a/vendor/golang.org/x/net/html/node.go
+++ b/vendor/golang.org/x/net/html/node.go
@@ -177,7 +177,7 @@ func (s *nodeStack) index(n *Node) int {
// contains returns whether a is within s.
func (s *nodeStack) contains(a atom.Atom) bool {
for _, n := range *s {
- if n.DataAtom == a {
+ if n.DataAtom == a && n.Namespace == "" {
return true
}
}
diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go
index 64a579372..992cff2a3 100644
--- a/vendor/golang.org/x/net/html/parse.go
+++ b/vendor/golang.org/x/net/html/parse.go
@@ -439,9 +439,6 @@ func (p *parser) resetInsertionMode() {
case a.Select:
if !last {
for ancestor, first := n, p.oe[0]; ancestor != first; {
- if ancestor == first {
- break
- }
ancestor = p.oe[p.oe.index(ancestor)-1]
switch ancestor.DataAtom {
case a.Template:
@@ -633,7 +630,16 @@ func inHeadIM(p *parser) bool {
p.oe.pop()
p.acknowledgeSelfClosingTag()
return true
- case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
+ case a.Noscript:
+ p.addElement()
+ if p.scripting {
+ p.setOriginalIM()
+ p.im = textIM
+ } else {
+ p.im = inHeadNoscriptIM
+ }
+ return true
+ case a.Script, a.Title, a.Noframes, a.Style:
p.addElement()
p.setOriginalIM()
p.im = textIM
@@ -695,6 +701,49 @@ func inHeadIM(p *parser) bool {
return false
}
+// 12.2.6.4.5.
+func inHeadNoscriptIM(p *parser) bool {
+ switch p.tok.Type {
+ case DoctypeToken:
+ // Ignore the token.
+ return true
+ case StartTagToken:
+ switch p.tok.DataAtom {
+ case a.Html:
+ return inBodyIM(p)
+ case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
+ return inHeadIM(p)
+ case a.Head, a.Noscript:
+ // Ignore the token.
+ return true
+ }
+ case EndTagToken:
+ switch p.tok.DataAtom {
+ case a.Noscript, a.Br:
+ default:
+ // Ignore the token.
+ return true
+ }
+ case TextToken:
+ s := strings.TrimLeft(p.tok.Data, whitespace)
+ if len(s) == 0 {
+ // It was all whitespace.
+ return inHeadIM(p)
+ }
+ case CommentToken:
+ return inHeadIM(p)
+ }
+ p.oe.pop()
+ if p.top().DataAtom != a.Head {
+ panic("html: the new current node will be a head element.")
+ }
+ p.im = inHeadIM
+ if p.tok.DataAtom == a.Noscript {
+ return true
+ }
+ return false
+}
+
// Section 12.2.6.4.6.
func afterHeadIM(p *parser) bool {
switch p.tok.Type {
@@ -904,7 +953,7 @@ func inBodyIM(p *parser) bool {
case a.A:
for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
- p.inBodyEndTagFormatting(a.A)
+ p.inBodyEndTagFormatting(a.A, "a")
p.oe.remove(n)
p.afe.remove(n)
break
@@ -918,7 +967,7 @@ func inBodyIM(p *parser) bool {
case a.Nobr:
p.reconstructActiveFormattingElements()
if p.elementInScope(defaultScope, a.Nobr) {
- p.inBodyEndTagFormatting(a.Nobr)
+ p.inBodyEndTagFormatting(a.Nobr, "nobr")
p.reconstructActiveFormattingElements()
}
p.addFormattingElement()
@@ -1126,7 +1175,7 @@ func inBodyIM(p *parser) bool {
case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
- p.inBodyEndTagFormatting(p.tok.DataAtom)
+ p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
case a.Applet, a.Marquee, a.Object:
if p.popUntil(defaultScope, p.tok.DataAtom) {
p.clearActiveFormattingElements()
@@ -1137,7 +1186,7 @@ func inBodyIM(p *parser) bool {
case a.Template:
return inHeadIM(p)
default:
- p.inBodyEndTagOther(p.tok.DataAtom)
+ p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
}
case CommentToken:
p.addChild(&Node{
@@ -1164,7 +1213,7 @@ func inBodyIM(p *parser) bool {
return true
}
-func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
+func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
// This is the "adoption agency" algorithm, described at
// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
@@ -1186,7 +1235,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
}
}
if formattingElement == nil {
- p.inBodyEndTagOther(tagAtom)
+ p.inBodyEndTagOther(tagAtom, tagName)
return
}
feIndex := p.oe.index(formattingElement)
@@ -1291,9 +1340,17 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
-func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
+func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
for i := len(p.oe) - 1; i >= 0; i-- {
- if p.oe[i].DataAtom == tagAtom {
+ // Two element nodes have the same tag if they have the same Data (a
+ // string-typed field). As an optimization, for common HTML tags, each
+ // Data string is assigned a unique, non-zero DataAtom (a uint32-typed
+ // field), since integer comparison is faster than string comparison.
+ // Uncommon (custom) tags get a zero DataAtom.
+ //
+ // The if condition here is equivalent to (p.oe[i].Data == tagName).
+ if (p.oe[i].DataAtom == tagAtom) &&
+ ((tagAtom != 0) || (p.oe[i].Data == tagName)) {
p.oe = p.oe[:i]
break
}
@@ -1687,8 +1744,9 @@ func inCellIM(p *parser) bool {
return true
}
// Close the cell and reprocess.
- p.popUntil(tableScope, a.Td, a.Th)
- p.clearActiveFormattingElements()
+ if p.popUntil(tableScope, a.Td, a.Th) {
+ p.clearActiveFormattingElements()
+ }
p.im = inRowIM
return false
}
@@ -1719,8 +1777,12 @@ func inSelectIM(p *parser) bool {
}
p.addElement()
case a.Select:
- p.tok.Type = EndTagToken
- return false
+ if p.popUntil(selectScope, a.Select) {
+ p.resetInsertionMode()
+ } else {
+ // Ignore the token.
+ return true
+ }
case a.Input, a.Keygen, a.Textarea:
if p.elementInScope(selectScope, a.Select) {
p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
@@ -1750,6 +1812,9 @@ func inSelectIM(p *parser) bool {
case a.Select:
if p.popUntil(selectScope, a.Select) {
p.resetInsertionMode()
+ } else {
+ // Ignore the token.
+ return true
}
case a.Template:
return inHeadIM(p)
@@ -1775,13 +1840,22 @@ func inSelectInTableIM(p *parser) bool {
case StartTagToken, EndTagToken:
switch p.tok.DataAtom {
case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
- if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
- p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
- return false
- } else {
+ if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
// Ignore the token.
return true
}
+ // This is like p.popUntil(selectScope, a.Select), but it also
+ // matches <math select>, not just <select>. Matching the MathML
+ // tag is arguably incorrect (conceptually), but it mimics what
+ // Chromium does.
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ if n := p.oe[i]; n.DataAtom == a.Select {
+ p.oe = p.oe[:i]
+ break
+ }
+ }
+ p.resetInsertionMode()
+ return false
}
}
return inSelectIM(p)
@@ -2226,6 +2300,33 @@ func (p *parser) parse() error {
//
// The input is assumed to be UTF-8 encoded.
func Parse(r io.Reader) (*Node, error) {
+ return ParseWithOptions(r)
+}
+
+// ParseFragment parses a fragment of HTML and returns the nodes that were
+// found. If the fragment is the InnerHTML for an existing element, pass that
+// element in context.
+//
+// It has the same intricacies as Parse.
+func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
+ return ParseFragmentWithOptions(r, context)
+}
+
+// ParseOption configures a parser.
+type ParseOption func(p *parser)
+
+// ParseOptionEnableScripting configures the scripting flag.
+// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
+//
+// By default, scripting is enabled.
+func ParseOptionEnableScripting(enable bool) ParseOption {
+ return func(p *parser) {
+ p.scripting = enable
+ }
+}
+
+// ParseWithOptions is like Parse, with options.
+func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
p := &parser{
tokenizer: NewTokenizer(r),
doc: &Node{
@@ -2235,6 +2336,11 @@ func Parse(r io.Reader) (*Node, error) {
framesetOK: true,
im: initialIM,
}
+
+ for _, f := range opts {
+ f(p)
+ }
+
err := p.parse()
if err != nil {
return nil, err
@@ -2242,12 +2348,8 @@ func Parse(r io.Reader) (*Node, error) {
return p.doc, nil
}
-// ParseFragment parses a fragment of HTML and returns the nodes that were
-// found. If the fragment is the InnerHTML for an existing element, pass that
-// element in context.
-//
-// It has the same intricacies as Parse.
-func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
+// ParseFragmentWithOptions is like ParseFragment, with options.
+func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
contextTag := ""
if context != nil {
if context.Type != ElementNode {
@@ -2271,6 +2373,10 @@ func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
context: context,
}
+ for _, f := range opts {
+ f(p)
+ }
+
root := &Node{
Type: ElementNode,
DataAtom: a.Html,