diff options
author | Daniel J Walsh <dwalsh@redhat.com> | 2019-01-04 10:15:38 -0500 |
---|---|---|
committer | Daniel J Walsh <dwalsh@redhat.com> | 2019-01-04 14:54:59 -0500 |
commit | 43686072d3ff559abb0aea865509ae85f8a301de (patch) | |
tree | 54946ccc3cbe4cf01355253f3d8b91194fba4a1b | |
parent | 9ffd4806163e410d51d0f0cbece45b7405ff9fee (diff) | |
download | podman-43686072d3ff559abb0aea865509ae85f8a301de.tar.gz podman-43686072d3ff559abb0aea865509ae85f8a301de.tar.bz2 podman-43686072d3ff559abb0aea865509ae85f8a301de.zip |
Update vendor of runc
Updating the vendor or runc to pull in some fixes that we need.
In order to get this vendor to work, we needed to update the vendor
of docker/docker, which causes all sorts of issues, just to fix
the docker/pkg/sysinfo. Rather then doing this, I pulled in pkg/sysinfo
into libpod and fixed the code locally.
I then switched the use of docker/pkg/sysinfo to libpod/pkg/sysinfo.
I also switched out the docker/pkg/mount to containers/storage/pkg/mount
Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
42 files changed, 9250 insertions, 101 deletions
diff --git a/cmd/podman/create_cli.go b/cmd/podman/create_cli.go index 2a820fa43..1a0830f2e 100644 --- a/cmd/podman/create_cli.go +++ b/cmd/podman/create_cli.go @@ -7,7 +7,7 @@ import ( "strings" cc "github.com/containers/libpod/pkg/spec" - "github.com/docker/docker/pkg/sysinfo" + "github.com/containers/libpod/pkg/sysinfo" "github.com/docker/go-units" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go index 3d6603364..ffa999730 100644 --- a/pkg/spec/spec.go +++ b/pkg/spec/spec.go @@ -6,8 +6,8 @@ import ( "strings" "github.com/containers/libpod/pkg/rootless" + "github.com/containers/storage/pkg/mount" "github.com/docker/docker/daemon/caps" - "github.com/docker/docker/pkg/mount" "github.com/docker/go-units" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" diff --git a/vendor/github.com/docker/docker/pkg/sysinfo/README.md b/pkg/sysinfo/README.md index c1530cef0..c1530cef0 100644 --- a/vendor/github.com/docker/docker/pkg/sysinfo/README.md +++ b/pkg/sysinfo/README.md diff --git a/vendor/github.com/docker/docker/pkg/sysinfo/numcpu.go b/pkg/sysinfo/numcpu.go index aeb1a3a80..aeb1a3a80 100644 --- a/vendor/github.com/docker/docker/pkg/sysinfo/numcpu.go +++ b/pkg/sysinfo/numcpu.go diff --git a/vendor/github.com/docker/docker/pkg/sysinfo/numcpu_linux.go b/pkg/sysinfo/numcpu_linux.go index f1d2d9db3..f1d2d9db3 100644 --- a/vendor/github.com/docker/docker/pkg/sysinfo/numcpu_linux.go +++ b/pkg/sysinfo/numcpu_linux.go diff --git a/vendor/github.com/docker/docker/pkg/sysinfo/numcpu_windows.go b/pkg/sysinfo/numcpu_windows.go index 1d89dd550..1d89dd550 100644 --- a/vendor/github.com/docker/docker/pkg/sysinfo/numcpu_windows.go +++ b/pkg/sysinfo/numcpu_windows.go diff --git a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo.go b/pkg/sysinfo/sysinfo.go index f046de4b1..f046de4b1 100644 --- a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo.go +++ b/pkg/sysinfo/sysinfo.go diff --git a/pkg/sysinfo/sysinfo_linux.go b/pkg/sysinfo/sysinfo_linux.go new file mode 100644 index 000000000..f4047b63c --- /dev/null +++ b/pkg/sysinfo/sysinfo_linux.go @@ -0,0 +1,254 @@ +package sysinfo + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +func findCgroupMountpoints() (map[string]string, error) { + cgMounts, err := cgroups.GetCgroupMounts(false) + if err != nil { + return nil, fmt.Errorf("Failed to parse cgroup information: %v", err) + } + mps := make(map[string]string) + for _, m := range cgMounts { + for _, ss := range m.Subsystems { + mps[ss] = m.Mountpoint + } + } + return mps, nil +} + +// New returns a new SysInfo, using the filesystem to detect which features +// the kernel supports. If `quiet` is `false` warnings are printed in logs +// whenever an error occurs or misconfigurations are present. +func New(quiet bool) *SysInfo { + sysInfo := &SysInfo{} + cgMounts, err := findCgroupMountpoints() + if err != nil { + logrus.Warnf("Failed to parse cgroup information: %v", err) + } else { + sysInfo.cgroupMemInfo = checkCgroupMem(cgMounts, quiet) + sysInfo.cgroupCPUInfo = checkCgroupCPU(cgMounts, quiet) + sysInfo.cgroupBlkioInfo = checkCgroupBlkioInfo(cgMounts, quiet) + sysInfo.cgroupCpusetInfo = checkCgroupCpusetInfo(cgMounts, quiet) + sysInfo.cgroupPids = checkCgroupPids(quiet) + } + + _, ok := cgMounts["devices"] + sysInfo.CgroupDevicesEnabled = ok + + sysInfo.IPv4ForwardingDisabled = !readProcBool("/proc/sys/net/ipv4/ip_forward") + sysInfo.BridgeNFCallIPTablesDisabled = !readProcBool("/proc/sys/net/bridge/bridge-nf-call-iptables") + sysInfo.BridgeNFCallIP6TablesDisabled = !readProcBool("/proc/sys/net/bridge/bridge-nf-call-ip6tables") + + // Check if AppArmor is supported. + if _, err := os.Stat("/sys/kernel/security/apparmor"); !os.IsNotExist(err) { + sysInfo.AppArmor = true + } + + // Check if Seccomp is supported, via CONFIG_SECCOMP. + if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL { + // Make sure the kernel has CONFIG_SECCOMP_FILTER. + if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL { + sysInfo.Seccomp = true + } + } + + return sysInfo +} + +// checkCgroupMem reads the memory information from the memory cgroup mount point. +func checkCgroupMem(cgMounts map[string]string, quiet bool) cgroupMemInfo { + mountPoint, ok := cgMounts["memory"] + if !ok { + if !quiet { + logrus.Warn("Your kernel does not support cgroup memory limit") + } + return cgroupMemInfo{} + } + + swapLimit := cgroupEnabled(mountPoint, "memory.memsw.limit_in_bytes") + if !quiet && !swapLimit { + logrus.Warn("Your kernel does not support swap memory limit") + } + memoryReservation := cgroupEnabled(mountPoint, "memory.soft_limit_in_bytes") + if !quiet && !memoryReservation { + logrus.Warn("Your kernel does not support memory reservation") + } + oomKillDisable := cgroupEnabled(mountPoint, "memory.oom_control") + if !quiet && !oomKillDisable { + logrus.Warn("Your kernel does not support oom control") + } + memorySwappiness := cgroupEnabled(mountPoint, "memory.swappiness") + if !quiet && !memorySwappiness { + logrus.Warn("Your kernel does not support memory swappiness") + } + kernelMemory := cgroupEnabled(mountPoint, "memory.kmem.limit_in_bytes") + if !quiet && !kernelMemory { + logrus.Warn("Your kernel does not support kernel memory limit") + } + + return cgroupMemInfo{ + MemoryLimit: true, + SwapLimit: swapLimit, + MemoryReservation: memoryReservation, + OomKillDisable: oomKillDisable, + MemorySwappiness: memorySwappiness, + KernelMemory: kernelMemory, + } +} + +// checkCgroupCPU reads the cpu information from the cpu cgroup mount point. +func checkCgroupCPU(cgMounts map[string]string, quiet bool) cgroupCPUInfo { + mountPoint, ok := cgMounts["cpu"] + if !ok { + if !quiet { + logrus.Warn("Unable to find cpu cgroup in mounts") + } + return cgroupCPUInfo{} + } + + cpuShares := cgroupEnabled(mountPoint, "cpu.shares") + if !quiet && !cpuShares { + logrus.Warn("Your kernel does not support cgroup cpu shares") + } + + cpuCfsPeriod := cgroupEnabled(mountPoint, "cpu.cfs_period_us") + if !quiet && !cpuCfsPeriod { + logrus.Warn("Your kernel does not support cgroup cfs period") + } + + cpuCfsQuota := cgroupEnabled(mountPoint, "cpu.cfs_quota_us") + if !quiet && !cpuCfsQuota { + logrus.Warn("Your kernel does not support cgroup cfs quotas") + } + + cpuRealtimePeriod := cgroupEnabled(mountPoint, "cpu.rt_period_us") + if !quiet && !cpuRealtimePeriod { + logrus.Warn("Your kernel does not support cgroup rt period") + } + + cpuRealtimeRuntime := cgroupEnabled(mountPoint, "cpu.rt_runtime_us") + if !quiet && !cpuRealtimeRuntime { + logrus.Warn("Your kernel does not support cgroup rt runtime") + } + + return cgroupCPUInfo{ + CPUShares: cpuShares, + CPUCfsPeriod: cpuCfsPeriod, + CPUCfsQuota: cpuCfsQuota, + CPURealtimePeriod: cpuRealtimePeriod, + CPURealtimeRuntime: cpuRealtimeRuntime, + } +} + +// checkCgroupBlkioInfo reads the blkio information from the blkio cgroup mount point. +func checkCgroupBlkioInfo(cgMounts map[string]string, quiet bool) cgroupBlkioInfo { + mountPoint, ok := cgMounts["blkio"] + if !ok { + if !quiet { + logrus.Warn("Unable to find blkio cgroup in mounts") + } + return cgroupBlkioInfo{} + } + + weight := cgroupEnabled(mountPoint, "blkio.weight") + if !quiet && !weight { + logrus.Warn("Your kernel does not support cgroup blkio weight") + } + + weightDevice := cgroupEnabled(mountPoint, "blkio.weight_device") + if !quiet && !weightDevice { + logrus.Warn("Your kernel does not support cgroup blkio weight_device") + } + + readBpsDevice := cgroupEnabled(mountPoint, "blkio.throttle.read_bps_device") + if !quiet && !readBpsDevice { + logrus.Warn("Your kernel does not support cgroup blkio throttle.read_bps_device") + } + + writeBpsDevice := cgroupEnabled(mountPoint, "blkio.throttle.write_bps_device") + if !quiet && !writeBpsDevice { + logrus.Warn("Your kernel does not support cgroup blkio throttle.write_bps_device") + } + readIOpsDevice := cgroupEnabled(mountPoint, "blkio.throttle.read_iops_device") + if !quiet && !readIOpsDevice { + logrus.Warn("Your kernel does not support cgroup blkio throttle.read_iops_device") + } + + writeIOpsDevice := cgroupEnabled(mountPoint, "blkio.throttle.write_iops_device") + if !quiet && !writeIOpsDevice { + logrus.Warn("Your kernel does not support cgroup blkio throttle.write_iops_device") + } + return cgroupBlkioInfo{ + BlkioWeight: weight, + BlkioWeightDevice: weightDevice, + BlkioReadBpsDevice: readBpsDevice, + BlkioWriteBpsDevice: writeBpsDevice, + BlkioReadIOpsDevice: readIOpsDevice, + BlkioWriteIOpsDevice: writeIOpsDevice, + } +} + +// checkCgroupCpusetInfo reads the cpuset information from the cpuset cgroup mount point. +func checkCgroupCpusetInfo(cgMounts map[string]string, quiet bool) cgroupCpusetInfo { + mountPoint, ok := cgMounts["cpuset"] + if !ok { + if !quiet { + logrus.Warn("Unable to find cpuset cgroup in mounts") + } + return cgroupCpusetInfo{} + } + + cpus, err := ioutil.ReadFile(path.Join(mountPoint, "cpuset.cpus")) + if err != nil { + return cgroupCpusetInfo{} + } + + mems, err := ioutil.ReadFile(path.Join(mountPoint, "cpuset.mems")) + if err != nil { + return cgroupCpusetInfo{} + } + + return cgroupCpusetInfo{ + Cpuset: true, + Cpus: strings.TrimSpace(string(cpus)), + Mems: strings.TrimSpace(string(mems)), + } +} + +// checkCgroupPids reads the pids information from the pids cgroup mount point. +func checkCgroupPids(quiet bool) cgroupPids { + _, err := cgroups.FindCgroupMountpoint("", "pids") + if err != nil { + if !quiet { + logrus.Warn(err) + } + return cgroupPids{} + } + + return cgroupPids{ + PidsLimit: true, + } +} + +func cgroupEnabled(mountPoint, name string) bool { + _, err := os.Stat(path.Join(mountPoint, name)) + return err == nil +} + +func readProcBool(path string) bool { + val, err := ioutil.ReadFile(path) + if err != nil { + return false + } + return strings.TrimSpace(string(val)) == "1" +} diff --git a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo_linux.go b/pkg/sysinfo/sysinfo_linux.go~ index 471f786a7..471f786a7 100644 --- a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo_linux.go +++ b/pkg/sysinfo/sysinfo_linux.go~ diff --git a/pkg/sysinfo/sysinfo_linux_test.go b/pkg/sysinfo/sysinfo_linux_test.go new file mode 100644 index 000000000..860784f2a --- /dev/null +++ b/pkg/sysinfo/sysinfo_linux_test.go @@ -0,0 +1,104 @@ +package sysinfo + +import ( + "io/ioutil" + "os" + "path" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "golang.org/x/sys/unix" +) + +func TestReadProcBool(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "test-sysinfo-proc") + require.NoError(t, err) + defer os.RemoveAll(tmpDir) + + procFile := filepath.Join(tmpDir, "read-proc-bool") + err = ioutil.WriteFile(procFile, []byte("1"), 0644) + require.NoError(t, err) + + if !readProcBool(procFile) { + t.Fatal("expected proc bool to be true, got false") + } + + if err := ioutil.WriteFile(procFile, []byte("0"), 0644); err != nil { + t.Fatal(err) + } + if readProcBool(procFile) { + t.Fatal("expected proc bool to be false, got true") + } + + if readProcBool(path.Join(tmpDir, "no-exist")) { + t.Fatal("should be false for non-existent entry") + } + +} + +func TestCgroupEnabled(t *testing.T) { + cgroupDir, err := ioutil.TempDir("", "cgroup-test") + require.NoError(t, err) + defer os.RemoveAll(cgroupDir) + + if cgroupEnabled(cgroupDir, "test") { + t.Fatal("cgroupEnabled should be false") + } + + err = ioutil.WriteFile(path.Join(cgroupDir, "test"), []byte{}, 0644) + require.NoError(t, err) + + if !cgroupEnabled(cgroupDir, "test") { + t.Fatal("cgroupEnabled should be true") + } +} + +func TestNew(t *testing.T) { + sysInfo := New(false) + require.NotNil(t, sysInfo) + checkSysInfo(t, sysInfo) + + sysInfo = New(true) + require.NotNil(t, sysInfo) + checkSysInfo(t, sysInfo) +} + +func checkSysInfo(t *testing.T, sysInfo *SysInfo) { + // Check if Seccomp is supported, via CONFIG_SECCOMP.then sysInfo.Seccomp must be TRUE , else FALSE + if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL { + // Make sure the kernel has CONFIG_SECCOMP_FILTER. + if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL { + require.True(t, sysInfo.Seccomp) + } + } else { + require.False(t, sysInfo.Seccomp) + } +} + +func TestNewAppArmorEnabled(t *testing.T) { + // Check if AppArmor is supported. then it must be TRUE , else FALSE + if _, err := os.Stat("/sys/kernel/security/apparmor"); err != nil { + t.Skip("App Armor Must be Enabled") + } + + sysInfo := New(true) + require.True(t, sysInfo.AppArmor) +} + +func TestNewAppArmorDisabled(t *testing.T) { + // Check if AppArmor is supported. then it must be TRUE , else FALSE + if _, err := os.Stat("/sys/kernel/security/apparmor"); !os.IsNotExist(err) { + t.Skip("App Armor Must be Disabled") + } + + sysInfo := New(true) + require.False(t, sysInfo.AppArmor) +} + +func TestNumCPU(t *testing.T) { + cpuNumbers := NumCPU() + if cpuNumbers <= 0 { + t.Fatal("CPU returned must be greater than zero") + } +} diff --git a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo_solaris.go b/pkg/sysinfo/sysinfo_solaris.go index c858d57e0..c858d57e0 100644 --- a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo_solaris.go +++ b/pkg/sysinfo/sysinfo_solaris.go diff --git a/pkg/sysinfo/sysinfo_test.go b/pkg/sysinfo/sysinfo_test.go new file mode 100644 index 000000000..b61fbcf54 --- /dev/null +++ b/pkg/sysinfo/sysinfo_test.go @@ -0,0 +1,26 @@ +package sysinfo + +import "testing" + +func TestIsCpusetListAvailable(t *testing.T) { + cases := []struct { + provided string + available string + res bool + err bool + }{ + {"1", "0-4", true, false}, + {"01,3", "0-4", true, false}, + {"", "0-7", true, false}, + {"1--42", "0-7", false, true}, + {"1-42", "00-1,8,,9", false, true}, + {"1,41-42", "43,45", false, false}, + {"0-3", "", false, false}, + } + for _, c := range cases { + r, err := isCpusetListAvailable(c.provided, c.available) + if (c.err && err == nil) && r != c.res { + t.Fatalf("Expected pair: %v, %v for %s, %s. Got %v, %v instead", c.res, c.err, c.provided, c.available, (c.err && err == nil), r) + } + } +} diff --git a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo_unix.go b/pkg/sysinfo/sysinfo_unix.go index 45f3ef1c6..45f3ef1c6 100644 --- a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo_unix.go +++ b/pkg/sysinfo/sysinfo_unix.go diff --git a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo_windows.go b/pkg/sysinfo/sysinfo_windows.go index 4e6255bc5..4e6255bc5 100644 --- a/vendor/github.com/docker/docker/pkg/sysinfo/sysinfo_windows.go +++ b/pkg/sysinfo/sysinfo_windows.go diff --git a/vendor.conf b/vendor.conf index 6bbcb347e..fd4968870 100644 --- a/vendor.conf +++ b/vendor.conf @@ -48,7 +48,7 @@ github.com/mistifyio/go-zfs v2.1.1 github.com/mtrmac/gpgme b2432428689ca58c2b8e8dea9449d3295cf96fc9 github.com/opencontainers/go-digest v1.0.0-rc0 github.com/opencontainers/image-spec v1.0.0 -github.com/opencontainers/runc b4e2ecb452d9ee4381137cc0a7e6715b96bed6de +github.com/opencontainers/runc bbb17efcb4c0ab986407812a31ba333a7450064c github.com/opencontainers/runtime-spec d810dbc60d8c5aeeb3d054bd1132fab2121968ce github.com/opencontainers/runtime-tools master github.com/opencontainers/selinux 51c6c0a5dbc675792e953298cb9871819d6f9bb8 diff --git a/vendor/github.com/docker/docker/pkg/archive/example_changes.go b/vendor/github.com/docker/docker/pkg/archive/example_changes.go new file mode 100644 index 000000000..495db809e --- /dev/null +++ b/vendor/github.com/docker/docker/pkg/archive/example_changes.go @@ -0,0 +1,97 @@ +// +build ignore + +// Simple tool to create an archive stream from an old and new directory +// +// By default it will stream the comparison of two temporary directories with junk files +package main + +import ( + "flag" + "fmt" + "io" + "io/ioutil" + "os" + "path" + + "github.com/docker/docker/pkg/archive" + "github.com/sirupsen/logrus" +) + +var ( + flDebug = flag.Bool("D", false, "debugging output") + flNewDir = flag.String("newdir", "", "") + flOldDir = flag.String("olddir", "", "") + log = logrus.New() +) + +func main() { + flag.Usage = func() { + fmt.Println("Produce a tar from comparing two directory paths. By default a demo tar is created of around 200 files (including hardlinks)") + fmt.Printf("%s [OPTIONS]\n", os.Args[0]) + flag.PrintDefaults() + } + flag.Parse() + log.Out = os.Stderr + if (len(os.Getenv("DEBUG")) > 0) || *flDebug { + logrus.SetLevel(logrus.DebugLevel) + } + var newDir, oldDir string + + if len(*flNewDir) == 0 { + var err error + newDir, err = ioutil.TempDir("", "docker-test-newDir") + if err != nil { + log.Fatal(err) + } + defer os.RemoveAll(newDir) + if _, err := prepareUntarSourceDirectory(100, newDir, true); err != nil { + log.Fatal(err) + } + } else { + newDir = *flNewDir + } + + if len(*flOldDir) == 0 { + oldDir, err := ioutil.TempDir("", "docker-test-oldDir") + if err != nil { + log.Fatal(err) + } + defer os.RemoveAll(oldDir) + } else { + oldDir = *flOldDir + } + + changes, err := archive.ChangesDirs(newDir, oldDir) + if err != nil { + log.Fatal(err) + } + + a, err := archive.ExportChanges(newDir, changes) + if err != nil { + log.Fatal(err) + } + defer a.Close() + + i, err := io.Copy(os.Stdout, a) + if err != nil && err != io.EOF { + log.Fatal(err) + } + fmt.Fprintf(os.Stderr, "wrote archive of %d bytes", i) +} + +func prepareUntarSourceDirectory(numberOfFiles int, targetPath string, makeLinks bool) (int, error) { + fileData := []byte("fooo") + for n := 0; n < numberOfFiles; n++ { + fileName := fmt.Sprintf("file-%d", n) + if err := ioutil.WriteFile(path.Join(targetPath, fileName), fileData, 0700); err != nil { + return 0, err + } + if makeLinks { + if err := os.Link(path.Join(targetPath, fileName), path.Join(targetPath, fileName+"-link")); err != nil { + return 0, err + } + } + } + totalSize := numberOfFiles * len(fileData) + return totalSize, nil +} diff --git a/vendor/github.com/docker/docker/profiles/seccomp/generate.go b/vendor/github.com/docker/docker/profiles/seccomp/generate.go new file mode 100644 index 000000000..32f22bb37 --- /dev/null +++ b/vendor/github.com/docker/docker/profiles/seccomp/generate.go @@ -0,0 +1,32 @@ +// +build ignore + +package main + +import ( + "encoding/json" + "io/ioutil" + "os" + "path/filepath" + + "github.com/docker/docker/profiles/seccomp" +) + +// saves the default seccomp profile as a json file so people can use it as a +// base for their own custom profiles +func main() { + wd, err := os.Getwd() + if err != nil { + panic(err) + } + f := filepath.Join(wd, "default.json") + + // write the default profile to the file + b, err := json.MarshalIndent(seccomp.DefaultProfile(), "", "\t") + if err != nil { + panic(err) + } + + if err := ioutil.WriteFile(f, b, 0644); err != nil { + panic(err) + } +} diff --git a/vendor/github.com/opencontainers/runc/README.md b/vendor/github.com/opencontainers/runc/README.md index 5215e32c1..e755fb7bc 100644 --- a/vendor/github.com/opencontainers/runc/README.md +++ b/vendor/github.com/opencontainers/runc/README.md @@ -68,6 +68,7 @@ make BUILDTAGS='seccomp apparmor' | selinux | selinux process and mount labeling | <none> | | apparmor | apparmor profile support | <none> | | ambient | ambient capability support | kernel 4.3 | +| nokmem | disable kernel memory account | <none> | ### Running the test suite @@ -87,6 +88,18 @@ You can run a specific test case by setting the `TESTFLAGS` variable. # make test TESTFLAGS="-run=SomeTestFunction" ``` +You can run a specific integration test by setting the `TESTPATH` variable. + +```bash +# make test TESTPATH="/checkpoint.bats" +``` + +You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables. + +```bash +# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/" +``` + ### Dependencies Management `runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management. @@ -251,3 +264,7 @@ PIDFile=/run/mycontainerid.pid [Install] WantedBy=multi-user.target ``` + +## License + +The code and docs are released under the [Apache 2.0 license](LICENSE). diff --git a/vendor/github.com/opencontainers/runc/libcontainer/README.md b/vendor/github.com/opencontainers/runc/libcontainer/README.md index 42f3efe56..1d7fa04c0 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/README.md +++ b/vendor/github.com/opencontainers/runc/libcontainer/README.md @@ -148,6 +148,7 @@ config := &configs.Config{ {Type: configs.NEWPID}, {Type: configs.NEWUSER}, {Type: configs.NEWNET}, + {Type: configs.NEWCGROUP}, }), Cgroups: &configs.Cgroup{ Name: "test-container", @@ -323,6 +324,7 @@ generated when building libcontainer with docker. ## Copyright and license -Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license. -Docs released under Creative commons. - +Code and documentation copyright 2014 Docker, inc. +The code and documentation are released under the [Apache 2.0 license](../LICENSE). +The documentation is also released under Creative Commons Attribution 4.0 International License. +You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go index 7c995efee..ea571ad93 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -13,40 +13,50 @@ import ( "strings" "time" - "github.com/docker/go-units" + units "github.com/docker/go-units" ) const ( - cgroupNamePrefix = "name=" + CgroupNamePrefix = "name=" CgroupProcesses = "cgroup.procs" ) // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt -func FindCgroupMountpoint(subsystem string) (string, error) { - mnt, _, err := FindCgroupMountpointAndRoot(subsystem) +func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { + mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) return mnt, err } -func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) { +func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) { // We are not using mount.GetMounts() because it's super-inefficient, // parsing it directly sped up x10 times because of not using Sscanf. // It was one of two major performance drawbacks in container start. if !isSubsystemAvailable(subsystem) { return "", "", NewNotFoundError(subsystem) } + f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", "", err } defer f.Close() - scanner := bufio.NewScanner(f) + return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) +} + +func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) { + scanner := bufio.NewScanner(reader) for scanner.Scan() { txt := scanner.Text() - fields := strings.Split(txt, " ") - for _, opt := range strings.Split(fields[len(fields)-1], ",") { - if opt == subsystem { - return fields[4], fields[3], nil + fields := strings.Fields(txt) + if len(fields) < 5 { + continue + } + if strings.HasPrefix(fields[4], cgroupPath) { + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], fields[3], nil + } } } } @@ -103,7 +113,7 @@ func FindCgroupMountpointDir() (string, error) { } if postSeparatorFields[0] == "cgroup" { - // Check that the mount is properly formated. + // Check that the mount is properly formatted. if numPostFields < 3 { return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) } @@ -151,19 +161,20 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, Root: fields[3], } for _, opt := range strings.Split(fields[len(fields)-1], ",") { - if !ss[opt] { + seen, known := ss[opt] + if !known || (!all && seen) { continue } - if strings.HasPrefix(opt, cgroupNamePrefix) { - m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):]) - } else { - m.Subsystems = append(m.Subsystems, opt) - } - if !all { - numFound++ + ss[opt] = true + if strings.HasPrefix(opt, CgroupNamePrefix) { + opt = opt[len(CgroupNamePrefix):] } + m.Subsystems = append(m.Subsystems, opt) + numFound++ + } + if len(m.Subsystems) > 0 || all { + res = append(res, m) } - res = append(res, m) } if err := scanner.Err(); err != nil { return nil, err @@ -187,7 +198,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) { allMap := make(map[string]bool) for s := range allSubsystems { - allMap[s] = true + allMap[s] = false } return getCgroupMountsHelper(allMap, f, all) } @@ -256,13 +267,13 @@ func GetInitCgroupPath(subsystem string) (string, error) { } func getCgroupPathHelper(subsystem, cgroup string) (string, error) { - mnt, root, err := FindCgroupMountpointAndRoot(subsystem) + mnt, root, err := FindCgroupMountpointAndRoot("", subsystem) if err != nil { return "", err } // This is needed for nested containers, because in /proc/self/cgroup we - // see pathes from host, which don't exist in container. + // see paths from host, which don't exist in container. relCgroup, err := filepath.Rel(root, cgroup) if err != nil { return "", err @@ -342,7 +353,7 @@ func getControllerPath(subsystem string, cgroups map[string]string) (string, err return p, nil } - if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok { + if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok { return p, nil } @@ -452,7 +463,7 @@ func WriteCgroupProc(dir string, pid int) error { return fmt.Errorf("no such directory for %s", CgroupProcesses) } - // Dont attach any pid to the cgroup if -1 is specified as a pid + // Don't attach any pid to the cgroup if -1 is specified as a pid if pid != -1 { if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil { return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index b1c4762fe..7728522fe 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -186,12 +186,19 @@ type Config struct { // callers keyring in this case. NoNewKeyring bool `json:"no_new_keyring"` - // Rootless specifies whether the container is a rootless container. - Rootless bool `json:"rootless"` - - // IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into - // to limit the resources (e.g., L3 cache) the container has available + // IntelRdt specifies settings for Intel RDT group that the container is placed into + // to limit the resources (e.g., L3 cache, memory bandwidth) the container has available IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` + + // RootlessEUID is set when the runc was launched with non-zero EUID. + // Note that RootlessEUID is set to false when launched with EUID=0 in userns. + // When RootlessEUID is set, runc creates a new userns for the container. + // (config.json needs to contain userns settings) + RootlessEUID bool `json:"rootless_euid,omitempty"` + + // RootlessCgroups is set when unlikely to have the full access to cgroups. + // When RootlessCgroups is set, cgroups errors are ignored. + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` } type Hooks struct { @@ -265,26 +272,23 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) { }) } -// HookState is the payload provided to a hook on execution. -type HookState specs.State - type Hook interface { // Run executes the hook with the provided state. - Run(HookState) error + Run(*specs.State) error } // NewFunctionHook will call the provided function when the hook is run. -func NewFunctionHook(f func(HookState) error) FuncHook { +func NewFunctionHook(f func(*specs.State) error) FuncHook { return FuncHook{ run: f, } } type FuncHook struct { - run func(HookState) error + run func(*specs.State) error } -func (f FuncHook) Run(s HookState) error { +func (f FuncHook) Run(s *specs.State) error { return f.run(s) } @@ -307,7 +311,7 @@ type CommandHook struct { Command } -func (c Command) Run(s HookState) error { +func (c Command) Run(s *specs.State) error { b, err := json.Marshal(s) if err != nil { return err diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go index 36bd5f96a..57e9f037d 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go @@ -4,4 +4,10 @@ type IntelRdt struct { // The schema for L3 cache id and capacity bitmask (CBM) // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." L3CacheSchema string `json:"l3_cache_schema,omitempty"` + + // The schema of memory bandwidth per L3 cache id + // Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." + // The unit of memory bandwidth is specified in "percentages" by + // default, and in "MBps" if MBA Software Controller is enabled. + MemBwSchema string `json:"memBwSchema,omitempty"` } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go index 5fc171a57..1bbaef9bd 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go @@ -7,12 +7,13 @@ import ( ) const ( - NEWNET NamespaceType = "NEWNET" - NEWPID NamespaceType = "NEWPID" - NEWNS NamespaceType = "NEWNS" - NEWUTS NamespaceType = "NEWUTS" - NEWIPC NamespaceType = "NEWIPC" - NEWUSER NamespaceType = "NEWUSER" + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" + NEWCGROUP NamespaceType = "NEWCGROUP" ) var ( @@ -35,6 +36,8 @@ func NsName(ns NamespaceType) string { return "user" case NEWUTS: return "uts" + case NEWCGROUP: + return "cgroup" } return "" } @@ -68,6 +71,7 @@ func NamespaceTypes() []NamespaceType { NEWNET, NEWPID, NEWNS, + NEWCGROUP, } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go index 4ce6813d2..2dc7adfc9 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go @@ -9,12 +9,13 @@ func (n *Namespace) Syscall() int { } var namespaceInfo = map[NamespaceType]int{ - NEWNET: unix.CLONE_NEWNET, - NEWNS: unix.CLONE_NEWNS, - NEWUSER: unix.CLONE_NEWUSER, - NEWIPC: unix.CLONE_NEWIPC, - NEWUTS: unix.CLONE_NEWUTS, - NEWPID: unix.CLONE_NEWPID, + NEWNET: unix.CLONE_NEWNET, + NEWNS: unix.CLONE_NEWNS, + NEWUSER: unix.CLONE_NEWUSER, + NEWIPC: unix.CLONE_NEWIPC, + NEWUTS: unix.CLONE_NEWUTS, + NEWPID: unix.CLONE_NEWPID, + NEWCGROUP: unix.CLONE_NEWCGROUP, } // CloneFlags parses the container's Namespaces options to set the correct diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md index 575701371..9ec6c3931 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md @@ -10,8 +10,8 @@ The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd package. In cgo, if the import of "C" is immediately preceded by a comment, that comment, called the preamble, is used as a header when compiling the C parts of the package. So every time we import package `nsenter`, the C code function `nsexec()` would be -called. And package `nsenter` is now only imported in `main_unix.go`, so every time -before we call `cmd.Start` on linux, that C code would run. +called. And package `nsenter` is only imported in `init.go`, so every time the runc +`init` command is invoked, that C code is run. Because `nsexec()` must be run before the Go runtime in order to use the Linux kernel namespace, you must `import` this library into a package if @@ -37,7 +37,7 @@ the parent `nsexec()` will exit and the child `nsexec()` process will return to allow the Go runtime take over. NOTE: We do both `setns(2)` and `clone(2)` even if we don't have any -CLONE_NEW* clone flags because we must fork a new process in order to +`CLONE_NEW*` clone flags because we must fork a new process in order to enter the PID namespace. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index a4cd1399d..28269dfc0 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -42,6 +42,12 @@ enum sync_t { SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ }; +/* + * Synchronisation value for cgroup namespace setup. + * The same constant is defined in process_linux.go as "createCgroupns". + */ +#define CREATECGROUPNS 0x80 + /* longjmp() arguments. */ #define JUMP_PARENT 0x00 #define JUMP_CHILD 0xA0 @@ -82,7 +88,7 @@ struct nlconfig_t { uint8_t is_setgroup; /* Rootless container settings. */ - uint8_t is_rootless; + uint8_t is_rootless_euid; /* boolean */ char *uidmappath; size_t uidmappath_len; char *gidmappath; @@ -100,7 +106,7 @@ struct nlconfig_t { #define GIDMAP_ATTR 27284 #define SETGROUP_ATTR 27285 #define OOM_SCORE_ADJ_ATTR 27286 -#define ROOTLESS_ATTR 27287 +#define ROOTLESS_EUID_ATTR 27287 #define UIDMAPPATH_ATTR 27288 #define GIDMAPPATH_ATTR 27289 @@ -211,7 +217,7 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) /* * If @app is NULL, execve will segfault. Just check it here and bail (if - * we're in this path, the caller is already getting desparate and there + * we're in this path, the caller is already getting desperate and there * isn't a backup to this failing). This usually would be a configuration * or programming issue. */ @@ -419,8 +425,8 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; - case ROOTLESS_ATTR: - config->is_rootless = readint8(current); + case ROOTLESS_EUID_ATTR: + config->is_rootless_euid = readint8(current); /* boolean */ break; case OOM_SCORE_ADJ_ATTR: config->oom_score_adj = current; @@ -640,7 +646,6 @@ void nsexec(void) case JUMP_PARENT:{ int len; pid_t child, first_child = -1; - char buf[JSON_MAX]; bool ready = false; /* For debugging. */ @@ -687,7 +692,7 @@ void nsexec(void) * newuidmap/newgidmap shall be used. */ - if (config.is_rootless && !config.is_setgroup) + if (config.is_rootless_euid && !config.is_setgroup) update_setgroups(child, SETGROUPS_DENY); /* Set up mappings. */ @@ -716,6 +721,18 @@ void nsexec(void) kill(child, SIGKILL); bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); } + + /* Send the init_func pid back to our parent. + * + * Send the init_func pid and the pid of the first child back to our parent. + * We need to send both back because we can't reap the first child we created (CLONE_PARENT). + * It becomes the responsibility of our parent to reap the first child. + */ + len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); + if (len < 0) { + kill(child, SIGKILL); + bail("unable to generate JSON for child pid"); + } } break; case SYNC_CHILD_READY: @@ -759,23 +776,6 @@ void nsexec(void) bail("unexpected sync value: %u", s); } } - - /* - * Send the init_func pid and the pid of the first child back to our parent. - * - * We need to send both back because we can't reap the first child we created (CLONE_PARENT). - * It becomes the responsibility of our parent to reap the first child. - */ - len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); - if (len < 0) { - kill(child, SIGKILL); - bail("unable to generate JSON for child pid"); - } - if (write(pipenum, buf, len) != len) { - kill(child, SIGKILL); - bail("unable to send child pid to bootstrapper"); - } - exit(0); } @@ -862,14 +862,17 @@ void nsexec(void) if (setresuid(0, 0, 0) < 0) bail("failed to become root in user namespace"); } - /* - * Unshare all of the namespaces. Note that we don't merge this - * with clone() because there were some old kernel versions where - * clone(CLONE_PARENT | CLONE_NEWPID) was broken, so we'll just do - * it the long way. + * Unshare all of the namespaces. Now, it should be noted that this + * ordering might break in the future (especially with rootless + * containers). But for now, it's not possible to split this into + * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. + * + * Note that we don't merge this with clone() because there were + * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) + * was broken, so we'll just do it the long way anyway. */ - if (unshare(config.cloneflags) < 0) + if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0) bail("failed to unshare namespaces"); /* @@ -953,11 +956,23 @@ void nsexec(void) if (setgid(0) < 0) bail("setgid failed"); - if (!config.is_rootless && config.is_setgroup) { + if (!config.is_rootless_euid && config.is_setgroup) { if (setgroups(0, NULL) < 0) bail("setgroups failed"); } + /* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */ + if (config.cloneflags & CLONE_NEWCGROUP) { + uint8_t value; + if (read(pipenum, &value, sizeof(value)) != sizeof(value)) + bail("read synchronisation value failed"); + if (value == CREATECGROUPNS) { + if (unshare(CLONE_NEWCGROUP) < 0) + bail("failed to unshare cgroup namespace"); + } else + bail("received unknown synchronisation value"); + } + s = SYNC_CHILD_READY; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with patent: write(SYNC_CHILD_READY)"); diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go index c1e634c94..92b5ae8de 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go @@ -5,6 +5,7 @@ package user import ( "io" "os" + "strconv" "golang.org/x/sys/unix" ) @@ -115,22 +116,23 @@ func CurrentGroup() (Group, error) { return LookupGid(unix.Getgid()) } -func CurrentUserSubUIDs() ([]SubID, error) { +func currentUserSubIDs(fileName string) ([]SubID, error) { u, err := CurrentUser() if err != nil { return nil, err } - return ParseSubIDFileFilter("/etc/subuid", - func(entry SubID) bool { return entry.Name == u.Name }) + filter := func(entry SubID) bool { + return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid) + } + return ParseSubIDFileFilter(fileName, filter) } -func CurrentGroupSubGIDs() ([]SubID, error) { - g, err := CurrentGroup() - if err != nil { - return nil, err - } - return ParseSubIDFileFilter("/etc/subgid", - func(entry SubID) bool { return entry.Name == g.Name }) +func CurrentUserSubUIDs() ([]SubID, error) { + return currentUserSubIDs("/etc/subuid") +} + +func CurrentUserSubGIDs() ([]SubID, error) { + return currentUserSubIDs("/etc/subgid") } func CurrentProcessUIDMap() ([]IDMap, error) { diff --git a/vendor/github.com/opencontainers/runc/vendor.conf b/vendor/github.com/opencontainers/runc/vendor.conf index e2b519e67..fadbe0707 100644 --- a/vendor/github.com/opencontainers/runc/vendor.conf +++ b/vendor/github.com/opencontainers/runc/vendor.conf @@ -1,7 +1,7 @@ # OCI runtime-spec. When updating this, make sure you use a version tag rather # than a commit ID so it's much more obvious what version of the spec we are # using. -github.com/opencontainers/runtime-spec v1.0.0 +github.com/opencontainers/runtime-spec 5684b8af48c1ac3b1451fa499724e30e3c20a294 # Core libcontainer functionality. github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08 github.com/opencontainers/selinux v1.0.0-rc1 diff --git a/vendor/golang.org/x/net/html/atom/atom.go b/vendor/golang.org/x/net/html/atom/atom.go new file mode 100644 index 000000000..cd0a8ac15 --- /dev/null +++ b/vendor/golang.org/x/net/html/atom/atom.go @@ -0,0 +1,78 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package atom provides integer codes (also known as atoms) for a fixed set of +// frequently occurring HTML strings: tag names and attribute keys such as "p" +// and "id". +// +// Sharing an atom's name between all elements with the same tag can result in +// fewer string allocations when tokenizing and parsing HTML. Integer +// comparisons are also generally faster than string comparisons. +// +// The value of an atom's particular code is not guaranteed to stay the same +// between versions of this package. Neither is any ordering guaranteed: +// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to +// be dense. The only guarantees are that e.g. looking up "div" will yield +// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0. +package atom // import "golang.org/x/net/html/atom" + +// Atom is an integer code for a string. The zero value maps to "". +type Atom uint32 + +// String returns the atom's name. +func (a Atom) String() string { + start := uint32(a >> 8) + n := uint32(a & 0xff) + if start+n > uint32(len(atomText)) { + return "" + } + return atomText[start : start+n] +} + +func (a Atom) string() string { + return atomText[a>>8 : a>>8+a&0xff] +} + +// fnv computes the FNV hash with an arbitrary starting value h. +func fnv(h uint32, s []byte) uint32 { + for i := range s { + h ^= uint32(s[i]) + h *= 16777619 + } + return h +} + +func match(s string, t []byte) bool { + for i, c := range t { + if s[i] != c { + return false + } + } + return true +} + +// Lookup returns the atom whose name is s. It returns zero if there is no +// such atom. The lookup is case sensitive. +func Lookup(s []byte) Atom { + if len(s) == 0 || len(s) > maxAtomLen { + return 0 + } + h := fnv(hash0, s) + if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) { + return a + } + if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) { + return a + } + return 0 +} + +// String returns a string whose contents are equal to s. In that sense, it is +// equivalent to string(s) but may be more efficient. +func String(s []byte) string { + if a := Lookup(s); a != 0 { + return a.String() + } + return string(s) +} diff --git a/vendor/golang.org/x/net/html/atom/gen.go b/vendor/golang.org/x/net/html/atom/gen.go new file mode 100644 index 000000000..6bfa86601 --- /dev/null +++ b/vendor/golang.org/x/net/html/atom/gen.go @@ -0,0 +1,648 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates table.go and table_test.go. +// Invoke as +// +// go run gen.go |gofmt >table.go +// go run gen.go -test |gofmt >table_test.go + +import ( + "flag" + "fmt" + "math/rand" + "os" + "sort" + "strings" +) + +// identifier converts s to a Go exported identifier. +// It converts "div" to "Div" and "accept-charset" to "AcceptCharset". +func identifier(s string) string { + b := make([]byte, 0, len(s)) + cap := true + for _, c := range s { + if c == '-' { + cap = true + continue + } + if cap && 'a' <= c && c <= 'z' { + c -= 'a' - 'A' + } + cap = false + b = append(b, byte(c)) + } + return string(b) +} + +var test = flag.Bool("test", false, "generate table_test.go") + +func main() { + flag.Parse() + + var all []string + all = append(all, elements...) + all = append(all, attributes...) + all = append(all, eventHandlers...) + all = append(all, extra...) + sort.Strings(all) + + if *test { + fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n") + fmt.Printf("package atom\n\n") + fmt.Printf("var testAtomList = []string{\n") + for _, s := range all { + fmt.Printf("\t%q,\n", s) + } + fmt.Printf("}\n") + return + } + + // uniq - lists have dups + // compute max len too + maxLen := 0 + w := 0 + for _, s := range all { + if w == 0 || all[w-1] != s { + if maxLen < len(s) { + maxLen = len(s) + } + all[w] = s + w++ + } + } + all = all[:w] + + // Find hash that minimizes table size. + var best *table + for i := 0; i < 1000000; i++ { + if best != nil && 1<<(best.k-1) < len(all) { + break + } + h := rand.Uint32() + for k := uint(0); k <= 16; k++ { + if best != nil && k >= best.k { + break + } + var t table + if t.init(h, k, all) { + best = &t + break + } + } + } + if best == nil { + fmt.Fprintf(os.Stderr, "failed to construct string table\n") + os.Exit(1) + } + + // Lay out strings, using overlaps when possible. + layout := append([]string{}, all...) + + // Remove strings that are substrings of other strings + for changed := true; changed; { + changed = false + for i, s := range layout { + if s == "" { + continue + } + for j, t := range layout { + if i != j && t != "" && strings.Contains(s, t) { + changed = true + layout[j] = "" + } + } + } + } + + // Join strings where one suffix matches another prefix. + for { + // Find best i, j, k such that layout[i][len-k:] == layout[j][:k], + // maximizing overlap length k. + besti := -1 + bestj := -1 + bestk := 0 + for i, s := range layout { + if s == "" { + continue + } + for j, t := range layout { + if i == j { + continue + } + for k := bestk + 1; k <= len(s) && k <= len(t); k++ { + if s[len(s)-k:] == t[:k] { + besti = i + bestj = j + bestk = k + } + } + } + } + if bestk > 0 { + layout[besti] += layout[bestj][bestk:] + layout[bestj] = "" + continue + } + break + } + + text := strings.Join(layout, "") + + atom := map[string]uint32{} + for _, s := range all { + off := strings.Index(text, s) + if off < 0 { + panic("lost string " + s) + } + atom[s] = uint32(off<<8 | len(s)) + } + + // Generate the Go code. + fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n") + fmt.Printf("package atom\n\nconst (\n") + for _, s := range all { + fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s]) + } + fmt.Printf(")\n\n") + + fmt.Printf("const hash0 = %#x\n\n", best.h0) + fmt.Printf("const maxAtomLen = %d\n\n", maxLen) + + fmt.Printf("var table = [1<<%d]Atom{\n", best.k) + for i, s := range best.tab { + if s == "" { + continue + } + fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s) + } + fmt.Printf("}\n") + datasize := (1 << best.k) * 4 + + fmt.Printf("const atomText =\n") + textsize := len(text) + for len(text) > 60 { + fmt.Printf("\t%q +\n", text[:60]) + text = text[60:] + } + fmt.Printf("\t%q\n\n", text) + + fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) +} + +type byLen []string + +func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } +func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } +func (x byLen) Len() int { return len(x) } + +// fnv computes the FNV hash with an arbitrary starting value h. +func fnv(h uint32, s string) uint32 { + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + return h +} + +// A table represents an attempt at constructing the lookup table. +// The lookup table uses cuckoo hashing, meaning that each string +// can be found in one of two positions. +type table struct { + h0 uint32 + k uint + mask uint32 + tab []string +} + +// hash returns the two hashes for s. +func (t *table) hash(s string) (h1, h2 uint32) { + h := fnv(t.h0, s) + h1 = h & t.mask + h2 = (h >> 16) & t.mask + return +} + +// init initializes the table with the given parameters. +// h0 is the initial hash value, +// k is the number of bits of hash value to use, and +// x is the list of strings to store in the table. +// init returns false if the table cannot be constructed. +func (t *table) init(h0 uint32, k uint, x []string) bool { + t.h0 = h0 + t.k = k + t.tab = make([]string, 1<<k) + t.mask = 1<<k - 1 + for _, s := range x { + if !t.insert(s) { + return false + } + } + return true +} + +// insert inserts s in the table. +func (t *table) insert(s string) bool { + h1, h2 := t.hash(s) + if t.tab[h1] == "" { + t.tab[h1] = s + return true + } + if t.tab[h2] == "" { + t.tab[h2] = s + return true + } + if t.push(h1, 0) { + t.tab[h1] = s + return true + } + if t.push(h2, 0) { + t.tab[h2] = s + return true + } + return false +} + +// push attempts to push aside the entry in slot i. +func (t *table) push(i uint32, depth int) bool { + if depth > len(t.tab) { + return false + } + s := t.tab[i] + h1, h2 := t.hash(s) + j := h1 + h2 - i + if t.tab[j] != "" && !t.push(j, depth+1) { + return false + } + t.tab[j] = s + return true +} + +// The lists of element names and attribute keys were taken from +// https://html.spec.whatwg.org/multipage/indices.html#index +// as of the "HTML Living Standard - Last Updated 21 February 2015" version. + +var elements = []string{ + "a", + "abbr", + "address", + "area", + "article", + "aside", + "audio", + "b", + "base", + "bdi", + "bdo", + "blockquote", + "body", + "br", + "button", + "canvas", + "caption", + "cite", + "code", + "col", + "colgroup", + "command", + "data", + "datalist", + "dd", + "del", + "details", + "dfn", + "dialog", + "div", + "dl", + "dt", + "em", + "embed", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "header", + "hgroup", + "hr", + "html", + "i", + "iframe", + "img", + "input", + "ins", + "kbd", + "keygen", + "label", + "legend", + "li", + "link", + "map", + "mark", + "menu", + "menuitem", + "meta", + "meter", + "nav", + "noscript", + "object", + "ol", + "optgroup", + "option", + "output", + "p", + "param", + "pre", + "progress", + "q", + "rp", + "rt", + "ruby", + "s", + "samp", + "script", + "section", + "select", + "small", + "source", + "span", + "strong", + "style", + "sub", + "summary", + "sup", + "table", + "tbody", + "td", + "template", + "textarea", + "tfoot", + "th", + "thead", + "time", + "title", + "tr", + "track", + "u", + "ul", + "var", + "video", + "wbr", +} + +// https://html.spec.whatwg.org/multipage/indices.html#attributes-3 + +var attributes = []string{ + "abbr", + "accept", + "accept-charset", + "accesskey", + "action", + "alt", + "async", + "autocomplete", + "autofocus", + "autoplay", + "challenge", + "charset", + "checked", + "cite", + "class", + "cols", + "colspan", + "command", + "content", + "contenteditable", + "contextmenu", + "controls", + "coords", + "crossorigin", + "data", + "datetime", + "default", + "defer", + "dir", + "dirname", + "disabled", + "download", + "draggable", + "dropzone", + "enctype", + "for", + "form", + "formaction", + "formenctype", + "formmethod", + "formnovalidate", + "formtarget", + "headers", + "height", + "hidden", + "high", + "href", + "hreflang", + "http-equiv", + "icon", + "id", + "inputmode", + "ismap", + "itemid", + "itemprop", + "itemref", + "itemscope", + "itemtype", + "keytype", + "kind", + "label", + "lang", + "list", + "loop", + "low", + "manifest", + "max", + "maxlength", + "media", + "mediagroup", + "method", + "min", + "minlength", + "multiple", + "muted", + "name", + "novalidate", + "open", + "optimum", + "pattern", + "ping", + "placeholder", + "poster", + "preload", + "radiogroup", + "readonly", + "rel", + "required", + "reversed", + "rows", + "rowspan", + "sandbox", + "spellcheck", + "scope", + "scoped", + "seamless", + "selected", + "shape", + "size", + "sizes", + "sortable", + "sorted", + "span", + "src", + "srcdoc", + "srclang", + "start", + "step", + "style", + "tabindex", + "target", + "title", + "translate", + "type", + "typemustmatch", + "usemap", + "value", + "width", + "wrap", +} + +var eventHandlers = []string{ + "onabort", + "onautocomplete", + "onautocompleteerror", + "onafterprint", + "onbeforeprint", + "onbeforeunload", + "onblur", + "oncancel", + "oncanplay", + "oncanplaythrough", + "onchange", + "onclick", + "onclose", + "oncontextmenu", + "oncuechange", + "ondblclick", + "ondrag", + "ondragend", + "ondragenter", + "ondragleave", + "ondragover", + "ondragstart", + "ondrop", + "ondurationchange", + "onemptied", + "onended", + "onerror", + "onfocus", + "onhashchange", + "oninput", + "oninvalid", + "onkeydown", + "onkeypress", + "onkeyup", + "onlanguagechange", + "onload", + "onloadeddata", + "onloadedmetadata", + "onloadstart", + "onmessage", + "onmousedown", + "onmousemove", + "onmouseout", + "onmouseover", + "onmouseup", + "onmousewheel", + "onoffline", + "ononline", + "onpagehide", + "onpageshow", + "onpause", + "onplay", + "onplaying", + "onpopstate", + "onprogress", + "onratechange", + "onreset", + "onresize", + "onscroll", + "onseeked", + "onseeking", + "onselect", + "onshow", + "onsort", + "onstalled", + "onstorage", + "onsubmit", + "onsuspend", + "ontimeupdate", + "ontoggle", + "onunload", + "onvolumechange", + "onwaiting", +} + +// extra are ad-hoc values not covered by any of the lists above. +var extra = []string{ + "align", + "annotation", + "annotation-xml", + "applet", + "basefont", + "bgsound", + "big", + "blink", + "center", + "color", + "desc", + "face", + "font", + "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive. + "foreignobject", + "frame", + "frameset", + "image", + "isindex", + "listing", + "malignmark", + "marquee", + "math", + "mglyph", + "mi", + "mn", + "mo", + "ms", + "mtext", + "nobr", + "noembed", + "noframes", + "plaintext", + "prompt", + "public", + "spacer", + "strike", + "svg", + "system", + "tt", + "xmp", +} diff --git a/vendor/golang.org/x/net/html/atom/table.go b/vendor/golang.org/x/net/html/atom/table.go new file mode 100644 index 000000000..2605ba310 --- /dev/null +++ b/vendor/golang.org/x/net/html/atom/table.go @@ -0,0 +1,713 @@ +// generated by go run gen.go; DO NOT EDIT + +package atom + +const ( + A Atom = 0x1 + Abbr Atom = 0x4 + Accept Atom = 0x2106 + AcceptCharset Atom = 0x210e + Accesskey Atom = 0x3309 + Action Atom = 0x1f606 + Address Atom = 0x4f307 + Align Atom = 0x1105 + Alt Atom = 0x4503 + Annotation Atom = 0x1670a + AnnotationXml Atom = 0x1670e + Applet Atom = 0x2b306 + Area Atom = 0x2fa04 + Article Atom = 0x38807 + Aside Atom = 0x8305 + Async Atom = 0x7b05 + Audio Atom = 0xa605 + Autocomplete Atom = 0x1fc0c + Autofocus Atom = 0xb309 + Autoplay Atom = 0xce08 + B Atom = 0x101 + Base Atom = 0xd604 + Basefont Atom = 0xd608 + Bdi Atom = 0x1a03 + Bdo Atom = 0xe703 + Bgsound Atom = 0x11807 + Big Atom = 0x12403 + Blink Atom = 0x12705 + Blockquote Atom = 0x12c0a + Body Atom = 0x2f04 + Br Atom = 0x202 + Button Atom = 0x13606 + Canvas Atom = 0x7f06 + Caption Atom = 0x1bb07 + Center Atom = 0x5b506 + Challenge Atom = 0x21f09 + Charset Atom = 0x2807 + Checked Atom = 0x32807 + Cite Atom = 0x3c804 + Class Atom = 0x4de05 + Code Atom = 0x14904 + Col Atom = 0x15003 + Colgroup Atom = 0x15008 + Color Atom = 0x15d05 + Cols Atom = 0x16204 + Colspan Atom = 0x16207 + Command Atom = 0x17507 + Content Atom = 0x42307 + Contenteditable Atom = 0x4230f + Contextmenu Atom = 0x3310b + Controls Atom = 0x18808 + Coords Atom = 0x19406 + Crossorigin Atom = 0x19f0b + Data Atom = 0x44a04 + Datalist Atom = 0x44a08 + Datetime Atom = 0x23c08 + Dd Atom = 0x26702 + Default Atom = 0x8607 + Defer Atom = 0x14b05 + Del Atom = 0x3ef03 + Desc Atom = 0x4db04 + Details Atom = 0x4807 + Dfn Atom = 0x6103 + Dialog Atom = 0x1b06 + Dir Atom = 0x6903 + Dirname Atom = 0x6907 + Disabled Atom = 0x10c08 + Div Atom = 0x11303 + Dl Atom = 0x11e02 + Download Atom = 0x40008 + Draggable Atom = 0x17b09 + Dropzone Atom = 0x39108 + Dt Atom = 0x50902 + Em Atom = 0x6502 + Embed Atom = 0x6505 + Enctype Atom = 0x21107 + Face Atom = 0x5b304 + Fieldset Atom = 0x1b008 + Figcaption Atom = 0x1b80a + Figure Atom = 0x1cc06 + Font Atom = 0xda04 + Footer Atom = 0x8d06 + For Atom = 0x1d803 + ForeignObject Atom = 0x1d80d + Foreignobject Atom = 0x1e50d + Form Atom = 0x1f204 + Formaction Atom = 0x1f20a + Formenctype Atom = 0x20d0b + Formmethod Atom = 0x2280a + Formnovalidate Atom = 0x2320e + Formtarget Atom = 0x2470a + Frame Atom = 0x9a05 + Frameset Atom = 0x9a08 + H1 Atom = 0x26e02 + H2 Atom = 0x29402 + H3 Atom = 0x2a702 + H4 Atom = 0x2e902 + H5 Atom = 0x2f302 + H6 Atom = 0x50b02 + Head Atom = 0x2d504 + Header Atom = 0x2d506 + Headers Atom = 0x2d507 + Height Atom = 0x25106 + Hgroup Atom = 0x25906 + Hidden Atom = 0x26506 + High Atom = 0x26b04 + Hr Atom = 0x27002 + Href Atom = 0x27004 + Hreflang Atom = 0x27008 + Html Atom = 0x25504 + HttpEquiv Atom = 0x2780a + I Atom = 0x601 + Icon Atom = 0x42204 + Id Atom = 0x8502 + Iframe Atom = 0x29606 + Image Atom = 0x29c05 + Img Atom = 0x2a103 + Input Atom = 0x3e805 + Inputmode Atom = 0x3e809 + Ins Atom = 0x1a803 + Isindex Atom = 0x2a907 + Ismap Atom = 0x2b005 + Itemid Atom = 0x33c06 + Itemprop Atom = 0x3c908 + Itemref Atom = 0x5ad07 + Itemscope Atom = 0x2b909 + Itemtype Atom = 0x2c308 + Kbd Atom = 0x1903 + Keygen Atom = 0x3906 + Keytype Atom = 0x53707 + Kind Atom = 0x10904 + Label Atom = 0xf005 + Lang Atom = 0x27404 + Legend Atom = 0x18206 + Li Atom = 0x1202 + Link Atom = 0x12804 + List Atom = 0x44e04 + Listing Atom = 0x44e07 + Loop Atom = 0xf404 + Low Atom = 0x11f03 + Malignmark Atom = 0x100a + Manifest Atom = 0x5f108 + Map Atom = 0x2b203 + Mark Atom = 0x1604 + Marquee Atom = 0x2cb07 + Math Atom = 0x2d204 + Max Atom = 0x2e103 + Maxlength Atom = 0x2e109 + Media Atom = 0x6e05 + Mediagroup Atom = 0x6e0a + Menu Atom = 0x33804 + Menuitem Atom = 0x33808 + Meta Atom = 0x45d04 + Meter Atom = 0x24205 + Method Atom = 0x22c06 + Mglyph Atom = 0x2a206 + Mi Atom = 0x2eb02 + Min Atom = 0x2eb03 + Minlength Atom = 0x2eb09 + Mn Atom = 0x23502 + Mo Atom = 0x3ed02 + Ms Atom = 0x2bc02 + Mtext Atom = 0x2f505 + Multiple Atom = 0x30308 + Muted Atom = 0x30b05 + Name Atom = 0x6c04 + Nav Atom = 0x3e03 + Nobr Atom = 0x5704 + Noembed Atom = 0x6307 + Noframes Atom = 0x9808 + Noscript Atom = 0x3d208 + Novalidate Atom = 0x2360a + Object Atom = 0x1ec06 + Ol Atom = 0xc902 + Onabort Atom = 0x13a07 + Onafterprint Atom = 0x1c00c + Onautocomplete Atom = 0x1fa0e + Onautocompleteerror Atom = 0x1fa13 + Onbeforeprint Atom = 0x6040d + Onbeforeunload Atom = 0x4e70e + Onblur Atom = 0xaa06 + Oncancel Atom = 0xe908 + Oncanplay Atom = 0x28509 + Oncanplaythrough Atom = 0x28510 + Onchange Atom = 0x3a708 + Onclick Atom = 0x31007 + Onclose Atom = 0x31707 + Oncontextmenu Atom = 0x32f0d + Oncuechange Atom = 0x3420b + Ondblclick Atom = 0x34d0a + Ondrag Atom = 0x35706 + Ondragend Atom = 0x35709 + Ondragenter Atom = 0x3600b + Ondragleave Atom = 0x36b0b + Ondragover Atom = 0x3760a + Ondragstart Atom = 0x3800b + Ondrop Atom = 0x38f06 + Ondurationchange Atom = 0x39f10 + Onemptied Atom = 0x39609 + Onended Atom = 0x3af07 + Onerror Atom = 0x3b607 + Onfocus Atom = 0x3bd07 + Onhashchange Atom = 0x3da0c + Oninput Atom = 0x3e607 + Oninvalid Atom = 0x3f209 + Onkeydown Atom = 0x3fb09 + Onkeypress Atom = 0x4080a + Onkeyup Atom = 0x41807 + Onlanguagechange Atom = 0x43210 + Onload Atom = 0x44206 + Onloadeddata Atom = 0x4420c + Onloadedmetadata Atom = 0x45510 + Onloadstart Atom = 0x46b0b + Onmessage Atom = 0x47609 + Onmousedown Atom = 0x47f0b + Onmousemove Atom = 0x48a0b + Onmouseout Atom = 0x4950a + Onmouseover Atom = 0x4a20b + Onmouseup Atom = 0x4ad09 + Onmousewheel Atom = 0x4b60c + Onoffline Atom = 0x4c209 + Ononline Atom = 0x4cb08 + Onpagehide Atom = 0x4d30a + Onpageshow Atom = 0x4fe0a + Onpause Atom = 0x50d07 + Onplay Atom = 0x51706 + Onplaying Atom = 0x51709 + Onpopstate Atom = 0x5200a + Onprogress Atom = 0x52a0a + Onratechange Atom = 0x53e0c + Onreset Atom = 0x54a07 + Onresize Atom = 0x55108 + Onscroll Atom = 0x55f08 + Onseeked Atom = 0x56708 + Onseeking Atom = 0x56f09 + Onselect Atom = 0x57808 + Onshow Atom = 0x58206 + Onsort Atom = 0x58b06 + Onstalled Atom = 0x59509 + Onstorage Atom = 0x59e09 + Onsubmit Atom = 0x5a708 + Onsuspend Atom = 0x5bb09 + Ontimeupdate Atom = 0xdb0c + Ontoggle Atom = 0x5c408 + Onunload Atom = 0x5cc08 + Onvolumechange Atom = 0x5d40e + Onwaiting Atom = 0x5e209 + Open Atom = 0x3cf04 + Optgroup Atom = 0xf608 + Optimum Atom = 0x5eb07 + Option Atom = 0x60006 + Output Atom = 0x49c06 + P Atom = 0xc01 + Param Atom = 0xc05 + Pattern Atom = 0x5107 + Ping Atom = 0x7704 + Placeholder Atom = 0xc30b + Plaintext Atom = 0xfd09 + Poster Atom = 0x15706 + Pre Atom = 0x25e03 + Preload Atom = 0x25e07 + Progress Atom = 0x52c08 + Prompt Atom = 0x5fa06 + Public Atom = 0x41e06 + Q Atom = 0x13101 + Radiogroup Atom = 0x30a + Readonly Atom = 0x2fb08 + Rel Atom = 0x25f03 + Required Atom = 0x1d008 + Reversed Atom = 0x5a08 + Rows Atom = 0x9204 + Rowspan Atom = 0x9207 + Rp Atom = 0x1c602 + Rt Atom = 0x13f02 + Ruby Atom = 0xaf04 + S Atom = 0x2c01 + Samp Atom = 0x4e04 + Sandbox Atom = 0xbb07 + Scope Atom = 0x2bd05 + Scoped Atom = 0x2bd06 + Script Atom = 0x3d406 + Seamless Atom = 0x31c08 + Section Atom = 0x4e207 + Select Atom = 0x57a06 + Selected Atom = 0x57a08 + Shape Atom = 0x4f905 + Size Atom = 0x55504 + Sizes Atom = 0x55505 + Small Atom = 0x18f05 + Sortable Atom = 0x58d08 + Sorted Atom = 0x19906 + Source Atom = 0x1aa06 + Spacer Atom = 0x2db06 + Span Atom = 0x9504 + Spellcheck Atom = 0x3230a + Src Atom = 0x3c303 + Srcdoc Atom = 0x3c306 + Srclang Atom = 0x41107 + Start Atom = 0x38605 + Step Atom = 0x5f704 + Strike Atom = 0x53306 + Strong Atom = 0x55906 + Style Atom = 0x61105 + Sub Atom = 0x5a903 + Summary Atom = 0x61607 + Sup Atom = 0x61d03 + Svg Atom = 0x62003 + System Atom = 0x62306 + Tabindex Atom = 0x46308 + Table Atom = 0x42d05 + Target Atom = 0x24b06 + Tbody Atom = 0x2e05 + Td Atom = 0x4702 + Template Atom = 0x62608 + Textarea Atom = 0x2f608 + Tfoot Atom = 0x8c05 + Th Atom = 0x22e02 + Thead Atom = 0x2d405 + Time Atom = 0xdd04 + Title Atom = 0xa105 + Tr Atom = 0x10502 + Track Atom = 0x10505 + Translate Atom = 0x14009 + Tt Atom = 0x5302 + Type Atom = 0x21404 + Typemustmatch Atom = 0x2140d + U Atom = 0xb01 + Ul Atom = 0x8a02 + Usemap Atom = 0x51106 + Value Atom = 0x4005 + Var Atom = 0x11503 + Video Atom = 0x28105 + Wbr Atom = 0x12103 + Width Atom = 0x50705 + Wrap Atom = 0x58704 + Xmp Atom = 0xc103 +) + +const hash0 = 0xc17da63e + +const maxAtomLen = 19 + +var table = [1 << 9]Atom{ + 0x1: 0x48a0b, // onmousemove + 0x2: 0x5e209, // onwaiting + 0x3: 0x1fa13, // onautocompleteerror + 0x4: 0x5fa06, // prompt + 0x7: 0x5eb07, // optimum + 0x8: 0x1604, // mark + 0xa: 0x5ad07, // itemref + 0xb: 0x4fe0a, // onpageshow + 0xc: 0x57a06, // select + 0xd: 0x17b09, // draggable + 0xe: 0x3e03, // nav + 0xf: 0x17507, // command + 0x11: 0xb01, // u + 0x14: 0x2d507, // headers + 0x15: 0x44a08, // datalist + 0x17: 0x4e04, // samp + 0x1a: 0x3fb09, // onkeydown + 0x1b: 0x55f08, // onscroll + 0x1c: 0x15003, // col + 0x20: 0x3c908, // itemprop + 0x21: 0x2780a, // http-equiv + 0x22: 0x61d03, // sup + 0x24: 0x1d008, // required + 0x2b: 0x25e07, // preload + 0x2c: 0x6040d, // onbeforeprint + 0x2d: 0x3600b, // ondragenter + 0x2e: 0x50902, // dt + 0x2f: 0x5a708, // onsubmit + 0x30: 0x27002, // hr + 0x31: 0x32f0d, // oncontextmenu + 0x33: 0x29c05, // image + 0x34: 0x50d07, // onpause + 0x35: 0x25906, // hgroup + 0x36: 0x7704, // ping + 0x37: 0x57808, // onselect + 0x3a: 0x11303, // div + 0x3b: 0x1fa0e, // onautocomplete + 0x40: 0x2eb02, // mi + 0x41: 0x31c08, // seamless + 0x42: 0x2807, // charset + 0x43: 0x8502, // id + 0x44: 0x5200a, // onpopstate + 0x45: 0x3ef03, // del + 0x46: 0x2cb07, // marquee + 0x47: 0x3309, // accesskey + 0x49: 0x8d06, // footer + 0x4a: 0x44e04, // list + 0x4b: 0x2b005, // ismap + 0x51: 0x33804, // menu + 0x52: 0x2f04, // body + 0x55: 0x9a08, // frameset + 0x56: 0x54a07, // onreset + 0x57: 0x12705, // blink + 0x58: 0xa105, // title + 0x59: 0x38807, // article + 0x5b: 0x22e02, // th + 0x5d: 0x13101, // q + 0x5e: 0x3cf04, // open + 0x5f: 0x2fa04, // area + 0x61: 0x44206, // onload + 0x62: 0xda04, // font + 0x63: 0xd604, // base + 0x64: 0x16207, // colspan + 0x65: 0x53707, // keytype + 0x66: 0x11e02, // dl + 0x68: 0x1b008, // fieldset + 0x6a: 0x2eb03, // min + 0x6b: 0x11503, // var + 0x6f: 0x2d506, // header + 0x70: 0x13f02, // rt + 0x71: 0x15008, // colgroup + 0x72: 0x23502, // mn + 0x74: 0x13a07, // onabort + 0x75: 0x3906, // keygen + 0x76: 0x4c209, // onoffline + 0x77: 0x21f09, // challenge + 0x78: 0x2b203, // map + 0x7a: 0x2e902, // h4 + 0x7b: 0x3b607, // onerror + 0x7c: 0x2e109, // maxlength + 0x7d: 0x2f505, // mtext + 0x7e: 0xbb07, // sandbox + 0x7f: 0x58b06, // onsort + 0x80: 0x100a, // malignmark + 0x81: 0x45d04, // meta + 0x82: 0x7b05, // async + 0x83: 0x2a702, // h3 + 0x84: 0x26702, // dd + 0x85: 0x27004, // href + 0x86: 0x6e0a, // mediagroup + 0x87: 0x19406, // coords + 0x88: 0x41107, // srclang + 0x89: 0x34d0a, // ondblclick + 0x8a: 0x4005, // value + 0x8c: 0xe908, // oncancel + 0x8e: 0x3230a, // spellcheck + 0x8f: 0x9a05, // frame + 0x91: 0x12403, // big + 0x94: 0x1f606, // action + 0x95: 0x6903, // dir + 0x97: 0x2fb08, // readonly + 0x99: 0x42d05, // table + 0x9a: 0x61607, // summary + 0x9b: 0x12103, // wbr + 0x9c: 0x30a, // radiogroup + 0x9d: 0x6c04, // name + 0x9f: 0x62306, // system + 0xa1: 0x15d05, // color + 0xa2: 0x7f06, // canvas + 0xa3: 0x25504, // html + 0xa5: 0x56f09, // onseeking + 0xac: 0x4f905, // shape + 0xad: 0x25f03, // rel + 0xae: 0x28510, // oncanplaythrough + 0xaf: 0x3760a, // ondragover + 0xb0: 0x62608, // template + 0xb1: 0x1d80d, // foreignObject + 0xb3: 0x9204, // rows + 0xb6: 0x44e07, // listing + 0xb7: 0x49c06, // output + 0xb9: 0x3310b, // contextmenu + 0xbb: 0x11f03, // low + 0xbc: 0x1c602, // rp + 0xbd: 0x5bb09, // onsuspend + 0xbe: 0x13606, // button + 0xbf: 0x4db04, // desc + 0xc1: 0x4e207, // section + 0xc2: 0x52a0a, // onprogress + 0xc3: 0x59e09, // onstorage + 0xc4: 0x2d204, // math + 0xc5: 0x4503, // alt + 0xc7: 0x8a02, // ul + 0xc8: 0x5107, // pattern + 0xc9: 0x4b60c, // onmousewheel + 0xca: 0x35709, // ondragend + 0xcb: 0xaf04, // ruby + 0xcc: 0xc01, // p + 0xcd: 0x31707, // onclose + 0xce: 0x24205, // meter + 0xcf: 0x11807, // bgsound + 0xd2: 0x25106, // height + 0xd4: 0x101, // b + 0xd5: 0x2c308, // itemtype + 0xd8: 0x1bb07, // caption + 0xd9: 0x10c08, // disabled + 0xdb: 0x33808, // menuitem + 0xdc: 0x62003, // svg + 0xdd: 0x18f05, // small + 0xde: 0x44a04, // data + 0xe0: 0x4cb08, // ononline + 0xe1: 0x2a206, // mglyph + 0xe3: 0x6505, // embed + 0xe4: 0x10502, // tr + 0xe5: 0x46b0b, // onloadstart + 0xe7: 0x3c306, // srcdoc + 0xeb: 0x5c408, // ontoggle + 0xed: 0xe703, // bdo + 0xee: 0x4702, // td + 0xef: 0x8305, // aside + 0xf0: 0x29402, // h2 + 0xf1: 0x52c08, // progress + 0xf2: 0x12c0a, // blockquote + 0xf4: 0xf005, // label + 0xf5: 0x601, // i + 0xf7: 0x9207, // rowspan + 0xfb: 0x51709, // onplaying + 0xfd: 0x2a103, // img + 0xfe: 0xf608, // optgroup + 0xff: 0x42307, // content + 0x101: 0x53e0c, // onratechange + 0x103: 0x3da0c, // onhashchange + 0x104: 0x4807, // details + 0x106: 0x40008, // download + 0x109: 0x14009, // translate + 0x10b: 0x4230f, // contenteditable + 0x10d: 0x36b0b, // ondragleave + 0x10e: 0x2106, // accept + 0x10f: 0x57a08, // selected + 0x112: 0x1f20a, // formaction + 0x113: 0x5b506, // center + 0x115: 0x45510, // onloadedmetadata + 0x116: 0x12804, // link + 0x117: 0xdd04, // time + 0x118: 0x19f0b, // crossorigin + 0x119: 0x3bd07, // onfocus + 0x11a: 0x58704, // wrap + 0x11b: 0x42204, // icon + 0x11d: 0x28105, // video + 0x11e: 0x4de05, // class + 0x121: 0x5d40e, // onvolumechange + 0x122: 0xaa06, // onblur + 0x123: 0x2b909, // itemscope + 0x124: 0x61105, // style + 0x127: 0x41e06, // public + 0x129: 0x2320e, // formnovalidate + 0x12a: 0x58206, // onshow + 0x12c: 0x51706, // onplay + 0x12d: 0x3c804, // cite + 0x12e: 0x2bc02, // ms + 0x12f: 0xdb0c, // ontimeupdate + 0x130: 0x10904, // kind + 0x131: 0x2470a, // formtarget + 0x135: 0x3af07, // onended + 0x136: 0x26506, // hidden + 0x137: 0x2c01, // s + 0x139: 0x2280a, // formmethod + 0x13a: 0x3e805, // input + 0x13c: 0x50b02, // h6 + 0x13d: 0xc902, // ol + 0x13e: 0x3420b, // oncuechange + 0x13f: 0x1e50d, // foreignobject + 0x143: 0x4e70e, // onbeforeunload + 0x144: 0x2bd05, // scope + 0x145: 0x39609, // onemptied + 0x146: 0x14b05, // defer + 0x147: 0xc103, // xmp + 0x148: 0x39f10, // ondurationchange + 0x149: 0x1903, // kbd + 0x14c: 0x47609, // onmessage + 0x14d: 0x60006, // option + 0x14e: 0x2eb09, // minlength + 0x14f: 0x32807, // checked + 0x150: 0xce08, // autoplay + 0x152: 0x202, // br + 0x153: 0x2360a, // novalidate + 0x156: 0x6307, // noembed + 0x159: 0x31007, // onclick + 0x15a: 0x47f0b, // onmousedown + 0x15b: 0x3a708, // onchange + 0x15e: 0x3f209, // oninvalid + 0x15f: 0x2bd06, // scoped + 0x160: 0x18808, // controls + 0x161: 0x30b05, // muted + 0x162: 0x58d08, // sortable + 0x163: 0x51106, // usemap + 0x164: 0x1b80a, // figcaption + 0x165: 0x35706, // ondrag + 0x166: 0x26b04, // high + 0x168: 0x3c303, // src + 0x169: 0x15706, // poster + 0x16b: 0x1670e, // annotation-xml + 0x16c: 0x5f704, // step + 0x16d: 0x4, // abbr + 0x16e: 0x1b06, // dialog + 0x170: 0x1202, // li + 0x172: 0x3ed02, // mo + 0x175: 0x1d803, // for + 0x176: 0x1a803, // ins + 0x178: 0x55504, // size + 0x179: 0x43210, // onlanguagechange + 0x17a: 0x8607, // default + 0x17b: 0x1a03, // bdi + 0x17c: 0x4d30a, // onpagehide + 0x17d: 0x6907, // dirname + 0x17e: 0x21404, // type + 0x17f: 0x1f204, // form + 0x181: 0x28509, // oncanplay + 0x182: 0x6103, // dfn + 0x183: 0x46308, // tabindex + 0x186: 0x6502, // em + 0x187: 0x27404, // lang + 0x189: 0x39108, // dropzone + 0x18a: 0x4080a, // onkeypress + 0x18b: 0x23c08, // datetime + 0x18c: 0x16204, // cols + 0x18d: 0x1, // a + 0x18e: 0x4420c, // onloadeddata + 0x190: 0xa605, // audio + 0x192: 0x2e05, // tbody + 0x193: 0x22c06, // method + 0x195: 0xf404, // loop + 0x196: 0x29606, // iframe + 0x198: 0x2d504, // head + 0x19e: 0x5f108, // manifest + 0x19f: 0xb309, // autofocus + 0x1a0: 0x14904, // code + 0x1a1: 0x55906, // strong + 0x1a2: 0x30308, // multiple + 0x1a3: 0xc05, // param + 0x1a6: 0x21107, // enctype + 0x1a7: 0x5b304, // face + 0x1a8: 0xfd09, // plaintext + 0x1a9: 0x26e02, // h1 + 0x1aa: 0x59509, // onstalled + 0x1ad: 0x3d406, // script + 0x1ae: 0x2db06, // spacer + 0x1af: 0x55108, // onresize + 0x1b0: 0x4a20b, // onmouseover + 0x1b1: 0x5cc08, // onunload + 0x1b2: 0x56708, // onseeked + 0x1b4: 0x2140d, // typemustmatch + 0x1b5: 0x1cc06, // figure + 0x1b6: 0x4950a, // onmouseout + 0x1b7: 0x25e03, // pre + 0x1b8: 0x50705, // width + 0x1b9: 0x19906, // sorted + 0x1bb: 0x5704, // nobr + 0x1be: 0x5302, // tt + 0x1bf: 0x1105, // align + 0x1c0: 0x3e607, // oninput + 0x1c3: 0x41807, // onkeyup + 0x1c6: 0x1c00c, // onafterprint + 0x1c7: 0x210e, // accept-charset + 0x1c8: 0x33c06, // itemid + 0x1c9: 0x3e809, // inputmode + 0x1cb: 0x53306, // strike + 0x1cc: 0x5a903, // sub + 0x1cd: 0x10505, // track + 0x1ce: 0x38605, // start + 0x1d0: 0xd608, // basefont + 0x1d6: 0x1aa06, // source + 0x1d7: 0x18206, // legend + 0x1d8: 0x2d405, // thead + 0x1da: 0x8c05, // tfoot + 0x1dd: 0x1ec06, // object + 0x1de: 0x6e05, // media + 0x1df: 0x1670a, // annotation + 0x1e0: 0x20d0b, // formenctype + 0x1e2: 0x3d208, // noscript + 0x1e4: 0x55505, // sizes + 0x1e5: 0x1fc0c, // autocomplete + 0x1e6: 0x9504, // span + 0x1e7: 0x9808, // noframes + 0x1e8: 0x24b06, // target + 0x1e9: 0x38f06, // ondrop + 0x1ea: 0x2b306, // applet + 0x1ec: 0x5a08, // reversed + 0x1f0: 0x2a907, // isindex + 0x1f3: 0x27008, // hreflang + 0x1f5: 0x2f302, // h5 + 0x1f6: 0x4f307, // address + 0x1fa: 0x2e103, // max + 0x1fb: 0xc30b, // placeholder + 0x1fc: 0x2f608, // textarea + 0x1fe: 0x4ad09, // onmouseup + 0x1ff: 0x3800b, // ondragstart +} + +const atomText = "abbradiogrouparamalignmarkbdialogaccept-charsetbodyaccesskey" + + "genavaluealtdetailsampatternobreversedfnoembedirnamediagroup" + + "ingasyncanvasidefaultfooterowspanoframesetitleaudionblurubya" + + "utofocusandboxmplaceholderautoplaybasefontimeupdatebdoncance" + + "labelooptgrouplaintextrackindisabledivarbgsoundlowbrbigblink" + + "blockquotebuttonabortranslatecodefercolgroupostercolorcolspa" + + "nnotation-xmlcommandraggablegendcontrolsmallcoordsortedcross" + + "originsourcefieldsetfigcaptionafterprintfigurequiredforeignO" + + "bjectforeignobjectformactionautocompleteerrorformenctypemust" + + "matchallengeformmethodformnovalidatetimeterformtargetheightm" + + "lhgroupreloadhiddenhigh1hreflanghttp-equivideoncanplaythroug" + + "h2iframeimageimglyph3isindexismappletitemscopeditemtypemarqu" + + "eematheaderspacermaxlength4minlength5mtextareadonlymultiplem" + + "utedonclickoncloseamlesspellcheckedoncontextmenuitemidoncuec" + + "hangeondblclickondragendondragenterondragleaveondragoverondr" + + "agstarticleondropzonemptiedondurationchangeonendedonerroronf" + + "ocusrcdocitempropenoscriptonhashchangeoninputmodeloninvalido" + + "nkeydownloadonkeypressrclangonkeyupublicontenteditableonlang" + + "uagechangeonloadeddatalistingonloadedmetadatabindexonloadsta" + + "rtonmessageonmousedownonmousemoveonmouseoutputonmouseoveronm" + + "ouseuponmousewheelonofflineononlineonpagehidesclassectionbef" + + "oreunloaddresshapeonpageshowidth6onpausemaponplayingonpopsta" + + "teonprogresstrikeytypeonratechangeonresetonresizestrongonscr" + + "ollonseekedonseekingonselectedonshowraponsortableonstalledon" + + "storageonsubmitemrefacenteronsuspendontoggleonunloadonvolume" + + "changeonwaitingoptimumanifestepromptoptionbeforeprintstylesu" + + "mmarysupsvgsystemplate" diff --git a/vendor/golang.org/x/net/html/charset/charset.go b/vendor/golang.org/x/net/html/charset/charset.go new file mode 100644 index 000000000..13bed1599 --- /dev/null +++ b/vendor/golang.org/x/net/html/charset/charset.go @@ -0,0 +1,257 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package charset provides common text encodings for HTML documents. +// +// The mapping from encoding labels to encodings is defined at +// https://encoding.spec.whatwg.org/. +package charset // import "golang.org/x/net/html/charset" + +import ( + "bytes" + "fmt" + "io" + "mime" + "strings" + "unicode/utf8" + + "golang.org/x/net/html" + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/charmap" + "golang.org/x/text/encoding/htmlindex" + "golang.org/x/text/transform" +) + +// Lookup returns the encoding with the specified label, and its canonical +// name. It returns nil and the empty string if label is not one of the +// standard encodings for HTML. Matching is case-insensitive and ignores +// leading and trailing whitespace. Encoders will use HTML escape sequences for +// runes that are not supported by the character set. +func Lookup(label string) (e encoding.Encoding, name string) { + e, err := htmlindex.Get(label) + if err != nil { + return nil, "" + } + name, _ = htmlindex.Name(e) + return &htmlEncoding{e}, name +} + +type htmlEncoding struct{ encoding.Encoding } + +func (h *htmlEncoding) NewEncoder() *encoding.Encoder { + // HTML requires a non-terminating legacy encoder. We use HTML escapes to + // substitute unsupported code points. + return encoding.HTMLEscapeUnsupported(h.Encoding.NewEncoder()) +} + +// DetermineEncoding determines the encoding of an HTML document by examining +// up to the first 1024 bytes of content and the declared Content-Type. +// +// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding +func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) { + if len(content) > 1024 { + content = content[:1024] + } + + for _, b := range boms { + if bytes.HasPrefix(content, b.bom) { + e, name = Lookup(b.enc) + return e, name, true + } + } + + if _, params, err := mime.ParseMediaType(contentType); err == nil { + if cs, ok := params["charset"]; ok { + if e, name = Lookup(cs); e != nil { + return e, name, true + } + } + } + + if len(content) > 0 { + e, name = prescan(content) + if e != nil { + return e, name, false + } + } + + // Try to detect UTF-8. + // First eliminate any partial rune at the end. + for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- { + b := content[i] + if b < 0x80 { + break + } + if utf8.RuneStart(b) { + content = content[:i] + break + } + } + hasHighBit := false + for _, c := range content { + if c >= 0x80 { + hasHighBit = true + break + } + } + if hasHighBit && utf8.Valid(content) { + return encoding.Nop, "utf-8", false + } + + // TODO: change default depending on user's locale? + return charmap.Windows1252, "windows-1252", false +} + +// NewReader returns an io.Reader that converts the content of r to UTF-8. +// It calls DetermineEncoding to find out what r's encoding is. +func NewReader(r io.Reader, contentType string) (io.Reader, error) { + preview := make([]byte, 1024) + n, err := io.ReadFull(r, preview) + switch { + case err == io.ErrUnexpectedEOF: + preview = preview[:n] + r = bytes.NewReader(preview) + case err != nil: + return nil, err + default: + r = io.MultiReader(bytes.NewReader(preview), r) + } + + if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop { + r = transform.NewReader(r, e.NewDecoder()) + } + return r, nil +} + +// NewReaderLabel returns a reader that converts from the specified charset to +// UTF-8. It uses Lookup to find the encoding that corresponds to label, and +// returns an error if Lookup returns nil. It is suitable for use as +// encoding/xml.Decoder's CharsetReader function. +func NewReaderLabel(label string, input io.Reader) (io.Reader, error) { + e, _ := Lookup(label) + if e == nil { + return nil, fmt.Errorf("unsupported charset: %q", label) + } + return transform.NewReader(input, e.NewDecoder()), nil +} + +func prescan(content []byte) (e encoding.Encoding, name string) { + z := html.NewTokenizer(bytes.NewReader(content)) + for { + switch z.Next() { + case html.ErrorToken: + return nil, "" + + case html.StartTagToken, html.SelfClosingTagToken: + tagName, hasAttr := z.TagName() + if !bytes.Equal(tagName, []byte("meta")) { + continue + } + attrList := make(map[string]bool) + gotPragma := false + + const ( + dontKnow = iota + doNeedPragma + doNotNeedPragma + ) + needPragma := dontKnow + + name = "" + e = nil + for hasAttr { + var key, val []byte + key, val, hasAttr = z.TagAttr() + ks := string(key) + if attrList[ks] { + continue + } + attrList[ks] = true + for i, c := range val { + if 'A' <= c && c <= 'Z' { + val[i] = c + 0x20 + } + } + + switch ks { + case "http-equiv": + if bytes.Equal(val, []byte("content-type")) { + gotPragma = true + } + + case "content": + if e == nil { + name = fromMetaElement(string(val)) + if name != "" { + e, name = Lookup(name) + if e != nil { + needPragma = doNeedPragma + } + } + } + + case "charset": + e, name = Lookup(string(val)) + needPragma = doNotNeedPragma + } + } + + if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma { + continue + } + + if strings.HasPrefix(name, "utf-16") { + name = "utf-8" + e = encoding.Nop + } + + if e != nil { + return e, name + } + } + } +} + +func fromMetaElement(s string) string { + for s != "" { + csLoc := strings.Index(s, "charset") + if csLoc == -1 { + return "" + } + s = s[csLoc+len("charset"):] + s = strings.TrimLeft(s, " \t\n\f\r") + if !strings.HasPrefix(s, "=") { + continue + } + s = s[1:] + s = strings.TrimLeft(s, " \t\n\f\r") + if s == "" { + return "" + } + if q := s[0]; q == '"' || q == '\'' { + s = s[1:] + closeQuote := strings.IndexRune(s, rune(q)) + if closeQuote == -1 { + return "" + } + return s[:closeQuote] + } + + end := strings.IndexAny(s, "; \t\n\f\r") + if end == -1 { + end = len(s) + } + return s[:end] + } + return "" +} + +var boms = []struct { + bom []byte + enc string +}{ + {[]byte{0xfe, 0xff}, "utf-16be"}, + {[]byte{0xff, 0xfe}, "utf-16le"}, + {[]byte{0xef, 0xbb, 0xbf}, "utf-8"}, +} diff --git a/vendor/golang.org/x/net/html/const.go b/vendor/golang.org/x/net/html/const.go new file mode 100644 index 000000000..52f651ff6 --- /dev/null +++ b/vendor/golang.org/x/net/html/const.go @@ -0,0 +1,102 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +// Section 12.2.3.2 of the HTML5 specification says "The following elements +// have varying levels of special parsing rules". +// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements +var isSpecialElementMap = map[string]bool{ + "address": true, + "applet": true, + "area": true, + "article": true, + "aside": true, + "base": true, + "basefont": true, + "bgsound": true, + "blockquote": true, + "body": true, + "br": true, + "button": true, + "caption": true, + "center": true, + "col": true, + "colgroup": true, + "dd": true, + "details": true, + "dir": true, + "div": true, + "dl": true, + "dt": true, + "embed": true, + "fieldset": true, + "figcaption": true, + "figure": true, + "footer": true, + "form": true, + "frame": true, + "frameset": true, + "h1": true, + "h2": true, + "h3": true, + "h4": true, + "h5": true, + "h6": true, + "head": true, + "header": true, + "hgroup": true, + "hr": true, + "html": true, + "iframe": true, + "img": true, + "input": true, + "isindex": true, + "li": true, + "link": true, + "listing": true, + "marquee": true, + "menu": true, + "meta": true, + "nav": true, + "noembed": true, + "noframes": true, + "noscript": true, + "object": true, + "ol": true, + "p": true, + "param": true, + "plaintext": true, + "pre": true, + "script": true, + "section": true, + "select": true, + "source": true, + "style": true, + "summary": true, + "table": true, + "tbody": true, + "td": true, + "template": true, + "textarea": true, + "tfoot": true, + "th": true, + "thead": true, + "title": true, + "tr": true, + "track": true, + "ul": true, + "wbr": true, + "xmp": true, +} + +func isSpecialElement(element *Node) bool { + switch element.Namespace { + case "", "html": + return isSpecialElementMap[element.Data] + case "svg": + return element.Data == "foreignObject" + } + return false +} diff --git a/vendor/golang.org/x/net/html/doc.go b/vendor/golang.org/x/net/html/doc.go new file mode 100644 index 000000000..94f496874 --- /dev/null +++ b/vendor/golang.org/x/net/html/doc.go @@ -0,0 +1,106 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package html implements an HTML5-compliant tokenizer and parser. + +Tokenization is done by creating a Tokenizer for an io.Reader r. It is the +caller's responsibility to ensure that r provides UTF-8 encoded HTML. + + z := html.NewTokenizer(r) + +Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(), +which parses the next token and returns its type, or an error: + + for { + tt := z.Next() + if tt == html.ErrorToken { + // ... + return ... + } + // Process the current token. + } + +There are two APIs for retrieving the current token. The high-level API is to +call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs +allow optionally calling Raw after Next but before Token, Text, TagName, or +TagAttr. In EBNF notation, the valid call sequence per token is: + + Next {Raw} [ Token | Text | TagName {TagAttr} ] + +Token returns an independent data structure that completely describes a token. +Entities (such as "<") are unescaped, tag names and attribute keys are +lower-cased, and attributes are collected into a []Attribute. For example: + + for { + if z.Next() == html.ErrorToken { + // Returning io.EOF indicates success. + return z.Err() + } + emitToken(z.Token()) + } + +The low-level API performs fewer allocations and copies, but the contents of +the []byte values returned by Text, TagName and TagAttr may change on the next +call to Next. For example, to extract an HTML page's anchor text: + + depth := 0 + for { + tt := z.Next() + switch tt { + case ErrorToken: + return z.Err() + case TextToken: + if depth > 0 { + // emitBytes should copy the []byte it receives, + // if it doesn't process it immediately. + emitBytes(z.Text()) + } + case StartTagToken, EndTagToken: + tn, _ := z.TagName() + if len(tn) == 1 && tn[0] == 'a' { + if tt == StartTagToken { + depth++ + } else { + depth-- + } + } + } + } + +Parsing is done by calling Parse with an io.Reader, which returns the root of +the parse tree (the document element) as a *Node. It is the caller's +responsibility to ensure that the Reader provides UTF-8 encoded HTML. For +example, to process each anchor node in depth-first order: + + doc, err := html.Parse(r) + if err != nil { + // ... + } + var f func(*html.Node) + f = func(n *html.Node) { + if n.Type == html.ElementNode && n.Data == "a" { + // Do something with n... + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + f(c) + } + } + f(doc) + +The relevant specifications include: +https://html.spec.whatwg.org/multipage/syntax.html and +https://html.spec.whatwg.org/multipage/syntax.html#tokenization +*/ +package html // import "golang.org/x/net/html" + +// The tokenization algorithm implemented by this package is not a line-by-line +// transliteration of the relatively verbose state-machine in the WHATWG +// specification. A more direct approach is used instead, where the program +// counter implies the state, such as whether it is tokenizing a tag or a text +// node. Specification compliance is verified by checking expected and actual +// outputs over a test suite rather than aiming for algorithmic fidelity. + +// TODO(nigeltao): Does a DOM API belong in this package or a separate one? +// TODO(nigeltao): How does parsing interact with a JavaScript engine? diff --git a/vendor/golang.org/x/net/html/doctype.go b/vendor/golang.org/x/net/html/doctype.go new file mode 100644 index 000000000..c484e5a94 --- /dev/null +++ b/vendor/golang.org/x/net/html/doctype.go @@ -0,0 +1,156 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "strings" +) + +// parseDoctype parses the data from a DoctypeToken into a name, +// public identifier, and system identifier. It returns a Node whose Type +// is DoctypeNode, whose Data is the name, and which has attributes +// named "system" and "public" for the two identifiers if they were present. +// quirks is whether the document should be parsed in "quirks mode". +func parseDoctype(s string) (n *Node, quirks bool) { + n = &Node{Type: DoctypeNode} + + // Find the name. + space := strings.IndexAny(s, whitespace) + if space == -1 { + space = len(s) + } + n.Data = s[:space] + // The comparison to "html" is case-sensitive. + if n.Data != "html" { + quirks = true + } + n.Data = strings.ToLower(n.Data) + s = strings.TrimLeft(s[space:], whitespace) + + if len(s) < 6 { + // It can't start with "PUBLIC" or "SYSTEM". + // Ignore the rest of the string. + return n, quirks || s != "" + } + + key := strings.ToLower(s[:6]) + s = s[6:] + for key == "public" || key == "system" { + s = strings.TrimLeft(s, whitespace) + if s == "" { + break + } + quote := s[0] + if quote != '"' && quote != '\'' { + break + } + s = s[1:] + q := strings.IndexRune(s, rune(quote)) + var id string + if q == -1 { + id = s + s = "" + } else { + id = s[:q] + s = s[q+1:] + } + n.Attr = append(n.Attr, Attribute{Key: key, Val: id}) + if key == "public" { + key = "system" + } else { + key = "" + } + } + + if key != "" || s != "" { + quirks = true + } else if len(n.Attr) > 0 { + if n.Attr[0].Key == "public" { + public := strings.ToLower(n.Attr[0].Val) + switch public { + case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html": + quirks = true + default: + for _, q := range quirkyIDs { + if strings.HasPrefix(public, q) { + quirks = true + break + } + } + } + // The following two public IDs only cause quirks mode if there is no system ID. + if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") || + strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) { + quirks = true + } + } + if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" && + strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" { + quirks = true + } + } + + return n, quirks +} + +// quirkyIDs is a list of public doctype identifiers that cause a document +// to be interpreted in quirks mode. The identifiers should be in lower case. +var quirkyIDs = []string{ + "+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//", +} diff --git a/vendor/golang.org/x/net/html/entity.go b/vendor/golang.org/x/net/html/entity.go new file mode 100644 index 000000000..a50c04c60 --- /dev/null +++ b/vendor/golang.org/x/net/html/entity.go @@ -0,0 +1,2253 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +// All entities that do not end with ';' are 6 or fewer bytes long. +const longestEntityWithoutSemicolon = 6 + +// entity is a map from HTML entity names to their values. The semicolon matters: +// https://html.spec.whatwg.org/multipage/syntax.html#named-character-references +// lists both "amp" and "amp;" as two separate entries. +// +// Note that the HTML5 list is larger than the HTML4 list at +// http://www.w3.org/TR/html4/sgml/entities.html +var entity = map[string]rune{ + "AElig;": '\U000000C6', + "AMP;": '\U00000026', + "Aacute;": '\U000000C1', + "Abreve;": '\U00000102', + "Acirc;": '\U000000C2', + "Acy;": '\U00000410', + "Afr;": '\U0001D504', + "Agrave;": '\U000000C0', + "Alpha;": '\U00000391', + "Amacr;": '\U00000100', + "And;": '\U00002A53', + "Aogon;": '\U00000104', + "Aopf;": '\U0001D538', + "ApplyFunction;": '\U00002061', + "Aring;": '\U000000C5', + "Ascr;": '\U0001D49C', + "Assign;": '\U00002254', + "Atilde;": '\U000000C3', + "Auml;": '\U000000C4', + "Backslash;": '\U00002216', + "Barv;": '\U00002AE7', + "Barwed;": '\U00002306', + "Bcy;": '\U00000411', + "Because;": '\U00002235', + "Bernoullis;": '\U0000212C', + "Beta;": '\U00000392', + "Bfr;": '\U0001D505', + "Bopf;": '\U0001D539', + "Breve;": '\U000002D8', + "Bscr;": '\U0000212C', + "Bumpeq;": '\U0000224E', + "CHcy;": '\U00000427', + "COPY;": '\U000000A9', + "Cacute;": '\U00000106', + "Cap;": '\U000022D2', + "CapitalDifferentialD;": '\U00002145', + "Cayleys;": '\U0000212D', + "Ccaron;": '\U0000010C', + "Ccedil;": '\U000000C7', + "Ccirc;": '\U00000108', + "Cconint;": '\U00002230', + "Cdot;": '\U0000010A', + "Cedilla;": '\U000000B8', + "CenterDot;": '\U000000B7', + "Cfr;": '\U0000212D', + "Chi;": '\U000003A7', + "CircleDot;": '\U00002299', + "CircleMinus;": '\U00002296', + "CirclePlus;": '\U00002295', + "CircleTimes;": '\U00002297', + "ClockwiseContourIntegral;": '\U00002232', + "CloseCurlyDoubleQuote;": '\U0000201D', + "CloseCurlyQuote;": '\U00002019', + "Colon;": '\U00002237', + "Colone;": '\U00002A74', + "Congruent;": '\U00002261', + "Conint;": '\U0000222F', + "ContourIntegral;": '\U0000222E', + "Copf;": '\U00002102', + "Coproduct;": '\U00002210', + "CounterClockwiseContourIntegral;": '\U00002233', + "Cross;": '\U00002A2F', + "Cscr;": '\U0001D49E', + "Cup;": '\U000022D3', + "CupCap;": '\U0000224D', + "DD;": '\U00002145', + "DDotrahd;": '\U00002911', + "DJcy;": '\U00000402', + "DScy;": '\U00000405', + "DZcy;": '\U0000040F', + "Dagger;": '\U00002021', + "Darr;": '\U000021A1', + "Dashv;": '\U00002AE4', + "Dcaron;": '\U0000010E', + "Dcy;": '\U00000414', + "Del;": '\U00002207', + "Delta;": '\U00000394', + "Dfr;": '\U0001D507', + "DiacriticalAcute;": '\U000000B4', + "DiacriticalDot;": '\U000002D9', + "DiacriticalDoubleAcute;": '\U000002DD', + "DiacriticalGrave;": '\U00000060', + "DiacriticalTilde;": '\U000002DC', + "Diamond;": '\U000022C4', + "DifferentialD;": '\U00002146', + "Dopf;": '\U0001D53B', + "Dot;": '\U000000A8', + "DotDot;": '\U000020DC', + "DotEqual;": '\U00002250', + "DoubleContourIntegral;": '\U0000222F', + "DoubleDot;": '\U000000A8', + "DoubleDownArrow;": '\U000021D3', + "DoubleLeftArrow;": '\U000021D0', + "DoubleLeftRightArrow;": '\U000021D4', + "DoubleLeftTee;": '\U00002AE4', + "DoubleLongLeftArrow;": '\U000027F8', + "DoubleLongLeftRightArrow;": '\U000027FA', + "DoubleLongRightArrow;": '\U000027F9', + "DoubleRightArrow;": '\U000021D2', + "DoubleRightTee;": '\U000022A8', + "DoubleUpArrow;": '\U000021D1', + "DoubleUpDownArrow;": '\U000021D5', + "DoubleVerticalBar;": '\U00002225', + "DownArrow;": '\U00002193', + "DownArrowBar;": '\U00002913', + "DownArrowUpArrow;": '\U000021F5', + "DownBreve;": '\U00000311', + "DownLeftRightVector;": '\U00002950', + "DownLeftTeeVector;": '\U0000295E', + "DownLeftVector;": '\U000021BD', + "DownLeftVectorBar;": '\U00002956', + "DownRightTeeVector;": '\U0000295F', + "DownRightVector;": '\U000021C1', + "DownRightVectorBar;": '\U00002957', + "DownTee;": '\U000022A4', + "DownTeeArrow;": '\U000021A7', + "Downarrow;": '\U000021D3', + "Dscr;": '\U0001D49F', + "Dstrok;": '\U00000110', + "ENG;": '\U0000014A', + "ETH;": '\U000000D0', + "Eacute;": '\U000000C9', + "Ecaron;": '\U0000011A', + "Ecirc;": '\U000000CA', + "Ecy;": '\U0000042D', + "Edot;": '\U00000116', + "Efr;": '\U0001D508', + "Egrave;": '\U000000C8', + "Element;": '\U00002208', + "Emacr;": '\U00000112', + "EmptySmallSquare;": '\U000025FB', + "EmptyVerySmallSquare;": '\U000025AB', + "Eogon;": '\U00000118', + "Eopf;": '\U0001D53C', + "Epsilon;": '\U00000395', + "Equal;": '\U00002A75', + "EqualTilde;": '\U00002242', + "Equilibrium;": '\U000021CC', + "Escr;": '\U00002130', + "Esim;": '\U00002A73', + "Eta;": '\U00000397', + "Euml;": '\U000000CB', + "Exists;": '\U00002203', + "ExponentialE;": '\U00002147', + "Fcy;": '\U00000424', + "Ffr;": '\U0001D509', + "FilledSmallSquare;": '\U000025FC', + "FilledVerySmallSquare;": '\U000025AA', + "Fopf;": '\U0001D53D', + "ForAll;": '\U00002200', + "Fouriertrf;": '\U00002131', + "Fscr;": '\U00002131', + "GJcy;": '\U00000403', + "GT;": '\U0000003E', + "Gamma;": '\U00000393', + "Gammad;": '\U000003DC', + "Gbreve;": '\U0000011E', + "Gcedil;": '\U00000122', + "Gcirc;": '\U0000011C', + "Gcy;": '\U00000413', + "Gdot;": '\U00000120', + "Gfr;": '\U0001D50A', + "Gg;": '\U000022D9', + "Gopf;": '\U0001D53E', + "GreaterEqual;": '\U00002265', + "GreaterEqualLess;": '\U000022DB', + "GreaterFullEqual;": '\U00002267', + "GreaterGreater;": '\U00002AA2', + "GreaterLess;": '\U00002277', + "GreaterSlantEqual;": '\U00002A7E', + "GreaterTilde;": '\U00002273', + "Gscr;": '\U0001D4A2', + "Gt;": '\U0000226B', + "HARDcy;": '\U0000042A', + "Hacek;": '\U000002C7', + "Hat;": '\U0000005E', + "Hcirc;": '\U00000124', + "Hfr;": '\U0000210C', + "HilbertSpace;": '\U0000210B', + "Hopf;": '\U0000210D', + "HorizontalLine;": '\U00002500', + "Hscr;": '\U0000210B', + "Hstrok;": '\U00000126', + "HumpDownHump;": '\U0000224E', + "HumpEqual;": '\U0000224F', + "IEcy;": '\U00000415', + "IJlig;": '\U00000132', + "IOcy;": '\U00000401', + "Iacute;": '\U000000CD', + "Icirc;": '\U000000CE', + "Icy;": '\U00000418', + "Idot;": '\U00000130', + "Ifr;": '\U00002111', + "Igrave;": '\U000000CC', + "Im;": '\U00002111', + "Imacr;": '\U0000012A', + "ImaginaryI;": '\U00002148', + "Implies;": '\U000021D2', + "Int;": '\U0000222C', + "Integral;": '\U0000222B', + "Intersection;": '\U000022C2', + "InvisibleComma;": '\U00002063', + "InvisibleTimes;": '\U00002062', + "Iogon;": '\U0000012E', + "Iopf;": '\U0001D540', + "Iota;": '\U00000399', + "Iscr;": '\U00002110', + "Itilde;": '\U00000128', + "Iukcy;": '\U00000406', + "Iuml;": '\U000000CF', + "Jcirc;": '\U00000134', + "Jcy;": '\U00000419', + "Jfr;": '\U0001D50D', + "Jopf;": '\U0001D541', + "Jscr;": '\U0001D4A5', + "Jsercy;": '\U00000408', + "Jukcy;": '\U00000404', + "KHcy;": '\U00000425', + "KJcy;": '\U0000040C', + "Kappa;": '\U0000039A', + "Kcedil;": '\U00000136', + "Kcy;": '\U0000041A', + "Kfr;": '\U0001D50E', + "Kopf;": '\U0001D542', + "Kscr;": '\U0001D4A6', + "LJcy;": '\U00000409', + "LT;": '\U0000003C', + "Lacute;": '\U00000139', + "Lambda;": '\U0000039B', + "Lang;": '\U000027EA', + "Laplacetrf;": '\U00002112', + "Larr;": '\U0000219E', + "Lcaron;": '\U0000013D', + "Lcedil;": '\U0000013B', + "Lcy;": '\U0000041B', + "LeftAngleBracket;": '\U000027E8', + "LeftArrow;": '\U00002190', + "LeftArrowBar;": '\U000021E4', + "LeftArrowRightArrow;": '\U000021C6', + "LeftCeiling;": '\U00002308', + "LeftDoubleBracket;": '\U000027E6', + "LeftDownTeeVector;": '\U00002961', + "LeftDownVector;": '\U000021C3', + "LeftDownVectorBar;": '\U00002959', + "LeftFloor;": '\U0000230A', + "LeftRightArrow;": '\U00002194', + "LeftRightVector;": '\U0000294E', + "LeftTee;": '\U000022A3', + "LeftTeeArrow;": '\U000021A4', + "LeftTeeVector;": '\U0000295A', + "LeftTriangle;": '\U000022B2', + "LeftTriangleBar;": '\U000029CF', + "LeftTriangleEqual;": '\U000022B4', + "LeftUpDownVector;": '\U00002951', + "LeftUpTeeVector;": '\U00002960', + "LeftUpVector;": '\U000021BF', + "LeftUpVectorBar;": '\U00002958', + "LeftVector;": '\U000021BC', + "LeftVectorBar;": '\U00002952', + "Leftarrow;": '\U000021D0', + "Leftrightarrow;": '\U000021D4', + "LessEqualGreater;": '\U000022DA', + "LessFullEqual;": '\U00002266', + "LessGreater;": '\U00002276', + "LessLess;": '\U00002AA1', + "LessSlantEqual;": '\U00002A7D', + "LessTilde;": '\U00002272', + "Lfr;": '\U0001D50F', + "Ll;": '\U000022D8', + "Lleftarrow;": '\U000021DA', + "Lmidot;": '\U0000013F', + "LongLeftArrow;": '\U000027F5', + "LongLeftRightArrow;": '\U000027F7', + "LongRightArrow;": '\U000027F6', + "Longleftarrow;": '\U000027F8', + "Longleftrightarrow;": '\U000027FA', + "Longrightarrow;": '\U000027F9', + "Lopf;": '\U0001D543', + "LowerLeftArrow;": '\U00002199', + "LowerRightArrow;": '\U00002198', + "Lscr;": '\U00002112', + "Lsh;": '\U000021B0', + "Lstrok;": '\U00000141', + "Lt;": '\U0000226A', + "Map;": '\U00002905', + "Mcy;": '\U0000041C', + "MediumSpace;": '\U0000205F', + "Mellintrf;": '\U00002133', + "Mfr;": '\U0001D510', + "MinusPlus;": '\U00002213', + "Mopf;": '\U0001D544', + "Mscr;": '\U00002133', + "Mu;": '\U0000039C', + "NJcy;": '\U0000040A', + "Nacute;": '\U00000143', + "Ncaron;": '\U00000147', + "Ncedil;": '\U00000145', + "Ncy;": '\U0000041D', + "NegativeMediumSpace;": '\U0000200B', + "NegativeThickSpace;": '\U0000200B', + "NegativeThinSpace;": '\U0000200B', + "NegativeVeryThinSpace;": '\U0000200B', + "NestedGreaterGreater;": '\U0000226B', + "NestedLessLess;": '\U0000226A', + "NewLine;": '\U0000000A', + "Nfr;": '\U0001D511', + "NoBreak;": '\U00002060', + "NonBreakingSpace;": '\U000000A0', + "Nopf;": '\U00002115', + "Not;": '\U00002AEC', + "NotCongruent;": '\U00002262', + "NotCupCap;": '\U0000226D', + "NotDoubleVerticalBar;": '\U00002226', + "NotElement;": '\U00002209', + "NotEqual;": '\U00002260', + "NotExists;": '\U00002204', + "NotGreater;": '\U0000226F', + "NotGreaterEqual;": '\U00002271', + "NotGreaterLess;": '\U00002279', + "NotGreaterTilde;": '\U00002275', + "NotLeftTriangle;": '\U000022EA', + "NotLeftTriangleEqual;": '\U000022EC', + "NotLess;": '\U0000226E', + "NotLessEqual;": '\U00002270', + "NotLessGreater;": '\U00002278', + "NotLessTilde;": '\U00002274', + "NotPrecedes;": '\U00002280', + "NotPrecedesSlantEqual;": '\U000022E0', + "NotReverseElement;": '\U0000220C', + "NotRightTriangle;": '\U000022EB', + "NotRightTriangleEqual;": '\U000022ED', + "NotSquareSubsetEqual;": '\U000022E2', + "NotSquareSupersetEqual;": '\U000022E3', + "NotSubsetEqual;": '\U00002288', + "NotSucceeds;": '\U00002281', + "NotSucceedsSlantEqual;": '\U000022E1', + "NotSupersetEqual;": '\U00002289', + "NotTilde;": '\U00002241', + "NotTildeEqual;": '\U00002244', + "NotTildeFullEqual;": '\U00002247', + "NotTildeTilde;": '\U00002249', + "NotVerticalBar;": '\U00002224', + "Nscr;": '\U0001D4A9', + "Ntilde;": '\U000000D1', + "Nu;": '\U0000039D', + "OElig;": '\U00000152', + "Oacute;": '\U000000D3', + "Ocirc;": '\U000000D4', + "Ocy;": '\U0000041E', + "Odblac;": '\U00000150', + "Ofr;": '\U0001D512', + "Ograve;": '\U000000D2', + "Omacr;": '\U0000014C', + "Omega;": '\U000003A9', + "Omicron;": '\U0000039F', + "Oopf;": '\U0001D546', + "OpenCurlyDoubleQuote;": '\U0000201C', + "OpenCurlyQuote;": '\U00002018', + "Or;": '\U00002A54', + "Oscr;": '\U0001D4AA', + "Oslash;": '\U000000D8', + "Otilde;": '\U000000D5', + "Otimes;": '\U00002A37', + "Ouml;": '\U000000D6', + "OverBar;": '\U0000203E', + "OverBrace;": '\U000023DE', + "OverBracket;": '\U000023B4', + "OverParenthesis;": '\U000023DC', + "PartialD;": '\U00002202', + "Pcy;": '\U0000041F', + "Pfr;": '\U0001D513', + "Phi;": '\U000003A6', + "Pi;": '\U000003A0', + "PlusMinus;": '\U000000B1', + "Poincareplane;": '\U0000210C', + "Popf;": '\U00002119', + "Pr;": '\U00002ABB', + "Precedes;": '\U0000227A', + "PrecedesEqual;": '\U00002AAF', + "PrecedesSlantEqual;": '\U0000227C', + "PrecedesTilde;": '\U0000227E', + "Prime;": '\U00002033', + "Product;": '\U0000220F', + "Proportion;": '\U00002237', + "Proportional;": '\U0000221D', + "Pscr;": '\U0001D4AB', + "Psi;": '\U000003A8', + "QUOT;": '\U00000022', + "Qfr;": '\U0001D514', + "Qopf;": '\U0000211A', + "Qscr;": '\U0001D4AC', + "RBarr;": '\U00002910', + "REG;": '\U000000AE', + "Racute;": '\U00000154', + "Rang;": '\U000027EB', + "Rarr;": '\U000021A0', + "Rarrtl;": '\U00002916', + "Rcaron;": '\U00000158', + "Rcedil;": '\U00000156', + "Rcy;": '\U00000420', + "Re;": '\U0000211C', + "ReverseElement;": '\U0000220B', + "ReverseEquilibrium;": '\U000021CB', + "ReverseUpEquilibrium;": '\U0000296F', + "Rfr;": '\U0000211C', + "Rho;": '\U000003A1', + "RightAngleBracket;": '\U000027E9', + "RightArrow;": '\U00002192', + "RightArrowBar;": '\U000021E5', + "RightArrowLeftArrow;": '\U000021C4', + "RightCeiling;": '\U00002309', + "RightDoubleBracket;": '\U000027E7', + "RightDownTeeVector;": '\U0000295D', + "RightDownVector;": '\U000021C2', + "RightDownVectorBar;": '\U00002955', + "RightFloor;": '\U0000230B', + "RightTee;": '\U000022A2', + "RightTeeArrow;": '\U000021A6', + "RightTeeVector;": '\U0000295B', + "RightTriangle;": '\U000022B3', + "RightTriangleBar;": '\U000029D0', + "RightTriangleEqual;": '\U000022B5', + "RightUpDownVector;": '\U0000294F', + "RightUpTeeVector;": '\U0000295C', + "RightUpVector;": '\U000021BE', + "RightUpVectorBar;": '\U00002954', + "RightVector;": '\U000021C0', + "RightVectorBar;": '\U00002953', + "Rightarrow;": '\U000021D2', + "Ropf;": '\U0000211D', + "RoundImplies;": '\U00002970', + "Rrightarrow;": '\U000021DB', + "Rscr;": '\U0000211B', + "Rsh;": '\U000021B1', + "RuleDelayed;": '\U000029F4', + "SHCHcy;": '\U00000429', + "SHcy;": '\U00000428', + "SOFTcy;": '\U0000042C', + "Sacute;": '\U0000015A', + "Sc;": '\U00002ABC', + "Scaron;": '\U00000160', + "Scedil;": '\U0000015E', + "Scirc;": '\U0000015C', + "Scy;": '\U00000421', + "Sfr;": '\U0001D516', + "ShortDownArrow;": '\U00002193', + "ShortLeftArrow;": '\U00002190', + "ShortRightArrow;": '\U00002192', + "ShortUpArrow;": '\U00002191', + "Sigma;": '\U000003A3', + "SmallCircle;": '\U00002218', + "Sopf;": '\U0001D54A', + "Sqrt;": '\U0000221A', + "Square;": '\U000025A1', + "SquareIntersection;": '\U00002293', + "SquareSubset;": '\U0000228F', + "SquareSubsetEqual;": '\U00002291', + "SquareSuperset;": '\U00002290', + "SquareSupersetEqual;": '\U00002292', + "SquareUnion;": '\U00002294', + "Sscr;": '\U0001D4AE', + "Star;": '\U000022C6', + "Sub;": '\U000022D0', + "Subset;": '\U000022D0', + "SubsetEqual;": '\U00002286', + "Succeeds;": '\U0000227B', + "SucceedsEqual;": '\U00002AB0', + "SucceedsSlantEqual;": '\U0000227D', + "SucceedsTilde;": '\U0000227F', + "SuchThat;": '\U0000220B', + "Sum;": '\U00002211', + "Sup;": '\U000022D1', + "Superset;": '\U00002283', + "SupersetEqual;": '\U00002287', + "Supset;": '\U000022D1', + "THORN;": '\U000000DE', + "TRADE;": '\U00002122', + "TSHcy;": '\U0000040B', + "TScy;": '\U00000426', + "Tab;": '\U00000009', + "Tau;": '\U000003A4', + "Tcaron;": '\U00000164', + "Tcedil;": '\U00000162', + "Tcy;": '\U00000422', + "Tfr;": '\U0001D517', + "Therefore;": '\U00002234', + "Theta;": '\U00000398', + "ThinSpace;": '\U00002009', + "Tilde;": '\U0000223C', + "TildeEqual;": '\U00002243', + "TildeFullEqual;": '\U00002245', + "TildeTilde;": '\U00002248', + "Topf;": '\U0001D54B', + "TripleDot;": '\U000020DB', + "Tscr;": '\U0001D4AF', + "Tstrok;": '\U00000166', + "Uacute;": '\U000000DA', + "Uarr;": '\U0000219F', + "Uarrocir;": '\U00002949', + "Ubrcy;": '\U0000040E', + "Ubreve;": '\U0000016C', + "Ucirc;": '\U000000DB', + "Ucy;": '\U00000423', + "Udblac;": '\U00000170', + "Ufr;": '\U0001D518', + "Ugrave;": '\U000000D9', + "Umacr;": '\U0000016A', + "UnderBar;": '\U0000005F', + "UnderBrace;": '\U000023DF', + "UnderBracket;": '\U000023B5', + "UnderParenthesis;": '\U000023DD', + "Union;": '\U000022C3', + "UnionPlus;": '\U0000228E', + "Uogon;": '\U00000172', + "Uopf;": '\U0001D54C', + "UpArrow;": '\U00002191', + "UpArrowBar;": '\U00002912', + "UpArrowDownArrow;": '\U000021C5', + "UpDownArrow;": '\U00002195', + "UpEquilibrium;": '\U0000296E', + "UpTee;": '\U000022A5', + "UpTeeArrow;": '\U000021A5', + "Uparrow;": '\U000021D1', + "Updownarrow;": '\U000021D5', + "UpperLeftArrow;": '\U00002196', + "UpperRightArrow;": '\U00002197', + "Upsi;": '\U000003D2', + "Upsilon;": '\U000003A5', + "Uring;": '\U0000016E', + "Uscr;": '\U0001D4B0', + "Utilde;": '\U00000168', + "Uuml;": '\U000000DC', + "VDash;": '\U000022AB', + "Vbar;": '\U00002AEB', + "Vcy;": '\U00000412', + "Vdash;": '\U000022A9', + "Vdashl;": '\U00002AE6', + "Vee;": '\U000022C1', + "Verbar;": '\U00002016', + "Vert;": '\U00002016', + "VerticalBar;": '\U00002223', + "VerticalLine;": '\U0000007C', + "VerticalSeparator;": '\U00002758', + "VerticalTilde;": '\U00002240', + "VeryThinSpace;": '\U0000200A', + "Vfr;": '\U0001D519', + "Vopf;": '\U0001D54D', + "Vscr;": '\U0001D4B1', + "Vvdash;": '\U000022AA', + "Wcirc;": '\U00000174', + "Wedge;": '\U000022C0', + "Wfr;": '\U0001D51A', + "Wopf;": '\U0001D54E', + "Wscr;": '\U0001D4B2', + "Xfr;": '\U0001D51B', + "Xi;": '\U0000039E', + "Xopf;": '\U0001D54F', + "Xscr;": '\U0001D4B3', + "YAcy;": '\U0000042F', + "YIcy;": '\U00000407', + "YUcy;": '\U0000042E', + "Yacute;": '\U000000DD', + "Ycirc;": '\U00000176', + "Ycy;": '\U0000042B', + "Yfr;": '\U0001D51C', + "Yopf;": '\U0001D550', + "Yscr;": '\U0001D4B4', + "Yuml;": '\U00000178', + "ZHcy;": '\U00000416', + "Zacute;": '\U00000179', + "Zcaron;": '\U0000017D', + "Zcy;": '\U00000417', + "Zdot;": '\U0000017B', + "ZeroWidthSpace;": '\U0000200B', + "Zeta;": '\U00000396', + "Zfr;": '\U00002128', + "Zopf;": '\U00002124', + "Zscr;": '\U0001D4B5', + "aacute;": '\U000000E1', + "abreve;": '\U00000103', + "ac;": '\U0000223E', + "acd;": '\U0000223F', + "acirc;": '\U000000E2', + "acute;": '\U000000B4', + "acy;": '\U00000430', + "aelig;": '\U000000E6', + "af;": '\U00002061', + "afr;": '\U0001D51E', + "agrave;": '\U000000E0', + "alefsym;": '\U00002135', + "aleph;": '\U00002135', + "alpha;": '\U000003B1', + "amacr;": '\U00000101', + "amalg;": '\U00002A3F', + "amp;": '\U00000026', + "and;": '\U00002227', + "andand;": '\U00002A55', + "andd;": '\U00002A5C', + "andslope;": '\U00002A58', + "andv;": '\U00002A5A', + "ang;": '\U00002220', + "ange;": '\U000029A4', + "angle;": '\U00002220', + "angmsd;": '\U00002221', + "angmsdaa;": '\U000029A8', + "angmsdab;": '\U000029A9', + "angmsdac;": '\U000029AA', + "angmsdad;": '\U000029AB', + "angmsdae;": '\U000029AC', + "angmsdaf;": '\U000029AD', + "angmsdag;": '\U000029AE', + "angmsdah;": '\U000029AF', + "angrt;": '\U0000221F', + "angrtvb;": '\U000022BE', + "angrtvbd;": '\U0000299D', + "angsph;": '\U00002222', + "angst;": '\U000000C5', + "angzarr;": '\U0000237C', + "aogon;": '\U00000105', + "aopf;": '\U0001D552', + "ap;": '\U00002248', + "apE;": '\U00002A70', + "apacir;": '\U00002A6F', + "ape;": '\U0000224A', + "apid;": '\U0000224B', + "apos;": '\U00000027', + "approx;": '\U00002248', + "approxeq;": '\U0000224A', + "aring;": '\U000000E5', + "ascr;": '\U0001D4B6', + "ast;": '\U0000002A', + "asymp;": '\U00002248', + "asympeq;": '\U0000224D', + "atilde;": '\U000000E3', + "auml;": '\U000000E4', + "awconint;": '\U00002233', + "awint;": '\U00002A11', + "bNot;": '\U00002AED', + "backcong;": '\U0000224C', + "backepsilon;": '\U000003F6', + "backprime;": '\U00002035', + "backsim;": '\U0000223D', + "backsimeq;": '\U000022CD', + "barvee;": '\U000022BD', + "barwed;": '\U00002305', + "barwedge;": '\U00002305', + "bbrk;": '\U000023B5', + "bbrktbrk;": '\U000023B6', + "bcong;": '\U0000224C', + "bcy;": '\U00000431', + "bdquo;": '\U0000201E', + "becaus;": '\U00002235', + "because;": '\U00002235', + "bemptyv;": '\U000029B0', + "bepsi;": '\U000003F6', + "bernou;": '\U0000212C', + "beta;": '\U000003B2', + "beth;": '\U00002136', + "between;": '\U0000226C', + "bfr;": '\U0001D51F', + "bigcap;": '\U000022C2', + "bigcirc;": '\U000025EF', + "bigcup;": '\U000022C3', + "bigodot;": '\U00002A00', + "bigoplus;": '\U00002A01', + "bigotimes;": '\U00002A02', + "bigsqcup;": '\U00002A06', + "bigstar;": '\U00002605', + "bigtriangledown;": '\U000025BD', + "bigtriangleup;": '\U000025B3', + "biguplus;": '\U00002A04', + "bigvee;": '\U000022C1', + "bigwedge;": '\U000022C0', + "bkarow;": '\U0000290D', + "blacklozenge;": '\U000029EB', + "blacksquare;": '\U000025AA', + "blacktriangle;": '\U000025B4', + "blacktriangledown;": '\U000025BE', + "blacktriangleleft;": '\U000025C2', + "blacktriangleright;": '\U000025B8', + "blank;": '\U00002423', + "blk12;": '\U00002592', + "blk14;": '\U00002591', + "blk34;": '\U00002593', + "block;": '\U00002588', + "bnot;": '\U00002310', + "bopf;": '\U0001D553', + "bot;": '\U000022A5', + "bottom;": '\U000022A5', + "bowtie;": '\U000022C8', + "boxDL;": '\U00002557', + "boxDR;": '\U00002554', + "boxDl;": '\U00002556', + "boxDr;": '\U00002553', + "boxH;": '\U00002550', + "boxHD;": '\U00002566', + "boxHU;": '\U00002569', + "boxHd;": '\U00002564', + "boxHu;": '\U00002567', + "boxUL;": '\U0000255D', + "boxUR;": '\U0000255A', + "boxUl;": '\U0000255C', + "boxUr;": '\U00002559', + "boxV;": '\U00002551', + "boxVH;": '\U0000256C', + "boxVL;": '\U00002563', + "boxVR;": '\U00002560', + "boxVh;": '\U0000256B', + "boxVl;": '\U00002562', + "boxVr;": '\U0000255F', + "boxbox;": '\U000029C9', + "boxdL;": '\U00002555', + "boxdR;": '\U00002552', + "boxdl;": '\U00002510', + "boxdr;": '\U0000250C', + "boxh;": '\U00002500', + "boxhD;": '\U00002565', + "boxhU;": '\U00002568', + "boxhd;": '\U0000252C', + "boxhu;": '\U00002534', + "boxminus;": '\U0000229F', + "boxplus;": '\U0000229E', + "boxtimes;": '\U000022A0', + "boxuL;": '\U0000255B', + "boxuR;": '\U00002558', + "boxul;": '\U00002518', + "boxur;": '\U00002514', + "boxv;": '\U00002502', + "boxvH;": '\U0000256A', + "boxvL;": '\U00002561', + "boxvR;": '\U0000255E', + "boxvh;": '\U0000253C', + "boxvl;": '\U00002524', + "boxvr;": '\U0000251C', + "bprime;": '\U00002035', + "breve;": '\U000002D8', + "brvbar;": '\U000000A6', + "bscr;": '\U0001D4B7', + "bsemi;": '\U0000204F', + "bsim;": '\U0000223D', + "bsime;": '\U000022CD', + "bsol;": '\U0000005C', + "bsolb;": '\U000029C5', + "bsolhsub;": '\U000027C8', + "bull;": '\U00002022', + "bullet;": '\U00002022', + "bump;": '\U0000224E', + "bumpE;": '\U00002AAE', + "bumpe;": '\U0000224F', + "bumpeq;": '\U0000224F', + "cacute;": '\U00000107', + "cap;": '\U00002229', + "capand;": '\U00002A44', + "capbrcup;": '\U00002A49', + "capcap;": '\U00002A4B', + "capcup;": '\U00002A47', + "capdot;": '\U00002A40', + "caret;": '\U00002041', + "caron;": '\U000002C7', + "ccaps;": '\U00002A4D', + "ccaron;": '\U0000010D', + "ccedil;": '\U000000E7', + "ccirc;": '\U00000109', + "ccups;": '\U00002A4C', + "ccupssm;": '\U00002A50', + "cdot;": '\U0000010B', + "cedil;": '\U000000B8', + "cemptyv;": '\U000029B2', + "cent;": '\U000000A2', + "centerdot;": '\U000000B7', + "cfr;": '\U0001D520', + "chcy;": '\U00000447', + "check;": '\U00002713', + "checkmark;": '\U00002713', + "chi;": '\U000003C7', + "cir;": '\U000025CB', + "cirE;": '\U000029C3', + "circ;": '\U000002C6', + "circeq;": '\U00002257', + "circlearrowleft;": '\U000021BA', + "circlearrowright;": '\U000021BB', + "circledR;": '\U000000AE', + "circledS;": '\U000024C8', + "circledast;": '\U0000229B', + "circledcirc;": '\U0000229A', + "circleddash;": '\U0000229D', + "cire;": '\U00002257', + "cirfnint;": '\U00002A10', + "cirmid;": '\U00002AEF', + "cirscir;": '\U000029C2', + "clubs;": '\U00002663', + "clubsuit;": '\U00002663', + "colon;": '\U0000003A', + "colone;": '\U00002254', + "coloneq;": '\U00002254', + "comma;": '\U0000002C', + "commat;": '\U00000040', + "comp;": '\U00002201', + "compfn;": '\U00002218', + "complement;": '\U00002201', + "complexes;": '\U00002102', + "cong;": '\U00002245', + "congdot;": '\U00002A6D', + "conint;": '\U0000222E', + "copf;": '\U0001D554', + "coprod;": '\U00002210', + "copy;": '\U000000A9', + "copysr;": '\U00002117', + "crarr;": '\U000021B5', + "cross;": '\U00002717', + "cscr;": '\U0001D4B8', + "csub;": '\U00002ACF', + "csube;": '\U00002AD1', + "csup;": '\U00002AD0', + "csupe;": '\U00002AD2', + "ctdot;": '\U000022EF', + "cudarrl;": '\U00002938', + "cudarrr;": '\U00002935', + "cuepr;": '\U000022DE', + "cuesc;": '\U000022DF', + "cularr;": '\U000021B6', + "cularrp;": '\U0000293D', + "cup;": '\U0000222A', + "cupbrcap;": '\U00002A48', + "cupcap;": '\U00002A46', + "cupcup;": '\U00002A4A', + "cupdot;": '\U0000228D', + "cupor;": '\U00002A45', + "curarr;": '\U000021B7', + "curarrm;": '\U0000293C', + "curlyeqprec;": '\U000022DE', + "curlyeqsucc;": '\U000022DF', + "curlyvee;": '\U000022CE', + "curlywedge;": '\U000022CF', + "curren;": '\U000000A4', + "curvearrowleft;": '\U000021B6', + "curvearrowright;": '\U000021B7', + "cuvee;": '\U000022CE', + "cuwed;": '\U000022CF', + "cwconint;": '\U00002232', + "cwint;": '\U00002231', + "cylcty;": '\U0000232D', + "dArr;": '\U000021D3', + "dHar;": '\U00002965', + "dagger;": '\U00002020', + "daleth;": '\U00002138', + "darr;": '\U00002193', + "dash;": '\U00002010', + "dashv;": '\U000022A3', + "dbkarow;": '\U0000290F', + "dblac;": '\U000002DD', + "dcaron;": '\U0000010F', + "dcy;": '\U00000434', + "dd;": '\U00002146', + "ddagger;": '\U00002021', + "ddarr;": '\U000021CA', + "ddotseq;": '\U00002A77', + "deg;": '\U000000B0', + "delta;": '\U000003B4', + "demptyv;": '\U000029B1', + "dfisht;": '\U0000297F', + "dfr;": '\U0001D521', + "dharl;": '\U000021C3', + "dharr;": '\U000021C2', + "diam;": '\U000022C4', + "diamond;": '\U000022C4', + "diamondsuit;": '\U00002666', + "diams;": '\U00002666', + "die;": '\U000000A8', + "digamma;": '\U000003DD', + "disin;": '\U000022F2', + "div;": '\U000000F7', + "divide;": '\U000000F7', + "divideontimes;": '\U000022C7', + "divonx;": '\U000022C7', + "djcy;": '\U00000452', + "dlcorn;": '\U0000231E', + "dlcrop;": '\U0000230D', + "dollar;": '\U00000024', + "dopf;": '\U0001D555', + "dot;": '\U000002D9', + "doteq;": '\U00002250', + "doteqdot;": '\U00002251', + "dotminus;": '\U00002238', + "dotplus;": '\U00002214', + "dotsquare;": '\U000022A1', + "doublebarwedge;": '\U00002306', + "downarrow;": '\U00002193', + "downdownarrows;": '\U000021CA', + "downharpoonleft;": '\U000021C3', + "downharpoonright;": '\U000021C2', + "drbkarow;": '\U00002910', + "drcorn;": '\U0000231F', + "drcrop;": '\U0000230C', + "dscr;": '\U0001D4B9', + "dscy;": '\U00000455', + "dsol;": '\U000029F6', + "dstrok;": '\U00000111', + "dtdot;": '\U000022F1', + "dtri;": '\U000025BF', + "dtrif;": '\U000025BE', + "duarr;": '\U000021F5', + "duhar;": '\U0000296F', + "dwangle;": '\U000029A6', + "dzcy;": '\U0000045F', + "dzigrarr;": '\U000027FF', + "eDDot;": '\U00002A77', + "eDot;": '\U00002251', + "eacute;": '\U000000E9', + "easter;": '\U00002A6E', + "ecaron;": '\U0000011B', + "ecir;": '\U00002256', + "ecirc;": '\U000000EA', + "ecolon;": '\U00002255', + "ecy;": '\U0000044D', + "edot;": '\U00000117', + "ee;": '\U00002147', + "efDot;": '\U00002252', + "efr;": '\U0001D522', + "eg;": '\U00002A9A', + "egrave;": '\U000000E8', + "egs;": '\U00002A96', + "egsdot;": '\U00002A98', + "el;": '\U00002A99', + "elinters;": '\U000023E7', + "ell;": '\U00002113', + "els;": '\U00002A95', + "elsdot;": '\U00002A97', + "emacr;": '\U00000113', + "empty;": '\U00002205', + "emptyset;": '\U00002205', + "emptyv;": '\U00002205', + "emsp;": '\U00002003', + "emsp13;": '\U00002004', + "emsp14;": '\U00002005', + "eng;": '\U0000014B', + "ensp;": '\U00002002', + "eogon;": '\U00000119', + "eopf;": '\U0001D556', + "epar;": '\U000022D5', + "eparsl;": '\U000029E3', + "eplus;": '\U00002A71', + "epsi;": '\U000003B5', + "epsilon;": '\U000003B5', + "epsiv;": '\U000003F5', + "eqcirc;": '\U00002256', + "eqcolon;": '\U00002255', + "eqsim;": '\U00002242', + "eqslantgtr;": '\U00002A96', + "eqslantless;": '\U00002A95', + "equals;": '\U0000003D', + "equest;": '\U0000225F', + "equiv;": '\U00002261', + "equivDD;": '\U00002A78', + "eqvparsl;": '\U000029E5', + "erDot;": '\U00002253', + "erarr;": '\U00002971', + "escr;": '\U0000212F', + "esdot;": '\U00002250', + "esim;": '\U00002242', + "eta;": '\U000003B7', + "eth;": '\U000000F0', + "euml;": '\U000000EB', + "euro;": '\U000020AC', + "excl;": '\U00000021', + "exist;": '\U00002203', + "expectation;": '\U00002130', + "exponentiale;": '\U00002147', + "fallingdotseq;": '\U00002252', + "fcy;": '\U00000444', + "female;": '\U00002640', + "ffilig;": '\U0000FB03', + "fflig;": '\U0000FB00', + "ffllig;": '\U0000FB04', + "ffr;": '\U0001D523', + "filig;": '\U0000FB01', + "flat;": '\U0000266D', + "fllig;": '\U0000FB02', + "fltns;": '\U000025B1', + "fnof;": '\U00000192', + "fopf;": '\U0001D557', + "forall;": '\U00002200', + "fork;": '\U000022D4', + "forkv;": '\U00002AD9', + "fpartint;": '\U00002A0D', + "frac12;": '\U000000BD', + "frac13;": '\U00002153', + "frac14;": '\U000000BC', + "frac15;": '\U00002155', + "frac16;": '\U00002159', + "frac18;": '\U0000215B', + "frac23;": '\U00002154', + "frac25;": '\U00002156', + "frac34;": '\U000000BE', + "frac35;": '\U00002157', + "frac38;": '\U0000215C', + "frac45;": '\U00002158', + "frac56;": '\U0000215A', + "frac58;": '\U0000215D', + "frac78;": '\U0000215E', + "frasl;": '\U00002044', + "frown;": '\U00002322', + "fscr;": '\U0001D4BB', + "gE;": '\U00002267', + "gEl;": '\U00002A8C', + "gacute;": '\U000001F5', + "gamma;": '\U000003B3', + "gammad;": '\U000003DD', + "gap;": '\U00002A86', + "gbreve;": '\U0000011F', + "gcirc;": '\U0000011D', + "gcy;": '\U00000433', + "gdot;": '\U00000121', + "ge;": '\U00002265', + "gel;": '\U000022DB', + "geq;": '\U00002265', + "geqq;": '\U00002267', + "geqslant;": '\U00002A7E', + "ges;": '\U00002A7E', + "gescc;": '\U00002AA9', + "gesdot;": '\U00002A80', + "gesdoto;": '\U00002A82', + "gesdotol;": '\U00002A84', + "gesles;": '\U00002A94', + "gfr;": '\U0001D524', + "gg;": '\U0000226B', + "ggg;": '\U000022D9', + "gimel;": '\U00002137', + "gjcy;": '\U00000453', + "gl;": '\U00002277', + "glE;": '\U00002A92', + "gla;": '\U00002AA5', + "glj;": '\U00002AA4', + "gnE;": '\U00002269', + "gnap;": '\U00002A8A', + "gnapprox;": '\U00002A8A', + "gne;": '\U00002A88', + "gneq;": '\U00002A88', + "gneqq;": '\U00002269', + "gnsim;": '\U000022E7', + "gopf;": '\U0001D558', + "grave;": '\U00000060', + "gscr;": '\U0000210A', + "gsim;": '\U00002273', + "gsime;": '\U00002A8E', + "gsiml;": '\U00002A90', + "gt;": '\U0000003E', + "gtcc;": '\U00002AA7', + "gtcir;": '\U00002A7A', + "gtdot;": '\U000022D7', + "gtlPar;": '\U00002995', + "gtquest;": '\U00002A7C', + "gtrapprox;": '\U00002A86', + "gtrarr;": '\U00002978', + "gtrdot;": '\U000022D7', + "gtreqless;": '\U000022DB', + "gtreqqless;": '\U00002A8C', + "gtrless;": '\U00002277', + "gtrsim;": '\U00002273', + "hArr;": '\U000021D4', + "hairsp;": '\U0000200A', + "half;": '\U000000BD', + "hamilt;": '\U0000210B', + "hardcy;": '\U0000044A', + "harr;": '\U00002194', + "harrcir;": '\U00002948', + "harrw;": '\U000021AD', + "hbar;": '\U0000210F', + "hcirc;": '\U00000125', + "hearts;": '\U00002665', + "heartsuit;": '\U00002665', + "hellip;": '\U00002026', + "hercon;": '\U000022B9', + "hfr;": '\U0001D525', + "hksearow;": '\U00002925', + "hkswarow;": '\U00002926', + "hoarr;": '\U000021FF', + "homtht;": '\U0000223B', + "hookleftarrow;": '\U000021A9', + "hookrightarrow;": '\U000021AA', + "hopf;": '\U0001D559', + "horbar;": '\U00002015', + "hscr;": '\U0001D4BD', + "hslash;": '\U0000210F', + "hstrok;": '\U00000127', + "hybull;": '\U00002043', + "hyphen;": '\U00002010', + "iacute;": '\U000000ED', + "ic;": '\U00002063', + "icirc;": '\U000000EE', + "icy;": '\U00000438', + "iecy;": '\U00000435', + "iexcl;": '\U000000A1', + "iff;": '\U000021D4', + "ifr;": '\U0001D526', + "igrave;": '\U000000EC', + "ii;": '\U00002148', + "iiiint;": '\U00002A0C', + "iiint;": '\U0000222D', + "iinfin;": '\U000029DC', + "iiota;": '\U00002129', + "ijlig;": '\U00000133', + "imacr;": '\U0000012B', + "image;": '\U00002111', + "imagline;": '\U00002110', + "imagpart;": '\U00002111', + "imath;": '\U00000131', + "imof;": '\U000022B7', + "imped;": '\U000001B5', + "in;": '\U00002208', + "incare;": '\U00002105', + "infin;": '\U0000221E', + "infintie;": '\U000029DD', + "inodot;": '\U00000131', + "int;": '\U0000222B', + "intcal;": '\U000022BA', + "integers;": '\U00002124', + "intercal;": '\U000022BA', + "intlarhk;": '\U00002A17', + "intprod;": '\U00002A3C', + "iocy;": '\U00000451', + "iogon;": '\U0000012F', + "iopf;": '\U0001D55A', + "iota;": '\U000003B9', + "iprod;": '\U00002A3C', + "iquest;": '\U000000BF', + "iscr;": '\U0001D4BE', + "isin;": '\U00002208', + "isinE;": '\U000022F9', + "isindot;": '\U000022F5', + "isins;": '\U000022F4', + "isinsv;": '\U000022F3', + "isinv;": '\U00002208', + "it;": '\U00002062', + "itilde;": '\U00000129', + "iukcy;": '\U00000456', + "iuml;": '\U000000EF', + "jcirc;": '\U00000135', + "jcy;": '\U00000439', + "jfr;": '\U0001D527', + "jmath;": '\U00000237', + "jopf;": '\U0001D55B', + "jscr;": '\U0001D4BF', + "jsercy;": '\U00000458', + "jukcy;": '\U00000454', + "kappa;": '\U000003BA', + "kappav;": '\U000003F0', + "kcedil;": '\U00000137', + "kcy;": '\U0000043A', + "kfr;": '\U0001D528', + "kgreen;": '\U00000138', + "khcy;": '\U00000445', + "kjcy;": '\U0000045C', + "kopf;": '\U0001D55C', + "kscr;": '\U0001D4C0', + "lAarr;": '\U000021DA', + "lArr;": '\U000021D0', + "lAtail;": '\U0000291B', + "lBarr;": '\U0000290E', + "lE;": '\U00002266', + "lEg;": '\U00002A8B', + "lHar;": '\U00002962', + "lacute;": '\U0000013A', + "laemptyv;": '\U000029B4', + "lagran;": '\U00002112', + "lambda;": '\U000003BB', + "lang;": '\U000027E8', + "langd;": '\U00002991', + "langle;": '\U000027E8', + "lap;": '\U00002A85', + "laquo;": '\U000000AB', + "larr;": '\U00002190', + "larrb;": '\U000021E4', + "larrbfs;": '\U0000291F', + "larrfs;": '\U0000291D', + "larrhk;": '\U000021A9', + "larrlp;": '\U000021AB', + "larrpl;": '\U00002939', + "larrsim;": '\U00002973', + "larrtl;": '\U000021A2', + "lat;": '\U00002AAB', + "latail;": '\U00002919', + "late;": '\U00002AAD', + "lbarr;": '\U0000290C', + "lbbrk;": '\U00002772', + "lbrace;": '\U0000007B', + "lbrack;": '\U0000005B', + "lbrke;": '\U0000298B', + "lbrksld;": '\U0000298F', + "lbrkslu;": '\U0000298D', + "lcaron;": '\U0000013E', + "lcedil;": '\U0000013C', + "lceil;": '\U00002308', + "lcub;": '\U0000007B', + "lcy;": '\U0000043B', + "ldca;": '\U00002936', + "ldquo;": '\U0000201C', + "ldquor;": '\U0000201E', + "ldrdhar;": '\U00002967', + "ldrushar;": '\U0000294B', + "ldsh;": '\U000021B2', + "le;": '\U00002264', + "leftarrow;": '\U00002190', + "leftarrowtail;": '\U000021A2', + "leftharpoondown;": '\U000021BD', + "leftharpoonup;": '\U000021BC', + "leftleftarrows;": '\U000021C7', + "leftrightarrow;": '\U00002194', + "leftrightarrows;": '\U000021C6', + "leftrightharpoons;": '\U000021CB', + "leftrightsquigarrow;": '\U000021AD', + "leftthreetimes;": '\U000022CB', + "leg;": '\U000022DA', + "leq;": '\U00002264', + "leqq;": '\U00002266', + "leqslant;": '\U00002A7D', + "les;": '\U00002A7D', + "lescc;": '\U00002AA8', + "lesdot;": '\U00002A7F', + "lesdoto;": '\U00002A81', + "lesdotor;": '\U00002A83', + "lesges;": '\U00002A93', + "lessapprox;": '\U00002A85', + "lessdot;": '\U000022D6', + "lesseqgtr;": '\U000022DA', + "lesseqqgtr;": '\U00002A8B', + "lessgtr;": '\U00002276', + "lesssim;": '\U00002272', + "lfisht;": '\U0000297C', + "lfloor;": '\U0000230A', + "lfr;": '\U0001D529', + "lg;": '\U00002276', + "lgE;": '\U00002A91', + "lhard;": '\U000021BD', + "lharu;": '\U000021BC', + "lharul;": '\U0000296A', + "lhblk;": '\U00002584', + "ljcy;": '\U00000459', + "ll;": '\U0000226A', + "llarr;": '\U000021C7', + "llcorner;": '\U0000231E', + "llhard;": '\U0000296B', + "lltri;": '\U000025FA', + "lmidot;": '\U00000140', + "lmoust;": '\U000023B0', + "lmoustache;": '\U000023B0', + "lnE;": '\U00002268', + "lnap;": '\U00002A89', + "lnapprox;": '\U00002A89', + "lne;": '\U00002A87', + "lneq;": '\U00002A87', + "lneqq;": '\U00002268', + "lnsim;": '\U000022E6', + "loang;": '\U000027EC', + "loarr;": '\U000021FD', + "lobrk;": '\U000027E6', + "longleftarrow;": '\U000027F5', + "longleftrightarrow;": '\U000027F7', + "longmapsto;": '\U000027FC', + "longrightarrow;": '\U000027F6', + "looparrowleft;": '\U000021AB', + "looparrowright;": '\U000021AC', + "lopar;": '\U00002985', + "lopf;": '\U0001D55D', + "loplus;": '\U00002A2D', + "lotimes;": '\U00002A34', + "lowast;": '\U00002217', + "lowbar;": '\U0000005F', + "loz;": '\U000025CA', + "lozenge;": '\U000025CA', + "lozf;": '\U000029EB', + "lpar;": '\U00000028', + "lparlt;": '\U00002993', + "lrarr;": '\U000021C6', + "lrcorner;": '\U0000231F', + "lrhar;": '\U000021CB', + "lrhard;": '\U0000296D', + "lrm;": '\U0000200E', + "lrtri;": '\U000022BF', + "lsaquo;": '\U00002039', + "lscr;": '\U0001D4C1', + "lsh;": '\U000021B0', + "lsim;": '\U00002272', + "lsime;": '\U00002A8D', + "lsimg;": '\U00002A8F', + "lsqb;": '\U0000005B', + "lsquo;": '\U00002018', + "lsquor;": '\U0000201A', + "lstrok;": '\U00000142', + "lt;": '\U0000003C', + "ltcc;": '\U00002AA6', + "ltcir;": '\U00002A79', + "ltdot;": '\U000022D6', + "lthree;": '\U000022CB', + "ltimes;": '\U000022C9', + "ltlarr;": '\U00002976', + "ltquest;": '\U00002A7B', + "ltrPar;": '\U00002996', + "ltri;": '\U000025C3', + "ltrie;": '\U000022B4', + "ltrif;": '\U000025C2', + "lurdshar;": '\U0000294A', + "luruhar;": '\U00002966', + "mDDot;": '\U0000223A', + "macr;": '\U000000AF', + "male;": '\U00002642', + "malt;": '\U00002720', + "maltese;": '\U00002720', + "map;": '\U000021A6', + "mapsto;": '\U000021A6', + "mapstodown;": '\U000021A7', + "mapstoleft;": '\U000021A4', + "mapstoup;": '\U000021A5', + "marker;": '\U000025AE', + "mcomma;": '\U00002A29', + "mcy;": '\U0000043C', + "mdash;": '\U00002014', + "measuredangle;": '\U00002221', + "mfr;": '\U0001D52A', + "mho;": '\U00002127', + "micro;": '\U000000B5', + "mid;": '\U00002223', + "midast;": '\U0000002A', + "midcir;": '\U00002AF0', + "middot;": '\U000000B7', + "minus;": '\U00002212', + "minusb;": '\U0000229F', + "minusd;": '\U00002238', + "minusdu;": '\U00002A2A', + "mlcp;": '\U00002ADB', + "mldr;": '\U00002026', + "mnplus;": '\U00002213', + "models;": '\U000022A7', + "mopf;": '\U0001D55E', + "mp;": '\U00002213', + "mscr;": '\U0001D4C2', + "mstpos;": '\U0000223E', + "mu;": '\U000003BC', + "multimap;": '\U000022B8', + "mumap;": '\U000022B8', + "nLeftarrow;": '\U000021CD', + "nLeftrightarrow;": '\U000021CE', + "nRightarrow;": '\U000021CF', + "nVDash;": '\U000022AF', + "nVdash;": '\U000022AE', + "nabla;": '\U00002207', + "nacute;": '\U00000144', + "nap;": '\U00002249', + "napos;": '\U00000149', + "napprox;": '\U00002249', + "natur;": '\U0000266E', + "natural;": '\U0000266E', + "naturals;": '\U00002115', + "nbsp;": '\U000000A0', + "ncap;": '\U00002A43', + "ncaron;": '\U00000148', + "ncedil;": '\U00000146', + "ncong;": '\U00002247', + "ncup;": '\U00002A42', + "ncy;": '\U0000043D', + "ndash;": '\U00002013', + "ne;": '\U00002260', + "neArr;": '\U000021D7', + "nearhk;": '\U00002924', + "nearr;": '\U00002197', + "nearrow;": '\U00002197', + "nequiv;": '\U00002262', + "nesear;": '\U00002928', + "nexist;": '\U00002204', + "nexists;": '\U00002204', + "nfr;": '\U0001D52B', + "nge;": '\U00002271', + "ngeq;": '\U00002271', + "ngsim;": '\U00002275', + "ngt;": '\U0000226F', + "ngtr;": '\U0000226F', + "nhArr;": '\U000021CE', + "nharr;": '\U000021AE', + "nhpar;": '\U00002AF2', + "ni;": '\U0000220B', + "nis;": '\U000022FC', + "nisd;": '\U000022FA', + "niv;": '\U0000220B', + "njcy;": '\U0000045A', + "nlArr;": '\U000021CD', + "nlarr;": '\U0000219A', + "nldr;": '\U00002025', + "nle;": '\U00002270', + "nleftarrow;": '\U0000219A', + "nleftrightarrow;": '\U000021AE', + "nleq;": '\U00002270', + "nless;": '\U0000226E', + "nlsim;": '\U00002274', + "nlt;": '\U0000226E', + "nltri;": '\U000022EA', + "nltrie;": '\U000022EC', + "nmid;": '\U00002224', + "nopf;": '\U0001D55F', + "not;": '\U000000AC', + "notin;": '\U00002209', + "notinva;": '\U00002209', + "notinvb;": '\U000022F7', + "notinvc;": '\U000022F6', + "notni;": '\U0000220C', + "notniva;": '\U0000220C', + "notnivb;": '\U000022FE', + "notnivc;": '\U000022FD', + "npar;": '\U00002226', + "nparallel;": '\U00002226', + "npolint;": '\U00002A14', + "npr;": '\U00002280', + "nprcue;": '\U000022E0', + "nprec;": '\U00002280', + "nrArr;": '\U000021CF', + "nrarr;": '\U0000219B', + "nrightarrow;": '\U0000219B', + "nrtri;": '\U000022EB', + "nrtrie;": '\U000022ED', + "nsc;": '\U00002281', + "nsccue;": '\U000022E1', + "nscr;": '\U0001D4C3', + "nshortmid;": '\U00002224', + "nshortparallel;": '\U00002226', + "nsim;": '\U00002241', + "nsime;": '\U00002244', + "nsimeq;": '\U00002244', + "nsmid;": '\U00002224', + "nspar;": '\U00002226', + "nsqsube;": '\U000022E2', + "nsqsupe;": '\U000022E3', + "nsub;": '\U00002284', + "nsube;": '\U00002288', + "nsubseteq;": '\U00002288', + "nsucc;": '\U00002281', + "nsup;": '\U00002285', + "nsupe;": '\U00002289', + "nsupseteq;": '\U00002289', + "ntgl;": '\U00002279', + "ntilde;": '\U000000F1', + "ntlg;": '\U00002278', + "ntriangleleft;": '\U000022EA', + "ntrianglelefteq;": '\U000022EC', + "ntriangleright;": '\U000022EB', + "ntrianglerighteq;": '\U000022ED', + "nu;": '\U000003BD', + "num;": '\U00000023', + "numero;": '\U00002116', + "numsp;": '\U00002007', + "nvDash;": '\U000022AD', + "nvHarr;": '\U00002904', + "nvdash;": '\U000022AC', + "nvinfin;": '\U000029DE', + "nvlArr;": '\U00002902', + "nvrArr;": '\U00002903', + "nwArr;": '\U000021D6', + "nwarhk;": '\U00002923', + "nwarr;": '\U00002196', + "nwarrow;": '\U00002196', + "nwnear;": '\U00002927', + "oS;": '\U000024C8', + "oacute;": '\U000000F3', + "oast;": '\U0000229B', + "ocir;": '\U0000229A', + "ocirc;": '\U000000F4', + "ocy;": '\U0000043E', + "odash;": '\U0000229D', + "odblac;": '\U00000151', + "odiv;": '\U00002A38', + "odot;": '\U00002299', + "odsold;": '\U000029BC', + "oelig;": '\U00000153', + "ofcir;": '\U000029BF', + "ofr;": '\U0001D52C', + "ogon;": '\U000002DB', + "ograve;": '\U000000F2', + "ogt;": '\U000029C1', + "ohbar;": '\U000029B5', + "ohm;": '\U000003A9', + "oint;": '\U0000222E', + "olarr;": '\U000021BA', + "olcir;": '\U000029BE', + "olcross;": '\U000029BB', + "oline;": '\U0000203E', + "olt;": '\U000029C0', + "omacr;": '\U0000014D', + "omega;": '\U000003C9', + "omicron;": '\U000003BF', + "omid;": '\U000029B6', + "ominus;": '\U00002296', + "oopf;": '\U0001D560', + "opar;": '\U000029B7', + "operp;": '\U000029B9', + "oplus;": '\U00002295', + "or;": '\U00002228', + "orarr;": '\U000021BB', + "ord;": '\U00002A5D', + "order;": '\U00002134', + "orderof;": '\U00002134', + "ordf;": '\U000000AA', + "ordm;": '\U000000BA', + "origof;": '\U000022B6', + "oror;": '\U00002A56', + "orslope;": '\U00002A57', + "orv;": '\U00002A5B', + "oscr;": '\U00002134', + "oslash;": '\U000000F8', + "osol;": '\U00002298', + "otilde;": '\U000000F5', + "otimes;": '\U00002297', + "otimesas;": '\U00002A36', + "ouml;": '\U000000F6', + "ovbar;": '\U0000233D', + "par;": '\U00002225', + "para;": '\U000000B6', + "parallel;": '\U00002225', + "parsim;": '\U00002AF3', + "parsl;": '\U00002AFD', + "part;": '\U00002202', + "pcy;": '\U0000043F', + "percnt;": '\U00000025', + "period;": '\U0000002E', + "permil;": '\U00002030', + "perp;": '\U000022A5', + "pertenk;": '\U00002031', + "pfr;": '\U0001D52D', + "phi;": '\U000003C6', + "phiv;": '\U000003D5', + "phmmat;": '\U00002133', + "phone;": '\U0000260E', + "pi;": '\U000003C0', + "pitchfork;": '\U000022D4', + "piv;": '\U000003D6', + "planck;": '\U0000210F', + "planckh;": '\U0000210E', + "plankv;": '\U0000210F', + "plus;": '\U0000002B', + "plusacir;": '\U00002A23', + "plusb;": '\U0000229E', + "pluscir;": '\U00002A22', + "plusdo;": '\U00002214', + "plusdu;": '\U00002A25', + "pluse;": '\U00002A72', + "plusmn;": '\U000000B1', + "plussim;": '\U00002A26', + "plustwo;": '\U00002A27', + "pm;": '\U000000B1', + "pointint;": '\U00002A15', + "popf;": '\U0001D561', + "pound;": '\U000000A3', + "pr;": '\U0000227A', + "prE;": '\U00002AB3', + "prap;": '\U00002AB7', + "prcue;": '\U0000227C', + "pre;": '\U00002AAF', + "prec;": '\U0000227A', + "precapprox;": '\U00002AB7', + "preccurlyeq;": '\U0000227C', + "preceq;": '\U00002AAF', + "precnapprox;": '\U00002AB9', + "precneqq;": '\U00002AB5', + "precnsim;": '\U000022E8', + "precsim;": '\U0000227E', + "prime;": '\U00002032', + "primes;": '\U00002119', + "prnE;": '\U00002AB5', + "prnap;": '\U00002AB9', + "prnsim;": '\U000022E8', + "prod;": '\U0000220F', + "profalar;": '\U0000232E', + "profline;": '\U00002312', + "profsurf;": '\U00002313', + "prop;": '\U0000221D', + "propto;": '\U0000221D', + "prsim;": '\U0000227E', + "prurel;": '\U000022B0', + "pscr;": '\U0001D4C5', + "psi;": '\U000003C8', + "puncsp;": '\U00002008', + "qfr;": '\U0001D52E', + "qint;": '\U00002A0C', + "qopf;": '\U0001D562', + "qprime;": '\U00002057', + "qscr;": '\U0001D4C6', + "quaternions;": '\U0000210D', + "quatint;": '\U00002A16', + "quest;": '\U0000003F', + "questeq;": '\U0000225F', + "quot;": '\U00000022', + "rAarr;": '\U000021DB', + "rArr;": '\U000021D2', + "rAtail;": '\U0000291C', + "rBarr;": '\U0000290F', + "rHar;": '\U00002964', + "racute;": '\U00000155', + "radic;": '\U0000221A', + "raemptyv;": '\U000029B3', + "rang;": '\U000027E9', + "rangd;": '\U00002992', + "range;": '\U000029A5', + "rangle;": '\U000027E9', + "raquo;": '\U000000BB', + "rarr;": '\U00002192', + "rarrap;": '\U00002975', + "rarrb;": '\U000021E5', + "rarrbfs;": '\U00002920', + "rarrc;": '\U00002933', + "rarrfs;": '\U0000291E', + "rarrhk;": '\U000021AA', + "rarrlp;": '\U000021AC', + "rarrpl;": '\U00002945', + "rarrsim;": '\U00002974', + "rarrtl;": '\U000021A3', + "rarrw;": '\U0000219D', + "ratail;": '\U0000291A', + "ratio;": '\U00002236', + "rationals;": '\U0000211A', + "rbarr;": '\U0000290D', + "rbbrk;": '\U00002773', + "rbrace;": '\U0000007D', + "rbrack;": '\U0000005D', + "rbrke;": '\U0000298C', + "rbrksld;": '\U0000298E', + "rbrkslu;": '\U00002990', + "rcaron;": '\U00000159', + "rcedil;": '\U00000157', + "rceil;": '\U00002309', + "rcub;": '\U0000007D', + "rcy;": '\U00000440', + "rdca;": '\U00002937', + "rdldhar;": '\U00002969', + "rdquo;": '\U0000201D', + "rdquor;": '\U0000201D', + "rdsh;": '\U000021B3', + "real;": '\U0000211C', + "realine;": '\U0000211B', + "realpart;": '\U0000211C', + "reals;": '\U0000211D', + "rect;": '\U000025AD', + "reg;": '\U000000AE', + "rfisht;": '\U0000297D', + "rfloor;": '\U0000230B', + "rfr;": '\U0001D52F', + "rhard;": '\U000021C1', + "rharu;": '\U000021C0', + "rharul;": '\U0000296C', + "rho;": '\U000003C1', + "rhov;": '\U000003F1', + "rightarrow;": '\U00002192', + "rightarrowtail;": '\U000021A3', + "rightharpoondown;": '\U000021C1', + "rightharpoonup;": '\U000021C0', + "rightleftarrows;": '\U000021C4', + "rightleftharpoons;": '\U000021CC', + "rightrightarrows;": '\U000021C9', + "rightsquigarrow;": '\U0000219D', + "rightthreetimes;": '\U000022CC', + "ring;": '\U000002DA', + "risingdotseq;": '\U00002253', + "rlarr;": '\U000021C4', + "rlhar;": '\U000021CC', + "rlm;": '\U0000200F', + "rmoust;": '\U000023B1', + "rmoustache;": '\U000023B1', + "rnmid;": '\U00002AEE', + "roang;": '\U000027ED', + "roarr;": '\U000021FE', + "robrk;": '\U000027E7', + "ropar;": '\U00002986', + "ropf;": '\U0001D563', + "roplus;": '\U00002A2E', + "rotimes;": '\U00002A35', + "rpar;": '\U00000029', + "rpargt;": '\U00002994', + "rppolint;": '\U00002A12', + "rrarr;": '\U000021C9', + "rsaquo;": '\U0000203A', + "rscr;": '\U0001D4C7', + "rsh;": '\U000021B1', + "rsqb;": '\U0000005D', + "rsquo;": '\U00002019', + "rsquor;": '\U00002019', + "rthree;": '\U000022CC', + "rtimes;": '\U000022CA', + "rtri;": '\U000025B9', + "rtrie;": '\U000022B5', + "rtrif;": '\U000025B8', + "rtriltri;": '\U000029CE', + "ruluhar;": '\U00002968', + "rx;": '\U0000211E', + "sacute;": '\U0000015B', + "sbquo;": '\U0000201A', + "sc;": '\U0000227B', + "scE;": '\U00002AB4', + "scap;": '\U00002AB8', + "scaron;": '\U00000161', + "sccue;": '\U0000227D', + "sce;": '\U00002AB0', + "scedil;": '\U0000015F', + "scirc;": '\U0000015D', + "scnE;": '\U00002AB6', + "scnap;": '\U00002ABA', + "scnsim;": '\U000022E9', + "scpolint;": '\U00002A13', + "scsim;": '\U0000227F', + "scy;": '\U00000441', + "sdot;": '\U000022C5', + "sdotb;": '\U000022A1', + "sdote;": '\U00002A66', + "seArr;": '\U000021D8', + "searhk;": '\U00002925', + "searr;": '\U00002198', + "searrow;": '\U00002198', + "sect;": '\U000000A7', + "semi;": '\U0000003B', + "seswar;": '\U00002929', + "setminus;": '\U00002216', + "setmn;": '\U00002216', + "sext;": '\U00002736', + "sfr;": '\U0001D530', + "sfrown;": '\U00002322', + "sharp;": '\U0000266F', + "shchcy;": '\U00000449', + "shcy;": '\U00000448', + "shortmid;": '\U00002223', + "shortparallel;": '\U00002225', + "shy;": '\U000000AD', + "sigma;": '\U000003C3', + "sigmaf;": '\U000003C2', + "sigmav;": '\U000003C2', + "sim;": '\U0000223C', + "simdot;": '\U00002A6A', + "sime;": '\U00002243', + "simeq;": '\U00002243', + "simg;": '\U00002A9E', + "simgE;": '\U00002AA0', + "siml;": '\U00002A9D', + "simlE;": '\U00002A9F', + "simne;": '\U00002246', + "simplus;": '\U00002A24', + "simrarr;": '\U00002972', + "slarr;": '\U00002190', + "smallsetminus;": '\U00002216', + "smashp;": '\U00002A33', + "smeparsl;": '\U000029E4', + "smid;": '\U00002223', + "smile;": '\U00002323', + "smt;": '\U00002AAA', + "smte;": '\U00002AAC', + "softcy;": '\U0000044C', + "sol;": '\U0000002F', + "solb;": '\U000029C4', + "solbar;": '\U0000233F', + "sopf;": '\U0001D564', + "spades;": '\U00002660', + "spadesuit;": '\U00002660', + "spar;": '\U00002225', + "sqcap;": '\U00002293', + "sqcup;": '\U00002294', + "sqsub;": '\U0000228F', + "sqsube;": '\U00002291', + "sqsubset;": '\U0000228F', + "sqsubseteq;": '\U00002291', + "sqsup;": '\U00002290', + "sqsupe;": '\U00002292', + "sqsupset;": '\U00002290', + "sqsupseteq;": '\U00002292', + "squ;": '\U000025A1', + "square;": '\U000025A1', + "squarf;": '\U000025AA', + "squf;": '\U000025AA', + "srarr;": '\U00002192', + "sscr;": '\U0001D4C8', + "ssetmn;": '\U00002216', + "ssmile;": '\U00002323', + "sstarf;": '\U000022C6', + "star;": '\U00002606', + "starf;": '\U00002605', + "straightepsilon;": '\U000003F5', + "straightphi;": '\U000003D5', + "strns;": '\U000000AF', + "sub;": '\U00002282', + "subE;": '\U00002AC5', + "subdot;": '\U00002ABD', + "sube;": '\U00002286', + "subedot;": '\U00002AC3', + "submult;": '\U00002AC1', + "subnE;": '\U00002ACB', + "subne;": '\U0000228A', + "subplus;": '\U00002ABF', + "subrarr;": '\U00002979', + "subset;": '\U00002282', + "subseteq;": '\U00002286', + "subseteqq;": '\U00002AC5', + "subsetneq;": '\U0000228A', + "subsetneqq;": '\U00002ACB', + "subsim;": '\U00002AC7', + "subsub;": '\U00002AD5', + "subsup;": '\U00002AD3', + "succ;": '\U0000227B', + "succapprox;": '\U00002AB8', + "succcurlyeq;": '\U0000227D', + "succeq;": '\U00002AB0', + "succnapprox;": '\U00002ABA', + "succneqq;": '\U00002AB6', + "succnsim;": '\U000022E9', + "succsim;": '\U0000227F', + "sum;": '\U00002211', + "sung;": '\U0000266A', + "sup;": '\U00002283', + "sup1;": '\U000000B9', + "sup2;": '\U000000B2', + "sup3;": '\U000000B3', + "supE;": '\U00002AC6', + "supdot;": '\U00002ABE', + "supdsub;": '\U00002AD8', + "supe;": '\U00002287', + "supedot;": '\U00002AC4', + "suphsol;": '\U000027C9', + "suphsub;": '\U00002AD7', + "suplarr;": '\U0000297B', + "supmult;": '\U00002AC2', + "supnE;": '\U00002ACC', + "supne;": '\U0000228B', + "supplus;": '\U00002AC0', + "supset;": '\U00002283', + "supseteq;": '\U00002287', + "supseteqq;": '\U00002AC6', + "supsetneq;": '\U0000228B', + "supsetneqq;": '\U00002ACC', + "supsim;": '\U00002AC8', + "supsub;": '\U00002AD4', + "supsup;": '\U00002AD6', + "swArr;": '\U000021D9', + "swarhk;": '\U00002926', + "swarr;": '\U00002199', + "swarrow;": '\U00002199', + "swnwar;": '\U0000292A', + "szlig;": '\U000000DF', + "target;": '\U00002316', + "tau;": '\U000003C4', + "tbrk;": '\U000023B4', + "tcaron;": '\U00000165', + "tcedil;": '\U00000163', + "tcy;": '\U00000442', + "tdot;": '\U000020DB', + "telrec;": '\U00002315', + "tfr;": '\U0001D531', + "there4;": '\U00002234', + "therefore;": '\U00002234', + "theta;": '\U000003B8', + "thetasym;": '\U000003D1', + "thetav;": '\U000003D1', + "thickapprox;": '\U00002248', + "thicksim;": '\U0000223C', + "thinsp;": '\U00002009', + "thkap;": '\U00002248', + "thksim;": '\U0000223C', + "thorn;": '\U000000FE', + "tilde;": '\U000002DC', + "times;": '\U000000D7', + "timesb;": '\U000022A0', + "timesbar;": '\U00002A31', + "timesd;": '\U00002A30', + "tint;": '\U0000222D', + "toea;": '\U00002928', + "top;": '\U000022A4', + "topbot;": '\U00002336', + "topcir;": '\U00002AF1', + "topf;": '\U0001D565', + "topfork;": '\U00002ADA', + "tosa;": '\U00002929', + "tprime;": '\U00002034', + "trade;": '\U00002122', + "triangle;": '\U000025B5', + "triangledown;": '\U000025BF', + "triangleleft;": '\U000025C3', + "trianglelefteq;": '\U000022B4', + "triangleq;": '\U0000225C', + "triangleright;": '\U000025B9', + "trianglerighteq;": '\U000022B5', + "tridot;": '\U000025EC', + "trie;": '\U0000225C', + "triminus;": '\U00002A3A', + "triplus;": '\U00002A39', + "trisb;": '\U000029CD', + "tritime;": '\U00002A3B', + "trpezium;": '\U000023E2', + "tscr;": '\U0001D4C9', + "tscy;": '\U00000446', + "tshcy;": '\U0000045B', + "tstrok;": '\U00000167', + "twixt;": '\U0000226C', + "twoheadleftarrow;": '\U0000219E', + "twoheadrightarrow;": '\U000021A0', + "uArr;": '\U000021D1', + "uHar;": '\U00002963', + "uacute;": '\U000000FA', + "uarr;": '\U00002191', + "ubrcy;": '\U0000045E', + "ubreve;": '\U0000016D', + "ucirc;": '\U000000FB', + "ucy;": '\U00000443', + "udarr;": '\U000021C5', + "udblac;": '\U00000171', + "udhar;": '\U0000296E', + "ufisht;": '\U0000297E', + "ufr;": '\U0001D532', + "ugrave;": '\U000000F9', + "uharl;": '\U000021BF', + "uharr;": '\U000021BE', + "uhblk;": '\U00002580', + "ulcorn;": '\U0000231C', + "ulcorner;": '\U0000231C', + "ulcrop;": '\U0000230F', + "ultri;": '\U000025F8', + "umacr;": '\U0000016B', + "uml;": '\U000000A8', + "uogon;": '\U00000173', + "uopf;": '\U0001D566', + "uparrow;": '\U00002191', + "updownarrow;": '\U00002195', + "upharpoonleft;": '\U000021BF', + "upharpoonright;": '\U000021BE', + "uplus;": '\U0000228E', + "upsi;": '\U000003C5', + "upsih;": '\U000003D2', + "upsilon;": '\U000003C5', + "upuparrows;": '\U000021C8', + "urcorn;": '\U0000231D', + "urcorner;": '\U0000231D', + "urcrop;": '\U0000230E', + "uring;": '\U0000016F', + "urtri;": '\U000025F9', + "uscr;": '\U0001D4CA', + "utdot;": '\U000022F0', + "utilde;": '\U00000169', + "utri;": '\U000025B5', + "utrif;": '\U000025B4', + "uuarr;": '\U000021C8', + "uuml;": '\U000000FC', + "uwangle;": '\U000029A7', + "vArr;": '\U000021D5', + "vBar;": '\U00002AE8', + "vBarv;": '\U00002AE9', + "vDash;": '\U000022A8', + "vangrt;": '\U0000299C', + "varepsilon;": '\U000003F5', + "varkappa;": '\U000003F0', + "varnothing;": '\U00002205', + "varphi;": '\U000003D5', + "varpi;": '\U000003D6', + "varpropto;": '\U0000221D', + "varr;": '\U00002195', + "varrho;": '\U000003F1', + "varsigma;": '\U000003C2', + "vartheta;": '\U000003D1', + "vartriangleleft;": '\U000022B2', + "vartriangleright;": '\U000022B3', + "vcy;": '\U00000432', + "vdash;": '\U000022A2', + "vee;": '\U00002228', + "veebar;": '\U000022BB', + "veeeq;": '\U0000225A', + "vellip;": '\U000022EE', + "verbar;": '\U0000007C', + "vert;": '\U0000007C', + "vfr;": '\U0001D533', + "vltri;": '\U000022B2', + "vopf;": '\U0001D567', + "vprop;": '\U0000221D', + "vrtri;": '\U000022B3', + "vscr;": '\U0001D4CB', + "vzigzag;": '\U0000299A', + "wcirc;": '\U00000175', + "wedbar;": '\U00002A5F', + "wedge;": '\U00002227', + "wedgeq;": '\U00002259', + "weierp;": '\U00002118', + "wfr;": '\U0001D534', + "wopf;": '\U0001D568', + "wp;": '\U00002118', + "wr;": '\U00002240', + "wreath;": '\U00002240', + "wscr;": '\U0001D4CC', + "xcap;": '\U000022C2', + "xcirc;": '\U000025EF', + "xcup;": '\U000022C3', + "xdtri;": '\U000025BD', + "xfr;": '\U0001D535', + "xhArr;": '\U000027FA', + "xharr;": '\U000027F7', + "xi;": '\U000003BE', + "xlArr;": '\U000027F8', + "xlarr;": '\U000027F5', + "xmap;": '\U000027FC', + "xnis;": '\U000022FB', + "xodot;": '\U00002A00', + "xopf;": '\U0001D569', + "xoplus;": '\U00002A01', + "xotime;": '\U00002A02', + "xrArr;": '\U000027F9', + "xrarr;": '\U000027F6', + "xscr;": '\U0001D4CD', + "xsqcup;": '\U00002A06', + "xuplus;": '\U00002A04', + "xutri;": '\U000025B3', + "xvee;": '\U000022C1', + "xwedge;": '\U000022C0', + "yacute;": '\U000000FD', + "yacy;": '\U0000044F', + "ycirc;": '\U00000177', + "ycy;": '\U0000044B', + "yen;": '\U000000A5', + "yfr;": '\U0001D536', + "yicy;": '\U00000457', + "yopf;": '\U0001D56A', + "yscr;": '\U0001D4CE', + "yucy;": '\U0000044E', + "yuml;": '\U000000FF', + "zacute;": '\U0000017A', + "zcaron;": '\U0000017E', + "zcy;": '\U00000437', + "zdot;": '\U0000017C', + "zeetrf;": '\U00002128', + "zeta;": '\U000003B6', + "zfr;": '\U0001D537', + "zhcy;": '\U00000436', + "zigrarr;": '\U000021DD', + "zopf;": '\U0001D56B', + "zscr;": '\U0001D4CF', + "zwj;": '\U0000200D', + "zwnj;": '\U0000200C', + "AElig": '\U000000C6', + "AMP": '\U00000026', + "Aacute": '\U000000C1', + "Acirc": '\U000000C2', + "Agrave": '\U000000C0', + "Aring": '\U000000C5', + "Atilde": '\U000000C3', + "Auml": '\U000000C4', + "COPY": '\U000000A9', + "Ccedil": '\U000000C7', + "ETH": '\U000000D0', + "Eacute": '\U000000C9', + "Ecirc": '\U000000CA', + "Egrave": '\U000000C8', + "Euml": '\U000000CB', + "GT": '\U0000003E', + "Iacute": '\U000000CD', + "Icirc": '\U000000CE', + "Igrave": '\U000000CC', + "Iuml": '\U000000CF', + "LT": '\U0000003C', + "Ntilde": '\U000000D1', + "Oacute": '\U000000D3', + "Ocirc": '\U000000D4', + "Ograve": '\U000000D2', + "Oslash": '\U000000D8', + "Otilde": '\U000000D5', + "Ouml": '\U000000D6', + "QUOT": '\U00000022', + "REG": '\U000000AE', + "THORN": '\U000000DE', + "Uacute": '\U000000DA', + "Ucirc": '\U000000DB', + "Ugrave": '\U000000D9', + "Uuml": '\U000000DC', + "Yacute": '\U000000DD', + "aacute": '\U000000E1', + "acirc": '\U000000E2', + "acute": '\U000000B4', + "aelig": '\U000000E6', + "agrave": '\U000000E0', + "amp": '\U00000026', + "aring": '\U000000E5', + "atilde": '\U000000E3', + "auml": '\U000000E4', + "brvbar": '\U000000A6', + "ccedil": '\U000000E7', + "cedil": '\U000000B8', + "cent": '\U000000A2', + "copy": '\U000000A9', + "curren": '\U000000A4', + "deg": '\U000000B0', + "divide": '\U000000F7', + "eacute": '\U000000E9', + "ecirc": '\U000000EA', + "egrave": '\U000000E8', + "eth": '\U000000F0', + "euml": '\U000000EB', + "frac12": '\U000000BD', + "frac14": '\U000000BC', + "frac34": '\U000000BE', + "gt": '\U0000003E', + "iacute": '\U000000ED', + "icirc": '\U000000EE', + "iexcl": '\U000000A1', + "igrave": '\U000000EC', + "iquest": '\U000000BF', + "iuml": '\U000000EF', + "laquo": '\U000000AB', + "lt": '\U0000003C', + "macr": '\U000000AF', + "micro": '\U000000B5', + "middot": '\U000000B7', + "nbsp": '\U000000A0', + "not": '\U000000AC', + "ntilde": '\U000000F1', + "oacute": '\U000000F3', + "ocirc": '\U000000F4', + "ograve": '\U000000F2', + "ordf": '\U000000AA', + "ordm": '\U000000BA', + "oslash": '\U000000F8', + "otilde": '\U000000F5', + "ouml": '\U000000F6', + "para": '\U000000B6', + "plusmn": '\U000000B1', + "pound": '\U000000A3', + "quot": '\U00000022', + "raquo": '\U000000BB', + "reg": '\U000000AE', + "sect": '\U000000A7', + "shy": '\U000000AD', + "sup1": '\U000000B9', + "sup2": '\U000000B2', + "sup3": '\U000000B3', + "szlig": '\U000000DF', + "thorn": '\U000000FE', + "times": '\U000000D7', + "uacute": '\U000000FA', + "ucirc": '\U000000FB', + "ugrave": '\U000000F9', + "uml": '\U000000A8', + "uuml": '\U000000FC', + "yacute": '\U000000FD', + "yen": '\U000000A5', + "yuml": '\U000000FF', +} + +// HTML entities that are two unicode codepoints. +var entity2 = map[string][2]rune{ + // TODO(nigeltao): Handle replacements that are wider than their names. + // "nLt;": {'\u226A', '\u20D2'}, + // "nGt;": {'\u226B', '\u20D2'}, + "NotEqualTilde;": {'\u2242', '\u0338'}, + "NotGreaterFullEqual;": {'\u2267', '\u0338'}, + "NotGreaterGreater;": {'\u226B', '\u0338'}, + "NotGreaterSlantEqual;": {'\u2A7E', '\u0338'}, + "NotHumpDownHump;": {'\u224E', '\u0338'}, + "NotHumpEqual;": {'\u224F', '\u0338'}, + "NotLeftTriangleBar;": {'\u29CF', '\u0338'}, + "NotLessLess;": {'\u226A', '\u0338'}, + "NotLessSlantEqual;": {'\u2A7D', '\u0338'}, + "NotNestedGreaterGreater;": {'\u2AA2', '\u0338'}, + "NotNestedLessLess;": {'\u2AA1', '\u0338'}, + "NotPrecedesEqual;": {'\u2AAF', '\u0338'}, + "NotRightTriangleBar;": {'\u29D0', '\u0338'}, + "NotSquareSubset;": {'\u228F', '\u0338'}, + "NotSquareSuperset;": {'\u2290', '\u0338'}, + "NotSubset;": {'\u2282', '\u20D2'}, + "NotSucceedsEqual;": {'\u2AB0', '\u0338'}, + "NotSucceedsTilde;": {'\u227F', '\u0338'}, + "NotSuperset;": {'\u2283', '\u20D2'}, + "ThickSpace;": {'\u205F', '\u200A'}, + "acE;": {'\u223E', '\u0333'}, + "bne;": {'\u003D', '\u20E5'}, + "bnequiv;": {'\u2261', '\u20E5'}, + "caps;": {'\u2229', '\uFE00'}, + "cups;": {'\u222A', '\uFE00'}, + "fjlig;": {'\u0066', '\u006A'}, + "gesl;": {'\u22DB', '\uFE00'}, + "gvertneqq;": {'\u2269', '\uFE00'}, + "gvnE;": {'\u2269', '\uFE00'}, + "lates;": {'\u2AAD', '\uFE00'}, + "lesg;": {'\u22DA', '\uFE00'}, + "lvertneqq;": {'\u2268', '\uFE00'}, + "lvnE;": {'\u2268', '\uFE00'}, + "nGg;": {'\u22D9', '\u0338'}, + "nGtv;": {'\u226B', '\u0338'}, + "nLl;": {'\u22D8', '\u0338'}, + "nLtv;": {'\u226A', '\u0338'}, + "nang;": {'\u2220', '\u20D2'}, + "napE;": {'\u2A70', '\u0338'}, + "napid;": {'\u224B', '\u0338'}, + "nbump;": {'\u224E', '\u0338'}, + "nbumpe;": {'\u224F', '\u0338'}, + "ncongdot;": {'\u2A6D', '\u0338'}, + "nedot;": {'\u2250', '\u0338'}, + "nesim;": {'\u2242', '\u0338'}, + "ngE;": {'\u2267', '\u0338'}, + "ngeqq;": {'\u2267', '\u0338'}, + "ngeqslant;": {'\u2A7E', '\u0338'}, + "nges;": {'\u2A7E', '\u0338'}, + "nlE;": {'\u2266', '\u0338'}, + "nleqq;": {'\u2266', '\u0338'}, + "nleqslant;": {'\u2A7D', '\u0338'}, + "nles;": {'\u2A7D', '\u0338'}, + "notinE;": {'\u22F9', '\u0338'}, + "notindot;": {'\u22F5', '\u0338'}, + "nparsl;": {'\u2AFD', '\u20E5'}, + "npart;": {'\u2202', '\u0338'}, + "npre;": {'\u2AAF', '\u0338'}, + "npreceq;": {'\u2AAF', '\u0338'}, + "nrarrc;": {'\u2933', '\u0338'}, + "nrarrw;": {'\u219D', '\u0338'}, + "nsce;": {'\u2AB0', '\u0338'}, + "nsubE;": {'\u2AC5', '\u0338'}, + "nsubset;": {'\u2282', '\u20D2'}, + "nsubseteqq;": {'\u2AC5', '\u0338'}, + "nsucceq;": {'\u2AB0', '\u0338'}, + "nsupE;": {'\u2AC6', '\u0338'}, + "nsupset;": {'\u2283', '\u20D2'}, + "nsupseteqq;": {'\u2AC6', '\u0338'}, + "nvap;": {'\u224D', '\u20D2'}, + "nvge;": {'\u2265', '\u20D2'}, + "nvgt;": {'\u003E', '\u20D2'}, + "nvle;": {'\u2264', '\u20D2'}, + "nvlt;": {'\u003C', '\u20D2'}, + "nvltrie;": {'\u22B4', '\u20D2'}, + "nvrtrie;": {'\u22B5', '\u20D2'}, + "nvsim;": {'\u223C', '\u20D2'}, + "race;": {'\u223D', '\u0331'}, + "smtes;": {'\u2AAC', '\uFE00'}, + "sqcaps;": {'\u2293', '\uFE00'}, + "sqcups;": {'\u2294', '\uFE00'}, + "varsubsetneq;": {'\u228A', '\uFE00'}, + "varsubsetneqq;": {'\u2ACB', '\uFE00'}, + "varsupsetneq;": {'\u228B', '\uFE00'}, + "varsupsetneqq;": {'\u2ACC', '\uFE00'}, + "vnsub;": {'\u2282', '\u20D2'}, + "vnsup;": {'\u2283', '\u20D2'}, + "vsubnE;": {'\u2ACB', '\uFE00'}, + "vsubne;": {'\u228A', '\uFE00'}, + "vsupnE;": {'\u2ACC', '\uFE00'}, + "vsupne;": {'\u228B', '\uFE00'}, +} diff --git a/vendor/golang.org/x/net/html/escape.go b/vendor/golang.org/x/net/html/escape.go new file mode 100644 index 000000000..d85613962 --- /dev/null +++ b/vendor/golang.org/x/net/html/escape.go @@ -0,0 +1,258 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "bytes" + "strings" + "unicode/utf8" +) + +// These replacements permit compatibility with old numeric entities that +// assumed Windows-1252 encoding. +// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference +var replacementTable = [...]rune{ + '\u20AC', // First entry is what 0x80 should be replaced with. + '\u0081', + '\u201A', + '\u0192', + '\u201E', + '\u2026', + '\u2020', + '\u2021', + '\u02C6', + '\u2030', + '\u0160', + '\u2039', + '\u0152', + '\u008D', + '\u017D', + '\u008F', + '\u0090', + '\u2018', + '\u2019', + '\u201C', + '\u201D', + '\u2022', + '\u2013', + '\u2014', + '\u02DC', + '\u2122', + '\u0161', + '\u203A', + '\u0153', + '\u009D', + '\u017E', + '\u0178', // Last entry is 0x9F. + // 0x00->'\uFFFD' is handled programmatically. + // 0x0D->'\u000D' is a no-op. +} + +// unescapeEntity reads an entity like "<" from b[src:] and writes the +// corresponding "<" to b[dst:], returning the incremented dst and src cursors. +// Precondition: b[src] == '&' && dst <= src. +// attribute should be true if parsing an attribute value. +func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { + // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference + + // i starts at 1 because we already know that s[0] == '&'. + i, s := 1, b[src:] + + if len(s) <= 1 { + b[dst] = b[src] + return dst + 1, src + 1 + } + + if s[i] == '#' { + if len(s) <= 3 { // We need to have at least "&#.". + b[dst] = b[src] + return dst + 1, src + 1 + } + i++ + c := s[i] + hex := false + if c == 'x' || c == 'X' { + hex = true + i++ + } + + x := '\x00' + for i < len(s) { + c = s[i] + i++ + if hex { + if '0' <= c && c <= '9' { + x = 16*x + rune(c) - '0' + continue + } else if 'a' <= c && c <= 'f' { + x = 16*x + rune(c) - 'a' + 10 + continue + } else if 'A' <= c && c <= 'F' { + x = 16*x + rune(c) - 'A' + 10 + continue + } + } else if '0' <= c && c <= '9' { + x = 10*x + rune(c) - '0' + continue + } + if c != ';' { + i-- + } + break + } + + if i <= 3 { // No characters matched. + b[dst] = b[src] + return dst + 1, src + 1 + } + + if 0x80 <= x && x <= 0x9F { + // Replace characters from Windows-1252 with UTF-8 equivalents. + x = replacementTable[x-0x80] + } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF { + // Replace invalid characters with the replacement character. + x = '\uFFFD' + } + + return dst + utf8.EncodeRune(b[dst:], x), src + i + } + + // Consume the maximum number of characters possible, with the + // consumed characters matching one of the named references. + + for i < len(s) { + c := s[i] + i++ + // Lower-cased characters are more common in entities, so we check for them first. + if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { + continue + } + if c != ';' { + i-- + } + break + } + + entityName := string(s[1:i]) + if entityName == "" { + // No-op. + } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' { + // No-op. + } else if x := entity[entityName]; x != 0 { + return dst + utf8.EncodeRune(b[dst:], x), src + i + } else if x := entity2[entityName]; x[0] != 0 { + dst1 := dst + utf8.EncodeRune(b[dst:], x[0]) + return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i + } else if !attribute { + maxLen := len(entityName) - 1 + if maxLen > longestEntityWithoutSemicolon { + maxLen = longestEntityWithoutSemicolon + } + for j := maxLen; j > 1; j-- { + if x := entity[entityName[:j]]; x != 0 { + return dst + utf8.EncodeRune(b[dst:], x), src + j + 1 + } + } + } + + dst1, src1 = dst+i, src+i + copy(b[dst:dst1], b[src:src1]) + return dst1, src1 +} + +// unescape unescapes b's entities in-place, so that "a<b" becomes "a<b". +// attribute should be true if parsing an attribute value. +func unescape(b []byte, attribute bool) []byte { + for i, c := range b { + if c == '&' { + dst, src := unescapeEntity(b, i, i, attribute) + for src < len(b) { + c := b[src] + if c == '&' { + dst, src = unescapeEntity(b, dst, src, attribute) + } else { + b[dst] = c + dst, src = dst+1, src+1 + } + } + return b[0:dst] + } + } + return b +} + +// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc". +func lower(b []byte) []byte { + for i, c := range b { + if 'A' <= c && c <= 'Z' { + b[i] = c + 'a' - 'A' + } + } + return b +} + +const escapedChars = "&'<>\"\r" + +func escape(w writer, s string) error { + i := strings.IndexAny(s, escapedChars) + for i != -1 { + if _, err := w.WriteString(s[:i]); err != nil { + return err + } + var esc string + switch s[i] { + case '&': + esc = "&" + case '\'': + // "'" is shorter than "'" and apos was not in HTML until HTML5. + esc = "'" + case '<': + esc = "<" + case '>': + esc = ">" + case '"': + // """ is shorter than """. + esc = """ + case '\r': + esc = " " + default: + panic("unrecognized escape character") + } + s = s[i+1:] + if _, err := w.WriteString(esc); err != nil { + return err + } + i = strings.IndexAny(s, escapedChars) + } + _, err := w.WriteString(s) + return err +} + +// EscapeString escapes special characters like "<" to become "<". It +// escapes only five such characters: <, >, &, ' and ". +// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't +// always true. +func EscapeString(s string) string { + if strings.IndexAny(s, escapedChars) == -1 { + return s + } + var buf bytes.Buffer + escape(&buf, s) + return buf.String() +} + +// UnescapeString unescapes entities like "<" to become "<". It unescapes a +// larger range of entities than EscapeString escapes. For example, "á" +// unescapes to "á", as does "á" and "&xE1;". +// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't +// always true. +func UnescapeString(s string) string { + for _, c := range s { + if c == '&' { + return string(unescape([]byte(s), false)) + } + } + return s +} diff --git a/vendor/golang.org/x/net/html/foreign.go b/vendor/golang.org/x/net/html/foreign.go new file mode 100644 index 000000000..d3b384409 --- /dev/null +++ b/vendor/golang.org/x/net/html/foreign.go @@ -0,0 +1,226 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "strings" +) + +func adjustAttributeNames(aa []Attribute, nameMap map[string]string) { + for i := range aa { + if newName, ok := nameMap[aa[i].Key]; ok { + aa[i].Key = newName + } + } +} + +func adjustForeignAttributes(aa []Attribute) { + for i, a := range aa { + if a.Key == "" || a.Key[0] != 'x' { + continue + } + switch a.Key { + case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show", + "xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink": + j := strings.Index(a.Key, ":") + aa[i].Namespace = a.Key[:j] + aa[i].Key = a.Key[j+1:] + } + } +} + +func htmlIntegrationPoint(n *Node) bool { + if n.Type != ElementNode { + return false + } + switch n.Namespace { + case "math": + if n.Data == "annotation-xml" { + for _, a := range n.Attr { + if a.Key == "encoding" { + val := strings.ToLower(a.Val) + if val == "text/html" || val == "application/xhtml+xml" { + return true + } + } + } + } + case "svg": + switch n.Data { + case "desc", "foreignObject", "title": + return true + } + } + return false +} + +func mathMLTextIntegrationPoint(n *Node) bool { + if n.Namespace != "math" { + return false + } + switch n.Data { + case "mi", "mo", "mn", "ms", "mtext": + return true + } + return false +} + +// Section 12.2.5.5. +var breakout = map[string]bool{ + "b": true, + "big": true, + "blockquote": true, + "body": true, + "br": true, + "center": true, + "code": true, + "dd": true, + "div": true, + "dl": true, + "dt": true, + "em": true, + "embed": true, + "h1": true, + "h2": true, + "h3": true, + "h4": true, + "h5": true, + "h6": true, + "head": true, + "hr": true, + "i": true, + "img": true, + "li": true, + "listing": true, + "menu": true, + "meta": true, + "nobr": true, + "ol": true, + "p": true, + "pre": true, + "ruby": true, + "s": true, + "small": true, + "span": true, + "strong": true, + "strike": true, + "sub": true, + "sup": true, + "table": true, + "tt": true, + "u": true, + "ul": true, + "var": true, +} + +// Section 12.2.5.5. +var svgTagNameAdjustments = map[string]string{ + "altglyph": "altGlyph", + "altglyphdef": "altGlyphDef", + "altglyphitem": "altGlyphItem", + "animatecolor": "animateColor", + "animatemotion": "animateMotion", + "animatetransform": "animateTransform", + "clippath": "clipPath", + "feblend": "feBlend", + "fecolormatrix": "feColorMatrix", + "fecomponenttransfer": "feComponentTransfer", + "fecomposite": "feComposite", + "feconvolvematrix": "feConvolveMatrix", + "fediffuselighting": "feDiffuseLighting", + "fedisplacementmap": "feDisplacementMap", + "fedistantlight": "feDistantLight", + "feflood": "feFlood", + "fefunca": "feFuncA", + "fefuncb": "feFuncB", + "fefuncg": "feFuncG", + "fefuncr": "feFuncR", + "fegaussianblur": "feGaussianBlur", + "feimage": "feImage", + "femerge": "feMerge", + "femergenode": "feMergeNode", + "femorphology": "feMorphology", + "feoffset": "feOffset", + "fepointlight": "fePointLight", + "fespecularlighting": "feSpecularLighting", + "fespotlight": "feSpotLight", + "fetile": "feTile", + "feturbulence": "feTurbulence", + "foreignobject": "foreignObject", + "glyphref": "glyphRef", + "lineargradient": "linearGradient", + "radialgradient": "radialGradient", + "textpath": "textPath", +} + +// Section 12.2.5.1 +var mathMLAttributeAdjustments = map[string]string{ + "definitionurl": "definitionURL", +} + +var svgAttributeAdjustments = map[string]string{ + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "contentscripttype": "contentScriptType", + "contentstyletype": "contentStyleType", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "externalresourcesrequired": "externalResourcesRequired", + "filterres": "filterRes", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": "patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + "specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", + "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan", +} diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go new file mode 100644 index 000000000..26b657aec --- /dev/null +++ b/vendor/golang.org/x/net/html/node.go @@ -0,0 +1,193 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "golang.org/x/net/html/atom" +) + +// A NodeType is the type of a Node. +type NodeType uint32 + +const ( + ErrorNode NodeType = iota + TextNode + DocumentNode + ElementNode + CommentNode + DoctypeNode + scopeMarkerNode +) + +// Section 12.2.3.3 says "scope markers are inserted when entering applet +// elements, buttons, object elements, marquees, table cells, and table +// captions, and are used to prevent formatting from 'leaking'". +var scopeMarker = Node{Type: scopeMarkerNode} + +// A Node consists of a NodeType and some Data (tag name for element nodes, +// content for text) and are part of a tree of Nodes. Element nodes may also +// have a Namespace and contain a slice of Attributes. Data is unescaped, so +// that it looks like "a<b" rather than "a<b". For element nodes, DataAtom +// is the atom for Data, or zero if Data is not a known tag name. +// +// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace. +// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and +// "svg" is short for "http://www.w3.org/2000/svg". +type Node struct { + Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node + + Type NodeType + DataAtom atom.Atom + Data string + Namespace string + Attr []Attribute +} + +// InsertBefore inserts newChild as a child of n, immediately before oldChild +// in the sequence of n's children. oldChild may be nil, in which case newChild +// is appended to the end of n's children. +// +// It will panic if newChild already has a parent or siblings. +func (n *Node) InsertBefore(newChild, oldChild *Node) { + if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil { + panic("html: InsertBefore called for an attached child Node") + } + var prev, next *Node + if oldChild != nil { + prev, next = oldChild.PrevSibling, oldChild + } else { + prev = n.LastChild + } + if prev != nil { + prev.NextSibling = newChild + } else { + n.FirstChild = newChild + } + if next != nil { + next.PrevSibling = newChild + } else { + n.LastChild = newChild + } + newChild.Parent = n + newChild.PrevSibling = prev + newChild.NextSibling = next +} + +// AppendChild adds a node c as a child of n. +// +// It will panic if c already has a parent or siblings. +func (n *Node) AppendChild(c *Node) { + if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil { + panic("html: AppendChild called for an attached child Node") + } + last := n.LastChild + if last != nil { + last.NextSibling = c + } else { + n.FirstChild = c + } + n.LastChild = c + c.Parent = n + c.PrevSibling = last +} + +// RemoveChild removes a node c that is a child of n. Afterwards, c will have +// no parent and no siblings. +// +// It will panic if c's parent is not n. +func (n *Node) RemoveChild(c *Node) { + if c.Parent != n { + panic("html: RemoveChild called for a non-child Node") + } + if n.FirstChild == c { + n.FirstChild = c.NextSibling + } + if c.NextSibling != nil { + c.NextSibling.PrevSibling = c.PrevSibling + } + if n.LastChild == c { + n.LastChild = c.PrevSibling + } + if c.PrevSibling != nil { + c.PrevSibling.NextSibling = c.NextSibling + } + c.Parent = nil + c.PrevSibling = nil + c.NextSibling = nil +} + +// reparentChildren reparents all of src's child nodes to dst. +func reparentChildren(dst, src *Node) { + for { + child := src.FirstChild + if child == nil { + break + } + src.RemoveChild(child) + dst.AppendChild(child) + } +} + +// clone returns a new node with the same type, data and attributes. +// The clone has no parent, no siblings and no children. +func (n *Node) clone() *Node { + m := &Node{ + Type: n.Type, + DataAtom: n.DataAtom, + Data: n.Data, + Attr: make([]Attribute, len(n.Attr)), + } + copy(m.Attr, n.Attr) + return m +} + +// nodeStack is a stack of nodes. +type nodeStack []*Node + +// pop pops the stack. It will panic if s is empty. +func (s *nodeStack) pop() *Node { + i := len(*s) + n := (*s)[i-1] + *s = (*s)[:i-1] + return n +} + +// top returns the most recently pushed node, or nil if s is empty. +func (s *nodeStack) top() *Node { + if i := len(*s); i > 0 { + return (*s)[i-1] + } + return nil +} + +// index returns the index of the top-most occurrence of n in the stack, or -1 +// if n is not present. +func (s *nodeStack) index(n *Node) int { + for i := len(*s) - 1; i >= 0; i-- { + if (*s)[i] == n { + return i + } + } + return -1 +} + +// insert inserts a node at the given index. +func (s *nodeStack) insert(i int, n *Node) { + (*s) = append(*s, nil) + copy((*s)[i+1:], (*s)[i:]) + (*s)[i] = n +} + +// remove removes a node from the stack. It is a no-op if n is not present. +func (s *nodeStack) remove(n *Node) { + i := s.index(n) + if i == -1 { + return + } + copy((*s)[i:], (*s)[i+1:]) + j := len(*s) - 1 + (*s)[j] = nil + *s = (*s)[:j] +} diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go new file mode 100644 index 000000000..be4b2bf5a --- /dev/null +++ b/vendor/golang.org/x/net/html/parse.go @@ -0,0 +1,2094 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "errors" + "fmt" + "io" + "strings" + + a "golang.org/x/net/html/atom" +) + +// A parser implements the HTML5 parsing algorithm: +// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction +type parser struct { + // tokenizer provides the tokens for the parser. + tokenizer *Tokenizer + // tok is the most recently read token. + tok Token + // Self-closing tags like <hr/> are treated as start tags, except that + // hasSelfClosingToken is set while they are being processed. + hasSelfClosingToken bool + // doc is the document root element. + doc *Node + // The stack of open elements (section 12.2.3.2) and active formatting + // elements (section 12.2.3.3). + oe, afe nodeStack + // Element pointers (section 12.2.3.4). + head, form *Node + // Other parsing state flags (section 12.2.3.5). + scripting, framesetOK bool + // im is the current insertion mode. + im insertionMode + // originalIM is the insertion mode to go back to after completing a text + // or inTableText insertion mode. + originalIM insertionMode + // fosterParenting is whether new elements should be inserted according to + // the foster parenting rules (section 12.2.5.3). + fosterParenting bool + // quirks is whether the parser is operating in "quirks mode." + quirks bool + // fragment is whether the parser is parsing an HTML fragment. + fragment bool + // context is the context element when parsing an HTML fragment + // (section 12.4). + context *Node +} + +func (p *parser) top() *Node { + if n := p.oe.top(); n != nil { + return n + } + return p.doc +} + +// Stop tags for use in popUntil. These come from section 12.2.3.2. +var ( + defaultScopeStopTags = map[string][]a.Atom{ + "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template}, + "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext}, + "svg": {a.Desc, a.ForeignObject, a.Title}, + } +) + +type scope int + +const ( + defaultScope scope = iota + listItemScope + buttonScope + tableScope + tableRowScope + tableBodyScope + selectScope +) + +// popUntil pops the stack of open elements at the highest element whose tag +// is in matchTags, provided there is no higher element in the scope's stop +// tags (as defined in section 12.2.3.2). It returns whether or not there was +// such an element. If there was not, popUntil leaves the stack unchanged. +// +// For example, the set of stop tags for table scope is: "html", "table". If +// the stack was: +// ["html", "body", "font", "table", "b", "i", "u"] +// then popUntil(tableScope, "font") would return false, but +// popUntil(tableScope, "i") would return true and the stack would become: +// ["html", "body", "font", "table", "b"] +// +// If an element's tag is in both the stop tags and matchTags, then the stack +// will be popped and the function returns true (provided, of course, there was +// no higher element in the stack that was also in the stop tags). For example, +// popUntil(tableScope, "table") returns true and leaves: +// ["html", "body", "font"] +func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool { + if i := p.indexOfElementInScope(s, matchTags...); i != -1 { + p.oe = p.oe[:i] + return true + } + return false +} + +// indexOfElementInScope returns the index in p.oe of the highest element whose +// tag is in matchTags that is in scope. If no matching element is in scope, it +// returns -1. +func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int { + for i := len(p.oe) - 1; i >= 0; i-- { + tagAtom := p.oe[i].DataAtom + if p.oe[i].Namespace == "" { + for _, t := range matchTags { + if t == tagAtom { + return i + } + } + switch s { + case defaultScope: + // No-op. + case listItemScope: + if tagAtom == a.Ol || tagAtom == a.Ul { + return -1 + } + case buttonScope: + if tagAtom == a.Button { + return -1 + } + case tableScope: + if tagAtom == a.Html || tagAtom == a.Table { + return -1 + } + case selectScope: + if tagAtom != a.Optgroup && tagAtom != a.Option { + return -1 + } + default: + panic("unreachable") + } + } + switch s { + case defaultScope, listItemScope, buttonScope: + for _, t := range defaultScopeStopTags[p.oe[i].Namespace] { + if t == tagAtom { + return -1 + } + } + } + } + return -1 +} + +// elementInScope is like popUntil, except that it doesn't modify the stack of +// open elements. +func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool { + return p.indexOfElementInScope(s, matchTags...) != -1 +} + +// clearStackToContext pops elements off the stack of open elements until a +// scope-defined element is found. +func (p *parser) clearStackToContext(s scope) { + for i := len(p.oe) - 1; i >= 0; i-- { + tagAtom := p.oe[i].DataAtom + switch s { + case tableScope: + if tagAtom == a.Html || tagAtom == a.Table { + p.oe = p.oe[:i+1] + return + } + case tableRowScope: + if tagAtom == a.Html || tagAtom == a.Tr { + p.oe = p.oe[:i+1] + return + } + case tableBodyScope: + if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead { + p.oe = p.oe[:i+1] + return + } + default: + panic("unreachable") + } + } +} + +// generateImpliedEndTags pops nodes off the stack of open elements as long as +// the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt. +// If exceptions are specified, nodes with that name will not be popped off. +func (p *parser) generateImpliedEndTags(exceptions ...string) { + var i int +loop: + for i = len(p.oe) - 1; i >= 0; i-- { + n := p.oe[i] + if n.Type == ElementNode { + switch n.DataAtom { + case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt: + for _, except := range exceptions { + if n.Data == except { + break loop + } + } + continue + } + } + break + } + + p.oe = p.oe[:i+1] +} + +// addChild adds a child node n to the top element, and pushes n onto the stack +// of open elements if it is an element node. +func (p *parser) addChild(n *Node) { + if p.shouldFosterParent() { + p.fosterParent(n) + } else { + p.top().AppendChild(n) + } + + if n.Type == ElementNode { + p.oe = append(p.oe, n) + } +} + +// shouldFosterParent returns whether the next node to be added should be +// foster parented. +func (p *parser) shouldFosterParent() bool { + if p.fosterParenting { + switch p.top().DataAtom { + case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: + return true + } + } + return false +} + +// fosterParent adds a child node according to the foster parenting rules. +// Section 12.2.5.3, "foster parenting". +func (p *parser) fosterParent(n *Node) { + var table, parent, prev *Node + var i int + for i = len(p.oe) - 1; i >= 0; i-- { + if p.oe[i].DataAtom == a.Table { + table = p.oe[i] + break + } + } + + if table == nil { + // The foster parent is the html element. + parent = p.oe[0] + } else { + parent = table.Parent + } + if parent == nil { + parent = p.oe[i-1] + } + + if table != nil { + prev = table.PrevSibling + } else { + prev = parent.LastChild + } + if prev != nil && prev.Type == TextNode && n.Type == TextNode { + prev.Data += n.Data + return + } + + parent.InsertBefore(n, table) +} + +// addText adds text to the preceding node if it is a text node, or else it +// calls addChild with a new text node. +func (p *parser) addText(text string) { + if text == "" { + return + } + + if p.shouldFosterParent() { + p.fosterParent(&Node{ + Type: TextNode, + Data: text, + }) + return + } + + t := p.top() + if n := t.LastChild; n != nil && n.Type == TextNode { + n.Data += text + return + } + p.addChild(&Node{ + Type: TextNode, + Data: text, + }) +} + +// addElement adds a child element based on the current token. +func (p *parser) addElement() { + p.addChild(&Node{ + Type: ElementNode, + DataAtom: p.tok.DataAtom, + Data: p.tok.Data, + Attr: p.tok.Attr, + }) +} + +// Section 12.2.3.3. +func (p *parser) addFormattingElement() { + tagAtom, attr := p.tok.DataAtom, p.tok.Attr + p.addElement() + + // Implement the Noah's Ark clause, but with three per family instead of two. + identicalElements := 0 +findIdenticalElements: + for i := len(p.afe) - 1; i >= 0; i-- { + n := p.afe[i] + if n.Type == scopeMarkerNode { + break + } + if n.Type != ElementNode { + continue + } + if n.Namespace != "" { + continue + } + if n.DataAtom != tagAtom { + continue + } + if len(n.Attr) != len(attr) { + continue + } + compareAttributes: + for _, t0 := range n.Attr { + for _, t1 := range attr { + if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val { + // Found a match for this attribute, continue with the next attribute. + continue compareAttributes + } + } + // If we get here, there is no attribute that matches a. + // Therefore the element is not identical to the new one. + continue findIdenticalElements + } + + identicalElements++ + if identicalElements >= 3 { + p.afe.remove(n) + } + } + + p.afe = append(p.afe, p.top()) +} + +// Section 12.2.3.3. +func (p *parser) clearActiveFormattingElements() { + for { + n := p.afe.pop() + if len(p.afe) == 0 || n.Type == scopeMarkerNode { + return + } + } +} + +// Section 12.2.3.3. +func (p *parser) reconstructActiveFormattingElements() { + n := p.afe.top() + if n == nil { + return + } + if n.Type == scopeMarkerNode || p.oe.index(n) != -1 { + return + } + i := len(p.afe) - 1 + for n.Type != scopeMarkerNode && p.oe.index(n) == -1 { + if i == 0 { + i = -1 + break + } + i-- + n = p.afe[i] + } + for { + i++ + clone := p.afe[i].clone() + p.addChild(clone) + p.afe[i] = clone + if i == len(p.afe)-1 { + break + } + } +} + +// Section 12.2.4. +func (p *parser) acknowledgeSelfClosingTag() { + p.hasSelfClosingToken = false +} + +// An insertion mode (section 12.2.3.1) is the state transition function from +// a particular state in the HTML5 parser's state machine. It updates the +// parser's fields depending on parser.tok (where ErrorToken means EOF). +// It returns whether the token was consumed. +type insertionMode func(*parser) bool + +// setOriginalIM sets the insertion mode to return to after completing a text or +// inTableText insertion mode. +// Section 12.2.3.1, "using the rules for". +func (p *parser) setOriginalIM() { + if p.originalIM != nil { + panic("html: bad parser state: originalIM was set twice") + } + p.originalIM = p.im +} + +// Section 12.2.3.1, "reset the insertion mode". +func (p *parser) resetInsertionMode() { + for i := len(p.oe) - 1; i >= 0; i-- { + n := p.oe[i] + if i == 0 && p.context != nil { + n = p.context + } + + switch n.DataAtom { + case a.Select: + p.im = inSelectIM + case a.Td, a.Th: + p.im = inCellIM + case a.Tr: + p.im = inRowIM + case a.Tbody, a.Thead, a.Tfoot: + p.im = inTableBodyIM + case a.Caption: + p.im = inCaptionIM + case a.Colgroup: + p.im = inColumnGroupIM + case a.Table: + p.im = inTableIM + case a.Head: + p.im = inBodyIM + case a.Body: + p.im = inBodyIM + case a.Frameset: + p.im = inFramesetIM + case a.Html: + p.im = beforeHeadIM + default: + continue + } + return + } + p.im = inBodyIM +} + +const whitespace = " \t\r\n\f" + +// Section 12.2.5.4.1. +func initialIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) + if len(p.tok.Data) == 0 { + // It was all whitespace, so ignore it. + return true + } + case CommentToken: + p.doc.AppendChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + n, quirks := parseDoctype(p.tok.Data) + p.doc.AppendChild(n) + p.quirks = quirks + p.im = beforeHTMLIM + return true + } + p.quirks = true + p.im = beforeHTMLIM + return false +} + +// Section 12.2.5.4.2. +func beforeHTMLIM(p *parser) bool { + switch p.tok.Type { + case DoctypeToken: + // Ignore the token. + return true + case TextToken: + p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) + if len(p.tok.Data) == 0 { + // It was all whitespace, so ignore it. + return true + } + case StartTagToken: + if p.tok.DataAtom == a.Html { + p.addElement() + p.im = beforeHeadIM + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Head, a.Body, a.Html, a.Br: + p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) + return false + default: + // Ignore the token. + return true + } + case CommentToken: + p.doc.AppendChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + } + p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) + return false +} + +// Section 12.2.5.4.3. +func beforeHeadIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) + if len(p.tok.Data) == 0 { + // It was all whitespace, so ignore it. + return true + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Head: + p.addElement() + p.head = p.top() + p.im = inHeadIM + return true + case a.Html: + return inBodyIM(p) + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Head, a.Body, a.Html, a.Br: + p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) + return false + default: + // Ignore the token. + return true + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + // Ignore the token. + return true + } + + p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) + return false +} + +// Section 12.2.5.4.4. +func inHeadIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) < len(p.tok.Data) { + // Add the initial whitespace to the current node. + p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) + if s == "" { + return true + } + p.tok.Data = s + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: + p.addElement() + p.oe.pop() + p.acknowledgeSelfClosingTag() + return true + case a.Script, a.Title, a.Noscript, a.Noframes, a.Style: + p.addElement() + p.setOriginalIM() + p.im = textIM + return true + case a.Head: + // Ignore the token. + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Head: + n := p.oe.pop() + if n.DataAtom != a.Head { + panic("html: bad parser state: <head> element not found, in the in-head insertion mode") + } + p.im = afterHeadIM + return true + case a.Body, a.Html, a.Br: + p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) + return false + default: + // Ignore the token. + return true + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + // Ignore the token. + return true + } + + p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) + return false +} + +// Section 12.2.5.4.6. +func afterHeadIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) < len(p.tok.Data) { + // Add the initial whitespace to the current node. + p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) + if s == "" { + return true + } + p.tok.Data = s + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Body: + p.addElement() + p.framesetOK = false + p.im = inBodyIM + return true + case a.Frameset: + p.addElement() + p.im = inFramesetIM + return true + case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title: + p.oe = append(p.oe, p.head) + defer p.oe.remove(p.head) + return inHeadIM(p) + case a.Head: + // Ignore the token. + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Body, a.Html, a.Br: + // Drop down to creating an implied <body> tag. + default: + // Ignore the token. + return true + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + // Ignore the token. + return true + } + + p.parseImpliedToken(StartTagToken, a.Body, a.Body.String()) + p.framesetOK = true + return false +} + +// copyAttributes copies attributes of src not found on dst to dst. +func copyAttributes(dst *Node, src Token) { + if len(src.Attr) == 0 { + return + } + attr := map[string]string{} + for _, t := range dst.Attr { + attr[t.Key] = t.Val + } + for _, t := range src.Attr { + if _, ok := attr[t.Key]; !ok { + dst.Attr = append(dst.Attr, t) + attr[t.Key] = t.Val + } + } +} + +// Section 12.2.5.4.7. +func inBodyIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + d := p.tok.Data + switch n := p.oe.top(); n.DataAtom { + case a.Pre, a.Listing: + if n.FirstChild == nil { + // Ignore a newline at the start of a <pre> block. + if d != "" && d[0] == '\r' { + d = d[1:] + } + if d != "" && d[0] == '\n' { + d = d[1:] + } + } + } + d = strings.Replace(d, "\x00", "", -1) + if d == "" { + return true + } + p.reconstructActiveFormattingElements() + p.addText(d) + if p.framesetOK && strings.TrimLeft(d, whitespace) != "" { + // There were non-whitespace characters inserted. + p.framesetOK = false + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + copyAttributes(p.oe[0], p.tok) + case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title: + return inHeadIM(p) + case a.Body: + if len(p.oe) >= 2 { + body := p.oe[1] + if body.Type == ElementNode && body.DataAtom == a.Body { + p.framesetOK = false + copyAttributes(body, p.tok) + } + } + case a.Frameset: + if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body { + // Ignore the token. + return true + } + body := p.oe[1] + if body.Parent != nil { + body.Parent.RemoveChild(body) + } + p.oe = p.oe[:1] + p.addElement() + p.im = inFramesetIM + return true + case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul: + p.popUntil(buttonScope, a.P) + p.addElement() + case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: + p.popUntil(buttonScope, a.P) + switch n := p.top(); n.DataAtom { + case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: + p.oe.pop() + } + p.addElement() + case a.Pre, a.Listing: + p.popUntil(buttonScope, a.P) + p.addElement() + // The newline, if any, will be dealt with by the TextToken case. + p.framesetOK = false + case a.Form: + if p.form == nil { + p.popUntil(buttonScope, a.P) + p.addElement() + p.form = p.top() + } + case a.Li: + p.framesetOK = false + for i := len(p.oe) - 1; i >= 0; i-- { + node := p.oe[i] + switch node.DataAtom { + case a.Li: + p.oe = p.oe[:i] + case a.Address, a.Div, a.P: + continue + default: + if !isSpecialElement(node) { + continue + } + } + break + } + p.popUntil(buttonScope, a.P) + p.addElement() + case a.Dd, a.Dt: + p.framesetOK = false + for i := len(p.oe) - 1; i >= 0; i-- { + node := p.oe[i] + switch node.DataAtom { + case a.Dd, a.Dt: + p.oe = p.oe[:i] + case a.Address, a.Div, a.P: + continue + default: + if !isSpecialElement(node) { + continue + } + } + break + } + p.popUntil(buttonScope, a.P) + p.addElement() + case a.Plaintext: + p.popUntil(buttonScope, a.P) + p.addElement() + case a.Button: + p.popUntil(defaultScope, a.Button) + p.reconstructActiveFormattingElements() + p.addElement() + p.framesetOK = false + case a.A: + for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- { + if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A { + p.inBodyEndTagFormatting(a.A) + p.oe.remove(n) + p.afe.remove(n) + break + } + } + p.reconstructActiveFormattingElements() + p.addFormattingElement() + case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: + p.reconstructActiveFormattingElements() + p.addFormattingElement() + case a.Nobr: + p.reconstructActiveFormattingElements() + if p.elementInScope(defaultScope, a.Nobr) { + p.inBodyEndTagFormatting(a.Nobr) + p.reconstructActiveFormattingElements() + } + p.addFormattingElement() + case a.Applet, a.Marquee, a.Object: + p.reconstructActiveFormattingElements() + p.addElement() + p.afe = append(p.afe, &scopeMarker) + p.framesetOK = false + case a.Table: + if !p.quirks { + p.popUntil(buttonScope, a.P) + } + p.addElement() + p.framesetOK = false + p.im = inTableIM + return true + case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr: + p.reconstructActiveFormattingElements() + p.addElement() + p.oe.pop() + p.acknowledgeSelfClosingTag() + if p.tok.DataAtom == a.Input { + for _, t := range p.tok.Attr { + if t.Key == "type" { + if strings.ToLower(t.Val) == "hidden" { + // Skip setting framesetOK = false + return true + } + } + } + } + p.framesetOK = false + case a.Param, a.Source, a.Track: + p.addElement() + p.oe.pop() + p.acknowledgeSelfClosingTag() + case a.Hr: + p.popUntil(buttonScope, a.P) + p.addElement() + p.oe.pop() + p.acknowledgeSelfClosingTag() + p.framesetOK = false + case a.Image: + p.tok.DataAtom = a.Img + p.tok.Data = a.Img.String() + return false + case a.Isindex: + if p.form != nil { + // Ignore the token. + return true + } + action := "" + prompt := "This is a searchable index. Enter search keywords: " + attr := []Attribute{{Key: "name", Val: "isindex"}} + for _, t := range p.tok.Attr { + switch t.Key { + case "action": + action = t.Val + case "name": + // Ignore the attribute. + case "prompt": + prompt = t.Val + default: + attr = append(attr, t) + } + } + p.acknowledgeSelfClosingTag() + p.popUntil(buttonScope, a.P) + p.parseImpliedToken(StartTagToken, a.Form, a.Form.String()) + if action != "" { + p.form.Attr = []Attribute{{Key: "action", Val: action}} + } + p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) + p.parseImpliedToken(StartTagToken, a.Label, a.Label.String()) + p.addText(prompt) + p.addChild(&Node{ + Type: ElementNode, + DataAtom: a.Input, + Data: a.Input.String(), + Attr: attr, + }) + p.oe.pop() + p.parseImpliedToken(EndTagToken, a.Label, a.Label.String()) + p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) + p.parseImpliedToken(EndTagToken, a.Form, a.Form.String()) + case a.Textarea: + p.addElement() + p.setOriginalIM() + p.framesetOK = false + p.im = textIM + case a.Xmp: + p.popUntil(buttonScope, a.P) + p.reconstructActiveFormattingElements() + p.framesetOK = false + p.addElement() + p.setOriginalIM() + p.im = textIM + case a.Iframe: + p.framesetOK = false + p.addElement() + p.setOriginalIM() + p.im = textIM + case a.Noembed, a.Noscript: + p.addElement() + p.setOriginalIM() + p.im = textIM + case a.Select: + p.reconstructActiveFormattingElements() + p.addElement() + p.framesetOK = false + p.im = inSelectIM + return true + case a.Optgroup, a.Option: + if p.top().DataAtom == a.Option { + p.oe.pop() + } + p.reconstructActiveFormattingElements() + p.addElement() + case a.Rp, a.Rt: + if p.elementInScope(defaultScope, a.Ruby) { + p.generateImpliedEndTags() + } + p.addElement() + case a.Math, a.Svg: + p.reconstructActiveFormattingElements() + if p.tok.DataAtom == a.Math { + adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments) + } else { + adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments) + } + adjustForeignAttributes(p.tok.Attr) + p.addElement() + p.top().Namespace = p.tok.Data + if p.hasSelfClosingToken { + p.oe.pop() + p.acknowledgeSelfClosingTag() + } + return true + case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: + // Ignore the token. + default: + p.reconstructActiveFormattingElements() + p.addElement() + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Body: + if p.elementInScope(defaultScope, a.Body) { + p.im = afterBodyIM + } + case a.Html: + if p.elementInScope(defaultScope, a.Body) { + p.parseImpliedToken(EndTagToken, a.Body, a.Body.String()) + return false + } + return true + case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: + p.popUntil(defaultScope, p.tok.DataAtom) + case a.Form: + node := p.form + p.form = nil + i := p.indexOfElementInScope(defaultScope, a.Form) + if node == nil || i == -1 || p.oe[i] != node { + // Ignore the token. + return true + } + p.generateImpliedEndTags() + p.oe.remove(node) + case a.P: + if !p.elementInScope(buttonScope, a.P) { + p.parseImpliedToken(StartTagToken, a.P, a.P.String()) + } + p.popUntil(buttonScope, a.P) + case a.Li: + p.popUntil(listItemScope, a.Li) + case a.Dd, a.Dt: + p.popUntil(defaultScope, p.tok.DataAtom) + case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: + p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6) + case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: + p.inBodyEndTagFormatting(p.tok.DataAtom) + case a.Applet, a.Marquee, a.Object: + if p.popUntil(defaultScope, p.tok.DataAtom) { + p.clearActiveFormattingElements() + } + case a.Br: + p.tok.Type = StartTagToken + return false + default: + p.inBodyEndTagOther(p.tok.DataAtom) + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + } + + return true +} + +func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { + // This is the "adoption agency" algorithm, described at + // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency + + // TODO: this is a fairly literal line-by-line translation of that algorithm. + // Once the code successfully parses the comprehensive test suite, we should + // refactor this code to be more idiomatic. + + // Steps 1-4. The outer loop. + for i := 0; i < 8; i++ { + // Step 5. Find the formatting element. + var formattingElement *Node + for j := len(p.afe) - 1; j >= 0; j-- { + if p.afe[j].Type == scopeMarkerNode { + break + } + if p.afe[j].DataAtom == tagAtom { + formattingElement = p.afe[j] + break + } + } + if formattingElement == nil { + p.inBodyEndTagOther(tagAtom) + return + } + feIndex := p.oe.index(formattingElement) + if feIndex == -1 { + p.afe.remove(formattingElement) + return + } + if !p.elementInScope(defaultScope, tagAtom) { + // Ignore the tag. + return + } + + // Steps 9-10. Find the furthest block. + var furthestBlock *Node + for _, e := range p.oe[feIndex:] { + if isSpecialElement(e) { + furthestBlock = e + break + } + } + if furthestBlock == nil { + e := p.oe.pop() + for e != formattingElement { + e = p.oe.pop() + } + p.afe.remove(e) + return + } + + // Steps 11-12. Find the common ancestor and bookmark node. + commonAncestor := p.oe[feIndex-1] + bookmark := p.afe.index(formattingElement) + + // Step 13. The inner loop. Find the lastNode to reparent. + lastNode := furthestBlock + node := furthestBlock + x := p.oe.index(node) + // Steps 13.1-13.2 + for j := 0; j < 3; j++ { + // Step 13.3. + x-- + node = p.oe[x] + // Step 13.4 - 13.5. + if p.afe.index(node) == -1 { + p.oe.remove(node) + continue + } + // Step 13.6. + if node == formattingElement { + break + } + // Step 13.7. + clone := node.clone() + p.afe[p.afe.index(node)] = clone + p.oe[p.oe.index(node)] = clone + node = clone + // Step 13.8. + if lastNode == furthestBlock { + bookmark = p.afe.index(node) + 1 + } + // Step 13.9. + if lastNode.Parent != nil { + lastNode.Parent.RemoveChild(lastNode) + } + node.AppendChild(lastNode) + // Step 13.10. + lastNode = node + } + + // Step 14. Reparent lastNode to the common ancestor, + // or for misnested table nodes, to the foster parent. + if lastNode.Parent != nil { + lastNode.Parent.RemoveChild(lastNode) + } + switch commonAncestor.DataAtom { + case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: + p.fosterParent(lastNode) + default: + commonAncestor.AppendChild(lastNode) + } + + // Steps 15-17. Reparent nodes from the furthest block's children + // to a clone of the formatting element. + clone := formattingElement.clone() + reparentChildren(clone, furthestBlock) + furthestBlock.AppendChild(clone) + + // Step 18. Fix up the list of active formatting elements. + if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark { + // Move the bookmark with the rest of the list. + bookmark-- + } + p.afe.remove(formattingElement) + p.afe.insert(bookmark, clone) + + // Step 19. Fix up the stack of open elements. + p.oe.remove(formattingElement) + p.oe.insert(p.oe.index(furthestBlock)+1, clone) + } +} + +// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM. +// "Any other end tag" handling from 12.2.5.5 The rules for parsing tokens in foreign content +// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign +func (p *parser) inBodyEndTagOther(tagAtom a.Atom) { + for i := len(p.oe) - 1; i >= 0; i-- { + if p.oe[i].DataAtom == tagAtom { + p.oe = p.oe[:i] + break + } + if isSpecialElement(p.oe[i]) { + break + } + } +} + +// Section 12.2.5.4.8. +func textIM(p *parser) bool { + switch p.tok.Type { + case ErrorToken: + p.oe.pop() + case TextToken: + d := p.tok.Data + if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil { + // Ignore a newline at the start of a <textarea> block. + if d != "" && d[0] == '\r' { + d = d[1:] + } + if d != "" && d[0] == '\n' { + d = d[1:] + } + } + if d == "" { + return true + } + p.addText(d) + return true + case EndTagToken: + p.oe.pop() + } + p.im = p.originalIM + p.originalIM = nil + return p.tok.Type == EndTagToken +} + +// Section 12.2.5.4.9. +func inTableIM(p *parser) bool { + switch p.tok.Type { + case ErrorToken: + // Stop parsing. + return true + case TextToken: + p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1) + switch p.oe.top().DataAtom { + case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: + if strings.Trim(p.tok.Data, whitespace) == "" { + p.addText(p.tok.Data) + return true + } + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Caption: + p.clearStackToContext(tableScope) + p.afe = append(p.afe, &scopeMarker) + p.addElement() + p.im = inCaptionIM + return true + case a.Colgroup: + p.clearStackToContext(tableScope) + p.addElement() + p.im = inColumnGroupIM + return true + case a.Col: + p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String()) + return false + case a.Tbody, a.Tfoot, a.Thead: + p.clearStackToContext(tableScope) + p.addElement() + p.im = inTableBodyIM + return true + case a.Td, a.Th, a.Tr: + p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String()) + return false + case a.Table: + if p.popUntil(tableScope, a.Table) { + p.resetInsertionMode() + return false + } + // Ignore the token. + return true + case a.Style, a.Script: + return inHeadIM(p) + case a.Input: + for _, t := range p.tok.Attr { + if t.Key == "type" && strings.ToLower(t.Val) == "hidden" { + p.addElement() + p.oe.pop() + return true + } + } + // Otherwise drop down to the default action. + case a.Form: + if p.form != nil { + // Ignore the token. + return true + } + p.addElement() + p.form = p.oe.pop() + case a.Select: + p.reconstructActiveFormattingElements() + switch p.top().DataAtom { + case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: + p.fosterParenting = true + } + p.addElement() + p.fosterParenting = false + p.framesetOK = false + p.im = inSelectInTableIM + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Table: + if p.popUntil(tableScope, a.Table) { + p.resetInsertionMode() + return true + } + // Ignore the token. + return true + case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: + // Ignore the token. + return true + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + // Ignore the token. + return true + } + + p.fosterParenting = true + defer func() { p.fosterParenting = false }() + + return inBodyIM(p) +} + +// Section 12.2.5.4.11. +func inCaptionIM(p *parser) bool { + switch p.tok.Type { + case StartTagToken: + switch p.tok.DataAtom { + case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr: + if p.popUntil(tableScope, a.Caption) { + p.clearActiveFormattingElements() + p.im = inTableIM + return false + } else { + // Ignore the token. + return true + } + case a.Select: + p.reconstructActiveFormattingElements() + p.addElement() + p.framesetOK = false + p.im = inSelectInTableIM + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Caption: + if p.popUntil(tableScope, a.Caption) { + p.clearActiveFormattingElements() + p.im = inTableIM + } + return true + case a.Table: + if p.popUntil(tableScope, a.Caption) { + p.clearActiveFormattingElements() + p.im = inTableIM + return false + } else { + // Ignore the token. + return true + } + case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: + // Ignore the token. + return true + } + } + return inBodyIM(p) +} + +// Section 12.2.5.4.12. +func inColumnGroupIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) < len(p.tok.Data) { + // Add the initial whitespace to the current node. + p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) + if s == "" { + return true + } + p.tok.Data = s + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + // Ignore the token. + return true + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Col: + p.addElement() + p.oe.pop() + p.acknowledgeSelfClosingTag() + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Colgroup: + if p.oe.top().DataAtom != a.Html { + p.oe.pop() + p.im = inTableIM + } + return true + case a.Col: + // Ignore the token. + return true + } + } + if p.oe.top().DataAtom != a.Html { + p.oe.pop() + p.im = inTableIM + return false + } + return true +} + +// Section 12.2.5.4.13. +func inTableBodyIM(p *parser) bool { + switch p.tok.Type { + case StartTagToken: + switch p.tok.DataAtom { + case a.Tr: + p.clearStackToContext(tableBodyScope) + p.addElement() + p.im = inRowIM + return true + case a.Td, a.Th: + p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String()) + return false + case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead: + if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) { + p.im = inTableIM + return false + } + // Ignore the token. + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Tbody, a.Tfoot, a.Thead: + if p.elementInScope(tableScope, p.tok.DataAtom) { + p.clearStackToContext(tableBodyScope) + p.oe.pop() + p.im = inTableIM + } + return true + case a.Table: + if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) { + p.im = inTableIM + return false + } + // Ignore the token. + return true + case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr: + // Ignore the token. + return true + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + } + + return inTableIM(p) +} + +// Section 12.2.5.4.14. +func inRowIM(p *parser) bool { + switch p.tok.Type { + case StartTagToken: + switch p.tok.DataAtom { + case a.Td, a.Th: + p.clearStackToContext(tableRowScope) + p.addElement() + p.afe = append(p.afe, &scopeMarker) + p.im = inCellIM + return true + case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr: + if p.popUntil(tableScope, a.Tr) { + p.im = inTableBodyIM + return false + } + // Ignore the token. + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Tr: + if p.popUntil(tableScope, a.Tr) { + p.im = inTableBodyIM + return true + } + // Ignore the token. + return true + case a.Table: + if p.popUntil(tableScope, a.Tr) { + p.im = inTableBodyIM + return false + } + // Ignore the token. + return true + case a.Tbody, a.Tfoot, a.Thead: + if p.elementInScope(tableScope, p.tok.DataAtom) { + p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String()) + return false + } + // Ignore the token. + return true + case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th: + // Ignore the token. + return true + } + } + + return inTableIM(p) +} + +// Section 12.2.5.4.15. +func inCellIM(p *parser) bool { + switch p.tok.Type { + case StartTagToken: + switch p.tok.DataAtom { + case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: + if p.popUntil(tableScope, a.Td, a.Th) { + // Close the cell and reprocess. + p.clearActiveFormattingElements() + p.im = inRowIM + return false + } + // Ignore the token. + return true + case a.Select: + p.reconstructActiveFormattingElements() + p.addElement() + p.framesetOK = false + p.im = inSelectInTableIM + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Td, a.Th: + if !p.popUntil(tableScope, p.tok.DataAtom) { + // Ignore the token. + return true + } + p.clearActiveFormattingElements() + p.im = inRowIM + return true + case a.Body, a.Caption, a.Col, a.Colgroup, a.Html: + // Ignore the token. + return true + case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: + if !p.elementInScope(tableScope, p.tok.DataAtom) { + // Ignore the token. + return true + } + // Close the cell and reprocess. + p.popUntil(tableScope, a.Td, a.Th) + p.clearActiveFormattingElements() + p.im = inRowIM + return false + } + } + return inBodyIM(p) +} + +// Section 12.2.5.4.16. +func inSelectIM(p *parser) bool { + switch p.tok.Type { + case ErrorToken: + // Stop parsing. + return true + case TextToken: + p.addText(strings.Replace(p.tok.Data, "\x00", "", -1)) + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Option: + if p.top().DataAtom == a.Option { + p.oe.pop() + } + p.addElement() + case a.Optgroup: + if p.top().DataAtom == a.Option { + p.oe.pop() + } + if p.top().DataAtom == a.Optgroup { + p.oe.pop() + } + p.addElement() + case a.Select: + p.tok.Type = EndTagToken + return false + case a.Input, a.Keygen, a.Textarea: + if p.elementInScope(selectScope, a.Select) { + p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) + return false + } + // In order to properly ignore <textarea>, we need to change the tokenizer mode. + p.tokenizer.NextIsNotRawText() + // Ignore the token. + return true + case a.Script: + return inHeadIM(p) + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Option: + if p.top().DataAtom == a.Option { + p.oe.pop() + } + case a.Optgroup: + i := len(p.oe) - 1 + if p.oe[i].DataAtom == a.Option { + i-- + } + if p.oe[i].DataAtom == a.Optgroup { + p.oe = p.oe[:i] + } + case a.Select: + if p.popUntil(selectScope, a.Select) { + p.resetInsertionMode() + } + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case DoctypeToken: + // Ignore the token. + return true + } + + return true +} + +// Section 12.2.5.4.17. +func inSelectInTableIM(p *parser) bool { + switch p.tok.Type { + case StartTagToken, EndTagToken: + switch p.tok.DataAtom { + case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th: + if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) { + p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) + return false + } else { + // Ignore the token. + return true + } + } + } + return inSelectIM(p) +} + +// Section 12.2.5.4.18. +func afterBodyIM(p *parser) bool { + switch p.tok.Type { + case ErrorToken: + // Stop parsing. + return true + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) == 0 { + // It was all whitespace. + return inBodyIM(p) + } + case StartTagToken: + if p.tok.DataAtom == a.Html { + return inBodyIM(p) + } + case EndTagToken: + if p.tok.DataAtom == a.Html { + if !p.fragment { + p.im = afterAfterBodyIM + } + return true + } + case CommentToken: + // The comment is attached to the <html> element. + if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html { + panic("html: bad parser state: <html> element not found, in the after-body insertion mode") + } + p.oe[0].AppendChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + } + p.im = inBodyIM + return false +} + +// Section 12.2.5.4.19. +func inFramesetIM(p *parser) bool { + switch p.tok.Type { + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case TextToken: + // Ignore all text but whitespace. + s := strings.Map(func(c rune) rune { + switch c { + case ' ', '\t', '\n', '\f', '\r': + return c + } + return -1 + }, p.tok.Data) + if s != "" { + p.addText(s) + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Frameset: + p.addElement() + case a.Frame: + p.addElement() + p.oe.pop() + p.acknowledgeSelfClosingTag() + case a.Noframes: + return inHeadIM(p) + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Frameset: + if p.oe.top().DataAtom != a.Html { + p.oe.pop() + if p.oe.top().DataAtom != a.Frameset { + p.im = afterFramesetIM + return true + } + } + } + default: + // Ignore the token. + } + return true +} + +// Section 12.2.5.4.20. +func afterFramesetIM(p *parser) bool { + switch p.tok.Type { + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case TextToken: + // Ignore all text but whitespace. + s := strings.Map(func(c rune) rune { + switch c { + case ' ', '\t', '\n', '\f', '\r': + return c + } + return -1 + }, p.tok.Data) + if s != "" { + p.addText(s) + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Noframes: + return inHeadIM(p) + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Html: + p.im = afterAfterFramesetIM + return true + } + default: + // Ignore the token. + } + return true +} + +// Section 12.2.5.4.21. +func afterAfterBodyIM(p *parser) bool { + switch p.tok.Type { + case ErrorToken: + // Stop parsing. + return true + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) == 0 { + // It was all whitespace. + return inBodyIM(p) + } + case StartTagToken: + if p.tok.DataAtom == a.Html { + return inBodyIM(p) + } + case CommentToken: + p.doc.AppendChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + return inBodyIM(p) + } + p.im = inBodyIM + return false +} + +// Section 12.2.5.4.22. +func afterAfterFramesetIM(p *parser) bool { + switch p.tok.Type { + case CommentToken: + p.doc.AppendChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case TextToken: + // Ignore all text but whitespace. + s := strings.Map(func(c rune) rune { + switch c { + case ' ', '\t', '\n', '\f', '\r': + return c + } + return -1 + }, p.tok.Data) + if s != "" { + p.tok.Data = s + return inBodyIM(p) + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Noframes: + return inHeadIM(p) + } + case DoctypeToken: + return inBodyIM(p) + default: + // Ignore the token. + } + return true +} + +const whitespaceOrNUL = whitespace + "\x00" + +// Section 12.2.5.5. +func parseForeignContent(p *parser) bool { + switch p.tok.Type { + case TextToken: + if p.framesetOK { + p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == "" + } + p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1) + p.addText(p.tok.Data) + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case StartTagToken: + b := breakout[p.tok.Data] + if p.tok.DataAtom == a.Font { + loop: + for _, attr := range p.tok.Attr { + switch attr.Key { + case "color", "face", "size": + b = true + break loop + } + } + } + if b { + for i := len(p.oe) - 1; i >= 0; i-- { + n := p.oe[i] + if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) { + p.oe = p.oe[:i+1] + break + } + } + return false + } + switch p.top().Namespace { + case "math": + adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments) + case "svg": + // Adjust SVG tag names. The tokenizer lower-cases tag names, but + // SVG wants e.g. "foreignObject" with a capital second "O". + if x := svgTagNameAdjustments[p.tok.Data]; x != "" { + p.tok.DataAtom = a.Lookup([]byte(x)) + p.tok.Data = x + } + adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments) + default: + panic("html: bad parser state: unexpected namespace") + } + adjustForeignAttributes(p.tok.Attr) + namespace := p.top().Namespace + p.addElement() + p.top().Namespace = namespace + if namespace != "" { + // Don't let the tokenizer go into raw text mode in foreign content + // (e.g. in an SVG <title> tag). + p.tokenizer.NextIsNotRawText() + } + if p.hasSelfClosingToken { + p.oe.pop() + p.acknowledgeSelfClosingTag() + } + case EndTagToken: + for i := len(p.oe) - 1; i >= 0; i-- { + if p.oe[i].Namespace == "" { + return p.im(p) + } + if strings.EqualFold(p.oe[i].Data, p.tok.Data) { + p.oe = p.oe[:i] + break + } + } + return true + default: + // Ignore the token. + } + return true +} + +// Section 12.2.5. +func (p *parser) inForeignContent() bool { + if len(p.oe) == 0 { + return false + } + n := p.oe[len(p.oe)-1] + if n.Namespace == "" { + return false + } + if mathMLTextIntegrationPoint(n) { + if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark { + return false + } + if p.tok.Type == TextToken { + return false + } + } + if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg { + return false + } + if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) { + return false + } + if p.tok.Type == ErrorToken { + return false + } + return true +} + +// parseImpliedToken parses a token as though it had appeared in the parser's +// input. +func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) { + realToken, selfClosing := p.tok, p.hasSelfClosingToken + p.tok = Token{ + Type: t, + DataAtom: dataAtom, + Data: data, + } + p.hasSelfClosingToken = false + p.parseCurrentToken() + p.tok, p.hasSelfClosingToken = realToken, selfClosing +} + +// parseCurrentToken runs the current token through the parsing routines +// until it is consumed. +func (p *parser) parseCurrentToken() { + if p.tok.Type == SelfClosingTagToken { + p.hasSelfClosingToken = true + p.tok.Type = StartTagToken + } + + consumed := false + for !consumed { + if p.inForeignContent() { + consumed = parseForeignContent(p) + } else { + consumed = p.im(p) + } + } + + if p.hasSelfClosingToken { + // This is a parse error, but ignore it. + p.hasSelfClosingToken = false + } +} + +func (p *parser) parse() error { + // Iterate until EOF. Any other error will cause an early return. + var err error + for err != io.EOF { + // CDATA sections are allowed only in foreign content. + n := p.oe.top() + p.tokenizer.AllowCDATA(n != nil && n.Namespace != "") + // Read and parse the next token. + p.tokenizer.Next() + p.tok = p.tokenizer.Token() + if p.tok.Type == ErrorToken { + err = p.tokenizer.Err() + if err != nil && err != io.EOF { + return err + } + } + p.parseCurrentToken() + } + return nil +} + +// Parse returns the parse tree for the HTML from the given Reader. +// The input is assumed to be UTF-8 encoded. +func Parse(r io.Reader) (*Node, error) { + p := &parser{ + tokenizer: NewTokenizer(r), + doc: &Node{ + Type: DocumentNode, + }, + scripting: true, + framesetOK: true, + im: initialIM, + } + err := p.parse() + if err != nil { + return nil, err + } + return p.doc, nil +} + +// ParseFragment parses a fragment of HTML and returns the nodes that were +// found. If the fragment is the InnerHTML for an existing element, pass that +// element in context. +func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { + contextTag := "" + if context != nil { + if context.Type != ElementNode { + return nil, errors.New("html: ParseFragment of non-element Node") + } + // The next check isn't just context.DataAtom.String() == context.Data because + // it is valid to pass an element whose tag isn't a known atom. For example, + // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent. + if context.DataAtom != a.Lookup([]byte(context.Data)) { + return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data) + } + contextTag = context.DataAtom.String() + } + p := &parser{ + tokenizer: NewTokenizerFragment(r, contextTag), + doc: &Node{ + Type: DocumentNode, + }, + scripting: true, + fragment: true, + context: context, + } + + root := &Node{ + Type: ElementNode, + DataAtom: a.Html, + Data: a.Html.String(), + } + p.doc.AppendChild(root) + p.oe = nodeStack{root} + p.resetInsertionMode() + + for n := context; n != nil; n = n.Parent { + if n.Type == ElementNode && n.DataAtom == a.Form { + p.form = n + break + } + } + + err := p.parse() + if err != nil { + return nil, err + } + + parent := p.doc + if context != nil { + parent = root + } + + var result []*Node + for c := parent.FirstChild; c != nil; { + next := c.NextSibling + parent.RemoveChild(c) + result = append(result, c) + c = next + } + return result, nil +} diff --git a/vendor/golang.org/x/net/html/render.go b/vendor/golang.org/x/net/html/render.go new file mode 100644 index 000000000..d34564f49 --- /dev/null +++ b/vendor/golang.org/x/net/html/render.go @@ -0,0 +1,271 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "bufio" + "errors" + "fmt" + "io" + "strings" +) + +type writer interface { + io.Writer + io.ByteWriter + WriteString(string) (int, error) +} + +// Render renders the parse tree n to the given writer. +// +// Rendering is done on a 'best effort' basis: calling Parse on the output of +// Render will always result in something similar to the original tree, but it +// is not necessarily an exact clone unless the original tree was 'well-formed'. +// 'Well-formed' is not easily specified; the HTML5 specification is +// complicated. +// +// Calling Parse on arbitrary input typically results in a 'well-formed' parse +// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree. +// For example, in a 'well-formed' parse tree, no <a> element is a child of +// another <a> element: parsing "<a><a>" results in two sibling elements. +// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a +// <table> element: parsing "<p><table><a>" results in a <p> with two sibling +// children; the <a> is reparented to the <table>'s parent. However, calling +// Parse on "<a><table><a>" does not return an error, but the result has an <a> +// element with an <a> child, and is therefore not 'well-formed'. +// +// Programmatically constructed trees are typically also 'well-formed', but it +// is possible to construct a tree that looks innocuous but, when rendered and +// re-parsed, results in a different tree. A simple example is that a solitary +// text node would become a tree containing <html>, <head> and <body> elements. +// Another example is that the programmatic equivalent of "a<head>b</head>c" +// becomes "<html><head><head/><body>abc</body></html>". +func Render(w io.Writer, n *Node) error { + if x, ok := w.(writer); ok { + return render(x, n) + } + buf := bufio.NewWriter(w) + if err := render(buf, n); err != nil { + return err + } + return buf.Flush() +} + +// plaintextAbort is returned from render1 when a <plaintext> element +// has been rendered. No more end tags should be rendered after that. +var plaintextAbort = errors.New("html: internal error (plaintext abort)") + +func render(w writer, n *Node) error { + err := render1(w, n) + if err == plaintextAbort { + err = nil + } + return err +} + +func render1(w writer, n *Node) error { + // Render non-element nodes; these are the easy cases. + switch n.Type { + case ErrorNode: + return errors.New("html: cannot render an ErrorNode node") + case TextNode: + return escape(w, n.Data) + case DocumentNode: + for c := n.FirstChild; c != nil; c = c.NextSibling { + if err := render1(w, c); err != nil { + return err + } + } + return nil + case ElementNode: + // No-op. + case CommentNode: + if _, err := w.WriteString("<!--"); err != nil { + return err + } + if _, err := w.WriteString(n.Data); err != nil { + return err + } + if _, err := w.WriteString("-->"); err != nil { + return err + } + return nil + case DoctypeNode: + if _, err := w.WriteString("<!DOCTYPE "); err != nil { + return err + } + if _, err := w.WriteString(n.Data); err != nil { + return err + } + if n.Attr != nil { + var p, s string + for _, a := range n.Attr { + switch a.Key { + case "public": + p = a.Val + case "system": + s = a.Val + } + } + if p != "" { + if _, err := w.WriteString(" PUBLIC "); err != nil { + return err + } + if err := writeQuoted(w, p); err != nil { + return err + } + if s != "" { + if err := w.WriteByte(' '); err != nil { + return err + } + if err := writeQuoted(w, s); err != nil { + return err + } + } + } else if s != "" { + if _, err := w.WriteString(" SYSTEM "); err != nil { + return err + } + if err := writeQuoted(w, s); err != nil { + return err + } + } + } + return w.WriteByte('>') + default: + return errors.New("html: unknown node type") + } + + // Render the <xxx> opening tag. + if err := w.WriteByte('<'); err != nil { + return err + } + if _, err := w.WriteString(n.Data); err != nil { + return err + } + for _, a := range n.Attr { + if err := w.WriteByte(' '); err != nil { + return err + } + if a.Namespace != "" { + if _, err := w.WriteString(a.Namespace); err != nil { + return err + } + if err := w.WriteByte(':'); err != nil { + return err + } + } + if _, err := w.WriteString(a.Key); err != nil { + return err + } + if _, err := w.WriteString(`="`); err != nil { + return err + } + if err := escape(w, a.Val); err != nil { + return err + } + if err := w.WriteByte('"'); err != nil { + return err + } + } + if voidElements[n.Data] { + if n.FirstChild != nil { + return fmt.Errorf("html: void element <%s> has child nodes", n.Data) + } + _, err := w.WriteString("/>") + return err + } + if err := w.WriteByte('>'); err != nil { + return err + } + + // Add initial newline where there is danger of a newline beging ignored. + if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") { + switch n.Data { + case "pre", "listing", "textarea": + if err := w.WriteByte('\n'); err != nil { + return err + } + } + } + + // Render any child nodes. + switch n.Data { + case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": + for c := n.FirstChild; c != nil; c = c.NextSibling { + if c.Type == TextNode { + if _, err := w.WriteString(c.Data); err != nil { + return err + } + } else { + if err := render1(w, c); err != nil { + return err + } + } + } + if n.Data == "plaintext" { + // Don't render anything else. <plaintext> must be the + // last element in the file, with no closing tag. + return plaintextAbort + } + default: + for c := n.FirstChild; c != nil; c = c.NextSibling { + if err := render1(w, c); err != nil { + return err + } + } + } + + // Render the </xxx> closing tag. + if _, err := w.WriteString("</"); err != nil { + return err + } + if _, err := w.WriteString(n.Data); err != nil { + return err + } + return w.WriteByte('>') +} + +// writeQuoted writes s to w surrounded by quotes. Normally it will use double +// quotes, but if s contains a double quote, it will use single quotes. +// It is used for writing the identifiers in a doctype declaration. +// In valid HTML, they can't contain both types of quotes. +func writeQuoted(w writer, s string) error { + var q byte = '"' + if strings.Contains(s, `"`) { + q = '\'' + } + if err := w.WriteByte(q); err != nil { + return err + } + if _, err := w.WriteString(s); err != nil { + return err + } + if err := w.WriteByte(q); err != nil { + return err + } + return nil +} + +// Section 12.1.2, "Elements", gives this list of void elements. Void elements +// are those that can't have any contents. +var voidElements = map[string]bool{ + "area": true, + "base": true, + "br": true, + "col": true, + "command": true, + "embed": true, + "hr": true, + "img": true, + "input": true, + "keygen": true, + "link": true, + "meta": true, + "param": true, + "source": true, + "track": true, + "wbr": true, +} diff --git a/vendor/golang.org/x/net/html/token.go b/vendor/golang.org/x/net/html/token.go new file mode 100644 index 000000000..893e272a9 --- /dev/null +++ b/vendor/golang.org/x/net/html/token.go @@ -0,0 +1,1219 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "bytes" + "errors" + "io" + "strconv" + "strings" + + "golang.org/x/net/html/atom" +) + +// A TokenType is the type of a Token. +type TokenType uint32 + +const ( + // ErrorToken means that an error occurred during tokenization. + ErrorToken TokenType = iota + // TextToken means a text node. + TextToken + // A StartTagToken looks like <a>. + StartTagToken + // An EndTagToken looks like </a>. + EndTagToken + // A SelfClosingTagToken tag looks like <br/>. + SelfClosingTagToken + // A CommentToken looks like <!--x-->. + CommentToken + // A DoctypeToken looks like <!DOCTYPE x> + DoctypeToken +) + +// ErrBufferExceeded means that the buffering limit was exceeded. +var ErrBufferExceeded = errors.New("max buffer exceeded") + +// String returns a string representation of the TokenType. +func (t TokenType) String() string { + switch t { + case ErrorToken: + return "Error" + case TextToken: + return "Text" + case StartTagToken: + return "StartTag" + case EndTagToken: + return "EndTag" + case SelfClosingTagToken: + return "SelfClosingTag" + case CommentToken: + return "Comment" + case DoctypeToken: + return "Doctype" + } + return "Invalid(" + strconv.Itoa(int(t)) + ")" +} + +// An Attribute is an attribute namespace-key-value triple. Namespace is +// non-empty for foreign attributes like xlink, Key is alphabetic (and hence +// does not contain escapable characters like '&', '<' or '>'), and Val is +// unescaped (it looks like "a<b" rather than "a<b"). +// +// Namespace is only used by the parser, not the tokenizer. +type Attribute struct { + Namespace, Key, Val string +} + +// A Token consists of a TokenType and some Data (tag name for start and end +// tags, content for text, comments and doctypes). A tag Token may also contain +// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b" +// rather than "a<b"). For tag Tokens, DataAtom is the atom for Data, or +// zero if Data is not a known tag name. +type Token struct { + Type TokenType + DataAtom atom.Atom + Data string + Attr []Attribute +} + +// tagString returns a string representation of a tag Token's Data and Attr. +func (t Token) tagString() string { + if len(t.Attr) == 0 { + return t.Data + } + buf := bytes.NewBufferString(t.Data) + for _, a := range t.Attr { + buf.WriteByte(' ') + buf.WriteString(a.Key) + buf.WriteString(`="`) + escape(buf, a.Val) + buf.WriteByte('"') + } + return buf.String() +} + +// String returns a string representation of the Token. +func (t Token) String() string { + switch t.Type { + case ErrorToken: + return "" + case TextToken: + return EscapeString(t.Data) + case StartTagToken: + return "<" + t.tagString() + ">" + case EndTagToken: + return "</" + t.tagString() + ">" + case SelfClosingTagToken: + return "<" + t.tagString() + "/>" + case CommentToken: + return "<!--" + t.Data + "-->" + case DoctypeToken: + return "<!DOCTYPE " + t.Data + ">" + } + return "Invalid(" + strconv.Itoa(int(t.Type)) + ")" +} + +// span is a range of bytes in a Tokenizer's buffer. The start is inclusive, +// the end is exclusive. +type span struct { + start, end int +} + +// A Tokenizer returns a stream of HTML Tokens. +type Tokenizer struct { + // r is the source of the HTML text. + r io.Reader + // tt is the TokenType of the current token. + tt TokenType + // err is the first error encountered during tokenization. It is possible + // for tt != Error && err != nil to hold: this means that Next returned a + // valid token but the subsequent Next call will return an error token. + // For example, if the HTML text input was just "plain", then the first + // Next call would set z.err to io.EOF but return a TextToken, and all + // subsequent Next calls would return an ErrorToken. + // err is never reset. Once it becomes non-nil, it stays non-nil. + err error + // readErr is the error returned by the io.Reader r. It is separate from + // err because it is valid for an io.Reader to return (n int, err1 error) + // such that n > 0 && err1 != nil, and callers should always process the + // n > 0 bytes before considering the error err1. + readErr error + // buf[raw.start:raw.end] holds the raw bytes of the current token. + // buf[raw.end:] is buffered input that will yield future tokens. + raw span + buf []byte + // maxBuf limits the data buffered in buf. A value of 0 means unlimited. + maxBuf int + // buf[data.start:data.end] holds the raw bytes of the current token's data: + // a text token's text, a tag token's tag name, etc. + data span + // pendingAttr is the attribute key and value currently being tokenized. + // When complete, pendingAttr is pushed onto attr. nAttrReturned is + // incremented on each call to TagAttr. + pendingAttr [2]span + attr [][2]span + nAttrReturned int + // rawTag is the "script" in "</script>" that closes the next token. If + // non-empty, the subsequent call to Next will return a raw or RCDATA text + // token: one that treats "<p>" as text instead of an element. + // rawTag's contents are lower-cased. + rawTag string + // textIsRaw is whether the current text token's data is not escaped. + textIsRaw bool + // convertNUL is whether NUL bytes in the current token's data should + // be converted into \ufffd replacement characters. + convertNUL bool + // allowCDATA is whether CDATA sections are allowed in the current context. + allowCDATA bool +} + +// AllowCDATA sets whether or not the tokenizer recognizes <![CDATA[foo]]> as +// the text "foo". The default value is false, which means to recognize it as +// a bogus comment "<!-- [CDATA[foo]] -->" instead. +// +// Strictly speaking, an HTML5 compliant tokenizer should allow CDATA if and +// only if tokenizing foreign content, such as MathML and SVG. However, +// tracking foreign-contentness is difficult to do purely in the tokenizer, +// as opposed to the parser, due to HTML integration points: an <svg> element +// can contain a <foreignObject> that is foreign-to-SVG but not foreign-to- +// HTML. For strict compliance with the HTML5 tokenization algorithm, it is the +// responsibility of the user of a tokenizer to call AllowCDATA as appropriate. +// In practice, if using the tokenizer without caring whether MathML or SVG +// CDATA is text or comments, such as tokenizing HTML to find all the anchor +// text, it is acceptable to ignore this responsibility. +func (z *Tokenizer) AllowCDATA(allowCDATA bool) { + z.allowCDATA = allowCDATA +} + +// NextIsNotRawText instructs the tokenizer that the next token should not be +// considered as 'raw text'. Some elements, such as script and title elements, +// normally require the next token after the opening tag to be 'raw text' that +// has no child elements. For example, tokenizing "<title>a<b>c</b>d</title>" +// yields a start tag token for "<title>", a text token for "a<b>c</b>d", and +// an end tag token for "</title>". There are no distinct start tag or end tag +// tokens for the "<b>" and "</b>". +// +// This tokenizer implementation will generally look for raw text at the right +// times. Strictly speaking, an HTML5 compliant tokenizer should not look for +// raw text if in foreign content: <title> generally needs raw text, but a +// <title> inside an <svg> does not. Another example is that a <textarea> +// generally needs raw text, but a <textarea> is not allowed as an immediate +// child of a <select>; in normal parsing, a <textarea> implies </select>, but +// one cannot close the implicit element when parsing a <select>'s InnerHTML. +// Similarly to AllowCDATA, tracking the correct moment to override raw-text- +// ness is difficult to do purely in the tokenizer, as opposed to the parser. +// For strict compliance with the HTML5 tokenization algorithm, it is the +// responsibility of the user of a tokenizer to call NextIsNotRawText as +// appropriate. In practice, like AllowCDATA, it is acceptable to ignore this +// responsibility for basic usage. +// +// Note that this 'raw text' concept is different from the one offered by the +// Tokenizer.Raw method. +func (z *Tokenizer) NextIsNotRawText() { + z.rawTag = "" +} + +// Err returns the error associated with the most recent ErrorToken token. +// This is typically io.EOF, meaning the end of tokenization. +func (z *Tokenizer) Err() error { + if z.tt != ErrorToken { + return nil + } + return z.err +} + +// readByte returns the next byte from the input stream, doing a buffered read +// from z.r into z.buf if necessary. z.buf[z.raw.start:z.raw.end] remains a contiguous byte +// slice that holds all the bytes read so far for the current token. +// It sets z.err if the underlying reader returns an error. +// Pre-condition: z.err == nil. +func (z *Tokenizer) readByte() byte { + if z.raw.end >= len(z.buf) { + // Our buffer is exhausted and we have to read from z.r. Check if the + // previous read resulted in an error. + if z.readErr != nil { + z.err = z.readErr + return 0 + } + // We copy z.buf[z.raw.start:z.raw.end] to the beginning of z.buf. If the length + // z.raw.end - z.raw.start is more than half the capacity of z.buf, then we + // allocate a new buffer before the copy. + c := cap(z.buf) + d := z.raw.end - z.raw.start + var buf1 []byte + if 2*d > c { + buf1 = make([]byte, d, 2*c) + } else { + buf1 = z.buf[:d] + } + copy(buf1, z.buf[z.raw.start:z.raw.end]) + if x := z.raw.start; x != 0 { + // Adjust the data/attr spans to refer to the same contents after the copy. + z.data.start -= x + z.data.end -= x + z.pendingAttr[0].start -= x + z.pendingAttr[0].end -= x + z.pendingAttr[1].start -= x + z.pendingAttr[1].end -= x + for i := range z.attr { + z.attr[i][0].start -= x + z.attr[i][0].end -= x + z.attr[i][1].start -= x + z.attr[i][1].end -= x + } + } + z.raw.start, z.raw.end, z.buf = 0, d, buf1[:d] + // Now that we have copied the live bytes to the start of the buffer, + // we read from z.r into the remainder. + var n int + n, z.readErr = readAtLeastOneByte(z.r, buf1[d:cap(buf1)]) + if n == 0 { + z.err = z.readErr + return 0 + } + z.buf = buf1[:d+n] + } + x := z.buf[z.raw.end] + z.raw.end++ + if z.maxBuf > 0 && z.raw.end-z.raw.start >= z.maxBuf { + z.err = ErrBufferExceeded + return 0 + } + return x +} + +// Buffered returns a slice containing data buffered but not yet tokenized. +func (z *Tokenizer) Buffered() []byte { + return z.buf[z.raw.end:] +} + +// readAtLeastOneByte wraps an io.Reader so that reading cannot return (0, nil). +// It returns io.ErrNoProgress if the underlying r.Read method returns (0, nil) +// too many times in succession. +func readAtLeastOneByte(r io.Reader, b []byte) (int, error) { + for i := 0; i < 100; i++ { + n, err := r.Read(b) + if n != 0 || err != nil { + return n, err + } + } + return 0, io.ErrNoProgress +} + +// skipWhiteSpace skips past any white space. +func (z *Tokenizer) skipWhiteSpace() { + if z.err != nil { + return + } + for { + c := z.readByte() + if z.err != nil { + return + } + switch c { + case ' ', '\n', '\r', '\t', '\f': + // No-op. + default: + z.raw.end-- + return + } + } +} + +// readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and +// is typically something like "script" or "textarea". +func (z *Tokenizer) readRawOrRCDATA() { + if z.rawTag == "script" { + z.readScript() + z.textIsRaw = true + z.rawTag = "" + return + } +loop: + for { + c := z.readByte() + if z.err != nil { + break loop + } + if c != '<' { + continue loop + } + c = z.readByte() + if z.err != nil { + break loop + } + if c != '/' { + continue loop + } + if z.readRawEndTag() || z.err != nil { + break loop + } + } + z.data.end = z.raw.end + // A textarea's or title's RCDATA can contain escaped entities. + z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title" + z.rawTag = "" +} + +// readRawEndTag attempts to read a tag like "</foo>", where "foo" is z.rawTag. +// If it succeeds, it backs up the input position to reconsume the tag and +// returns true. Otherwise it returns false. The opening "</" has already been +// consumed. +func (z *Tokenizer) readRawEndTag() bool { + for i := 0; i < len(z.rawTag); i++ { + c := z.readByte() + if z.err != nil { + return false + } + if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') { + z.raw.end-- + return false + } + } + c := z.readByte() + if z.err != nil { + return false + } + switch c { + case ' ', '\n', '\r', '\t', '\f', '/', '>': + // The 3 is 2 for the leading "</" plus 1 for the trailing character c. + z.raw.end -= 3 + len(z.rawTag) + return true + } + z.raw.end-- + return false +} + +// readScript reads until the next </script> tag, following the byzantine +// rules for escaping/hiding the closing tag. +func (z *Tokenizer) readScript() { + defer func() { + z.data.end = z.raw.end + }() + var c byte + +scriptData: + c = z.readByte() + if z.err != nil { + return + } + if c == '<' { + goto scriptDataLessThanSign + } + goto scriptData + +scriptDataLessThanSign: + c = z.readByte() + if z.err != nil { + return + } + switch c { + case '/': + goto scriptDataEndTagOpen + case '!': + goto scriptDataEscapeStart + } + z.raw.end-- + goto scriptData + +scriptDataEndTagOpen: + if z.readRawEndTag() || z.err != nil { + return + } + goto scriptData + +scriptDataEscapeStart: + c = z.readByte() + if z.err != nil { + return + } + if c == '-' { + goto scriptDataEscapeStartDash + } + z.raw.end-- + goto scriptData + +scriptDataEscapeStartDash: + c = z.readByte() + if z.err != nil { + return + } + if c == '-' { + goto scriptDataEscapedDashDash + } + z.raw.end-- + goto scriptData + +scriptDataEscaped: + c = z.readByte() + if z.err != nil { + return + } + switch c { + case '-': + goto scriptDataEscapedDash + case '<': + goto scriptDataEscapedLessThanSign + } + goto scriptDataEscaped + +scriptDataEscapedDash: + c = z.readByte() + if z.err != nil { + return + } + switch c { + case '-': + goto scriptDataEscapedDashDash + case '<': + goto scriptDataEscapedLessThanSign + } + goto scriptDataEscaped + +scriptDataEscapedDashDash: + c = z.readByte() + if z.err != nil { + return + } + switch c { + case '-': + goto scriptDataEscapedDashDash + case '<': + goto scriptDataEscapedLessThanSign + case '>': + goto scriptData + } + goto scriptDataEscaped + +scriptDataEscapedLessThanSign: + c = z.readByte() + if z.err != nil { + return + } + if c == '/' { + goto scriptDataEscapedEndTagOpen + } + if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' { + goto scriptDataDoubleEscapeStart + } + z.raw.end-- + goto scriptData + +scriptDataEscapedEndTagOpen: + if z.readRawEndTag() || z.err != nil { + return + } + goto scriptDataEscaped + +scriptDataDoubleEscapeStart: + z.raw.end-- + for i := 0; i < len("script"); i++ { + c = z.readByte() + if z.err != nil { + return + } + if c != "script"[i] && c != "SCRIPT"[i] { + z.raw.end-- + goto scriptDataEscaped + } + } + c = z.readByte() + if z.err != nil { + return + } + switch c { + case ' ', '\n', '\r', '\t', '\f', '/', '>': + goto scriptDataDoubleEscaped + } + z.raw.end-- + goto scriptDataEscaped + +scriptDataDoubleEscaped: + c = z.readByte() + if z.err != nil { + return + } + switch c { + case '-': + goto scriptDataDoubleEscapedDash + case '<': + goto scriptDataDoubleEscapedLessThanSign + } + goto scriptDataDoubleEscaped + +scriptDataDoubleEscapedDash: + c = z.readByte() + if z.err != nil { + return + } + switch c { + case '-': + goto scriptDataDoubleEscapedDashDash + case '<': + goto scriptDataDoubleEscapedLessThanSign + } + goto scriptDataDoubleEscaped + +scriptDataDoubleEscapedDashDash: + c = z.readByte() + if z.err != nil { + return + } + switch c { + case '-': + goto scriptDataDoubleEscapedDashDash + case '<': + goto scriptDataDoubleEscapedLessThanSign + case '>': + goto scriptData + } + goto scriptDataDoubleEscaped + +scriptDataDoubleEscapedLessThanSign: + c = z.readByte() + if z.err != nil { + return + } + if c == '/' { + goto scriptDataDoubleEscapeEnd + } + z.raw.end-- + goto scriptDataDoubleEscaped + +scriptDataDoubleEscapeEnd: + if z.readRawEndTag() { + z.raw.end += len("</script>") + goto scriptDataEscaped + } + if z.err != nil { + return + } + goto scriptDataDoubleEscaped +} + +// readComment reads the next comment token starting with "<!--". The opening +// "<!--" has already been consumed. +func (z *Tokenizer) readComment() { + z.data.start = z.raw.end + defer func() { + if z.data.end < z.data.start { + // It's a comment with no data, like <!-->. + z.data.end = z.data.start + } + }() + for dashCount := 2; ; { + c := z.readByte() + if z.err != nil { + // Ignore up to two dashes at EOF. + if dashCount > 2 { + dashCount = 2 + } + z.data.end = z.raw.end - dashCount + return + } + switch c { + case '-': + dashCount++ + continue + case '>': + if dashCount >= 2 { + z.data.end = z.raw.end - len("-->") + return + } + case '!': + if dashCount >= 2 { + c = z.readByte() + if z.err != nil { + z.data.end = z.raw.end + return + } + if c == '>' { + z.data.end = z.raw.end - len("--!>") + return + } + } + } + dashCount = 0 + } +} + +// readUntilCloseAngle reads until the next ">". +func (z *Tokenizer) readUntilCloseAngle() { + z.data.start = z.raw.end + for { + c := z.readByte() + if z.err != nil { + z.data.end = z.raw.end + return + } + if c == '>' { + z.data.end = z.raw.end - len(">") + return + } + } +} + +// readMarkupDeclaration reads the next token starting with "<!". It might be +// a "<!--comment-->", a "<!DOCTYPE foo>", a "<![CDATA[section]]>" or +// "<!a bogus comment". The opening "<!" has already been consumed. +func (z *Tokenizer) readMarkupDeclaration() TokenType { + z.data.start = z.raw.end + var c [2]byte + for i := 0; i < 2; i++ { + c[i] = z.readByte() + if z.err != nil { + z.data.end = z.raw.end + return CommentToken + } + } + if c[0] == '-' && c[1] == '-' { + z.readComment() + return CommentToken + } + z.raw.end -= 2 + if z.readDoctype() { + return DoctypeToken + } + if z.allowCDATA && z.readCDATA() { + z.convertNUL = true + return TextToken + } + // It's a bogus comment. + z.readUntilCloseAngle() + return CommentToken +} + +// readDoctype attempts to read a doctype declaration and returns true if +// successful. The opening "<!" has already been consumed. +func (z *Tokenizer) readDoctype() bool { + const s = "DOCTYPE" + for i := 0; i < len(s); i++ { + c := z.readByte() + if z.err != nil { + z.data.end = z.raw.end + return false + } + if c != s[i] && c != s[i]+('a'-'A') { + // Back up to read the fragment of "DOCTYPE" again. + z.raw.end = z.data.start + return false + } + } + if z.skipWhiteSpace(); z.err != nil { + z.data.start = z.raw.end + z.data.end = z.raw.end + return true + } + z.readUntilCloseAngle() + return true +} + +// readCDATA attempts to read a CDATA section and returns true if +// successful. The opening "<!" has already been consumed. +func (z *Tokenizer) readCDATA() bool { + const s = "[CDATA[" + for i := 0; i < len(s); i++ { + c := z.readByte() + if z.err != nil { + z.data.end = z.raw.end + return false + } + if c != s[i] { + // Back up to read the fragment of "[CDATA[" again. + z.raw.end = z.data.start + return false + } + } + z.data.start = z.raw.end + brackets := 0 + for { + c := z.readByte() + if z.err != nil { + z.data.end = z.raw.end + return true + } + switch c { + case ']': + brackets++ + case '>': + if brackets >= 2 { + z.data.end = z.raw.end - len("]]>") + return true + } + brackets = 0 + default: + brackets = 0 + } + } +} + +// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end] +// case-insensitively matches any element of ss. +func (z *Tokenizer) startTagIn(ss ...string) bool { +loop: + for _, s := range ss { + if z.data.end-z.data.start != len(s) { + continue loop + } + for i := 0; i < len(s); i++ { + c := z.buf[z.data.start+i] + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + if c != s[i] { + continue loop + } + } + return true + } + return false +} + +// readStartTag reads the next start tag token. The opening "<a" has already +// been consumed, where 'a' means anything in [A-Za-z]. +func (z *Tokenizer) readStartTag() TokenType { + z.readTag(true) + if z.err != nil { + return ErrorToken + } + // Several tags flag the tokenizer's next token as raw. + c, raw := z.buf[z.data.start], false + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + switch c { + case 'i': + raw = z.startTagIn("iframe") + case 'n': + raw = z.startTagIn("noembed", "noframes", "noscript") + case 'p': + raw = z.startTagIn("plaintext") + case 's': + raw = z.startTagIn("script", "style") + case 't': + raw = z.startTagIn("textarea", "title") + case 'x': + raw = z.startTagIn("xmp") + } + if raw { + z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end])) + } + // Look for a self-closing token like "<br/>". + if z.err == nil && z.buf[z.raw.end-2] == '/' { + return SelfClosingTagToken + } + return StartTagToken +} + +// readTag reads the next tag token and its attributes. If saveAttr, those +// attributes are saved in z.attr, otherwise z.attr is set to an empty slice. +// The opening "<a" or "</a" has already been consumed, where 'a' means anything +// in [A-Za-z]. +func (z *Tokenizer) readTag(saveAttr bool) { + z.attr = z.attr[:0] + z.nAttrReturned = 0 + // Read the tag name and attribute key/value pairs. + z.readTagName() + if z.skipWhiteSpace(); z.err != nil { + return + } + for { + c := z.readByte() + if z.err != nil || c == '>' { + break + } + z.raw.end-- + z.readTagAttrKey() + z.readTagAttrVal() + // Save pendingAttr if saveAttr and that attribute has a non-empty key. + if saveAttr && z.pendingAttr[0].start != z.pendingAttr[0].end { + z.attr = append(z.attr, z.pendingAttr) + } + if z.skipWhiteSpace(); z.err != nil { + break + } + } +} + +// readTagName sets z.data to the "div" in "<div k=v>". The reader (z.raw.end) +// is positioned such that the first byte of the tag name (the "d" in "<div") +// has already been consumed. +func (z *Tokenizer) readTagName() { + z.data.start = z.raw.end - 1 + for { + c := z.readByte() + if z.err != nil { + z.data.end = z.raw.end + return + } + switch c { + case ' ', '\n', '\r', '\t', '\f': + z.data.end = z.raw.end - 1 + return + case '/', '>': + z.raw.end-- + z.data.end = z.raw.end + return + } + } +} + +// readTagAttrKey sets z.pendingAttr[0] to the "k" in "<div k=v>". +// Precondition: z.err == nil. +func (z *Tokenizer) readTagAttrKey() { + z.pendingAttr[0].start = z.raw.end + for { + c := z.readByte() + if z.err != nil { + z.pendingAttr[0].end = z.raw.end + return + } + switch c { + case ' ', '\n', '\r', '\t', '\f', '/': + z.pendingAttr[0].end = z.raw.end - 1 + return + case '=', '>': + z.raw.end-- + z.pendingAttr[0].end = z.raw.end + return + } + } +} + +// readTagAttrVal sets z.pendingAttr[1] to the "v" in "<div k=v>". +func (z *Tokenizer) readTagAttrVal() { + z.pendingAttr[1].start = z.raw.end + z.pendingAttr[1].end = z.raw.end + if z.skipWhiteSpace(); z.err != nil { + return + } + c := z.readByte() + if z.err != nil { + return + } + if c != '=' { + z.raw.end-- + return + } + if z.skipWhiteSpace(); z.err != nil { + return + } + quote := z.readByte() + if z.err != nil { + return + } + switch quote { + case '>': + z.raw.end-- + return + + case '\'', '"': + z.pendingAttr[1].start = z.raw.end + for { + c := z.readByte() + if z.err != nil { + z.pendingAttr[1].end = z.raw.end + return + } + if c == quote { + z.pendingAttr[1].end = z.raw.end - 1 + return + } + } + + default: + z.pendingAttr[1].start = z.raw.end - 1 + for { + c := z.readByte() + if z.err != nil { + z.pendingAttr[1].end = z.raw.end + return + } + switch c { + case ' ', '\n', '\r', '\t', '\f': + z.pendingAttr[1].end = z.raw.end - 1 + return + case '>': + z.raw.end-- + z.pendingAttr[1].end = z.raw.end + return + } + } + } +} + +// Next scans the next token and returns its type. +func (z *Tokenizer) Next() TokenType { + z.raw.start = z.raw.end + z.data.start = z.raw.end + z.data.end = z.raw.end + if z.err != nil { + z.tt = ErrorToken + return z.tt + } + if z.rawTag != "" { + if z.rawTag == "plaintext" { + // Read everything up to EOF. + for z.err == nil { + z.readByte() + } + z.data.end = z.raw.end + z.textIsRaw = true + } else { + z.readRawOrRCDATA() + } + if z.data.end > z.data.start { + z.tt = TextToken + z.convertNUL = true + return z.tt + } + } + z.textIsRaw = false + z.convertNUL = false + +loop: + for { + c := z.readByte() + if z.err != nil { + break loop + } + if c != '<' { + continue loop + } + + // Check if the '<' we have just read is part of a tag, comment + // or doctype. If not, it's part of the accumulated text token. + c = z.readByte() + if z.err != nil { + break loop + } + var tokenType TokenType + switch { + case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': + tokenType = StartTagToken + case c == '/': + tokenType = EndTagToken + case c == '!' || c == '?': + // We use CommentToken to mean any of "<!--actual comments-->", + // "<!DOCTYPE declarations>" and "<?xml processing instructions?>". + tokenType = CommentToken + default: + // Reconsume the current character. + z.raw.end-- + continue + } + + // We have a non-text token, but we might have accumulated some text + // before that. If so, we return the text first, and return the non- + // text token on the subsequent call to Next. + if x := z.raw.end - len("<a"); z.raw.start < x { + z.raw.end = x + z.data.end = x + z.tt = TextToken + return z.tt + } + switch tokenType { + case StartTagToken: + z.tt = z.readStartTag() + return z.tt + case EndTagToken: + c = z.readByte() + if z.err != nil { + break loop + } + if c == '>' { + // "</>" does not generate a token at all. Generate an empty comment + // to allow passthrough clients to pick up the data using Raw. + // Reset the tokenizer state and start again. + z.tt = CommentToken + return z.tt + } + if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' { + z.readTag(false) + if z.err != nil { + z.tt = ErrorToken + } else { + z.tt = EndTagToken + } + return z.tt + } + z.raw.end-- + z.readUntilCloseAngle() + z.tt = CommentToken + return z.tt + case CommentToken: + if c == '!' { + z.tt = z.readMarkupDeclaration() + return z.tt + } + z.raw.end-- + z.readUntilCloseAngle() + z.tt = CommentToken + return z.tt + } + } + if z.raw.start < z.raw.end { + z.data.end = z.raw.end + z.tt = TextToken + return z.tt + } + z.tt = ErrorToken + return z.tt +} + +// Raw returns the unmodified text of the current token. Calling Next, Token, +// Text, TagName or TagAttr may change the contents of the returned slice. +func (z *Tokenizer) Raw() []byte { + return z.buf[z.raw.start:z.raw.end] +} + +// convertNewlines converts "\r" and "\r\n" in s to "\n". +// The conversion happens in place, but the resulting slice may be shorter. +func convertNewlines(s []byte) []byte { + for i, c := range s { + if c != '\r' { + continue + } + + src := i + 1 + if src >= len(s) || s[src] != '\n' { + s[i] = '\n' + continue + } + + dst := i + for src < len(s) { + if s[src] == '\r' { + if src+1 < len(s) && s[src+1] == '\n' { + src++ + } + s[dst] = '\n' + } else { + s[dst] = s[src] + } + src++ + dst++ + } + return s[:dst] + } + return s +} + +var ( + nul = []byte("\x00") + replacement = []byte("\ufffd") +) + +// Text returns the unescaped text of a text, comment or doctype token. The +// contents of the returned slice may change on the next call to Next. +func (z *Tokenizer) Text() []byte { + switch z.tt { + case TextToken, CommentToken, DoctypeToken: + s := z.buf[z.data.start:z.data.end] + z.data.start = z.raw.end + z.data.end = z.raw.end + s = convertNewlines(s) + if (z.convertNUL || z.tt == CommentToken) && bytes.Contains(s, nul) { + s = bytes.Replace(s, nul, replacement, -1) + } + if !z.textIsRaw { + s = unescape(s, false) + } + return s + } + return nil +} + +// TagName returns the lower-cased name of a tag token (the `img` out of +// `<IMG SRC="foo">`) and whether the tag has attributes. +// The contents of the returned slice may change on the next call to Next. +func (z *Tokenizer) TagName() (name []byte, hasAttr bool) { + if z.data.start < z.data.end { + switch z.tt { + case StartTagToken, EndTagToken, SelfClosingTagToken: + s := z.buf[z.data.start:z.data.end] + z.data.start = z.raw.end + z.data.end = z.raw.end + return lower(s), z.nAttrReturned < len(z.attr) + } + } + return nil, false +} + +// TagAttr returns the lower-cased key and unescaped value of the next unparsed +// attribute for the current tag token and whether there are more attributes. +// The contents of the returned slices may change on the next call to Next. +func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) { + if z.nAttrReturned < len(z.attr) { + switch z.tt { + case StartTagToken, SelfClosingTagToken: + x := z.attr[z.nAttrReturned] + z.nAttrReturned++ + key = z.buf[x[0].start:x[0].end] + val = z.buf[x[1].start:x[1].end] + return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr) + } + } + return nil, nil, false +} + +// Token returns the next Token. The result's Data and Attr values remain valid +// after subsequent Next calls. +func (z *Tokenizer) Token() Token { + t := Token{Type: z.tt} + switch z.tt { + case TextToken, CommentToken, DoctypeToken: + t.Data = string(z.Text()) + case StartTagToken, SelfClosingTagToken, EndTagToken: + name, moreAttr := z.TagName() + for moreAttr { + var key, val []byte + key, val, moreAttr = z.TagAttr() + t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)}) + } + if a := atom.Lookup(name); a != 0 { + t.DataAtom, t.Data = a, a.String() + } else { + t.DataAtom, t.Data = 0, string(name) + } + } + return t +} + +// SetMaxBuf sets a limit on the amount of data buffered during tokenization. +// A value of 0 means unlimited. +func (z *Tokenizer) SetMaxBuf(n int) { + z.maxBuf = n +} + +// NewTokenizer returns a new HTML Tokenizer for the given Reader. +// The input is assumed to be UTF-8 encoded. +func NewTokenizer(r io.Reader) *Tokenizer { + return NewTokenizerFragment(r, "") +} + +// NewTokenizerFragment returns a new HTML Tokenizer for the given Reader, for +// tokenizing an existing element's InnerHTML fragment. contextTag is that +// element's tag, such as "div" or "iframe". +// +// For example, how the InnerHTML "a<b" is tokenized depends on whether it is +// for a <p> tag or a <script> tag. +// +// The input is assumed to be UTF-8 encoded. +func NewTokenizerFragment(r io.Reader, contextTag string) *Tokenizer { + z := &Tokenizer{ + r: r, + buf: make([]byte, 0, 4096), + } + if contextTag != "" { + switch s := strings.ToLower(contextTag); s { + case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp": + z.rawTag = s + } + } + return z +} |