summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/encoding
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/encoding')
-rw-r--r--vendor/golang.org/x/text/encoding/charmap/maketables.go556
-rw-r--r--vendor/golang.org/x/text/encoding/htmlindex/gen.go173
-rw-r--r--vendor/golang.org/x/text/encoding/internal/identifier/gen.go142
-rw-r--r--vendor/golang.org/x/text/encoding/internal/identifier/mib.go94
-rw-r--r--vendor/golang.org/x/text/encoding/japanese/maketables.go161
-rw-r--r--vendor/golang.org/x/text/encoding/korean/maketables.go143
-rw-r--r--vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go161
-rw-r--r--vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go140
8 files changed, 1522 insertions, 48 deletions
diff --git a/vendor/golang.org/x/text/encoding/charmap/maketables.go b/vendor/golang.org/x/text/encoding/charmap/maketables.go
new file mode 100644
index 000000000..f7941701e
--- /dev/null
+++ b/vendor/golang.org/x/text/encoding/charmap/maketables.go
@@ -0,0 +1,556 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "log"
+ "net/http"
+ "sort"
+ "strings"
+ "unicode/utf8"
+
+ "golang.org/x/text/encoding"
+ "golang.org/x/text/internal/gen"
+)
+
+// ascii holds the 128 characters U+0000 through U+007F in code point order.
+// getWHATWG prepends it to the upper-half table it downloads, and main checks
+// whether a mapping starts with it to decide if an encoding is an ASCII
+// superset.
+const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
+ "`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
+
+// encodings lists every character map for which main emits a Charmap
+// variable into tables.go.
+var encodings = []struct {
+ name string // human-readable name; written into the doc comment and the name field
+ mib string // identifier constant name; written as identifier.<mib>
+ comment string // optional extra paragraph for the generated doc comment
+ varName string // comma-separated variable names; the first is the one defined, all are added to listAll
+ replacement byte // written as the Charmap's replacement field
+ // mapping is either a URL that main downloads (a WHATWG index via getWHATWG
+ // or an ICU .ucm file via getUCM) or, when it has neither URL prefix, the
+ // literal mapping string itself.
+ mapping string
+}{
+ {
+ "IBM Code Page 037",
+ "IBM037",
+ "",
+ "CodePage037",
+ 0x3f,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 437",
+ "PC8CodePage437",
+ "",
+ "CodePage437",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 850",
+ "PC850Multilingual",
+ "",
+ "CodePage850",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 852",
+ "PCp852",
+ "",
+ "CodePage852",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 855",
+ "IBM855",
+ "",
+ "CodePage855",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
+ },
+ {
+ "Windows Code Page 858", // PC latin1 with Euro
+ "IBM00858",
+ "",
+ "CodePage858",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
+ },
+ {
+ "IBM Code Page 860",
+ "IBM860",
+ "",
+ "CodePage860",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 862",
+ "PC862LatinHebrew",
+ "",
+ "CodePage862",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 863",
+ "IBM863",
+ "",
+ "CodePage863",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 865",
+ "IBM865",
+ "",
+ "CodePage865",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 866",
+ "IBM866",
+ "",
+ "CodePage866",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-ibm866.txt",
+ },
+ {
+ "IBM Code Page 1047",
+ "IBM1047",
+ "",
+ "CodePage1047",
+ 0x3f,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 1140",
+ "IBM01140",
+ "",
+ "CodePage1140",
+ 0x3f,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
+ },
+ {
+ "ISO 8859-1",
+ "ISOLatin1",
+ "",
+ "ISO8859_1",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
+ },
+ {
+ "ISO 8859-2",
+ "ISOLatin2",
+ "",
+ "ISO8859_2",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
+ },
+ {
+ "ISO 8859-3",
+ "ISOLatin3",
+ "",
+ "ISO8859_3",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
+ },
+ {
+ "ISO 8859-4",
+ "ISOLatin4",
+ "",
+ "ISO8859_4",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
+ },
+ {
+ "ISO 8859-5",
+ "ISOLatinCyrillic",
+ "",
+ "ISO8859_5",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
+ },
+ {
+ "ISO 8859-6",
+ "ISOLatinArabic",
+ "",
+ "ISO8859_6,ISO8859_6E,ISO8859_6I",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
+ },
+ {
+ "ISO 8859-7",
+ "ISOLatinGreek",
+ "",
+ "ISO8859_7",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
+ },
+ {
+ "ISO 8859-8",
+ "ISOLatinHebrew",
+ "",
+ "ISO8859_8,ISO8859_8E,ISO8859_8I",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
+ },
+ {
+ "ISO 8859-9",
+ "ISOLatin5",
+ "",
+ "ISO8859_9",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
+ },
+ {
+ "ISO 8859-10",
+ "ISOLatin6",
+ "",
+ "ISO8859_10",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
+ },
+ {
+ "ISO 8859-13",
+ "ISO885913",
+ "",
+ "ISO8859_13",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
+ },
+ {
+ "ISO 8859-14",
+ "ISO885914",
+ "",
+ "ISO8859_14",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
+ },
+ {
+ "ISO 8859-15",
+ "ISO885915",
+ "",
+ "ISO8859_15",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
+ },
+ {
+ "ISO 8859-16",
+ "ISO885916",
+ "",
+ "ISO8859_16",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
+ },
+ {
+ "KOI8-R",
+ "KOI8R",
+ "",
+ "KOI8R",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-koi8-r.txt",
+ },
+ {
+ "KOI8-U",
+ "KOI8U",
+ "",
+ "KOI8U",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-koi8-u.txt",
+ },
+ {
+ "Macintosh",
+ "Macintosh",
+ "",
+ "Macintosh",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-macintosh.txt",
+ },
+ {
+ "Macintosh Cyrillic",
+ "MacintoshCyrillic",
+ "",
+ "MacintoshCyrillic",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
+ },
+ {
+ "Windows 874",
+ "Windows874",
+ "",
+ "Windows874",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-874.txt",
+ },
+ {
+ "Windows 1250",
+ "Windows1250",
+ "",
+ "Windows1250",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-1250.txt",
+ },
+ {
+ "Windows 1251",
+ "Windows1251",
+ "",
+ "Windows1251",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-1251.txt",
+ },
+ {
+ "Windows 1252",
+ "Windows1252",
+ "",
+ "Windows1252",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-1252.txt",
+ },
+ {
+ "Windows 1253",
+ "Windows1253",
+ "",
+ "Windows1253",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-1253.txt",
+ },
+ {
+ "Windows 1254",
+ "Windows1254",
+ "",
+ "Windows1254",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-1254.txt",
+ },
+ {
+ "Windows 1255",
+ "Windows1255",
+ "",
+ "Windows1255",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-1255.txt",
+ },
+ {
+ "Windows 1256",
+ "Windows1256",
+ "",
+ "Windows1256",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-1256.txt",
+ },
+ {
+ "Windows 1257",
+ "Windows1257",
+ "",
+ "Windows1257",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-1257.txt",
+ },
+ {
+ "Windows 1258",
+ "Windows1258",
+ "",
+ "Windows1258",
+ encoding.ASCIISub,
+ "http://encoding.spec.whatwg.org/index-windows-1258.txt",
+ },
+ {
+ "X-User-Defined",
+ "XUserDefined",
+ "It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
+ "XUserDefined",
+ encoding.ASCIISub,
+ ascii +
+ "\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
+ "\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
+ "\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
+ "\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
+ "\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
+ "\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
+ "\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
+ "\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
+ "\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
+ "\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
+ "\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
+ "\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
+ "\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
+ "\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
+ "\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
+ "\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
+ },
+}
+
+// getWHATWG downloads the WHATWG index file at url and returns the mapping it
+// describes: the ascii table followed by the 128 upper-half entries read from
+// the index.
+//
+// Every line that is neither blank nor a '#' comment must have the form
+// "<pointer>\t0x<code point>", where pointer is in [0, 128) and selects the
+// entry for byte 0x80+pointer. Entries the index does not list stay U+FFFD.
+// Any download, HTTP status, read or parse failure terminates the program via
+// log.Fatalf.
+func getWHATWG(url string) string {
+	res, err := http.Get(url)
+	if err != nil {
+		log.Fatalf("%q: Get: %v", url, err)
+	}
+	defer res.Body.Close()
+	// Without this check the body of an error page would be handed to the
+	// parser and reported as a misleading "could not parse" failure.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("%q: Get: unexpected status %s", url, res.Status)
+	}
+
+	mapping := make([]rune, 128)
+	for i := range mapping {
+		mapping[i] = '\ufffd'
+	}
+
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		x, y := 0, 0
+		if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q", s)
+		}
+		if x < 0 || 128 <= x {
+			log.Fatalf("code %d is out of range", x)
+		}
+		if 0x80 <= y && y < 0xa0 {
+			// We diverge from the WHATWG spec by mapping control characters
+			// in the range [0x80, 0xa0) to U+FFFD.
+			continue
+		}
+		mapping[x] = rune(y)
+	}
+	// Scan returns false on read errors as well as on EOF; a truncated
+	// download must not silently produce a partially filled table.
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("%q: read: %v", url, err)
+	}
+	return ascii + string(mapping)
+}
+
+// getUCM downloads the ICU .ucm file at url and returns the 256-rune mapping
+// it describes, indexed by byte value.
+//
+// Only lines matching `<Uxxxx> \xNN |0` contribute an entry; every other line
+// is skipped, and bytes without an entry stay U+FFFD. The program is
+// terminated via log.Fatalf when the download fails, the HTTP status is not
+// 200, reading the body fails, or fewer than 200 entries are found.
+func getUCM(url string) string {
+	res, err := http.Get(url)
+	if err != nil {
+		log.Fatalf("%q: Get: %v", url, err)
+	}
+	defer res.Body.Close()
+	// Report a failed request as such instead of as a "wrong page format"
+	// further down.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("%q: Get: unexpected status %s", url, res.Status)
+	}
+
+	mapping := make([]rune, 256)
+	for i := range mapping {
+		mapping[i] = '\ufffd'
+	}
+
+	charsFound := 0
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		var c byte
+		var r rune
+		// Header and non-roundtrip lines do not match this pattern and are
+		// deliberately ignored rather than treated as errors.
+		if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
+			continue
+		}
+		mapping[c] = r
+		charsFound++
+	}
+	// Scan returns false on read errors as well as on EOF; a download cut
+	// short after 200 entries would otherwise pass the sanity check below.
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("%q: read: %v", url, err)
+	}
+
+	if charsFound < 200 {
+		log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
+	}
+
+	return string(mapping)
+}
+
+// main generates tables.go for package charmap. For every entry in encodings
+// it resolves the mapping (downloading it when the entry holds a URL), then
+// writes a Charmap variable with its decode and encode tables. Finally it
+// writes the listAll slice naming every generated variable.
+func main() {
+	mibs := map[string]bool{}
+	all := []string{}
+
+	w := gen.NewCodeWriter()
+	defer w.WriteGoFile("tables.go", "charmap")
+
+	printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
+
+	printf("import (\n")
+	printf("\t\"golang.org/x/text/encoding\"\n")
+	printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
+	printf(")\n\n")
+	for _, e := range encodings {
+		// Only the first name gets a definition here; all of them are listed
+		// in listAll.
+		varNames := strings.Split(e.varName, ",")
+		all = append(all, varNames...)
+		varName := varNames[0]
+		switch {
+		case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
+			e.mapping = getWHATWG(e.mapping)
+		case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
+			e.mapping = getUCM(e.mapping)
+		}
+
+		asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
+		if asciiSuperset {
+			low = 0x80
+		}
+		// Derive the unexported variable name by lower-casing the leading
+		// letter, or the whole three-letter prefix for ISO* and KOI* names.
+		lvn := 1
+		if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
+			lvn = 3
+		}
+		lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
+		printf("// %s is the %s encoding.\n", varName, e.name)
+		if e.comment != "" {
+			printf("//\n// %s\n", e.comment)
+		}
+		printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n",
+			varName, lowerVarName, lowerVarName, e.name)
+		if mibs[e.mib] {
+			log.Fatalf("MIB type %q declared multiple times.", e.mib)
+		}
+		// Record the MIB; without this the duplicate check above can never
+		// trigger.
+		mibs[e.mib] = true
+		printf("mib: identifier.%s,\n", e.mib)
+		printf("asciiSuperset: %t,\n", asciiSuperset)
+		printf("low: 0x%02x,\n", low)
+		printf("replacement: 0x%02x,\n", e.replacement)
+
+		printf("decode: [256]utf8Enc{\n")
+		// i is the position of the rune within the mapping, i.e. the byte
+		// value being described. backMapping remembers the first byte that
+		// maps to each rune; U+FFFD entries are not encodable.
+		i, backMapping := 0, map[rune]byte{}
+		for _, c := range e.mapping {
+			if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
+				backMapping[c] = byte(i)
+			}
+			var buf [8]byte
+			n := utf8.EncodeRune(buf[:], c)
+			// The emitted table entry only has room for three UTF-8 bytes.
+			if n > 3 {
+				panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
+			}
+			printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
+			if i%2 == 1 {
+				printf("\n")
+			}
+			i++
+		}
+		printf("},\n")
+
+		printf("encode: [256]uint32{\n")
+		// Each entry packs the byte into the top 8 bits and the rune into the
+		// low 24 bits. Sorting by rune makes the output independent of map
+		// iteration order.
+		encode := make([]uint32, 0, 256)
+		for c, i := range backMapping {
+			encode = append(encode, uint32(i)<<24|uint32(c))
+		}
+		sort.Sort(byRune(encode))
+		// Pad to 256 entries by repeating the last one.
+		for len(encode) < cap(encode) {
+			encode = append(encode, encode[len(encode)-1])
+		}
+		for i, enc := range encode {
+			printf("0x%08x,", enc)
+			if i%8 == 7 {
+				printf("\n")
+			}
+		}
+		printf("},\n}\n")
+
+		// Add an estimate of the size of a single Charmap{} struct value, which
+		// includes two 256 elem arrays of 4 bytes and some extra fields, which
+		// align to 3 uint64s on 64-bit architectures.
+		w.Size += 2*4*256 + 3*8
+	}
+	// TODO: add proper line breaking.
+	printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
+}
+
+// byRune implements sort.Interface for a slice of uint32 values, ordering
+// them by their low 24 bits and ignoring the top 8 bits.
+type byRune []uint32
+
+// Len reports the number of entries.
+func (r byRune) Len() int {
+	return len(r)
+}
+
+// Less compares two entries by their low 24 bits only.
+func (r byRune) Less(a, b int) bool {
+	const lowMask = 0xffffff
+	return r[a]&lowMask < r[b]&lowMask
+}
+
+// Swap exchanges two entries.
+func (r byRune) Swap(a, b int) {
+	r[b], r[a] = r[a], r[b]
+}
diff --git a/vendor/golang.org/x/text/encoding/htmlindex/gen.go b/vendor/golang.org/x/text/encoding/htmlindex/gen.go
new file mode 100644
index 000000000..ac6b4a77f
--- /dev/null
+++ b/vendor/golang.org/x/text/encoding/htmlindex/gen.go
@@ -0,0 +1,173 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "log"
+ "strings"
+
+ "golang.org/x/text/internal/gen"
+)
+
+// group is the element type of the slice that main decodes encodings.json
+// into. Each group holds a list of encodings, and each encoding has a name
+// and the labels that refer to it.
+type group struct {
+ Encodings []struct {
+ Labels []string
+ Name string
+ }
+}
+
+// main reads encodings.json from the WHATWG encoding specification and writes
+// tables.go for package htmlindex. The generated file contains, in order: the
+// htmlEncoding constants, the canonical name table, the label-to-constant
+// nameMap, the localeMap and the space-separated locales string.
+func main() {
+	gen.Init()
+
+	src := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
+	var groups []group
+	if err := json.NewDecoder(src).Decode(&groups); err != nil {
+		log.Fatalf("Error reading encodings.json: %v", err)
+	}
+
+	out := new(bytes.Buffer)
+
+	// One constant per encoding, in file order.
+	fmt.Fprintln(out, "type htmlEncoding byte")
+	fmt.Fprintln(out, "const (")
+	for gi, grp := range groups {
+		for _, enc := range grp.Encodings {
+			canon := strings.ToLower(enc.Name)
+			constName := consts[canon]
+			if constName == "" {
+				log.Fatalf("No const defined for %s.", canon)
+			}
+			// Entries of the first group spell out the type and iota; all
+			// later ones rely on implicit repetition.
+			format := "%s\n"
+			if gi == 0 {
+				format = "%s htmlEncoding = iota\n"
+			}
+			fmt.Fprintf(out, format, constName)
+		}
+	}
+	fmt.Fprintln(out, "numEncodings")
+	fmt.Fprint(out, ")\n\n")
+
+	// Canonical names, in the same order as the constants.
+	fmt.Fprintln(out, "var canonical = [numEncodings]string{")
+	for _, grp := range groups {
+		for _, enc := range grp.Encodings {
+			fmt.Fprintf(out, "%q,\n", strings.ToLower(enc.Name))
+		}
+	}
+	fmt.Fprint(out, "}\n\n")
+
+	// Every label maps to the constant of the encoding it belongs to.
+	fmt.Fprintln(out, "var nameMap = map[string]htmlEncoding{")
+	for _, grp := range groups {
+		for _, enc := range grp.Encodings {
+			constName := consts[strings.ToLower(enc.Name)]
+			for _, label := range enc.Labels {
+				fmt.Fprintf(out, "%q: %s,\n", label, constName)
+			}
+		}
+	}
+	fmt.Fprint(out, "}\n\n")
+
+	// localeMap and the locales string are index-aligned.
+	var tags []string
+	fmt.Fprintln(out, "var localeMap = []htmlEncoding{")
+	for i := range locales {
+		entry := locales[i]
+		tags = append(tags, entry.tag)
+		fmt.Fprintf(out, "%s, // %s \n", consts[entry.name], entry.tag)
+	}
+	fmt.Fprint(out, "}\n\n")
+
+	fmt.Fprintf(out, "const locales = %q\n", strings.Join(tags, " "))
+
+	gen.WriteGoFile("tables.go", "htmlindex", out.Bytes())
+}
+
+// consts maps the lower-cased canonical encoding name to the name of the
+// htmlEncoding constant that main emits for it. main aborts when an encoding
+// found in encodings.json has no entry here.
+var consts = map[string]string{
+ "utf-8": "utf8",
+ "ibm866": "ibm866",
+ "iso-8859-2": "iso8859_2",
+ "iso-8859-3": "iso8859_3",
+ "iso-8859-4": "iso8859_4",
+ "iso-8859-5": "iso8859_5",
+ "iso-8859-6": "iso8859_6",
+ "iso-8859-7": "iso8859_7",
+ "iso-8859-8": "iso8859_8",
+ "iso-8859-8-i": "iso8859_8I",
+ "iso-8859-10": "iso8859_10",
+ "iso-8859-13": "iso8859_13",
+ "iso-8859-14": "iso8859_14",
+ "iso-8859-15": "iso8859_15",
+ "iso-8859-16": "iso8859_16",
+ "koi8-r": "koi8r",
+ "koi8-u": "koi8u",
+ "macintosh": "macintosh",
+ "windows-874": "windows874",
+ "windows-1250": "windows1250",
+ "windows-1251": "windows1251",
+ "windows-1252": "windows1252",
+ "windows-1253": "windows1253",
+ "windows-1254": "windows1254",
+ "windows-1255": "windows1255",
+ "windows-1256": "windows1256",
+ "windows-1257": "windows1257",
+ "windows-1258": "windows1258",
+ "x-mac-cyrillic": "macintoshCyrillic",
+ "gbk": "gbk",
+ "gb18030": "gb18030",
+ // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
+ "big5": "big5",
+ "euc-jp": "eucjp",
+ "iso-2022-jp": "iso2022jp",
+ "shift_jis": "shiftJIS",
+ "euc-kr": "euckr",
+ "replacement": "replacement",
+ "utf-16be": "utf16be",
+ "utf-16le": "utf16le",
+ "x-user-defined": "xUserDefined",
+}
+
+// locales is taken from
+// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
+//
+// Each entry pairs a language tag with the canonical name of an encoding.
+// main looks the name up in consts to emit localeMap, and joins the tags, in
+// this order and separated by spaces, into the generated locales constant.
+var locales = []struct{ tag, name string }{
+ // The default value. Explicitly state latin to benefit from the exact
+ // script option, while still making 1252 the default encoding for languages
+ // written in Latin script.
+ {"und_Latn", "windows-1252"},
+ {"ar", "windows-1256"},
+ {"ba", "windows-1251"},
+ {"be", "windows-1251"},
+ {"bg", "windows-1251"},
+ {"cs", "windows-1250"},
+ {"el", "iso-8859-7"},
+ {"et", "windows-1257"},
+ {"fa", "windows-1256"},
+ {"he", "windows-1255"},
+ {"hr", "windows-1250"},
+ {"hu", "iso-8859-2"},
+ {"ja", "shift_jis"},
+ {"kk", "windows-1251"},
+ {"ko", "euc-kr"},
+ {"ku", "windows-1254"},
+ {"ky", "windows-1251"},
+ {"lt", "windows-1257"},
+ {"lv", "windows-1257"},
+ {"mk", "windows-1251"},
+ {"pl", "iso-8859-2"},
+ {"ru", "windows-1251"},
+ {"sah", "windows-1251"},
+ {"sk", "windows-1250"},
+ {"sl", "iso-8859-2"},
+ {"sr", "windows-1251"},
+ {"tg", "windows-1251"},
+ {"th", "windows-874"},
+ {"tr", "windows-1254"},
+ {"tt", "windows-1251"},
+ {"uk", "windows-1251"},
+ {"vi", "windows-1258"},
+ {"zh-hans", "gb18030"},
+ {"zh-hant", "big5"},
+}
diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go
new file mode 100644
index 000000000..26cfef9c6
--- /dev/null
+++ b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go
@@ -0,0 +1,142 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+ "bytes"
+ "encoding/xml"
+ "fmt"
+ "io"
+ "log"
+ "strings"
+
+ "golang.org/x/text/internal/gen"
+)
+
+// registry describes the parts of the IANA character-sets.xml document that
+// main decodes. Only the first nested registry is used by main, and it is
+// expected to carry the ID "character-sets-1".
+type registry struct {
+ XMLName xml.Name `xml:"registry"`
+ Updated string `xml:"updated"`
+ Registry []struct {
+ ID string `xml:"id,attr"`
+ Record []struct {
+ Name string `xml:"name"`
+ Xref []struct {
+ Type string `xml:"type,attr"`
+ Data string `xml:"data,attr"`
+ } `xml:"xref"`
+ // Desc keeps the raw inner XML of the description element; main
+ // re-tokenizes it to build the doc comment.
+ Desc struct {
+ Data string `xml:",innerxml"`
+ // Any []struct {
+ // Data string `xml:",chardata"`
+ // } `xml:",any"`
+ // Data string `xml:",chardata"`
+ } `xml:"description,"`
+ MIB string `xml:"value"` // emitted verbatim as the constant's value
+ Alias []string `xml:"alias"` // searched for a "cs"-prefixed alias to name the constant
+ MIME string `xml:"preferred_alias"`
+ } `xml:"record"`
+ } `xml:"registry"`
+}
+
+// main reads the IANA character set registry and writes mib.go for package
+// identifier: one MIB constant per record, named after the record's "cs"
+// alias and documented with the record's description and references.
+func main() {
+	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
+	reg := &registry{}
+	// reg is already a pointer, so it is passed as is.
+	if err := xml.NewDecoder(r).Decode(reg); err != nil && err != io.EOF {
+		log.Fatalf("Error decoding charset registry: %v", err)
+	}
+	// Check for an empty document separately: indexing Registry[0] to build
+	// the error message would otherwise panic instead of reporting it.
+	if len(reg.Registry) == 0 {
+		log.Fatal("No registry found in charset registry")
+	}
+	if id := reg.Registry[0].ID; id != "character-sets-1" {
+		log.Fatalf("Unexpected ID %s", id)
+	}
+
+	w := &bytes.Buffer{}
+	fmt.Fprintf(w, "const (\n")
+	for _, rec := range reg.Registry[0].Record {
+		// The constant is named after a hyphen-free alias starting with
+		// "cs"; when several aliases qualify the last one wins.
+		constName := ""
+		for _, a := range rec.Alias {
+			if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
+				// Some of the constant definitions have comments in them. Strip those.
+				constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
+			}
+		}
+		if constName == "" {
+			switch rec.MIB {
+			case "2085":
+				constName = "HZGB2312" // Not listed as alias for some reason.
+			default:
+				log.Fatalf("No cs alias defined for %s.", rec.MIB)
+			}
+		}
+		if rec.MIME != "" {
+			rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
+		}
+		fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
+		if len(rec.Desc.Data) > 0 {
+			// Flatten the description's inner XML into comment text. Text
+			// is emitted as it is seen; the data attribute of a non-person
+			// xref is emitted when the element closes.
+			fmt.Fprint(w, "// ")
+			d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
+			inElem := true
+			attr := ""
+			for {
+				t, err := d.Token()
+				if err != nil {
+					if err != io.EOF {
+						log.Fatal(err)
+					}
+					break
+				}
+				switch x := t.(type) {
+				case xml.CharData:
+					attr = "" // Don't need attribute info.
+					a := bytes.Split([]byte(x), []byte("\n"))
+					for i, b := range a {
+						if b = bytes.TrimSpace(b); len(b) != 0 {
+							// Start a new comment line for text that followed
+							// a line break, except directly after an element.
+							if !inElem && i > 0 {
+								fmt.Fprint(w, "\n// ")
+							}
+							inElem = false
+							fmt.Fprintf(w, "%s ", string(b))
+						}
+					}
+				case xml.StartElement:
+					if x.Name.Local == "xref" {
+						inElem = true
+						use := false
+						for _, a := range x.Attr {
+							if a.Name.Local == "type" {
+								use = use || a.Value != "person"
+							}
+							if a.Name.Local == "data" && use {
+								// Patch up URLs to use https. From some links, the
+								// https version is different from the http one.
+								s := a.Value
+								s = strings.Replace(s, "http://", "https://", -1)
+								s = strings.Replace(s, "/unicode/", "/", -1)
+								attr = s + " "
+							}
+						}
+					}
+				case xml.EndElement:
+					inElem = false
+					fmt.Fprint(w, attr)
+				}
+			}
+			fmt.Fprint(w, "\n")
+		}
+		for _, x := range rec.Xref {
+			switch x.Type {
+			case "rfc":
+				fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
+			case "uri":
+				fmt.Fprintf(w, "// Reference: %s\n", x.Data)
+			}
+		}
+		fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
+		fmt.Fprintln(w)
+	}
+	fmt.Fprintln(w, ")")
+
+	gen.WriteGoFile("mib.go", "identifier", w.Bytes())
+}
diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/mib.go b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go
index 8cc29021c..fc7df1bc7 100644
--- a/vendor/golang.org/x/text/encoding/internal/identifier/mib.go
+++ b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go
@@ -538,8 +538,6 @@ const (
// ISO111ECMACyrillic is the MIB identifier with IANA name ECMA-cyrillic.
//
// ISO registry
- // (formerly ECMA
- // registry )
ISO111ECMACyrillic MIB = 77
// ISO121Canadian1 is the MIB identifier with IANA name CSA_Z243.4-1985-1.
@@ -732,18 +730,18 @@ const (
// ISO885913 is the MIB identifier with IANA name ISO-8859-13.
//
- // ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-13 http://www.iana.org/assignments/charset-reg/ISO-8859-13
+ // ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-13 https://www.iana.org/assignments/charset-reg/ISO-8859-13
ISO885913 MIB = 109
// ISO885914 is the MIB identifier with IANA name ISO-8859-14.
//
- // ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-14
+ // ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-14
ISO885914 MIB = 110
// ISO885915 is the MIB identifier with IANA name ISO-8859-15.
//
// ISO
- // Please see: http://www.iana.org/assignments/charset-reg/ISO-8859-15
+ // Please see: https://www.iana.org/assignments/charset-reg/ISO-8859-15
ISO885915 MIB = 111
// ISO885916 is the MIB identifier with IANA name ISO-8859-16.
@@ -754,41 +752,41 @@ const (
// GBK is the MIB identifier with IANA name GBK.
//
// Chinese IT Standardization Technical Committee
- // Please see: http://www.iana.org/assignments/charset-reg/GBK
+ // Please see: https://www.iana.org/assignments/charset-reg/GBK
GBK MIB = 113
// GB18030 is the MIB identifier with IANA name GB18030.
//
// Chinese IT Standardization Technical Committee
- // Please see: http://www.iana.org/assignments/charset-reg/GB18030
+ // Please see: https://www.iana.org/assignments/charset-reg/GB18030
GB18030 MIB = 114
// OSDEBCDICDF0415 is the MIB identifier with IANA name OSD_EBCDIC_DF04_15.
//
// Fujitsu-Siemens standard mainframe EBCDIC encoding
- // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15
+ // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15
OSDEBCDICDF0415 MIB = 115
// OSDEBCDICDF03IRV is the MIB identifier with IANA name OSD_EBCDIC_DF03_IRV.
//
// Fujitsu-Siemens standard mainframe EBCDIC encoding
- // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV
+ // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV
OSDEBCDICDF03IRV MIB = 116
// OSDEBCDICDF041 is the MIB identifier with IANA name OSD_EBCDIC_DF04_1.
//
// Fujitsu-Siemens standard mainframe EBCDIC encoding
- // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1
+ // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1
OSDEBCDICDF041 MIB = 117
// ISO115481 is the MIB identifier with IANA name ISO-11548-1.
//
- // See http://www.iana.org/assignments/charset-reg/ISO-11548-1
+ // See https://www.iana.org/assignments/charset-reg/ISO-11548-1
ISO115481 MIB = 118
// KZ1048 is the MIB identifier with IANA name KZ-1048.
//
- // See http://www.iana.org/assignments/charset-reg/KZ-1048
+ // See https://www.iana.org/assignments/charset-reg/KZ-1048
KZ1048 MIB = 119
// Unicode is the MIB identifier with IANA name ISO-10646-UCS-2.
@@ -855,7 +853,7 @@ const (
// SCSU is the MIB identifier with IANA name SCSU.
//
- // SCSU See http://www.iana.org/assignments/charset-reg/SCSU
+ // SCSU See https://www.iana.org/assignments/charset-reg/SCSU
SCSU MIB = 1011
// UTF7 is the MIB identifier with IANA name UTF-7.
@@ -884,22 +882,22 @@ const (
// CESU8 is the MIB identifier with IANA name CESU-8.
//
- // https://www.unicode.org/unicode/reports/tr26
+ // https://www.unicode.org/reports/tr26
CESU8 MIB = 1016
// UTF32 is the MIB identifier with IANA name UTF-32.
//
- // https://www.unicode.org/unicode/reports/tr19/
+ // https://www.unicode.org/reports/tr19/
UTF32 MIB = 1017
// UTF32BE is the MIB identifier with IANA name UTF-32BE.
//
- // https://www.unicode.org/unicode/reports/tr19/
+ // https://www.unicode.org/reports/tr19/
UTF32BE MIB = 1018
// UTF32LE is the MIB identifier with IANA name UTF-32LE.
//
- // https://www.unicode.org/unicode/reports/tr19/
+ // https://www.unicode.org/reports/tr19/
UTF32LE MIB = 1019
// BOCU1 is the MIB identifier with IANA name BOCU-1.
@@ -1461,152 +1459,152 @@ const (
// IBM00858 is the MIB identifier with IANA name IBM00858.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM00858
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM00858
IBM00858 MIB = 2089
// IBM00924 is the MIB identifier with IANA name IBM00924.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM00924
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM00924
IBM00924 MIB = 2090
// IBM01140 is the MIB identifier with IANA name IBM01140.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01140
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01140
IBM01140 MIB = 2091
// IBM01141 is the MIB identifier with IANA name IBM01141.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01141
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01141
IBM01141 MIB = 2092
// IBM01142 is the MIB identifier with IANA name IBM01142.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01142
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01142
IBM01142 MIB = 2093
// IBM01143 is the MIB identifier with IANA name IBM01143.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01143
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01143
IBM01143 MIB = 2094
// IBM01144 is the MIB identifier with IANA name IBM01144.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01144
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01144
IBM01144 MIB = 2095
// IBM01145 is the MIB identifier with IANA name IBM01145.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01145
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01145
IBM01145 MIB = 2096
// IBM01146 is the MIB identifier with IANA name IBM01146.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01146
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01146
IBM01146 MIB = 2097
// IBM01147 is the MIB identifier with IANA name IBM01147.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01147
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01147
IBM01147 MIB = 2098
// IBM01148 is the MIB identifier with IANA name IBM01148.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01148
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01148
IBM01148 MIB = 2099
// IBM01149 is the MIB identifier with IANA name IBM01149.
//
- // IBM See http://www.iana.org/assignments/charset-reg/IBM01149
+ // IBM See https://www.iana.org/assignments/charset-reg/IBM01149
IBM01149 MIB = 2100
// Big5HKSCS is the MIB identifier with IANA name Big5-HKSCS.
//
- // See http://www.iana.org/assignments/charset-reg/Big5-HKSCS
+ // See https://www.iana.org/assignments/charset-reg/Big5-HKSCS
Big5HKSCS MIB = 2101
// IBM1047 is the MIB identifier with IANA name IBM1047.
//
- // IBM1047 (EBCDIC Latin 1/Open Systems) http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
+ // IBM1047 (EBCDIC Latin 1/Open Systems) https://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
IBM1047 MIB = 2102
// PTCP154 is the MIB identifier with IANA name PTCP154.
//
- // See http://www.iana.org/assignments/charset-reg/PTCP154
+ // See https://www.iana.org/assignments/charset-reg/PTCP154
PTCP154 MIB = 2103
// Amiga1251 is the MIB identifier with IANA name Amiga-1251.
//
- // See http://www.amiga.ultranet.ru/Amiga-1251.html
+ // See https://www.amiga.ultranet.ru/Amiga-1251.html
Amiga1251 MIB = 2104
// KOI7switched is the MIB identifier with IANA name KOI7-switched.
//
- // See http://www.iana.org/assignments/charset-reg/KOI7-switched
+ // See https://www.iana.org/assignments/charset-reg/KOI7-switched
KOI7switched MIB = 2105
// BRF is the MIB identifier with IANA name BRF.
//
- // See http://www.iana.org/assignments/charset-reg/BRF
+ // See https://www.iana.org/assignments/charset-reg/BRF
BRF MIB = 2106
// TSCII is the MIB identifier with IANA name TSCII.
//
- // See http://www.iana.org/assignments/charset-reg/TSCII
+ // See https://www.iana.org/assignments/charset-reg/TSCII
TSCII MIB = 2107
// CP51932 is the MIB identifier with IANA name CP51932.
//
- // See http://www.iana.org/assignments/charset-reg/CP51932
+ // See https://www.iana.org/assignments/charset-reg/CP51932
CP51932 MIB = 2108
// Windows874 is the MIB identifier with IANA name windows-874.
//
- // See http://www.iana.org/assignments/charset-reg/windows-874
+ // See https://www.iana.org/assignments/charset-reg/windows-874
Windows874 MIB = 2109
// Windows1250 is the MIB identifier with IANA name windows-1250.
//
- // Microsoft http://www.iana.org/assignments/charset-reg/windows-1250
+ // Microsoft https://www.iana.org/assignments/charset-reg/windows-1250
Windows1250 MIB = 2250
// Windows1251 is the MIB identifier with IANA name windows-1251.
//
- // Microsoft http://www.iana.org/assignments/charset-reg/windows-1251
+ // Microsoft https://www.iana.org/assignments/charset-reg/windows-1251
Windows1251 MIB = 2251
// Windows1252 is the MIB identifier with IANA name windows-1252.
//
- // Microsoft http://www.iana.org/assignments/charset-reg/windows-1252
+ // Microsoft https://www.iana.org/assignments/charset-reg/windows-1252
Windows1252 MIB = 2252
// Windows1253 is the MIB identifier with IANA name windows-1253.
//
- // Microsoft http://www.iana.org/assignments/charset-reg/windows-1253
+ // Microsoft https://www.iana.org/assignments/charset-reg/windows-1253
Windows1253 MIB = 2253
// Windows1254 is the MIB identifier with IANA name windows-1254.
//
- // Microsoft http://www.iana.org/assignments/charset-reg/windows-1254
+ // Microsoft https://www.iana.org/assignments/charset-reg/windows-1254
Windows1254 MIB = 2254
// Windows1255 is the MIB identifier with IANA name windows-1255.
//
- // Microsoft http://www.iana.org/assignments/charset-reg/windows-1255
+ // Microsoft https://www.iana.org/assignments/charset-reg/windows-1255
Windows1255 MIB = 2255
// Windows1256 is the MIB identifier with IANA name windows-1256.
//
- // Microsoft http://www.iana.org/assignments/charset-reg/windows-1256
+ // Microsoft https://www.iana.org/assignments/charset-reg/windows-1256
Windows1256 MIB = 2256
// Windows1257 is the MIB identifier with IANA name windows-1257.
//
- // Microsoft http://www.iana.org/assignments/charset-reg/windows-1257
+ // Microsoft https://www.iana.org/assignments/charset-reg/windows-1257
Windows1257 MIB = 2257
// Windows1258 is the MIB identifier with IANA name windows-1258.
//
- // Microsoft http://www.iana.org/assignments/charset-reg/windows-1258
+ // Microsoft https://www.iana.org/assignments/charset-reg/windows-1258
Windows1258 MIB = 2258
// TIS620 is the MIB identifier with IANA name TIS-620.
@@ -1616,6 +1614,6 @@ const (
// CP50220 is the MIB identifier with IANA name CP50220.
//
- // See http://www.iana.org/assignments/charset-reg/CP50220
+ // See https://www.iana.org/assignments/charset-reg/CP50220
CP50220 MIB = 2260
)
diff --git a/vendor/golang.org/x/text/encoding/japanese/maketables.go b/vendor/golang.org/x/text/encoding/japanese/maketables.go
new file mode 100644
index 000000000..023957a67
--- /dev/null
+++ b/vendor/golang.org/x/text/encoding/japanese/maketables.go
@@ -0,0 +1,161 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates tables.go:
+// go run maketables.go | gofmt > tables.go
+
+// TODO: Emoji extensions?
+// https://www.unicode.org/faq/emoji_dingbats.html
+// https://www.unicode.org/Public/UNIDATA/EmojiSources.txt
+
+import (
+ "bufio"
+ "fmt"
+ "log"
+ "net/http"
+ "sort"
+ "strings"
+)
+
+// entry records where a value from the index files was found: jisCode is its
+// index pointer and table is the position of the source table in the list
+// processed by main.
+type entry struct {
+	jisCode int
+	table   int
+}
+
+// main downloads the WHATWG JIS X 0208 and JIS X 0212 index files and prints
+// the Go source of the decode and encode tables to standard output.
+//
+// A failed download, a non-200 response, an unparsable line or an
+// out-of-range JIS code terminates the program via log.Fatalf.
+func main() {
+	fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
+	fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n")
+	fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n")
+
+	// reverse is indexed by the 16-bit value read from the index files and
+	// holds the first JIS code seen for it; table == -1 marks an unused slot.
+	reverse := [65536]entry{}
+	for i := range reverse {
+		reverse[i].table = -1
+	}
+
+	tables := []struct {
+		url  string
+		name string
+	}{
+		{"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"},
+		{"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"},
+	}
+	for i, table := range tables {
+		res, err := http.Get(table.url)
+		if err != nil {
+			log.Fatalf("%q: Get: %v", table.url, err)
+		}
+		// Anything but 200 carries an error page rather than an index file.
+		if res.StatusCode != http.StatusOK {
+			res.Body.Close()
+			log.Fatalf("%q: Get: unexpected status %s", table.url, res.Status)
+		}
+
+		mapping := [65536]uint16{}
+
+		scanner := bufio.NewScanner(res.Body)
+		for scanner.Scan() {
+			s := strings.TrimSpace(scanner.Text())
+			if s == "" || s[0] == '#' {
+				continue
+			}
+			x, y := 0, uint16(0)
+			if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+				log.Fatalf("%q: could not parse %q", table.url, s)
+			}
+			if x < 0 || 120*94 <= x {
+				log.Fatalf("%q: JIS code %d is out of range", table.url, x)
+			}
+			mapping[x] = y
+			// Only the first JIS code seen for a value is kept.
+			if reverse[y].table == -1 {
+				reverse[y] = entry{jisCode: x, table: i}
+			}
+		}
+		err = scanner.Err()
+		// Close here instead of deferring: a defer inside this loop would
+		// keep every response body open until main returns.
+		res.Body.Close()
+		if err != nil {
+			log.Fatalf("%q: scanner error: %v", table.url, err)
+		}
+
+		fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n",
+			table.name, table.name, table.url)
+		fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name)
+		for code, m := range mapping {
+			if m != 0 {
+				fmt.Printf("\t%d: 0x%04X,\n", code, m)
+			}
+		}
+		fmt.Printf("}\n\n")
+	}
+
+	// Any run of at least separation continuous zero entries in the reverse map will
+	// be a separate encode table.
+	const separation = 1024
+
+	intervals := []interval(nil)
+	low, high := -1, -1
+	for i, v := range reverse {
+		if v.table == -1 {
+			continue
+		}
+		if low < 0 {
+			low = i
+		} else if i-high >= separation {
+			if high >= 0 {
+				intervals = append(intervals, interval{low, high})
+			}
+			low = i
+		}
+		high = i + 1
+	}
+	if high >= 0 {
+		intervals = append(intervals, interval{low, high})
+	}
+	sort.Sort(byDecreasingLength(intervals))
+
+	fmt.Printf("const (\n")
+	fmt.Printf("\tjis0208    = 1\n")
+	fmt.Printf("\tjis0212    = 2\n")
+	fmt.Printf("\tcodeMask   = 0x7f\n")
+	fmt.Printf("\tcodeShift  = 7\n")
+	fmt.Printf("\ttableShift = 14\n")
+	fmt.Printf(")\n\n")
+
+	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
+	fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n")
+	fmt.Printf("// sorted by decreasing length.\n")
+	for i, v := range intervals {
+		fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
+	}
+	fmt.Printf("//\n")
+	fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n")
+	fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n")
+	fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n")
+	fmt.Printf("// JIS code (94*j1 + j2) within that table.\n")
+	fmt.Printf("\n")
+
+	for i, v := range intervals {
+		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
+		fmt.Printf("var encode%d = [...]uint16{\n", i)
+		for j := v.low; j < v.high; j++ {
+			x := reverse[j]
+			if x.table == -1 {
+				continue
+			}
+			// Each entry is keyed by its offset from the interval's low bound.
+			fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n",
+				j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94)
+		}
+		fmt.Printf("}\n\n")
+	}
+}
+
+// interval describes the half-open range [low, high).
+type interval struct {
+	low  int
+	high int
+}
+
+// len reports how many values the interval spans.
+func (iv interval) len() int {
+	return iv.high - iv.low
+}
+
+// byDecreasingLength implements sort.Interface, ordering intervals from
+// longest to shortest.
+type byDecreasingLength []interval
+
+func (s byDecreasingLength) Len() int {
+	return len(s)
+}
+
+func (s byDecreasingLength) Less(i, j int) bool {
+	return s[j].len() < s[i].len()
+}
+
+func (s byDecreasingLength) Swap(i, j int) {
+	s[j], s[i] = s[i], s[j]
+}
diff --git a/vendor/golang.org/x/text/encoding/korean/maketables.go b/vendor/golang.org/x/text/encoding/korean/maketables.go
new file mode 100644
index 000000000..c84034fb6
--- /dev/null
+++ b/vendor/golang.org/x/text/encoding/korean/maketables.go
@@ -0,0 +1,143 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates tables.go:
+// go run maketables.go | gofmt > tables.go
+
+import (
+ "bufio"
+ "fmt"
+ "log"
+ "net/http"
+ "sort"
+ "strings"
+)
+
+// main downloads the WHATWG EUC-KR index file and prints the Go source of the
+// decode and encode tables to standard output.
+//
+// A failed download, a non-200 response, an unparsable line or an
+// out-of-range index pointer terminates the program via log.Fatalf.
+func main() {
+	fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
+	fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n")
+	fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n")
+
+	res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt")
+	if err != nil {
+		log.Fatalf("Get: %v", err)
+	}
+	defer res.Body.Close()
+	// Anything but 200 carries an error page rather than an index file.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("Get: unexpected status %s", res.Status)
+	}
+
+	const (
+		// split is the first pointer of the second region: below it there
+		// are 178 pointers per lead byte, from it on there are 94.
+		split = 178 * (0xc7 - 0x81)
+		// limit is one past the largest valid pointer.
+		limit = split + (0xfe-0xc7)*94 + (0xff - 0xa1)
+	)
+
+	mapping := [65536]uint16{}
+	reverse := [65536]uint16{}
+
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		// x is parsed as a signed int so that the range check below is
+		// meaningful; with an unsigned x the "x < 0" test could never fire.
+		x, y := 0, uint16(0)
+		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q", s)
+		}
+		if x < 0 || limit <= x {
+			log.Fatalf("EUC-KR code %d is out of range", x)
+		}
+		mapping[x] = y
+		// Only the first pointer seen for a value is kept.
+		if reverse[y] == 0 {
+			c0, c1 := uint16(0), uint16(0)
+			if x < split {
+				c0 = uint16(x/178) + 0x81
+				c1 = uint16(x % 178)
+				// The 178 trail positions map onto the byte ranges
+				// 0x41-0x5A, 0x61-0x7A and 0x81-0xFE.
+				switch {
+				case c1 < 1*26:
+					c1 += 0x41
+				case c1 < 2*26:
+					c1 += 0x47
+				default:
+					c1 += 0x4d
+				}
+			} else {
+				rest := x - split
+				c0 = uint16(rest/94) + 0xc7
+				c1 = uint16(rest%94) + 0xa1
+			}
+			reverse[y] = c0<<8 | c1
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("scanner error: %v", err)
+	}
+
+	fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n")
+	fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n")
+	fmt.Printf("var decode = [...]uint16{\n")
+	for i, v := range mapping {
+		if v != 0 {
+			fmt.Printf("\t%d: 0x%04X,\n", i, v)
+		}
+	}
+	fmt.Printf("}\n\n")
+
+	// Any run of at least separation continuous zero entries in the reverse map will
+	// be a separate encode table.
+	const separation = 1024
+
+	intervals := []interval(nil)
+	low, high := -1, -1
+	for i, v := range reverse {
+		if v == 0 {
+			continue
+		}
+		if low < 0 {
+			low = i
+		} else if i-high >= separation {
+			if high >= 0 {
+				intervals = append(intervals, interval{low, high})
+			}
+			low = i
+		}
+		high = i + 1
+	}
+	if high >= 0 {
+		intervals = append(intervals, interval{low, high})
+	}
+	sort.Sort(byDecreasingLength(intervals))
+
+	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
+	fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n")
+	fmt.Printf("// sorted by decreasing length.\n")
+	for i, v := range intervals {
+		fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
+	}
+	fmt.Printf("\n")
+
+	for i, v := range intervals {
+		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
+		fmt.Printf("var encode%d = [...]uint16{\n", i)
+		for j := v.low; j < v.high; j++ {
+			x := reverse[j]
+			if x == 0 {
+				continue
+			}
+			// Each entry is keyed by its offset from the interval's low bound.
+			fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
+		}
+		fmt.Printf("}\n\n")
+	}
+}
+
+// interval describes the half-open range [low, high).
+type interval struct {
+	low  int
+	high int
+}
+
+// len reports how many values the interval spans.
+func (iv interval) len() int {
+	return iv.high - iv.low
+}
+
+// byDecreasingLength implements sort.Interface, ordering intervals from
+// longest to shortest.
+type byDecreasingLength []interval
+
+func (s byDecreasingLength) Len() int {
+	return len(s)
+}
+
+func (s byDecreasingLength) Less(i, j int) bool {
+	return s[j].len() < s[i].len()
+}
+
+func (s byDecreasingLength) Swap(i, j int) {
+	s[j], s[i] = s[i], s[j]
+}
diff --git a/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go b/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
new file mode 100644
index 000000000..55016c786
--- /dev/null
+++ b/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
@@ -0,0 +1,161 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates tables.go:
+// go run maketables.go | gofmt > tables.go
+
+import (
+ "bufio"
+ "fmt"
+ "log"
+ "net/http"
+ "sort"
+ "strings"
+)
+
+func main() {
+ fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
+ fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n")
+ fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n")
+
+ printGB18030()
+ printGBK()
+}
+
+// printGB18030 downloads the WHATWG gb18030 index file and prints it to
+// standard output as a Go table of {pointer, value} pairs. Pairs in which
+// either number does not fit in 16 bits are left out.
+//
+// A failed download, a non-200 response, an unparsable line or a read error
+// terminates the program via log.Fatalf.
+func printGB18030() {
+	res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt")
+	if err != nil {
+		log.Fatalf("Get: %v", err)
+	}
+	defer res.Body.Close()
+	// Anything but 200 carries an error page rather than an index file.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("Get: unexpected status %s", res.Status)
+	}
+
+	fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n")
+	fmt.Printf("var gb18030 = [...][2]uint16{\n")
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		x, y := uint32(0), uint32(0)
+		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q", s)
+		}
+		if x < 0x10000 && y < 0x10000 {
+			fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y)
+		}
+	}
+	// Without this check a read error would silently yield a truncated table.
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("scanner error: %v", err)
+	}
+	fmt.Printf("}\n\n")
+}
+
+// printGBK downloads the WHATWG gbk index file and prints the Go source of
+// the GBK decode and encode tables to standard output.
+//
+// A failed download, a non-200 response, an unparsable line or an
+// out-of-range index pointer terminates the program via log.Fatalf.
+func printGBK() {
+	res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt")
+	if err != nil {
+		log.Fatalf("Get: %v", err)
+	}
+	defer res.Body.Close()
+	// Anything but 200 carries an error page rather than an index file.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("Get: unexpected status %s", res.Status)
+	}
+
+	mapping := [65536]uint16{}
+	reverse := [65536]uint16{}
+
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		// x is parsed as a signed int so that the range check below is
+		// meaningful; with an unsigned x the "x < 0" test could never fire.
+		x, y := 0, uint16(0)
+		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q", s)
+		}
+		if x < 0 || 126*190 <= x {
+			log.Fatalf("GBK code %d is out of range", x)
+		}
+		mapping[x] = y
+		// Only the first pointer seen for a value is kept.
+		if reverse[y] == 0 {
+			c0, c1 := x/190, x%190
+			// Step over trail byte 0x7F (0x40 + 0x3f).
+			if c1 >= 0x3f {
+				c1++
+			}
+			// The range check above bounds the result to 0xFEFE.
+			reverse[y] = uint16((0x81+c0)<<8 | (0x40 + c1))
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("scanner error: %v", err)
+	}
+
+	fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n")
+	fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n")
+	fmt.Printf("var decode = [...]uint16{\n")
+	for i, v := range mapping {
+		if v != 0 {
+			fmt.Printf("\t%d: 0x%04X,\n", i, v)
+		}
+	}
+	fmt.Printf("}\n\n")
+
+	// Any run of at least separation continuous zero entries in the reverse map will
+	// be a separate encode table.
+	const separation = 1024
+
+	intervals := []interval(nil)
+	low, high := -1, -1
+	for i, v := range reverse {
+		if v == 0 {
+			continue
+		}
+		if low < 0 {
+			low = i
+		} else if i-high >= separation {
+			if high >= 0 {
+				intervals = append(intervals, interval{low, high})
+			}
+			low = i
+		}
+		high = i + 1
+	}
+	if high >= 0 {
+		intervals = append(intervals, interval{low, high})
+	}
+	sort.Sort(byDecreasingLength(intervals))
+
+	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
+	fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n")
+	fmt.Printf("// sorted by decreasing length.\n")
+	for i, v := range intervals {
+		fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
+	}
+	fmt.Printf("\n")
+
+	for i, v := range intervals {
+		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
+		fmt.Printf("var encode%d = [...]uint16{\n", i)
+		for j := v.low; j < v.high; j++ {
+			x := reverse[j]
+			if x == 0 {
+				continue
+			}
+			// Each entry is keyed by its offset from the interval's low bound.
+			fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
+		}
+		fmt.Printf("}\n\n")
+	}
+}
+
+// interval describes the half-open range [low, high).
+type interval struct {
+	low  int
+	high int
+}
+
+// len reports how many values the interval spans.
+func (iv interval) len() int {
+	return iv.high - iv.low
+}
+
+// byDecreasingLength implements sort.Interface, ordering intervals from
+// longest to shortest.
+type byDecreasingLength []interval
+
+func (s byDecreasingLength) Len() int {
+	return len(s)
+}
+
+func (s byDecreasingLength) Less(i, j int) bool {
+	return s[j].len() < s[i].len()
+}
+
+func (s byDecreasingLength) Swap(i, j int) {
+	s[j], s[i] = s[i], s[j]
+}
diff --git a/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go b/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
new file mode 100644
index 000000000..cf7fdb31a
--- /dev/null
+++ b/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
@@ -0,0 +1,140 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates tables.go:
+// go run maketables.go | gofmt > tables.go
+
+import (
+ "bufio"
+ "fmt"
+ "log"
+ "net/http"
+ "sort"
+ "strings"
+)
+
+// main downloads the WHATWG Big5 index file and prints the Go source of the
+// decode and encode tables to standard output.
+//
+// A failed download, a non-200 response, an unparsable line, an out-of-range
+// index pointer or an out-of-range code point terminates the program via
+// log.Fatalf.
+func main() {
+	fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
+	fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n")
+	fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n")
+
+	res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt")
+	if err != nil {
+		log.Fatalf("Get: %v", err)
+	}
+	defer res.Body.Close()
+	// Anything but 200 carries an error page rather than an index file.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("Get: unexpected status %s", res.Status)
+	}
+
+	mapping := [65536]uint32{}
+	reverse := [65536 * 4]uint16{}
+
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		// x is parsed as a signed int so that the range check below is
+		// meaningful; with an unsigned x the "x < 0" test could never fire.
+		x, y := 0, uint32(0)
+		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q", s)
+		}
+		if x < 0 || 126*157 <= x {
+			log.Fatalf("Big5 code %d is out of range", x)
+		}
+		// y indexes reverse below; fail with a clear message instead of an
+		// index-out-of-range panic.
+		if uint32(len(reverse)) <= y {
+			log.Fatalf("Big5 code point 0x%X is out of range", y)
+		}
+		mapping[x] = y
+
+		// The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that
+		// "The index pointer for code point in index is the first pointer
+		// corresponding to code point in index", which would normally mean
+		// that the code below should be guarded by "if reverse[y] == 0", but
+		// last instead of first seems to match the behavior of
+		// "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in
+		// http://encoding.spec.whatwg.org/index-big5.txt, as index 2148
+		// (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc")
+		// and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc".
+		c0, c1 := x/157, x%157
+		// Trail bytes occupy 0x40-0x7E and 0xA1-0xFE.
+		if c1 < 0x3f {
+			c1 += 0x40
+		} else {
+			c1 += 0x62
+		}
+		// The range check above bounds the result to 0xFEFE.
+		reverse[y] = uint16((0x81+c0)<<8 | c1)
+	}
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("scanner error: %v", err)
+	}
+
+	fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n")
+	fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n")
+	fmt.Printf("var decode = [...]uint32{\n")
+	for i, v := range mapping {
+		if v != 0 {
+			fmt.Printf("\t%d: 0x%08X,\n", i, v)
+		}
+	}
+	fmt.Printf("}\n\n")
+
+	// Any run of at least separation continuous zero entries in the reverse map will
+	// be a separate encode table.
+	const separation = 1024
+
+	intervals := []interval(nil)
+	low, high := -1, -1
+	for i, v := range reverse {
+		if v == 0 {
+			continue
+		}
+		if low < 0 {
+			low = i
+		} else if i-high >= separation {
+			if high >= 0 {
+				intervals = append(intervals, interval{low, high})
+			}
+			low = i
+		}
+		high = i + 1
+	}
+	if high >= 0 {
+		intervals = append(intervals, interval{low, high})
+	}
+	sort.Sort(byDecreasingLength(intervals))
+
+	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
+	fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n")
+	fmt.Printf("// sorted by decreasing length.\n")
+	for i, v := range intervals {
+		fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high)
+	}
+	fmt.Printf("\n")
+
+	for i, v := range intervals {
+		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
+		fmt.Printf("var encode%d = [...]uint16{\n", i)
+		for j := v.low; j < v.high; j++ {
+			x := reverse[j]
+			if x == 0 {
+				continue
+			}
+			// Each entry is keyed by its offset from the interval's low bound.
+			fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
+		}
+		fmt.Printf("}\n\n")
+	}
+}
+
+// interval describes the half-open range [low, high).
+type interval struct {
+	low  int
+	high int
+}
+
+// len reports how many values the interval spans.
+func (iv interval) len() int {
+	return iv.high - iv.low
+}
+
+// byDecreasingLength implements sort.Interface, ordering intervals from
+// longest to shortest.
+type byDecreasingLength []interval
+
+func (s byDecreasingLength) Len() int {
+	return len(s)
+}
+
+func (s byDecreasingLength) Less(i, j int) bool {
+	return s[j].len() < s[i].len()
+}
+
+func (s byDecreasingLength) Swap(i, j int) {
+	s[j], s[i] = s[i], s[j]
+}