diff options
Diffstat (limited to 'vendor/golang.org/x/text')
18 files changed, 0 insertions, 4909 deletions
diff --git a/vendor/golang.org/x/text/encoding/charmap/maketables.go b/vendor/golang.org/x/text/encoding/charmap/maketables.go deleted file mode 100644 index f7941701e..000000000 --- a/vendor/golang.org/x/text/encoding/charmap/maketables.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" - "unicode/utf8" - - "golang.org/x/text/encoding" - "golang.org/x/text/internal/gen" -) - -const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + - "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - ` !"#$%&'()*+,-./0123456789:;<=>?` + - `@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` + - "`abcdefghijklmnopqrstuvwxyz{|}~\u007f" - -var encodings = []struct { - name string - mib string - comment string - varName string - replacement byte - mapping string -}{ - { - "IBM Code Page 037", - "IBM037", - "", - "CodePage037", - 0x3f, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm", - }, - { - "IBM Code Page 437", - "PC8CodePage437", - "", - "CodePage437", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm", - }, - { - "IBM Code Page 850", - "PC850Multilingual", - "", - "CodePage850", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm", - }, - { - "IBM Code Page 852", - "PCp852", - "", - "CodePage852", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm", - }, - { - "IBM Code Page 855", - "IBM855", - "", - "CodePage855", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm", - }, - { - "Windows Code Page 858", // PC latin1 with Euro - "IBM00858", - "", - "CodePage858", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm", - }, - { - "IBM Code Page 860", - "IBM860", - "", - "CodePage860", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm", - }, - { - "IBM Code Page 862", - "PC862LatinHebrew", - "", - "CodePage862", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm", - }, - { - "IBM Code Page 863", - "IBM863", - "", - "CodePage863", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm", - }, - { - "IBM Code Page 865", - "IBM865", - "", - "CodePage865", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm", - }, - { - "IBM Code Page 866", - "IBM866", - "", - "CodePage866", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-ibm866.txt", - }, - { - "IBM Code Page 1047", - "IBM1047", - "", - "CodePage1047", - 0x3f, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm", - }, - { - "IBM Code Page 1140", - "IBM01140", - "", - "CodePage1140", - 0x3f, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm", - }, - { - "ISO 8859-1", - "ISOLatin1", - "", - "ISO8859_1", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm", - }, - { - "ISO 8859-2", - "ISOLatin2", - "", - "ISO8859_2", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-2.txt", - }, - { - "ISO 8859-3", - "ISOLatin3", - "", - "ISO8859_3", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-3.txt", - }, - { - "ISO 8859-4", - "ISOLatin4", - "", - "ISO8859_4", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-4.txt", - }, - { - "ISO 8859-5", - "ISOLatinCyrillic", - "", - "ISO8859_5", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-5.txt", - }, - { - "ISO 8859-6", - "ISOLatinArabic", - "", - "ISO8859_6,ISO8859_6E,ISO8859_6I", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-6.txt", - }, - { - "ISO 8859-7", - "ISOLatinGreek", - "", - "ISO8859_7", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-7.txt", - }, - { - "ISO 8859-8", - "ISOLatinHebrew", - "", - "ISO8859_8,ISO8859_8E,ISO8859_8I", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-8.txt", - }, - { - "ISO 8859-9", - "ISOLatin5", - "", - "ISO8859_9", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm", - }, - { - "ISO 8859-10", - "ISOLatin6", - "", - "ISO8859_10", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-10.txt", - }, - { - "ISO 8859-13", - "ISO885913", - "", - "ISO8859_13", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-13.txt", - }, - { - "ISO 8859-14", - "ISO885914", - "", - "ISO8859_14", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-14.txt", - }, - { - "ISO 8859-15", - "ISO885915", - "", - "ISO8859_15", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-15.txt", - }, - { - "ISO 8859-16", - "ISO885916", - "", - "ISO8859_16", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-16.txt", - }, - { - "KOI8-R", - "KOI8R", - "", - "KOI8R", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-koi8-r.txt", - }, - { - "KOI8-U", - "KOI8U", - "", - "KOI8U", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-koi8-u.txt", - }, - { - "Macintosh", - "Macintosh", - "", - "Macintosh", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-macintosh.txt", - }, - { - "Macintosh Cyrillic", - "MacintoshCyrillic", - "", - "MacintoshCyrillic", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt", - }, - { - "Windows 874", - "Windows874", - "", - "Windows874", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-874.txt", - }, - { - "Windows 1250", - "Windows1250", - "", - "Windows1250", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1250.txt", - }, - { - "Windows 1251", - "Windows1251", - "", - "Windows1251", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1251.txt", - }, - { - "Windows 1252", - "Windows1252", - "", - "Windows1252", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1252.txt", - }, - { - "Windows 1253", - "Windows1253", - "", - "Windows1253", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1253.txt", - }, - { - "Windows 1254", - "Windows1254", - "", - "Windows1254", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1254.txt", - }, - { - "Windows 1255", - "Windows1255", - "", - "Windows1255", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1255.txt", - }, - { - "Windows 1256", - "Windows1256", - "", - "Windows1256", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1256.txt", - }, - { - "Windows 1257", - "Windows1257", - "", - "Windows1257", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1257.txt", - }, - { - "Windows 1258", - "Windows1258", - "", - "Windows1258", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1258.txt", - }, - { - "X-User-Defined", - "XUserDefined", - "It is defined at http://encoding.spec.whatwg.org/#x-user-defined", - "XUserDefined", - encoding.ASCIISub, - ascii + - "\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" + - "\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" + - "\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" + - "\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" + - "\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" + - "\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" + - "\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" + - "\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" + - "\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" + - "\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" + - "\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" + - "\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" + - "\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" + - "\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" + - "\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" + - "\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff", - }, -} - -func getWHATWG(url string) string { - res, err := http.Get(url) - if err != nil { - log.Fatalf("%q: Get: %v", url, err) - } - defer res.Body.Close() - - mapping := make([]rune, 128) - for i := range mapping { - mapping[i] = '\ufffd' - } - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := 0, 0 - if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0 || 128 <= x { - log.Fatalf("code %d is out of range", x) - } - if 0x80 <= y && y < 0xa0 { - // We diverge from the WHATWG spec by mapping control characters - // in the range [0x80, 0xa0) to U+FFFD. - continue - } - mapping[x] = rune(y) - } - return ascii + string(mapping) -} - -func getUCM(url string) string { - res, err := http.Get(url) - if err != nil { - log.Fatalf("%q: Get: %v", url, err) - } - defer res.Body.Close() - - mapping := make([]rune, 256) - for i := range mapping { - mapping[i] = '\ufffd' - } - - charsFound := 0 - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - var c byte - var r rune - if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil { - continue - } - mapping[c] = r - charsFound++ - } - - if charsFound < 200 { - log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound) - } - - return string(mapping) -} - -func main() { - mibs := map[string]bool{} - all := []string{} - - w := gen.NewCodeWriter() - defer w.WriteGoFile("tables.go", "charmap") - - printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) } - - printf("import (\n") - printf("\t\"golang.org/x/text/encoding\"\n") - printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n") - printf(")\n\n") - for _, e := range encodings { - varNames := strings.Split(e.varName, ",") - all = append(all, varNames...) - varName := varNames[0] - switch { - case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"): - e.mapping = getWHATWG(e.mapping) - case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"): - e.mapping = getUCM(e.mapping) - } - - asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00 - if asciiSuperset { - low = 0x80 - } - lvn := 1 - if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") { - lvn = 3 - } - lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:] - printf("// %s is the %s encoding.\n", varName, e.name) - if e.comment != "" { - printf("//\n// %s\n", e.comment) - } - printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n", - varName, lowerVarName, lowerVarName, e.name) - if mibs[e.mib] { - log.Fatalf("MIB type %q declared multiple times.", e.mib) - } - printf("mib: identifier.%s,\n", e.mib) - printf("asciiSuperset: %t,\n", asciiSuperset) - printf("low: 0x%02x,\n", low) - printf("replacement: 0x%02x,\n", e.replacement) - - printf("decode: [256]utf8Enc{\n") - i, backMapping := 0, map[rune]byte{} - for _, c := range e.mapping { - if _, ok := backMapping[c]; !ok && c != utf8.RuneError { - backMapping[c] = byte(i) - } - var buf [8]byte - n := utf8.EncodeRune(buf[:], c) - if n > 3 { - panic(fmt.Sprintf("rune %q (%U) is too long", c, c)) - } - printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2]) - if i%2 == 1 { - printf("\n") - } - i++ - } - printf("},\n") - - printf("encode: [256]uint32{\n") - encode := make([]uint32, 0, 256) - for c, i := range backMapping { - encode = append(encode, uint32(i)<<24|uint32(c)) - } - sort.Sort(byRune(encode)) - for len(encode) < cap(encode) { - encode = append(encode, encode[len(encode)-1]) - } - for i, enc := range encode { - printf("0x%08x,", enc) - if i%8 == 7 { - printf("\n") - } - } - printf("},\n}\n") - - // Add an estimate of the size of a single Charmap{} struct value, which - // includes two 256 elem arrays of 4 bytes and some extra fields, which - // align to 3 uint64s on 64-bit architectures. - w.Size += 2*4*256 + 3*8 - } - // TODO: add proper line breaking. - printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n")) -} - -type byRune []uint32 - -func (b byRune) Len() int { return len(b) } -func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff } -func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/htmlindex/gen.go b/vendor/golang.org/x/text/encoding/htmlindex/gen.go deleted file mode 100644 index ac6b4a77f..000000000 --- a/vendor/golang.org/x/text/encoding/htmlindex/gen.go +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "bytes" - "encoding/json" - "fmt" - "log" - "strings" - - "golang.org/x/text/internal/gen" -) - -type group struct { - Encodings []struct { - Labels []string - Name string - } -} - -func main() { - gen.Init() - - r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json") - var groups []group - if err := json.NewDecoder(r).Decode(&groups); err != nil { - log.Fatalf("Error reading encodings.json: %v", err) - } - - w := &bytes.Buffer{} - fmt.Fprintln(w, "type htmlEncoding byte") - fmt.Fprintln(w, "const (") - for i, g := range groups { - for _, e := range g.Encodings { - key := strings.ToLower(e.Name) - name := consts[key] - if name == "" { - log.Fatalf("No const defined for %s.", key) - } - if i == 0 { - fmt.Fprintf(w, "%s htmlEncoding = iota\n", name) - } else { - fmt.Fprintf(w, "%s\n", name) - } - } - } - fmt.Fprintln(w, "numEncodings") - fmt.Fprint(w, ")\n\n") - - fmt.Fprintln(w, "var canonical = [numEncodings]string{") - for _, g := range groups { - for _, e := range g.Encodings { - fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name)) - } - } - fmt.Fprint(w, "}\n\n") - - fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{") - for _, g := range groups { - for _, e := range g.Encodings { - for _, l := range e.Labels { - key := strings.ToLower(e.Name) - name := consts[key] - fmt.Fprintf(w, "%q: %s,\n", l, name) - } - } - } - fmt.Fprint(w, "}\n\n") - - var tags []string - fmt.Fprintln(w, "var localeMap = []htmlEncoding{") - for _, loc := range locales { - tags = append(tags, loc.tag) - fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag) - } - fmt.Fprint(w, "}\n\n") - - fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " ")) - - gen.WriteGoFile("tables.go", "htmlindex", w.Bytes()) -} - -// consts maps canonical encoding name to internal constant. -var consts = map[string]string{ - "utf-8": "utf8", - "ibm866": "ibm866", - "iso-8859-2": "iso8859_2", - "iso-8859-3": "iso8859_3", - "iso-8859-4": "iso8859_4", - "iso-8859-5": "iso8859_5", - "iso-8859-6": "iso8859_6", - "iso-8859-7": "iso8859_7", - "iso-8859-8": "iso8859_8", - "iso-8859-8-i": "iso8859_8I", - "iso-8859-10": "iso8859_10", - "iso-8859-13": "iso8859_13", - "iso-8859-14": "iso8859_14", - "iso-8859-15": "iso8859_15", - "iso-8859-16": "iso8859_16", - "koi8-r": "koi8r", - "koi8-u": "koi8u", - "macintosh": "macintosh", - "windows-874": "windows874", - "windows-1250": "windows1250", - "windows-1251": "windows1251", - "windows-1252": "windows1252", - "windows-1253": "windows1253", - "windows-1254": "windows1254", - "windows-1255": "windows1255", - "windows-1256": "windows1256", - "windows-1257": "windows1257", - "windows-1258": "windows1258", - "x-mac-cyrillic": "macintoshCyrillic", - "gbk": "gbk", - "gb18030": "gb18030", - // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG - "big5": "big5", - "euc-jp": "eucjp", - "iso-2022-jp": "iso2022jp", - "shift_jis": "shiftJIS", - "euc-kr": "euckr", - "replacement": "replacement", - "utf-16be": "utf16be", - "utf-16le": "utf16le", - "x-user-defined": "xUserDefined", -} - -// locales is taken from -// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm. -var locales = []struct{ tag, name string }{ - // The default value. Explicitly state latin to benefit from the exact - // script option, while still making 1252 the default encoding for languages - // written in Latin script. - {"und_Latn", "windows-1252"}, - {"ar", "windows-1256"}, - {"ba", "windows-1251"}, - {"be", "windows-1251"}, - {"bg", "windows-1251"}, - {"cs", "windows-1250"}, - {"el", "iso-8859-7"}, - {"et", "windows-1257"}, - {"fa", "windows-1256"}, - {"he", "windows-1255"}, - {"hr", "windows-1250"}, - {"hu", "iso-8859-2"}, - {"ja", "shift_jis"}, - {"kk", "windows-1251"}, - {"ko", "euc-kr"}, - {"ku", "windows-1254"}, - {"ky", "windows-1251"}, - {"lt", "windows-1257"}, - {"lv", "windows-1257"}, - {"mk", "windows-1251"}, - {"pl", "iso-8859-2"}, - {"ru", "windows-1251"}, - {"sah", "windows-1251"}, - {"sk", "windows-1250"}, - {"sl", "iso-8859-2"}, - {"sr", "windows-1251"}, - {"tg", "windows-1251"}, - {"th", "windows-874"}, - {"tr", "windows-1254"}, - {"tt", "windows-1251"}, - {"uk", "windows-1251"}, - {"vi", "windows-1258"}, - {"zh-hans", "gb18030"}, - {"zh-hant", "big5"}, -} diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go deleted file mode 100644 index 26cfef9c6..000000000 --- a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "bytes" - "encoding/xml" - "fmt" - "io" - "log" - "strings" - - "golang.org/x/text/internal/gen" -) - -type registry struct { - XMLName xml.Name `xml:"registry"` - Updated string `xml:"updated"` - Registry []struct { - ID string `xml:"id,attr"` - Record []struct { - Name string `xml:"name"` - Xref []struct { - Type string `xml:"type,attr"` - Data string `xml:"data,attr"` - } `xml:"xref"` - Desc struct { - Data string `xml:",innerxml"` - // Any []struct { - // Data string `xml:",chardata"` - // } `xml:",any"` - // Data string `xml:",chardata"` - } `xml:"description,"` - MIB string `xml:"value"` - Alias []string `xml:"alias"` - MIME string `xml:"preferred_alias"` - } `xml:"record"` - } `xml:"registry"` -} - -func main() { - r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml") - reg := ®istry{} - if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF { - log.Fatalf("Error decoding charset registry: %v", err) - } - if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" { - log.Fatalf("Unexpected ID %s", reg.Registry[0].ID) - } - - w := &bytes.Buffer{} - fmt.Fprintf(w, "const (\n") - for _, rec := range reg.Registry[0].Record { - constName := "" - for _, a := range rec.Alias { - if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 { - // Some of the constant definitions have comments in them. Strip those. - constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0]) - } - } - if constName == "" { - switch rec.MIB { - case "2085": - constName = "HZGB2312" // Not listed as alias for some reason. - default: - log.Fatalf("No cs alias defined for %s.", rec.MIB) - } - } - if rec.MIME != "" { - rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME) - } - fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME) - if len(rec.Desc.Data) > 0 { - fmt.Fprint(w, "// ") - d := xml.NewDecoder(strings.NewReader(rec.Desc.Data)) - inElem := true - attr := "" - for { - t, err := d.Token() - if err != nil { - if err != io.EOF { - log.Fatal(err) - } - break - } - switch x := t.(type) { - case xml.CharData: - attr = "" // Don't need attribute info. - a := bytes.Split([]byte(x), []byte("\n")) - for i, b := range a { - if b = bytes.TrimSpace(b); len(b) != 0 { - if !inElem && i > 0 { - fmt.Fprint(w, "\n// ") - } - inElem = false - fmt.Fprintf(w, "%s ", string(b)) - } - } - case xml.StartElement: - if x.Name.Local == "xref" { - inElem = true - use := false - for _, a := range x.Attr { - if a.Name.Local == "type" { - use = use || a.Value != "person" - } - if a.Name.Local == "data" && use { - // Patch up URLs to use https. From some links, the - // https version is different from the http one. - s := a.Value - s = strings.Replace(s, "http://", "https://", -1) - s = strings.Replace(s, "/unicode/", "/", -1) - attr = s + " " - } - } - } - case xml.EndElement: - inElem = false - fmt.Fprint(w, attr) - } - } - fmt.Fprint(w, "\n") - } - for _, x := range rec.Xref { - switch x.Type { - case "rfc": - fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data)) - case "uri": - fmt.Fprintf(w, "// Reference: %s\n", x.Data) - } - } - fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB) - fmt.Fprintln(w) - } - fmt.Fprintln(w, ")") - - gen.WriteGoFile("mib.go", "identifier", w.Bytes()) -} diff --git a/vendor/golang.org/x/text/encoding/japanese/maketables.go b/vendor/golang.org/x/text/encoding/japanese/maketables.go deleted file mode 100644 index 023957a67..000000000 --- a/vendor/golang.org/x/text/encoding/japanese/maketables.go +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This program generates tables.go: -// go run maketables.go | gofmt > tables.go - -// TODO: Emoji extensions? -// https://www.unicode.org/faq/emoji_dingbats.html -// https://www.unicode.org/Public/UNIDATA/EmojiSources.txt - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" -) - -type entry struct { - jisCode, table int -} - -func main() { - fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") - fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n") - fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n") - - reverse := [65536]entry{} - for i := range reverse { - reverse[i].table = -1 - } - - tables := []struct { - url string - name string - }{ - {"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"}, - {"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"}, - } - for i, table := range tables { - res, err := http.Get(table.url) - if err != nil { - log.Fatalf("%q: Get: %v", table.url, err) - } - defer res.Body.Close() - - mapping := [65536]uint16{} - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := 0, uint16(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("%q: could not parse %q", table.url, s) - } - if x < 0 || 120*94 <= x { - log.Fatalf("%q: JIS code %d is out of range", table.url, x) - } - mapping[x] = y - if reverse[y].table == -1 { - reverse[y] = entry{jisCode: x, table: i} - } - } - if err := scanner.Err(); err != nil { - log.Fatalf("%q: scanner error: %v", table.url, err) - } - - fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n", - table.name, table.name, table.url) - fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name) - for i, m := range mapping { - if m != 0 { - fmt.Printf("\t%d: 0x%04X,\n", i, m) - } - } - fmt.Printf("}\n\n") - } - - // Any run of at least separation continuous zero entries in the reverse map will - // be a separate encode table. - const separation = 1024 - - intervals := []interval(nil) - low, high := -1, -1 - for i, v := range reverse { - if v.table == -1 { - continue - } - if low < 0 { - low = i - } else if i-high >= separation { - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - low = i - } - high = i + 1 - } - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - sort.Sort(byDecreasingLength(intervals)) - - fmt.Printf("const (\n") - fmt.Printf("\tjis0208 = 1\n") - fmt.Printf("\tjis0212 = 2\n") - fmt.Printf("\tcodeMask = 0x7f\n") - fmt.Printf("\tcodeShift = 7\n") - fmt.Printf("\ttableShift = 14\n") - fmt.Printf(")\n\n") - - fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) - fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n") - fmt.Printf("// sorted by decreasing length.\n") - for i, v := range intervals { - fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) - } - fmt.Printf("//\n") - fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n") - fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n") - fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n") - fmt.Printf("// JIS code (94*j1 + j2) within that table.\n") - fmt.Printf("\n") - - for i, v := range intervals { - fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) - fmt.Printf("var encode%d = [...]uint16{\n", i) - for j := v.low; j < v.high; j++ { - x := reverse[j] - if x.table == -1 { - continue - } - fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n", - j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94) - } - fmt.Printf("}\n\n") - } -} - -// interval is a half-open interval [low, high). -type interval struct { - low, high int -} - -func (i interval) len() int { return i.high - i.low } - -// byDecreasingLength sorts intervals by decreasing length. -type byDecreasingLength []interval - -func (b byDecreasingLength) Len() int { return len(b) } -func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } -func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/korean/maketables.go b/vendor/golang.org/x/text/encoding/korean/maketables.go deleted file mode 100644 index c84034fb6..000000000 --- a/vendor/golang.org/x/text/encoding/korean/maketables.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This program generates tables.go: -// go run maketables.go | gofmt > tables.go - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" -) - -func main() { - fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") - fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n") - fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n") - - res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt") - if err != nil { - log.Fatalf("Get: %v", err) - } - defer res.Body.Close() - - mapping := [65536]uint16{} - reverse := [65536]uint16{} - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := uint16(0), uint16(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x { - log.Fatalf("EUC-KR code %d is out of range", x) - } - mapping[x] = y - if reverse[y] == 0 { - c0, c1 := uint16(0), uint16(0) - if x < 178*(0xc7-0x81) { - c0 = uint16(x/178) + 0x81 - c1 = uint16(x % 178) - switch { - case c1 < 1*26: - c1 += 0x41 - case c1 < 2*26: - c1 += 0x47 - default: - c1 += 0x4d - } - } else { - x -= 178 * (0xc7 - 0x81) - c0 = uint16(x/94) + 0xc7 - c1 = uint16(x%94) + 0xa1 - } - reverse[y] = c0<<8 | c1 - } - } - if err := scanner.Err(); err != nil { - log.Fatalf("scanner error: %v", err) - } - - fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n") - fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n") - fmt.Printf("var decode = [...]uint16{\n") - for i, v := range mapping { - if v != 0 { - fmt.Printf("\t%d: 0x%04X,\n", i, v) - } - } - fmt.Printf("}\n\n") - - // Any run of at least separation continuous zero entries in the reverse map will - // be a separate encode table. - const separation = 1024 - - intervals := []interval(nil) - low, high := -1, -1 - for i, v := range reverse { - if v == 0 { - continue - } - if low < 0 { - low = i - } else if i-high >= separation { - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - low = i - } - high = i + 1 - } - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - sort.Sort(byDecreasingLength(intervals)) - - fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) - fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n") - fmt.Printf("// sorted by decreasing length.\n") - for i, v := range intervals { - fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) - } - fmt.Printf("\n") - - for i, v := range intervals { - fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) - fmt.Printf("var encode%d = [...]uint16{\n", i) - for j := v.low; j < v.high; j++ { - x := reverse[j] - if x == 0 { - continue - } - fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) - } - fmt.Printf("}\n\n") - } -} - -// interval is a half-open interval [low, high). -type interval struct { - low, high int -} - -func (i interval) len() int { return i.high - i.low } - -// byDecreasingLength sorts intervals by decreasing length. -type byDecreasingLength []interval - -func (b byDecreasingLength) Len() int { return len(b) } -func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } -func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go b/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go deleted file mode 100644 index 55016c786..000000000 --- a/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This program generates tables.go: -// go run maketables.go | gofmt > tables.go - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" -) - -func main() { - fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") - fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n") - fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n") - - printGB18030() - printGBK() -} - -func printGB18030() { - res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt") - if err != nil { - log.Fatalf("Get: %v", err) - } - defer res.Body.Close() - - fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n") - fmt.Printf("var gb18030 = [...][2]uint16{\n") - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := uint32(0), uint32(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0x10000 && y < 0x10000 { - fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y) - } - } - fmt.Printf("}\n\n") -} - -func printGBK() { - res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt") - if err != nil { - log.Fatalf("Get: %v", err) - } - defer res.Body.Close() - - mapping := [65536]uint16{} - reverse := [65536]uint16{} - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := uint16(0), uint16(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0 || 126*190 <= x { - log.Fatalf("GBK code %d is out of range", x) - } - mapping[x] = y - if reverse[y] == 0 { - c0, c1 := x/190, x%190 - if c1 >= 0x3f { - c1++ - } - reverse[y] = (0x81+c0)<<8 | (0x40 + c1) - } - } - if err := scanner.Err(); err != nil { - log.Fatalf("scanner error: %v", err) - } - - fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n") - fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n") - fmt.Printf("var decode = [...]uint16{\n") - for i, v := range mapping { - if v != 0 { - fmt.Printf("\t%d: 0x%04X,\n", i, v) - } - } - fmt.Printf("}\n\n") - - // Any run of at least separation continuous zero entries in the reverse map will - // be a separate encode table. - const separation = 1024 - - intervals := []interval(nil) - low, high := -1, -1 - for i, v := range reverse { - if v == 0 { - continue - } - if low < 0 { - low = i - } else if i-high >= separation { - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - low = i - } - high = i + 1 - } - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - sort.Sort(byDecreasingLength(intervals)) - - fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) - fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n") - fmt.Printf("// sorted by decreasing length.\n") - for i, v := range intervals { - fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) - } - fmt.Printf("\n") - - for i, v := range intervals { - fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) - fmt.Printf("var encode%d = [...]uint16{\n", i) - for j := v.low; j < v.high; j++ { - x := reverse[j] - if x == 0 { - continue - } - fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) - } - fmt.Printf("}\n\n") - } -} - -// interval is a half-open interval [low, high). -type interval struct { - low, high int -} - -func (i interval) len() int { return i.high - i.low } - -// byDecreasingLength sorts intervals by decreasing length. -type byDecreasingLength []interval - -func (b byDecreasingLength) Len() int { return len(b) } -func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } -func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go b/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go deleted file mode 100644 index cf7fdb31a..000000000 --- a/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This program generates tables.go: -// go run maketables.go | gofmt > tables.go - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" -) - -func main() { - fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") - fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n") - fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n") - - res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt") - if err != nil { - log.Fatalf("Get: %v", err) - } - defer res.Body.Close() - - mapping := [65536]uint32{} - reverse := [65536 * 4]uint16{} - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := uint16(0), uint32(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0 || 126*157 <= x { - log.Fatalf("Big5 code %d is out of range", x) - } - mapping[x] = y - - // The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that - // "The index pointer for code point in index is the first pointer - // corresponding to code point in index", which would normally mean - // that the code below should be guarded by "if reverse[y] == 0", but - // last instead of first seems to match the behavior of - // "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in - // http://encoding.spec.whatwg.org/index-big5.txt, as index 2148 - // (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc") - // and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc". - c0, c1 := x/157, x%157 - if c1 < 0x3f { - c1 += 0x40 - } else { - c1 += 0x62 - } - reverse[y] = (0x81+c0)<<8 | c1 - } - if err := scanner.Err(); err != nil { - log.Fatalf("scanner error: %v", err) - } - - fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n") - fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n") - fmt.Printf("var decode = [...]uint32{\n") - for i, v := range mapping { - if v != 0 { - fmt.Printf("\t%d: 0x%08X,\n", i, v) - } - } - fmt.Printf("}\n\n") - - // Any run of at least separation continuous zero entries in the reverse map will - // be a separate encode table. - const separation = 1024 - - intervals := []interval(nil) - low, high := -1, -1 - for i, v := range reverse { - if v == 0 { - continue - } - if low < 0 { - low = i - } else if i-high >= separation { - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - low = i - } - high = i + 1 - } - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - sort.Sort(byDecreasingLength(intervals)) - - fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) - fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n") - fmt.Printf("// sorted by decreasing length.\n") - for i, v := range intervals { - fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high) - } - fmt.Printf("\n") - - for i, v := range intervals { - fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) - fmt.Printf("var encode%d = [...]uint16{\n", i) - for j := v.low; j < v.high; j++ { - x := reverse[j] - if x == 0 { - continue - } - fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) - } - fmt.Printf("}\n\n") - } -} - -// interval is a half-open interval [low, high). -type interval struct { - low, high int -} - -func (i interval) len() int { return i.high - i.low } - -// byDecreasingLength sorts intervals by decreasing length. -type byDecreasingLength []interval - -func (b byDecreasingLength) Len() int { return len(b) } -func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } -func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/internal/language/compact/gen.go b/vendor/golang.org/x/text/internal/language/compact/gen.go deleted file mode 100644 index 0c36a052f..000000000 --- a/vendor/golang.org/x/text/internal/language/compact/gen.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// Language tag table generator. -// Data read from the web. - -package main - -import ( - "flag" - "fmt" - "log" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/unicode/cldr" -) - -var ( - test = flag.Bool("test", - false, - "test existing tables; can be used to compare web data with package data.") - outputFile = flag.String("output", - "tables.go", - "output file for generated tables") -) - -func main() { - gen.Init() - - w := gen.NewCodeWriter() - defer w.WriteGoFile("tables.go", "compact") - - fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`) - - b := newBuilder(w) - gen.WriteCLDRVersion(w) - - b.writeCompactIndex() -} - -type builder struct { - w *gen.CodeWriter - data *cldr.CLDR - supp *cldr.SupplementalData -} - -func newBuilder(w *gen.CodeWriter) *builder { - r := gen.OpenCLDRCoreZip() - defer r.Close() - d := &cldr.Decoder{} - data, err := d.DecodeZip(r) - if err != nil { - log.Fatal(err) - } - b := builder{ - w: w, - data: data, - supp: data.Supplemental(), - } - return &b -} diff --git a/vendor/golang.org/x/text/internal/language/compact/gen_index.go b/vendor/golang.org/x/text/internal/language/compact/gen_index.go deleted file mode 100644 index 136cefaf0..000000000 --- a/vendor/golang.org/x/text/internal/language/compact/gen_index.go +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This file generates derivative tables based on the language package itself. - -import ( - "fmt" - "log" - "sort" - "strings" - - "golang.org/x/text/internal/language" -) - -// Compact indices: -// Note -va-X variants only apply to localization variants. -// BCP variants only ever apply to language. -// The only ambiguity between tags is with regions. - -func (b *builder) writeCompactIndex() { - // Collect all language tags for which we have any data in CLDR. - m := map[language.Tag]bool{} - for _, lang := range b.data.Locales() { - // We include all locales unconditionally to be consistent with en_US. - // We want en_US, even though it has no data associated with it. - - // TODO: put any of the languages for which no data exists at the end - // of the index. This allows all components based on ICU to use that - // as the cutoff point. - // if x := data.RawLDML(lang); false || - // x.LocaleDisplayNames != nil || - // x.Characters != nil || - // x.Delimiters != nil || - // x.Measurement != nil || - // x.Dates != nil || - // x.Numbers != nil || - // x.Units != nil || - // x.ListPatterns != nil || - // x.Collations != nil || - // x.Segmentations != nil || - // x.Rbnf != nil || - // x.Annotations != nil || - // x.Metadata != nil { - - // TODO: support POSIX natively, albeit non-standard. - tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1)) - m[tag] = true - // } - } - - // TODO: plural rules are also defined for the deprecated tags: - // iw mo sh tl - // Consider removing these as compact tags. - - // Include locales for plural rules, which uses a different structure. - for _, plurals := range b.supp.Plurals { - for _, rules := range plurals.PluralRules { - for _, lang := range strings.Split(rules.Locales, " ") { - m[language.Make(lang)] = true - } - } - } - - var coreTags []language.CompactCoreInfo - var special []string - - for t := range m { - if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" { - log.Fatalf("Unexpected extension %v in %v", x, t) - } - if len(t.Variants()) == 0 && len(t.Extensions()) == 0 { - cci, ok := language.GetCompactCore(t) - if !ok { - log.Fatalf("Locale for non-basic language %q", t) - } - coreTags = append(coreTags, cci) - } else { - special = append(special, t.String()) - } - } - - w := b.w - - sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] }) - sort.Strings(special) - - w.WriteComment(` - NumCompactTags is the number of common tags. The maximum tag is - NumCompactTags-1.`) - w.WriteConst("NumCompactTags", len(m)) - - fmt.Fprintln(w, "const (") - for i, t := range coreTags { - fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i) - } - for i, t := range special { - fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags)) - } - fmt.Fprintln(w, ")") - - w.WriteVar("coreTags", coreTags) - - w.WriteConst("specialTagsStr", strings.Join(special, " ")) -} - -func ident(s string) string { - return strings.Replace(s, "-", "", -1) + "Index" -} diff --git a/vendor/golang.org/x/text/internal/language/compact/gen_parents.go b/vendor/golang.org/x/text/internal/language/compact/gen_parents.go deleted file mode 100644 index 9543d5832..000000000 --- a/vendor/golang.org/x/text/internal/language/compact/gen_parents.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "log" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/language" - "golang.org/x/text/internal/language/compact" - "golang.org/x/text/unicode/cldr" -) - -func main() { - r := gen.OpenCLDRCoreZip() - defer r.Close() - - d := &cldr.Decoder{} - data, err := d.DecodeZip(r) - if err != nil { - log.Fatalf("DecodeZip: %v", err) - } - - w := gen.NewCodeWriter() - defer w.WriteGoFile("parents.go", "compact") - - // Create parents table. - type ID uint16 - parents := make([]ID, compact.NumCompactTags) - for _, loc := range data.Locales() { - tag := language.MustParse(loc) - index, ok := compact.FromTag(tag) - if !ok { - continue - } - parentIndex := compact.ID(0) // und - for p := tag.Parent(); p != language.Und; p = p.Parent() { - if x, ok := compact.FromTag(p); ok { - parentIndex = x - break - } - } - parents[index] = ID(parentIndex) - } - - w.WriteComment(` - parents maps a compact index of a tag to the compact index of the parent of - this tag.`) - w.WriteVar("parents", parents) -} diff --git a/vendor/golang.org/x/text/internal/language/gen.go b/vendor/golang.org/x/text/internal/language/gen.go deleted file mode 100644 index cdcc7febc..000000000 --- a/vendor/golang.org/x/text/internal/language/gen.go +++ /dev/null @@ -1,1520 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// Language tag table generator. -// Data read from the web. - -package main - -import ( - "bufio" - "flag" - "fmt" - "io" - "io/ioutil" - "log" - "math" - "reflect" - "regexp" - "sort" - "strconv" - "strings" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/tag" - "golang.org/x/text/unicode/cldr" -) - -var ( - test = flag.Bool("test", - false, - "test existing tables; can be used to compare web data with package data.") - outputFile = flag.String("output", - "tables.go", - "output file for generated tables") -) - -var comment = []string{ - ` -lang holds an alphabetically sorted list of ISO-639 language identifiers. -All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag. -For 2-byte language identifiers, the two successive bytes have the following meaning: - - if the first letter of the 2- and 3-letter ISO codes are the same: - the second and third letter of the 3-letter ISO code. - - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. -For 3-byte language identifiers the 4th byte is 0.`, - ` -langNoIndex is a bit vector of all 3-letter language codes that are not used as an index -in lookup tables. The language ids for these language codes are derived directly -from the letters and are not consecutive.`, - ` -altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives -to 2-letter language codes that cannot be derived using the method described above. -Each 3-letter code is followed by its 1-byte langID.`, - ` -altLangIndex is used to convert indexes in altLangISO3 to langIDs.`, - ` -AliasMap maps langIDs to their suggested replacements.`, - ` -script is an alphabetically sorted list of ISO 15924 codes. The index -of the script in the string, divided by 4, is the internal scriptID.`, - ` -isoRegionOffset needs to be added to the index of regionISO to obtain the regionID -for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for -the UN.M49 codes used for groups.)`, - ` -regionISO holds a list of alphabetically sorted 2-letter ISO region codes. -Each 2-letter codes is followed by two bytes with the following meaning: - - [A-Z}{2}: the first letter of the 2-letter code plus these two - letters form the 3-letter ISO code. - - 0, n: index into altRegionISO3.`, - ` -regionTypes defines the status of a region for various standards.`, - ` -m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are -codes indicating collections of regions.`, - ` -m49Index gives indexes into fromM49 based on the three most significant bits -of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in - fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] -for an entry where the first 7 bits match the 7 lsb of the UN.M49 code. -The region code is stored in the 9 lsb of the indexed value.`, - ` -fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details.`, - ` -altRegionISO3 holds a list of 3-letter region codes that cannot be -mapped to 2-letter codes using the default algorithm. This is a short list.`, - ` -altRegionIDs holds a list of regionIDs the positions of which match those -of the 3-letter ISO codes in altRegionISO3.`, - ` -variantNumSpecialized is the number of specialized variants in variants.`, - ` -suppressScript is an index from langID to the dominant script for that language, -if it exists. If a script is given, it should be suppressed from the language tag.`, - ` -likelyLang is a lookup table, indexed by langID, for the most likely -scripts and regions given incomplete information. If more entries exist for a -given language, region and script are the index and size respectively -of the list in likelyLangList.`, - ` -likelyLangList holds lists info associated with likelyLang.`, - ` -likelyRegion is a lookup table, indexed by regionID, for the most likely -languages and scripts given incomplete information. If more entries exist -for a given regionID, lang and script are the index and size respectively -of the list in likelyRegionList. -TODO: exclude containers and user-definable regions from the list.`, - ` -likelyRegionList holds lists info associated with likelyRegion.`, - ` -likelyScript is a lookup table, indexed by scriptID, for the most likely -languages and regions given a script.`, - ` -nRegionGroups is the number of region groups.`, - ` -regionInclusion maps region identifiers to sets of regions in regionInclusionBits, -where each set holds all groupings that are directly connected in a region -containment graph.`, - ` -regionInclusionBits is an array of bit vectors where every vector represents -a set of region groupings. These sets are used to compute the distance -between two regions for the purpose of language matching.`, - ` -regionInclusionNext marks, for each entry in regionInclusionBits, the set of -all groups that are reachable from the groups set in the respective entry.`, -} - -// TODO: consider changing some of these structures to tries. This can reduce -// memory, but may increase the need for memory allocations. This could be -// mitigated if we can piggyback on language tags for common cases. - -func failOnError(e error) { - if e != nil { - log.Panic(e) - } -} - -type setType int - -const ( - Indexed setType = 1 + iota // all elements must be of same size - Linear -) - -type stringSet struct { - s []string - sorted, frozen bool - - // We often need to update values after the creation of an index is completed. - // We include a convenience map for keeping track of this. - update map[string]string - typ setType // used for checking. -} - -func (ss *stringSet) clone() stringSet { - c := *ss - c.s = append([]string(nil), c.s...) - return c -} - -func (ss *stringSet) setType(t setType) { - if ss.typ != t && ss.typ != 0 { - log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ) - } -} - -// parse parses a whitespace-separated string and initializes ss with its -// components. -func (ss *stringSet) parse(s string) { - scan := bufio.NewScanner(strings.NewReader(s)) - scan.Split(bufio.ScanWords) - for scan.Scan() { - ss.add(scan.Text()) - } -} - -func (ss *stringSet) assertChangeable() { - if ss.frozen { - log.Panic("attempt to modify a frozen stringSet") - } -} - -func (ss *stringSet) add(s string) { - ss.assertChangeable() - ss.s = append(ss.s, s) - ss.sorted = ss.frozen -} - -func (ss *stringSet) freeze() { - ss.compact() - ss.frozen = true -} - -func (ss *stringSet) compact() { - if ss.sorted { - return - } - a := ss.s - sort.Strings(a) - k := 0 - for i := 1; i < len(a); i++ { - if a[k] != a[i] { - a[k+1] = a[i] - k++ - } - } - ss.s = a[:k+1] - ss.sorted = ss.frozen -} - -type funcSorter struct { - fn func(a, b string) bool - sort.StringSlice -} - -func (s funcSorter) Less(i, j int) bool { - return s.fn(s.StringSlice[i], s.StringSlice[j]) -} - -func (ss *stringSet) sortFunc(f func(a, b string) bool) { - ss.compact() - sort.Sort(funcSorter{f, sort.StringSlice(ss.s)}) -} - -func (ss *stringSet) remove(s string) { - ss.assertChangeable() - if i, ok := ss.find(s); ok { - copy(ss.s[i:], ss.s[i+1:]) - ss.s = ss.s[:len(ss.s)-1] - } -} - -func (ss *stringSet) replace(ol, nu string) { - ss.s[ss.index(ol)] = nu - ss.sorted = ss.frozen -} - -func (ss *stringSet) index(s string) int { - ss.setType(Indexed) - i, ok := ss.find(s) - if !ok { - if i < len(ss.s) { - log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i]) - } - log.Panicf("find: item %q is not in list", s) - - } - return i -} - -func (ss *stringSet) find(s string) (int, bool) { - ss.compact() - i := sort.SearchStrings(ss.s, s) - return i, i != len(ss.s) && ss.s[i] == s -} - -func (ss *stringSet) slice() []string { - ss.compact() - return ss.s -} - -func (ss *stringSet) updateLater(v, key string) { - if ss.update == nil { - ss.update = map[string]string{} - } - ss.update[v] = key -} - -// join joins the string and ensures that all entries are of the same length. -func (ss *stringSet) join() string { - ss.setType(Indexed) - n := len(ss.s[0]) - for _, s := range ss.s { - if len(s) != n { - log.Panicf("join: not all entries are of the same length: %q", s) - } - } - ss.s = append(ss.s, strings.Repeat("\xff", n)) - return strings.Join(ss.s, "") -} - -// ianaEntry holds information for an entry in the IANA Language Subtag Repository. -// All types use the same entry. -// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various -// fields. -type ianaEntry struct { - typ string - description []string - scope string - added string - preferred string - deprecated string - suppressScript string - macro string - prefix []string -} - -type builder struct { - w *gen.CodeWriter - hw io.Writer // MultiWriter for w and w.Hash - data *cldr.CLDR - supp *cldr.SupplementalData - - // indices - locale stringSet // common locales - lang stringSet // canonical language ids (2 or 3 letter ISO codes) with data - langNoIndex stringSet // 3-letter ISO codes with no associated data - script stringSet // 4-letter ISO codes - region stringSet // 2-letter ISO or 3-digit UN M49 codes - variant stringSet // 4-8-alphanumeric variant code. - - // Region codes that are groups with their corresponding group IDs. - groups map[int]index - - // langInfo - registry map[string]*ianaEntry -} - -type index uint - -func newBuilder(w *gen.CodeWriter) *builder { - r := gen.OpenCLDRCoreZip() - defer r.Close() - d := &cldr.Decoder{} - data, err := d.DecodeZip(r) - failOnError(err) - b := builder{ - w: w, - hw: io.MultiWriter(w, w.Hash), - data: data, - supp: data.Supplemental(), - } - b.parseRegistry() - return &b -} - -func (b *builder) parseRegistry() { - r := gen.OpenIANAFile("assignments/language-subtag-registry") - defer r.Close() - b.registry = make(map[string]*ianaEntry) - - scan := bufio.NewScanner(r) - scan.Split(bufio.ScanWords) - var record *ianaEntry - for more := scan.Scan(); more; { - key := scan.Text() - more = scan.Scan() - value := scan.Text() - switch key { - case "Type:": - record = &ianaEntry{typ: value} - case "Subtag:", "Tag:": - if s := strings.SplitN(value, "..", 2); len(s) > 1 { - for a := s[0]; a <= s[1]; a = inc(a) { - b.addToRegistry(a, record) - } - } else { - b.addToRegistry(value, record) - } - case "Suppress-Script:": - record.suppressScript = value - case "Added:": - record.added = value - case "Deprecated:": - record.deprecated = value - case "Macrolanguage:": - record.macro = value - case "Preferred-Value:": - record.preferred = value - case "Prefix:": - record.prefix = append(record.prefix, value) - case "Scope:": - record.scope = value - case "Description:": - buf := []byte(value) - for more = scan.Scan(); more; more = scan.Scan() { - b := scan.Bytes() - if b[0] == '%' || b[len(b)-1] == ':' { - break - } - buf = append(buf, ' ') - buf = append(buf, b...) - } - record.description = append(record.description, string(buf)) - continue - default: - continue - } - more = scan.Scan() - } - if scan.Err() != nil { - log.Panic(scan.Err()) - } -} - -func (b *builder) addToRegistry(key string, entry *ianaEntry) { - if info, ok := b.registry[key]; ok { - if info.typ != "language" || entry.typ != "extlang" { - log.Fatalf("parseRegistry: tag %q already exists", key) - } - } else { - b.registry[key] = entry - } -} - -var commentIndex = make(map[string]string) - -func init() { - for _, s := range comment { - key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0]) - commentIndex[key] = s - } -} - -func (b *builder) comment(name string) { - if s := commentIndex[name]; len(s) > 0 { - b.w.WriteComment(s) - } else { - fmt.Fprintln(b.w) - } -} - -func (b *builder) pf(f string, x ...interface{}) { - fmt.Fprintf(b.hw, f, x...) - fmt.Fprint(b.hw, "\n") -} - -func (b *builder) p(x ...interface{}) { - fmt.Fprintln(b.hw, x...) -} - -func (b *builder) addSize(s int) { - b.w.Size += s - b.pf("// Size: %d bytes", s) -} - -func (b *builder) writeConst(name string, x interface{}) { - b.comment(name) - b.w.WriteConst(name, x) -} - -// writeConsts computes f(v) for all v in values and writes the results -// as constants named _v to a single constant block. -func (b *builder) writeConsts(f func(string) int, values ...string) { - b.pf("const (") - for _, v := range values { - b.pf("\t_%s = %v", v, f(v)) - } - b.pf(")") -} - -// writeType writes the type of the given value, which must be a struct. -func (b *builder) writeType(value interface{}) { - b.comment(reflect.TypeOf(value).Name()) - b.w.WriteType(value) -} - -func (b *builder) writeSlice(name string, ss interface{}) { - b.writeSliceAddSize(name, 0, ss) -} - -func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) { - b.comment(name) - b.w.Size += extraSize - v := reflect.ValueOf(ss) - t := v.Type().Elem() - b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len()) - - fmt.Fprintf(b.w, "var %s = ", name) - b.w.WriteArray(ss) - b.p() -} - -type FromTo struct { - From, To uint16 -} - -func (b *builder) writeSortedMap(name string, ss *stringSet, index func(s string) uint16) { - ss.sortFunc(func(a, b string) bool { - return index(a) < index(b) - }) - m := []FromTo{} - for _, s := range ss.s { - m = append(m, FromTo{index(s), index(ss.update[s])}) - } - b.writeSlice(name, m) -} - -const base = 'z' - 'a' + 1 - -func strToInt(s string) uint { - v := uint(0) - for i := 0; i < len(s); i++ { - v *= base - v += uint(s[i] - 'a') - } - return v -} - -// converts the given integer to the original ASCII string passed to strToInt. -// len(s) must match the number of characters obtained. -func intToStr(v uint, s []byte) { - for i := len(s) - 1; i >= 0; i-- { - s[i] = byte(v%base) + 'a' - v /= base - } -} - -func (b *builder) writeBitVector(name string, ss []string) { - vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8))) - for _, s := range ss { - v := strToInt(s) - vec[v/8] |= 1 << (v % 8) - } - b.writeSlice(name, vec) -} - -// TODO: convert this type into a list or two-stage trie. -func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) { - b.comment(name) - v := reflect.ValueOf(m) - sz := v.Len() * (2 + int(v.Type().Key().Size())) - for _, k := range m { - sz += len(k) - } - b.addSize(sz) - keys := []string{} - b.pf(`var %s = map[string]uint16{`, name) - for k := range m { - keys = append(keys, k) - } - sort.Strings(keys) - for _, k := range keys { - b.pf("\t%q: %v,", k, f(m[k])) - } - b.p("}") -} - -func (b *builder) writeMap(name string, m interface{}) { - b.comment(name) - v := reflect.ValueOf(m) - sz := v.Len() * (2 + int(v.Type().Key().Size()) + int(v.Type().Elem().Size())) - b.addSize(sz) - f := strings.FieldsFunc(fmt.Sprintf("%#v", m), func(r rune) bool { - return strings.IndexRune("{}, ", r) != -1 - }) - sort.Strings(f[1:]) - b.pf(`var %s = %s{`, name, f[0]) - for _, kv := range f[1:] { - b.pf("\t%s,", kv) - } - b.p("}") -} - -func (b *builder) langIndex(s string) uint16 { - if s == "und" { - return 0 - } - if i, ok := b.lang.find(s); ok { - return uint16(i) - } - return uint16(strToInt(s)) + uint16(len(b.lang.s)) -} - -// inc advances the string to its lexicographical successor. -func inc(s string) string { - const maxTagLength = 4 - var buf [maxTagLength]byte - intToStr(strToInt(strings.ToLower(s))+1, buf[:len(s)]) - for i := 0; i < len(s); i++ { - if s[i] <= 'Z' { - buf[i] -= 'a' - 'A' - } - } - return string(buf[:len(s)]) -} - -func (b *builder) parseIndices() { - meta := b.supp.Metadata - - for k, v := range b.registry { - var ss *stringSet - switch v.typ { - case "language": - if len(k) == 2 || v.suppressScript != "" || v.scope == "special" { - b.lang.add(k) - continue - } else { - ss = &b.langNoIndex - } - case "region": - ss = &b.region - case "script": - ss = &b.script - case "variant": - ss = &b.variant - default: - continue - } - ss.add(k) - } - // Include any language for which there is data. - for _, lang := range b.data.Locales() { - if x := b.data.RawLDML(lang); false || - x.LocaleDisplayNames != nil || - x.Characters != nil || - x.Delimiters != nil || - x.Measurement != nil || - x.Dates != nil || - x.Numbers != nil || - x.Units != nil || - x.ListPatterns != nil || - x.Collations != nil || - x.Segmentations != nil || - x.Rbnf != nil || - x.Annotations != nil || - x.Metadata != nil { - - from := strings.Split(lang, "_") - if lang := from[0]; lang != "root" { - b.lang.add(lang) - } - } - } - // Include locales for plural rules, which uses a different structure. - for _, plurals := range b.data.Supplemental().Plurals { - for _, rules := range plurals.PluralRules { - for _, lang := range strings.Split(rules.Locales, " ") { - if lang = strings.Split(lang, "_")[0]; lang != "root" { - b.lang.add(lang) - } - } - } - } - // Include languages in likely subtags. - for _, m := range b.supp.LikelySubtags.LikelySubtag { - from := strings.Split(m.From, "_") - b.lang.add(from[0]) - } - // Include ISO-639 alpha-3 bibliographic entries. - for _, a := range meta.Alias.LanguageAlias { - if a.Reason == "bibliographic" { - b.langNoIndex.add(a.Type) - } - } - // Include regions in territoryAlias (not all are in the IANA registry!) - for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { - if len(reg.Type) == 2 { - b.region.add(reg.Type) - } - } - - for _, s := range b.lang.s { - if len(s) == 3 { - b.langNoIndex.remove(s) - } - } - b.writeConst("NumLanguages", len(b.lang.slice())+len(b.langNoIndex.slice())) - b.writeConst("NumScripts", len(b.script.slice())) - b.writeConst("NumRegions", len(b.region.slice())) - - // Add dummy codes at the start of each list to represent "unspecified". - b.lang.add("---") - b.script.add("----") - b.region.add("---") - - // common locales - b.locale.parse(meta.DefaultContent.Locales) -} - -// TODO: region inclusion data will probably not be use used in future matchers. - -func (b *builder) computeRegionGroups() { - b.groups = make(map[int]index) - - // Create group indices. - for i := 1; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID. - b.groups[i] = index(len(b.groups)) - } - for _, g := range b.supp.TerritoryContainment.Group { - // Skip UN and EURO zone as they are flattening the containment - // relationship. - if g.Type == "EZ" || g.Type == "UN" { - continue - } - group := b.region.index(g.Type) - if _, ok := b.groups[group]; !ok { - b.groups[group] = index(len(b.groups)) - } - } - if len(b.groups) > 64 { - log.Fatalf("only 64 groups supported, found %d", len(b.groups)) - } - b.writeConst("nRegionGroups", len(b.groups)) -} - -var langConsts = []string{ - "af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es", - "et", "fa", "fi", "fil", "fr", "gu", "he", "hi", "hr", "hu", "hy", "id", "is", - "it", "ja", "ka", "kk", "km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml", - "mn", "mo", "mr", "ms", "mul", "my", "nb", "ne", "nl", "no", "pa", "pl", "pt", - "ro", "ru", "sh", "si", "sk", "sl", "sq", "sr", "sv", "sw", "ta", "te", "th", - "tl", "tn", "tr", "uk", "ur", "uz", "vi", "zh", "zu", - - // constants for grandfathered tags (if not already defined) - "jbo", "ami", "bnn", "hak", "tlh", "lb", "nv", "pwn", "tao", "tay", "tsu", - "nn", "sfb", "vgt", "sgg", "cmn", "nan", "hsn", -} - -// writeLanguage generates all tables needed for language canonicalization. -func (b *builder) writeLanguage() { - meta := b.supp.Metadata - - b.writeConst("nonCanonicalUnd", b.lang.index("und")) - b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...) - b.writeConst("langPrivateStart", b.langIndex("qaa")) - b.writeConst("langPrivateEnd", b.langIndex("qtz")) - - // Get language codes that need to be mapped (overlong 3-letter codes, - // deprecated 2-letter codes, legacy and grandfathered tags.) - langAliasMap := stringSet{} - aliasTypeMap := map[string]AliasType{} - - // altLangISO3 get the alternative ISO3 names that need to be mapped. - altLangISO3 := stringSet{} - // Add dummy start to avoid the use of index 0. - altLangISO3.add("---") - altLangISO3.updateLater("---", "aa") - - lang := b.lang.clone() - for _, a := range meta.Alias.LanguageAlias { - if a.Replacement == "" { - a.Replacement = "und" - } - // TODO: support mapping to tags - repl := strings.SplitN(a.Replacement, "_", 2)[0] - if a.Reason == "overlong" { - if len(a.Replacement) == 2 && len(a.Type) == 3 { - lang.updateLater(a.Replacement, a.Type) - } - } else if len(a.Type) <= 3 { - switch a.Reason { - case "macrolanguage": - aliasTypeMap[a.Type] = Macro - case "deprecated": - // handled elsewhere - continue - case "bibliographic", "legacy": - if a.Type == "no" { - continue - } - aliasTypeMap[a.Type] = Legacy - default: - log.Fatalf("new %s alias: %s", a.Reason, a.Type) - } - langAliasMap.add(a.Type) - langAliasMap.updateLater(a.Type, repl) - } - } - // Manually add the mapping of "nb" (Norwegian) to its macro language. - // This can be removed if CLDR adopts this change. - langAliasMap.add("nb") - langAliasMap.updateLater("nb", "no") - aliasTypeMap["nb"] = Macro - - for k, v := range b.registry { - // Also add deprecated values for 3-letter ISO codes, which CLDR omits. - if v.typ == "language" && v.deprecated != "" && v.preferred != "" { - langAliasMap.add(k) - langAliasMap.updateLater(k, v.preferred) - aliasTypeMap[k] = Deprecated - } - } - // Fix CLDR mappings. - lang.updateLater("tl", "tgl") - lang.updateLater("sh", "hbs") - lang.updateLater("mo", "mol") - lang.updateLater("no", "nor") - lang.updateLater("tw", "twi") - lang.updateLater("nb", "nob") - lang.updateLater("ak", "aka") - lang.updateLater("bh", "bih") - - // Ensure that each 2-letter code is matched with a 3-letter code. - for _, v := range lang.s[1:] { - s, ok := lang.update[v] - if !ok { - if s, ok = lang.update[langAliasMap.update[v]]; !ok { - continue - } - lang.update[v] = s - } - if v[0] != s[0] { - altLangISO3.add(s) - altLangISO3.updateLater(s, v) - } - } - - // Complete canonicalized language tags. - lang.freeze() - for i, v := range lang.s { - // We can avoid these manual entries by using the IANA registry directly. - // Seems easier to update the list manually, as changes are rare. - // The panic in this loop will trigger if we miss an entry. - add := "" - if s, ok := lang.update[v]; ok { - if s[0] == v[0] { - add = s[1:] - } else { - add = string([]byte{0, byte(altLangISO3.index(s))}) - } - } else if len(v) == 3 { - add = "\x00" - } else { - log.Panicf("no data for long form of %q", v) - } - lang.s[i] += add - } - b.writeConst("lang", tag.Index(lang.join())) - - b.writeConst("langNoIndexOffset", len(b.lang.s)) - - // space of all valid 3-letter language identifiers. - b.writeBitVector("langNoIndex", b.langNoIndex.slice()) - - altLangIndex := []uint16{} - for i, s := range altLangISO3.slice() { - altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))}) - if i > 0 { - idx := b.lang.index(altLangISO3.update[s]) - altLangIndex = append(altLangIndex, uint16(idx)) - } - } - b.writeConst("altLangISO3", tag.Index(altLangISO3.join())) - b.writeSlice("altLangIndex", altLangIndex) - - b.writeSortedMap("AliasMap", &langAliasMap, b.langIndex) - types := make([]AliasType, len(langAliasMap.s)) - for i, s := range langAliasMap.s { - types[i] = aliasTypeMap[s] - } - b.writeSlice("AliasTypes", types) -} - -var scriptConsts = []string{ - "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy", - "Zzzz", -} - -func (b *builder) writeScript() { - b.writeConsts(b.script.index, scriptConsts...) - b.writeConst("script", tag.Index(b.script.join())) - - supp := make([]uint8, len(b.lang.slice())) - for i, v := range b.lang.slice()[1:] { - if sc := b.registry[v].suppressScript; sc != "" { - supp[i+1] = uint8(b.script.index(sc)) - } - } - b.writeSlice("suppressScript", supp) - - // There is only one deprecated script in CLDR. This value is hard-coded. - // We check here if the code must be updated. - for _, a := range b.supp.Metadata.Alias.ScriptAlias { - if a.Type != "Qaai" { - log.Panicf("unexpected deprecated stript %q", a.Type) - } - } -} - -func parseM49(s string) int16 { - if len(s) == 0 { - return 0 - } - v, err := strconv.ParseUint(s, 10, 10) - failOnError(err) - return int16(v) -} - -var regionConsts = []string{ - "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US", - "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo. -} - -func (b *builder) writeRegion() { - b.writeConsts(b.region.index, regionConsts...) - - isoOffset := b.region.index("AA") - m49map := make([]int16, len(b.region.slice())) - fromM49map := make(map[int16]int) - altRegionISO3 := "" - altRegionIDs := []uint16{} - - b.writeConst("isoRegionOffset", isoOffset) - - // 2-letter region lookup and mapping to numeric codes. - regionISO := b.region.clone() - regionISO.s = regionISO.s[isoOffset:] - regionISO.sorted = false - - regionTypes := make([]byte, len(b.region.s)) - - // Is the region valid BCP 47? - for s, e := range b.registry { - if len(s) == 2 && s == strings.ToUpper(s) { - i := b.region.index(s) - for _, d := range e.description { - if strings.Contains(d, "Private use") { - regionTypes[i] = iso3166UserAssigned - } - } - regionTypes[i] |= bcp47Region - } - } - - // Is the region a valid ccTLD? - r := gen.OpenIANAFile("domains/root/db") - defer r.Close() - - buf, err := ioutil.ReadAll(r) - failOnError(err) - re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`) - for _, m := range re.FindAllSubmatch(buf, -1) { - i := b.region.index(strings.ToUpper(string(m[1]))) - regionTypes[i] |= ccTLD - } - - b.writeSlice("regionTypes", regionTypes) - - iso3Set := make(map[string]int) - update := func(iso2, iso3 string) { - i := regionISO.index(iso2) - if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] { - regionISO.s[i] += iso3[1:] - iso3Set[iso3] = -1 - } else { - if ok && j >= 0 { - regionISO.s[i] += string([]byte{0, byte(j)}) - } else { - iso3Set[iso3] = len(altRegionISO3) - regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))}) - altRegionISO3 += iso3 - altRegionIDs = append(altRegionIDs, uint16(isoOffset+i)) - } - } - } - for _, tc := range b.supp.CodeMappings.TerritoryCodes { - i := regionISO.index(tc.Type) + isoOffset - if d := m49map[i]; d != 0 { - log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d) - } - m49 := parseM49(tc.Numeric) - m49map[i] = m49 - if r := fromM49map[m49]; r == 0 { - fromM49map[m49] = i - } else if r != i { - dep := b.registry[regionISO.s[r-isoOffset]].deprecated - if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) { - fromM49map[m49] = i - } - } - } - for _, ta := range b.supp.Metadata.Alias.TerritoryAlias { - if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 { - from := parseM49(ta.Type) - if r := fromM49map[from]; r == 0 { - fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset - } - } - } - for _, tc := range b.supp.CodeMappings.TerritoryCodes { - if len(tc.Alpha3) == 3 { - update(tc.Type, tc.Alpha3) - } - } - // This entries are not included in territoryCodes. Mostly 3-letter variants - // of deleted codes and an entry for QU. - for _, m := range []struct{ iso2, iso3 string }{ - {"CT", "CTE"}, - {"DY", "DHY"}, - {"HV", "HVO"}, - {"JT", "JTN"}, - {"MI", "MID"}, - {"NH", "NHB"}, - {"NQ", "ATN"}, - {"PC", "PCI"}, - {"PU", "PUS"}, - {"PZ", "PCZ"}, - {"RH", "RHO"}, - {"VD", "VDR"}, - {"WK", "WAK"}, - // These three-letter codes are used for others as well. - {"FQ", "ATF"}, - } { - update(m.iso2, m.iso3) - } - for i, s := range regionISO.s { - if len(s) != 4 { - regionISO.s[i] = s + " " - } - } - b.writeConst("regionISO", tag.Index(regionISO.join())) - b.writeConst("altRegionISO3", altRegionISO3) - b.writeSlice("altRegionIDs", altRegionIDs) - - // Create list of deprecated regions. - // TODO: consider inserting SF -> FI. Not included by CLDR, but is the only - // Transitionally-reserved mapping not included. - regionOldMap := stringSet{} - // Include regions in territoryAlias (not all are in the IANA registry!) - for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { - if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 { - regionOldMap.add(reg.Type) - regionOldMap.updateLater(reg.Type, reg.Replacement) - i, _ := regionISO.find(reg.Type) - j, _ := regionISO.find(reg.Replacement) - if k := m49map[i+isoOffset]; k == 0 { - m49map[i+isoOffset] = m49map[j+isoOffset] - } - } - } - b.writeSortedMap("regionOldMap", ®ionOldMap, func(s string) uint16 { - return uint16(b.region.index(s)) - }) - // 3-digit region lookup, groupings. - for i := 1; i < isoOffset; i++ { - m := parseM49(b.region.s[i]) - m49map[i] = m - fromM49map[m] = i - } - b.writeSlice("m49", m49map) - - const ( - searchBits = 7 - regionBits = 9 - ) - if len(m49map) >= 1<<regionBits { - log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits) - } - m49Index := [9]int16{} - fromM49 := []uint16{} - m49 := []int{} - for k, _ := range fromM49map { - m49 = append(m49, int(k)) - } - sort.Ints(m49) - for _, k := range m49[1:] { - val := (k & (1<<searchBits - 1)) << regionBits - fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)])) - m49Index[1:][k>>searchBits] = int16(len(fromM49)) - } - b.writeSlice("m49Index", m49Index) - b.writeSlice("fromM49", fromM49) -} - -const ( - // TODO: put these lists in regionTypes as user data? Could be used for - // various optimizations and refinements and could be exposed in the API. - iso3166Except = "AC CP DG EA EU FX IC SU TA UK" - iso3166Trans = "AN BU CS NT TP YU ZR" // SF is not in our set of Regions. - // DY and RH are actually not deleted, but indeterminately reserved. - iso3166DelCLDR = "CT DD DY FQ HV JT MI NH NQ PC PU PZ RH VD WK YD" -) - -const ( - iso3166UserAssigned = 1 << iota - ccTLD - bcp47Region -) - -func find(list []string, s string) int { - for i, t := range list { - if t == s { - return i - } - } - return -1 -} - -// writeVariants generates per-variant information and creates a map from variant -// name to index value. We assign index values such that sorting multiple -// variants by index value will result in the correct order. -// There are two types of variants: specialized and general. Specialized variants -// are only applicable to certain language or language-script pairs. Generalized -// variants apply to any language. Generalized variants always sort after -// specialized variants. We will therefore always assign a higher index value -// to a generalized variant than any other variant. Generalized variants are -// sorted alphabetically among themselves. -// Specialized variants may also sort after other specialized variants. Such -// variants will be ordered after any of the variants they may follow. -// We assume that if a variant x is followed by a variant y, then for any prefix -// p of x, p-x is a prefix of y. This allows us to order tags based on the -// maximum of the length of any of its prefixes. -// TODO: it is possible to define a set of Prefix values on variants such that -// a total order cannot be defined to the point that this algorithm breaks. -// In other words, we cannot guarantee the same order of variants for the -// future using the same algorithm or for non-compliant combinations of -// variants. For this reason, consider using simple alphabetic sorting -// of variants and ignore Prefix restrictions altogether. -func (b *builder) writeVariant() { - generalized := stringSet{} - specialized := stringSet{} - specializedExtend := stringSet{} - // Collate the variants by type and check assumptions. - for _, v := range b.variant.slice() { - e := b.registry[v] - if len(e.prefix) == 0 { - generalized.add(v) - continue - } - c := strings.Split(e.prefix[0], "-") - hasScriptOrRegion := false - if len(c) > 1 { - _, hasScriptOrRegion = b.script.find(c[1]) - if !hasScriptOrRegion { - _, hasScriptOrRegion = b.region.find(c[1]) - - } - } - if len(c) == 1 || len(c) == 2 && hasScriptOrRegion { - // Variant is preceded by a language. - specialized.add(v) - continue - } - // Variant is preceded by another variant. - specializedExtend.add(v) - prefix := c[0] + "-" - if hasScriptOrRegion { - prefix += c[1] - } - for _, p := range e.prefix { - // Verify that the prefix minus the last element is a prefix of the - // predecessor element. - i := strings.LastIndex(p, "-") - pred := b.registry[p[i+1:]] - if find(pred.prefix, p[:i]) < 0 { - log.Fatalf("prefix %q for variant %q not consistent with predecessor spec", p, v) - } - // The sorting used below does not work in the general case. It works - // if we assume that variants that may be followed by others only have - // prefixes of the same length. Verify this. - count := strings.Count(p[:i], "-") - for _, q := range pred.prefix { - if c := strings.Count(q, "-"); c != count { - log.Fatalf("variant %q preceding %q has a prefix %q of size %d; want %d", p[i+1:], v, q, c, count) - } - } - if !strings.HasPrefix(p, prefix) { - log.Fatalf("prefix %q of variant %q should start with %q", p, v, prefix) - } - } - } - - // Sort extended variants. - a := specializedExtend.s - less := func(v, w string) bool { - // Sort by the maximum number of elements. - maxCount := func(s string) (max int) { - for _, p := range b.registry[s].prefix { - if c := strings.Count(p, "-"); c > max { - max = c - } - } - return - } - if cv, cw := maxCount(v), maxCount(w); cv != cw { - return cv < cw - } - // Sort by name as tie breaker. - return v < w - } - sort.Sort(funcSorter{less, sort.StringSlice(a)}) - specializedExtend.frozen = true - - // Create index from variant name to index. - variantIndex := make(map[string]uint8) - add := func(s []string) { - for _, v := range s { - variantIndex[v] = uint8(len(variantIndex)) - } - } - add(specialized.slice()) - add(specializedExtend.s) - numSpecialized := len(variantIndex) - add(generalized.slice()) - if n := len(variantIndex); n > 255 { - log.Fatalf("maximum number of variants exceeded: was %d; want <= 255", n) - } - b.writeMap("variantIndex", variantIndex) - b.writeConst("variantNumSpecialized", numSpecialized) -} - -func (b *builder) writeLanguageInfo() { -} - -// writeLikelyData writes tables that are used both for finding parent relations and for -// language matching. Each entry contains additional bits to indicate the status of the -// data to know when it cannot be used for parent relations. -func (b *builder) writeLikelyData() { - const ( - isList = 1 << iota - scriptInFrom - regionInFrom - ) - type ( // generated types - likelyScriptRegion struct { - region uint16 - script uint8 - flags uint8 - } - likelyLangScript struct { - lang uint16 - script uint8 - flags uint8 - } - likelyLangRegion struct { - lang uint16 - region uint16 - } - // likelyTag is used for getting likely tags for group regions, where - // the likely region might be a region contained in the group. - likelyTag struct { - lang uint16 - region uint16 - script uint8 - } - ) - var ( // generated variables - likelyRegionGroup = make([]likelyTag, len(b.groups)) - likelyLang = make([]likelyScriptRegion, len(b.lang.s)) - likelyRegion = make([]likelyLangScript, len(b.region.s)) - likelyScript = make([]likelyLangRegion, len(b.script.s)) - likelyLangList = []likelyScriptRegion{} - likelyRegionList = []likelyLangScript{} - ) - type fromTo struct { - from, to []string - } - langToOther := map[int][]fromTo{} - regionToOther := map[int][]fromTo{} - for _, m := range b.supp.LikelySubtags.LikelySubtag { - from := strings.Split(m.From, "_") - to := strings.Split(m.To, "_") - if len(to) != 3 { - log.Fatalf("invalid number of subtags in %q: found %d, want 3", m.To, len(to)) - } - if len(from) > 3 { - log.Fatalf("invalid number of subtags: found %d, want 1-3", len(from)) - } - if from[0] != to[0] && from[0] != "und" { - log.Fatalf("unexpected language change in expansion: %s -> %s", from, to) - } - if len(from) == 3 { - if from[2] != to[2] { - log.Fatalf("unexpected region change in expansion: %s -> %s", from, to) - } - if from[0] != "und" { - log.Fatalf("unexpected fully specified from tag: %s -> %s", from, to) - } - } - if len(from) == 1 || from[0] != "und" { - id := 0 - if from[0] != "und" { - id = b.lang.index(from[0]) - } - langToOther[id] = append(langToOther[id], fromTo{from, to}) - } else if len(from) == 2 && len(from[1]) == 4 { - sid := b.script.index(from[1]) - likelyScript[sid].lang = uint16(b.langIndex(to[0])) - likelyScript[sid].region = uint16(b.region.index(to[2])) - } else { - r := b.region.index(from[len(from)-1]) - if id, ok := b.groups[r]; ok { - if from[0] != "und" { - log.Fatalf("region changed unexpectedly: %s -> %s", from, to) - } - likelyRegionGroup[id].lang = uint16(b.langIndex(to[0])) - likelyRegionGroup[id].script = uint8(b.script.index(to[1])) - likelyRegionGroup[id].region = uint16(b.region.index(to[2])) - } else { - regionToOther[r] = append(regionToOther[r], fromTo{from, to}) - } - } - } - b.writeType(likelyLangRegion{}) - b.writeSlice("likelyScript", likelyScript) - - for id := range b.lang.s { - list := langToOther[id] - if len(list) == 1 { - likelyLang[id].region = uint16(b.region.index(list[0].to[2])) - likelyLang[id].script = uint8(b.script.index(list[0].to[1])) - } else if len(list) > 1 { - likelyLang[id].flags = isList - likelyLang[id].region = uint16(len(likelyLangList)) - likelyLang[id].script = uint8(len(list)) - for _, x := range list { - flags := uint8(0) - if len(x.from) > 1 { - if x.from[1] == x.to[2] { - flags = regionInFrom - } else { - flags = scriptInFrom - } - } - likelyLangList = append(likelyLangList, likelyScriptRegion{ - region: uint16(b.region.index(x.to[2])), - script: uint8(b.script.index(x.to[1])), - flags: flags, - }) - } - } - } - // TODO: merge suppressScript data with this table. - b.writeType(likelyScriptRegion{}) - b.writeSlice("likelyLang", likelyLang) - b.writeSlice("likelyLangList", likelyLangList) - - for id := range b.region.s { - list := regionToOther[id] - if len(list) == 1 { - likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0])) - likelyRegion[id].script = uint8(b.script.index(list[0].to[1])) - if len(list[0].from) > 2 { - likelyRegion[id].flags = scriptInFrom - } - } else if len(list) > 1 { - likelyRegion[id].flags = isList - likelyRegion[id].lang = uint16(len(likelyRegionList)) - likelyRegion[id].script = uint8(len(list)) - for i, x := range list { - if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 { - log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i) - } - x := likelyLangScript{ - lang: uint16(b.langIndex(x.to[0])), - script: uint8(b.script.index(x.to[1])), - } - if len(list[0].from) > 2 { - x.flags = scriptInFrom - } - likelyRegionList = append(likelyRegionList, x) - } - } - } - b.writeType(likelyLangScript{}) - b.writeSlice("likelyRegion", likelyRegion) - b.writeSlice("likelyRegionList", likelyRegionList) - - b.writeType(likelyTag{}) - b.writeSlice("likelyRegionGroup", likelyRegionGroup) -} - -func (b *builder) writeRegionInclusionData() { - var ( - // mm holds for each group the set of groups with a distance of 1. - mm = make(map[int][]index) - - // containment holds for each group the transitive closure of - // containment of other groups. - containment = make(map[index][]index) - ) - for _, g := range b.supp.TerritoryContainment.Group { - // Skip UN and EURO zone as they are flattening the containment - // relationship. - if g.Type == "EZ" || g.Type == "UN" { - continue - } - group := b.region.index(g.Type) - groupIdx := b.groups[group] - for _, mem := range strings.Split(g.Contains, " ") { - r := b.region.index(mem) - mm[r] = append(mm[r], groupIdx) - if g, ok := b.groups[r]; ok { - mm[group] = append(mm[group], g) - containment[groupIdx] = append(containment[groupIdx], g) - } - } - } - - regionContainment := make([]uint64, len(b.groups)) - for _, g := range b.groups { - l := containment[g] - - // Compute the transitive closure of containment. - for i := 0; i < len(l); i++ { - l = append(l, containment[l[i]]...) - } - - // Compute the bitmask. - regionContainment[g] = 1 << g - for _, v := range l { - regionContainment[g] |= 1 << v - } - } - b.writeSlice("regionContainment", regionContainment) - - regionInclusion := make([]uint8, len(b.region.s)) - bvs := make(map[uint64]index) - // Make the first bitvector positions correspond with the groups. - for r, i := range b.groups { - bv := uint64(1 << i) - for _, g := range mm[r] { - bv |= 1 << g - } - bvs[bv] = i - regionInclusion[r] = uint8(bvs[bv]) - } - for r := 1; r < len(b.region.s); r++ { - if _, ok := b.groups[r]; !ok { - bv := uint64(0) - for _, g := range mm[r] { - bv |= 1 << g - } - if bv == 0 { - // Pick the world for unspecified regions. - bv = 1 << b.groups[b.region.index("001")] - } - if _, ok := bvs[bv]; !ok { - bvs[bv] = index(len(bvs)) - } - regionInclusion[r] = uint8(bvs[bv]) - } - } - b.writeSlice("regionInclusion", regionInclusion) - regionInclusionBits := make([]uint64, len(bvs)) - for k, v := range bvs { - regionInclusionBits[v] = uint64(k) - } - // Add bit vectors for increasingly large distances until a fixed point is reached. - regionInclusionNext := []uint8{} - for i := 0; i < len(regionInclusionBits); i++ { - bits := regionInclusionBits[i] - next := bits - for i := uint(0); i < uint(len(b.groups)); i++ { - if bits&(1<<i) != 0 { - next |= regionInclusionBits[i] - } - } - if _, ok := bvs[next]; !ok { - bvs[next] = index(len(bvs)) - regionInclusionBits = append(regionInclusionBits, next) - } - regionInclusionNext = append(regionInclusionNext, uint8(bvs[next])) - } - b.writeSlice("regionInclusionBits", regionInclusionBits) - b.writeSlice("regionInclusionNext", regionInclusionNext) -} - -type parentRel struct { - lang uint16 - script uint8 - maxScript uint8 - toRegion uint16 - fromRegion []uint16 -} - -func (b *builder) writeParents() { - b.writeType(parentRel{}) - - parents := []parentRel{} - - // Construct parent overrides. - n := 0 - for _, p := range b.data.Supplemental().ParentLocales.ParentLocale { - // Skipping non-standard scripts to root is implemented using addTags. - if p.Parent == "root" { - continue - } - - sub := strings.Split(p.Parent, "_") - parent := parentRel{lang: b.langIndex(sub[0])} - if len(sub) == 2 { - // TODO: check that all undefined scripts are indeed Latn in these - // cases. - parent.maxScript = uint8(b.script.index("Latn")) - parent.toRegion = uint16(b.region.index(sub[1])) - } else { - parent.script = uint8(b.script.index(sub[1])) - parent.maxScript = parent.script - parent.toRegion = uint16(b.region.index(sub[2])) - } - for _, c := range strings.Split(p.Locales, " ") { - region := b.region.index(c[strings.LastIndex(c, "_")+1:]) - parent.fromRegion = append(parent.fromRegion, uint16(region)) - } - parents = append(parents, parent) - n += len(parent.fromRegion) - } - b.writeSliceAddSize("parents", n*2, parents) -} - -func main() { - gen.Init() - - gen.Repackage("gen_common.go", "common.go", "language") - - w := gen.NewCodeWriter() - defer w.WriteGoFile("tables.go", "language") - - fmt.Fprintln(w, `import "golang.org/x/text/internal/tag"`) - - b := newBuilder(w) - gen.WriteCLDRVersion(w) - - b.parseIndices() - b.writeType(FromTo{}) - b.writeLanguage() - b.writeScript() - b.writeRegion() - b.writeVariant() - // TODO: b.writeLocale() - b.computeRegionGroups() - b.writeLikelyData() - b.writeRegionInclusionData() - b.writeParents() -} diff --git a/vendor/golang.org/x/text/internal/language/gen_common.go b/vendor/golang.org/x/text/internal/language/gen_common.go deleted file mode 100644 index c419ceeb1..000000000 --- a/vendor/golang.org/x/text/internal/language/gen_common.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This file contains code common to the maketables.go and the package code. - -// AliasType is the type of an alias in AliasMap. -type AliasType int8 - -const ( - Deprecated AliasType = iota - Macro - Legacy - - AliasTypeUnknown AliasType = -1 -) diff --git a/vendor/golang.org/x/text/language/gen.go b/vendor/golang.org/x/text/language/gen.go deleted file mode 100644 index 3004eb42c..000000000 --- a/vendor/golang.org/x/text/language/gen.go +++ /dev/null @@ -1,305 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// Language tag table generator. -// Data read from the web. - -package main - -import ( - "flag" - "fmt" - "io" - "log" - "sort" - "strconv" - "strings" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/language" - "golang.org/x/text/unicode/cldr" -) - -var ( - test = flag.Bool("test", - false, - "test existing tables; can be used to compare web data with package data.") - outputFile = flag.String("output", - "tables.go", - "output file for generated tables") -) - -func main() { - gen.Init() - - w := gen.NewCodeWriter() - defer w.WriteGoFile("tables.go", "language") - - b := newBuilder(w) - gen.WriteCLDRVersion(w) - - b.writeConstants() - b.writeMatchData() -} - -type builder struct { - w *gen.CodeWriter - hw io.Writer // MultiWriter for w and w.Hash - data *cldr.CLDR - supp *cldr.SupplementalData -} - -func (b *builder) langIndex(s string) uint16 { - return uint16(language.MustParseBase(s)) -} - -func (b *builder) regionIndex(s string) int { - return int(language.MustParseRegion(s)) -} - -func (b *builder) scriptIndex(s string) int { - return int(language.MustParseScript(s)) -} - -func newBuilder(w *gen.CodeWriter) *builder { - r := gen.OpenCLDRCoreZip() - defer r.Close() - d := &cldr.Decoder{} - data, err := d.DecodeZip(r) - if err != nil { - log.Fatal(err) - } - b := builder{ - w: w, - hw: io.MultiWriter(w, w.Hash), - data: data, - supp: data.Supplemental(), - } - return &b -} - -// writeConsts computes f(v) for all v in values and writes the results -// as constants named _v to a single constant block. -func (b *builder) writeConsts(f func(string) int, values ...string) { - fmt.Fprintln(b.w, "const (") - for _, v := range values { - fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v)) - } - fmt.Fprintln(b.w, ")") -} - -// TODO: region inclusion data will probably not be use used in future matchers. - -var langConsts = []string{ - "de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und", -} - -var scriptConsts = []string{ - "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy", - "Zzzz", -} - -var regionConsts = []string{ - "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US", - "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo. -} - -func (b *builder) writeConstants() { - b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...) - b.writeConsts(b.regionIndex, regionConsts...) - b.writeConsts(b.scriptIndex, scriptConsts...) -} - -type mutualIntelligibility struct { - want, have uint16 - distance uint8 - oneway bool -} - -type scriptIntelligibility struct { - wantLang, haveLang uint16 - wantScript, haveScript uint8 - distance uint8 - // Always oneway -} - -type regionIntelligibility struct { - lang uint16 // compact language id - script uint8 // 0 means any - group uint8 // 0 means any; if bit 7 is set it means inverse - distance uint8 - // Always twoway. -} - -// writeMatchData writes tables with languages and scripts for which there is -// mutual intelligibility. The data is based on CLDR's languageMatching data. -// Note that we use a different algorithm than the one defined by CLDR and that -// we slightly modify the data. For example, we convert scores to confidence levels. -// We also drop all region-related data as we use a different algorithm to -// determine region equivalence. -func (b *builder) writeMatchData() { - lm := b.supp.LanguageMatching.LanguageMatches - cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new") - - regionHierarchy := map[string][]string{} - for _, g := range b.supp.TerritoryContainment.Group { - regions := strings.Split(g.Contains, " ") - regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...) - } - regionToGroups := make([]uint8, language.NumRegions) - - idToIndex := map[string]uint8{} - for i, mv := range lm[0].MatchVariable { - if i > 6 { - log.Fatalf("Too many groups: %d", i) - } - idToIndex[mv.Id] = uint8(i + 1) - // TODO: also handle '-' - for _, r := range strings.Split(mv.Value, "+") { - todo := []string{r} - for k := 0; k < len(todo); k++ { - r := todo[k] - regionToGroups[b.regionIndex(r)] |= 1 << uint8(i) - todo = append(todo, regionHierarchy[r]...) - } - } - } - b.w.WriteVar("regionToGroups", regionToGroups) - - // maps language id to in- and out-of-group region. - paradigmLocales := [][3]uint16{} - locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ") - for i := 0; i < len(locales); i += 2 { - x := [3]uint16{} - for j := 0; j < 2; j++ { - pc := strings.SplitN(locales[i+j], "-", 2) - x[0] = b.langIndex(pc[0]) - if len(pc) == 2 { - x[1+j] = uint16(b.regionIndex(pc[1])) - } - } - paradigmLocales = append(paradigmLocales, x) - } - b.w.WriteVar("paradigmLocales", paradigmLocales) - - b.w.WriteType(mutualIntelligibility{}) - b.w.WriteType(scriptIntelligibility{}) - b.w.WriteType(regionIntelligibility{}) - - matchLang := []mutualIntelligibility{} - matchScript := []scriptIntelligibility{} - matchRegion := []regionIntelligibility{} - // Convert the languageMatch entries in lists keyed by desired language. - for _, m := range lm[0].LanguageMatch { - // Different versions of CLDR use different separators. - desired := strings.Replace(m.Desired, "-", "_", -1) - supported := strings.Replace(m.Supported, "-", "_", -1) - d := strings.Split(desired, "_") - s := strings.Split(supported, "_") - if len(d) != len(s) { - log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) - continue - } - distance, _ := strconv.ParseInt(m.Distance, 10, 8) - switch len(d) { - case 2: - if desired == supported && desired == "*_*" { - continue - } - // language-script pair. - matchScript = append(matchScript, scriptIntelligibility{ - wantLang: uint16(b.langIndex(d[0])), - haveLang: uint16(b.langIndex(s[0])), - wantScript: uint8(b.scriptIndex(d[1])), - haveScript: uint8(b.scriptIndex(s[1])), - distance: uint8(distance), - }) - if m.Oneway != "true" { - matchScript = append(matchScript, scriptIntelligibility{ - wantLang: uint16(b.langIndex(s[0])), - haveLang: uint16(b.langIndex(d[0])), - wantScript: uint8(b.scriptIndex(s[1])), - haveScript: uint8(b.scriptIndex(d[1])), - distance: uint8(distance), - }) - } - case 1: - if desired == supported && desired == "*" { - continue - } - if distance == 1 { - // nb == no is already handled by macro mapping. Check there - // really is only this case. - if d[0] != "no" || s[0] != "nb" { - log.Fatalf("unhandled equivalence %s == %s", s[0], d[0]) - } - continue - } - // TODO: consider dropping oneway field and just doubling the entry. - matchLang = append(matchLang, mutualIntelligibility{ - want: uint16(b.langIndex(d[0])), - have: uint16(b.langIndex(s[0])), - distance: uint8(distance), - oneway: m.Oneway == "true", - }) - case 3: - if desired == supported && desired == "*_*_*" { - continue - } - if desired != supported { - // This is now supported by CLDR, but only one case, which - // should already be covered by paradigm locales. For instance, - // test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in - // testdata/CLDRLocaleMatcherTest.txt tests this. - if supported != "en_*_GB" { - log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) - } - continue - } - ri := regionIntelligibility{ - lang: b.langIndex(d[0]), - distance: uint8(distance), - } - if d[1] != "*" { - ri.script = uint8(b.scriptIndex(d[1])) - } - switch { - case d[2] == "*": - ri.group = 0x80 // not contained in anything - case strings.HasPrefix(d[2], "$!"): - ri.group = 0x80 - d[2] = "$" + d[2][len("$!"):] - fallthrough - case strings.HasPrefix(d[2], "$"): - ri.group |= idToIndex[d[2]] - } - matchRegion = append(matchRegion, ri) - default: - log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) - } - } - sort.SliceStable(matchLang, func(i, j int) bool { - return matchLang[i].distance < matchLang[j].distance - }) - b.w.WriteComment(` - matchLang holds pairs of langIDs of base languages that are typically - mutually intelligible. Each pair is associated with a confidence and - whether the intelligibility goes one or both ways.`) - b.w.WriteVar("matchLang", matchLang) - - b.w.WriteComment(` - matchScript holds pairs of scriptIDs where readers of one script - can typically also read the other. Each is associated with a confidence.`) - sort.SliceStable(matchScript, func(i, j int) bool { - return matchScript[i].distance < matchScript[j].distance - }) - b.w.WriteVar("matchScript", matchScript) - - sort.SliceStable(matchRegion, func(i, j int) bool { - return matchRegion[i].distance < matchRegion[j].distance - }) - b.w.WriteVar("matchRegion", matchRegion) -} diff --git a/vendor/golang.org/x/text/unicode/bidi/gen.go b/vendor/golang.org/x/text/unicode/bidi/gen.go deleted file mode 100644 index 987fc169c..000000000 --- a/vendor/golang.org/x/text/unicode/bidi/gen.go +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "flag" - "log" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/triegen" - "golang.org/x/text/internal/ucd" -) - -var outputFile = flag.String("out", "tables.go", "output file") - -func main() { - gen.Init() - gen.Repackage("gen_trieval.go", "trieval.go", "bidi") - gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi") - - genTables() -} - -// bidiClass names and codes taken from class "bc" in -// https://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt -var bidiClass = map[string]Class{ - "AL": AL, // ArabicLetter - "AN": AN, // ArabicNumber - "B": B, // ParagraphSeparator - "BN": BN, // BoundaryNeutral - "CS": CS, // CommonSeparator - "EN": EN, // EuropeanNumber - "ES": ES, // EuropeanSeparator - "ET": ET, // EuropeanTerminator - "L": L, // LeftToRight - "NSM": NSM, // NonspacingMark - "ON": ON, // OtherNeutral - "R": R, // RightToLeft - "S": S, // SegmentSeparator - "WS": WS, // WhiteSpace - - "FSI": Control, - "PDF": Control, - "PDI": Control, - "LRE": Control, - "LRI": Control, - "LRO": Control, - "RLE": Control, - "RLI": Control, - "RLO": Control, -} - -func genTables() { - if numClass > 0x0F { - log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass) - } - w := gen.NewCodeWriter() - defer w.WriteVersionedGoFile(*outputFile, "bidi") - - gen.WriteUnicodeVersion(w) - - t := triegen.NewTrie("bidi") - - // Build data about bracket mapping. These bits need to be or-ed with - // any other bits. - orMask := map[rune]uint64{} - - xorMap := map[rune]int{} - xorMasks := []rune{0} // First value is no-op. - - ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) { - r1 := p.Rune(0) - r2 := p.Rune(1) - xor := r1 ^ r2 - if _, ok := xorMap[xor]; !ok { - xorMap[xor] = len(xorMasks) - xorMasks = append(xorMasks, xor) - } - entry := uint64(xorMap[xor]) << xorMaskShift - switch p.String(2) { - case "o": - entry |= openMask - case "c", "n": - default: - log.Fatalf("Unknown bracket class %q.", p.String(2)) - } - orMask[r1] = entry - }) - - w.WriteComment(` - xorMasks contains masks to be xor-ed with brackets to get the reverse - version.`) - w.WriteVar("xorMasks", xorMasks) - - done := map[rune]bool{} - - insert := func(r rune, c Class) { - if !done[r] { - t.Insert(r, orMask[r]|uint64(c)) - done[r] = true - } - } - - // Insert the derived BiDi properties. - ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) { - r := p.Rune(0) - class, ok := bidiClass[p.String(1)] - if !ok { - log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1)) - } - insert(r, class) - }) - visitDefaults(insert) - - // TODO: use sparse blocks. This would reduce table size considerably - // from the looks of it. - - sz, err := t.Gen(w) - if err != nil { - log.Fatal(err) - } - w.Size += sz -} - -// dummy values to make methods in gen_common compile. The real versions -// will be generated by this file to tables.go. -var ( - xorMasks []rune -) diff --git a/vendor/golang.org/x/text/unicode/bidi/gen_ranges.go b/vendor/golang.org/x/text/unicode/bidi/gen_ranges.go deleted file mode 100644 index 02c3b505d..000000000 --- a/vendor/golang.org/x/text/unicode/bidi/gen_ranges.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "unicode" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/ucd" - "golang.org/x/text/unicode/rangetable" -) - -// These tables are hand-extracted from: -// https://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt -func visitDefaults(fn func(r rune, c Class)) { - // first write default values for ranges listed above. - visitRunes(fn, AL, []rune{ - 0x0600, 0x07BF, // Arabic - 0x08A0, 0x08FF, // Arabic Extended-A - 0xFB50, 0xFDCF, // Arabic Presentation Forms - 0xFDF0, 0xFDFF, - 0xFE70, 0xFEFF, - 0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols - }) - visitRunes(fn, R, []rune{ - 0x0590, 0x05FF, // Hebrew - 0x07C0, 0x089F, // Nko et al. - 0xFB1D, 0xFB4F, - 0x00010800, 0x00010FFF, // Cypriot Syllabary et. al. - 0x0001E800, 0x0001EDFF, - 0x0001EF00, 0x0001EFFF, - }) - visitRunes(fn, ET, []rune{ // European Terminator - 0x20A0, 0x20Cf, // Currency symbols - }) - rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) { - fn(r, BN) // Boundary Neutral - }) - ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) { - if p.String(1) == "Default_Ignorable_Code_Point" { - fn(p.Rune(0), BN) // Boundary Neutral - } - }) -} - -func visitRunes(fn func(r rune, c Class), c Class, runes []rune) { - for i := 0; i < len(runes); i += 2 { - lo, hi := runes[i], runes[i+1] - for j := lo; j <= hi; j++ { - fn(j, c) - } - } -} diff --git a/vendor/golang.org/x/text/unicode/bidi/gen_trieval.go b/vendor/golang.org/x/text/unicode/bidi/gen_trieval.go deleted file mode 100644 index 9cb994289..000000000 --- a/vendor/golang.org/x/text/unicode/bidi/gen_trieval.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// Class is the Unicode BiDi class. Each rune has a single class. -type Class uint - -const ( - L Class = iota // LeftToRight - R // RightToLeft - EN // EuropeanNumber - ES // EuropeanSeparator - ET // EuropeanTerminator - AN // ArabicNumber - CS // CommonSeparator - B // ParagraphSeparator - S // SegmentSeparator - WS // WhiteSpace - ON // OtherNeutral - BN // BoundaryNeutral - NSM // NonspacingMark - AL // ArabicLetter - Control // Control LRO - PDI - - numClass - - LRO // LeftToRightOverride - RLO // RightToLeftOverride - LRE // LeftToRightEmbedding - RLE // RightToLeftEmbedding - PDF // PopDirectionalFormat - LRI // LeftToRightIsolate - RLI // RightToLeftIsolate - FSI // FirstStrongIsolate - PDI // PopDirectionalIsolate - - unknownClass = ^Class(0) -) - -var controlToClass = map[rune]Class{ - 0x202D: LRO, // LeftToRightOverride, - 0x202E: RLO, // RightToLeftOverride, - 0x202A: LRE, // LeftToRightEmbedding, - 0x202B: RLE, // RightToLeftEmbedding, - 0x202C: PDF, // PopDirectionalFormat, - 0x2066: LRI, // LeftToRightIsolate, - 0x2067: RLI, // RightToLeftIsolate, - 0x2068: FSI, // FirstStrongIsolate, - 0x2069: PDI, // PopDirectionalIsolate, -} - -// A trie entry has the following bits: -// 7..5 XOR mask for brackets -// 4 1: Bracket open, 0: Bracket close -// 3..0 Class type - -const ( - openMask = 0x10 - xorMaskShift = 5 -) diff --git a/vendor/golang.org/x/text/unicode/norm/maketables.go b/vendor/golang.org/x/text/unicode/norm/maketables.go deleted file mode 100644 index 30a3aa933..000000000 --- a/vendor/golang.org/x/text/unicode/norm/maketables.go +++ /dev/null @@ -1,986 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// Normalization table generator. -// Data read from the web. -// See forminfo.go for a description of the trie values associated with each rune. - -package main - -import ( - "bytes" - "encoding/binary" - "flag" - "fmt" - "io" - "log" - "sort" - "strconv" - "strings" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/triegen" - "golang.org/x/text/internal/ucd" -) - -func main() { - gen.Init() - loadUnicodeData() - compactCCC() - loadCompositionExclusions() - completeCharFields(FCanonical) - completeCharFields(FCompatibility) - computeNonStarterCounts() - verifyComputed() - printChars() - testDerived() - printTestdata() - makeTables() -} - -var ( - tablelist = flag.String("tables", - "all", - "comma-separated list of which tables to generate; "+ - "can be 'decomp', 'recomp', 'info' and 'all'") - test = flag.Bool("test", - false, - "test existing tables against DerivedNormalizationProps and generate test data for regression testing") - verbose = flag.Bool("verbose", - false, - "write data to stdout as it is parsed") -) - -const MaxChar = 0x10FFFF // anything above this shouldn't exist - -// Quick Check properties of runes allow us to quickly -// determine whether a rune may occur in a normal form. -// For a given normal form, a rune may be guaranteed to occur -// verbatim (QC=Yes), may or may not combine with another -// rune (QC=Maybe), or may not occur (QC=No). -type QCResult int - -const ( - QCUnknown QCResult = iota - QCYes - QCNo - QCMaybe -) - -func (r QCResult) String() string { - switch r { - case QCYes: - return "Yes" - case QCNo: - return "No" - case QCMaybe: - return "Maybe" - } - return "***UNKNOWN***" -} - -const ( - FCanonical = iota // NFC or NFD - FCompatibility // NFKC or NFKD - FNumberOfFormTypes -) - -const ( - MComposed = iota // NFC or NFKC - MDecomposed // NFD or NFKD - MNumberOfModes -) - -// This contains only the properties we're interested in. -type Char struct { - name string - codePoint rune // if zero, this index is not a valid code point. - ccc uint8 // canonical combining class - origCCC uint8 - excludeInComp bool // from CompositionExclusions.txt - compatDecomp bool // it has a compatibility expansion - - nTrailingNonStarters uint8 - nLeadingNonStarters uint8 // must be equal to trailing if non-zero - - forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility - - state State -} - -var chars = make([]Char, MaxChar+1) -var cccMap = make(map[uint8]uint8) - -func (c Char) String() string { - buf := new(bytes.Buffer) - - fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name) - fmt.Fprintf(buf, " ccc: %v\n", c.ccc) - fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp) - fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp) - fmt.Fprintf(buf, " state: %v\n", c.state) - fmt.Fprintf(buf, " NFC:\n") - fmt.Fprint(buf, c.forms[FCanonical]) - fmt.Fprintf(buf, " NFKC:\n") - fmt.Fprint(buf, c.forms[FCompatibility]) - - return buf.String() -} - -// In UnicodeData.txt, some ranges are marked like this: -// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;; -// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;; -// parseCharacter keeps a state variable indicating the weirdness. -type State int - -const ( - SNormal State = iota // known to be zero for the type - SFirst - SLast - SMissing -) - -var lastChar = rune('\u0000') - -func (c Char) isValid() bool { - return c.codePoint != 0 && c.state != SMissing -} - -type FormInfo struct { - quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed - verified [MNumberOfModes]bool // index: MComposed or MDecomposed - - combinesForward bool // May combine with rune on the right - combinesBackward bool // May combine with rune on the left - isOneWay bool // Never appears in result - inDecomp bool // Some decompositions result in this char. - decomp Decomposition - expandedDecomp Decomposition -} - -func (f FormInfo) String() string { - buf := bytes.NewBuffer(make([]byte, 0)) - - fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed]) - fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed]) - fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward) - fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward) - fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay) - fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp) - fmt.Fprintf(buf, " decomposition: %X\n", f.decomp) - fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp) - - return buf.String() -} - -type Decomposition []rune - -func parseDecomposition(s string, skipfirst bool) (a []rune, err error) { - decomp := strings.Split(s, " ") - if len(decomp) > 0 && skipfirst { - decomp = decomp[1:] - } - for _, d := range decomp { - point, err := strconv.ParseUint(d, 16, 64) - if err != nil { - return a, err - } - a = append(a, rune(point)) - } - return a, nil -} - -func loadUnicodeData() { - f := gen.OpenUCDFile("UnicodeData.txt") - defer f.Close() - p := ucd.New(f) - for p.Next() { - r := p.Rune(ucd.CodePoint) - char := &chars[r] - - char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass)) - decmap := p.String(ucd.DecompMapping) - - exp, err := parseDecomposition(decmap, false) - isCompat := false - if err != nil { - if len(decmap) > 0 { - exp, err = parseDecomposition(decmap, true) - if err != nil { - log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err) - } - isCompat = true - } - } - - char.name = p.String(ucd.Name) - char.codePoint = r - char.forms[FCompatibility].decomp = exp - if !isCompat { - char.forms[FCanonical].decomp = exp - } else { - char.compatDecomp = true - } - if len(decmap) > 0 { - char.forms[FCompatibility].decomp = exp - } - } - if err := p.Err(); err != nil { - log.Fatal(err) - } -} - -// compactCCC converts the sparse set of CCC values to a continguous one, -// reducing the number of bits needed from 8 to 6. -func compactCCC() { - m := make(map[uint8]uint8) - for i := range chars { - c := &chars[i] - m[c.ccc] = 0 - } - cccs := []int{} - for v, _ := range m { - cccs = append(cccs, int(v)) - } - sort.Ints(cccs) - for i, c := range cccs { - cccMap[uint8(i)] = uint8(c) - m[uint8(c)] = uint8(i) - } - for i := range chars { - c := &chars[i] - c.origCCC = c.ccc - c.ccc = m[c.ccc] - } - if len(m) >= 1<<6 { - log.Fatalf("too many difference CCC values: %d >= 64", len(m)) - } -} - -// CompositionExclusions.txt has form: -// 0958 # ... -// See https://unicode.org/reports/tr44/ for full explanation -func loadCompositionExclusions() { - f := gen.OpenUCDFile("CompositionExclusions.txt") - defer f.Close() - p := ucd.New(f) - for p.Next() { - c := &chars[p.Rune(0)] - if c.excludeInComp { - log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint) - } - c.excludeInComp = true - } - if e := p.Err(); e != nil { - log.Fatal(e) - } -} - -// hasCompatDecomp returns true if any of the recursive -// decompositions contains a compatibility expansion. -// In this case, the character may not occur in NFK*. -func hasCompatDecomp(r rune) bool { - c := &chars[r] - if c.compatDecomp { - return true - } - for _, d := range c.forms[FCompatibility].decomp { - if hasCompatDecomp(d) { - return true - } - } - return false -} - -// Hangul related constants. -const ( - HangulBase = 0xAC00 - HangulEnd = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28) - - JamoLBase = 0x1100 - JamoLEnd = 0x1113 - JamoVBase = 0x1161 - JamoVEnd = 0x1176 - JamoTBase = 0x11A8 - JamoTEnd = 0x11C3 - - JamoLVTCount = 19 * 21 * 28 - JamoTCount = 28 -) - -func isHangul(r rune) bool { - return HangulBase <= r && r < HangulEnd -} - -func isHangulWithoutJamoT(r rune) bool { - if !isHangul(r) { - return false - } - r -= HangulBase - return r < JamoLVTCount && r%JamoTCount == 0 -} - -func ccc(r rune) uint8 { - return chars[r].ccc -} - -// Insert a rune in a buffer, ordered by Canonical Combining Class. -func insertOrdered(b Decomposition, r rune) Decomposition { - n := len(b) - b = append(b, 0) - cc := ccc(r) - if cc > 0 { - // Use bubble sort. - for ; n > 0; n-- { - if ccc(b[n-1]) <= cc { - break - } - b[n] = b[n-1] - } - } - b[n] = r - return b -} - -// Recursively decompose. -func decomposeRecursive(form int, r rune, d Decomposition) Decomposition { - dcomp := chars[r].forms[form].decomp - if len(dcomp) == 0 { - return insertOrdered(d, r) - } - for _, c := range dcomp { - d = decomposeRecursive(form, c, d) - } - return d -} - -func completeCharFields(form int) { - // Phase 0: pre-expand decomposition. - for i := range chars { - f := &chars[i].forms[form] - if len(f.decomp) == 0 { - continue - } - exp := make(Decomposition, 0) - for _, c := range f.decomp { - exp = decomposeRecursive(form, c, exp) - } - f.expandedDecomp = exp - } - - // Phase 1: composition exclusion, mark decomposition. - for i := range chars { - c := &chars[i] - f := &c.forms[form] - - // Marks script-specific exclusions and version restricted. - f.isOneWay = c.excludeInComp - - // Singletons - f.isOneWay = f.isOneWay || len(f.decomp) == 1 - - // Non-starter decompositions - if len(f.decomp) > 1 { - chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0 - f.isOneWay = f.isOneWay || chk - } - - // Runes that decompose into more than two runes. - f.isOneWay = f.isOneWay || len(f.decomp) > 2 - - if form == FCompatibility { - f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint) - } - - for _, r := range f.decomp { - chars[r].forms[form].inDecomp = true - } - } - - // Phase 2: forward and backward combining. - for i := range chars { - c := &chars[i] - f := &c.forms[form] - - if !f.isOneWay && len(f.decomp) == 2 { - f0 := &chars[f.decomp[0]].forms[form] - f1 := &chars[f.decomp[1]].forms[form] - if !f0.isOneWay { - f0.combinesForward = true - } - if !f1.isOneWay { - f1.combinesBackward = true - } - } - if isHangulWithoutJamoT(rune(i)) { - f.combinesForward = true - } - } - - // Phase 3: quick check values. - for i := range chars { - c := &chars[i] - f := &c.forms[form] - - switch { - case len(f.decomp) > 0: - f.quickCheck[MDecomposed] = QCNo - case isHangul(rune(i)): - f.quickCheck[MDecomposed] = QCNo - default: - f.quickCheck[MDecomposed] = QCYes - } - switch { - case f.isOneWay: - f.quickCheck[MComposed] = QCNo - case (i & 0xffff00) == JamoLBase: - f.quickCheck[MComposed] = QCYes - if JamoLBase <= i && i < JamoLEnd { - f.combinesForward = true - } - if JamoVBase <= i && i < JamoVEnd { - f.quickCheck[MComposed] = QCMaybe - f.combinesBackward = true - f.combinesForward = true - } - if JamoTBase <= i && i < JamoTEnd { - f.quickCheck[MComposed] = QCMaybe - f.combinesBackward = true - } - case !f.combinesBackward: - f.quickCheck[MComposed] = QCYes - default: - f.quickCheck[MComposed] = QCMaybe - } - } -} - -func computeNonStarterCounts() { - // Phase 4: leading and trailing non-starter count - for i := range chars { - c := &chars[i] - - runes := []rune{rune(i)} - // We always use FCompatibility so that the CGJ insertion points do not - // change for repeated normalizations with different forms. - if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 { - runes = exp - } - // We consider runes that combine backwards to be non-starters for the - // purpose of Stream-Safe Text Processing. - for _, r := range runes { - if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward { - break - } - c.nLeadingNonStarters++ - } - for i := len(runes) - 1; i >= 0; i-- { - if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward { - break - } - c.nTrailingNonStarters++ - } - if c.nTrailingNonStarters > 3 { - log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes) - } - - if isHangul(rune(i)) { - c.nTrailingNonStarters = 2 - if isHangulWithoutJamoT(rune(i)) { - c.nTrailingNonStarters = 1 - } - } - - if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t { - log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t) - } - if t := c.nTrailingNonStarters; t > 3 { - log.Fatalf("%U: number of trailing non-starters is %d > 3", t) - } - } -} - -func printBytes(w io.Writer, b []byte, name string) { - fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b)) - fmt.Fprintf(w, "var %s = [...]byte {", name) - for i, c := range b { - switch { - case i%64 == 0: - fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63) - case i%8 == 0: - fmt.Fprintf(w, "\n") - } - fmt.Fprintf(w, "0x%.2X, ", c) - } - fmt.Fprint(w, "\n}\n\n") -} - -// See forminfo.go for format. -func makeEntry(f *FormInfo, c *Char) uint16 { - e := uint16(0) - if r := c.codePoint; HangulBase <= r && r < HangulEnd { - e |= 0x40 - } - if f.combinesForward { - e |= 0x20 - } - if f.quickCheck[MDecomposed] == QCNo { - e |= 0x4 - } - switch f.quickCheck[MComposed] { - case QCYes: - case QCNo: - e |= 0x10 - case QCMaybe: - e |= 0x18 - default: - log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed]) - } - e |= uint16(c.nTrailingNonStarters) - return e -} - -// decompSet keeps track of unique decompositions, grouped by whether -// the decomposition is followed by a trailing and/or leading CCC. -type decompSet [7]map[string]bool - -const ( - normalDecomp = iota - firstMulti - firstCCC - endMulti - firstLeadingCCC - firstCCCZeroExcept - firstStarterWithNLead - lastDecomp -) - -var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"} - -func makeDecompSet() decompSet { - m := decompSet{} - for i := range m { - m[i] = make(map[string]bool) - } - return m -} -func (m *decompSet) insert(key int, s string) { - m[key][s] = true -} - -func printCharInfoTables(w io.Writer) int { - mkstr := func(r rune, f *FormInfo) (int, string) { - d := f.expandedDecomp - s := string([]rune(d)) - if max := 1 << 6; len(s) >= max { - const msg = "%U: too many bytes in decomposition: %d >= %d" - log.Fatalf(msg, r, len(s), max) - } - head := uint8(len(s)) - if f.quickCheck[MComposed] != QCYes { - head |= 0x40 - } - if f.combinesForward { - head |= 0x80 - } - s = string([]byte{head}) + s - - lccc := ccc(d[0]) - tccc := ccc(d[len(d)-1]) - cc := ccc(r) - if cc != 0 && lccc == 0 && tccc == 0 { - log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc) - } - if tccc < lccc && lccc != 0 { - const msg = "%U: lccc (%d) must be <= tcc (%d)" - log.Fatalf(msg, r, lccc, tccc) - } - index := normalDecomp - nTrail := chars[r].nTrailingNonStarters - nLead := chars[r].nLeadingNonStarters - if tccc > 0 || lccc > 0 || nTrail > 0 { - tccc <<= 2 - tccc |= nTrail - s += string([]byte{tccc}) - index = endMulti - for _, r := range d[1:] { - if ccc(r) == 0 { - index = firstCCC - } - } - if lccc > 0 || nLead > 0 { - s += string([]byte{lccc}) - if index == firstCCC { - log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r) - } - index = firstLeadingCCC - } - if cc != lccc { - if cc != 0 { - log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc) - } - index = firstCCCZeroExcept - } - } else if len(d) > 1 { - index = firstMulti - } - return index, s - } - - decompSet := makeDecompSet() - const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail. - decompSet.insert(firstStarterWithNLead, nLeadStr) - - // Store the uniqued decompositions in a byte buffer, - // preceded by their byte length. - for _, c := range chars { - for _, f := range c.forms { - if len(f.expandedDecomp) == 0 { - continue - } - if f.combinesBackward { - log.Fatalf("%U: combinesBackward and decompose", c.codePoint) - } - index, s := mkstr(c.codePoint, &f) - decompSet.insert(index, s) - } - } - - decompositions := bytes.NewBuffer(make([]byte, 0, 10000)) - size := 0 - positionMap := make(map[string]uint16) - decompositions.WriteString("\000") - fmt.Fprintln(w, "const (") - for i, m := range decompSet { - sa := []string{} - for s := range m { - sa = append(sa, s) - } - sort.Strings(sa) - for _, s := range sa { - p := decompositions.Len() - decompositions.WriteString(s) - positionMap[s] = uint16(p) - } - if cname[i] != "" { - fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len()) - } - } - fmt.Fprintln(w, "maxDecomp = 0x8000") - fmt.Fprintln(w, ")") - b := decompositions.Bytes() - printBytes(w, b, "decomps") - size += len(b) - - varnames := []string{"nfc", "nfkc"} - for i := 0; i < FNumberOfFormTypes; i++ { - trie := triegen.NewTrie(varnames[i]) - - for r, c := range chars { - f := c.forms[i] - d := f.expandedDecomp - if len(d) != 0 { - _, key := mkstr(c.codePoint, &f) - trie.Insert(rune(r), uint64(positionMap[key])) - if c.ccc != ccc(d[0]) { - // We assume the lead ccc of a decomposition !=0 in this case. - if ccc(d[0]) == 0 { - log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc) - } - } - } else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward { - // Handle cases where it can't be detected that the nLead should be equal - // to nTrail. - trie.Insert(c.codePoint, uint64(positionMap[nLeadStr])) - } else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 { - trie.Insert(c.codePoint, uint64(0x8000|v)) - } - } - sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]})) - if err != nil { - log.Fatal(err) - } - size += sz - } - return size -} - -func contains(sa []string, s string) bool { - for _, a := range sa { - if a == s { - return true - } - } - return false -} - -func makeTables() { - w := &bytes.Buffer{} - - size := 0 - if *tablelist == "" { - return - } - list := strings.Split(*tablelist, ",") - if *tablelist == "all" { - list = []string{"recomp", "info"} - } - - // Compute maximum decomposition size. - max := 0 - for _, c := range chars { - if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max { - max = n - } - } - fmt.Fprintln(w, `import "sync"`) - fmt.Fprintln(w) - - fmt.Fprintln(w, "const (") - fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.") - fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion()) - fmt.Fprintln(w) - fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform") - fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at") - fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that") - fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.") - fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max) - fmt.Fprintln(w, ")\n") - - // Print the CCC remap table. - size += len(cccMap) - fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap)) - for i := 0; i < len(cccMap); i++ { - if i%8 == 0 { - fmt.Fprintln(w) - } - fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)]) - } - fmt.Fprintln(w, "\n}\n") - - if contains(list, "info") { - size += printCharInfoTables(w) - } - - if contains(list, "recomp") { - // Note that we use 32 bit keys, instead of 64 bit. - // This clips the bits of three entries, but we know - // this won't cause a collision. The compiler will catch - // any changes made to UnicodeData.txt that introduces - // a collision. - // Note that the recomposition map for NFC and NFKC - // are identical. - - // Recomposition map - nrentries := 0 - for _, c := range chars { - f := c.forms[FCanonical] - if !f.isOneWay && len(f.decomp) > 0 { - nrentries++ - } - } - sz := nrentries * 8 - size += sz - fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz) - fmt.Fprintln(w, "var recompMap map[uint32]rune") - fmt.Fprintln(w, "var recompMapOnce sync.Once\n") - fmt.Fprintln(w, `const recompMapPacked = "" +`) - var buf [8]byte - for i, c := range chars { - f := c.forms[FCanonical] - d := f.decomp - if !f.isOneWay && len(d) > 0 { - key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1])) - binary.BigEndian.PutUint32(buf[:4], key) - binary.BigEndian.PutUint32(buf[4:], uint32(i)) - fmt.Fprintf(w, "\t\t%q + // 0x%.8X: 0x%.8X\n", string(buf[:]), key, uint32(i)) - } - } - // hack so we don't have to special case the trailing plus sign - fmt.Fprintf(w, ` ""`) - fmt.Fprintln(w) - } - - fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size) - gen.WriteVersionedGoFile("tables.go", "norm", w.Bytes()) -} - -func printChars() { - if *verbose { - for _, c := range chars { - if !c.isValid() || c.state == SMissing { - continue - } - fmt.Println(c) - } - } -} - -// verifyComputed does various consistency tests. -func verifyComputed() { - for i, c := range chars { - for _, f := range c.forms { - isNo := (f.quickCheck[MDecomposed] == QCNo) - if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) { - log.Fatalf("%U: NF*D QC must be No if rune decomposes", i) - } - - isMaybe := f.quickCheck[MComposed] == QCMaybe - if f.combinesBackward != isMaybe { - log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i) - } - if len(f.decomp) > 0 && f.combinesForward && isMaybe { - log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i) - } - - if len(f.expandedDecomp) != 0 { - continue - } - if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b { - // We accept these runes to be treated differently (it only affects - // segment breaking in iteration, most likely on improper use), but - // reconsider if more characters are added. - // U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;; - // U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;; - // U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;; - // U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;; - // U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;; - // U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;; - if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) { - log.Fatalf("%U: nLead was %v; want %v", i, a, b) - } - } - } - nfc := c.forms[FCanonical] - nfkc := c.forms[FCompatibility] - if nfc.combinesBackward != nfkc.combinesBackward { - log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint) - } - } -} - -// Use values in DerivedNormalizationProps.txt to compare against the -// values we computed. -// DerivedNormalizationProps.txt has form: -// 00C0..00C5 ; NFD_QC; N # ... -// 0374 ; NFD_QC; N # ... -// See https://unicode.org/reports/tr44/ for full explanation -func testDerived() { - f := gen.OpenUCDFile("DerivedNormalizationProps.txt") - defer f.Close() - p := ucd.New(f) - for p.Next() { - r := p.Rune(0) - c := &chars[r] - - var ftype, mode int - qt := p.String(1) - switch qt { - case "NFC_QC": - ftype, mode = FCanonical, MComposed - case "NFD_QC": - ftype, mode = FCanonical, MDecomposed - case "NFKC_QC": - ftype, mode = FCompatibility, MComposed - case "NFKD_QC": - ftype, mode = FCompatibility, MDecomposed - default: - continue - } - var qr QCResult - switch p.String(2) { - case "Y": - qr = QCYes - case "N": - qr = QCNo - case "M": - qr = QCMaybe - default: - log.Fatalf(`Unexpected quick check value "%s"`, p.String(2)) - } - if got := c.forms[ftype].quickCheck[mode]; got != qr { - log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr) - } - c.forms[ftype].verified[mode] = true - } - if err := p.Err(); err != nil { - log.Fatal(err) - } - // Any unspecified value must be QCYes. Verify this. - for i, c := range chars { - for j, fd := range c.forms { - for k, qr := range fd.quickCheck { - if !fd.verified[k] && qr != QCYes { - m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n" - log.Printf(m, i, j, k, qr, c.name) - } - } - } - } -} - -var testHeader = `const ( - Yes = iota - No - Maybe -) - -type formData struct { - qc uint8 - combinesForward bool - decomposition string -} - -type runeData struct { - r rune - ccc uint8 - nLead uint8 - nTrail uint8 - f [2]formData // 0: canonical; 1: compatibility -} - -func f(qc uint8, cf bool, dec string) [2]formData { - return [2]formData{{qc, cf, dec}, {qc, cf, dec}} -} - -func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData { - return [2]formData{{qc, cf, d}, {qck, cfk, dk}} -} - -var testData = []runeData{ -` - -func printTestdata() { - type lastInfo struct { - ccc uint8 - nLead uint8 - nTrail uint8 - f string - } - - last := lastInfo{} - w := &bytes.Buffer{} - fmt.Fprintf(w, testHeader) - for r, c := range chars { - f := c.forms[FCanonical] - qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) - f = c.forms[FCompatibility] - qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) - s := "" - if d == dk && qc == qck && cf == cfk { - s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d) - } else { - s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk) - } - current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s} - if last != current { - fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s) - last = current - } - } - fmt.Fprintln(w, "}") - gen.WriteVersionedGoFile("data_test.go", "norm", w.Bytes()) -} diff --git a/vendor/golang.org/x/text/unicode/norm/triegen.go b/vendor/golang.org/x/text/unicode/norm/triegen.go deleted file mode 100644 index 45d711900..000000000 --- a/vendor/golang.org/x/text/unicode/norm/triegen.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// Trie table generator. -// Used by make*tables tools to generate a go file with trie data structures -// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte -// sequence are used to lookup offsets in the index table to be used for the -// next byte. The last byte is used to index into a table with 16-bit values. - -package main - -import ( - "fmt" - "io" -) - -const maxSparseEntries = 16 - -type normCompacter struct { - sparseBlocks [][]uint64 - sparseOffset []uint16 - sparseCount int - name string -} - -func mostFrequentStride(a []uint64) int { - counts := make(map[int]int) - var v int - for _, x := range a { - if stride := int(x) - v; v != 0 && stride >= 0 { - counts[stride]++ - } - v = int(x) - } - var maxs, maxc int - for stride, cnt := range counts { - if cnt > maxc || (cnt == maxc && stride < maxs) { - maxs, maxc = stride, cnt - } - } - return maxs -} - -func countSparseEntries(a []uint64) int { - stride := mostFrequentStride(a) - var v, count int - for _, tv := range a { - if int(tv)-v != stride { - if tv != 0 { - count++ - } - } - v = int(tv) - } - return count -} - -func (c *normCompacter) Size(v []uint64) (sz int, ok bool) { - if n := countSparseEntries(v); n <= maxSparseEntries { - return (n+1)*4 + 2, true - } - return 0, false -} - -func (c *normCompacter) Store(v []uint64) uint32 { - h := uint32(len(c.sparseOffset)) - c.sparseBlocks = append(c.sparseBlocks, v) - c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount)) - c.sparseCount += countSparseEntries(v) + 1 - return h -} - -func (c *normCompacter) Handler() string { - return c.name + "Sparse.lookup" -} - -func (c *normCompacter) Print(w io.Writer) (retErr error) { - p := func(f string, x ...interface{}) { - if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil { - retErr = err - } - } - - ls := len(c.sparseBlocks) - p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2) - p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset) - - ns := c.sparseCount - p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4) - p("var %sSparseValues = [%d]valueRange {", c.name, ns) - for i, b := range c.sparseBlocks { - p("\n// Block %#x, offset %#x", i, c.sparseOffset[i]) - var v int - stride := mostFrequentStride(b) - n := countSparseEntries(b) - p("\n{value:%#04x,lo:%#02x},", stride, uint8(n)) - for i, nv := range b { - if int(nv)-v != stride { - if v != 0 { - p(",hi:%#02x},", 0x80+i-1) - } - if nv != 0 { - p("\n{value:%#04x,lo:%#02x", nv, 0x80+i) - } - } - v = int(nv) - } - if v != 0 { - p(",hi:%#02x},", 0x80+len(b)-1) - } - } - p("\n}\n\n") - return -} |