summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/net/idna/idna.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/net/idna/idna.go')
-rw-r--r--vendor/golang.org/x/net/idna/idna.go424
1 files changed, 324 insertions, 100 deletions
diff --git a/vendor/golang.org/x/net/idna/idna.go b/vendor/golang.org/x/net/idna/idna.go
index 47466e947..346fe4423 100644
--- a/vendor/golang.org/x/net/idna/idna.go
+++ b/vendor/golang.org/x/net/idna/idna.go
@@ -1,4 +1,4 @@
-// Copied from the golang.org/x/text repo; DO NOT EDIT
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
@@ -21,6 +21,7 @@ import (
"unicode/utf8"
"golang.org/x/text/secure/bidirule"
+ "golang.org/x/text/unicode/bidi"
"golang.org/x/text/unicode/norm"
)
@@ -39,27 +40,24 @@ import (
// error in the future.
// I think Option 1 is best, but it is quite opinionated.
-// ToASCII converts a domain or domain label to its ASCII form. For example,
-// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
-// ToASCII("golang") is "golang". If an error is encountered it will return
-// an error and a (partially) processed result.
+// ToASCII is a wrapper for Punycode.ToASCII.
func ToASCII(s string) (string, error) {
- return Resolve.process(s, true)
+ return Punycode.process(s, true)
}
-// ToUnicode converts a domain or domain label to its Unicode form. For example,
-// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
-// ToUnicode("golang") is "golang". If an error is encountered it will return
-// an error and a (partially) processed result.
+// ToUnicode is a wrapper for Punycode.ToUnicode.
func ToUnicode(s string) (string, error) {
- return NonTransitional.process(s, false)
+ return Punycode.process(s, false)
}
// An Option configures a Profile at creation time.
type Option func(*options)
-// Transitional sets a Profile to use the Transitional mapping as defined
-// in UTS #46.
+// Transitional sets a Profile to use the Transitional mapping as defined in UTS
+// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
+// transitional mapping provides a compromise between IDNA2003 and IDNA2008
+// compatibility. It is used by most browsers when resolving domain names. This
+// option is only meaningful if combined with MapForLookup.
func Transitional(transitional bool) Option {
	// Store the caller's choice rather than a constant: Transitional(false)
	// must leave the transitional mapping disabled, matching how the other
	// boolean options in this file record their argument.
	return func(o *options) { o.transitional = transitional }
}
@@ -70,22 +68,106 @@ func VerifyDNSLength(verify bool) Option {
return func(o *options) { o.verifyDNSLength = verify }
}
-// IgnoreSTD3Rules sets whether ASCII characters outside the A-Z, a-z, 0-9 and
-// the hyphen should be allowed. By default this is not allowed, but IDNA2003,
-// and as a consequence UTS #46, allows this to be overridden to support
-// browsers that allow characters outside this range, for example a '_' (U+005F
-// LOW LINE). See http://www.rfc- editor.org/std/std3.txt for more details.
-func IgnoreSTD3Rules(ignore bool) Option {
- return func(o *options) { o.ignoreSTD3Rules = ignore }
+// RemoveLeadingDots removes leading label separators. Leading runes that map to
+// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
+//
+// This is the behavior suggested by UTS #46 and is adopted by some
+// browsers.
+func RemoveLeadingDots(remove bool) Option {
+ return func(o *options) { o.removeLeadingDots = remove }
+}
+
+// ValidateLabels sets whether to check the mandatory label validation criteria
+// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
+// of hyphens ('-'), normalization, validity of runes, and the context rules.
+func ValidateLabels(enable bool) Option {
+ return func(o *options) {
+ // Don't override existing mappings, but set one that at least checks
+ // normalization if it is not set.
+ if o.mapping == nil && enable {
+ o.mapping = normalize
+ }
+ o.trie = trie
+ o.validateLabels = enable
+ o.fromPuny = validateFromPunycode
+ }
+}
+
+// StrictDomainName limits the set of permissible ASCII characters to those
+// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
+// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
+//
+// This option is useful, for instance, for browsers that allow characters
+// outside this range, for example a '_' (U+005F LOW LINE). See
+// http://www.rfc-editor.org/std/std3.txt for more details. This option
+// corresponds to the UseSTD3ASCIIRules option in UTS #46.
+func StrictDomainName(use bool) Option {
+ return func(o *options) {
+ o.trie = trie
+ o.useSTD3Rules = use
+ o.fromPuny = validateFromPunycode
+ }
+}
+
+// NOTE: the following options pull in tables. The tables should not be linked
+// in as long as the options are not used.
+
+// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
+// that relies on proper validation of labels should include this rule.
+func BidiRule() Option {
+ return func(o *options) { o.bidirule = bidirule.ValidString }
+}
+
+// ValidateForRegistration sets validation options to verify that a given IDN is
+// properly formatted for registration as defined by Section 4 of RFC 5891.
+func ValidateForRegistration() Option {
+ return func(o *options) {
+ o.mapping = validateRegistration
+ StrictDomainName(true)(o)
+ ValidateLabels(true)(o)
+ VerifyDNSLength(true)(o)
+ BidiRule()(o)
+ }
+}
+
+// MapForLookup sets validation and mapping options such that a given IDN is
+// transformed for domain name lookup according to the requirements set out in
+// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
+// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
+// to add this check.
+//
+// The mappings include normalization and mapping case, width and other
+// compatibility mappings.
+func MapForLookup() Option {
+ return func(o *options) {
+ o.mapping = validateAndMap
+ StrictDomainName(true)(o)
+ ValidateLabels(true)(o)
+ }
}
type options struct {
- transitional bool
- ignoreSTD3Rules bool
- verifyDNSLength bool
+ transitional bool
+ useSTD3Rules bool
+ validateLabels bool
+ verifyDNSLength bool
+ removeLeadingDots bool
+
+ trie *idnaTrie
+
+ // fromPuny calls validation rules when converting A-labels to U-labels.
+ fromPuny func(p *Profile, s string) error
+
+ // mapping implements a validation and mapping step as defined in RFC 5895
+ // or UTS 46, tailored to, for example, domain registration or lookup.
+ mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
+
+ // bidirule, if specified, checks whether s conforms to the Bidi Rule
+ // defined in RFC 5893.
+ bidirule func(s string) bool
}
-// A Profile defines the configuration of a IDNA mapper.
+// A Profile defines the configuration of an IDNA mapper.
type Profile struct {
options
}
@@ -97,8 +179,13 @@ func apply(o *options, opts []Option) {
}
// New creates a new Profile.
-// With no options, the returned profile is the non-transitional profile as
-// defined in UTS #46.
+//
+// With no options, the returned Profile is the most permissive and equals the
+// Punycode Profile. Options can be passed to further restrict the Profile. The
+// MapForLookup and ValidateForRegistration options set a collection of options,
+// for lookup and registration purposes respectively, which can be tailored by
+// adding more fine-grained options, where later options override earlier
+// options.
func New(o ...Option) *Profile {
p := &Profile{}
apply(&p.options, o)
@@ -132,33 +219,67 @@ func (p *Profile) String() string {
} else {
s = "NonTransitional"
}
- if p.ignoreSTD3Rules {
- s += ":NoSTD3Rules"
+ if p.useSTD3Rules {
+ s += ":UseSTD3Rules"
+ }
+ if p.validateLabels {
+ s += ":ValidateLabels"
+ }
+ if p.verifyDNSLength {
+ s += ":VerifyDNSLength"
}
return s
}
var (
- // Resolve is the recommended profile for resolving domain names.
- // The configuration of this profile may change over time.
- Resolve = resolve
+ // Punycode is a Profile that does raw punycode processing with a minimum
+ // of validation.
+ Punycode *Profile = punycode
+
+ // Lookup is the recommended profile for looking up domain names, according
+ // to Section 5 of RFC 5891. The exact configuration of this profile may
+ // change over time.
+ Lookup *Profile = lookup
// Display is the recommended profile for displaying domain names.
// The configuration of this profile may change over time.
- Display = display
-
- // NonTransitional defines a profile that implements the Transitional
- // mapping as defined in UTS #46 with no additional constraints.
- NonTransitional = nonTransitional
-
- resolve = &Profile{options{transitional: true}}
- display = &Profile{}
- nonTransitional = &Profile{}
+ Display *Profile = display
+
+ // Registration is the recommended profile for checking whether a given
+ // IDN is valid for registration, according to Section 4 of RFC 5891.
+ Registration *Profile = registration
+
+ punycode = &Profile{}
+ lookup = &Profile{options{
+ transitional: true,
+ useSTD3Rules: true,
+ validateLabels: true,
+ trie: trie,
+ fromPuny: validateFromPunycode,
+ mapping: validateAndMap,
+ bidirule: bidirule.ValidString,
+ }}
+ display = &Profile{options{
+ useSTD3Rules: true,
+ validateLabels: true,
+ trie: trie,
+ fromPuny: validateFromPunycode,
+ mapping: validateAndMap,
+ bidirule: bidirule.ValidString,
+ }}
+ registration = &Profile{options{
+ useSTD3Rules: true,
+ validateLabels: true,
+ verifyDNSLength: true,
+ trie: trie,
+ fromPuny: validateFromPunycode,
+ mapping: validateRegistration,
+ bidirule: bidirule.ValidString,
+ }}
// TODO: profiles
- // V2008: strict IDNA2008
- // Register: recommended for approving domain names: nontransitional, but
- // bundle or block deviation characters.
+ // Register: recommended for approving domain names: don't do any mappings
+ // but rather reject on invalid input. Bundle or block deviation characters.
)
type labelError struct{ label, code_ string }
@@ -178,53 +299,21 @@ func (e runeError) Error() string {
// process implements the algorithm described in section 4 of UTS #46,
// see http://www.unicode.org/reports/tr46.
func (p *Profile) process(s string, toASCII bool) (string, error) {
- var (
- b []byte
- err error
- k, i int
- )
- for i < len(s) {
- v, sz := trie.lookupString(s[i:])
- start := i
- i += sz
- // Copy bytes not copied so far.
- switch p.simplify(info(v).category()) {
- case valid:
- continue
- case disallowed:
- if err == nil {
- r, _ := utf8.DecodeRuneInString(s[i:])
- err = runeError(r)
- }
- continue
- case mapped, deviation:
- b = append(b, s[k:start]...)
- b = info(v).appendMapping(b, s[start:i])
- case ignored:
- b = append(b, s[k:start]...)
- // drop the rune
- case unknown:
- b = append(b, s[k:start]...)
- b = append(b, "\ufffd"...)
- }
- k = i
+ var err error
+ var isBidi bool
+ if p.mapping != nil {
+ s, isBidi, err = p.mapping(p, s)
}
- if k == 0 {
- // No changes so far.
- s = norm.NFC.String(s)
- } else {
- b = append(b, s[k:]...)
- if norm.NFC.QuickSpan(b) != len(b) {
- b = norm.NFC.Bytes(b)
+ // Remove leading empty labels.
+ if p.removeLeadingDots {
+ for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
}
- // TODO: the punycode converters require strings as input.
- s = string(b)
}
- // Remove leading empty labels
- for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
- }
- if s == "" {
- return "", &labelError{s, "A4"}
+ // TODO: allow for a quick check of the tables data.
+ // It seems like we should only create this error on ToASCII, but the
+ // UTS 46 conformance tests suggest we should always check this.
+ if err == nil && p.verifyDNSLength && s == "" {
+ err = &labelError{s, "A4"}
}
labels := labelIter{orig: s}
for ; !labels.done(); labels.next() {
@@ -232,7 +321,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
if label == "" {
// Empty labels are not okay. The label iterator skips the last
// label if it is empty.
- if err == nil {
+ if err == nil && p.verifyDNSLength {
err = &labelError{s, "A4"}
}
continue
@@ -246,15 +335,27 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
// Spec says keep the old label.
continue
}
+ isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
labels.set(u)
- if err == nil {
- err = p.validateFromPunycode(u)
+ if err == nil && p.validateLabels {
+ err = p.fromPuny(p, u)
}
if err == nil {
- err = NonTransitional.validate(u)
+ // This should be called on NonTransitional, according to the
+ // spec, but that currently does not have any effect. Use the
+ // original profile to preserve options.
+ err = p.validateLabel(u)
}
} else if err == nil {
- err = p.validate(label)
+ err = p.validateLabel(label)
+ }
+ }
+ if isBidi && p.bidirule != nil && err == nil {
+ for labels.reset(); !labels.done(); labels.next() {
+ if !p.bidirule(labels.label()) {
+ err = &labelError{s, "B"}
+ break
+ }
}
}
if toASCII {
@@ -288,6 +389,117 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
return s, err
}
+func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
+ // TODO: consider first doing a quick check to see if any of these checks
+ // need to be done. This will make it slower in the general case, but
+ // faster in the common case.
+ mapped = norm.NFC.String(s)
+ isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
+ return mapped, isBidi, nil
+}
+
+func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
+ // TODO: filter need for normalization in loop below.
+ if !norm.NFC.IsNormalString(s) {
+ return s, false, &labelError{s, "V1"}
+ }
+ for i := 0; i < len(s); {
+ v, sz := trie.lookupString(s[i:])
+ if sz == 0 {
+ return s, bidi, runeError(utf8.RuneError)
+ }
+ bidi = bidi || info(v).isBidi(s[i:])
+ // Copy bytes not copied so far.
+ switch p.simplify(info(v).category()) {
+ // TODO: handle the NV8 defined in the Unicode idna data set to allow
+ // for strict conformance to IDNA2008.
+ case valid, deviation:
+ case disallowed, mapped, unknown, ignored:
+ r, _ := utf8.DecodeRuneInString(s[i:])
+ return s, bidi, runeError(r)
+ }
+ i += sz
+ }
+ return s, bidi, nil
+}
+
+func (c info) isBidi(s string) bool {
+ if !c.isMapped() {
+ return c&attributesMask == rtl
+ }
+ // TODO: also store bidi info for mapped data. This is possible, but a bit
+ // cumbersome and not for the common case.
+ p, _ := bidi.LookupString(s)
+ switch p.Class() {
+ case bidi.R, bidi.AL, bidi.AN:
+ return true
+ }
+ return false
+}
+
+func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
+ var (
+ b []byte
+ k int
+ )
+ // combinedInfoBits contains the or-ed bits of all runes. We use this
+ // to derive the mayNeedNorm bit later. This may trigger normalization
+ // overeagerly, but it will not do so in the common case. The end result
+ // is another 10% saving on BenchmarkProfile for the common case.
+ var combinedInfoBits info
+ for i := 0; i < len(s); {
+ v, sz := trie.lookupString(s[i:])
+ if sz == 0 {
+ b = append(b, s[k:i]...)
+ b = append(b, "\ufffd"...)
+ k = len(s)
+ if err == nil {
+ err = runeError(utf8.RuneError)
+ }
+ break
+ }
+ combinedInfoBits |= info(v)
+ bidi = bidi || info(v).isBidi(s[i:])
+ start := i
+ i += sz
+ // Copy bytes not copied so far.
+ switch p.simplify(info(v).category()) {
+ case valid:
+ continue
+ case disallowed:
+ if err == nil {
+ r, _ := utf8.DecodeRuneInString(s[start:])
+ err = runeError(r)
+ }
+ continue
+ case mapped, deviation:
+ b = append(b, s[k:start]...)
+ b = info(v).appendMapping(b, s[start:i])
+ case ignored:
+ b = append(b, s[k:start]...)
+ // drop the rune
+ case unknown:
+ b = append(b, s[k:start]...)
+ b = append(b, "\ufffd"...)
+ }
+ k = i
+ }
+ if k == 0 {
+ // No changes so far.
+ if combinedInfoBits&mayNeedNorm != 0 {
+ s = norm.NFC.String(s)
+ }
+ } else {
+ b = append(b, s[k:]...)
+ if norm.NFC.QuickSpan(b) != len(b) {
+ b = norm.NFC.Bytes(b)
+ }
+ // TODO: the punycode converters require strings as input.
+ s = string(b)
+ }
+ return s, bidi, err
+}
+
// A labelIter allows iterating over domain name labels.
type labelIter struct {
orig string
@@ -354,13 +566,13 @@ const acePrefix = "xn--"
func (p *Profile) simplify(cat category) category {
switch cat {
case disallowedSTD3Mapped:
- if !p.ignoreSTD3Rules {
+ if p.useSTD3Rules {
cat = disallowed
} else {
cat = mapped
}
case disallowedSTD3Valid:
- if !p.ignoreSTD3Rules {
+ if p.useSTD3Rules {
cat = disallowed
} else {
cat = valid
@@ -376,12 +588,17 @@ func (p *Profile) simplify(cat category) category {
return cat
}
-func (p *Profile) validateFromPunycode(s string) error {
+func validateFromPunycode(p *Profile, s string) error {
if !norm.NFC.IsNormalString(s) {
return &labelError{s, "V1"}
}
+ // TODO: detect whether string may have to be normalized in the following
+ // loop.
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
+ if sz == 0 {
+ return runeError(utf8.RuneError)
+ }
if c := p.simplify(info(v).category()); c != valid && c != deviation {
return &labelError{s, "V6"}
}
@@ -452,9 +669,19 @@ var joinStates = [][numJoinTypes]joinState{
},
}
-// validate validates the criteria from Section 4.1. Item 1, 4, and 6 are
+// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
-func (p *Profile) validate(s string) error {
+func (p *Profile) validateLabel(s string) (err error) {
+ if s == "" {
+ if p.verifyDNSLength {
+ return &labelError{s, "A4"}
+ }
+ return nil
+ }
+ if !p.validateLabels {
+ return nil
+ }
+ trie := p.trie // p.validateLabels is only set if trie is set.
if len(s) > 4 && s[2] == '-' && s[3] == '-' {
return &labelError{s, "V2"}
}
@@ -467,9 +694,6 @@ func (p *Profile) validate(s string) error {
if x.isModifier() {
return &labelError{s, "V5"}
}
- if !bidirule.ValidString(s) {
- return &labelError{s, "B"}
- }
// Quickly return in the absence of zero-width (non) joiners.
if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
return nil