aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/net/idna/idna9.0.0.go
blob: 55f718f12744713d679c049765086d87a7262132 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !go1.10
// +build !go1.10

// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in https://www.unicode.org/reports/tr46.
// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
package idna // import "golang.org/x/net/idna"

import (
	"fmt"
	"strings"
	"unicode/utf8"

	"golang.org/x/text/secure/bidirule"
	"golang.org/x/text/unicode/norm"
)

// NOTE: Unlike common practice in Go APIs, the functions will return a
// sanitized domain name in case of errors. Browsers sometimes use a partially
// evaluated string as lookup.
// TODO: the current error handling is, in my opinion, the least opinionated.
// Other strategies are also viable, though:
// Option 1) Return an empty string in case of error, but allow the user to
//    specify explicitly which errors to ignore.
// Option 2) Return the partially evaluated string if it is itself a valid
//    string, otherwise return the empty string in case of error.
// Option 3) Option 1 and 2.
// Option 4) Always return an empty string for now and implement Option 1 as
//    needed, and document that the return string may not be empty in case of
//    error in the future.
// I think Option 1 is best, but it is quite opinionated.

// ToASCII is a wrapper for Punycode.ToASCII.
func ToASCII(s string) (string, error) {
	return Punycode.process(s, true)
}

// ToUnicode is a wrapper for Punycode.ToUnicode.
func ToUnicode(s string) (string, error) {
	return Punycode.process(s, false)
}

// An Option configures a Profile at creation time.
type Option func(*options)

// Transitional sets a Profile to use the Transitional mapping as defined in UTS
// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
// transitional mapping provides a compromise between IDNA2003 and IDNA2008
// compatibility. It is used by most browsers when resolving domain names. This
// option is only meaningful if combined with MapForLookup.
func Transitional(transitional bool) Option {
	return func(o *options) { o.transitional = true }
}

// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
// are longer than allowed by the RFC.
//
// This option corresponds to the VerifyDnsLength flag in UTS #46.
func VerifyDNSLength(verify bool) Option {
	return func(o *options) { o.verifyDNSLength = verify }
}

// RemoveLeadingDots removes leading label separators. Leading runes that map to
// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
func RemoveLeadingDots(remove bool) Option {
	return func(o *options) { o.removeLeadingDots = remove }
}

// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
// in UTS #46.
func ValidateLabels(enable bool) Option {
	return func(o *options) {
		// Don't override existing mappings, but set one that at least checks
		// normalization if it is not set.
		if o.mapping == nil && enable {
			o.mapping = normalize
		}
		o.trie = trie
		o.checkJoiners = enable
		o.checkHyphens = enable
		if enable {
			o.fromPuny = validateFromPunycode
		} else {
			o.fromPuny = nil
		}
	}
}

// CheckHyphens sets whether to check for correct use of hyphens ('-') in
// labels. Most web browsers do not have this option set, since labels such as
// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
//
// This option corresponds to the CheckHyphens flag in UTS #46.
func CheckHyphens(enable bool) Option {
	return func(o *options) { o.checkHyphens = enable }
}

// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
// A of RFC 5892, concerning the use of joiner runes.
//
// This option corresponds to the CheckJoiners flag in UTS #46.
func CheckJoiners(enable bool) Option {
	return func(o *options) {
		o.trie = trie
		o.checkJoiners = enable
	}
}

// StrictDomainName limits the set of permissable ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
// but is only useful if ValidateLabels is set.
//
// This option is useful, for instance, for browsers that allow characters
// outside this range, for example a '_' (U+005F LOW LINE). See
// http://www.rfc-editor.org/std/std3.txt for more details.
//
// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
func StrictDomainName(use bool) Option {
	return func(o *options) { o.useSTD3Rules = use }
}

// NOTE: the following options pull in tables. The tables should not be linked
// in as long as the options are not used.

// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
// that relies on proper validation of labels should include this rule.
//
// This option corresponds to the CheckBidi flag in UTS #46.
func BidiRule() Option {
	return func(o *options) { o.bidirule = bidirule.ValidString }
}

// ValidateForRegistration sets validation options to verify that a given IDN is
// properly formatted for registration as defined by Section 4 of RFC 5891.
func ValidateForRegistration() Option {
	return func(o *options) {
		o.mapping = validateRegistration
		StrictDomainName(true)(o)
		ValidateLabels(true)(o)
		VerifyDNSLength(true)(o)
		BidiRule()(o)
	}
}

// MapForLookup sets validation and mapping options such that a given IDN is
// transformed for domain name lookup according to the requirements set out in
// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
// to add this check.
//
// The mappings include normalization and mapping case, width and other
// compatibility mappings.
func MapForLookup() Option {
	return func(o *options) {
		o.mapping = validateAndMap
		StrictDomainName(true)(o)
		ValidateLabels(true)(o)
		RemoveLeadingDots(true)(o)
	}
}

type options struct {
	transitional      bool
	useSTD3Rules      bool
	checkHyphens      bool
	checkJoiners      bool
	verifyDNSLength   bool
	removeLeadingDots bool

	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (string, error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}

// A Profile defines the configuration of a IDNA mapper.
type Profile struct {
	options
}

func apply(o *options, opts []Option) {
	for _, f := range opts {
		f(o)
	}
}

// New creates a new Profile.
//
// With no options, the returned Profile is the most permissive and equals the
// Punycode Profile. Options can be passed to further restrict the Profile. The
// MapForLookup and ValidateForRegistration options set a collection of options,
// for lookup and registration purposes respectively, which can be tailored by
// adding more fine-grained options, where later options override earlier
// options.
func New(o ...Option) *Profile {
	p := &Profile{}
	apply(&p.options, o)
	return p
}

// ToASCII converts a domain or domain label to its ASCII form. For example,
// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
// ToASCII("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToASCII(s string) (string, error) {
	return p.process(s, true)
}

// ToUnicode converts a domain or domain label to its Unicode form. For example,
// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
// ToUnicode("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToUnicode(s string) (string, error) {
	pp := *p
	pp.transitional = false
	return pp.process(s, false)
}

// String reports a string with a description of the profile for debugging
// purposes. The string format may change with different versions.
func (p *Profile) String() string {
	s := ""
	if p.transitional {
		s = "Transitional"
	} else {
		s = "NonTransitional"
	}
	if p.useSTD3Rules {
		s += ":UseSTD3Rules"
	}
	if p.checkHyphens {
		s += ":CheckHyphens"
	}
	if p.checkJoiners {
		s += ":CheckJoiners"
	}
	if p.verifyDNSLength {
		s += ":VerifyDNSLength"
	}
	return s
}

var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	punycode = &Profile{}
	lookup   = &Profile{options{
		transitional:      true,
		removeLeadingDots: true,
		useSTD3Rules:      true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	display = &Profile{options{
		useSTD3Rules:      true,
		removeLeadingDots: true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)

type labelError struct{ label, code_ string }

func (e labelError) code() string { return e.code_ }
func (e labelError) Error() string {
	return fmt.Sprintf("idna: invalid label %q", e.label)
}

type runeError rune

func (e runeError) code() string { return "P1" }
func (e runeError) Error() string {
	return fmt.Sprintf("idna: disallowed rune %U", e)
}

// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	if p.mapping != nil {
		s, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggests we should always check this.
	if err == nil && p.verifyDNSLength && s == "" {
		err = &labelError{s, "A4"}
	}
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = &labelError{s, "A4"}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			labels.set(u)
			if err == nil && p.fromPuny != nil {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u)
			}
		} else if err == nil {
			err = p.validateLabel(label)
		}
	}
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				label = a
				labels.set(a)
			}
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = &labelError{label, "A4"}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = &labelError{s, "A4"}
		}
	}
	return s, err
}

func normalize(p *Profile, s string) (string, error) {
	return norm.NFC.String(s), nil
}

func validateRegistration(p *Profile, s string) (string, error) {
	if !norm.NFC.IsNormalString(s) {
		return s, &labelError{s, "V1"}
	}
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		// TODO: handle the NV8 defined in the Unicode idna data set to allow
		// for strict conformance to IDNA2008.
		case valid, deviation:
		case disallowed, mapped, unknown, ignored:
			r, _ := utf8.DecodeRuneInString(s[i:])
			return s, runeError(r)
		}
		i += sz
	}
	return s, nil
}

func validateAndMap(p *Profile, s string) (string, error) {
	var (
		err error
		b   []byte
		k   int
	)
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		start := i
		i += sz
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		case valid:
			continue
		case disallowed:
			if err == nil {
				r, _ := utf8.DecodeRuneInString(s[start:])
				err = runeError(r)
			}
			continue
		case mapped, deviation:
			b = append(b, s[k:start]...)
			b = info(v).appendMapping(b, s[start:i])
		case ignored:
			b = append(b, s[k:start]...)
			// drop the rune
		case unknown:
			b = append(b, s[k:start]...)
			b = append(b, "\ufffd"...)
		}
		k = i
	}
	if k == 0 {
		// No changes so far.
		s = norm.NFC.String(s)
	} else {
		b = append(b, s[k:]...)
		if norm.NFC.QuickSpan(b) != len(b) {
			b = norm.NFC.Bytes(b)
		}
		// TODO: the punycode converters require strings as input.
		s = string(b)
	}
	return s, err
}

// A labelIter allows iterating over domain name labels.
type labelIter struct {
	orig     string
	slice    []string
	curStart int
	curEnd   int
	i        int
}

func (l *labelIter) reset() {
	l.curStart = 0
	l.curEnd = 0
	l.i = 0
}

func (l *labelIter) done() bool {
	return l.curStart >= len(l.orig)
}

func (l *labelIter) result() string {
	if l.slice != nil {
		return strings.Join(l.slice, ".")
	}
	return l.orig
}

func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	p := strings.IndexByte(l.orig[l.curStart:], '.')
	l.curEnd = l.curStart + p
	if p == -1 {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	if l.slice != nil {
		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
			l.curStart = len(l.orig)
		}
	} else {
		l.curStart = l.curEnd + 1
		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
			l.curStart = len(l.orig)
		}
	}
}

func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}

// acePrefix is the ASCII Compatible Encoding prefix.
const acePrefix = "xn--"

func (p *Profile) simplify(cat category) category {
	switch cat {
	case disallowedSTD3Mapped:
		if p.useSTD3Rules {
			cat = disallowed
		} else {
			cat = mapped
		}
	case disallowedSTD3Valid:
		if p.useSTD3Rules {
			cat = disallowed
		} else {
			cat = valid
		}
	case deviation:
		if !p.transitional {
			cat = valid
		}
	case validNV8, validXV8:
		// TODO: handle V2008
		cat = valid
	}
	return cat
}

func validateFromPunycode(p *Profile, s string) error {
	if !norm.NFC.IsNormalString(s) {
		return &labelError{s, "V1"}
	}
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if c := p.simplify(info(v).category()); c != valid && c != deviation {
			return &labelError{s, "V6"}
		}
		i += sz
	}
	return nil
}

const (
	zwnj = "\u200c"
	zwj  = "\u200d"
)

type joinState int8

const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)

var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}

// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
func (p *Profile) validateLabel(s string) error {
	if s == "" {
		if p.verifyDNSLength {
			return &labelError{s, "A4"}
		}
		return nil
	}
	if p.bidirule != nil && !p.bidirule(s) {
		return &labelError{s, "B"}
	}
	if p.checkHyphens {
		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
			return &labelError{s, "V2"}
		}
		if s[0] == '-' || s[len(s)-1] == '-' {
			return &labelError{s, "V3"}
		}
	}
	if !p.checkJoiners {
		return nil
	}
	trie := p.trie // p.checkJoiners is only set if trie is set.
	// TODO: merge the use of this in the trie.
	v, sz := trie.lookupString(s)
	x := info(v)
	if x.isModifier() {
		return &labelError{s, "V5"}
	}
	// Quickly return in the absence of zero-width (non) joiners.
	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
		return nil
	}
	st := stateStart
	for i := 0; ; {
		jt := x.joinType()
		if s[i:i+sz] == zwj {
			jt = joinZWJ
		} else if s[i:i+sz] == zwnj {
			jt = joinZWNJ
		}
		st = joinStates[st][jt]
		if x.isViramaModifier() {
			st = joinStates[st][joinVirama]
		}
		if i += sz; i == len(s) {
			break
		}
		v, sz = trie.lookupString(s[i:])
		x = info(v)
	}
	if st == stateFAIL || st == stateAfter {
		return &labelError{s, "C"}
	}
	return nil
}

func ascii(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}